diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index b317dabd5dda..a6c4ead9bbf2 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -41,11 +41,15 @@ mod decimal; mod dictionary; mod list; mod map; +mod run_array; mod string; use crate::cast::decimal::*; use crate::cast::dictionary::*; use crate::cast::list::*; use crate::cast::map::*; +use crate::cast::run_array::{ + can_cast_to_run_end_encoded, cast_to_run_end_encoded, run_end_encoded_cast, +}; use crate::cast::string::*; use arrow_buffer::IntervalMonthDayNano; @@ -137,6 +141,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { can_cast_types(from_value_type, to_value_type) } (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type), + (RunEndEncoded(_, value_type), _) => can_cast_types(value_type.data_type(), to_type), + (_, RunEndEncoded(_, _value_type)) => can_cast_to_run_end_encoded(from_type, to_type), (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type), (List(list_from) | LargeList(list_from), List(list_to) | LargeList(list_to)) => { can_cast_types(list_from.data_type(), list_to.data_type()) @@ -739,6 +745,32 @@ pub fn cast_with_options( | Map(_, _) | Dictionary(_, _), ) => Ok(new_null_array(to_type, array.len())), + (RunEndEncoded(index_type, _), _) => { + let mut new_cast_options = cast_options.clone(); + new_cast_options.safe = false; + match index_type.data_type() { + Int16 => run_end_encoded_cast::(array, to_type, &new_cast_options), + Int32 => run_end_encoded_cast::(array, to_type, &new_cast_options), + Int64 => run_end_encoded_cast::(array, to_type, &new_cast_options), + _ => Err(ArrowError::CastError(format!( + "Casting from run end encoded type {from_type:?} to {to_type:?} not supported", + ))), + } + } + (_, RunEndEncoded(index_type, value_type)) => match index_type.data_type() { + Int16 => { + cast_to_run_end_encoded::(array, value_type.data_type(), cast_options) + } + Int32 => { + 
cast_to_run_end_encoded::(array, value_type.data_type(), cast_options) + } + Int64 => { + cast_to_run_end_encoded::(array, value_type.data_type(), cast_options) + } + _ => Err(ArrowError::CastError(format!( + "Casting from type {from_type:?} to run end encoded type {to_type:?} not supported", + ))), + }, (Dictionary(index_type, _), _) => match **index_type { Int8 => dictionary_cast::(array, to_type, cast_options), Int16 => dictionary_cast::(array, to_type, cast_options), @@ -10684,4 +10716,433 @@ mod tests { )) as ArrayRef; assert_eq!(*fixed_array, *r); } + + #[cfg(test)] + mod run_end_encoded_tests { + use super::*; + use arrow_schema::{DataType, Field}; + use std::sync::Arc; + + #[test] + fn test_run_end_encoded_to_primitive() { + // Create a RunEndEncoded array: [1, 1, 2, 2, 2, 3] + let run_ends = Int32Array::from(vec![2, 5, 6]); + let values = Int32Array::from(vec![1, 2, 3]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + // Cast to Int64 + let cast_result = cast(&array_ref, &DataType::Int64).unwrap(); + // Verify the result is a RunArray with Int64 values + let result_run_array = cast_result.as_any().downcast_ref::().unwrap(); + assert_eq!( + result_run_array.values(), + &[1i64, 1i64, 2i64, 2i64, 2i64, 3i64] + ); + } + + #[test] + fn test_run_end_encoded_to_string() { + let run_ends = Int32Array::from(vec![2, 3, 5]); + let values = Int32Array::from(vec![10, 20, 30]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + + // Cast to String + let cast_result = cast(&array_ref, &DataType::Utf8).unwrap(); + + // Verify the result is a RunArray with String values + let result_array = cast_result.as_any().downcast_ref::().unwrap(); + // Check that values are correct + assert_eq!(result_array.value(0), "10"); + assert_eq!(result_array.value(1), "10"); + assert_eq!(result_array.value(2), "20"); + } + + #[test] + fn 
test_primitive_to_run_end_encoded() { + // Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3] + let source_array = Int32Array::from(vec![1, 1, 2, 2, 2, 3]); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is a RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check run structure: runs should end at positions [2, 5, 6] + assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]); + + // Check values: should be [1, 2, 3] + let values_array = result_run_array.values().as_primitive::(); + assert_eq!(values_array.values(), &[1, 2, 3]); + } + + #[test] + fn test_primitive_to_run_end_encoded_with_nulls() { + let source_array = Int32Array::from(vec![ + Some(1), + Some(1), + None, + None, + Some(2), + Some(2), + Some(3), + Some(3), + None, + None, + Some(4), + Some(4), + Some(5), + Some(5), + None, + None, + ]); + let array_ref = Arc::new(source_array) as ArrayRef; + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!( + result_run_array.run_ends().values(), + &[2, 4, 6, 8, 10, 12, 14, 16] + ); + assert_eq!( + result_run_array + .values() + .as_primitive::() + .values(), + &[1, 0, 2, 3, 0, 4, 5, 0] + ); + assert_eq!(result_run_array.values().null_count(), 3); + } + + #[test] + fn test_primitive_to_run_end_encoded_with_nulls_consecutive() { + let source_array = Int64Array::from(vec![ + Some(1), + Some(1), + None, + None, + None, + None, + None, + None, + None, + 
None, + Some(4), + Some(20), + Some(500), + Some(500), + None, + None, + ]); + let array_ref = Arc::new(source_array) as ArrayRef; + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Int64, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!( + result_run_array.run_ends().values(), + &[2, 10, 11, 12, 14, 16] + ); + assert_eq!( + result_run_array + .values() + .as_primitive::() + .values(), + &[1, 0, 4, 20, 500, 0] + ); + assert_eq!(result_run_array.values().null_count(), 2); + } + + #[test] + fn test_string_to_run_end_encoded() { + // Create a String array with repeated values: ["a", "a", "b", "c", "c"] + let source_array = StringArray::from(vec!["a", "a", "b", "c", "c"]); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is a RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check run structure: runs should end at positions [2, 3, 5] + assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]); + + // Check values: should be ["a", "b", "c"] + let values_array = result_run_array.values().as_string::(); + assert_eq!(values_array.value(0), "a"); + assert_eq!(values_array.value(1), "b"); + assert_eq!(values_array.value(2), "c"); + } + + #[test] + fn test_cast_with_type_conversion() { + // Create an Int32 array: [1, 1, 2, 2, 3] + let source_array = Int32Array::from(vec![1, 1, 2, 2, 3]); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded (values get converted to strings) + let target_type = 
DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is a RunArray with String values + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check that values were converted to strings + assert_eq!(result_run_array.values().data_type(), &DataType::Utf8); + + // Check run structure: runs should end at positions [2, 4, 5] + assert_eq!(result_run_array.run_ends().values(), &[2, 4, 5]); + + // Check values: should be ["1", "2", "3"] + let values_array = result_run_array.values().as_string::(); + assert_eq!(values_array.value(0), "1"); + assert_eq!(values_array.value(1), "2"); + assert_eq!(values_array.value(2), "3"); + } + + #[test] + fn test_empty_array_to_run_end_encoded() { + // Create an empty Int32 array + let source_array = Int32Array::from(Vec::::new()); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is an empty RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check that both run_ends and values are empty + assert_eq!(result_run_array.run_ends().len(), 0); + assert_eq!(result_run_array.values().len(), 0); + } + + #[test] + fn test_run_end_encoded_with_nulls() { + // Create a RunEndEncoded array with nulls: [1, 1, null, 2, 2] + let run_ends = Int32Array::from(vec![2, 3, 5]); + let values = Int32Array::from(vec![Some(1), None, Some(2)]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + + // Cast to String + let cast_result = cast(&array_ref, 
&DataType::Utf8).unwrap(); + + // Verify the result preserves nulls + let result_run_array = cast_result.as_any().downcast_ref::().unwrap(); + assert_eq!(result_run_array.value(0), "1"); + assert!(result_run_array.is_null(2)); + assert_eq!(result_run_array.value(4), "2"); + } + + #[test] + fn test_different_index_types() { + // Test with Int16 index type + let source_array = Int32Array::from(vec![1, 1, 2, 3, 3]); + let array_ref = Arc::new(source_array) as ArrayRef; + + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + assert_eq!(cast_result.data_type(), &target_type); + + // Test with Int64 index type + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int64, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + assert_eq!(cast_result.data_type(), &target_type); + } + + #[test] + fn test_unsupported_cast_to_run_end_encoded() { + // Create a Struct array - complex nested type that might not be supported + let field = Field::new("item", DataType::Int32, false); + let struct_array = StructArray::from(vec![( + Arc::new(field), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + )]); + let array_ref = Arc::new(struct_array) as ArrayRef; + + // This should fail because: + // 1. The target type is not RunEndEncoded + // 2. 
The target type is not supported for casting from StructArray + let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10)); + + // Expect this to fail + assert!(cast_result.is_err()); + } + + #[test] + fn test_cast_run_end_encoded_int64_to_int16_should_fail() { + /// Test casting RunEndEncoded to RunEndEncoded should fail + use arrow_array::{Int64Array, RunArray, StringArray}; + use arrow_schema::{DataType, Field}; + use std::sync::Arc; + + // Construct a valid REE array with Int64 run-ends + let run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]); // values too large for Int16 + let values = StringArray::from(vec!["a", "b", "c"]); + + let ree_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, // This should make it fail instead of returning nulls + format_options: FormatOptions::default(), + }; + + // This should fail due to run-end overflow + let result: Result, ArrowError> = + cast_with_options(&array_ref, &target_type, &cast_options); + + match result { + Err(e) => { + assert!(e + .to_string() + .contains("Cast error: Can't cast value 100000 to type Int16")); + } + Ok(_array_ref) => { + panic!("This should not happen"); + } + } + } + + #[test] + fn test_cast_run_end_encoded_int16_to_int64_should_succeed() { + /// Test casting RunEndEncoded to RunEndEncoded should succeed + use arrow_array::{Int16Array, RunArray, StringArray}; + use arrow_schema::{DataType, Field}; + use std::sync::Arc; + + // Construct a valid REE array with Int16 run-ends + let run_ends = Int16Array::from(vec![2, 5, 8]); // values that fit in Int16 + let values = StringArray::from(vec!["a", "b", "c"]); + + let ree_array = RunArray::::try_new(&run_ends, 
&values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded (upcast should succeed) + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int64, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, + format_options: FormatOptions::default(), + }; + + // This should succeed due to valid upcast + let result: Result, ArrowError> = + cast_with_options(&array_ref, &target_type, &cast_options); + + match result { + Ok(array_ref) => { + // Downcast to RunArray + let run_array = array_ref + .as_any() + .downcast_ref::>() + .unwrap(); + + // Verify the cast worked correctly + // Assert the values were cast correctly + assert_eq!(run_array.run_ends().values(), &[2i64, 5i64, 8i64]); + assert_eq!(run_array.values().as_string::().value(0), "a"); + assert_eq!(run_array.values().as_string::().value(1), "b"); + assert_eq!(run_array.values().as_string::().value(2), "c"); + } + Err(e) => { + panic!("Cast should have succeeded but failed: {}", e); + } + } + } + + #[test] + fn test_cast_run_end_encoded_int32_to_int16_should_fail() { + /// Test casting RunEndEncoded to RunEndEncoded should fail + use arrow_array::{Int32Array, RunArray, StringArray}; + use arrow_schema::{DataType, Field}; + use std::sync::Arc; + + // Construct a valid REE array with Int32 run-ends + let run_ends = Int32Array::from(vec![1000, 50000, 80000]); // values too large for Int16 + let values = StringArray::from(vec!["x", "y", "z"]); + + let ree_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded (downcast should fail) + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, + format_options: 
FormatOptions::default(),
+            };
+
+            // This should fail due to run-end overflow
+            let result: Result<Arc<dyn Array>, ArrowError> =
+                cast_with_options(&array_ref, &target_type, &cast_options);
+
+            match result {
+                Ok(_) => {
+                    panic!("Cast should have failed due to overflow but succeeded");
+                }
+                Err(e) => {
+                    // Verify the error is about overflow/out of range
+                    assert!(e.to_string().contains("Can't cast value"));
+                }
+            }
+        }
+    }
 }
diff --git a/arrow-cast/src/cast/run_array.rs b/arrow-cast/src/cast/run_array.rs
new file mode 100644
index 000000000000..3b82ffd4f5bd
--- /dev/null
+++ b/arrow-cast/src/cast/run_array.rs
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Casting to and from run-end encoded (REE) arrays.
+
+use crate::cast::*;
+
+/// Casts a run-end encoded array whose run ends have type `K` to `to_type`.
+///
+/// # Arguments
+/// * `array` - The run-end encoded input; it must be a `RunArray<K>`
+/// * `to_type` - The target data type for the casting operation
+/// * `cast_options` - Options controlling how the values are cast
+///
+/// # Behavior
+/// * REE to REE: the run structure is kept. The values child is cast to the target value
+///   type and the run ends are cast to the target run-end type. Narrowing the run-end type
+///   (for example `Int64` to `Int16`) is attempted and fails if a run end does not fit.
+/// * REE to any other type: the array is expanded to one entry per logical row and the
+///   expanded array is cast to `to_type`.
+///
+/// In both cases the result covers exactly the logical rows of `array`, also when `array`
+/// is a slice of a larger run array.
+///
+/// # Errors
+/// Returns a cast error if `array` is not run-end encoded with run-end type `K`, if the
+/// target run-end type is not `Int16`, `Int32` or `Int64`, or if a run end overflows the
+/// target run-end type. Errors from the underlying casts are propagated.
+pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
+    array: &dyn Array,
+    to_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    if !matches!(array.data_type(), DataType::RunEndEncoded(_, _)) {
+        return Err(ArrowError::CastError(format!(
+            "Cannot cast array of type {:?} to RunEndEncodedArray",
+            array.data_type()
+        )));
+    }
+    let run_array = array
+        .as_any()
+        .downcast_ref::<RunArray<K>>()
+        .ok_or_else(|| ArrowError::CastError("Expected RunArray".to_string()))?;
+
+    match to_type {
+        // CASE 1: stay run-end encoded; cast the values and the run ends
+        DataType::RunEndEncoded(target_index_field, target_value_field) => {
+            let cast_values = cast_with_options(
+                run_array.values(),
+                target_value_field.data_type(),
+                cast_options,
+            )?;
+
+            // A run end that does not fit the target type must be an error: a "safe" cast
+            // would replace it with a null, which is not a valid run end.
+            let mut run_end_options = cast_options.clone();
+            run_end_options.safe = false;
+            let run_ends = PrimitiveArray::<K>::from_iter_values(
+                run_array.run_ends().values().iter().copied(),
+            );
+            let cast_run_ends = cast_with_options(
+                &run_ends,
+                target_index_field.data_type(),
+                &run_end_options,
+            )?;
+
+            let recoded: ArrayRef = match target_index_field.data_type() {
+                DataType::Int16 => Arc::new(RunArray::<Int16Type>::try_new(
+                    cast_run_ends.as_primitive::<Int16Type>(),
+                    cast_values.as_ref(),
+                )?),
+                DataType::Int32 => Arc::new(RunArray::<Int32Type>::try_new(
+                    cast_run_ends.as_primitive::<Int32Type>(),
+                    cast_values.as_ref(),
+                )?),
+                DataType::Int64 => Arc::new(RunArray::<Int64Type>::try_new(
+                    cast_run_ends.as_primitive::<Int64Type>(),
+                    cast_values.as_ref(),
+                )?),
+                _ => {
+                    return Err(ArrowError::CastError(
+                        "Run-end type must be i16, i32, or i64".to_string(),
+                    ))
+                }
+            };
+
+            // The run ends were copied without the input's slice offset, so re-apply the
+            // logical window of `array` to the rebuilt run array.
+            Ok(recoded.slice(run_array.run_ends().offset(), run_array.len()))
+        }
+
+        // CASE 2: expand to the logical form, then cast if the type still differs
+        _ => {
+            let indices = logical_to_physical_indices(run_array);
+            let expanded = take(run_array.values().as_ref(), &indices, None)?;
+
+            if expanded.data_type() != to_type {
+                cast_with_options(expanded.as_ref(), to_type, cast_options)
+            } else {
+                Ok(expanded)
+            }
+        }
+    }
+}
+
+/// Returns, for every logical row of `run_array`, the index of its run in the values child.
+///
+/// The run ends are walked once, and the slice offset and length of `run_array` are
+/// respected, so the result has exactly `run_array.len()` entries.
+fn logical_to_physical_indices<K: RunEndIndexType>(run_array: &RunArray<K>) -> UInt64Array {
+    let run_ends = run_array.run_ends();
+    let window_start = run_ends.offset();
+    let window_len = run_ends.len();
+    let window_end = window_start + window_len;
+
+    let mut indices: Vec<u64> = Vec::with_capacity(window_len);
+    for (physical, run_end) in run_ends.values().iter().enumerate() {
+        if indices.len() == window_len {
+            break;
+        }
+        // Number of window rows covered once this run is included; runs that end before
+        // the window cover none, and the last run is clamped to the window end.
+        let covered = run_end.as_usize().min(window_end).saturating_sub(window_start);
+        if covered > indices.len() {
+            // `usize` to `u64` is lossless on every supported target.
+            indices.resize(covered, physical as u64);
+        }
+    }
+    UInt64Array::from(indices)
+}
+
+/// Run-end encodes `array` into a `RunArray<K>` whose values have type `value_type`.
+///
+/// # Arguments
+/// * `array` - The input array to be run-end encoded
+/// * `value_type` - The target data type for the values in the RunEndEncoded array
+/// * `cast_options` - Options used when `array` must first be cast to `value_type`
+///
+/// # Process
+/// 1. Cast the input array to the target value type if needed
+/// 2. Compare neighbouring rows to find where each run starts (two nulls are equal)
+/// 3. Emit one run end per run and take the first row of every run as its value
+/// 4. Construct and return the RunArray
+///
+/// # Errors
+/// Returns a cast error if the array length does not fit the run-end type `K`, and
+/// propagates errors from the value cast.
+pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
+    array: &dyn Array,
+    value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    // Step 1: Cast the input array to the target value type if necessary
+    let cast_array = if array.data_type() == value_type {
+        make_array(array.to_data())
+    } else {
+        cast_with_options(array, value_type, cast_options)?
+    };
+
+    let mut run_ends_builder = PrimitiveBuilder::<K>::new();
+
+    if cast_array.is_empty() {
+        // Handle empty array case
+        let empty_run_ends = run_ends_builder.finish();
+        let empty_values = make_array(ArrayData::new_empty(value_type));
+        return Ok(Arc::new(RunArray::<K>::try_new(
+            &empty_run_ends,
+            empty_values.as_ref(),
+        )?));
+    }
+
+    let to_run_end = |row: usize| {
+        K::Native::from_usize(row)
+            .ok_or_else(|| ArrowError::CastError("Run end index out of range".to_string()))
+    };
+    // The last run end equals the array length; if that does not fit `K`, fail before
+    // scanning the array.
+    let last_run_end = to_run_end(cast_array.len())?;
+
+    // Step 2: Identify runs of consecutive equal values. `run_starts` holds the first row
+    // of every run; the first run always starts at row 0.
+    let mut run_starts: Vec<u64> = vec![0];
+    for row in 1..cast_array.len() {
+        let same_as_previous = match (cast_array.is_null(row), cast_array.is_null(row - 1)) {
+            (true, true) => true, // Both null
+            // Both non-null: compare the two single-row slices as `ArrayData`. This is a
+            // simple approach that allocates per row; it does not depend on the value type.
+            (false, false) => {
+                cast_array.slice(row, 1).to_data() == cast_array.slice(row - 1, 1).to_data()
+            }
+            _ => false, // One null, one not null
+        };
+
+        if !same_as_previous {
+            // End the current run at `row` and start a new one there.
+            run_ends_builder.append_value(to_run_end(row)?);
+            // `usize` to `u64` is lossless on every supported target.
+            run_starts.push(row as u64);
+        }
+    }
+    run_ends_builder.append_value(last_run_end);
+    let run_ends_array = run_ends_builder.finish();
+
+    // Step 3: Build the values array by taking the element at each run start
+    let values_array = take(&cast_array, &UInt64Array::from(run_starts), None)?;
+
+    // Step 4: Create and return the RunArray
+    let run_array = RunArray::<K>::try_new(&run_ends_array, values_array.as_ref())?;
+    Ok(Arc::new(run_array))
+}
+
+/// Checks whether an array of `from_type` can be cast to the run-end encoded `to_type`.
+///
+/// # Arguments
+/// * `from_type` - The source data type to be checked
+/// * `to_type` - The target data type to be checked
+///
+/// # Returns
+/// `true` only if `to_type` is `RunEndEncoded` with an `Int16`, `Int32` or `Int64` run-end
+/// type, `from_type` is one of the types accepted for run-end encoding, and `from_type`
+/// can be cast to the target value type.
+pub(crate) fn can_cast_to_run_end_encoded(from_type: &DataType, to_type: &DataType) -> bool {
+    match to_type {
+        DataType::RunEndEncoded(run_ends_field, values_field) => {
+            matches!(
+                run_ends_field.data_type(),
+                DataType::Int16 | DataType::Int32 | DataType::Int64
+            ) && is_run_encodable(from_type)
+                && can_cast_types(from_type, values_field.data_type())
+        }
+        _ => false, // Not casting to REE type
+    }
+}
+
+/// Returns `true` for the source types accepted when casting to a run-end encoded array.
+fn is_run_encodable(data_type: &DataType) -> bool {
+    matches!(
+        data_type,
+        // Primitive types
+        DataType::Boolean
+            | DataType::Int8
+            | DataType::Int16
+            | DataType::Int32
+            | DataType::Int64
+            | DataType::UInt8
+            | DataType::UInt16
+            | DataType::UInt32
+            | DataType::UInt64
+            | DataType::Float32
+            | DataType::Float64
+            // String types
+            | DataType::Utf8
+            | DataType::LargeUtf8
+            | DataType::Utf8View
+            // Binary types
+            | DataType::Binary
+            | DataType::LargeBinary
+            | DataType::BinaryView
+            | DataType::FixedSizeBinary(_)
+            // Temporal types
+            | DataType::Date32
+            | DataType::Date64
+            | DataType::Timestamp(_, _)
+            | DataType::Time32(_)
+            | DataType::Time64(_)
+            | DataType::Duration(_)
+            | DataType::Interval(_)
+            // Decimal types
+            | DataType::Decimal128(_, _)
+            | DataType::Decimal256(_, _)
+            // Already run-end encoded
+            | DataType::RunEndEncoded(_, _)
+    )
+}