@@ -149,16 +149,6 @@ fn instantiate_static_filter(
149149 DataType :: UInt64 => Ok ( Arc :: new ( UInt64StaticFilter :: try_new ( & in_array) ?) ) ,
150150 // Boolean
151151 DataType :: Boolean => Ok ( Arc :: new ( BooleanStaticFilter :: try_new ( & in_array) ?) ) ,
152- // String types
153- DataType :: Utf8 => Ok ( Arc :: new ( Utf8StaticFilter :: try_new ( & in_array) ?) ) ,
154- DataType :: LargeUtf8 => Ok ( Arc :: new ( LargeUtf8StaticFilter :: try_new ( & in_array) ?) ) ,
155- DataType :: Utf8View => Ok ( Arc :: new ( Utf8ViewStaticFilter :: try_new ( & in_array) ?) ) ,
156- // Binary types
157- DataType :: Binary => Ok ( Arc :: new ( BinaryStaticFilter :: try_new ( & in_array) ?) ) ,
158- DataType :: LargeBinary => {
159- Ok ( Arc :: new ( LargeBinaryStaticFilter :: try_new ( & in_array) ?) )
160- }
161- DataType :: BinaryView => Ok ( Arc :: new ( BinaryViewStaticFilter :: try_new ( & in_array) ?) ) ,
162152 _ => {
163153 /* fall through to generic implementation for unsupported types (Float32/Float64, Struct, etc.) */
164154 Ok ( Arc :: new ( ArrayStaticFilter :: try_new ( in_array) ?) )
@@ -419,113 +409,6 @@ impl StaticFilter for BooleanStaticFilter {
419409 }
420410}
421411
422- // Macro that generates a hash-based static filter type `$name` (struct, `try_new` constructor, `StaticFilter` impl) for string and binary arrays.
423- // No string/binary data is copied: the filter keeps an `Arc` clone of the IN-list array plus a set of row indices into it, hashed with a stored `RandomState`.
424- macro_rules! define_hash_based_static_filter {
425- ( $name: ident, |$arr_param: ident| $downcast: expr) => { // NOTE(review): `$arr_param` / `$downcast` are matched but never referenced in the expansion below
426- struct $name {
427- in_array: ArrayRef , // the IN-list values (haystack); map entries index into this array
428- state: RandomState , // hashing state shared by `try_new` and `contains` so both sides hash identically
429- map: HashMap <usize , ( ) , ( ) >, // set of distinct row indices; the `()` hasher means hashes are always supplied explicitly via the raw-entry API
430- null_count: usize , // number of nulls in `in_array`, captured at construction
431- }
432-
433- impl $name {
434- fn try_new( in_array: & ArrayRef ) -> Result <Self > { // builds the index set; errors from `with_hashes` / `make_comparator` are propagated
435- let null_count = in_array. null_count( ) ;
436- let in_array_clone = Arc :: clone( in_array) ;
437- let state = RandomState :: new( ) ;
438- let mut map: HashMap <usize , ( ) , ( ) > = HashMap :: with_hasher( ( ) ) ;
439-
440- with_hashes( [ in_array. as_ref( ) ] , & state, |hashes| -> Result <( ) > {
441- let cmp = make_comparator( in_array, in_array, SortOptions :: default ( ) ) ?; // compares two rows of the same array; used to detect duplicates
442-
443- let insert_value = |idx| {
444- let hash = hashes[ idx] ;
445- if let RawEntryMut :: Vacant ( v) = map
446- . raw_entry_mut( )
447- . from_hash( hash, |x| cmp( * x, idx) . is_eq( ) ) // an existing entry matches only if its row compares equal to row `idx`
448- {
449- v. insert_with_hasher( hash, idx, ( ) , |x| hashes[ * x] ) ; // first occurrence wins; the closure maps a stored index back to its hash (presumably for rehashing on growth — confirm against hashbrown docs)
450- }
451- } ;
452-
453- match in_array. nulls( ) {
454- Some ( nulls) => { // null buffer present: only visit the indices yielded by the validity bitmap (presumably the set bits, i.e. non-null rows — confirm)
455- BitIndexIterator :: new( nulls. validity( ) , nulls. offset( ) , nulls. len( ) )
456- . for_each( insert_value)
457- }
458- None => ( 0 ..in_array. len( ) ) . for_each( insert_value) , // no null buffer: every row is inserted
459- }
460-
461- Ok ( ( ) )
462- } ) ?;
463-
464- Ok ( Self {
465- in_array: in_array_clone,
466- state,
467- map,
468- null_count,
469- } )
470- }
471- }
472-
473- impl StaticFilter for $name {
474- fn null_count( & self ) -> usize { // returns the null count recorded in `try_new`
475- self . null_count
476- }
477-
478- fn contains( & self , v: & dyn Array , negated: bool ) -> Result <BooleanArray > { // per-row membership of `v` in the IN-list; `negated` flips found / not-found results
479- // Handle dictionary arrays by recursing on the dictionary values, then using `take` with the keys to expand the per-value results to one result per row
480- downcast_dictionary_array! {
481- v => {
482- let values_contains = self . contains( v. values( ) . as_ref( ) , negated) ?;
483- let result = take( & values_contains, v. keys( ) , None ) ?;
484- return Ok ( downcast_array( result. as_ref( ) ) )
485- }
486- _ => { }
487- }
488-
489- let haystack_has_nulls = self . null_count > 0 ;
490-
491- // Use hash-based lookup with verification: the hash selects candidate entries, the comparator confirms the values are actually equal
492- with_hashes( [ v] , & self . state, |hashes| {
493- let cmp = make_comparator( v, & self . in_array, SortOptions :: default ( ) ) ?; // left index addresses `v`, right index addresses the stored IN-list array
494-
495- Ok ( BooleanArray :: from_iter( ( 0 ..v. len( ) ) . map( |i| {
496- if v. is_null( i) { // a null probe value produces a null result
497- return None ;
498- }
499-
500- let hash = hashes[ i] ;
501- let contains = self
502- . map
503- . raw_entry( )
504- . from_hash( hash, |idx| cmp( i, * idx) . is_eq( ) )
505- . is_some( ) ;
506-
507- match contains {
508- true => Some ( !negated) , // found: true for IN, false for NOT IN
509- false if haystack_has_nulls => None , // not found, but the IN-list contains a null: result is null
510- false => Some ( negated) , // not found and no nulls in the IN-list: false for IN, true for NOT IN
511- }
512- } ) ) )
513- } )
514- }
515- }
516- } ;
517- }
518-
519- // String static filters: instantiate the macro once each for the Utf8, LargeUtf8 and Utf8View filter types.
520- define_hash_based_static_filter ! ( Utf8StaticFilter , |arr| arr. as_string_opt:: <i32 >( ) ) ;
521- define_hash_based_static_filter ! ( LargeUtf8StaticFilter , |arr| arr. as_string_opt:: <i64 >( ) ) ;
522- define_hash_based_static_filter ! ( Utf8ViewStaticFilter , |arr| arr. as_string_view_opt( ) ) ;
523-
524- // Binary static filters: instantiate the macro once each for the Binary, LargeBinary and BinaryView filter types.
525- define_hash_based_static_filter ! ( BinaryStaticFilter , |arr| arr. as_binary_opt:: <i32 >( ) ) ;
526- define_hash_based_static_filter ! ( LargeBinaryStaticFilter , |arr| arr. as_binary_opt:: <i64 >( ) ) ;
527- define_hash_based_static_filter ! ( BinaryViewStaticFilter , |arr| arr. as_binary_view_opt( ) ) ;
528-
529412/// Evaluates the list of expressions into an array, flattening any dictionaries
530413fn evaluate_list (
531414 list : & [ Arc < dyn PhysicalExpr > ] ,
0 commit comments