1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow:: array:: {
19- Array , ArrayRef , Float32Array , Int32Array , StringArray , StringViewArray ,
20- } ;
18+ use arrow:: array:: { Array , ArrayRef , Float32Array , Int32Array , StringArray } ;
2119use arrow:: datatypes:: { Field , Schema } ;
2220use arrow:: record_batch:: RecordBatch ;
2321use criterion:: { criterion_group, criterion_main, Criterion } ;
2422use datafusion_common:: ScalarValue ;
2523use datafusion_physical_expr:: expressions:: { col, in_list, lit} ;
2624use rand:: distr:: Alphanumeric ;
2725use rand:: prelude:: * ;
28- use std:: any:: TypeId ;
2926use std:: hint:: black_box;
3027use std:: sync:: Arc ;
3128
32- /// Measures how long `in_list(col("a"), exprs)` takes to evaluate against a single RecordBatch.
3329fn do_bench ( c : & mut Criterion , name : & str , values : ArrayRef , exprs : & [ ScalarValue ] ) {
3430 let schema = Schema :: new ( vec ! [ Field :: new( "a" , values. data_type( ) . clone( ) , true ) ] ) ;
3531 let exprs = exprs. iter ( ) . map ( |s| lit ( s. clone ( ) ) ) . collect ( ) ;
@@ -41,128 +37,77 @@ fn do_bench(c: &mut Criterion, name: &str, values: ArrayRef, exprs: &[ScalarValu
4137 } ) ;
4238}
4339
44- /// Generates a random alphanumeric string of the specified length.
4540fn random_string ( rng : & mut StdRng , len : usize ) -> String {
4641 let value = rng. sample_iter ( & Alphanumeric ) . take ( len) . collect ( ) ;
4742 String :: from_utf8 ( value) . unwrap ( )
4843}
4944
50- const IN_LIST_LENGTHS : [ usize ; 3 ] = [ 3 , 8 , 100 ] ;
51- const NULL_PERCENTS : [ f64 ; 2 ] = [ 0. , 0.2 ] ;
52- const STRING_LENGTHS : [ usize ; 3 ] = [ 3 , 12 , 100 ] ;
53- const ARRAY_LENGTH : usize = 1024 ;
54-
55- /// Returns a friendly type name for the array type.
56- fn array_type_name < A : ' static > ( ) -> & ' static str {
57- let id = TypeId :: of :: < A > ( ) ;
58- if id == TypeId :: of :: < StringArray > ( ) {
59- "Utf8"
60- } else if id == TypeId :: of :: < StringViewArray > ( ) {
61- "Utf8View"
62- } else if id == TypeId :: of :: < Float32Array > ( ) {
63- "Float32"
64- } else if id == TypeId :: of :: < Int32Array > ( ) {
65- "Int32"
66- } else {
67- "Unknown"
45+ fn do_benches (
46+ c : & mut Criterion ,
47+ array_length : usize ,
48+ in_list_length : usize ,
49+ null_percent : f64 ,
50+ ) {
51+ let mut rng = StdRng :: seed_from_u64 ( 120320 ) ;
52+ for string_length in [ 5 , 10 , 20 ] {
53+ let values: StringArray = ( 0 ..array_length)
54+ . map ( |_| {
55+ rng. random_bool ( null_percent)
56+ . then ( || random_string ( & mut rng, string_length) )
57+ } )
58+ . collect ( ) ;
59+
60+ let in_list: Vec < _ > = ( 0 ..in_list_length)
61+ . map ( |_| ScalarValue :: from ( random_string ( & mut rng, string_length) ) )
62+ . collect ( ) ;
63+
64+ do_bench (
65+ c,
66+ & format ! (
67+ "in_list_utf8({string_length}) ({array_length}, {null_percent}) IN ({in_list_length}, 0)"
68+ ) ,
69+ Arc :: new ( values) ,
70+ & in_list,
71+ )
6872 }
69- }
70-
71- /// Builds a benchmark name from array type, list size, and null percentage.
72- fn bench_name < A : ' static > ( in_list_length : usize , null_percent : f64 ) -> String {
73- format ! (
74- "in_list/{}/list={in_list_length}/nulls={}%" ,
75- array_type_name:: <A >( ) ,
76- ( null_percent * 100.0 ) as u32
77- )
78- }
7973
80- /// Runs in_list benchmarks for a string array type across all list-size × null-ratio × string-length combinations.
81- fn bench_string_type < A > (
82- c : & mut Criterion ,
83- rng : & mut StdRng ,
84- make_scalar : fn ( String ) -> ScalarValue ,
85- ) where
86- A : Array + FromIterator < Option < String > > + ' static ,
87- {
88- for in_list_length in IN_LIST_LENGTHS {
89- for null_percent in NULL_PERCENTS {
90- for string_length in STRING_LENGTHS {
91- let values: A = ( 0 ..ARRAY_LENGTH )
92- . map ( |_| {
93- rng. random_bool ( 1.0 - null_percent)
94- . then ( || random_string ( rng, string_length) )
95- } )
96- . collect ( ) ;
74+ let values: Float32Array = ( 0 ..array_length)
75+ . map ( |_| rng. random_bool ( null_percent) . then ( || rng. random ( ) ) )
76+ . collect ( ) ;
9777
98- let in_list: Vec < _ > = ( 0 ..in_list_length)
99- . map ( |_| make_scalar ( random_string ( rng, string_length ) ) )
100- . collect ( ) ;
78+ let in_list: Vec < _ > = ( 0 ..in_list_length)
79+ . map ( |_| ScalarValue :: Float32 ( Some ( rng. random ( ) ) ) )
80+ . collect ( ) ;
10181
102- do_bench (
103- c,
104- & format ! (
105- "{}/str={string_length}" ,
106- bench_name:: <A >( in_list_length, null_percent)
107- ) ,
108- Arc :: new ( values) ,
109- & in_list,
110- )
111- }
112- }
113- }
114- }
82+ do_bench (
83+ c,
84+ & format ! ( "in_list_f32 ({array_length}, {null_percent}) IN ({in_list_length}, 0)" ) ,
85+ Arc :: new ( values) ,
86+ & in_list,
87+ ) ;
11588
116- /// Runs in_list benchmarks for a numeric array type across all list-size × null-ratio combinations.
117- fn bench_numeric_type < T , A > (
118- c : & mut Criterion ,
119- rng : & mut StdRng ,
120- mut gen_value : impl FnMut ( & mut StdRng ) -> T ,
121- make_scalar : fn ( T ) -> ScalarValue ,
122- ) where
123- A : Array + FromIterator < Option < T > > + ' static ,
124- {
125- for in_list_length in IN_LIST_LENGTHS {
126- for null_percent in NULL_PERCENTS {
127- let values: A = ( 0 ..ARRAY_LENGTH )
128- . map ( |_| rng. random_bool ( 1.0 - null_percent) . then ( || gen_value ( rng) ) )
129- . collect ( ) ;
89+ let values: Int32Array = ( 0 ..array_length)
90+ . map ( |_| rng. random_bool ( null_percent) . then ( || rng. random ( ) ) )
91+ . collect ( ) ;
13092
131- let in_list: Vec < _ > = ( 0 ..in_list_length)
132- . map ( |_| make_scalar ( gen_value ( rng) ) )
133- . collect ( ) ;
93+ let in_list: Vec < _ > = ( 0 ..in_list_length)
94+ . map ( |_| ScalarValue :: Int32 ( Some ( rng. random ( ) ) ) )
95+ . collect ( ) ;
13496
135- do_bench (
136- c,
137- & bench_name :: < A > ( in_list_length, null_percent) ,
138- Arc :: new ( values) ,
139- & in_list,
140- ) ;
141- }
142- }
97+ do_bench (
98+ c,
99+ & format ! ( "in_list_i32 ({array_length}, {null_percent}) IN ({in_list_length}, 0)" ) ,
100+ Arc :: new ( values) ,
101+ & in_list,
102+ )
143103}
144104
145- /// Entry point: registers in_list benchmarks for Utf8, Utf8View, Float32, and Int32 arrays.
146105fn criterion_benchmark ( c : & mut Criterion ) {
147- let mut rng = StdRng :: seed_from_u64 ( 120320 ) ;
148-
149- // Benchmarks for string array types (Utf8, Utf8View)
150- bench_string_type :: < StringArray > ( c, & mut rng, |s| ScalarValue :: Utf8 ( Some ( s) ) ) ;
151- bench_string_type :: < StringViewArray > ( c, & mut rng, |s| ScalarValue :: Utf8View ( Some ( s) ) ) ;
152-
153- // Benchmarks for numeric types
154- bench_numeric_type :: < f32 , Float32Array > (
155- c,
156- & mut rng,
157- |rng| rng. random ( ) ,
158- |v| ScalarValue :: Float32 ( Some ( v) ) ,
159- ) ;
160- bench_numeric_type :: < i32 , Int32Array > (
161- c,
162- & mut rng,
163- |rng| rng. random ( ) ,
164- |v| ScalarValue :: Int32 ( Some ( v) ) ,
165- ) ;
106+ for in_list_length in [ 1 , 3 , 10 , 100 ] {
107+ for null_percent in [ 0. , 0.2 ] {
108+ do_benches ( c, 1024 , in_list_length, null_percent)
109+ }
110+ }
166111}
167112
168113criterion_group ! ( benches, criterion_benchmark) ;
0 commit comments