1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow:: array:: { Array , ArrayRef , Float32Array , Int32Array , StringArray } ;
18+ use arrow:: array:: {
19+ Array , ArrayRef , Float32Array , Int32Array , StringArray , StringViewArray ,
20+ } ;
1921use arrow:: datatypes:: { Field , Schema } ;
2022use arrow:: record_batch:: RecordBatch ;
2123use criterion:: { criterion_group, criterion_main, Criterion } ;
2224use datafusion_common:: ScalarValue ;
2325use datafusion_physical_expr:: expressions:: { col, in_list, lit} ;
2426use rand:: distr:: Alphanumeric ;
2527use rand:: prelude:: * ;
28+ use std:: any:: TypeId ;
2629use std:: hint:: black_box;
2730use std:: sync:: Arc ;
2831
32+ /// Measures how long `in_list(col("a"), exprs)` takes to evaluate against a single RecordBatch.
2933fn do_bench ( c : & mut Criterion , name : & str , values : ArrayRef , exprs : & [ ScalarValue ] ) {
3034 let schema = Schema :: new ( vec ! [ Field :: new( "a" , values. data_type( ) . clone( ) , true ) ] ) ;
3135 let exprs = exprs. iter ( ) . map ( |s| lit ( s. clone ( ) ) ) . collect ( ) ;
@@ -37,78 +41,129 @@ fn do_bench(c: &mut Criterion, name: &str, values: ArrayRef, exprs: &[ScalarValu
3741 } ) ;
3842}
3943
44+ /// Generates a random alphanumeric string of the specified length.
4045fn random_string ( rng : & mut StdRng , len : usize ) -> String {
4146 let value = rng. sample_iter ( & Alphanumeric ) . take ( len) . collect ( ) ;
4247 String :: from_utf8 ( value) . unwrap ( )
4348}
4449
45- fn do_benches (
46- c : & mut Criterion ,
47- array_length : usize ,
48- in_list_length : usize ,
49- null_percent : f64 ,
50- ) {
51- let mut rng = StdRng :: seed_from_u64 ( 120320 ) ;
52- for string_length in [ 5 , 10 , 20 ] {
53- let values: StringArray = ( 0 ..array_length)
54- . map ( |_| {
55- rng. random_bool ( null_percent)
56- . then ( || random_string ( & mut rng, string_length) )
57- } )
58- . collect ( ) ;
59-
60- let in_list: Vec < _ > = ( 0 ..in_list_length)
61- . map ( |_| ScalarValue :: from ( random_string ( & mut rng, string_length) ) )
62- . collect ( ) ;
63-
64- do_bench (
65- c,
66- & format ! (
67- "in_list_utf8({string_length}) ({array_length}, {null_percent}) IN ({in_list_length}, 0)"
68- ) ,
69- Arc :: new ( values) ,
70- & in_list,
71- )
/// Number of literals placed in the `IN (...)` list for each benchmark.
const IN_LIST_LENGTHS: [usize; 3] = [3, 8, 100];
/// Probability that a slot of the generated column is null (0.2 is 20%).
const NULL_PERCENTS: [f64; 2] = [0.0, 0.2];
/// Length of every generated string, for the string-typed benchmarks.
const STRING_LENGTHS: [usize; 3] = [3, 12, 100];
/// Number of rows in the generated column.
const ARRAY_LENGTH: usize = 1024;
54+
55+ /// Returns a friendly type name for the array type.
56+ fn array_type_name < A : ' static > ( ) -> & ' static str {
57+ let id = TypeId :: of :: < A > ( ) ;
58+ if id == TypeId :: of :: < StringArray > ( ) {
59+ "Utf8"
60+ } else if id == TypeId :: of :: < StringViewArray > ( ) {
61+ "Utf8View"
62+ } else if id == TypeId :: of :: < Float32Array > ( ) {
63+ "Float32"
64+ } else if id == TypeId :: of :: < Int32Array > ( ) {
65+ "Int32"
66+ } else {
67+ "Unknown"
7268 }
69+ }
7370
74- let values: Float32Array = ( 0 ..array_length)
75- . map ( |_| rng. random_bool ( null_percent) . then ( || rng. random ( ) ) )
76- . collect ( ) ;
71+ /// Builds a benchmark name from array type, list size, and null percentage.
72+ fn bench_name < A : ' static > ( in_list_length : usize , null_percent : f64 ) -> String {
73+ format ! (
74+ "in_list/{}/list={in_list_length}/nulls={}%" ,
75+ array_type_name:: <A >( ) ,
76+ ( null_percent * 100.0 ) as u32
77+ )
78+ }
7779
78- let in_list: Vec < _ > = ( 0 ..in_list_length)
79- . map ( |_| ScalarValue :: Float32 ( Some ( rng. random ( ) ) ) )
80- . collect ( ) ;
80+ /// Runs in_list benchmarks for a string array type across all list-size × null-ratio × string-length combinations.
81+ fn bench_string_type < A > (
82+ c : & mut Criterion ,
83+ rng : & mut StdRng ,
84+ make_scalar : fn ( String ) -> ScalarValue ,
85+ ) where
86+ A : Array + FromIterator < Option < String > > + ' static ,
87+ {
88+ for in_list_length in IN_LIST_LENGTHS {
89+ for null_percent in NULL_PERCENTS {
90+ for string_length in STRING_LENGTHS {
91+ let values: A = ( 0 ..ARRAY_LENGTH )
92+ . map ( |_| {
93+ rng. random_bool ( 1.0 - null_percent)
94+ . then ( || random_string ( rng, string_length) )
95+ } )
96+ . collect ( ) ;
8197
82- do_bench (
83- c,
84- & format ! ( "in_list_f32 ({array_length}, {null_percent}) IN ({in_list_length}, 0)" ) ,
85- Arc :: new ( values) ,
86- & in_list,
87- ) ;
98+ let in_list: Vec < _ > = ( 0 ..in_list_length)
99+ . map ( |_| make_scalar ( random_string ( rng, string_length) ) )
100+ . collect ( ) ;
88101
89- let values: Int32Array = ( 0 ..array_length)
90- . map ( |_| rng. random_bool ( null_percent) . then ( || rng. random ( ) ) )
91- . collect ( ) ;
102+ do_bench (
103+ c,
104+ & format ! (
105+ "{}/str={string_length}" ,
106+ bench_name:: <A >( in_list_length, null_percent)
107+ ) ,
108+ Arc :: new ( values) ,
109+ & in_list,
110+ )
111+ }
112+ }
113+ }
114+ }
92115
93- let in_list: Vec < _ > = ( 0 ..in_list_length)
94- . map ( |_| ScalarValue :: Int32 ( Some ( rng. random ( ) ) ) )
95- . collect ( ) ;
116+ /// Runs in_list benchmarks for a numeric array type across all list-size × null-ratio combinations.
117+ fn bench_numeric_type < T , A > (
118+ c : & mut Criterion ,
119+ rng : & mut StdRng ,
120+ mut gen_value : impl FnMut ( & mut StdRng ) -> T ,
121+ make_scalar : fn ( T ) -> ScalarValue ,
122+ ) where
123+ A : Array + FromIterator < Option < T > > + ' static ,
124+ {
125+ for in_list_length in IN_LIST_LENGTHS {
126+ for null_percent in NULL_PERCENTS {
127+ let values: A = ( 0 ..ARRAY_LENGTH )
128+ . map ( |_| rng. random_bool ( 1.0 - null_percent) . then ( || gen_value ( rng) ) )
129+ . collect ( ) ;
96130
97- do_bench (
98- c,
99- & format ! ( "in_list_i32 ({array_length}, {null_percent}) IN ({in_list_length}, 0)" ) ,
100- Arc :: new ( values) ,
101- & in_list,
102- )
103- }
131+ let in_list: Vec < _ > = ( 0 ..in_list_length)
132+ . map ( |_| make_scalar ( gen_value ( rng) ) )
133+ . collect ( ) ;
104134
105- fn criterion_benchmark ( c : & mut Criterion ) {
106- for in_list_length in [ 1 , 3 , 10 , 100 ] {
107- for null_percent in [ 0. , 0.2 ] {
108- do_benches ( c, 1024 , in_list_length, null_percent)
135+ do_bench (
136+ c,
137+ & bench_name :: < A > ( in_list_length, null_percent) ,
138+ Arc :: new ( values) ,
139+ & in_list,
140+ ) ;
109141 }
110142 }
111143}
112144
145+ /// Entry point: registers in_list benchmarks for Utf8, Utf8View, Float32, and Int32 arrays.
146+ fn criterion_benchmark ( c : & mut Criterion ) {
147+ let mut rng = StdRng :: seed_from_u64 ( 120320 ) ;
148+
149+ // Benchmarks for string array types (Utf8, Utf8View)
150+ bench_string_type :: < StringArray > ( c, & mut rng, |s| ScalarValue :: Utf8 ( Some ( s) ) ) ;
151+ bench_string_type :: < StringViewArray > ( c, & mut rng, |s| ScalarValue :: Utf8View ( Some ( s) ) ) ;
152+
153+ // Benchmarks for numeric types
154+ bench_numeric_type :: < f32 , Float32Array > (
155+ c,
156+ & mut rng,
157+ |rng| rng. random ( ) ,
158+ |v| ScalarValue :: Float32 ( Some ( v) ) ,
159+ ) ;
160+ bench_numeric_type :: < i32 , Int32Array > (
161+ c,
162+ & mut rng,
163+ |rng| rng. random ( ) ,
164+ |v| ScalarValue :: Int32 ( Some ( v) ) ,
165+ ) ;
166+ }
167+
113168criterion_group ! ( benches, criterion_benchmark) ;
114169criterion_main ! ( benches) ;
0 commit comments