8080 // Create initial CRC state
8181 let mut crc_state = W :: create_state ( state, params. refin , ops) ;
8282
83+ // Extract keys once and pass by reference to avoid repeated stack copies
84+ let keys = extract_keys_array ( params) ;
85+
8386 // Process data differently based on length
8487 // On ARM M4 Max, ARM c8g, x86 c7a, and x86 c7i, using 128 bytes is a measurably faster
8588 // threshold than 256 bytes...
@@ -91,19 +94,13 @@ where
9194 bytes,
9295 & mut crc_state,
9396 reflector,
94- extract_keys_array ( params ) ,
97+ & keys ,
9598 ops,
9699 ) ;
97100 }
98101
99102 // Process large inputs with SIMD-optimized approach
100- process_large_aligned :: < T , W > (
101- bytes,
102- & mut crc_state,
103- reflector,
104- extract_keys_array ( params) ,
105- ops,
106- )
103+ process_large_aligned :: < T , W > ( bytes, & mut crc_state, reflector, & keys, ops)
107104}
108105
109106/// Process data with the selected strategy
@@ -118,7 +115,7 @@ unsafe fn process_by_strategy<T: ArchOps, W: EnhancedCrcWidth>(
118115 data : & [ u8 ] ,
119116 state : & mut CrcState < T :: Vector > ,
120117 reflector : Reflector < T :: Vector > ,
121- keys : [ u64 ; 23 ] ,
118+ keys : & [ u64 ; 23 ] ,
122119 ops : & T ,
123120) -> W :: Value
124121where
@@ -153,7 +150,7 @@ unsafe fn process_large_aligned<T: ArchOps, W: EnhancedCrcWidth>(
153150 bytes : & [ u8 ] ,
154151 state : & mut CrcState < T :: Vector > ,
155152 reflector : Reflector < T :: Vector > ,
156- keys : [ u64 ; 23 ] ,
153+ keys : & [ u64 ; 23 ] ,
157154 ops : & T ,
158155) -> W :: Value
159156where
@@ -174,7 +171,7 @@ where
174171
175172 // try to use the enhanced SIMD implementation first, fall back to non-enhanced if necessary
176173 if rest. is_empty ( )
177- || !ops. process_enhanced_simd_blocks :: < W > ( state, first, rest, & reflector, keys)
174+ || !ops. process_enhanced_simd_blocks :: < W > ( state, first, rest, & reflector, * keys)
178175 {
179176 process_simd_chunks :: < T , W > ( state, first, rest, & reflector, keys, ops) ;
180177 }
@@ -207,7 +204,7 @@ unsafe fn process_simd_chunks<T: ArchOps, W: EnhancedCrcWidth>(
207204 first : & [ T :: Vector ; 8 ] ,
208205 rest : & [ [ T :: Vector ; 8 ] ] ,
209206 reflector : & Reflector < T :: Vector > ,
210- keys : [ u64 ; 23 ] ,
207+ keys : & [ u64 ; 23 ] ,
211208 ops : & T ,
212209) where
213210 T :: Vector : Copy ,
@@ -271,7 +268,7 @@ unsafe fn process_simd_chunks<T: ArchOps, W: EnhancedCrcWidth>(
271268 }
272269
273270 // Perform final reduction and update state
274- let final_value = W :: perform_final_reduction ( res, state. reflected , keys, ops) ;
271+ let final_value = W :: perform_final_reduction ( res, state. reflected , * keys, ops) ;
275272 * state = W :: create_state ( final_value, state. reflected , ops) ;
276273}
277274
@@ -286,7 +283,7 @@ unsafe fn process_exactly_16<T: ArchOps, W: EnhancedCrcWidth>(
286283 data : & [ u8 ] ,
287284 state : & mut CrcState < T :: Vector > ,
288285 reflector : & Reflector < T :: Vector > ,
289- keys : [ u64 ; 23 ] ,
286+ keys : & [ u64 ; 23 ] ,
290287 ops : & T ,
291288) -> W :: Value
292289where
@@ -296,7 +293,7 @@ where
296293 W :: perform_final_reduction (
297294 process_16_byte_block ( data. as_ptr ( ) , state. value , reflector, ops) ,
298295 state. reflected ,
299- keys,
296+ * keys,
300297 ops,
301298 )
302299}
@@ -386,7 +383,7 @@ unsafe fn process_17_to_31<T: ArchOps, W: EnhancedCrcWidth>(
386383 data : & [ u8 ] ,
387384 state : & mut CrcState < T :: Vector > ,
388385 reflector : & Reflector < T :: Vector > ,
389- keys : [ u64 ; 23 ] ,
386+ keys : & [ u64 ; 23 ] ,
390387 ops : & T ,
391388) -> W :: Value
392389where
@@ -413,7 +410,7 @@ where
413410 ) ;
414411
415412 // Perform final reduction
416- W :: perform_final_reduction ( final_xmm7, state. reflected , keys, ops)
413+ W :: perform_final_reduction ( final_xmm7, state. reflected , * keys, ops)
417414}
418415
419416// Process inputs between 32 and 255 bytes
@@ -428,7 +425,7 @@ unsafe fn process_32_to_255<T: ArchOps, W: EnhancedCrcWidth>(
428425 data : & [ u8 ] ,
429426 state : & mut CrcState < T :: Vector > ,
430427 reflector : & Reflector < T :: Vector > ,
431- keys : [ u64 ; 23 ] ,
428+ keys : & [ u64 ; 23 ] ,
432429 ops : & T ,
433430) -> W :: Value
434431where
@@ -478,7 +475,7 @@ where
478475 }
479476
480477 // Perform final reduction
481- W :: perform_final_reduction ( xmm7, state. reflected , keys, ops)
478+ W :: perform_final_reduction ( xmm7, state. reflected , * keys, ops)
482479}
483480
484481/// Data region descriptor for overlapping SIMD reads in CRC processing
@@ -512,7 +509,7 @@ struct DataRegion<'a> {
512509unsafe fn get_last_two_xmms < T : ArchOps , W : EnhancedCrcWidth > (
513510 region : DataRegion ,
514511 current_state : T :: Vector ,
515- keys : [ u64 ; 23 ] ,
512+ keys : & [ u64 ; 23 ] ,
516513 reflector : & Reflector < T :: Vector > ,
517514 reflected : bool ,
518515 ops : & T ,
0 commit comments