Skip to content

Commit 83e13eb

Browse files
committed
Pass large array parameters (keys) by reference.
1 parent cfedf53 commit 83e13eb

File tree

5 files changed

+25
-28
lines changed

5 files changed

+25
-28
lines changed

src/algorithm.rs

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ where
8080
// Create initial CRC state
8181
let mut crc_state = W::create_state(state, params.refin, ops);
8282

83+
// Extract keys once and pass by reference to avoid repeated stack copies
84+
let keys = extract_keys_array(params);
85+
8386
// Process data differently based on length
8487
// On ARM M4 Max, ARM c8g, x86 c7a, and x86 c7i, using 128 bytes is a measurably faster
8588
// threshold than 256 bytes...
@@ -91,19 +94,13 @@ where
9194
bytes,
9295
&mut crc_state,
9396
reflector,
94-
extract_keys_array(params),
97+
&keys,
9598
ops,
9699
);
97100
}
98101

99102
// Process large inputs with SIMD-optimized approach
100-
process_large_aligned::<T, W>(
101-
bytes,
102-
&mut crc_state,
103-
reflector,
104-
extract_keys_array(params),
105-
ops,
106-
)
103+
process_large_aligned::<T, W>(bytes, &mut crc_state, reflector, &keys, ops)
107104
}
108105

109106
/// Process data with the selected strategy
@@ -118,7 +115,7 @@ unsafe fn process_by_strategy<T: ArchOps, W: EnhancedCrcWidth>(
118115
data: &[u8],
119116
state: &mut CrcState<T::Vector>,
120117
reflector: Reflector<T::Vector>,
121-
keys: [u64; 23],
118+
keys: &[u64; 23],
122119
ops: &T,
123120
) -> W::Value
124121
where
@@ -153,7 +150,7 @@ unsafe fn process_large_aligned<T: ArchOps, W: EnhancedCrcWidth>(
153150
bytes: &[u8],
154151
state: &mut CrcState<T::Vector>,
155152
reflector: Reflector<T::Vector>,
156-
keys: [u64; 23],
153+
keys: &[u64; 23],
157154
ops: &T,
158155
) -> W::Value
159156
where
@@ -174,7 +171,7 @@ where
174171

175172
// try to use the enhanced SIMD implementation first, fall back to non-enhanced if necessary
176173
if rest.is_empty()
177-
|| !ops.process_enhanced_simd_blocks::<W>(state, first, rest, &reflector, keys)
174+
|| !ops.process_enhanced_simd_blocks::<W>(state, first, rest, &reflector, *keys)
178175
{
179176
process_simd_chunks::<T, W>(state, first, rest, &reflector, keys, ops);
180177
}
@@ -207,7 +204,7 @@ unsafe fn process_simd_chunks<T: ArchOps, W: EnhancedCrcWidth>(
207204
first: &[T::Vector; 8],
208205
rest: &[[T::Vector; 8]],
209206
reflector: &Reflector<T::Vector>,
210-
keys: [u64; 23],
207+
keys: &[u64; 23],
211208
ops: &T,
212209
) where
213210
T::Vector: Copy,
@@ -271,7 +268,7 @@ unsafe fn process_simd_chunks<T: ArchOps, W: EnhancedCrcWidth>(
271268
}
272269

273270
// Perform final reduction and update state
274-
let final_value = W::perform_final_reduction(res, state.reflected, keys, ops);
271+
let final_value = W::perform_final_reduction(res, state.reflected, *keys, ops);
275272
*state = W::create_state(final_value, state.reflected, ops);
276273
}
277274

@@ -286,7 +283,7 @@ unsafe fn process_exactly_16<T: ArchOps, W: EnhancedCrcWidth>(
286283
data: &[u8],
287284
state: &mut CrcState<T::Vector>,
288285
reflector: &Reflector<T::Vector>,
289-
keys: [u64; 23],
286+
keys: &[u64; 23],
290287
ops: &T,
291288
) -> W::Value
292289
where
@@ -296,7 +293,7 @@ where
296293
W::perform_final_reduction(
297294
process_16_byte_block(data.as_ptr(), state.value, reflector, ops),
298295
state.reflected,
299-
keys,
296+
*keys,
300297
ops,
301298
)
302299
}
@@ -386,7 +383,7 @@ unsafe fn process_17_to_31<T: ArchOps, W: EnhancedCrcWidth>(
386383
data: &[u8],
387384
state: &mut CrcState<T::Vector>,
388385
reflector: &Reflector<T::Vector>,
389-
keys: [u64; 23],
386+
keys: &[u64; 23],
390387
ops: &T,
391388
) -> W::Value
392389
where
@@ -413,7 +410,7 @@ where
413410
);
414411

415412
// Perform final reduction
416-
W::perform_final_reduction(final_xmm7, state.reflected, keys, ops)
413+
W::perform_final_reduction(final_xmm7, state.reflected, *keys, ops)
417414
}
418415

419416
// Process inputs between 32 and 255 bytes
@@ -428,7 +425,7 @@ unsafe fn process_32_to_255<T: ArchOps, W: EnhancedCrcWidth>(
428425
data: &[u8],
429426
state: &mut CrcState<T::Vector>,
430427
reflector: &Reflector<T::Vector>,
431-
keys: [u64; 23],
428+
keys: &[u64; 23],
432429
ops: &T,
433430
) -> W::Value
434431
where
@@ -478,7 +475,7 @@ where
478475
}
479476

480477
// Perform final reduction
481-
W::perform_final_reduction(xmm7, state.reflected, keys, ops)
478+
W::perform_final_reduction(xmm7, state.reflected, *keys, ops)
482479
}
483480

484481
/// Data region descriptor for overlapping SIMD reads in CRC processing
@@ -512,7 +509,7 @@ struct DataRegion<'a> {
512509
unsafe fn get_last_two_xmms<T: ArchOps, W: EnhancedCrcWidth>(
513510
region: DataRegion,
514511
current_state: T::Vector,
515-
keys: [u64; 23],
512+
keys: &[u64; 23],
516513
reflector: &Reflector<T::Vector>,
517514
reflected: bool,
518515
ops: &T,

src/combine.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ fn gf2_matrix_square(square: &mut [u64; 64], mat: &[u64; 64]) {
8484
first sequence of bytes, crc2 is the CRC of the immediately following
8585
sequence of bytes, and len2 is the length of the second sequence. The CRC
8686
of the combined sequence is returned. */
87-
pub fn checksums(mut crc1: u64, crc2: u64, mut len2: u64, params: CrcParams) -> u64 {
87+
pub fn checksums(mut crc1: u64, crc2: u64, mut len2: u64, params: &CrcParams) -> u64 {
8888
let mut col: u64;
8989
let mut even = [0u64; 64]; /* even-power-of-two zeros operator */
9090
let mut odd = [0u64; 64]; /* odd-power-of-two zeros operator */

src/crc32/algorithm.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ pub(crate) unsafe fn process_0_to_15<T: ArchOps, W: EnhancedCrcWidth>(
237237
data: &[u8],
238238
state: &mut CrcState<T::Vector>,
239239
reflector: &Reflector<T::Vector>,
240-
keys: [u64; 23],
240+
keys: &[u64; 23],
241241
ops: &T,
242242
) -> W::Value
243243
where
@@ -321,7 +321,7 @@ where
321321
};
322322

323323
if len >= 4 {
324-
return W::perform_final_reduction(xmm7, state.reflected, keys, ops);
324+
return W::perform_final_reduction(xmm7, state.reflected, *keys, ops);
325325
}
326326

327327
let final_state = CrcState {

src/crc64/algorithm.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ pub(crate) unsafe fn process_0_to_15<T: ArchOps, W: EnhancedCrcWidth>(
213213
data: &[u8],
214214
state: &mut CrcState<T::Vector>,
215215
reflector: &Reflector<T::Vector>,
216-
keys: [u64; 23],
216+
keys: &[u64; 23],
217217
ops: &T,
218218
) -> W::Value
219219
where
@@ -285,7 +285,7 @@ where
285285

286286
if len >= CRC_HALF_CHUNK_SIZE as i32 {
287287
// For 8-15 bytes, perform additional folding
288-
return W::perform_final_reduction(xmm7, state.reflected, keys, ops);
288+
return W::perform_final_reduction(xmm7, state.reflected, *keys, ops);
289289
}
290290

291291
let final_state = CrcState {

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ impl Digest {
592592
self.state ^ self.params.xorout,
593593
other_crc,
594594
other.amount,
595-
self.params,
595+
&self.params,
596596
) ^ self.params.xorout;
597597
}
598598

@@ -893,7 +893,7 @@ pub fn checksum_combine(
893893
) -> u64 {
894894
let params = get_calculator_params(algorithm).1;
895895

896-
combine::checksums(checksum1, checksum2, checksum2_len, params)
896+
combine::checksums(checksum1, checksum2, checksum2_len, &params)
897897
}
898898

899899
/// Combines two CRC checksums using custom CRC parameters.
@@ -926,7 +926,7 @@ pub fn checksum_combine_with_params(
926926
checksum2: u64,
927927
checksum2_len: u64,
928928
) -> u64 {
929-
combine::checksums(checksum1, checksum2, checksum2_len, params)
929+
combine::checksums(checksum1, checksum2, checksum2_len, &params)
930930
}
931931

932932
/// Returns the target used to calculate the CRC checksum for the specified algorithm.

0 commit comments

Comments
 (0)