Use as_chunks in analyze_source_file_sse2

real-eren · real-eren · commit 7058609ed1c5 · 2025-02-22T21:56:22.000-05:00
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
@@ -69,21 +69,18 @@ cfg_match! {
 
             const CHUNK_SIZE: usize = 16;
 
-            let src_bytes = src.as_bytes();
-
-            let chunk_count = src.len() / CHUNK_SIZE;
+            let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
 
             // This variable keeps track of where we should start decoding a
             // chunk. If a multi-byte character spans across chunk boundaries,
             // we need to skip that part in the next chunk because we already
             // handled it.
             let mut intra_chunk_offset = 0;
 
-            for chunk_index in 0..chunk_count {
-                let ptr = src_bytes.as_ptr() as *const __m128i;
+            for (chunk_index, chunk) in chunks.iter().enumerate() {
                 // We don't know if the pointer is aligned to 16 bytes, so we
                 // use `loadu`, which supports unaligned loading.
-                let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
+                let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
 
                 // For character in the chunk, see if its byte value is < 0, which
                 // indicates that it's part of a UTF-8 char.
@@ -124,7 +121,7 @@ cfg_match! {
             }
 
             // There might still be a tail left to analyze
-            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+            let tail_start = src.len() - tail.len() + intra_chunk_offset;
             if tail_start < src.len() {
                 analyze_source_file_generic(
                     &src[tail_start..],
@@ -194,21 +191,18 @@ cfg_match! {
 
             const CHUNK_SIZE: usize = 16;
 
-            let src_bytes = src.as_bytes();
-
-            let chunk_count = src.len() / CHUNK_SIZE;
+            let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
 
             // This variable keeps track of where we should start decoding a
             // chunk. If a multi-byte character spans across chunk boundaries,
             // we need to skip that part in the next chunk because we already
             // handled it.
             let mut intra_chunk_offset = 0;
 
-            for chunk_index in 0..chunk_count {
-                let ptr = src_bytes.as_ptr() as *const __m128i;
+            for (chunk_index, chunk) in chunks.iter().enumerate() {
                 // We don't know if the pointer is aligned to 16 bytes, so we
                 // use `loadu`, which supports unaligned loading.
-                let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
+                let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
 
                 // For character in the chunk, see if its byte value is < 0, which
                 // indicates that it's part of a UTF-8 char.
@@ -249,7 +243,7 @@ cfg_match! {
             }
 
             // There might still be a tail left to analyze
-            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+            let tail_start = src.len() - tail.len() + intra_chunk_offset;
             if tail_start < src.len() {
                 analyze_source_file_generic(
                     &src[tail_start..],
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
@@ -31,6 +31,7 @@
 #![feature(round_char_boundary)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_chunks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end