Skip to content

Commit f5c1b9c

Browse files
authored
Optimize must_be_2_3_continuation (#115)
1 parent c046554 commit f5c1b9c

File tree

7 files changed

+36
-89
lines changed

7 files changed

+36
-89
lines changed

src/implementation/aarch64/neon.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
//! Contains the aarch64 UTF-8 validation implementation.
22
33
use core::arch::aarch64::{
4-
uint8x16_t, vandq_u8, vcgtq_u8, vdupq_n_u8, veorq_u8, vextq_u8, vld1q_u8, vmaxvq_u8,
5-
vmovq_n_u8, vorrq_u8, vqsubq_u8, vqtbl1q_u8, vshrq_n_u8,
4+
uint8x16_t, vandq_u8, vdupq_n_u8, veorq_u8, vextq_u8, vld1q_u8, vmaxvq_u8, vmovq_n_u8,
5+
vorrq_u8, vqsubq_u8, vqtbl1q_u8, vshrq_n_u8,
66
};
77

88
use crate::implementation::helpers::Utf8CheckAlgorithm;
@@ -185,11 +185,6 @@ impl SimdU8Value {
185185
Self::from(vextq_u8(prev.0, self.0, 16 - 3))
186186
}
187187

188-
#[inline]
189-
unsafe fn unsigned_gt(self, other: Self) -> Self {
190-
Self::from(vcgtq_u8(self.0, other.0))
191-
}
192-
193188
#[inline]
194189
unsafe fn any_bit_set(self) -> bool {
195190
vmaxvq_u8(self.0) != 0
@@ -211,9 +206,8 @@ impl From<uint8x16_t> for SimdU8Value {
211206
impl Utf8CheckAlgorithm<SimdU8Value> {
212207
#[inline]
213208
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
214-
let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
215-
let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));
216-
209+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
210+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
217211
is_third_byte.or(is_fourth_byte)
218212
}
219213
}

src/implementation/armv7/neon.rs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
//! Contains the armv7 UTF-8 validation implementation.
22
33
use core::arch::arm::{
4-
uint8x16_t, uint8x8x2_t, vandq_u8, vcgtq_u8, vcombine_u8, vdupq_n_u8, veorq_u8, vextq_u8,
5-
vget_high_u8, vget_lane_u8, vget_low_u8, vld1q_u8, vmovq_n_u8, vorrq_u8, vpmax_u8, vqsubq_u8,
6-
vshrq_n_u8, vtbl2_u8,
4+
uint8x16_t, uint8x8x2_t, vandq_u8, vcombine_u8, vdupq_n_u8, veorq_u8, vextq_u8, vget_high_u8,
5+
vget_lane_u8, vget_low_u8, vld1q_u8, vmovq_n_u8, vorrq_u8, vpmax_u8, vqsubq_u8, vshrq_n_u8,
6+
vtbl2_u8,
77
};
88

99
use crate::implementation::helpers::Utf8CheckAlgorithm;
@@ -217,12 +217,6 @@ impl SimdU8Value {
217217
Self(vextq_u8(prev.0, self.0, 16 - 3))
218218
}
219219

220-
#[inline]
221-
#[target_feature(enable = "neon")]
222-
unsafe fn unsigned_gt(self, other: Self) -> Self {
223-
Self(vcgtq_u8(self.0, other.0))
224-
}
225-
226220
#[inline]
227221
#[target_feature(enable = "neon")]
228222
unsafe fn any_bit_set(self) -> bool {
@@ -247,9 +241,8 @@ impl Utf8CheckAlgorithm<SimdU8Value> {
247241
#[inline]
248242
#[target_feature(enable = "neon")]
249243
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
250-
let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
251-
let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));
252-
244+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
245+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
253246
is_third_byte.or(is_fourth_byte)
254247
}
255248
}

src/implementation/portable/simd128.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,6 @@ impl SimdU8Value {
176176
))
177177
}
178178

179-
#[inline]
180-
fn unsigned_gt(self, other: Self) -> Self {
181-
let gt = self.0.simd_gt(other.0).to_int();
182-
Self::from(gt.cast())
183-
}
184-
185179
#[inline]
186180
fn any_bit_set(self) -> bool {
187181
if HAS_FAST_REDUCE_MAX {
@@ -211,9 +205,8 @@ impl From<u8x16> for SimdU8Value {
211205
impl Utf8CheckAlgorithm<SimdU8Value> {
212206
#[inline]
213207
fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
214-
let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
215-
let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));
216-
208+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
209+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
217210
is_third_byte.or(is_fourth_byte)
218211
}
219212
}

src/implementation/portable/simd256.rs

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,6 @@ impl SimdU8Value {
188188
))
189189
}
190190

191-
#[inline]
192-
fn unsigned_gt(self, other: Self) -> Self {
193-
let gt = self.0.simd_gt(other.0).to_int();
194-
Self::from(gt.cast())
195-
}
196-
197191
#[inline]
198192
fn any_bit_set(self) -> bool {
199193
self.0 != u8x32::splat(0)
@@ -216,12 +210,9 @@ impl From<u8x32> for SimdU8Value {
216210
impl Utf8CheckAlgorithm<SimdU8Value> {
217211
#[inline]
218212
fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
219-
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1));
220-
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1));
221-
222-
is_third_byte
223-
.or(is_fourth_byte)
224-
.unsigned_gt(SimdU8Value::splat0())
213+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
214+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
215+
is_third_byte.or(is_fourth_byte)
225216
}
226217
}
227218

src/implementation/wasm32/simd128.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
//! Contains the wasm32 UTF-8 validation implementation.
22
33
use core::arch::wasm32::{
4-
u8x16, u8x16_all_true, u8x16_gt, u8x16_lt, u8x16_shr, u8x16_shuffle, u8x16_splat,
5-
u8x16_sub_sat, u8x16_swizzle, v128, v128_and, v128_any_true, v128_or, v128_xor,
4+
u8x16, u8x16_all_true, u8x16_lt, u8x16_shr, u8x16_shuffle, u8x16_splat, u8x16_sub_sat,
5+
u8x16_swizzle, v128, v128_and, v128_any_true, v128_or, v128_xor,
66
};
77

88
use crate::implementation::helpers::Utf8CheckAlgorithm;
@@ -226,11 +226,6 @@ impl SimdU8Value {
226226
>(prev.0, self.0))
227227
}
228228

229-
#[inline]
230-
unsafe fn unsigned_gt(self, other: Self) -> Self {
231-
Self::from(u8x16_gt(self.0, other.0))
232-
}
233-
234229
#[inline]
235230
unsafe fn any_bit_set(self) -> bool {
236231
v128_any_true(self.0)
@@ -254,9 +249,8 @@ impl From<v128> for SimdU8Value {
254249
impl Utf8CheckAlgorithm<SimdU8Value> {
255250
#[inline]
256251
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
257-
let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
258-
let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));
259-
252+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
253+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
260254
is_third_byte.or(is_fourth_byte)
261255
}
262256
}

src/implementation/x86/avx2.rs

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22
33
#[cfg(target_arch = "x86")]
44
use core::arch::x86::{
5-
__m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_cmpgt_epi8, _mm256_loadu_si256,
6-
_mm256_movemask_epi8, _mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8,
7-
_mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16,
8-
_mm256_subs_epu8, _mm256_testz_si256, _mm256_xor_si256, _mm_prefetch, _MM_HINT_T0,
5+
__m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_loadu_si256, _mm256_movemask_epi8,
6+
_mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8, _mm256_setr_epi8,
7+
_mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16, _mm256_subs_epu8,
8+
_mm256_testz_si256, _mm256_xor_si256, _mm_prefetch, _MM_HINT_T0,
99
};
1010
#[cfg(target_arch = "x86_64")]
1111
use core::arch::x86_64::{
12-
__m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_cmpgt_epi8, _mm256_loadu_si256,
13-
_mm256_movemask_epi8, _mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8,
14-
_mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16,
15-
_mm256_subs_epu8, _mm256_testz_si256, _mm256_xor_si256, _mm_prefetch, _MM_HINT_T0,
12+
__m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_loadu_si256, _mm256_movemask_epi8,
13+
_mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8, _mm256_setr_epi8,
14+
_mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16, _mm256_subs_epu8,
15+
_mm256_testz_si256, _mm256_xor_si256, _mm_prefetch, _MM_HINT_T0,
1616
};
1717

1818
use crate::implementation::helpers::Utf8CheckAlgorithm;
@@ -210,12 +210,6 @@ impl SimdU8Value {
210210
))
211211
}
212212

213-
#[target_feature(enable = "avx2")]
214-
#[inline]
215-
unsafe fn signed_gt(self, other: Self) -> Self {
216-
Self::from(_mm256_cmpgt_epi8(self.0, other.0))
217-
}
218-
219213
#[target_feature(enable = "avx2")]
220214
#[inline]
221215
unsafe fn any_bit_set(self) -> bool {
@@ -240,12 +234,9 @@ impl Utf8CheckAlgorithm<SimdU8Value> {
240234
#[target_feature(enable = "avx2")]
241235
#[inline]
242236
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
243-
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1));
244-
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1));
245-
246-
is_third_byte
247-
.or(is_fourth_byte)
248-
.signed_gt(SimdU8Value::splat0())
237+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
238+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
239+
is_third_byte.or(is_fourth_byte)
249240
}
250241
}
251242

src/implementation/x86/sse42.rs

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
33
#[cfg(target_arch = "x86")]
44
use core::arch::x86::{
5-
__m128i, _mm_alignr_epi8, _mm_and_si128, _mm_cmpgt_epi8, _mm_loadu_si128, _mm_movemask_epi8,
6-
_mm_or_si128, _mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
5+
__m128i, _mm_alignr_epi8, _mm_and_si128, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128,
6+
_mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
77
_mm_srli_epi16, _mm_subs_epu8, _mm_testz_si128, _mm_xor_si128, _MM_HINT_T0,
88
};
99
#[cfg(target_arch = "x86_64")]
1010
use core::arch::x86_64::{
11-
__m128i, _mm_alignr_epi8, _mm_and_si128, _mm_cmpgt_epi8, _mm_loadu_si128, _mm_movemask_epi8,
12-
_mm_or_si128, _mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
11+
__m128i, _mm_alignr_epi8, _mm_and_si128, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128,
12+
_mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
1313
_mm_srli_epi16, _mm_subs_epu8, _mm_testz_si128, _mm_xor_si128, _MM_HINT_T0,
1414
};
1515

@@ -195,12 +195,6 @@ impl SimdU8Value {
195195
Self::from(_mm_alignr_epi8(self.0, prev.0, 16 - 3))
196196
}
197197

198-
#[target_feature(enable = "sse4.2")]
199-
#[inline]
200-
unsafe fn signed_gt(self, other: Self) -> Self {
201-
Self::from(_mm_cmpgt_epi8(self.0, other.0))
202-
}
203-
204198
#[target_feature(enable = "sse4.2")]
205199
#[inline]
206200
unsafe fn any_bit_set(self) -> bool {
@@ -225,12 +219,9 @@ impl Utf8CheckAlgorithm<SimdU8Value> {
225219
#[target_feature(enable = "sse4.2")]
226220
#[inline]
227221
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
228-
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1));
229-
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1));
230-
231-
is_third_byte
232-
.or(is_fourth_byte)
233-
.signed_gt(SimdU8Value::splat0())
222+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
223+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
224+
is_third_byte.or(is_fourth_byte)
234225
}
235226
}
236227

0 commit comments

Comments
 (0)