Simplify the signed integer less-than ops: `a < b` is computed as `b > a`
with a single compare instead of NOT(gt OR eq).

The 128-bit i64 backends (Sse41, Sse2) must keep using the
`sse_cmpgt_epi64_compat` helper: `_mm_cmpgt_epi64` is an SSE4.2 instruction
and is not guaranteed to exist on either of those targets.

Note: line breaks and indentation of this patch were reconstructed; apply
with `git apply --ignore-whitespace` (or `patch -l`) if context whitespace
does not match the tree.

diff --git a/src/ops/i16.rs b/src/ops/i16.rs
index 2835271..97b32ee 100644
--- a/src/ops/i16.rs
+++ b/src/ops/i16.rs
@@ -231,19 +231,13 @@ impl_op! {
             _mm512_movm_epi16(_mm512_cmplt_epi16_mask(a, b))
         }
         for Avx2(a: __m256i, b: __m256i) -> __m256i {
-            let gt = _mm256_cmpgt_epi16(a, b);
-            let eq = _mm256_cmpeq_epi16(a, b);
-            _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi16(u32::MAX as i16))
+            _mm256_cmpgt_epi16(b, a)
         }
         for Sse41(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi16(a, b);
-            let eq = _mm_cmpeq_epi16(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi16(u32::MAX as i16))
+            _mm_cmpgt_epi16(b, a)
         }
         for Sse2(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi16(a, b);
-            let eq = _mm_cmpeq_epi16(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi16(u32::MAX as i16))
+            _mm_cmpgt_epi16(b, a)
         }
         for Scalar(a: i16, b: i16) -> i16 {
             if a < b {
diff --git a/src/ops/i32.rs b/src/ops/i32.rs
index 1014726..8596516 100644
--- a/src/ops/i32.rs
+++ b/src/ops/i32.rs
@@ -244,19 +244,13 @@ impl_op! {
             _mm512_movm_epi32(k)
         }
         for Avx2(a: __m256i, b: __m256i) -> __m256i {
-            let gt = _mm256_cmpgt_epi32(a, b);
-            let eq = _mm256_cmpeq_epi32(a, b);
-            _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi32(u32::MAX as i32))
+            _mm256_cmpgt_epi32(b, a)
         }
         for Sse41(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi32(a, b);
-            let eq = _mm_cmpeq_epi32(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi32(u32::MAX as i32))
+            _mm_cmpgt_epi32(b, a)
         }
         for Sse2(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi32(a, b);
-            let eq = _mm_cmpeq_epi32(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi32(u32::MAX as i32))
+            _mm_cmpgt_epi32(b, a)
         }
         for Scalar(a: i32, b: i32) -> i32 {
             if a < b {
diff --git a/src/ops/i64.rs b/src/ops/i64.rs
index 7b925f9..d64b7aa 100644
--- a/src/ops/i64.rs
+++ b/src/ops/i64.rs
@@ -310,19 +310,15 @@ impl_op! {
             _mm512_movm_epi64(k)
         }
         for Avx2(a: __m256i, b: __m256i) -> __m256i {
-            let gt = _mm256_cmpgt_epi64(a, b);
-            let eq = _mm256_cmpeq_epi64(a, b);
-            _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi64x(u64::MAX as i64))
+            _mm256_cmpgt_epi64(b, a)
         }
         for Sse41(a: __m128i, b: __m128i) -> __m128i {
-            let gt = sse_cmpgt_epi64_compat(a, b);
-            let eq = sse_cmpeq_epi64_compat(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi64x(u64::MAX as i64))
+            // `_mm_cmpgt_epi64` is SSE4.2-only; stay on the compat helper for this backend.
+            sse_cmpgt_epi64_compat(b, a)
         }
         for Sse2(a: __m128i, b: __m128i) -> __m128i {
-            let gt = sse_cmpgt_epi64_compat(a, b);
-            let eq = sse_cmpeq_epi64_compat(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi64x(u64::MAX as i64))
+            // `_mm_cmpgt_epi64` is SSE4.2-only; stay on the compat helper for this backend.
+            sse_cmpgt_epi64_compat(b, a)
         }
         for Scalar(a: i64, b: i64) -> i64 {
             if a < b {
diff --git a/src/ops/i8.rs b/src/ops/i8.rs
index 072a928..856a0e9 100644
--- a/src/ops/i8.rs
+++ b/src/ops/i8.rs
@@ -276,19 +276,13 @@ impl_op! {
             _mm512_movm_epi8(k)
         }
         for Avx2(a: __m256i, b: __m256i) -> __m256i {
-            let gt = _mm256_cmpgt_epi8(a, b);
-            let eq = _mm256_cmpeq_epi8(a, b);
-            _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi8(u32::MAX as i8))
+            _mm256_cmpgt_epi8(b, a)
         }
         for Sse41(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi8(a, b);
-            let eq = _mm_cmpeq_epi8(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi8(u32::MAX as i8))
+            _mm_cmpgt_epi8(b, a)
         }
         for Sse2(a: __m128i, b: __m128i) -> __m128i {
-            let gt = _mm_cmpgt_epi8(a, b);
-            let eq = _mm_cmpeq_epi8(a, b);
-            _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi8(u32::MAX as i8))
+            _mm_cmpgt_epi8(b, a)
         }
         for Scalar(a: i8, b: i8) -> i8 {
             if a < b {