@@ -945,8 +945,23 @@ pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
// Diff hunk for `_mm256_hadd_epi16`. The removed (`-`) lines forwarded to the
// `phaddw` extern binding; the added (`+`) lines turn the function into a
// `const fn` built from `simd_shuffle!` and `simd_add`.
// `assert_instr( vphaddw )` is an unchanged context line, so the test build
// still expects the new body to lower to `vphaddw`.
945945#[ target_feature( enable = "avx2" ) ]
946946#[ cfg_attr( test, assert_instr( vphaddw) ) ]
947947#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
948- pub fn _mm256_hadd_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
949- unsafe { transmute ( phaddw ( a. as_i16x16 ( ) , b. as_i16x16 ( ) ) ) }
// Const-ness is gated behind the `stdarch_const_intrinsics` feature; no
// tracking issue is attached (`issue = "none"`).
948+ #[ rustc_const_unstable( feature = "stdarch_const_intrinsics" , issue = "none" ) ]
949+ pub const fn _mm256_hadd_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
// Reinterpret both inputs as sixteen 16-bit lanes; the shadowed names are used below.
950+ let a = a. as_i16x16 ( ) ;
951+ let b = b. as_i16x16 ( ) ;
952+ unsafe {
// `even` gathers the first element of every adjacent pair.
// NOTE(review): assumes `simd_shuffle!` indexes the concatenation of its two
// inputs (0..=15 -> `a`, 16..=31 -> `b`) - confirm against the macro.
// Under that reading the output order is: pairs of a[0..8], pairs of b[0..8],
// pairs of a[8..16], pairs of b[8..16].
953+ let even: i16x16 = simd_shuffle ! (
954+ a,
955+ b,
956+ [ 0 , 2 , 4 , 6 , 16 , 18 , 20 , 22 , 8 , 10 , 12 , 14 , 24 , 26 , 28 , 30 ]
957+ ) ;
// `odd` gathers the second element of every pair, in the same output order
// (each index is the matching `even` index plus one).
958+ let odd: i16x16 = simd_shuffle ! (
959+ a,
960+ b,
961+ [ 1 , 3 , 5 , 7 , 17 , 19 , 21 , 23 , 9 , 11 , 13 , 15 , 25 , 27 , 29 , 31 ]
962+ ) ;
// Lane-wise add of the two gathers yields one sum per adjacent pair.
963+ simd_add ( even, odd) . as_m256i ( )
964+ }
950965}
951966
952967/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
@@ -956,8 +971,15 @@ pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
// Diff hunk for `_mm256_hadd_epi32`. The removed (`-`) lines forwarded to the
// `phaddd` extern binding; the added (`+`) lines turn the function into a
// `const fn` built from `simd_shuffle!` and `simd_add`.
// `assert_instr( vphaddd )` is an unchanged context line, so the test build
// still expects the new body to lower to `vphaddd`.
956971#[ target_feature( enable = "avx2" ) ]
957972#[ cfg_attr( test, assert_instr( vphaddd) ) ]
958973#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
959- pub fn _mm256_hadd_epi32 ( a : __m256i , b : __m256i ) -> __m256i {
960- unsafe { transmute ( phaddd ( a. as_i32x8 ( ) , b. as_i32x8 ( ) ) ) }
// Const-ness is gated behind the `stdarch_const_intrinsics` feature; no
// tracking issue is attached (`issue = "none"`).
974+ #[ rustc_const_unstable( feature = "stdarch_const_intrinsics" , issue = "none" ) ]
975+ pub const fn _mm256_hadd_epi32 ( a : __m256i , b : __m256i ) -> __m256i {
// Reinterpret both inputs as eight 32-bit lanes; the shadowed names are used below.
976+ let a = a. as_i32x8 ( ) ;
977+ let b = b. as_i32x8 ( ) ;
978+ unsafe {
// `even` takes the first element of every adjacent pair, `odd` the second.
// NOTE(review): assumes `simd_shuffle!` indexes the concatenation of its two
// inputs (0..=7 -> `a`, 8..=15 -> `b`) - confirm against the macro.
// Under that reading the output order is: pairs of a[0..4], pairs of b[0..4],
// pairs of a[4..8], pairs of b[4..8].
979+ let even: i32x8 = simd_shuffle ! ( a, b, [ 0 , 2 , 8 , 10 , 4 , 6 , 12 , 14 ] ) ;
980+ let odd: i32x8 = simd_shuffle ! ( a, b, [ 1 , 3 , 9 , 11 , 5 , 7 , 13 , 15 ] ) ;
// Lane-wise add of the two gathers yields one sum per adjacent pair.
981+ simd_add ( even, odd) . as_m256i ( )
982+ }
961983}
962984
963985/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
@@ -979,8 +1001,23 @@ pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
// Diff hunk for `_mm256_hsub_epi16`. The removed (`-`) lines forwarded to the
// `phsubw` extern binding; the added (`+`) lines turn the function into a
// `const fn` built from `simd_shuffle!` and `simd_sub`.
// `assert_instr( vphsubw )` is an unchanged context line, so the test build
// still expects the new body to lower to `vphsubw`.
9791001#[ target_feature( enable = "avx2" ) ]
9801002#[ cfg_attr( test, assert_instr( vphsubw) ) ]
9811003#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
982- pub fn _mm256_hsub_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
983- unsafe { transmute ( phsubw ( a. as_i16x16 ( ) , b. as_i16x16 ( ) ) ) }
// Const-ness is gated behind the `stdarch_const_intrinsics` feature; no
// tracking issue is attached (`issue = "none"`).
1004+ #[ rustc_const_unstable( feature = "stdarch_const_intrinsics" , issue = "none" ) ]
1005+ pub const fn _mm256_hsub_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
// Reinterpret both inputs as sixteen 16-bit lanes; the shadowed names are used below.
1006+ let a = a. as_i16x16 ( ) ;
1007+ let b = b. as_i16x16 ( ) ;
1008+ unsafe {
// `even` gathers the first element of every adjacent pair (the minuend).
// NOTE(review): assumes `simd_shuffle!` indexes the concatenation of its two
// inputs (0..=15 -> `a`, 16..=31 -> `b`) - confirm against the macro.
// Under that reading the output order is: pairs of a[0..8], pairs of b[0..8],
// pairs of a[8..16], pairs of b[8..16].
1009+ let even: i16x16 = simd_shuffle ! (
1010+ a,
1011+ b,
1012+ [ 0 , 2 , 4 , 6 , 16 , 18 , 20 , 22 , 8 , 10 , 12 , 14 , 24 , 26 , 28 , 30 ]
1013+ ) ;
// `odd` gathers the second element of every pair (the subtrahend), in the
// same output order.
1014+ let odd: i16x16 = simd_shuffle ! (
1015+ a,
1016+ b,
1017+ [ 1 , 3 , 5 , 7 , 17 , 19 , 21 , 23 , 9 , 11 , 13 , 15 , 25 , 27 , 29 , 31 ]
1018+ ) ;
// Operand order matters here: first element of each pair minus the second.
1019+ simd_sub ( even, odd) . as_m256i ( )
1020+ }
9841021}
9851022
9861023/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`.
@@ -990,8 +1027,15 @@ pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
// Diff hunk for `_mm256_hsub_epi32`. The removed (`-`) lines forwarded to the
// `phsubd` extern binding; the added (`+`) lines turn the function into a
// `const fn` built from `simd_shuffle!` and `simd_sub`.
// `assert_instr( vphsubd )` is an unchanged context line, so the test build
// still expects the new body to lower to `vphsubd`.
9901027#[ target_feature( enable = "avx2" ) ]
9911028#[ cfg_attr( test, assert_instr( vphsubd) ) ]
9921029#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
993- pub fn _mm256_hsub_epi32 ( a : __m256i , b : __m256i ) -> __m256i {
994- unsafe { transmute ( phsubd ( a. as_i32x8 ( ) , b. as_i32x8 ( ) ) ) }
// Const-ness is gated behind the `stdarch_const_intrinsics` feature; no
// tracking issue is attached (`issue = "none"`).
1030+ #[ rustc_const_unstable( feature = "stdarch_const_intrinsics" , issue = "none" ) ]
1031+ pub const fn _mm256_hsub_epi32 ( a : __m256i , b : __m256i ) -> __m256i {
// Reinterpret both inputs as eight 32-bit lanes; the shadowed names are used below.
1032+ let a = a. as_i32x8 ( ) ;
1033+ let b = b. as_i32x8 ( ) ;
1034+ unsafe {
// `even` takes the first element of every adjacent pair (the minuend), `odd`
// the second (the subtrahend).
// NOTE(review): assumes `simd_shuffle!` indexes the concatenation of its two
// inputs (0..=7 -> `a`, 8..=15 -> `b`) - confirm against the macro.
// Under that reading the output order is: pairs of a[0..4], pairs of b[0..4],
// pairs of a[4..8], pairs of b[4..8].
1035+ let even: i32x8 = simd_shuffle ! ( a, b, [ 0 , 2 , 8 , 10 , 4 , 6 , 12 , 14 ] ) ;
1036+ let odd: i32x8 = simd_shuffle ! ( a, b, [ 1 , 3 , 9 , 11 , 5 , 7 , 13 , 15 ] ) ;
// Operand order matters here: first element of each pair minus the second.
1037+ simd_sub ( even, odd) . as_m256i ( )
1038+ }
9951039}
9961040
9971041/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
@@ -1769,8 +1813,14 @@ pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) ->
// Diff hunk for `_mm256_madd_epi16`. The removed (`-`) lines forwarded to the
// `pmaddwd` extern binding; the added (`+`) lines turn the function into a
// `const fn` built from `simd_cast`, `simd_mul`, `simd_shuffle!` and `simd_add`.
// `assert_instr( vpmaddwd )` is an unchanged context line, so the test build
// still expects the new body to lower to `vpmaddwd`.
17691813#[ target_feature( enable = "avx2" ) ]
17701814#[ cfg_attr( test, assert_instr( vpmaddwd) ) ]
17711815#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1772- pub fn _mm256_madd_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
1773- unsafe { transmute ( pmaddwd ( a. as_i16x16 ( ) , b. as_i16x16 ( ) ) ) }
// Const-ness is gated behind the `stdarch_const_intrinsics` feature; no
// tracking issue is attached (`issue = "none"`).
1816+ #[ rustc_const_unstable( feature = "stdarch_const_intrinsics" , issue = "none" ) ]
1817+ pub const fn _mm256_madd_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
1818+ unsafe {
// Both inputs are cast from sixteen 16-bit lanes to `i32x16` before the
// multiply, so `r` holds sixteen 32-bit products.
1819+ let r: i32x16 = simd_mul ( simd_cast ( a. as_i16x16 ( ) ) , simd_cast ( b. as_i16x16 ( ) ) ) ;
// Split the products into the first and second member of each adjacent pair.
// `r` is passed as both shuffle operands; every index is below 16, so only
// the first operand is read.
// NOTE(review): that relies on `simd_shuffle!` indexing the concatenation of
// its two inputs - confirm against the macro.
1820+ let even: i32x8 = simd_shuffle ! ( r, r, [ 0 , 2 , 4 , 6 , 8 , 10 , 12 , 14 ] ) ;
1821+ let odd: i32x8 = simd_shuffle ! ( r, r, [ 1 , 3 , 5 , 7 , 9 , 11 , 13 , 15 ] ) ;
// Sum each adjacent pair of products into one 32-bit lane (eight results).
1822+ simd_add ( even, odd) . as_m256i ( )
1823+ }
17741824}
17751825
17761826/// Vertically multiplies each unsigned 8-bit integer from `a` with the
@@ -3716,20 +3766,10 @@ pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
37163766
37173767#[ allow( improper_ctypes) ]
37183768unsafe extern "C" {
3719- #[ link_name = "llvm.x86.avx2.phadd.w" ]
3720- fn phaddw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
3721- #[ link_name = "llvm.x86.avx2.phadd.d" ]
3722- fn phaddd ( a : i32x8 , b : i32x8 ) -> i32x8 ;
37233769 #[ link_name = "llvm.x86.avx2.phadd.sw" ]
37243770 fn phaddsw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
3725- #[ link_name = "llvm.x86.avx2.phsub.w" ]
3726- fn phsubw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
3727- #[ link_name = "llvm.x86.avx2.phsub.d" ]
3728- fn phsubd ( a : i32x8 , b : i32x8 ) -> i32x8 ;
37293771 #[ link_name = "llvm.x86.avx2.phsub.sw" ]
37303772 fn phsubsw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
3731- #[ link_name = "llvm.x86.avx2.pmadd.wd" ]
3732- fn pmaddwd ( a : i16x16 , b : i16x16 ) -> i32x8 ;
37333773 #[ link_name = "llvm.x86.avx2.pmadd.ub.sw" ]
37343774 fn pmaddubsw ( a : u8x32 , b : u8x32 ) -> i16x16 ;
37353775 #[ link_name = "llvm.x86.avx2.maskload.d" ]
0 commit comments