2424// Wrap intrinsics so we can pass them as function pointers
2525// - OP: intrinsics name prefix, e.g., vorrq
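2626// - RT: type traits to deduce intrinsics return types (no longer a parameter of the binary wrapper macros after this change; those wrappers now declare the operand vector type as their return type)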
27- #define WRAP_BINARY_INT_EXCLUDING_64 (OP, RT ) \
27+ #define WRAP_BINARY_INT_EXCLUDING_64 (OP ) \
2828 namespace wrap { \
29- inline RT< uint8x16_t > _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8 (a, b); } \
30- inline RT< int8x16_t > _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8 (a, b); } \
31- inline RT< uint16x8_t > _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16 (a, b); } \
32- inline RT< int16x8_t > _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16 (a, b); } \
33- inline RT< uint32x4_t > _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32 (a, b); } \
34- inline RT< int32x4_t > _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32 (a, b); } \
29+ inline uint8x16_t _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8 (a, b); } \
30+ inline int8x16_t _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8 (a, b); } \
31+ inline uint16x8_t _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16 (a, b); } \
32+ inline int16x8_t _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16 (a, b); } \
33+ inline uint32x4_t _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32 (a, b); } \
34+ inline int32x4_t _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32 (a, b); } \
3535 }
3636
37- #define WRAP_BINARY_INT (OP, RT ) \
38- WRAP_BINARY_INT_EXCLUDING_64 (OP, RT ) \
37+ #define WRAP_BINARY_INT (OP ) \
38+ WRAP_BINARY_INT_EXCLUDING_64 (OP) \
3939 namespace wrap { \
40- inline RT< uint64x2_t > _##OP##_u64 (uint64x2_t a, uint64x2_t b) { return ::OP##_u64 (a, b); } \
41- inline RT< int64x2_t > _##OP##_s64 (int64x2_t a, int64x2_t b) { return ::OP##_s64 (a, b); } \
40+ inline uint64x2_t _##OP##_u64 (uint64x2_t a, uint64x2_t b) { return ::OP##_u64 (a, b); } \
41+ inline int64x2_t _##OP##_s64 (int64x2_t a, int64x2_t b) { return ::OP##_s64 (a, b); } \
4242 }
4343
44- #define WRAP_BINARY_FLOAT (OP, RT ) \
44+ #define WRAP_BINARY_FLOAT (OP ) \
4545 namespace wrap { \
46- inline RT< float32x4_t > _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32 (a, b); } \
46+ inline float32x4_t _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32 (a, b); } \
4747 }
4848
4949#define WRAP_UNARY_INT_EXCLUDING_64 (OP ) \
@@ -87,139 +87,6 @@ namespace xsimd
8787
8888 namespace detail
8989 {
90- template <template <class > class return_type , class ... T>
91- struct neon_dispatcher_base
92- {
93- struct unary
94- {
95- using container_type = std::tuple<return_type<T> (*)(T)...>;
96- const container_type m_func;
97-
98- template <class U >
99- return_type<U> apply (U rhs) const
100- {
101- using func_type = return_type<U> (*)(U);
102- auto func = xsimd::detail::get<func_type>(m_func);
103- return func (rhs);
104- }
105- };
106-
107- struct binary
108- {
109- using container_type = std::tuple<return_type<T> (*)(T, T) ...>;
110- const container_type m_func;
111-
112- template <class U >
113- return_type<U> apply (U lhs, U rhs) const
114- {
115- using func_type = return_type<U> (*)(U, U);
116- auto func = xsimd::detail::get<func_type>(m_func);
117- return func (lhs, rhs);
118- }
119- };
120- };
121-
122- /* **************************
123- * arithmetic dispatchers *
124- ***************************/
125-
126- template <class T >
127- using identity_return_type = T;
128-
129- template <class ... T>
130- struct neon_dispatcher_impl : neon_dispatcher_base<identity_return_type, T...>
131- {
132- };
133-
134-
135- using neon_dispatcher = neon_dispatcher_impl<uint8x16_t , int8x16_t ,
136- uint16x8_t , int16x8_t ,
137- uint32x4_t , int32x4_t ,
138- uint64x2_t , int64x2_t ,
139- float32x4_t >;
140-
141- using excluding_int64_dispatcher = neon_dispatcher_impl<uint8x16_t , int8x16_t ,
142- uint16x8_t , int16x8_t ,
143- uint32x4_t , int32x4_t ,
144- float32x4_t >;
145-
146- /* *************************
147- * comparison dispatchers *
148- **************************/
149-
150- template <class T >
151- struct comp_return_type_impl ;
152-
153- template <>
154- struct comp_return_type_impl <uint8x16_t >
155- {
156- using type = uint8x16_t ;
157- };
158-
159- // MSVC uses same underlying type for all vector variants which would cause C++ function overload ambiguity
160- #if !defined(_WIN32) || (defined(__clang__))
161- template <>
162- struct comp_return_type_impl <int8x16_t >
163- {
164- using type = uint8x16_t ;
165- };
166-
167- template <>
168- struct comp_return_type_impl <uint16x8_t >
169- {
170- using type = uint16x8_t ;
171- };
172-
173- template <>
174- struct comp_return_type_impl <int16x8_t >
175- {
176- using type = uint16x8_t ;
177- };
178-
179- template <>
180- struct comp_return_type_impl <uint32x4_t >
181- {
182- using type = uint32x4_t ;
183- };
184-
185- template <>
186- struct comp_return_type_impl <int32x4_t >
187- {
188- using type = uint32x4_t ;
189- };
190-
191- template <>
192- struct comp_return_type_impl <uint64x2_t >
193- {
194- using type = uint64x2_t ;
195- };
196-
197- template <>
198- struct comp_return_type_impl <int64x2_t >
199- {
200- using type = uint64x2_t ;
201- };
202-
203- template <>
204- struct comp_return_type_impl <float32x4_t >
205- {
206- using type = uint32x4_t ;
207- };
208- #endif
209-
210- template <class T >
211- using comp_return_type = typename comp_return_type_impl<T>::type;
212-
213- template <class ... T>
214- struct neon_comp_dispatcher_impl : neon_dispatcher_base<comp_return_type, T...>
215- {
216- };
217-
218- using excluding_int64_comp_dispatcher = neon_comp_dispatcher_impl<uint8x16_t , int8x16_t ,
219- uint16x8_t , int16x8_t ,
220- uint32x4_t , int32x4_t ,
221- float32x4_t >;
222-
22390 /* *************************************
22491 * enabling / disabling metafunctions *
22592 **************************************/
@@ -627,8 +494,8 @@ namespace xsimd
627494 * add *
628495 *******/
629496
630- WRAP_BINARY_INT (vaddq, detail::identity_return_type )
631- WRAP_BINARY_FLOAT (vaddq, detail::identity_return_type )
497+ WRAP_BINARY_INT (vaddq)
498+ WRAP_BINARY_FLOAT (vaddq)
632499
633500 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
634501 batch<T, A> add (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -645,7 +512,7 @@ namespace xsimd
645512 * sadd *
646513 ********/
647514
648- WRAP_BINARY_INT (vqaddq, detail::identity_return_type )
515+ WRAP_BINARY_INT (vqaddq)
649516
650517 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
651518 batch<T, A> sadd (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -662,8 +529,8 @@ namespace xsimd
662529 * sub *
663530 *******/
664531
665- WRAP_BINARY_INT (vsubq, detail::identity_return_type )
666- WRAP_BINARY_FLOAT (vsubq, detail::identity_return_type )
532+ WRAP_BINARY_INT (vsubq)
533+ WRAP_BINARY_FLOAT (vsubq)
667534
668535 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
669536 batch<T, A> sub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -680,7 +547,7 @@ namespace xsimd
680547 * ssub *
681548 ********/
682549
683- WRAP_BINARY_INT (vqsubq, detail::identity_return_type )
550+ WRAP_BINARY_INT (vqsubq)
684551
685552 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
686553 batch<T, A> ssub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -698,8 +565,8 @@ namespace xsimd
698565 * mul *
699566 *******/
700567
701- WRAP_BINARY_INT_EXCLUDING_64 (vmulq, detail::identity_return_type )
702- WRAP_BINARY_FLOAT (vmulq, detail::identity_return_type )
568+ WRAP_BINARY_INT_EXCLUDING_64 (vmulq)
569+ WRAP_BINARY_FLOAT (vmulq)
703570
704571 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
705572 batch<T, A> mul (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -751,8 +618,8 @@ namespace xsimd
751618 * eq *
752619 ******/
753620
754- WRAP_BINARY_INT_EXCLUDING_64 (vceqq, detail::comp_return_type )
755- WRAP_BINARY_FLOAT (vceqq, detail::comp_return_type )
621+ WRAP_BINARY_INT_EXCLUDING_64 (vceqq)
622+ WRAP_BINARY_FLOAT (vceqq)
756623
757624 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
758625 batch_bool<T, A> eq (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -797,8 +664,8 @@ namespace xsimd
797664 * lt *
798665 ******/
799666
800- WRAP_BINARY_INT_EXCLUDING_64 (vcltq, detail::comp_return_type )
801- WRAP_BINARY_FLOAT (vcltq, detail::comp_return_type )
667+ WRAP_BINARY_INT_EXCLUDING_64 (vcltq)
668+ WRAP_BINARY_FLOAT (vcltq)
802669
803670 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
804671 batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -821,8 +688,8 @@ namespace xsimd
821688 * le *
822689 ******/
823690
824- WRAP_BINARY_INT_EXCLUDING_64 (vcleq, detail::comp_return_type )
825- WRAP_BINARY_FLOAT (vcleq, detail::comp_return_type )
691+ WRAP_BINARY_INT_EXCLUDING_64 (vcleq)
692+ WRAP_BINARY_FLOAT (vcleq)
826693
827694 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
828695 batch_bool<T, A> le (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -845,8 +712,8 @@ namespace xsimd
845712 * gt *
846713 ******/
847714
848- WRAP_BINARY_INT_EXCLUDING_64 (vcgtq, detail::comp_return_type )
849- WRAP_BINARY_FLOAT (vcgtq, detail::comp_return_type )
715+ WRAP_BINARY_INT_EXCLUDING_64 (vcgtq)
716+ WRAP_BINARY_FLOAT (vcgtq)
850717
851718 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
852719 batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -869,8 +736,8 @@ namespace xsimd
869736 * ge *
870737 ******/
871738
872- WRAP_BINARY_INT_EXCLUDING_64 (vcgeq, detail::comp_return_type )
873- WRAP_BINARY_FLOAT (vcgeq, detail::comp_return_type )
739+ WRAP_BINARY_INT_EXCLUDING_64 (vcgeq)
740+ WRAP_BINARY_FLOAT (vcgeq)
874741
875742 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
876743 batch_bool<T, A> get (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -893,7 +760,7 @@ namespace xsimd
893760 * bitwise_and *
894761 ***************/
895762
896- WRAP_BINARY_INT (vandq, detail::identity_return_type )
763+ WRAP_BINARY_INT (vandq)
897764
898765 namespace detail
899766 {
@@ -930,7 +797,7 @@ namespace xsimd
930797 * bitwise_or *
931798 **************/
932799
933- WRAP_BINARY_INT (vorrq, detail::identity_return_type )
800+ WRAP_BINARY_INT (vorrq)
934801
935802 namespace detail
936803 {
@@ -967,7 +834,7 @@ namespace xsimd
967834 * bitwise_xor *
968835 ***************/
969836
970- WRAP_BINARY_INT (veorq, detail::identity_return_type )
837+ WRAP_BINARY_INT (veorq)
971838
972839 namespace detail
973840 {
@@ -1085,7 +952,7 @@ namespace xsimd
1085952 * bitwise_andnot *
1086953 ******************/
1087954
1088- WRAP_BINARY_INT (vbicq, detail::identity_return_type )
955+ WRAP_BINARY_INT (vbicq)
1089956
1090957 namespace detail
1091958 {
@@ -1121,8 +988,8 @@ namespace xsimd
1121988 * min *
1122989 *******/
1123990
1124- WRAP_BINARY_INT_EXCLUDING_64 (vminq, detail::identity_return_type )
1125- WRAP_BINARY_FLOAT (vminq, detail::identity_return_type )
991+ WRAP_BINARY_INT_EXCLUDING_64 (vminq)
992+ WRAP_BINARY_FLOAT (vminq)
1126993
1127994 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
1128995 batch<T, A> min (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -1145,8 +1012,8 @@ namespace xsimd
11451012 * max *
11461013 *******/
11471014
1148- WRAP_BINARY_INT_EXCLUDING_64 (vmaxq, detail::identity_return_type )
1149- WRAP_BINARY_FLOAT (vmaxq, detail::identity_return_type )
1015+ WRAP_BINARY_INT_EXCLUDING_64 (vmaxq)
1016+ WRAP_BINARY_FLOAT (vmaxq)
11501017
11511018 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
11521019 batch<T, A> max (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -1355,30 +1222,6 @@ namespace xsimd
13551222 inline float32x4_t _vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c) { return ::vbslq_f32 (a, b, c); }
13561223 }
13571224
1358- namespace detail
1359- {
1360- template <class ... T>
1361- struct neon_select_dispatcher_impl
1362- {
1363- using container_type = std::tuple<T (*)(comp_return_type<T>, T, T)...>;
1364- const container_type m_func;
1365-
1366- template <class U >
1367- U apply (comp_return_type<U> cond, U lhs, U rhs) const
1368- {
1369- using func_type = U (*)(comp_return_type<U>, U, U);
1370- auto func = xsimd::detail::get<func_type>(m_func);
1371- return func (cond, lhs, rhs);
1372- }
1373- };
1374-
1375- using neon_select_dispatcher = neon_select_dispatcher_impl<uint8x16_t , int8x16_t ,
1376- uint16x8_t , int16x8_t ,
1377- uint32x4_t , int32x4_t ,
1378- uint64x2_t , int64x2_t ,
1379- float32x4_t >;
1380- }
1381-
13821225 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
13831226 batch<T, A> select (batch_bool<T, A> const & cond, batch<T, A> const & a, batch<T, A> const & b, requires_arch<neon>)
13841227 {
0 commit comments