diff --git a/include/xsimd/arch/xsimd_rvv.hpp b/include/xsimd/arch/xsimd_rvv.hpp index 6d1eae59d..12ae39280 100644 --- a/include/xsimd/arch/xsimd_rvv.hpp +++ b/include/xsimd/arch/xsimd_rvv.hpp @@ -16,6 +16,7 @@ #include "../config/xsimd_macros.hpp" #include "../types/xsimd_batch_constant.hpp" #include "../types/xsimd_rvv_register.hpp" +#include "../types/xsimd_utils.hpp" #include "../utils/xsimd_type_traits.hpp" #include "./xsimd_constants.hpp" @@ -288,8 +289,6 @@ namespace xsimd { namespace detail { - template - using rvv_fix_char_t = types::detail::rvv_fix_char_t; template using rvv_reg_t = types::detail::rvv_reg_t; template @@ -375,26 +374,6 @@ namespace xsimd index = __riscv_vsll(index, shift, batch::size); return __riscv_vadd(index, T(offset), batch::size); } - - // enable for signed integers - template - using rvv_enable_signed_int_t = std::enable_if_t::value && std::is_signed::value, int>; - - // enable for unsigned integers - template - using rvv_enable_unsigned_int_t = std::enable_if_t::value && std::is_unsigned::value, int>; - - // enable for floating points - template - using rvv_enable_floating_point_t = std::enable_if_t::value, int>; - - // enable for signed integers or floating points - template - using rvv_enable_signed_int_or_floating_point_t = std::enable_if_t::value, int>; - - // enable for all RVE supported types - template - using rvv_enable_all_t = std::enable_if_t::value, int>; } // namespace detail /******************** @@ -409,7 +388,7 @@ namespace xsimd // A bit of a dance, here, because rvvmv_splat has no other // argument from which to deduce type, and T=char is not // supported. 
- detail::rvv_fix_char_t arg_not_char(arg); + project_num_t arg_not_char(arg); const auto splat = detail::rvvmv_splat(arg_not_char); return detail::rvv_reg_t(splat.get_bytes(), types::detail::XSIMD_RVV_BITCAST); } @@ -432,13 +411,13 @@ namespace xsimd XSIMD_RVV_OVERLOAD(rvvse, (__riscv_vse XSIMD_RVV_S _v_ XSIMD_RVV_TSM), , void(T*, vec)) } - template = 0> + template = 0> XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { - return detail::rvvle(reinterpret_cast const*>(src)); + return detail::rvvle(reinterpret_cast const*>(src)); } - template = 0> + template = 0> XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return load_aligned(src, convert(), rvv {}); @@ -485,7 +464,7 @@ namespace xsimd return __riscv_vslidedown(vv, vv.vl / 2, vv.vl); } - template = 0> + template = 0> XSIMD_INLINE batch, A> load_complex(batch const& lo, batch const& hi, requires_arch) noexcept { const auto real_index = vindex, 0, 1>(); @@ -502,13 +481,13 @@ namespace xsimd * Store * *********/ - template = 0> + template = 0> XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { - detail::rvvse(reinterpret_cast*>(dst), src); + detail::rvvse(reinterpret_cast*>(dst), src); } - template = 0> + template = 0> XSIMD_INLINE void store_unaligned(T* dst, batch const& src, requires_arch) noexcept { store_aligned(dst, src, rvv {}); @@ -536,7 +515,7 @@ namespace xsimd { using UU = as_unsigned_integer_t; const auto uindex = detail::rvv_to_unsigned_batch(index); - auto* base = reinterpret_cast*>(dst); + auto* base = reinterpret_cast*>(dst); // or rvvsuxei const auto bi = detail::rvvmul_splat(uindex, sizeof(T)); detail::rvvsoxei(base, bi, vals); @@ -548,7 +527,7 @@ namespace xsimd { using UU = as_unsigned_integer_t; const auto uindex = detail::rvv_to_unsigned_batch(index); - auto const* base = reinterpret_cast const*>(src); + auto const* base = reinterpret_cast const*>(src); // or rvvluxei const auto bi = 
detail::rvvmul_splat(uindex, sizeof(T)); return detail::rvvloxei(base, bi); @@ -639,7 +618,7 @@ namespace xsimd } // namespace detail // add - template = 0> + template = 0> XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvadd(lhs, rhs); @@ -653,7 +632,7 @@ namespace xsimd } // sub - template = 0> + template = 0> XSIMD_INLINE batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvsub(lhs, rhs); @@ -667,35 +646,35 @@ namespace xsimd } // mul - template = 0> + template = 0> XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmul(lhs, rhs); } // div - template = 0> + template = 0> XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvdiv(lhs, rhs); } // max - template = 0> + template = 0> XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmax(lhs, rhs); } // min - template = 0> + template = 0> XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmin(lhs, rhs); } // neg - template = 0> + template = 0> XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { using S = as_signed_integer_t; @@ -704,27 +683,27 @@ namespace xsimd return detail::rvvreinterpret(result); } - template = 0> + template = 0> XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return detail::rvvneg(arg); } // abs - template = 0> + template = 0> XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return arg; } - template = 0> + template = 0> XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return detail::rvvabs(arg); } // fma: x * y + z - template = 0> + template = 0> XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also detail::rvvmadd(x, y, z); @@ -732,7 +711,7 @@ namespace xsimd } // fnma: z - x * y - 
template = 0> + template = 0> XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also detail::rvvnmsub(x, y, z); @@ -740,7 +719,7 @@ namespace xsimd } // fms: x * y - z - template = 0> + template = 0> XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also vfmsac(z, x, y), but lacking integer version @@ -749,7 +728,7 @@ namespace xsimd } // fnms: - x * y - z - template = 0> + template = 0> XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also vfnmacc(z, x, y), but lacking integer version @@ -782,7 +761,7 @@ namespace xsimd return detail::rvvand(lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); @@ -791,7 +770,7 @@ namespace xsimd return detail::rvvreinterpret(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmand(lhs, rhs); @@ -805,7 +784,7 @@ namespace xsimd return detail::rvvand(lhs, not_rhs); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); @@ -815,7 +794,7 @@ namespace xsimd return detail::rvvreinterpret(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmandn(lhs, rhs); @@ -828,7 +807,7 @@ namespace xsimd return detail::rvvor(lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); @@ -837,7 +816,7 @@ namespace xsimd return 
detail::rvvreinterpret(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmor(lhs, rhs); @@ -850,7 +829,7 @@ namespace xsimd return detail::rvvxor(lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); @@ -859,7 +838,7 @@ namespace xsimd return detail::rvvreinterpret(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmxor(lhs, rhs); @@ -872,7 +851,7 @@ namespace xsimd return detail::rvvnot(arg); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { const auto arg_bits = detail::rvv_to_unsigned_batch(arg); @@ -880,7 +859,7 @@ namespace xsimd return detail::rvvreinterpret(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept { return detail::rvvmnot(arg); @@ -967,7 +946,7 @@ namespace xsimd } } // reduce_add - template ::value_type, detail::rvv_enable_all_t = 0> + template ::value_type, detail::enable_arithmetic_t = 0> XSIMD_INLINE V reduce_add(batch const& arg, requires_arch) noexcept { const auto zero = detail::broadcast(T(0)); @@ -976,7 +955,7 @@ namespace xsimd } // reduce_max - template = 0> + template = 0> XSIMD_INLINE T reduce_max(batch const& arg, requires_arch) noexcept { const auto lowest = detail::broadcast(std::numeric_limits::lowest()); @@ -985,7 +964,7 @@ namespace xsimd } // reduce_min - template = 0> + template = 0> XSIMD_INLINE T reduce_min(batch const& arg, requires_arch) noexcept { const auto max = detail::broadcast(std::numeric_limits::max()); @@ -994,7 +973,7 @@ namespace xsimd } // haddp - template = 0> + template = 0> XSIMD_INLINE batch 
haddp(const batch* row, requires_arch) noexcept { constexpr std::size_t size = batch::size; @@ -1012,13 +991,13 @@ namespace xsimd ***************/ // eq - template = 0> + template = 0> XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmseq(lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { const auto neq_result = detail::rvvmxor(lhs, rhs); @@ -1026,41 +1005,41 @@ namespace xsimd } // neq - template = 0> + template = 0> XSIMD_INLINE batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsne(lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmxor(lhs, rhs); } // lt - template = 0> + template = 0> XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmslt(lhs, rhs); } // le - template = 0> + template = 0> XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsle(lhs, rhs); } // gt - template = 0> + template = 0> XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsgt(lhs, rhs); } // ge - template = 0> + template = 0> XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsge(lhs, rhs); @@ -1116,7 +1095,7 @@ namespace xsimd // extract_pair - template = 0> + template = 0> XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, size_t n, requires_arch) noexcept { const auto tmp = detail::rvvslidedown(rhs, n); @@ -1124,7 +1103,7 @@ namespace xsimd } // select - template = 0> + template = 0> XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { return detail::rvvmerge(b, a, cond); @@ -1137,7 
+1116,7 @@ namespace xsimd } // zip_lo - template = 0> + template = 0> XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto index = detail::vindex, 0, -1>(); @@ -1148,7 +1127,7 @@ namespace xsimd } // zip_hi - template = 0> + template = 0> XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto index = detail::vindex, batch::size / 2, -1>(); @@ -1159,7 +1138,7 @@ namespace xsimd } // store_complex - template = 0> + template = 0> XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { const auto lo = zip_lo(src.real(), src.imag()); @@ -1169,7 +1148,7 @@ namespace xsimd store_aligned(buf + lo.size, hi, rvv {}); } - template = 0> + template = 0> XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, rvv {}); @@ -1187,7 +1166,7 @@ namespace xsimd } // rsqrt - template = 0> + template = 0> XSIMD_INLINE batch rsqrt(batch const& arg, requires_arch) noexcept { auto approx = detail::rvvfrsqrt7(arg); @@ -1196,14 +1175,14 @@ namespace xsimd } // sqrt - template = 0> + template = 0> XSIMD_INLINE batch sqrt(batch const& arg, requires_arch) noexcept { return detail::rvvfsqrt(arg); } // reciprocal - template = 0> + template = 0> XSIMD_INLINE batch reciprocal(const batch& arg, requires_arch) noexcept { return detail::rvvfrec7(arg); @@ -1278,20 +1257,20 @@ namespace xsimd } // first - template = 0> + template = 0> XSIMD_INLINE T first(batch const& arg, requires_arch) noexcept { return detail::rvvmv_lane0(arg); } - template = 0> + template = 0> XSIMD_INLINE std::complex first(batch, A> const& arg, requires_arch) noexcept { return std::complex { detail::rvvmv_lane0(arg.real()), detail::rvvmv_lane0(arg.imag()) }; } // insert - template = 0> + template = 0> XSIMD_INLINE batch insert(batch const& arg, T val, index, requires_arch) noexcept { const auto mask = 
detail::pmask(uint64_t(1) << I); @@ -1299,14 +1278,14 @@ namespace xsimd } // get - template = 0> + template = 0> XSIMD_INLINE T get(batch const& arg, size_t i, requires_arch) noexcept { const auto tmp = detail::rvvslidedown(arg, i); return detail::rvvmv_lane0(tmp); } - template = 0> + template = 0> XSIMD_INLINE std::complex get(batch, A> const& arg, size_t i, requires_arch) noexcept { const auto tmpr = detail::rvvslidedown(arg.real(), i); @@ -1315,28 +1294,28 @@ namespace xsimd } // all - template = 0> + template = 0> XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return detail::rvvcpop(arg) == batch_bool::size; } // any - template = 0> + template = 0> XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return detail::rvvcpop(arg) > 0; } // bitwise_cast - template = 0, detail::rvv_enable_all_t = 0> + template = 0, detail::enable_arithmetic_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return detail::rvv_reg_t(arg.data.get_bytes(), types::detail::XSIMD_RVV_BITCAST); } // batch_bool_cast - template = 0> + template = 0> XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept { using intermediate_t = typename detail::rvv_bool_t; @@ -1344,7 +1323,7 @@ namespace xsimd } // from_bool - template = 0> + template = 0> XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { const auto zero = broadcast(T(0), rvv {}); @@ -1382,7 +1361,7 @@ namespace xsimd } // slide_left - template = 0> + template = 0> XSIMD_INLINE batch slide_left(batch const& arg, requires_arch) noexcept { const auto zero = broadcast(uint8_t(0), rvv {}); @@ -1391,7 +1370,7 @@ namespace xsimd } // slide_right - template = 0> + template = 0> XSIMD_INLINE batch slide_right(batch const& arg, requires_arch) noexcept { using reg_t = detail::rvv_reg_t; @@ -1400,7 +1379,7 @@ namespace xsimd } // isnan - template = 0> + template = 0> XSIMD_INLINE 
batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); @@ -1433,7 +1412,7 @@ namespace xsimd } // round - template = 0> + template = 0> XSIMD_INLINE batch round(batch const& arg, requires_arch) noexcept { // Round ties away from zero. @@ -1442,7 +1421,7 @@ namespace xsimd } // nearbyint - template = 0> + template = 0> XSIMD_INLINE batch nearbyint(batch const& arg, requires_arch) noexcept { // Round according to current rounding mode. diff --git a/include/xsimd/arch/xsimd_sve.hpp b/include/xsimd/arch/xsimd_sve.hpp index 6636f4c37..05109dfd1 100644 --- a/include/xsimd/arch/xsimd_sve.hpp +++ b/include/xsimd/arch/xsimd_sve.hpp @@ -59,26 +59,10 @@ namespace xsimd template XSIMD_INLINE uint64_t sve_pcount(svbool_t p) noexcept { return sve_pcount_impl(p, index {}); } - // enable for signed integers - template - using sve_enable_signed_int_t = std::enable_if_t::value && std::is_signed::value, int>; - - // enable for unsigned integers - template - using sve_enable_unsigned_int_t = std::enable_if_t::value && !std::is_signed::value, int>; - - // enable for floating points - template - using sve_enable_floating_point_t = std::enable_if_t::value, int>; - // enable for signed integers or floating points template using sve_enable_signed_int_or_floating_point_t = std::enable_if_t::value, int>; - // enable for all SVE supported types - template - using sve_enable_all_t = std::enable_if_t::value, int>; - // `sizeless` is the matching sizeless SVE type. xsimd stores SVE // vectors as fixed-size attributed types (arm_sve_vector_bits), // which clang treats as implicitly convertible to every sizeless @@ -88,55 +72,33 @@ namespace xsimd // 1-vector candidate. template using sve_sizeless_t = xsimd::types::detail::sizeless_sve_vector_type; - - // Remap integer Ts to their matching fixed-width counterpart - // so svld1/svst1 see the pointer type their overload set expects; - // pass non-integer Ts through unchanged. 
- template >::value> - struct sve_fix_integer_impl - { - using type = T; - }; - template - struct sve_fix_integer_impl - { - using type = std::conditional_t::value, - sized_int_t, sized_uint_t>; - }; - - // SVE load/store intrinsics are overloaded on these pointer for integer - // types, but some platform have explicit different types between - // `long` vs `long long` or `char` vs `int8_t`. - // We remap the type to avoid these. - template - using sve_fix_char_t = typename sve_fix_integer_impl::type; } // namespace detail /********* * Load * *********/ - template = 0> + template = 0> XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { - return svld1(detail::sve_ptrue(), reinterpret_cast const*>(src)); + return svld1(detail::sve_ptrue(), reinterpret_cast const*>(src)); } - template = 0> + template = 0> XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return load_aligned(src, convert(), sve {}); } // load_masked - template = 0> + template = 0> XSIMD_INLINE batch load_masked(T const* mem, batch_bool_constant, Mode, requires_arch) noexcept { - return svld1(detail::sve_pmask(), reinterpret_cast const*>(mem)); + return svld1(detail::sve_pmask(), reinterpret_cast const*>(mem)); } // load_complex - template = 0> + template = 0> XSIMD_INLINE batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept { const T* buf = reinterpret_cast(mem); @@ -146,7 +108,7 @@ namespace xsimd return batch, A> { real, imag }; } - template = 0> + template = 0> XSIMD_INLINE batch, A> load_complex_unaligned(std::complex const* mem, convert>, requires_arch) noexcept { return load_complex_aligned(mem, convert> {}, sve {}); @@ -156,20 +118,20 @@ namespace xsimd * Store * *********/ - template = 0> + template = 0> XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { - svst1(detail::sve_ptrue(), reinterpret_cast*>(dst), src); + svst1(detail::sve_ptrue(), 
reinterpret_cast*>(dst), src); } - template = 0> + template = 0> XSIMD_INLINE void store_unaligned(T* dst, batch const& src, requires_arch) noexcept { store_aligned(dst, src, sve {}); } // store_complex - template = 0> + template = 0> XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { using v2type = std::conditional_t<(sizeof(T) == 4), svfloat32x2_t, svfloat64x2_t>; @@ -180,7 +142,7 @@ namespace xsimd svst2(detail::sve_ptrue(), buf, tmp); } - template = 0> + template = 0> XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, sve {}); @@ -275,7 +237,7 @@ namespace xsimd return svdup_n_f64(arg); } - template = 0> + template = 0> XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return broadcast(val, sve {}); @@ -286,7 +248,7 @@ namespace xsimd **************/ // add - template = 0> + template = 0> XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svadd_x(detail::sve_ptrue(), lhs, rhs); @@ -300,7 +262,7 @@ namespace xsimd } // sub - template = 0> + template = 0> XSIMD_INLINE batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svsub_x(detail::sve_ptrue(), lhs, rhs); @@ -314,7 +276,7 @@ namespace xsimd } // mul - template = 0> + template = 0> XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmul_x(detail::sve_ptrue(), lhs, rhs); @@ -328,14 +290,14 @@ namespace xsimd } // max - template = 0> + template = 0> XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmax_x(detail::sve_ptrue(), lhs, rhs); } // min - template = 0> + template = 0> XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmin_x(detail::sve_ptrue(), lhs, rhs); @@ -366,48 +328,48 @@ namespace xsimd return svreinterpret_u64(svneg_x(detail::sve_ptrue(), 
svreinterpret_s64(static_cast>(arg)))); } - template = 0> + template = 0> XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return svneg_x(detail::sve_ptrue(), arg); } // abs - template = 0> + template = 0> XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return arg; } - template = 0> + template = 0> XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return svabs_x(detail::sve_ptrue(), arg); } // fma: x * y + z - template = 0> + template = 0> XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return svmad_x(detail::sve_ptrue(), x, y, z); } // fnma: z - x * y - template = 0> + template = 0> XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return svmsb_x(detail::sve_ptrue(), x, y, z); } // fms: x * y - z - template = 0> + template = 0> XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -fnma(x, y, z, sve {}); } // fnms: - x * y - z - template = 0> + template = 0> XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -fma(x, y, z, sve {}); @@ -442,7 +404,7 @@ namespace xsimd return svreinterpret_f64(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svand_z(detail::sve_ptrue(), lhs, rhs); @@ -473,7 +435,7 @@ namespace xsimd return svreinterpret_f64(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svbic_z(detail::sve_ptrue(), lhs, rhs); @@ -504,7 +466,7 @@ namespace xsimd return svreinterpret_f64(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svorr_z(detail::sve_ptrue(), lhs, rhs); @@ 
-535,7 +497,7 @@ namespace xsimd return svreinterpret_f64(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return sveor_z(detail::sve_ptrue(), lhs, rhs); @@ -564,7 +526,7 @@ namespace xsimd return svreinterpret_f64(result_bits); } - template = 0> + template = 0> XSIMD_INLINE batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept { return svnot_z(detail::sve_ptrue(), arg); @@ -623,7 +585,7 @@ namespace xsimd } // bitwise_rshift - template = 0> + template = 0> XSIMD_INLINE batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept { constexpr std::size_t size = sizeof(typename batch::value_type) * 8; @@ -631,13 +593,13 @@ namespace xsimd return svlsr_x(detail::sve_ptrue(), arg, static_cast(n)); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svlsr_x(detail::sve_ptrue(), lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept { constexpr std::size_t size = sizeof(typename batch::value_type) * 8; @@ -645,7 +607,7 @@ namespace xsimd return svasr_x(detail::sve_ptrue(), arg, static_cast>(n)); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svasr_x(detail::sve_ptrue(), lhs, detail::sve_to_unsigned_batch(rhs)); @@ -656,7 +618,7 @@ namespace xsimd **************/ // reduce_add - template ::value_type, detail::sve_enable_all_t = 0> + template ::value_type, detail::enable_arithmetic_t = 0> XSIMD_INLINE V reduce_add(batch const& arg, requires_arch) noexcept { // sve integer reduction results are promoted to 64 bits @@ -664,21 +626,21 @@ namespace xsimd } // reduce_max - template = 0> + template = 0> XSIMD_INLINE T reduce_max(batch const& arg, requires_arch) noexcept { return svmaxv(detail::sve_ptrue(), 
arg); } // reduce_min - template = 0> + template = 0> XSIMD_INLINE T reduce_min(batch const& arg, requires_arch) noexcept { return svminv(detail::sve_ptrue(), arg); } // haddp - template = 0> + template = 0> XSIMD_INLINE batch haddp(const batch* row, requires_arch) noexcept { constexpr std::size_t size = batch::size; @@ -695,13 +657,13 @@ namespace xsimd ***************/ // eq - template = 0> + template = 0> XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpeq(detail::sve_ptrue(), lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { const auto neq_result = sveor_z(detail::sve_ptrue(), lhs, rhs); @@ -709,41 +671,41 @@ namespace xsimd } // neq - template = 0> + template = 0> XSIMD_INLINE batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpne(detail::sve_ptrue(), lhs, rhs); } - template = 0> + template = 0> XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return sveor_z(detail::sve_ptrue(), lhs, rhs); } // lt - template = 0> + template = 0> XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmplt(detail::sve_ptrue(), lhs, rhs); } // le - template = 0> + template = 0> XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmple(detail::sve_ptrue(), lhs, rhs); } // gt - template = 0> + template = 0> XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpgt(detail::sve_ptrue(), lhs, rhs); } // ge - template = 0> + template = 0> XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpge(detail::sve_ptrue(), lhs, rhs); @@ -754,7 +716,7 @@ namespace xsimd ***************/ // rotate_left - template = 0> + template = 0> XSIMD_INLINE batch rotate_left(batch const& a, 
requires_arch) noexcept { return svext(a, a, N); @@ -835,7 +797,7 @@ namespace xsimd } } - template = 0> + template = 0> XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept { constexpr std::size_t size = batch::size; @@ -844,7 +806,7 @@ namespace xsimd } // select - template = 0> + template = 0> XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { return svsel(cond, static_cast>(a), static_cast>(b)); @@ -857,14 +819,14 @@ namespace xsimd } // zip_lo - template = 0> + template = 0> XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svzip1(lhs, rhs); } // zip_hi - template = 0> + template = 0> XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svzip2(lhs, rhs); @@ -875,21 +837,21 @@ namespace xsimd *****************************/ // rsqrt - template = 0> + template = 0> XSIMD_INLINE batch rsqrt(batch const& arg, requires_arch) noexcept { return svrsqrte(arg); } // sqrt - template = 0> + template = 0> XSIMD_INLINE batch sqrt(batch const& arg, requires_arch) noexcept { return svsqrt_x(detail::sve_ptrue(), arg); } // reciprocal - template = 0> + template = 0> XSIMD_INLINE batch reciprocal(const batch& arg, requires_arch) noexcept { return svrecpe(arg); @@ -980,7 +942,7 @@ namespace xsimd XSIMD_INLINE V sve_iota() noexcept { return sve_iota_impl(index {}); } } // namespace detail - template = 0> + template = 0> XSIMD_INLINE batch insert(batch const& arg, T val, index, requires_arch) noexcept { // create a predicate with only the I-th lane activated @@ -990,96 +952,96 @@ namespace xsimd } // first - template = 0> + template = 0> XSIMD_INLINE T first(batch const& self, requires_arch) noexcept { return self.data[0]; } // all - template = 0> + template = 0> XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return detail::sve_pcount(arg) == batch_bool::size; } 
// any - template = 0> + template = 0> XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return svptest_any(arg, arg); } // bitwise_cast - template = 0, detail::enable_sized_unsigned_t = 0> + template = 0, detail::enable_sized_unsigned_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u8(static_cast>(arg)); } - template = 0, detail::enable_sized_signed_t = 0> + template = 0, detail::enable_sized_signed_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s8(static_cast>(arg)); } - template = 0, detail::enable_sized_unsigned_t = 0> + template = 0, detail::enable_sized_unsigned_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u16(static_cast>(arg)); } - template = 0, detail::enable_sized_signed_t = 0> + template = 0, detail::enable_sized_signed_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s16(static_cast>(arg)); } - template = 0, detail::enable_sized_unsigned_t = 0> + template = 0, detail::enable_sized_unsigned_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u32(static_cast>(arg)); } - template = 0, detail::enable_sized_signed_t = 0> + template = 0, detail::enable_sized_signed_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s32(static_cast>(arg)); } - template = 0, detail::enable_sized_unsigned_t = 0> + template = 0, detail::enable_sized_unsigned_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u64(static_cast>(arg)); } - template = 0, detail::enable_sized_signed_t = 0> + template = 0, detail::enable_sized_signed_t = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, 
batch const&, requires_arch) noexcept { return svreinterpret_s64(static_cast>(arg)); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_f32(static_cast>(arg)); } - template = 0> + template = 0> XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_f64(static_cast>(arg)); } // batch_bool_cast - template = 0> + template = 0> XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept { return arg.data; } // from_bool - template = 0> + template = 0> XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return select(arg, batch(1), batch(0)); @@ -1113,7 +1075,7 @@ namespace xsimd }; } // namespace detail - template = 0> + template = 0> XSIMD_INLINE batch slide_left(batch const& arg, requires_arch) noexcept { return detail::sve_slider_left()(arg); @@ -1147,21 +1109,21 @@ namespace xsimd }; } // namespace detail - template = 0> + template = 0> XSIMD_INLINE batch slide_right(batch const& arg, requires_arch) noexcept { return detail::sve_slider_right()(arg); } // isnan - template = 0> + template = 0> XSIMD_INLINE batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); } // nearbyint - template = 0> + template = 0> XSIMD_INLINE batch nearbyint(batch const& arg, requires_arch) noexcept { return svrintx_x(detail::sve_ptrue(), arg); @@ -1183,7 +1145,7 @@ namespace xsimd } // ldexp - template = 0> + template = 0> XSIMD_INLINE batch ldexp(const batch& x, const batch, A>& exp, requires_arch) noexcept { return svscale_x(detail::sve_ptrue(), x, exp); diff --git a/include/xsimd/types/xsimd_rvv_register.hpp b/include/xsimd/types/xsimd_rvv_register.hpp index 83f8bfcfd..cd9e84ba0 100644 --- a/include/xsimd/types/xsimd_rvv_register.hpp +++ b/include/xsimd/types/xsimd_rvv_register.hpp @@ -13,8 +13,9 @@ #ifndef XSIMD_RVV_REGISTER_HPP #define 
XSIMD_RVV_REGISTER_HPP -#include "xsimd_common_arch.hpp" -#include "xsimd_register.hpp" +#include "../utils/xsimd_type_traits.hpp" +#include "./xsimd_common_arch.hpp" +#include "./xsimd_register.hpp" #if XSIMD_WITH_RVV #include @@ -287,15 +288,6 @@ namespace xsimd { }; - // It's difficult dealing with both char and whichever *int8_t type - // is compatible with char, so just avoid it altogether. - // - using rvv_char_t = std::conditional_t::value, int8_t, uint8_t>; - template - using rvv_fix_char_t = std::conditional_t< - std::is_same>::value, - rvv_char_t, T>; - // An explicit constructor isn't really explicit enough to allow // implicit bit-casting operations between incompatible types, so // we add this vacuous flag argument when we're serious: @@ -334,7 +326,7 @@ namespace xsimd operator register_type() const noexcept { return value.get(); } }; template - using rvv_reg_t = std::conditional_t::value, rvv_reg, Width>, void>; + using rvv_reg_t = std::conditional_t::value, rvv_reg, Width>, void>; // And some more of the same stuff for bool types, which have // similar problems and similar workarounds. 
@@ -396,7 +388,7 @@ namespace xsimd template using rvv_bool_t = std::enable_if_t < !std::is_void::value, - rvv_bool, Width>; + rvv_bool, Width>; template struct rvv_vector_type_impl; diff --git a/include/xsimd/types/xsimd_utils.hpp b/include/xsimd/types/xsimd_utils.hpp index 3284e97c0..6af62c1a0 100644 --- a/include/xsimd/types/xsimd_utils.hpp +++ b/include/xsimd/types/xsimd_utils.hpp @@ -206,9 +206,25 @@ namespace xsimd * enabling / disabling metafunctions * **************************************/ + template + using enable_arithmetic_t = std::enable_if_t::value, int>; + + /// Enable signed integral or floating point + template + using enable_signed_numeral_t = std::enable_if_t::value, int>; + + template + using enable_floating_point_t = std::enable_if_t::value, int>; + template using enable_integral_t = std::enable_if_t::value, int>; + template + using enable_signed_integral_t = std::enable_if_t::value && std::is_signed::value, int>; + + template + using enable_unsigned_integral_t = std::enable_if_t::value && std::is_unsigned::value, int>; + template using enable_sized_signed_t = std::enable_if_t::value && std::is_signed::value && sizeof(T) == S, int>; diff --git a/include/xsimd/utils/xsimd_type_traits.hpp b/include/xsimd/utils/xsimd_type_traits.hpp index a3f6842f5..20fc90ed8 100644 --- a/include/xsimd/utils/xsimd_type_traits.hpp +++ b/include/xsimd/utils/xsimd_type_traits.hpp @@ -57,7 +57,7 @@ namespace xsimd } /** - * @ingroup batch_traits + * @ingroup type_traits * * Signed integer type with exactly @c S bytes (1, 2, 4, or 8). * @@ -67,7 +67,7 @@ namespace xsimd using sized_int_t = typename detail::sized_num_types::signed_type; /** - * @ingroup batch_traits + * @ingroup type_traits * * Unsigned integer type with exactly @c S bytes (1, 2, 4, or 8). 
* @@ -77,7 +77,7 @@ namespace xsimd using sized_uint_t = typename detail::sized_num_types::unsigned_type; /** - * @ingroup batch_traits + * @ingroup type_traits * * Floating-point type with exactly @c S bytes (4 for @c float, 8 for @c double). * Yields @c void for sizes without a standard floating-point type (1, 2). @@ -89,30 +89,48 @@ namespace xsimd namespace detail { - template - struct widen; + template + struct remap_num + { + using type = T; + }; - template - struct widen::value>> + template + struct remap_num::value>> { - using type = xsimd::sized_fp_t; + using type = xsimd::sized_fp_t; }; - template - struct widen::value && std::is_signed::value>> + template + struct remap_num::value && std::is_signed::value>> { - using type = xsimd::sized_int_t; + using type = xsimd::sized_int_t; }; - template - struct widen::value && std::is_unsigned::value>> + template + struct remap_num::value && std::is_unsigned::value>> { - using type = xsimd::sized_uint_t; + using type = xsimd::sized_uint_t; }; } /** - * @ingroup batch_traits + * @ingroup type_traits + * + * Remap numeral types to their fixed-size variant (``[u]int{8,16,32}_t`` + * and pass through other types). + * Certain platforms have different types (*i.e.* not aliases) between + * ``char`` and ``int8_t``, or ``long long`` and ``int{32,64}_t``, with SIMD + * intrinsics only defined for some of them. + * Handling them requires casting to a known predictable type. + * + * @tparam T arithmetic type to project from. + */ + template + using project_num_t = typename detail::remap_num::type; + + /** + * @ingroup type_traits * * The next-wider arithmetic type for @c T: doubles the size while preserving * signedness for integers and yielding @c double for @c float. @@ -121,7 +139,7 @@ namespace xsimd * @tparam T arithmetic type to widen. */ template - using widen_t = typename detail::widen::type; + using widen_t = typename detail::remap_num::type; } #endif