From 8696ba57f8474b62150966f56484b586f6c83e5f Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 10:12:20 +0200 Subject: [PATCH 1/7] Rename swizzle -> shuffle in test names This reflects actual test behavior --- test/test_shuffle.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_shuffle.cpp b/test/test_shuffle.cpp index 1c7c9884b..bc89aafd5 100644 --- a/test/test_shuffle.cpp +++ b/test/test_shuffle.cpp @@ -561,13 +561,13 @@ struct shuffle_test CHECK_BATCH_EQ(b_res, b_ref); } - void swizzle() + void shuffle() { B b_lhs = B::load_unaligned(lhs.data()); B b_rhs = B::load_unaligned(rhs.data()); { - struct swizzle_lo_generator + struct shuffle_lo_generator { static constexpr size_t get(size_t index, size_t size) { @@ -580,13 +580,13 @@ struct shuffle_test ref[i] = lhs[size - i - 1]; B b_ref = B::load_unaligned(ref.data()); - INFO("swizzle first batch"); - B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant()); + INFO("shuffle first batch"); + B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant()); CHECK_BATCH_EQ(b_res, b_ref); } { - struct swizzle_hi_generator + struct shuffle_hi_generator { static constexpr size_t get(size_t index, size_t size) { @@ -599,8 +599,8 @@ struct shuffle_test ref[i] = rhs[size - i - 1]; B b_ref = B::load_unaligned(ref.data()); - INFO("swizzle second batch"); - B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant()); + INFO("shuffle second batch"); + B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant()); CHECK_BATCH_EQ(b_res, b_ref); } } @@ -709,9 +709,9 @@ TEST_CASE_TEMPLATE("[shuffle]", B, BATCH_FLOAT_TYPES, xsimd::batch, xs { Test.select(); } - SUBCASE("swizzle") + SUBCASE("shuffle") { - Test.swizzle(); + Test.shuffle(); } SUBCASE("transpose") { @@ -733,12 +733,12 @@ TEST_CASE_TEMPLATE("[small integer transpose]", B, xsimd::batch, xsimd } #if (XSIMD_WITH_SSE2 && !XSIMD_WITH_AVX) -TEST_CASE_TEMPLATE("[small 
integer swizzle]", B, xsimd::batch, xsimd::batch) +TEST_CASE_TEMPLATE("[small integer shuffle]", B, xsimd::batch, xsimd::batch) { shuffle_test Test; - SUBCASE("swizzle") + SUBCASE("shuffle") { - Test.swizzle(); + Test.shuffle(); } } #endif From fc2659da8e78db387e0f30925449306d2c1bde86 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 10:13:00 +0200 Subject: [PATCH 2/7] Provide a generic implementation of swizzle with constant mask As a side effect, improve test coverage for swizzle to all integer types. --- include/xsimd/arch/common/xsimd_common_details.hpp | 3 +++ include/xsimd/arch/common/xsimd_common_memory.hpp | 13 +++++++++++-- test/test_utils.hpp | 8 +------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/xsimd/arch/common/xsimd_common_details.hpp b/include/xsimd/arch/common/xsimd_common_details.hpp index 0a8abc419..03088eec4 100644 --- a/include/xsimd/arch/common/xsimd_common_details.hpp +++ b/include/xsimd/arch/common/xsimd_common_details.hpp @@ -90,6 +90,9 @@ namespace xsimd XSIMD_INLINE std::pair, batch> sincos(batch const& self) noexcept; template XSIMD_INLINE batch sqrt(batch const& self) noexcept; + template + XSIMD_INLINE typename std::enable_if::value, batch>::type + swizzle(batch const& x, batch_constant mask) noexcept; template XSIMD_INLINE batch tan(batch const& self) noexcept; template diff --git a/include/xsimd/arch/common/xsimd_common_memory.hpp b/include/xsimd/arch/common/xsimd_common_memory.hpp index 812ad0bca..773177233 100644 --- a/include/xsimd/arch/common/xsimd_common_memory.hpp +++ b/include/xsimd/arch/common/xsimd_common_memory.hpp @@ -341,7 +341,7 @@ namespace xsimd } }; - return swizzle(self, make_batch_constant, rotate_generator, A>(), A {}); + return swizzle(self, make_batch_constant, rotate_generator, A>()); } template @@ -362,7 +362,7 @@ namespace xsimd } }; - return swizzle(self, make_batch_constant, rotate_generator, A>(), A {}); + return swizzle(self, make_batch_constant, 
rotate_generator, A>()); } template @@ -611,6 +611,15 @@ namespace xsimd return batch::load_aligned(out_buffer); } + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + constexpr size_t size = batch::size; + alignas(A::alignment()) T self_buffer[size]; + store_aligned(&self_buffer[0], self); + return { self_buffer[Is]... }; + } + template XSIMD_INLINE batch, A> swizzle(batch, A> const& self, batch mask, requires_arch) noexcept { diff --git a/test/test_utils.hpp b/test/test_utils.hpp index 4b56dc01b..80914f331 100644 --- a/test/test_utils.hpp +++ b/test/test_utils.hpp @@ -591,13 +591,7 @@ namespace xsimd #define BATCH_TYPES BATCH_INT_TYPES, BATCH_FLOAT_TYPES #define BATCH_MATH_TYPES xsimd::batch, BATCH_FLOAT_TYPES -#if !XSIMD_WITH_AVX || XSIMD_WITH_AVX2 -#define BATCH_SWIZZLE_TAIL , xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch -#else -#define BATCH_SWIZZLE_TAIL -#endif - -#define BATCH_SWIZZLE_TYPES BATCH_FLOAT_TYPES, BATCH_COMPLEX_TYPES BATCH_SWIZZLE_TAIL +#define BATCH_SWIZZLE_TYPES BATCH_FLOAT_TYPES, BATCH_COMPLEX_TYPES, BATCH_INT_TYPES /******************** * conversion utils * From a96932417e34c8eae48b47035d8d9a38bb43fbf8 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 12:01:20 +0200 Subject: [PATCH 3/7] Fix rotate_left implementation on ssse3 and avx2 for [u]int16 And also provide an optimized version for [u]int8 --- include/xsimd/arch/xsimd_avx2.hpp | 12 +++++++++++- include/xsimd/arch/xsimd_ssse3.hpp | 13 ++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 3ab8517a9..e9c3604d8 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -657,11 +657,21 @@ namespace xsimd // rotate_left template - XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { 
return _mm256_alignr_epi8(self, self, N); } template + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + { + return bitwise_cast(rotate_left(bitwise_cast(self), avx2 {})); + } + template + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + { + return _mm256_alignr_epi8(self, self, 2 * N); + } + template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { return bitwise_cast(rotate_left(bitwise_cast(self), avx2 {})); diff --git a/include/xsimd/arch/xsimd_ssse3.hpp b/include/xsimd/arch/xsimd_ssse3.hpp index b38e398b4..b612d438e 100644 --- a/include/xsimd/arch/xsimd_ssse3.hpp +++ b/include/xsimd/arch/xsimd_ssse3.hpp @@ -107,11 +107,22 @@ namespace xsimd // rotate_left template - XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { return _mm_alignr_epi8(self, self, N); } template + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + { + return bitwise_cast(rotate_left(bitwise_cast(self), ssse3 {})); + } + + template + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept + { + return _mm_alignr_epi8(self, self, 2 * N); + } + template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { return bitwise_cast(rotate_left(bitwise_cast(self), ssse3 {})); From 160fb58ee6d72999c14b62a58d543b7a5248ae4b Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 15:25:55 +0200 Subject: [PATCH 4/7] Fix implementation of rotate_left on avx2 and improve test coverage for rotate_left --- include/xsimd/arch/xsimd_avx2.hpp | 21 +++++++++++++++++++-- test/test_batch_manip.cpp | 19 ++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index e9c3604d8..0a17ce512 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ 
b/include/xsimd/arch/xsimd_avx2.hpp @@ -659,7 +659,15 @@ namespace xsimd template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { - return _mm256_alignr_epi8(self, self, N); + auto other = _mm256_permute2x128_si256(self, self, 0x1); + if (N < 16) + { + return _mm256_alignr_epi8(other, self, N); + } + else + { + return _mm256_alignr_epi8(self, other, N - 16); + } } template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept @@ -669,7 +677,15 @@ namespace xsimd template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { - return _mm256_alignr_epi8(self, self, 2 * N); + auto other = _mm256_permute2x128_si256(self, self, 0x1); + if (N < 8) + { + return _mm256_alignr_epi8(other, self, 2 * N); + } + else + { + return _mm256_alignr_epi8(self, other, 2 * (N - 8)); + } } template XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept @@ -886,6 +902,7 @@ namespace xsimd } // swizzle (dynamic mask) + template XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { diff --git a/test/test_batch_manip.cpp b/test/test_batch_manip.cpp index 000491cd8..34c9c1032 100644 --- a/test/test_batch_manip.cpp +++ b/test/test_batch_manip.cpp @@ -20,7 +20,7 @@ namespace xsimd struct init_swizzle_base { using swizzle_vector_type = std::array; - swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol; + swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol, exped_rol2; template std::vector create_swizzle_vectors() @@ -42,12 +42,14 @@ namespace xsimd exped_dup[i] = lhs_in[2 * (i / 2)]; exped_ror[i] = lhs_in[(i - 1) % N]; exped_rol[i] = lhs_in[(i + 1) % N]; + exped_rol2[i] = lhs_in[(i + N - 1) % N]; } vects.push_back(std::move(exped_reverse)); vects.push_back(std::move(exped_fill)); vects.push_back(std::move(exped_dup)); vects.push_back(std::move(exped_ror)); vects.push_back(std::move(exped_rol)); + 
vects.push_back(std::move(exped_rol2)); return vects; } @@ -176,6 +178,20 @@ struct swizzle_test CHECK_BATCH_EQ(b_res, b_exped); } + void rotate_left_inv() + { + xsimd::init_swizzle_base swizzle_base; + auto swizzle_vecs = swizzle_base.create_swizzle_vectors(); + auto v_lhs = swizzle_vecs[0]; + auto v_exped = swizzle_vecs[6]; + + B b_lhs = B::load_unaligned(v_lhs.data()); + B b_exped = B::load_unaligned(v_exped.data()); + + B b_res = xsimd::rotate_left(b_lhs); + CHECK_BATCH_EQ(b_res, b_exped); + } + void swizzle_reverse() { xsimd::init_swizzle_base swizzle_base; @@ -248,6 +264,7 @@ TEST_CASE_TEMPLATE("[swizzle]", B, BATCH_SWIZZLE_TYPES) SUBCASE("rotate") { Test.rotate_left(); + Test.rotate_left_inv(); Test.rotate_right(); } From 5d989bc7d7ba569de1392c4239ee2c63c6a7173d Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 15:41:25 +0200 Subject: [PATCH 5/7] Disable faulty implementation of rotate_left on avx512 --- include/xsimd/arch/xsimd_avx512bw.hpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx512bw.hpp b/include/xsimd/arch/xsimd_avx512bw.hpp index 832cba6a1..0395853dc 100644 --- a/include/xsimd/arch/xsimd_avx512bw.hpp +++ b/include/xsimd/arch/xsimd_avx512bw.hpp @@ -429,18 +429,6 @@ namespace xsimd return detail::compare_int_avx512bw(self, other); } - // rotate_left - template - XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept - { - return _mm512_alignr_epi8(self, self, N); - } - template - XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept - { - return bitwise_cast(rotate_left(bitwise_cast(self), avx512bw {})); - } - // sadd template ::value, void>::type> XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept From 5e1a34d72431381fb9b0648975b239d31abe4354 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 16:08:58 +0200 Subject: [PATCH 6/7] Avoid warning in rotate_left test --- 
include/xsimd/arch/xsimd_neon.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp index ac3cdacd0..17642268d 100644 --- a/include/xsimd/arch/xsimd_neon.hpp +++ b/include/xsimd/arch/xsimd_neon.hpp @@ -2771,10 +2771,11 @@ namespace xsimd XSIMD_INLINE batch rotate_left(batch const& a, requires_arch) noexcept { using register_type = typename batch::register_type; + // Adding modulo to avoid warning. const detail::neon_dispatcher::binary dispatcher = { - std::make_tuple(wrap::rotate_left_u8, wrap::rotate_left_s8, wrap::rotate_left_u16, wrap::rotate_left_s16, - wrap::rotate_left_u32, wrap::rotate_left_s32, wrap::rotate_left_u64, wrap::rotate_left_s64, - wrap::rotate_left_f32) + std::make_tuple(wrap::rotate_left_u8, wrap::rotate_left_s8, wrap::rotate_left_u16, wrap::rotate_left_s16, + wrap::rotate_left_u32, wrap::rotate_left_s32, wrap::rotate_left_u64, wrap::rotate_left_s64, + wrap::rotate_left_f32) }; return dispatcher.apply(register_type(a), register_type(a)); } From db26b8c44b328093aba0bf1d0bf4b50526434342 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 13 Jun 2025 18:31:03 +0200 Subject: [PATCH 7/7] fix avx512 --- include/xsimd/arch/xsimd_avx512f.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index dc46cd867..26947dffc 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -32,6 +32,8 @@ namespace xsimd XSIMD_INLINE batch incr_if(batch const& self, Mask const& mask, requires_arch) noexcept; template XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept; + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept; template XSIMD_INLINE void transpose(batch* matrix_begin, batch* matrix_end, requires_arch) noexcept; template