From 8705787a801c980515f53a27bba6ca66356e4721 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 27 Oct 2025 22:54:30 -0500 Subject: [PATCH 1/2] Fix template argument order for batch_bool loads `batch_bool::load_(un)aligned` dispatches to `kernel::load_(un)aligned`, which requires that the architecture be the first template parameter. --- include/xsimd/arch/xsimd_avx2.hpp | 2 +- include/xsimd/arch/xsimd_neon.hpp | 12 ++++++------ include/xsimd/arch/xsimd_sse4_1.hpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 87e5dafae..a9e30b4ab 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -573,7 +573,7 @@ namespace xsimd // load_unaligned - template ::value>::type> + template ::value>::type> XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp index ad2a25246..5eae38efc 100644 --- a/include/xsimd/arch/xsimd_neon.hpp +++ b/include/xsimd/arch/xsimd_neon.hpp @@ -574,39 +574,39 @@ namespace xsimd } /* batch bool version */ - template = 0> + template = 0> XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept { auto vmem = load_unaligned((unsigned char const*)mem, convert {}, A {}); return { 0 - vmem.data }; } - template = 0> + template = 0> XSIMD_INLINE batch_bool load_aligned(bool const* mem, batch_bool t, requires_arch r) noexcept { return load_unaligned(mem, t, r); } - template = 0> + template = 0> XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept { uint16x8_t vmem = vmovl_u8(vld1_u8((unsigned char const*)mem)); return { 0 - vmem }; } - template = 0> + template = 0> XSIMD_INLINE batch_bool load_aligned(bool const* mem, batch_bool t, requires_arch r) noexcept { return load_unaligned(mem, t, r); } - template = 0> + template = 0> XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept { uint8x8_t tmp = vreinterpret_u8_u32(vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0)); return { 0 - vmovl_u16(vget_low_u16(vmovl_u8(tmp))) }; } - template = 0> + template = 0> XSIMD_INLINE batch_bool load_aligned(bool const* mem, batch_bool t, requires_arch r) noexcept { return load_unaligned(mem, t, r); diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp index 130d50476..1a64fc878 100644 --- a/include/xsimd/arch/xsimd_sse4_1.hpp +++ b/include/xsimd/arch/xsimd_sse4_1.hpp @@ -125,7 +125,7 @@ namespace xsimd // load_unaligned - template ::value && sizeof(T) > 1)>::type> + template ::value && sizeof(T) > 1)>::type> XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept { // GCC <12 have missing or buggy unaligned load intrinsics; use memcpy to work around this. From 84a07b4131a7c47f557d408ec76051ed90debdff Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 2 Nov 2025 23:30:53 -0600 Subject: [PATCH 2/2] remove unneeded overload --- include/xsimd/arch/xsimd_neon.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp index 5eae38efc..27c532d9b 100644 --- a/include/xsimd/arch/xsimd_neon.hpp +++ b/include/xsimd/arch/xsimd_neon.hpp @@ -612,13 +612,6 @@ namespace xsimd return load_unaligned(mem, t, r); } - template - XSIMD_INLINE batch_bool load_aligned(bool const* mem, batch_bool t, requires_arch r) noexcept - { - uint8x8_t tmp = vreinterpret_u8_u32(vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0)); - return { 0 - vmovl_u16(vget_low_u16(vmovl_u8(tmp))) }; - } - /********* * store * *********/