diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 5ac3e8769..b3c9c53c8 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -16,6 +16,7 @@ #include #include +#include "../types/xsimd_batch_constant.hpp" #include "../types/xsimd_sse2_register.hpp" namespace xsimd @@ -1952,8 +1953,7 @@ namespace xsimd return _mm_sub_pd(self, other); } - // swizzle - + // swizzle (constant mask) template XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { @@ -2024,7 +2024,7 @@ namespace xsimd return hi_all; } // Only pick elements from the low lane - XSIMD_IF_CONSTEXPR((V0 < 4) && (V1 < 4) && (V2 < 4) && (V3 < 4) && (V4 < 4) && (V5 < 4) && (V6 < 4) && (V7 < 4)) + XSIMD_IF_CONSTEXPR(detail::is_only_from_lo(mask)) { // permute within each sub lane constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3); @@ -2036,7 +2036,7 @@ namespace xsimd return _mm_unpacklo_epi64(lol, loh); } // Only pick elements from the high lane - XSIMD_IF_CONSTEXPR((V0 >= 4) && (V1 >= 4) && (V2 >= 4) && (V3 >= 4) && (V4 >= 4) && (V5 >= 4) && (V6 >= 4) && (V7 >= 4)) + XSIMD_IF_CONSTEXPR(detail::is_only_from_hi(mask)) { // permute within each sub lane constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3); @@ -2063,7 +2063,7 @@ namespace xsimd __m128i hi = _mm_unpackhi_epi64(hil, hih); // mask to choose the right lane - batch_bool_constant blend_mask; + constexpr auto blend_mask = mask < make_batch_constant(); // blend the two permutes return select(blend_mask, batch(lo), batch(hi));