From e66da98afd235b888542d0617b16831f18fca336 Mon Sep 17 00:00:00 2001 From: Fikret Ardal Date: Wed, 2 Jul 2025 03:03:34 +0300 Subject: [PATCH 1/4] add first() function to extract the first lane from a batch. Implemented only for x86_64 architecture --- .../xsimd/arch/common/xsimd_common_memory.hpp | 19 +++++++ include/xsimd/arch/xsimd_avx.hpp | 53 ++++++++++++++++++- include/xsimd/arch/xsimd_avx512f.hpp | 51 ++++++++++++++++++ include/xsimd/arch/xsimd_sse2.hpp | 51 ++++++++++++++++++ include/xsimd/types/xsimd_batch.hpp | 31 +++++++++++ test/test_batch.cpp | 11 ++++ test/test_batch_complex.cpp | 8 +++ 7 files changed, 223 insertions(+), 1 deletion(-) diff --git a/include/xsimd/arch/common/xsimd_common_memory.hpp b/include/xsimd/arch/common/xsimd_common_memory.hpp index 773177233..3914a6f63 100644 --- a/include/xsimd/arch/common/xsimd_common_memory.hpp +++ b/include/xsimd/arch/common/xsimd_common_memory.hpp @@ -260,6 +260,25 @@ namespace xsimd return buffer[i]; } + // first + template + XSIMD_INLINE T first(batch const& self, requires_arch) noexcept + { + return get(self, 0, common {}); + } + + template + XSIMD_INLINE T first(batch_bool const& self, requires_arch) noexcept + { + return get(self, 0, common {}); + } + + template + XSIMD_INLINE auto first(batch, A> const& self, requires_arch) noexcept -> typename batch, A>::value_type + { + return get(self, 0, common {}); + } + // load template XSIMD_INLINE batch_bool load_unaligned(bool const* mem, batch_bool, requires_arch) noexcept diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index 883f055be..ff0055a06 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -20,7 +20,6 @@ namespace xsimd { - namespace kernel { using namespace types; @@ -1861,6 +1860,58 @@ namespace xsimd auto hi = _mm256_unpackhi_pd(self, other); return _mm256_insertf128_pd(lo, _mm256_castpd256_pd128(hi), 1); } + + // first + template + XSIMD_INLINE float first(batch const& self, requires_arch) noexcept + { + return _mm256_cvtss_f32(self); + } + + template + XSIMD_INLINE double first(batch const& self, requires_arch) noexcept + { + return _mm256_cvtsd_f64(self); + } + + template ::value, void>::type> + XSIMD_INLINE T first(batch const& self, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + return static_cast(_mm256_cvtsi256_si32(self) & 0xFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return static_cast(_mm256_cvtsi256_si32(self) & 0xFFFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return static_cast(_mm256_cvtsi256_si32(self)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + __m128i low = _mm256_castsi256_si128(self); + return static_cast(_mm_cvtsi128_si64(low)); + } + else + { + assert(false && "unsupported arch/op combination"); + return {}; + } + } + + template + XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept + { + return { first(self.real(), A {}), first(self.imag(), A {}) }; + } + + template + XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept + { + return first(batch(self), A {}); + } } } diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index 26947dffc..91b587e96 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -2339,6 +2339,57 @@ namespace xsimd 2)); } + // first + template + XSIMD_INLINE float first(batch const& self, requires_arch) noexcept + { + return _mm512_cvtss_f32(self); + } + + template + XSIMD_INLINE double first(batch const& self, requires_arch) noexcept + { + return _mm512_cvtsd_f64(self); + } + + template ::value, void>::type> + XSIMD_INLINE T first(batch const& self, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + return static_cast(_mm512_cvtsi512_si32(self) & 0xFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return static_cast(_mm512_cvtsi512_si32(self) & 0xFFFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return static_cast(_mm512_cvtsi512_si32(self)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return static_cast(_mm_cvtsi128_si64(_mm512_castsi512_si128(self))); + } + else + { + assert(false && "unsupported arch/op combination"); + return {}; + } + } + + template + XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept + { + return { first(self.real(), A {}), first(self.imag(), A {}) }; + } + + template + XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept + { + return first(batch(self), A {}); + } + } } diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 94d7af5d4..56aebc3d3 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -1782,6 +1782,57 @@ namespace xsimd { return _mm_unpacklo_pd(self, other); } + + // first + template + XSIMD_INLINE float first(batch const& self, requires_arch) noexcept + { + return _mm_cvtss_f32(self); + } + + template + XSIMD_INLINE double first(batch const& self, requires_arch) noexcept + { + return _mm_cvtsd_f64(self); + } + + template ::value, void>::type> + XSIMD_INLINE T first(batch const& self, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + return static_cast(_mm_cvtsi128_si32(self) & 0xFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return static_cast(_mm_cvtsi128_si32(self) & 0xFFFF); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return static_cast(_mm_cvtsi128_si32(self)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return static_cast(_mm_cvtsi128_si64(self)); + } + else + { + assert(false && "unsupported arch/op combination"); + return {}; + } + } + + template + XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept + { + return { first(self.real(), A {}), first(self.imag(), A {}) }; + } + + template + XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept + { + return first(batch(self), A {}); + } } } diff --git a/include/xsimd/types/xsimd_batch.hpp b/include/xsimd/types/xsimd_batch.hpp index b54d84aae..e23a52e8b 100644 --- a/include/xsimd/types/xsimd_batch.hpp +++ b/include/xsimd/types/xsimd_batch.hpp @@ -159,6 +159,8 @@ namespace xsimd XSIMD_INLINE T get(std::size_t i) const noexcept; + XSIMD_INLINE T first() const noexcept; + // comparison operators. Defined as friend to enable automatic // conversion of parameters from scalar to batch, at the cost of using a // proxy implementation from details::. @@ -314,6 +316,8 @@ namespace xsimd XSIMD_INLINE bool get(std::size_t i) const noexcept; + XSIMD_INLINE bool first() const noexcept; + // mask operations XSIMD_INLINE uint64_t mask() const noexcept; XSIMD_INLINE static batch_bool from_mask(uint64_t mask) noexcept; @@ -405,6 +409,8 @@ namespace xsimd XSIMD_INLINE value_type get(std::size_t i) const noexcept; + XSIMD_INLINE value_type first() const noexcept; + #ifdef XSIMD_ENABLE_XTL_COMPLEX // xtl-related methods template @@ -693,6 +699,16 @@ namespace xsimd return kernel::get(*this, i, A {}); } + /** + * Retrieve the first scalar element in this batch. + */ + template + XSIMD_INLINE T batch::first() const noexcept + { + detail::static_check_supported_config(); + return kernel::first(*this, A {}); + } + /****************************** * batch comparison operators * ******************************/ @@ -1005,6 +1021,13 @@ namespace xsimd return kernel::get(*this, i, A {}); } + template + XSIMD_INLINE bool batch_bool::first() const noexcept + { + detail::static_check_supported_config(); + return kernel::first(*this, A {}); + } + /*********************************** * batch_bool comparison operators * ***********************************/ @@ -1077,6 +1100,7 @@ namespace xsimd { } + template template XSIMD_INLINE auto batch_bool::make_register(detail::index_sequence, U u, V... v) noexcept -> register_type @@ -1248,6 +1272,13 @@ namespace xsimd return kernel::get(*this, i, A {}); } + template + XSIMD_INLINE auto batch, A>::first() const noexcept -> value_type + { + detail::static_check_supported_config, A>(); + return kernel::first(*this, A {}); + } + /************************************** * batch xtl-related methods * **************************************/ diff --git a/test/test_batch.cpp b/test/test_batch.cpp index 05c13b4b8..394779072 100644 --- a/test/test_batch.cpp +++ b/test/test_batch.cpp @@ -152,6 +152,12 @@ struct batch_test } } + void test_first_element() const + { + batch_type res = batch_lhs(); + CHECK_EQ(res.first(), lhs[0]); + } + void test_arithmetic() const { // +batch @@ -934,6 +940,11 @@ TEST_CASE_TEMPLATE("[batch]", B, BATCH_TYPES) Test.test_access_operator(); } + SUBCASE("first element") + { + Test.test_first_element(); + } + SUBCASE("arithmetic") { Test.test_arithmetic(); diff --git a/test/test_batch_complex.cpp b/test/test_batch_complex.cpp index e06b31807..47ed9ca5b 100644 --- a/test/test_batch_complex.cpp +++ b/test/test_batch_complex.cpp @@ -176,6 +176,12 @@ struct batch_complex_test } } + void test_first_element() const + { + batch_type res = batch_lhs(); + CHECK_EQ(res.first(), lhs[0]); + } + void test_arithmetic() const { // +batch @@ -675,6 +681,8 @@ TEST_CASE_TEMPLATE("[xsimd complex batches]", B, BATCH_COMPLEX_TYPES) SUBCASE("access_operator") { Test.test_access_operator(); } + SUBCASE("first element") { Test.test_first_element(); } + SUBCASE("arithmetic") { Test.test_arithmetic(); } SUBCASE("computed_assignment") { Test.test_computed_assignment(); } From ca5199dc704664d8f2eca38a420800b5c4bef4b1 Mon Sep 17 00:00:00 2001 From: Fikret Ardal Date: Wed, 2 Jul 2025 15:50:21 +0300 Subject: [PATCH 2/4] remove redundancy by making complex and batch_bool first implementation common --- include/xsimd/arch/common/xsimd_common_memory.hpp | 4 ++-- include/xsimd/arch/xsimd_avx.hpp | 11 ----------- include/xsimd/arch/xsimd_avx512f.hpp | 12 ------------ include/xsimd/arch/xsimd_sse2.hpp | 11 ----------- 4 files changed, 2 insertions(+), 36 deletions(-) diff --git a/include/xsimd/arch/common/xsimd_common_memory.hpp b/include/xsimd/arch/common/xsimd_common_memory.hpp index 3914a6f63..4ad148a6f 100644 --- a/include/xsimd/arch/common/xsimd_common_memory.hpp +++ b/include/xsimd/arch/common/xsimd_common_memory.hpp @@ -270,13 +270,13 @@ namespace xsimd template XSIMD_INLINE T first(batch_bool const& self, requires_arch) noexcept { - return get(self, 0, common {}); + return first(batch(self), A {}); } template XSIMD_INLINE auto first(batch, A> const& self, requires_arch) noexcept -> typename batch, A>::value_type { - return get(self, 0, common {}); + return { first(self.real(), A {}), first(self.imag(), A {}) }; } // load diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index ff0055a06..82bb631fc 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -1901,17 +1901,6 @@ namespace xsimd } } - template - XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept - { - return { first(self.real(), A {}), first(self.imag(), A {}) }; - } - - template - XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept - { - return first(batch(self), A {}); - } } } diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index 91b587e96..7c0793862 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -2378,18 +2378,6 @@ namespace xsimd } } - template - XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept - { - return { first(self.real(), A {}), first(self.imag(), A {}) }; - } - - template - XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept - { - return first(batch(self), A {}); - } - } } diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 56aebc3d3..ab19fc4ca 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -1822,17 +1822,6 @@ namespace xsimd } } - template - XSIMD_INLINE std::complex first(batch, A> const& self, requires_arch) noexcept - { - return { first(self.real(), A {}), first(self.imag(), A {}) }; - } - - template - XSIMD_INLINE bool first(batch_bool const& self, requires_arch) noexcept - { - return first(batch(self), A {}); - } } } From cc1c078902f7302073af056805ce80b7b655ca77 Mon Sep 17 00:00:00 2001 From: Fikret Ardal Date: Wed, 2 Jul 2025 21:21:30 +0300 Subject: [PATCH 3/4] fix style issues and fix the implementation of first() for 32-bit architectures in windows --- include/xsimd/arch/xsimd_avx.hpp | 5 ++--- include/xsimd/arch/xsimd_avx512f.hpp | 3 ++- include/xsimd/arch/xsimd_sse2.hpp | 8 ++++++++ include/xsimd/types/xsimd_batch.hpp | 1 - 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index 82bb631fc..76eed49c2 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -1891,8 +1891,8 @@ namespace xsimd } else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) { - __m128i low = _mm256_castsi256_si128(self); - return static_cast(_mm_cvtsi128_si64(low)); + batch low = _mm256_castsi256_si128(self); + return first(low, sse4_2 {}); } else { @@ -1900,7 +1900,6 @@ namespace xsimd return {}; } } - } } diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index 7c0793862..3a57ee403 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -2369,7 +2369,8 @@ namespace xsimd } else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) { - return static_cast(_mm_cvtsi128_si64(_mm512_castsi512_si128(self))); + batch low = _mm512_castsi256_si128(self); + return first(low, sse4_2 {}); } else { diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index ab19fc4ca..59a39363b 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -1813,7 +1813,15 @@ namespace xsimd } else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) { +#if defined(__x86_64__) return static_cast(_mm_cvtsi128_si64(self)); +#else + __m128i m; + _mm_storel_epi64(&m, self); + int64_t i; + std::memcpy(&i, &m, sizeof(i)); + return i; +#endif } else { diff --git a/include/xsimd/types/xsimd_batch.hpp b/include/xsimd/types/xsimd_batch.hpp index e23a52e8b..0a6a07153 100644 --- a/include/xsimd/types/xsimd_batch.hpp +++ b/include/xsimd/types/xsimd_batch.hpp @@ -1100,7 +1100,6 @@ namespace xsimd { } - template template XSIMD_INLINE auto batch_bool::make_register(detail::index_sequence, U u, V... v) noexcept -> register_type From c36837d32d860e96ef4d40077bfd2eb391bff354 Mon Sep 17 00:00:00 2001 From: Fikret Ardal Date: Wed, 2 Jul 2025 21:28:26 +0300 Subject: [PATCH 4/4] fix function name in avx512f --- include/xsimd/arch/xsimd_avx512f.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index 3a57ee403..a2fc88616 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -2369,7 +2369,7 @@ namespace xsimd } else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) { - batch low = _mm512_castsi256_si128(self); + batch low = _mm512_castsi512_si128(self); return first(low, sse4_2 {}); } else