Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
- { compiler: 'clang', version: '18', flags: 'avx512' }
- { compiler: 'clang', version: '18', flags: 'avx_128' }
- { compiler: 'clang', version: '18', flags: 'avx2_128' }
- { compiler: 'clang', version: '18', flags: 'avx512vl_128' }
- { compiler: 'clang', version: '18', flags: 'avx512vl_256' }
steps:
- name: Setup compiler
Expand Down Expand Up @@ -97,6 +98,10 @@ jobs:
if [[ '${{ matrix.sys.flags }}' == 'avx512' ]]; then
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512"
fi
if [[ '${{ matrix.sys.flags }}' == 'avx512vl_128' ]]; then
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512"
CXXFLAGS="$CXX_FLAGS -DXSIMD_DEFAULT_ARCH=avx512vl_128"
fi
if [[ '${{ matrix.sys.flags }}' == 'avx512vl_256' ]]; then
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512"
CXXFLAGS="$CXX_FLAGS -DXSIMD_DEFAULT_ARCH=avx512vl_256"
Expand Down
36 changes: 29 additions & 7 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1518,15 +1518,37 @@ namespace xsimd
{
// Adapted from https://github.com/serge-sans-paille/fast-bitset-from-bool-array
// Generate a bitset from an array of N booleans, N being 8, 4 or 2.
//
// Each input byte is expected to hold 0 or 1. The N bytes are loaded as one
// integer (byte k lands at bit 8 * k, which relies on the little-endian
// layout of x86) and multiplied by a constant whose set bits sit at
// 8 * (N - 1) - 7 * k for k in [0, N). That moves the low bit of byte k to
// bit 8 * (N - 1) + k of the product, so shifting right by 8 * (N - 1)
// leaves boolean k in bit k of the returned byte. Bits N..7 of the result
// are always zero for 0/1 inputs.
template <size_t N>
XSIMD_INLINE unsigned char tobitset(unsigned char unpacked[N])
{
    static_assert(N == 8 || N == 4 || N == 2, "valid pack size");
    XSIMD_IF_CONSTEXPR(N == 8)
    {
        uint64_t data;
        memcpy(&data, unpacked, sizeof(uint64_t));

        // Set bits: 7, 14, 21, 28, 35, 42, 49, 56.
        const uint64_t magic = (0x80 + 0x4000 + 0x200000 + 0x10000000 + 0x0800000000 + 0x040000000000 + 0x02000000000000 + 0x0100000000000000);

        unsigned char res = ((data * magic) >> 56) & 0xFF;
        return res;
    }
    else XSIMD_IF_CONSTEXPR(N == 4)
    {
        uint32_t data;
        memcpy(&data, unpacked, sizeof(uint32_t));

        // Set bits: 3, 10, 17, 24. Reusing the four low terms of the 8-byte
        // constant would deliver the result in bits 4..7 instead of 0..3.
        const uint32_t magic = (0x08 + 0x0400 + 0x020000 + 0x01000000);

        unsigned char res = ((data * magic) >> 24) & 0xFF;
        return res;
    }
    else
    {
        // N == 2, guaranteed by the static_assert above.
        uint16_t data;
        memcpy(&data, unpacked, sizeof(uint16_t));

        // Set bits: 1, 8. The product needs more than 16 bits, so it is
        // computed explicitly in 32-bit unsigned arithmetic.
        const uint32_t magic = (0x02 + 0x0100);

        unsigned char res = ((static_cast<uint32_t>(data) * magic) >> 8) & 0xFF;
        return res;
    }
}
}

Expand All @@ -1541,7 +1563,7 @@ namespace xsimd
register_type mask = 0;
for (std::size_t i = 0; i < iter; ++i)
{
unsigned char block = detail::tobitset((unsigned char*)mem + i * 8);
unsigned char block = detail::tobitset<8>((unsigned char*)mem + i * 8);
mask |= (register_type(block) << (i * 8));
}
return mask;
Expand Down
647 changes: 647 additions & 0 deletions include/xsimd/arch/xsimd_avx512vl_128.hpp

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions include/xsimd/arch/xsimd_avx512vl_256.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,14 @@ namespace xsimd
{
using register_type = typename batch_bool<T, A>::register_type;
constexpr auto size = batch_bool<T, A>::size;
constexpr auto iter = size / 4;
static_assert((size % 4) == 0, "incorrect size of bool batch");
constexpr auto chunk_size = size >= 8 ? 8 : 4;
constexpr auto iter = size / chunk_size;
static_assert((size % chunk_size) == 0, "incorrect size of bool batch");
register_type mask = 0;
for (std::size_t i = 0; i < iter; ++i)
{
unsigned char block = detail::tobitset((unsigned char*)mem + i * 4);
mask |= (register_type(block) << (i * 4));
unsigned char block = detail::tobitset<chunk_size>((unsigned char*)mem + i * chunk_size);
mask |= (register_type(block) << (i * chunk_size));
}
return mask;
}
Expand Down
1 change: 1 addition & 0 deletions include/xsimd/arch/xsimd_isa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@

#if XSIMD_WITH_AVX512VL
#include "./xsimd_avx512vl.hpp"
#include "./xsimd_avx512vl_128.hpp"
#include "./xsimd_avx512vl_256.hpp"
#endif

Expand Down
2 changes: 1 addition & 1 deletion include/xsimd/config/xsimd_arch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ namespace xsimd

// List of every x86 architecture tag. avx512vl_128 is inserted immediately
// before avx2_128; the stale pre-change row (the same list without
// avx512vl_128) is dropped so that no architecture appears twice.
// NOTE(review): the position in this list presumably encodes dispatch
// preference -- confirm against arch_list consumers.
using all_x86_architectures = arch_list<
    avx512vnni<avx512vbmi2>, avx512vbmi2, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512vl, avx512cd, avx512f,
    avxvnni, avx512vl_256, fma3<avx2>, avx2, fma3<avx>, avx, avx512vl_128, avx2_128, avx_128, fma4, fma3<sse4_2>,
    sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;

// SVE architecture tags for vector widths of 512, 256 and 128, in that order.
typedef arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>> all_sve_architectures;
Expand Down
4 changes: 3 additions & 1 deletion include/xsimd/config/xsimd_cpu_features_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,9 @@ namespace xsimd

// True when avx512_enabled() holds and leaf7() reports every
// x86_cpuid_leaf7::ebx::avx512vl bit as set. leaf7() is only queried when
// avx512_enabled() is true.
inline bool avx512vl() const noexcept
{
    const bool avx512_ok = avx512_enabled();
    return avx512_ok && leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512vl>();
}

inline bool avx512vl_256() const noexcept { return avx512_enabled() && osxsave() && leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512vl>(); }
inline bool avx512vl_128() const noexcept { return avx512vl() && osxsave(); }

inline bool avx512vl_256() const noexcept { return avx512vl_128(); }

// True when avx512_enabled() holds and leaf7() reports every
// x86_cpuid_leaf7::ecx::avx512vbmi bit as set. leaf7() is only queried when
// avx512_enabled() is true.
inline bool avx512vbmi() const noexcept
{
    const bool avx512_ok = avx512_enabled();
    return avx512_ok && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vbmi>();
}

Expand Down
19 changes: 19 additions & 0 deletions include/xsimd/types/xsimd_avx512vl_register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ namespace xsimd
static constexpr char const* name() noexcept { return "avx512vl"; }
};

/**
 * @ingroup architectures
 *
 * AVX512VL instruction set extension restricted to 128-bit registers.
 * Derives from avx2_128.
 */
struct avx512vl_128 : avx2_128
{
    // Identifier string for this architecture.
    static constexpr auto name() noexcept -> char const* { return "avx512vl/128"; }
    // Compile-time support follows the XSIMD_WITH_AVX512VL configuration macro.
    static constexpr auto supported() noexcept -> bool { return XSIMD_WITH_AVX512VL; }
    // Unconditionally true.
    static constexpr auto available() noexcept -> bool { return true; }
};

/**
* @ingroup architectures
*
Expand Down Expand Up @@ -57,6 +69,13 @@ namespace xsimd

XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vl, avx512cd);

// Boolean batches of avx512vl_128 are stored as simd_avx512_bool_register<T>.
template <typename T>
struct get_bool_simd_register<T, avx512vl_128>
{
    typedef simd_avx512_bool_register<T> type;
};
// NOTE(review): presumably makes avx512vl_128 reuse the avx2_128 value
// register types -- confirm against the macro definition.
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vl_128, avx2_128);

template <class T>
struct get_bool_simd_register<T, avx512vl_256>
{
Expand Down
Loading