diff --git a/include/xsimd/config/xsimd_cpu_features.hpp b/include/xsimd/config/xsimd_cpu_features.hpp new file mode 100644 index 000000000..8d9e05a8a --- /dev/null +++ b/include/xsimd/config/xsimd_cpu_features.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_HPP +#define XSIMD_CPU_FEATURES_HPP + +#include "./xsimd_cpu_features_arm.hpp" +#include "./xsimd_cpu_features_ppc.hpp" +#include "./xsimd_cpu_features_riscv.hpp" +#include "./xsimd_cpu_features_x86.hpp" + +namespace xsimd +{ + + /** + * Cross-platform CPU feature detection class. + * + * All member functions are safe to call on all platforms. + * + * @warning This class is *not* thread safe. + * Its internal lazy querying structure makes even `const` member functions prone to data races. + * The structure is also generally not appropriate for directly branching (e.g. on + * ``cpu_features::avx2``) because it includes a branch that the compiler cannot optimize. + * The current appropriate way to use this class for dynamic dispatching is to store the + * result of the function calls (e.g. @ref cpu_features) into (static) constants. + * This is done in @ref xsimd::available_architectures. 
+ * + * @see xsimd::dispatch + * @see xsimd::available_architectures + */ + class cpu_features : public ppc_cpu_features, + public riscv_cpu_features, + public arm_cpu_features, + public x86_cpu_features + { + }; + +} + +#endif diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 38b84c79f..5e19e74c5 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -13,10 +13,7 @@ #define XSIMD_CPUID_HPP #include "../types/xsimd_all_registers.hpp" -#include "./xsimd_cpu_features_arm.hpp" -#include "./xsimd_cpu_features_ppc.hpp" -#include "./xsimd_cpu_features_riscv.hpp" -#include "./xsimd_cpu_features_x86.hpp" +#include "./xsimd_cpu_features.hpp" #include "./xsimd_inline.hpp" namespace xsimd @@ -81,58 +78,54 @@ namespace xsimd wasm = 1; #endif - // Safe on all platforms, it will be false if non PowerPC. - const auto ppc_cpu = xsimd::ppc_cpu_features(); + const auto cpu = xsimd::cpu_features(); - vsx = ppc_cpu.vsx(); + vsx = cpu.vsx(); - // Safe on all platforms, it will be all false if non risc-v. - const auto riscv_cpu = xsimd::riscv_cpu_features(); + rvv128 = cpu.rvv() && (cpu.rvv_size_bytes() >= (128 / 8)); + rvv256 = cpu.rvv() && (cpu.rvv_size_bytes() >= (256 / 8)); + rvv512 = cpu.rvv() && (cpu.rvv_size_bytes() >= (512 / 8)); - rvv128 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (128 / 8)); - rvv256 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (256 / 8)); - rvv512 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (512 / 8)); + neon = cpu.neon(); + neon64 = cpu.neon64(); + i8mm_neon64 = cpu.neon64() && cpu.i8mm(); - // Safe on all platforms, it will be all false if non arm. - const auto arm_cpu = xsimd::arm_cpu_features(); + // Running SVE128 on a SVE256 machine is trickier than the x86 equivalent + // of running SSE code on an AVX machine and requires explicitly changing the + // vector length using `prctl` (per thread setting). 
+ // This is something we have neither tested nor integrated in xsimd, so the safe + // default is to assume only one valid SVE width at runtime. + sve128 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 128); + sve256 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 256); + sve512 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 512); - neon = arm_cpu.neon(); - neon64 = arm_cpu.neon64(); - i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm(); - sve128 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (128 / 8)); - sve256 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (256 / 8)); - sve512 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (512 / 8)); - - // Safe on all platforms, it will be all false if non x86. - const auto x86_cpu = xsimd::x86_cpu_features(); - - sse2 = x86_cpu.sse2(); - sse3 = x86_cpu.sse3(); - ssse3 = x86_cpu.ssse3(); - sse4_1 = x86_cpu.sse4_1(); - sse4_2 = x86_cpu.sse4_2(); - fma3_sse42 = x86_cpu.fma3(); + sse2 = cpu.sse2(); + sse3 = cpu.sse3(); + ssse3 = cpu.ssse3(); + sse4_1 = cpu.sse4_1(); + sse4_2 = cpu.sse4_2(); + fma3_sse42 = cpu.fma3(); // sse4a not implemented in cpu_id yet // xop not implemented in cpu_id yet - avx = x86_cpu.avx(); + avx = cpu.avx(); fma3_avx = avx && fma3_sse42; - fma4 = x86_cpu.fma4(); - avx2 = x86_cpu.avx2(); - avxvnni = x86_cpu.avxvnni(); + fma4 = cpu.fma4(); + avx2 = cpu.avx2(); + avxvnni = cpu.avxvnni(); fma3_avx2 = avx2 && fma3_sse42; - avx512f = x86_cpu.avx512f(); - avx512cd = x86_cpu.avx512cd(); - avx512dq = x86_cpu.avx512dq(); - avx512bw = x86_cpu.avx512bw(); - avx512er = x86_cpu.avx512er(); - avx512pf = x86_cpu.avx512pf(); - avx512ifma = x86_cpu.avx512ifma(); - avx512vbmi = x86_cpu.avx512vbmi(); - avx512vbmi2 = x86_cpu.avx512vbmi2(); - avx512vnni_bw = x86_cpu.avx512vnni_bw(); + avx512f = cpu.avx512f(); + avx512cd = cpu.avx512cd(); + avx512dq = cpu.avx512dq(); + avx512bw = cpu.avx512bw(); + avx512er = cpu.avx512er(); + avx512pf = cpu.avx512pf(); + avx512ifma = cpu.avx512ifma(); + avx512vbmi = cpu.avx512vbmi(); + avx512vbmi2 = 
cpu.avx512vbmi2(); + avx512vnni_bw = cpu.avx512vnni_bw(); avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw; } }; diff --git a/test/test_cpu_features.cpp b/test/test_cpu_features.cpp index 0e127e855..1c77ef5c8 100644 --- a/test/test_cpu_features.cpp +++ b/test/test_cpu_features.cpp @@ -52,7 +52,7 @@ namespace detail */ TEST_CASE("[cpu_features] x86 implication chains") { - xsimd::x86_cpu_features cpu; + xsimd::cpu_features cpu; // SSE implication chain CHECK_IMPLICATION(cpu.sse4_2(), cpu.sse4_1()); @@ -118,7 +118,7 @@ TEST_CASE("[cpu_features] x86 manufacturer from environment") TEST_CASE("[cpu_features] x86 features from environment") { - xsimd::x86_cpu_features cpu; + xsimd::cpu_features cpu; CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_SSE2", cpu.sse2()); CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_SSE3", cpu.sse3()); @@ -148,7 +148,7 @@ TEST_CASE("[cpu_features] arm implication chains") TEST_CASE("[cpu_features] arm features from environment") { - xsimd::arm_cpu_features cpu; + xsimd::cpu_features cpu; CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_NEON", cpu.neon()); CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_NEON64", cpu.neon64()); @@ -158,21 +158,21 @@ TEST_CASE("[cpu_features] arm features from environment") TEST_CASE("[cpu_features] risc-v implication chains") { - xsimd::riscv_cpu_features cpu; + xsimd::cpu_features cpu; CHECK_IMPLICATION(cpu.rvv(), cpu.rvv_size_bytes() >= (128 / 8)); } TEST_CASE("[cpu_features] risc-v features from environment") { - xsimd::riscv_cpu_features cpu; + xsimd::cpu_features cpu; CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_RVV", cpu.rvv()); } TEST_CASE("[cpu_features] ppc features from environment") { - xsimd::ppc_cpu_features cpu; + xsimd::cpu_features cpu; CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_VSX", cpu.vsx()); }