diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 49af1b179..cc15a70cc 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -357,12 +357,23 @@ /** * @ingroup xsimd_config_macro * - * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. + * Set to 1 if the target is in the ARM architecture family in 64 bits, to 0 otherwise */ #if defined(__aarch64__) || defined(_M_ARM64) -#define XSIMD_WITH_NEON64 1 +#define XSIMD_TARGET_ARM64 1 #else -#define XSIMD_WITH_NEON64 0 +#define XSIMD_TARGET_ARM64 0 +#endif + +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is in the ARM architecture family, to 0 otherwise + */ +#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64 +#define XSIMD_TARGET_ARM 1 +#else +#define XSIMD_TARGET_ARM 0 #endif /** @@ -370,12 +381,26 @@ * * Set to 1 if NEON is available at compile-time, to 0 otherwise. */ -#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64 +#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && defined(__ARM_NEON)) || XSIMD_TARGET_ARM64 #define XSIMD_WITH_NEON 1 #else #define XSIMD_WITH_NEON 0 #endif +// Neon is always available on Arm64, though it is theoritially possible to compile +// without it, such as -march=armv8-a+nosimd. +// Note that MSVC may never define __ARM_NEON even when available. +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. + */ +#if XSIMD_TARGET_ARM64 +#define XSIMD_WITH_NEON64 1 +#else +#define XSIMD_WITH_NEON64 0 +#endif + /** * @ingroup xsimd_config_macro * @@ -497,4 +522,15 @@ #define XSIMD_NO_SUPPORTED_ARCHITECTURE #endif +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is a linux + */ +#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) +#define XSIMD_WITH_LINUX_GETAUXVAL 1 +#else +#define XSIMD_WITH_LINUX_GETAUXVAL 0 +#endif + #endif diff --git a/include/xsimd/config/xsimd_cpu_features_arm.hpp b/include/xsimd/config/xsimd_cpu_features_arm.hpp new file mode 100644 index 000000000..062e15b3e --- /dev/null +++ b/include/xsimd/config/xsimd_cpu_features_arm.hpp @@ -0,0 +1,110 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_ARM_HPP +#define XSIMD_CPU_FEATURES_ARM_HPP + +#include "./xsimd_config.hpp" + +#if XSIMD_WITH_LINUX_GETAUXVAL +#include "./xsimd_getauxval.hpp" +#endif + +namespace xsimd +{ + /** + * An opinionated CPU feature detection utility for ARM. + * + * Combines compile-time knowledge with runtime detection when available. + * On Linux, runtime detection uses getauxval to query the auxiliary vector. + * On other platforms, only compile-time information is used. + * + * This is well defined on all architectures. It will always return false on + * non-ARM architectures. + */ + class arm_cpu_features + { + public: + arm_cpu_features() noexcept = default; + + inline bool neon() const noexcept + { +#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap().all_bits_set(); +#else + return static_cast(XSIMD_WITH_NEON); +#endif + } + + constexpr bool neon64() const noexcept + { + return static_cast(XSIMD_WITH_NEON64); + } + + inline bool sve() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap().all_bits_set(); +#else + return false; +#endif + } + + inline bool i8mm() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap2().all_bits_set(); +#else + return false; +#endif + } + + private: +#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + enum class status + { + hwcap_valid = 0, + hwcap2_valid = 1, + }; + + using status_bitset = utils::uint_bitset; + + mutable status_bitset m_status {}; + + mutable xsimd::linux_hwcap m_hwcap {}; + + inline xsimd::linux_hwcap const& hwcap() const noexcept + { + if (!m_status.bit_is_set()) + { + m_hwcap = xsimd::linux_hwcap::read(); + m_status.set_bit(); + } + return m_hwcap; + } + +#if XSIMD_TARGET_ARM64 + mutable xsimd::linux_hwcap2 m_hwcap2 {}; + + inline xsimd::linux_hwcap2 const& hwcap2() const noexcept + { + if (!m_status.bit_is_set()) + { + m_hwcap2 = xsimd::linux_hwcap2::read(); + m_status.set_bit(); + } + return m_hwcap2; + } +#endif +#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + }; +} +#endif diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 6871637e3..2f78f90bd 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -13,17 +13,13 @@ #define XSIMD_CPUID_HPP #include "../types/xsimd_all_registers.hpp" +#include "./xsimd_cpu_features_arm.hpp" #include "./xsimd_cpu_features_x86.hpp" -#include "xsimd_inline.hpp" +#include "./xsimd_inline.hpp" -#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector)) +#if XSIMD_WITH_LINUX_GETAUXVAL && defined(__riscv_vector) #include #include - -#ifndef HWCAP2_I8MM -#define HWCAP2_I8MM (1 << 13) -#endif - #endif namespace xsimd @@ -92,29 +88,24 @@ namespace xsimd vsx = 1; #endif -#if defined(__aarch64__) || defined(_M_ARM64) - neon = 1; - neon64 = 1; -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM); - sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE); -#endif - -#elif defined(__ARM_NEON) || defined(_M_ARM) +#if XSIMD_WITH_LINUX_GETAUXVAL +#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON); -#endif - -#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 - -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) #ifndef HWCAP_V #define HWCAP_V (1 << ('V' - 'A')) #endif rvv = bool(getauxval(AT_HWCAP) & HWCAP_V); #endif #endif + + // Safe on all platforms, it will be all false if non arm. + const auto arm_cpu = xsimd::arm_cpu_features(); + + neon = arm_cpu.neon(); + neon64 = arm_cpu.neon64(); + i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm(); + sve = arm_cpu.sve(); + // Safe on all platforms, it will be all false if non x86. const auto x86_cpu = xsimd::x86_cpu_features(); diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp new file mode 100644 index 000000000..ee5cf1cf4 --- /dev/null +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -0,0 +1,139 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_GETAUXVAL_HPP +#define XSIMD_GETAUXVAL_HPP + +#include "../utils/bits.hpp" +#include "./xsimd_config.hpp" + +#if XSIMD_WITH_LINUX_GETAUXVAL +#include // getauxval +#endif + +namespace xsimd +{ + namespace detail + { + using linux_getauxval_t = unsigned long; + + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept; + + /** + * Base class for getauxval querying. + */ + template + class linux_auxval : private utils::uint_bitset + { + using bitset_t = utils::uint_bitset; + using bitset_t::bitset_t; + + public: + using aux = A; + + inline static linux_auxval read() + { + return linux_auxval(linux_getauxval(type)); + } + + /** Create a value which returns false to everything. */ + constexpr linux_auxval() noexcept = default; + + using bitset_t::all_bits_set; + }; + + template + using make_auxiliary_val_t = linux_auxval; + } + + /* + * Hardware Capabilities Register (HWCAP) for Linux. + * + * On Linux systems, the kernel exposes some CPU features through the + * auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`. + * This utility parses such bit values. + * + * @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt + */ + struct linux_hwcap_traits + { +#if XSIMD_WITH_LINUX_GETAUXVAL + static constexpr detail::linux_getauxval_t type = AT_HWCAP; +#else + static constexpr detail::linux_getauxval_t type = 0; +#endif + + enum class aux + { +#if XSIMD_WITH_LINUX_GETAUXVAL +#if XSIMD_TARGET_ARM64 + /** Scalable Vector Extension. */ + sve = 22, +#elif XSIMD_TARGET_ARM + /** Neon vector extension. */ + neon = 12, +#endif +#endif + }; + }; + + using linux_hwcap = detail::make_auxiliary_val_t; + + /* + * Extended Hardware Capabilities Register (HWCAP2) for Linux. + * + * On Linux systems, the kernel exposes some CPU additional features through the + * auxiliary vector, which can be queried via `getauxval(AT_HWCAP2)`. + * + * @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt + */ + struct linux_hwcap2_traits + { +#if XSIMD_WITH_LINUX_GETAUXVAL + static constexpr detail::linux_getauxval_t type = AT_HWCAP2; +#else + static constexpr detail::linux_getauxval_t type = 0; +#endif + + enum class aux + { +#if XSIMD_WITH_LINUX_GETAUXVAL +#if XSIMD_TARGET_ARM64 + /** 8 bits integer matrix multiplication. */ + i8mm = 13, +#endif +#endif + }; + }; + + using linux_hwcap2 = detail::make_auxiliary_val_t; + + /******************** + * Implementation * + ********************/ + + namespace detail + { +#if XSIMD_WITH_LINUX_GETAUXVAL + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept + { + return getauxval(type); + } +#else + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept + { + return {}; // All bits set to 0 + } +#endif + } +} + +#endif