From 7168d0924b88eba3e02e57ba7291afcb496ff2ec Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 14:59:18 +0100 Subject: [PATCH 01/10] Add ARM target detection in config --- include/xsimd/config/xsimd_config.hpp | 44 ++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 49af1b179..cc15a70cc 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -357,12 +357,23 @@ /** * @ingroup xsimd_config_macro * - * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. + * Set to 1 if the target is in the 64-bit ARM architecture family, to 0 otherwise */ #if defined(__aarch64__) || defined(_M_ARM64) -#define XSIMD_WITH_NEON64 1 +#define XSIMD_TARGET_ARM64 1 #else -#define XSIMD_WITH_NEON64 0 +#define XSIMD_TARGET_ARM64 0 +#endif + +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is in the ARM architecture family, to 0 otherwise + */ +#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64 +#define XSIMD_TARGET_ARM 1 +#else +#define XSIMD_TARGET_ARM 0 #endif /** @@ -370,12 +381,26 @@ * * Set to 1 if NEON is available at compile-time, to 0 otherwise. */ -#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64 +#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && defined(__ARM_NEON)) || XSIMD_TARGET_ARM64 #define XSIMD_WITH_NEON 1 #else #define XSIMD_WITH_NEON 0 #endif +// Neon is always available on Arm64, though it is theoretically possible to compile +// without it, e.g. with -march=armv8-a+nosimd. +// Note that MSVC may never define __ARM_NEON even when available. +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. 
+ */ +#if XSIMD_TARGET_ARM64 +#define XSIMD_WITH_NEON64 1 +#else +#define XSIMD_WITH_NEON64 0 +#endif + /** * @ingroup xsimd_config_macro * @@ -497,4 +522,15 @@ #define XSIMD_NO_SUPPORTED_ARCHITECTURE #endif +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is Linux with getauxval usable (on Android, API level 18 or later), to 0 otherwise + */ +#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) +#define XSIMD_WITH_LINUX_GETAUXVAL 1 +#else +#define XSIMD_WITH_LINUX_GETAUXVAL 0 +#endif + #endif From 02d9d3576afce905cf97d7d7bf0d1b6bcc810b56 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 15:30:29 +0100 Subject: [PATCH 02/10] Add getauxval safe implementation --- include/xsimd/config/xsimd_getauxval.hpp | 141 +++++++++++++++++++++++ include/xsimd/utils/bits.hpp | 2 + 2 files changed, 143 insertions(+) create mode 100644 include/xsimd/config/xsimd_getauxval.hpp diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp new file mode 100644 index 000000000..8ab923b8b --- /dev/null +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -0,0 +1,141 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_GETAUXVAL_HPP +#define XSIMD_GETAUXVAL_HPP + +#include "../utils/bits.hpp" +#include "./xsimd_config.hpp" + +#if XSIMD_WITH_LINUX_GETAUXVAL +#include +#endif + +namespace xsimd +{ + namespace detail + { + using linux_getauxval_t = unsigned long; + + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept; + + /** + * Base class for getauxval querying. 
+ */ + template + class linux_auxval : private utils::uint_bitset + { + using bitset_t = utils::uint_bitset; + + public: + using aux = A; + + inline static linux_auxval read() + { + return bitset_t(linux_getauxval(type)); + } + + /** Create a value which returns false to everything. */ + constexpr linux_auxval() noexcept = default; + + using bitset_t::all_bits_set; + }; + + template + using make_auxiliary_val_t = linux_auxval; + } + + /* + * Hardware Capabilities Register (HWCAP) for Linux. + * + * On Linux systems, the kernel exposes some CPU features through the + * auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`. + * This utility parses such bit values. + * + * @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt + */ + struct linux_hwcap_traits + { +#if XSIMD_WITH_LINUX_GETAUXVAL + static constexpr detail::linux_getauxval_t type = AT_HWCAP; +#else + static constexpr detail::linux_getauxval_t type = 0; +#endif + + enum class aux + { +#if XSIMD_WITH_LINUX_GETAUXVAL +#if XSIMD_TARGET_ARM64 + /** Scalable Vector Extension. */ + sve = HWCAP_SVE, +#elif XSIMD_TARGET_ARM + /** Neon vector extension. */ + neon = HWCAP_NEON, +#endif +#endif + }; + }; + + using linux_hwcap = detail::make_auxiliary_val_t; + + /* + * Extended Hardware Capabilities Register (HWCAP2) for Linux. + * + * On Linux systems, the kernel exposes some CPU additional features through the + * auxiliary vector, which can be queried via `getauxval(AT_HWCAP2)`. + * + * @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt + */ + struct linux_hwcap2_traits + { +#if XSIMD_WITH_LINUX_GETAUXVAL + static constexpr detail::linux_getauxval_t type = AT_HWCAP2; +#else + static constexpr detail::linux_getauxval_t type = 0; +#endif + + enum class aux + { +#if XSIMD_WITH_LINUX_GETAUXVAL +#if XSIMD_TARGET_ARM64 +#ifndef HWCAP2_I8MM +#define HWCAP2_I8MM (1 << 13) +#endif + /** 8 bits integer matrix multiplication. 
*/ + i8mm = HWCAP2_I8MM, +#endif +#endif + }; + }; + + using linux_hwcap2 = detail::make_auxiliary_val_t; + + /******************** + * Implementation * + ********************/ + + namespace detail + { +#if XSIMD_WITH_LINUX_GETAUXVAL + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept + { + return getauxval(type); + } +#else + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept + { + return {}; // All bits set to 0 + } +#endif + } +} + +#endif diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp index ffa09f8e7..001e01b9a 100644 --- a/include/xsimd/utils/bits.hpp +++ b/include/xsimd/utils/bits.hpp @@ -55,6 +55,8 @@ namespace xsimd /* The enum type whose values name individual bits. */ using key_type = K; + constexpr uint_bitset() noexcept = default; + /* Construct from a raw bit pattern. */ constexpr explicit uint_bitset(storage_type bitset = {}) noexcept : m_bitset(bitset) From b8def473a2e14833a2a7fa07b06674883b8869d3 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 16:05:14 +0100 Subject: [PATCH 03/10] Add arm_cpu_features --- .../xsimd/config/xsimd_cpu_features_arm.hpp | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 include/xsimd/config/xsimd_cpu_features_arm.hpp diff --git a/include/xsimd/config/xsimd_cpu_features_arm.hpp b/include/xsimd/config/xsimd_cpu_features_arm.hpp new file mode 100644 index 000000000..9dddd2384 --- /dev/null +++ b/include/xsimd/config/xsimd_cpu_features_arm.hpp @@ -0,0 +1,110 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ***************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_ARM_HPP +#define XSIMD_CPU_FEATURES_ARM_HPP + + +#include "./xsimd_config.hpp" + +#if XSIMD_WITH_LINUX_GETAUXVAL +#include "./xsimd_getauxval.hpp" +#endif + +namespace xsimd +{ + /** + * An opinionated CPU feature detection utility for ARM. + * + * Combines compile-time knowledge with runtime detection when available. + * On Linux, runtime detection uses getauxval to query the auxiliary vector. + * On other platforms, only compile-time information is used. + * + * This is well defined on all architectures. It will always return false on + * non-ARM architectures. + */ + class arm_cpu_features + { + public: + arm_cpu_features() noexcept = default; + + inline bool neon() const noexcept + { +#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return get_hwcap().all_bits_set(); +#else + return static_cast(XSIMD_WITH_NEON); +#endif + } + + constexpr bool neon64() const noexcept + { + return static_cast(XSIMD_WITH_NEON64); + } + + inline bool sve() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return get_hwcap().all_bits_set(); +#else + return false; +#endif + } + + inline bool i8mm() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return get_hwcap2().all_bits_set(); +#else + return false; +#endif + } + + private: +#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + enum class status + { + hwcap_valid = 0, + hwcap2_valid = 1, + }; + + using status_bitset = utils::uint_bitset; + + mutable status_bitset m_status {}; + + mutable xsimd::linux_hwcap m_hwcap {}; + + inline xsimd::linux_hwcap const& get_hwcap() const noexcept + { + if (!m_status.bit_is_set()) + { + m_hwcap = xsimd::linux_hwcap::read(); + m_status.set_bit(); + } + return m_hwcap; + } + +#if XSIMD_TARGET_ARM64 + mutable xsimd::linux_hwcap2 m_hwcap2 {}; + inline xsimd::linux_hwcap2 const& get_hwcap2() const noexcept + { + if 
(!m_status.bit_is_set()) + { + m_hwcap2 = xsimd::linux_hwcap2::read(); + m_status.set_bit(); + } + return m_hwcap2; + } +#endif +#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + }; +} +#endif From 88640c501bae11ede44e3f3071e244335c90bf7b Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 17:15:32 +0100 Subject: [PATCH 04/10] Fix uint_bitset ctor --- include/xsimd/utils/bits.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp index 001e01b9a..ffa09f8e7 100644 --- a/include/xsimd/utils/bits.hpp +++ b/include/xsimd/utils/bits.hpp @@ -55,8 +55,6 @@ namespace xsimd /* The enum type whose values name individual bits. */ using key_type = K; - constexpr uint_bitset() noexcept = default; - /* Construct from a raw bit pattern. */ constexpr explicit uint_bitset(storage_type bitset = {}) noexcept : m_bitset(bitset) From 8a3bf5045dc96b2be87c770389d44b09cbc1b136 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 17:25:16 +0100 Subject: [PATCH 05/10] Add missing header --- include/xsimd/config/xsimd_getauxval.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp index 8ab923b8b..bc8831805 100644 --- a/include/xsimd/config/xsimd_getauxval.hpp +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -16,6 +16,7 @@ #include "./xsimd_config.hpp" #if XSIMD_WITH_LINUX_GETAUXVAL +#include #include #endif From d554c4b196130cbb02c1576240445b943a6cda1e Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 17:26:05 +0100 Subject: [PATCH 06/10] Use arm_cpu_features --- include/xsimd/config/xsimd_cpuid.hpp | 33 +++++++++++----------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 6871637e3..0b8d6f354 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -13,15 
+13,13 @@ #define XSIMD_CPUID_HPP #include "../types/xsimd_all_registers.hpp" +#include "./xsimd_cpu_features_arm.hpp" #include "./xsimd_cpu_features_x86.hpp" #include "xsimd_inline.hpp" -#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector)) +#if XSIMD_WITH_LINUX_GETAUXVAL #include #include - -#ifndef HWCAP2_I8MM -#define HWCAP2_I8MM (1 << 13) #endif #endif @@ -92,29 +90,24 @@ namespace xsimd vsx = 1; #endif -#if defined(__aarch64__) || defined(_M_ARM64) - neon = 1; - neon64 = 1; -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM); - sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE); -#endif - -#elif defined(__ARM_NEON) || defined(_M_ARM) - -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON); -#endif +#if XSIMD_WITH_LINUX_GETAUXVAL +#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 -#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 - -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) #ifndef HWCAP_V #define HWCAP_V (1 << ('V' - 'A')) #endif rvv = bool(getauxval(AT_HWCAP) & HWCAP_V); #endif #endif + + // Safe on all platforms, it will be all false if non arm. + const auto arm_cpu = xsimd::arm_cpu_features(); + + neon = arm_cpu.neon(); + neon64 = arm_cpu.neon64(); + i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm(); + sve = arm_cpu.sve(); + // Safe on all platforms, it will be all false if non x86. 
const auto x86_cpu = xsimd::x86_cpu_features(); From b1b0c3366a84117b0863f46f439c53ca5efe53d9 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 17:27:47 +0100 Subject: [PATCH 07/10] Fix syntax --- include/xsimd/config/xsimd_cpuid.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 0b8d6f354..11c63463b 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -15,15 +15,13 @@ #include "../types/xsimd_all_registers.hpp" #include "./xsimd_cpu_features_arm.hpp" #include "./xsimd_cpu_features_x86.hpp" -#include "xsimd_inline.hpp" +#include "./xsimd_inline.hpp" #if XSIMD_WITH_LINUX_GETAUXVAL #include #include #endif -#endif - namespace xsimd { namespace detail From d3e305ffb1017f55def7234f5cad7abae8597b31 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 18:34:34 +0100 Subject: [PATCH 08/10] Fix auxv header --- include/xsimd/config/xsimd_cpuid.hpp | 2 +- include/xsimd/config/xsimd_getauxval.hpp | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 11c63463b..2f78f90bd 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -17,7 +17,7 @@ #include "./xsimd_cpu_features_x86.hpp" #include "./xsimd_inline.hpp" -#if XSIMD_WITH_LINUX_GETAUXVAL +#if XSIMD_WITH_LINUX_GETAUXVAL && defined(__riscv_vector) #include #include #endif diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp index bc8831805..30ede23a9 100644 --- a/include/xsimd/config/xsimd_getauxval.hpp +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -16,8 +16,10 @@ #include "./xsimd_config.hpp" #if XSIMD_WITH_LINUX_GETAUXVAL -#include -#include +#include // getauxval +#if XSIMD_TARGET_ARM +#include // HWCAP_XXX +#endif #endif namespace xsimd From 
381fb6a5c20f7f721a275d32b447958f790f154f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 20:47:20 +0100 Subject: [PATCH 09/10] Fix shift amounts --- include/xsimd/config/xsimd_cpu_features_arm.hpp | 12 ++++++------ include/xsimd/config/xsimd_getauxval.hpp | 12 +++--------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/include/xsimd/config/xsimd_cpu_features_arm.hpp b/include/xsimd/config/xsimd_cpu_features_arm.hpp index 9dddd2384..062e15b3e 100644 --- a/include/xsimd/config/xsimd_cpu_features_arm.hpp +++ b/include/xsimd/config/xsimd_cpu_features_arm.hpp @@ -12,7 +12,6 @@ #ifndef XSIMD_CPU_FEATURES_ARM_HPP #define XSIMD_CPU_FEATURES_ARM_HPP - #include "./xsimd_config.hpp" #if XSIMD_WITH_LINUX_GETAUXVAL @@ -39,7 +38,7 @@ namespace xsimd inline bool neon() const noexcept { #if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL - return get_hwcap().all_bits_set(); + return hwcap().all_bits_set(); #else return static_cast(XSIMD_WITH_NEON); #endif @@ -53,7 +52,7 @@ namespace xsimd inline bool sve() const noexcept { #if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL - return get_hwcap().all_bits_set(); + return hwcap().all_bits_set(); #else return false; #endif @@ -62,7 +61,7 @@ namespace xsimd inline bool i8mm() const noexcept { #if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL - return get_hwcap2().all_bits_set(); + return hwcap2().all_bits_set(); #else return false; #endif @@ -82,7 +81,7 @@ namespace xsimd mutable xsimd::linux_hwcap m_hwcap {}; - inline xsimd::linux_hwcap const& get_hwcap() const noexcept + inline xsimd::linux_hwcap const& hwcap() const noexcept { if (!m_status.bit_is_set()) { @@ -94,7 +93,8 @@ namespace xsimd #if XSIMD_TARGET_ARM64 mutable xsimd::linux_hwcap2 m_hwcap2 {}; - inline xsimd::linux_hwcap2 const& get_hwcap2() const noexcept + + inline xsimd::linux_hwcap2 const& hwcap2() const noexcept { if (!m_status.bit_is_set()) { diff --git a/include/xsimd/config/xsimd_getauxval.hpp 
b/include/xsimd/config/xsimd_getauxval.hpp index 30ede23a9..acc4a1540 100644 --- a/include/xsimd/config/xsimd_getauxval.hpp +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -17,9 +17,6 @@ #if XSIMD_WITH_LINUX_GETAUXVAL #include // getauxval -#if XSIMD_TARGET_ARM -#include // HWCAP_XXX -#endif #endif namespace xsimd @@ -78,10 +75,10 @@ namespace xsimd #if XSIMD_WITH_LINUX_GETAUXVAL #if XSIMD_TARGET_ARM64 /** Scalable Vector Extension. */ - sve = HWCAP_SVE, + sve = 22, #elif XSIMD_TARGET_ARM /** Neon vector extension. */ - neon = HWCAP_NEON, + neon = 12, #endif #endif }; @@ -109,11 +106,8 @@ namespace xsimd { #if XSIMD_WITH_LINUX_GETAUXVAL #if XSIMD_TARGET_ARM64 -#ifndef HWCAP2_I8MM -#define HWCAP2_I8MM (1 << 13) -#endif /** 8 bits integer matrix multiplication. */ - i8mm = HWCAP2_I8MM, + i8mm = 13, #endif #endif }; From 269e82e2de4e4e9288dc1eeeaae8320e2ffc71f6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 18 Mar 2026 21:18:30 +0100 Subject: [PATCH 10/10] Fix ctor --- include/xsimd/config/xsimd_getauxval.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp index acc4a1540..ee5cf1cf4 100644 --- a/include/xsimd/config/xsimd_getauxval.hpp +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -34,13 +34,14 @@ namespace xsimd class linux_auxval : private utils::uint_bitset { using bitset_t = utils::uint_bitset; + using bitset_t::bitset_t; public: using aux = A; inline static linux_auxval read() { - return bitset_t(linux_getauxval(type)); + return linux_auxval(linux_getauxval(type)); } /** Create a value which returns false to everything. */