Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions include/xsimd/config/xsimd_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,25 +357,50 @@
/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
* Set to 1 if the target is in the ARM architecture family in 64 bits, to 0 otherwise
*/
#if defined(__aarch64__) || defined(_M_ARM64)
#define XSIMD_WITH_NEON64 1
#define XSIMD_TARGET_ARM64 1
#else
#define XSIMD_WITH_NEON64 0
#define XSIMD_TARGET_ARM64 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if the target is in the ARM architecture family, to 0 otherwise
*/
#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64
#define XSIMD_TARGET_ARM 1
#else
#define XSIMD_TARGET_ARM 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON is available at compile-time, to 0 otherwise.
*/
#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && defined(__ARM_NEON)) || XSIMD_TARGET_ARM64
#define XSIMD_WITH_NEON 1
#else
#define XSIMD_WITH_NEON 0
#endif

// Neon is always available on Arm64, though it is theoritially possible to compile
// without it, such as -march=armv8-a+nosimd.
// Note that MSVC may never define __ARM_NEON even when available.
/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
*/
#if XSIMD_TARGET_ARM64
#define XSIMD_WITH_NEON64 1
#else
#define XSIMD_WITH_NEON64 0
#endif

/**
* @ingroup xsimd_config_macro
*
Expand Down Expand Up @@ -497,4 +522,15 @@
#define XSIMD_NO_SUPPORTED_ARCHITECTURE
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if the target is a linux
*/
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
#define XSIMD_WITH_LINUX_GETAUXVAL 1
#else
#define XSIMD_WITH_LINUX_GETAUXVAL 0
#endif

#endif
110 changes: 110 additions & 0 deletions include/xsimd/config/xsimd_cpu_features_arm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
***************************************************************************/

#ifndef XSIMD_CPU_FEATURES_ARM_HPP
#define XSIMD_CPU_FEATURES_ARM_HPP

#include "./xsimd_config.hpp"

#if XSIMD_WITH_LINUX_GETAUXVAL
#include "./xsimd_getauxval.hpp"
#endif

namespace xsimd
{
/**
* An opinionated CPU feature detection utility for ARM.
*
* Combines compile-time knowledge with runtime detection when available.
* On Linux, runtime detection uses getauxval to query the auxiliary vector.
* On other platforms, only compile-time information is used.
*
* This is well defined on all architectures. It will always return false on
* non-ARM architectures.
*/
class arm_cpu_features
{
public:
arm_cpu_features() noexcept = default;

inline bool neon() const noexcept
{
#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap().all_bits_set<linux_hwcap::aux::neon>();
#else
return static_cast<bool>(XSIMD_WITH_NEON);
#endif
}

constexpr bool neon64() const noexcept
{
return static_cast<bool>(XSIMD_WITH_NEON64);
}

inline bool sve() const noexcept
{
#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap().all_bits_set<linux_hwcap::aux::sve>();
#else
return false;
#endif
}

inline bool i8mm() const noexcept
{
#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap2().all_bits_set<linux_hwcap2::aux::i8mm>();
#else
return false;
#endif
}

private:
#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL
enum class status
{
hwcap_valid = 0,
hwcap2_valid = 1,
};

using status_bitset = utils::uint_bitset<status, std::uint32_t>;

mutable status_bitset m_status {};

mutable xsimd::linux_hwcap m_hwcap {};

inline xsimd::linux_hwcap const& hwcap() const noexcept
{
if (!m_status.bit_is_set<status::hwcap_valid>())
{
m_hwcap = xsimd::linux_hwcap::read();
m_status.set_bit<status::hwcap_valid>();
}
return m_hwcap;
}

#if XSIMD_TARGET_ARM64
mutable xsimd::linux_hwcap2 m_hwcap2 {};

inline xsimd::linux_hwcap2 const& hwcap2() const noexcept
{
if (!m_status.bit_is_set<status::hwcap2_valid>())
{
m_hwcap2 = xsimd::linux_hwcap2::read();
m_status.set_bit<status::hwcap2_valid>();
}
return m_hwcap2;
}
#endif
#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL
};
}
#endif
37 changes: 14 additions & 23 deletions include/xsimd/config/xsimd_cpuid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,13 @@
#define XSIMD_CPUID_HPP

#include "../types/xsimd_all_registers.hpp"
#include "./xsimd_cpu_features_arm.hpp"
#include "./xsimd_cpu_features_x86.hpp"
#include "xsimd_inline.hpp"
#include "./xsimd_inline.hpp"

#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
#if XSIMD_WITH_LINUX_GETAUXVAL && defined(__riscv_vector)
#include <asm/hwcap.h>
#include <sys/auxv.h>

#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif

#endif

namespace xsimd
Expand Down Expand Up @@ -92,29 +88,24 @@ namespace xsimd
vsx = 1;
#endif

#if defined(__aarch64__) || defined(_M_ARM64)
neon = 1;
neon64 = 1;
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
#endif

#elif defined(__ARM_NEON) || defined(_M_ARM)
#if XSIMD_WITH_LINUX_GETAUXVAL
#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0

#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
#endif

#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0

#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
#ifndef HWCAP_V
#define HWCAP_V (1 << ('V' - 'A'))
#endif
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
#endif
#endif

// Safe on all platforms, it will be all false if non arm.
const auto arm_cpu = xsimd::arm_cpu_features();

neon = arm_cpu.neon();
neon64 = arm_cpu.neon64();
i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm();
sve = arm_cpu.sve();

// Safe on all platforms, it will be all false if non x86.
const auto x86_cpu = xsimd::x86_cpu_features();

Expand Down
139 changes: 139 additions & 0 deletions include/xsimd/config/xsimd_getauxval.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
***************************************************************************/

#ifndef XSIMD_GETAUXVAL_HPP
#define XSIMD_GETAUXVAL_HPP

#include "../utils/bits.hpp"
#include "./xsimd_config.hpp"

#if XSIMD_WITH_LINUX_GETAUXVAL
#include <sys/auxv.h> // getauxval
#endif

namespace xsimd
{
namespace detail
{
using linux_getauxval_t = unsigned long;

inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept;

/**
* Base class for getauxval querying.
*/
template <linux_getauxval_t type, typename A>
class linux_auxval : private utils::uint_bitset<A, linux_getauxval_t>
{
using bitset_t = utils::uint_bitset<A, linux_getauxval_t>;
using bitset_t::bitset_t;

public:
using aux = A;

inline static linux_auxval read()
{
return linux_auxval(linux_getauxval(type));
}

/** Create a value which returns false to everything. */
constexpr linux_auxval() noexcept = default;

using bitset_t::all_bits_set;
};

template <typename Traits>
using make_auxiliary_val_t = linux_auxval<Traits::type, typename Traits::aux>;
}

/*
* Hardware Capabilities Register (HWCAP) for Linux.
*
* On Linux systems, the kernel exposes some CPU features through the
* auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`.
* This utility parses such bit values.
*
* @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt
*/
struct linux_hwcap_traits
{
#if XSIMD_WITH_LINUX_GETAUXVAL
static constexpr detail::linux_getauxval_t type = AT_HWCAP;
#else
static constexpr detail::linux_getauxval_t type = 0;
#endif

enum class aux
{
#if XSIMD_WITH_LINUX_GETAUXVAL
#if XSIMD_TARGET_ARM64
/** Scalable Vector Extension. */
sve = 22,
#elif XSIMD_TARGET_ARM
/** Neon vector extension. */
neon = 12,
#endif
#endif
};
};

using linux_hwcap = detail::make_auxiliary_val_t<linux_hwcap_traits>;

/*
* Extended Hardware Capabilities Register (HWCAP2) for Linux.
*
* On Linux systems, the kernel exposes some CPU additional features through the
* auxiliary vector, which can be queried via `getauxval(AT_HWCAP2)`.
*
* @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt
*/
struct linux_hwcap2_traits
{
#if XSIMD_WITH_LINUX_GETAUXVAL
static constexpr detail::linux_getauxval_t type = AT_HWCAP2;
#else
static constexpr detail::linux_getauxval_t type = 0;
#endif

enum class aux
{
#if XSIMD_WITH_LINUX_GETAUXVAL
#if XSIMD_TARGET_ARM64
/** 8 bits integer matrix multiplication. */
i8mm = 13,
#endif
#endif
};
};

using linux_hwcap2 = detail::make_auxiliary_val_t<linux_hwcap2_traits>;

/********************
* Implementation *
********************/

namespace detail
{
#if XSIMD_WITH_LINUX_GETAUXVAL
inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept
{
return getauxval(type);
}
#else
inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept
{
return {}; // All bits set to 0
}
#endif
}
}

#endif
Loading