Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
102 commits
Select commit Hold shift + click to select a range
2394c68
std::simd core functionalities
fbusato Mar 31, 2026
098ea92
std::simd load and store functions
fbusato Mar 31, 2026
d65a896
remove simd_abi namespace
fbusato Mar 31, 2026
28dbe31
type_traits unit tests and formatting
fbusato Apr 1, 2026
49ffb87
flags unit test
fbusato Apr 1, 2026
e8f7ae3
renaming
fbusato Apr 2, 2026
b5e5fee
basic_mask unit test
fbusato Apr 3, 2026
e333e93
basic_mask unit tests
fbusato Apr 3, 2026
6c32bf3
basic_mask small improvements
fbusato Apr 3, 2026
a243f92
fix CI failures
fbusato Apr 3, 2026
cb854c3
extend basic_mask test coverage
fbusato Apr 3, 2026
05d27e1
basic_vec operator unit tests
fbusato Apr 3, 2026
8a129f0
add floating-point
fbusato Apr 3, 2026
2dec0ab
add extended floating point types and deduction guide tests
fbusato Apr 3, 2026
0a053cb
fix deduction guides
fbusato Apr 3, 2026
f6dd824
remove macros from fixed_size_vec.h
fbusato Apr 7, 2026
72d6e1c
additional tests and simplifications
fbusato Apr 8, 2026
d4d6ea6
address comments and CI
fbusato Apr 8, 2026
99e0412
fixed alignment and added more unit tests
fbusato Apr 8, 2026
bfd7ed0
std::simd load and store functions
fbusato Mar 31, 2026
bcb42d6
Merge branch 'simd-load-store' of github.com:fbusato/cccl into simd-l…
fbusato Apr 8, 2026
4e31656
fix nanespace
fbusato Apr 8, 2026
0bfe9e0
refactoring
fbusato Apr 9, 2026
089a55c
remove reductions
fbusato Apr 13, 2026
979d861
avoid internal initialization in C++20
fbusato Apr 13, 2026
ffb0de0
std::simd load and store functions
fbusato Mar 31, 2026
9936d2f
fix nanespace
fbusato Apr 8, 2026
4ca9f01
refactoring
fbusato Apr 9, 2026
38ec52d
Merge branch 'simd-load-store' of github.com:fbusato/cccl into simd-l…
fbusato Apr 13, 2026
28f4698
improve load/store functions
fbusato Apr 13, 2026
2ab22c2
unit test
fbusato Apr 14, 2026
d33579b
improve unit test
fbusato Apr 14, 2026
b8ea745
clean up
fbusato Apr 14, 2026
dade385
fix MSVC warnings
fbusato Apr 15, 2026
8fc6989
add code gen checks
fbusato Apr 15, 2026
6a8b6c6
fix headers
fbusato Apr 15, 2026
0377ea4
fix NVRTC c++17
fbusato Apr 15, 2026
caf5793
std::simd load and store functions
fbusato Mar 31, 2026
9c067e6
fix nanespace
fbusato Apr 8, 2026
11612d4
refactoring
fbusato Apr 9, 2026
1976e10
std::simd load and store functions
fbusato Mar 31, 2026
5ea4ad5
fix nanespace
fbusato Apr 8, 2026
c2f4f8c
refactoring
fbusato Apr 9, 2026
1283c95
improve load/store functions
fbusato Apr 13, 2026
f9d7197
unit test
fbusato Apr 14, 2026
98df8b5
improve unit test
fbusato Apr 14, 2026
d4a45f0
clean up
fbusato Apr 14, 2026
79eb7ec
fix MSVC warnings
fbusato Apr 15, 2026
db1d779
add code gen checks
fbusato Apr 15, 2026
502afbe
fix MSVC warning
fbusato Apr 15, 2026
c4f27e3
Merge branch 'simd-load-store' of github.com:fbusato/cccl into simd-l…
fbusato Apr 15, 2026
7e6c233
exclude N=1 for vectorization
fbusato Apr 15, 2026
d34d5d3
add specialization header
fbusato Apr 15, 2026
fa8ae7c
add specialization header
fbusato Apr 15, 2026
fda62b4
use inline
fbusato Apr 15, 2026
e9fc7bf
std::simd load and store functions
fbusato Mar 31, 2026
da28ebb
fix nanespace
fbusato Apr 8, 2026
23741e0
refactoring
fbusato Apr 9, 2026
830b037
std::simd load and store functions
fbusato Mar 31, 2026
4c86ee7
fix nanespace
fbusato Apr 8, 2026
c9e96d2
refactoring
fbusato Apr 9, 2026
ec9dcfe
improve load/store functions
fbusato Apr 13, 2026
99c09fb
unit test
fbusato Apr 14, 2026
5be6cd0
improve unit test
fbusato Apr 14, 2026
e60e97b
clean up
fbusato Apr 14, 2026
5958f8a
fix MSVC warnings
fbusato Apr 15, 2026
1758ca1
add code gen checks
fbusato Apr 15, 2026
c7786a3
fix MSVC warning
fbusato Apr 15, 2026
a3cde82
std::simd load and store functions
fbusato Mar 31, 2026
943c842
fix nanespace
fbusato Apr 8, 2026
b0bef70
refactoring
fbusato Apr 9, 2026
58fcbb7
std::simd load and store functions
fbusato Mar 31, 2026
742efe6
fix nanespace
fbusato Apr 8, 2026
39add40
refactoring
fbusato Apr 9, 2026
54b9537
improve load/store functions
fbusato Apr 13, 2026
8ce963c
unit test
fbusato Apr 14, 2026
3906173
improve unit test
fbusato Apr 14, 2026
7234308
clean up
fbusato Apr 14, 2026
cd12aec
fix MSVC warnings
fbusato Apr 15, 2026
f6eb60a
add code gen checks
fbusato Apr 15, 2026
6aace73
exclude N=1 for vectorization
fbusato Apr 15, 2026
aa5acd9
add inline
fbusato Apr 15, 2026
338bd14
Merge branch 'simd-load-store' of github.com:fbusato/cccl into simd-l…
fbusato Apr 15, 2026
526215a
Update libcudacxx/include/cuda/std/__simd/basic_vec.h
fbusato Apr 16, 2026
a998247
remove c++17/20 dispatch
fbusato Apr 16, 2026
9fbcec6
minor simplification
fbusato Apr 16, 2026
2693a1f
follow c++ specification for non-specialized classes
fbusato Apr 16, 2026
0d3a3d3
extend deduction tests
fbusato Apr 16, 2026
574e878
use is_extended_arithmetic
fbusato Apr 16, 2026
996020a
use fwd
fbusato Apr 16, 2026
09ca81c
fix unit test headers
fbusato Apr 16, 2026
185bb13
use const parameters
fbusato Apr 16, 2026
5be8f3d
Merge branch 'main' into simd-core
fbusato Apr 16, 2026
0b0faee
Merge branch 'simd-core' of github.com:fbusato/cccl into simd-core
fbusato Apr 16, 2026
4be9268
Merge simd-core into simd-load-store
fbusato Apr 16, 2026
d44129b
Merge simd-core into simd-load-store
fbusato Apr 16, 2026
f7f8681
Merge branch 'simd-load-store' of github.com:fbusato/cccl into simd-l…
fbusato Apr 16, 2026
9ffc3e0
fix unit test headers
fbusato Apr 16, 2026
5e64748
use noexcept
fbusato Apr 17, 2026
2c6c796
use TEST_FUNC
fbusato Apr 17, 2026
e27c0c2
Merge branch 'simd-core' into simd-load-store
fbusato Apr 17, 2026
0239bc4
use TEST_FUNC in simd.loadstore tests
fbusato Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions libcudacxx/include/cuda/std/__fwd/simd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++ in the CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___FWD_SIMD_H
#define _CUDA_STD___FWD_SIMD_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cstddef/types.h>
#include <cuda/std/__simd/abi.h>
#include <cuda/std/__simd/exposition.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD

// Forward declaration of the data-parallel vector class template (declaration only; no
// definition is provided in this header).
//   _Tp  - element type.
//   _Abi - ABI tag; defaults to native<_Tp>.
// The unnamed third parameter defaults to void.
// NOTE(review): presumably a SFINAE/enable-if hook used by the specializations -- confirm
// against the header that defines basic_vec.
template <typename _Tp, typename _Abi = native<_Tp>, typename = void>
class basic_vec;

// Forward declaration of the data-parallel mask class template (declaration only; no
// definition is provided in this header).
//   _Bytes - size value the mask is keyed on (the `mask` alias in this header passes sizeof(_Tp)).
//   _Abi   - ABI tag; defaults to native<__integer_from<_Bytes>>.
// The unnamed third parameter defaults to void.
// NOTE(review): __integer_from is not declared in this header; presumably it comes from
// <cuda/std/__simd/exposition.h> -- confirm.
template <size_t _Bytes, typename _Abi = native<__integer_from<_Bytes>>, typename = void>
class basic_mask;

// Convenience alias: a basic_vec of element type _Tp whose ABI tag is obtained from
// __deduce_abi_t<_Tp, _Np>.
//   _Np - requested element count; defaults to __simd_size_v<_Tp, native<_Tp>>.
// NOTE(review): __simd_size_v is not declared in this header; presumably it yields the
// element count of the native ABI for _Tp -- confirm.
template <typename _Tp, __simd_size_type _Np = __simd_size_v<_Tp, native<_Tp>>>
using vec = basic_vec<_Tp, __deduce_abi_t<_Tp, _Np>>;

// Convenience alias: the basic_mask keyed on sizeof(_Tp) whose ABI tag is obtained from
// __deduce_abi_t<_Tp, _Np>, i.e. the same ABI deduction that the `vec` alias uses.
//   _Np - requested element count; defaults to __simd_size_v<_Tp, native<_Tp>>.
template <typename _Tp, __simd_size_type _Np = __simd_size_v<_Tp, native<_Tp>>>
using mask = basic_mask<sizeof(_Tp), __deduce_abi_t<_Tp, _Np>>;

// specializations
//
// Forward declarations of the internal helper templates. Only the primary templates are
// declared here; no definitions or specializations are provided in this header.
// NOTE(review): presumably each ABI tag supplies its own specializations of these four
// templates -- confirm against the ABI-specific headers.

// Keyed on (element type, ABI tag).
template <typename _Tp, typename _Abi>
struct __simd_storage;

// Keyed on (element type, ABI tag).
template <typename _Tp, typename _Abi>
struct __simd_operations;

// Keyed on (size in bytes, ABI tag), mirroring the parameters of basic_mask.
template <size_t _Bytes, typename _Abi>
struct __mask_storage;

// Keyed on (size in bytes, ABI tag), mirroring the parameters of basic_mask.
template <size_t _Bytes, typename _Abi>
struct __mask_operations;

_CCCL_END_NAMESPACE_CUDA_STD_SIMD

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___FWD_SIMD_H
4 changes: 4 additions & 0 deletions libcudacxx/include/cuda/std/__internal/namespaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
#define _CCCL_BEGIN_NAMESPACE_CUDA_DRIVER _CCCL_BEGIN_NAMESPACE(cuda::__driver)
#define _CCCL_END_NAMESPACE_CUDA_DRIVER _CCCL_END_NAMESPACE(cuda::__driver)

// Namespaces related to <simd>
// Both macros forward the qualified name cuda::std::simd to the generic
// _CCCL_BEGIN_NAMESPACE / _CCCL_END_NAMESPACE helpers; use them as a matched pair.
#define _CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD _CCCL_BEGIN_NAMESPACE(cuda::std::simd)
#define _CCCL_END_NAMESPACE_CUDA_STD_SIMD _CCCL_END_NAMESPACE(cuda::std::simd)

// Namespaces related to <ranges>
#define _CCCL_BEGIN_NAMESPACE_CUDA_STD_RANGES _CCCL_BEGIN_NAMESPACE(cuda::std::ranges)
#define _CCCL_END_NAMESPACE_CUDA_STD_RANGES _CCCL_END_NAMESPACE(cuda::std::ranges)
Expand Down
50 changes: 50 additions & 0 deletions libcudacxx/include/cuda/std/__simd/abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++ in the CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___SIMD_ABI_H
#define _CUDA_STD___SIMD_ABI_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cstddef/types.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD

// Integer type of the element-count template parameters (_Np) used by the ABI tags in this
// header. It is an alias of ptrdiff_t, hence signed.
using __simd_size_type = ptrdiff_t;

// [simd.expos.abi], simd ABI tags
// Tag type parameterized on an element count _Np. It is only declared here; this header
// provides no definition.
template <__simd_size_type _Np>
struct __fixed_size; // internal ABI tag

// Alias for the internal tag: fixed_size<_Np> is exactly __fixed_size<_Np>.
template <__simd_size_type _Np>
using fixed_size = __fixed_size<_Np>; // implementation-defined ABI

// Native ABI tag for an element type. The type argument is currently ignored: every
// element type maps to fixed_size<1>.
// TODO(fbusato): this could be optimized by using max access size / sizeof(T)
template <typename>
using native = fixed_size<1>; // implementation-defined ABI

// Maps (element type, element count) to an ABI tag. The element type is ignored: the result
// is always fixed_size<_Np>.
template <typename, __simd_size_type _Np>
using __deduce_abi_t = fixed_size<_Np>; // exposition-only

_CCCL_END_NAMESPACE_CUDA_STD_SIMD

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___SIMD_ABI_H
Loading