Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
83cc753
Fix Doxygen param docs and config to eliminate build warnings
alliepiper Mar 24, 2026
8f39bd8
Fix Sphinx warnings and docs infrastructure
alliepiper Mar 24, 2026
96b70fb
Fix remaining doc build warnings from breathe/Sphinx integration
alliepiper Mar 25, 2026
baa5d18
Revert working changes.
alliepiper Mar 25, 2026
8e196e8
Fix precommit.
alliepiper Mar 25, 2026
f7faa18
Add missing label anchor for uninitialized-async-buffer ref
alliepiper Mar 25, 2026
b465e77
Enable Doxygen warnings-as-errors and fix all trivial doc errors
alliepiper Mar 25, 2026
d1ff367
Restore :: global scope prefix in block_radix_sort doc comments
alliepiper Mar 25, 2026
bc80653
Escape global-scope doc references to suppress Doxygen link errors
alliepiper Mar 25, 2026
4a8815b
Fix unresolved Doxygen link errors for cross-project type references
alliepiper Mar 25, 2026
cacc046
Fix STF shape_of Doxygen errors from misused @name tag
alliepiper Mar 25, 2026
1753bd9
Fix miscellaneous Doxygen structural errors
alliepiper Mar 25, 2026
a8dc4e2
Hide recursive class inheritance from Doxygen
alliepiper Mar 25, 2026
f37e0f2
Fix Doxygen errors for excluded and undefined symbols
alliepiper Mar 25, 2026
78a9bb1
Remove duplicate tuple_element/tuple_size docs from thrust/pair.h
alliepiper Mar 25, 2026
70162c4
Exclude CUB dispatch and kernel internals from Doxygen
alliepiper Mar 25, 2026
a4da072
Restore __hierarchy to cudax Doxyfile INPUT
alliepiper Mar 25, 2026
e706400
Add cuda::experimental::group to breathe skip list
alliepiper Mar 25, 2026
4107fd7
clang-format
alliepiper Mar 25, 2026
7c9c184
Suppress unused-parameter warnings on named allocator ctx params
alliepiper Mar 25, 2026
373baae
Revert unnecessary util_vsmem.cuh doc comment change
alliepiper Mar 25, 2026
d461d93
Add consistent logging to all _BREATHE_SKIP_SYMBOLS skip sites
alliepiper Mar 25, 2026
29883a1
Revert unnecessary param naming on unused operator parameters
alliepiper Mar 25, 2026
ad31617
Auto-detect and install doc build dependencies in gen_docs.bash
alliepiper Mar 25, 2026
c98b0bd
formatting.
alliepiper Mar 25, 2026
83cf9ee
Restore @relates zip_iterator for make_zip_function
alliepiper Mar 30, 2026
474cd76
Merge remote-tracking branch 'origin/main' into 7053_doc_ci_fail
alliepiper Mar 30, 2026
64ae6df
Merge remote-tracking branch 'origin/main' into 7053_doc_ci_fail
alliepiper Mar 30, 2026
c07a739
Merge remote-tracking branch 'origin/main' into 7053_doc_ci_fail
alliepiper Apr 2, 2026
a8faee2
Fix doc toctree references for renamed saturating overflow files
alliepiper Apr 2, 2026
3095614
Apply clang-format
alliepiper Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions .github/actions/docs-build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,10 @@ inputs:
runs:
using: "composite"
steps:
# Install required dependencies
- name: Install docs build dependencies
shell: bash --noprofile --norc -euo pipefail {0}
run: |
sudo apt-get update
sudo apt-get install -y cmake ninja-build python3-venv git flex bison

# Build all docs
# Build all docs (gen_docs.bash detects and installs missing deps)
- name: Build all docs
shell: bash --noprofile --norc -euo pipefail {0}
run: ./docs/gen_docs.bash
run: ./docs/gen_docs.bash --allow-dep-install

# Copy all docs to the right folder
- name: Move docs to right folder
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_radix_sort_onesweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ inline ::std::ostream& operator<<(::std::ostream& os, RadixSortStoreAlgorithm al
template <int NominalBlockThreads4B,
int NominalItemsPerThread4B,
typename ComputeT,
/** \brief Number of private histograms to use in the ranker;
/** Number of private histograms to use in the ranker;
ignored if the ranking algorithm is not one of RADIX_RANK_MATCH_EARLY_COUNTS_* */
int RankNumParts,
/** \brief Ranking algorithm used in the onesweep kernel. Only algorithms that
/** Ranking algorithm used in the onesweep kernel. Only algorithms that
support warp-strided key arrangement and count callbacks are supported. */
RadixRankAlgorithm RankAlgorithm,
BlockScanAlgorithm ScanAlgorithm,
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
// SPDX-License-Identifier: BSD-3

/**
* @file cub::AgentScan implements a stateful abstraction of CUDA thread blocks
* for participating in device-wide prefix scan .
* @file
* @brief cub::AgentScan implements a stateful abstraction of CUDA thread blocks
* for participating in device-wide prefix scan.
*/

#pragma once
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/agent/agent_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
// SPDX-License-Identifier: BSD-3

/**
* @file AgentScanByKey implements a stateful abstraction of CUDA thread blocks
* for participating in device-wide prefix scan by key.
* @file
* @brief AgentScanByKey implements a stateful abstraction of CUDA thread blocks
* for participating in device-wide prefix scan by key.
*/

#pragma once
Expand Down
1 change: 1 addition & 0 deletions cub/cub/block/block_load_to_shared.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ namespace detail
//! - Uses special instructions/hardware acceleration when available (cp.async.bulk on Hopper+, cp.async on Ampere).
//! - By guaranteeing 16 byte alignment and size multiple for the global span, a faster path is taken and less shared
//! memory is needed for the destination buffer.
//! @endrst
template <int BlockDimX, int BlockDimY = 1, int BlockDimZ = 1>
struct BlockLoadToShared
{
Expand Down
6 changes: 6 additions & 0 deletions cub/cub/block/block_radix_rank.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,9 @@ public:
*
* @param[in] digit_extractor
* The digit extractor
*
* @param[in] callback
* Callback to receive digit counts
*/
template <typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
_CCCL_DEVICE _CCCL_FORCEINLINE void
Expand Down Expand Up @@ -814,6 +817,9 @@ public:
* [(threadIdx.x * BINS_TRACKED_PER_THREAD)
* ...
* (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]
*
* @param[in] callback
* Callback to receive digit counts
*/
template <typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
_CCCL_DEVICE _CCCL_FORCEINLINE void RankKeys(
Expand Down
36 changes: 20 additions & 16 deletions cub/cub/block/block_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,10 @@ private:
*
* @param is_keys_only
* Tag indicating whether this is a keys-only sort
*
* @param decomposer
* Callable object responsible for decomposing a key into a tuple of references to its
* constituent arithmetic types
*/
template <bool DESCENDING, bool KEYS_ONLY, class DecomposerT = detail::identity_decomposer_t>
_CCCL_DEVICE _CCCL_FORCEINLINE void SortBlocked(
Expand Down Expand Up @@ -651,7 +655,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -722,7 +726,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -855,7 +859,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -933,7 +937,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1055,7 +1059,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1126,7 +1130,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1266,7 +1270,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1344,7 +1348,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1479,7 +1483,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1551,7 +1555,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1688,7 +1692,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1762,7 +1766,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1892,7 +1896,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -1964,7 +1968,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -2100,7 +2104,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down Expand Up @@ -2174,7 +2178,7 @@ public:
//! @tparam DecomposerT
//! **[inferred]** Type of a callable object responsible for decomposing a
//! ``KeyT`` into a tuple of references to its constituent arithmetic types:
//! ``::cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! ``cuda::std::tuple<ArithmeticTs&...> operator()(KeyT &key)``.
//! The leftmost element of the tuple is considered the most significant.
//! The call operator must not modify members of the key.
//!
Expand Down
2 changes: 0 additions & 2 deletions cub/cub/device/device_copy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ struct DeviceCopy
//!
//! @param[in] env
//! **[optional]** Execution environment. Default is ``cuda::std::execution::env{}``.
//! @endrst
template <typename InputIt, typename OutputIt, typename SizeIteratorT, typename EnvT = ::cuda::std::execution::env<>>
[[nodiscard]] CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t
Batched(InputIt input_it, OutputIt output_it, SizeIteratorT sizes, ::cuda::std::int64_t num_ranges, EnvT env = {})
Expand Down Expand Up @@ -428,7 +427,6 @@ struct DeviceCopy
//!
//! @param[in] env
//! **[optional]** Execution environment. Default is ``cuda::std::execution::env{}``.
//! @endrst
template <typename T_In,
typename Extents_In,
typename Layout_In,
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/device/device_find.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ struct DeviceFind
//! .. literalinclude:: ../../../cub/test/catch2_test_device_find_if_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin find-if-predicate
//! :end-before: example-end find-if-predicate
//! :start-after: example-begin find-if-predicate
//! :end-before: example-end find-if-predicate
//!
//! .. literalinclude:: ../../../cub/test/catch2_test_device_find_if_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin device-find-if
//! :end-before: example-end device-find-if
//! :start-after: example-begin device-find-if
//! :end-before: example-end device-find-if
//! @endrst
//!
//! @tparam InputIteratorT
Expand Down
7 changes: 2 additions & 5 deletions cub/cub/device/device_for.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1268,11 +1268,8 @@ public:
//! **[inferred]** A function object with arity equal to the number of extents + 1 for the linear index (iteration).
//! The first parameter is the linear index, followed by one parameter for each dimension coordinate.
//!
//! @param[in] layout
//! Layout object that determines the iteration order (layout_left for column-major, layout_right for row-major)
//!
//! @param[in] extents
//! Extents object that represents a multi-dimensional index space
//! @param[in] layout_mapping
//! Layout mapping object that determines the iteration order and represents a multi-dimensional index space
//!
//! @param[in] op
//! Function object to apply to each linear index (iteration) and multi-dimensional coordinates.
Expand Down
2 changes: 0 additions & 2 deletions cub/cub/device/device_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,6 @@ public:
//!
//! @param[in] env
//! **[optional]** Execution environment. Default is ``cuda::std::execution::env{}``.
//! @endrst
template <typename InputIteratorT,
typename FlagIterator,
typename OutputIteratorT,
Expand Down Expand Up @@ -605,7 +604,6 @@ public:
//!
//! @param[in] env
//! **[optional]** Execution environment. Default is ``cuda::std::execution::env{}``.
//! @endrst
template <
typename InputIteratorT,
typename OutputIteratorT,
Expand Down
Loading
Loading