rapidsai · bdice · Mar 30, 2026
@@ -14,17 +14,20 @@
 #include <raft/util/cudart_utils.hpp>
 #include <rapids_logger/logger.hpp>
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device_memory_resource.hpp>
+#include <rmm/mr/cuda_memory_resource.hpp>
 #include <rmm/mr/managed_memory_resource.hpp>
-#include <rmm/mr/owning_wrapper.hpp>
 #include <rmm/mr/per_device_resource.hpp>
-#include <rmm/mr/pool_memory_resource.hpp>
 #include <rmm/mr/pinned_host_memory_resource.hpp>
+#include <rmm/mr/pool_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include "../core/exceptions.hpp"
 
+#include <cuda/memory_resource>
+
 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <thread>
 
 extern "C" cuvsError_t cuvsResourcesCreate(cuvsResources_t* res)
@@ -132,60 +135,47 @@ extern "C" cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byte
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource();
-    *ptr         = mr->allocate(raft::resource::get_cuda_stream(*res_ptr), bytes);
+    auto mr      = rmm::mr::get_current_device_resource_ref();
+    *ptr         = mr.allocate(raft::resource::get_cuda_stream(*res_ptr), bytes);
   });
 }
 
 extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes)
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource();
-    mr->deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes);
+    auto mr      = rmm::mr::get_current_device_resource_ref();
+    mr.deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes);
   });
 }
 
-thread_local std::shared_ptr<
-  rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>,
-                          rmm::mr::device_memory_resource>>
-  pool_mr;
+thread_local cuda::mr::any_resource<cuda::mr::device_accessible> pool_upstream;
+thread_local std::optional<rmm::mr::pool_memory_resource> pool_mr;
 
 extern "C" cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_percent,
                                                        int max_pool_size_percent,
                                                        bool managed)
 {
   return cuvs::core::translate_exceptions([=] {
-    // Upstream memory resource needs to be a cuda_memory_resource
-    auto cuda_mr         = rmm::mr::get_current_device_resource();
-    auto* cuda_mr_casted = dynamic_cast<rmm::mr::cuda_memory_resource*>(cuda_mr);
-    if (cuda_mr_casted == nullptr) {
-      throw std::runtime_error("Current memory resource is not a cuda_memory_resource");
-    }
-
     auto initial_size = rmm::percent_of_free_device_memory(initial_pool_size_percent);
     auto max_size     = rmm::percent_of_free_device_memory(max_pool_size_percent);
 
-    auto mr = std::shared_ptr<rmm::mr::device_memory_resource>();
     if (managed) {
-      mr = std::static_pointer_cast<rmm::mr::device_memory_resource>(
-        std::make_shared<rmm::mr::managed_memory_resource>());
+      pool_upstream = rmm::mr::managed_memory_resource{};
     } else {
-      mr = std::static_pointer_cast<rmm::mr::device_memory_resource>(
-        std::make_shared<rmm::mr::cuda_memory_resource>());
+      pool_upstream = rmm::mr::cuda_memory_resource{};
     }
 
-    pool_mr =
-      rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(mr, initial_size, max_size);
+    pool_mr.emplace(pool_upstream, initial_size, max_size);
 
-    rmm::mr::set_current_device_resource(pool_mr.get());
+    rmm::mr::set_current_device_resource_ref(*pool_mr);
   });
 }
 
 extern "C" cuvsError_t cuvsRMMMemoryResourceReset()
 {
   return cuvs::core::translate_exceptions([=] {
-    rmm::mr::set_current_device_resource(rmm::mr::detail::initial_resource());
+    rmm::mr::reset_current_device_resource_ref();
     pool_mr.reset();
   });
 }

@@ -1,14 +1,16 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
 
 #include <raft/core/error.hpp>
 #include <raft/core/logger_macros.hpp>
 
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device_memory_resource.hpp>
+#include <rmm/detail/aligned.hpp>
+#include <rmm/resource_ref.hpp>
+
+#include <cuda/memory_resource>
 
 #include <sys/mman.h>
 
@@ -17,37 +19,25 @@
 
 namespace raft::mr {
 /**
- * @brief `device_memory_resource` derived class that uses mmap to allocate memory.
- * This class enables memory allocation using huge pages.
+ * @brief Memory resource that uses mmap to allocate memory with huge pages.
  * It is assumed that the allocated memory is directly accessible on device. This currently only
  * works on GH systems.
  *
  * TODO(tfeher): consider improving or removing this helper once we made progress with
  * https://github.com/rapidsai/raft/issues/1819
  */
-class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
+class cuda_huge_page_resource {
  public:
   cuda_huge_page_resource()                                                  = default;
-  ~cuda_huge_page_resource() override                                        = default;
+  ~cuda_huge_page_resource()                                                 = default;
   cuda_huge_page_resource(cuda_huge_page_resource const&)                    = default;
   cuda_huge_page_resource(cuda_huge_page_resource&&)                         = default;
   auto operator=(cuda_huge_page_resource const&) -> cuda_huge_page_resource& = default;
   auto operator=(cuda_huge_page_resource&&) -> cuda_huge_page_resource&      = default;
 
- private:
-  /**
-   * @brief Allocates memory of size at least `bytes` using cudaMalloc.
-   *
-   * The returned pointer has at least 256B alignment.
-   *
-   * @note Stream argument is ignored
-   *
-   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
-   *
-   * @param bytes The size, in bytes, of the allocation
-   * @return void* Pointer to the newly allocated memory
-   */
-  auto do_allocate(std::size_t bytes, rmm::cuda_stream_view) -> void* override
+  void* allocate(cuda::stream_ref,
+                 std::size_t bytes,
+                 std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
   {
     void* addr{nullptr};
     addr = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -60,36 +50,29 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
     return addr;
   }
 
-  /**
-   * @brief Deallocate memory pointed to by \p p.
-   *
-   * @note Stream argument is ignored.
-   *
-   * @throws Nothing.
-   *
-   * @param p Pointer to be deallocated
-   */
-  void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) noexcept override
+  void deallocate(cuda::stream_ref,
+                  void* ptr,
+                  std::size_t size,
+                  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
   {
     if (munmap(ptr, size) == -1) { RAFT_LOG_ERROR("huge_page_resource::munmap failed"); }
   }
 
-  /**
-   * @brief Compare this resource to another.
-   *
-   * Two cuda_huge_page_resources always compare equal, because they can each
-   * deallocate memory allocated by the other.
-   *
-   * @throws Nothing.
-   *
-   * @param other The other resource to compare to
-   * @return true If the two resources are equivalent
-   * @return false If the two resources are not equal
-   */
-  [[nodiscard]] auto do_is_equal(device_memory_resource const& other) const noexcept
-    -> bool override
+  void* allocate_sync(std::size_t bytes, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
+  {
+    return allocate(cuda::stream_ref{cudaStream_t{nullptr}}, bytes, alignment);
+  }
+
+  void deallocate_sync(void* ptr,
+                       std::size_t bytes,
+                       std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
   {
-    return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
+    deallocate(cuda::stream_ref{cudaStream_t{nullptr}}, ptr, bytes, alignment);
   }
+
+  bool operator==(cuda_huge_page_resource const&) const noexcept { return true; }
+
+  friend void get_property(cuda_huge_page_resource const&, cuda::mr::device_accessible) noexcept {}
 };
+static_assert(cuda::mr::resource_with<cuda_huge_page_resource, cuda::mr::device_accessible>);
 }  // namespace raft::mr
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
@@ -15,14 +15,16 @@
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/mr/device_memory_resource.hpp>
 #include <rmm/mr/failure_callback_resource_adaptor.hpp>
 #include <rmm/mr/managed_memory_resource.hpp>
+#include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <memory>
 #include <type_traits>
@@ -65,17 +67,16 @@ inline auto rmm_oom_callback(std::size_t bytes, void*) -> bool
  */
 class shared_raft_resources {
  public:
-  using pool_mr_type  = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
-  using mr_type       = rmm::mr::failure_callback_resource_adaptor<pool_mr_type>;
+  using pool_mr_type  = rmm::mr::pool_memory_resource;
+  using mr_type       = rmm::mr::failure_callback_resource_adaptor<>;
   using large_mr_type = rmm::mr::managed_memory_resource;
 
   shared_raft_resources()
   try
-    : orig_resource_{rmm::mr::get_current_device_resource()},
-      pool_resource_(orig_resource_, 1024 * 1024 * 1024ull),
-      resource_(&pool_resource_, rmm_oom_callback, nullptr),
+    : pool_resource_(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull),
+      resource_(pool_resource_, rmm_oom_callback, nullptr),
       large_mr_() {
-    rmm::mr::set_current_device_resource(&resource_);
+    orig_resource_ = rmm::mr::set_current_device_resource_ref(resource_);
   } catch (const std::exception& e) {
     auto cuda_status = cudaGetLastError();
     size_t free      = 0;
@@ -95,15 +96,12 @@ class shared_raft_resources {
   shared_raft_resources(const shared_raft_resources& res)                      = delete;
   auto operator=(const shared_raft_resources& other) -> shared_raft_resources& = delete;
 
-  ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); }
+  ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource_ref(orig_resource_); }
 
-  auto get_large_memory_resource() noexcept
-  {
-    return static_cast<rmm::mr::device_memory_resource*>(&large_mr_);
-  }
+  auto get_large_memory_resource() noexcept -> rmm::device_async_resource_ref { return large_mr_; }
 
  private:
-  rmm::mr::device_memory_resource* orig_resource_;
+  cuda::mr::any_resource<cuda::mr::device_accessible> orig_resource_;
   pool_mr_type pool_resource_;
   mr_type resource_;
   large_mr_type large_mr_;
@@ -129,12 +127,8 @@ class configured_raft_resources {
       res_{std::make_unique<raft::device_resources>(
         rmm::cuda_stream_view(get_stream_from_global_pool()))}
   {
-    // set the large workspace resource to the raft handle, but without the deleter
-    // (this resource is managed by the shared_res).
     raft::resource::set_large_workspace_resource(
-      *res_,
-      std::shared_ptr<rmm::mr::device_memory_resource>(shared_res_->get_large_memory_resource(),
-                                                       raft::void_op{}));
+      *res_, raft::mr::device_resource{shared_res_->get_large_memory_resource()});
   }
 
   /** Default constructor creates all resources anew. */

@@ -166,9 +166,9 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type)
   {
     switch (mem_type) {
-      case (AllocatorType::kHostPinned): return &mr_pinned_;
-      case (AllocatorType::kHostHugePage): return &mr_huge_page_;
-      default: return rmm::mr::get_current_device_resource();
+      case (AllocatorType::kHostPinned): return mr_pinned_;
+      case (AllocatorType::kHostHugePage): return mr_huge_page_;
+      default: return rmm::mr::get_current_device_resource_ref();
     }
   }
 };

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -13,7 +13,8 @@
 #include <raft/core/resource/cuda_stream.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/mr/device_memory_resource.hpp>
+#include <rmm/mr/per_device_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::neighbors {
 
@@ -87,8 +88,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::mr::device_memory_resource* mr = nullptr;
-  auto pool_guard                     = raft::get_pool_memory_resource(mr, 1024 * 1024);
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
@@ -116,8 +116,7 @@ void naive_knn(raft::resources const& handle,
                                           static_cast<int>(k),
                                           dist_topk + offset * k,
                                           indices_topk + offset * k,
-                                          type != cuvs::distance::DistanceType::InnerProduct,
-                                          mr);
+                                          type != cuvs::distance::DistanceType::InnerProduct);
   }
   RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
 }

@@ -364,7 +364,7 @@ void compute_norm(const raft::resources& handle,
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("compute_norm");
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<MathT> mapped_dataset(
-    0, stream, mr.value_or(raft::resource::get_workspace_resource(handle)));
+    0, stream, mr.value_or(raft::resource::get_workspace_resource_ref(handle)));
 
   const MathT* dataset_ptr = nullptr;
 
@@ -426,7 +426,7 @@ void predict(const raft::resources& handle,
   auto stream = raft::resource::get_cuda_stream(handle);
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "predict(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
-  auto mem_res = mr.value_or(raft::resource::get_workspace_resource(handle));
+  auto mem_res = mr.value_or(raft::resource::get_workspace_resource_ref(handle));
   auto [max_minibatch_size, _mem_per_row] =
     calc_minibatch_size<MathT>(n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
   rmm::device_uvector<MathT> cur_dataset(
@@ -1038,7 +1038,7 @@ void build_hierarchical(const raft::resources& handle,
 
   // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf.
   rmm::mr::managed_memory_resource managed_memory;
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle);
   auto [max_minibatch_size, mem_per_row] =
     calc_minibatch_size<MathT>(n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
 
@@ -1079,8 +1079,8 @@ void build_hierarchical(const raft::resources& handle,
     CounterT;
 
   // build coarse clusters (mesoclusters)
-  rmm::device_uvector<LabelT> mesocluster_labels_buf(n_rows, stream, &managed_memory);
-  rmm::device_uvector<CounterT> mesocluster_sizes_buf(n_mesoclusters, stream, &managed_memory);
+  rmm::device_uvector<LabelT> mesocluster_labels_buf(n_rows, stream, managed_memory);
+  rmm::device_uvector<CounterT> mesocluster_sizes_buf(n_mesoclusters, stream, managed_memory);
   {
     rmm::device_uvector<MathT> mesocluster_centers_buf(n_mesoclusters * dim, stream, device_memory);
     build_clusters(handle,
@@ -1136,7 +1136,7 @@ void build_hierarchical(const raft::resources& handle,
                                              fine_clusters_nums_max,
                                              cluster_centers,
                                              mapping_op,
-                                             &managed_memory,
+                                             managed_memory,
                                              device_memory);
   RAFT_EXPECTS(n_clusters_done == n_clusters, "Didn't process all clusters.");