From 44006eeb2f914936f3ea99652967a74698491ef4 Mon Sep 17 00:00:00 2001 From: rmaschal Date: Wed, 1 Apr 2026 18:47:56 -0700 Subject: [PATCH] ScaNN: Fix AVQ prefetch (#1899) Switching to usage of modern RAFT in cuVS (https://github.com/rapidsai/cuvs/pull/1837) introduced a bug where the prefetched gather for AVQ is performed using the stream associated with raft::device_resources rather than the provided stream for copying. This led to two issues: 1) Elimination of the benefit of prefetching, as copies were scheduled on the same stream as other gpu work 2) Possible recall loss. Synchronization was still performed against the copy stream, potentially allowing the host to proceed before the prefetch copy is complete. This PR sets the stream associated with the resource to the copy stream before prefetching, and back when done. Authors: - https://github.com/rmaschal Approvers: - Anupam (https://github.com/aamijar) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cuvs/pull/1899 --- cpp/src/neighbors/scann/detail/scann_avq.cuh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh index e7c1663f3e..6c3bb045e4 100644 --- a/cpp/src/neighbors/scann/detail/scann_avq.cuh +++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh @@ -511,11 +511,16 @@ class cluster_loader { raft::make_device_matrix_view(d_cluster_copy_buf_.data_handle(), size, dim_); if (needs_copy_) { + // For prefetching to overlap with other gpu work + // we need to schedule copies on the provided copy stream stream_ + auto copy_res = raft::resources(res); + raft::resource::set_cuda_stream(copy_res, stream_); + // htod auto h_cluster_ids = raft::make_pinned_vector_view(cluster_ids_buf_.data_handle(), size); - raft::copy(res, h_cluster_ids, cluster_ids); + raft::copy(copy_res, h_cluster_ids, cluster_ids); raft::resource::sync_stream(res, stream_); auto pinned_cluster = 
raft::make_pinned_matrix_view( @@ -529,9 +534,8 @@ class cluster_loader { sizeof(T) * dim_); } - raft::copy(res, cluster_vectors, raft::make_const_mdspan(pinned_cluster)); + raft::copy(copy_res, cluster_vectors, raft::make_const_mdspan(pinned_cluster)); raft::resource::sync_stream(res, stream_); - } else { // dtod auto dataset_view =