diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp
index 3909098398..c7111aaf4a 100644
--- a/cpp/include/cuvs/neighbors/common.hpp
+++ b/cpp/include/cuvs/neighbors/common.hpp
@@ -286,14 +286,13 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t
                                 0,
                                 out_array.size() * sizeof(value_type),
                                 raft::resource::get_cuda_stream(res)));
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(),
-                                  sizeof(value_type) * required_stride,
-                                  src.data_handle(),
-                                  sizeof(value_type) * src_stride,
-                                  sizeof(value_type) * src.extent(1),
-                                  src.extent(0),
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
+  raft::copy_matrix(out_array.data_handle(),
+                    required_stride,
+                    src.data_handle(),
+                    src_stride,
+                    src.extent(1),
+                    src.extent(0),
+                    raft::resource::get_cuda_stream(res));
 
   return std::make_unique<out_owning_type>(std::move(out_array), out_layout);
 }
@@ -357,14 +356,13 @@ auto make_strided_dataset(
                                 0,
                                 out_array.size() * sizeof(value_type),
                                 raft::resource::get_cuda_stream(res)));
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(),
-                                  sizeof(value_type) * required_stride,
-                                  src.data_handle(),
-                                  sizeof(value_type) * src_stride,
-                                  sizeof(value_type) * src.extent(1),
-                                  src.extent(0),
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
+  raft::copy_matrix(out_array.data_handle(),
+                    required_stride,
+                    src.data_handle(),
+                    src_stride,
+                    src.extent(1),
+                    src.extent(0),
+                    raft::resource::get_cuda_stream(res));
 
   return std::make_unique<out_owning_type>(std::move(out_array), out_layout);
 }
diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
index 8d6ac67d83..71ec2457f6 100644
--- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh
+++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
@@ -12,6 +12,7 @@
 #include <raft/core/resources.hpp>
 #include <raft/matrix/init.cuh>
 #include <raft/stats/histogram.cuh>
+#include <raft/util/cudart_utils.hpp>
 
 #include <rmm/device_buffer.hpp>
 
@@ -93,14 +94,13 @@ void add_node_core(
   for (const auto& batch : additional_dataset_batch) {
     // Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search
     // Create queries
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(queries.data_handle(),
-                                    sizeof(T) * dim,
-                                    batch.data(),
-                                    sizeof(T) * additional_dataset_view.stride(0),
-                                    sizeof(T) * dim,
-                                    batch.size(),
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(handle)));
+    raft::copy_matrix(queries.data_handle(),
+                      dim,
+                      batch.data(),
+                      additional_dataset_view.stride(0),
+                      dim,
+                      batch.size(),
+                      raft::resource::get_cuda_stream(handle));
 
     const auto queries_view = raft::make_device_matrix_view<const T, std::int64_t>(
       queries.data_handle(), batch.size(), dim);
@@ -407,23 +407,20 @@ void extend_core(
     // The padding area must be filled with zeros.!!!!!!!!!!!!!!!!!!!
     memset(host_updated_dataset.data_handle(), 0, sizeof(T) * host_updated_dataset.size());
 
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_updated_dataset.data_handle(),
-                                    sizeof(T) * stride,
-                                    strided_dset->view().data_handle(),
-                                    sizeof(T) * stride,
-                                    sizeof(T) * dim,
-                                    initial_dataset_size,
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(handle)));
-    RAFT_CUDA_TRY(
-      cudaMemcpy2DAsync(host_updated_dataset.data_handle() + initial_dataset_size * stride,
-                        sizeof(T) * stride,
-                        additional_dataset.data_handle(),
-                        sizeof(T) * additional_dataset.stride(0),
-                        sizeof(T) * dim,
-                        num_new_nodes,
-                        cudaMemcpyDefault,
-                        raft::resource::get_cuda_stream(handle)));
+    raft::copy_matrix(host_updated_dataset.data_handle(),
+                      stride,
+                      strided_dset->view().data_handle(),
+                      stride,
+                      dim,
+                      initial_dataset_size,
+                      raft::resource::get_cuda_stream(handle));
+    raft::copy_matrix(host_updated_dataset.data_handle() + initial_dataset_size * stride,
+                      stride,
+                      additional_dataset.data_handle(),
+                      additional_dataset.stride(0),
+                      dim,
+                      num_new_nodes,
+                      raft::resource::get_cuda_stream(handle));
 
     if (new_dataset_buffer_view.has_value()) {
       updated_dataset_view = new_dataset_buffer_view.value();
diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
index 4e6de1d2b6..323184e757 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
@@ -13,6 +13,7 @@
 #include <raft/core/mdspan_types.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/serialize.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 #include "../../../core/nvtx.hpp"
 #include "../dataset_serialize.hpp"
@@ -169,14 +170,13 @@ void serialize_to_hnswlib(
                  static_cast<size_t>(dataset.extent(0)),
                  static_cast<size_t>(dataset.extent(1)));
     host_dataset = raft::make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
-                                    sizeof(T) * host_dataset.extent(1),
-                                    dataset.data_handle(),
-                                    sizeof(T) * dataset.stride(0),
-                                    sizeof(T) * host_dataset.extent(1),
-                                    dataset.extent(0),
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(res)));
+    raft::copy_matrix(host_dataset.data_handle(),
+                      host_dataset.extent(1),
+                      dataset.data_handle(),
+                      dataset.stride(0),
+                      host_dataset.extent(1),
+                      dataset.extent(0),
+                      raft::resource::get_cuda_stream(res));
     raft::resource::sync_stream(res);
     host_dataset_view = raft::make_const_mdspan(host_dataset.view());
   }
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index 75c51f4da7..59b983b511 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -10,6 +10,7 @@
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/matrix/init.cuh>
+#include <raft/util/cudart_utils.hpp>
 #include <raft/util/integer_utils.hpp>
 
 #include <rmm/resource_ref.hpp>
@@ -284,14 +285,13 @@ void copy_with_padding(
   } else {
     // copy with padding
     raft::matrix::fill(res, dst.view(), T(0));
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst.data_handle(),
-                                    sizeof(T) * dst.extent(1),
-                                    src.data_handle(),
-                                    sizeof(T) * src.extent(1),
-                                    sizeof(T) * src.extent(1),
-                                    src.extent(0),
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(res)));
+    raft::copy_matrix(dst.data_handle(),
+                      dst.extent(1),
+                      src.data_handle(),
+                      src.extent(1),
+                      src.extent(1),
+                      src.extent(0),
+                      raft::resource::get_cuda_stream(res));
   }
 }
 }  // namespace cuvs::neighbors::cagra::detail
diff --git a/cpp/src/neighbors/detail/dataset_serialize.hpp b/cpp/src/neighbors/detail/dataset_serialize.hpp
index 7da60ff906..be11f2da53 100644
--- a/cpp/src/neighbors/detail/dataset_serialize.hpp
+++ b/cpp/src/neighbors/detail/dataset_serialize.hpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
@@ -9,6 +9,7 @@
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/core/serialize.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 #include <raft/core/logger.hpp>
 
@@ -44,14 +45,13 @@ void serialize(const raft::resources& res,
   // Remove padding before saving the dataset
   auto src = dataset.view();
   auto dst = raft::make_host_matrix<DataT, IdxT>(n_rows, dim);
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst.data_handle(),
-                                  sizeof(DataT) * dim,
-                                  src.data_handle(),
-                                  sizeof(DataT) * stride,
-                                  sizeof(DataT) * dim,
-                                  n_rows,
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
+  raft::copy_matrix(dst.data_handle(),
+                    dim,
+                    src.data_handle(),
+                    stride,
+                    dim,
+                    n_rows,
+                    raft::resource::get_cuda_stream(res));
   raft::resource::sync_stream(res);
   raft::serialize_mdspan(res, os, dst.view());
 }
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
index bdb3697809..5c8630632c 100644
--- a/cpp/src/neighbors/detail/hnsw.hpp
+++ b/cpp/src/neighbors/detail/hnsw.hpp
@@ -18,6 +18,7 @@
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/pinned_mdarray.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 #include <hnswlib/hnswalg.h>
 #include <hnswlib/hnswlib.h>
@@ -233,14 +234,13 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
                  static_cast<size_t>(cagra_dataset.extent(1)));
     host_dataset =
       raft::make_host_matrix<T, int64_t>(cagra_dataset.extent(0), cagra_dataset.extent(1));
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
-                                    sizeof(T) * host_dataset.extent(1),
-                                    cagra_dataset.data_handle(),
-                                    sizeof(T) * cagra_dataset.stride(0),
-                                    sizeof(T) * host_dataset.extent(1),
-                                    cagra_dataset.extent(0),
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(res)));
+    raft::copy_matrix(host_dataset.data_handle(),
+                      host_dataset.extent(1),
+                      cagra_dataset.data_handle(),
+                      cagra_dataset.stride(0),
+                      host_dataset.extent(1),
+                      cagra_dataset.extent(0),
+                      raft::resource::get_cuda_stream(res));
     raft::resource::sync_stream(res);
     host_dataset_view = host_dataset.view();
   }
@@ -1048,14 +1048,13 @@ std::enable_if_t<hierarchy == HnswHierarchy::GPU, std::unique_ptr<index<T>>> fro
     }
   } else {
     common::nvtx::range<common::nvtx::domain::cuvs> copy_scope("get_linklist0<device>");
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(appr_algo->get_linklist0(0) + 1,
-                                    appr_algo->size_data_per_element_,
-                                    graph_ptr,
-                                    degree * sizeof(uint32_t),
-                                    degree * sizeof(uint32_t),
-                                    n_rows,
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(res)));
+    raft::copy_matrix(reinterpret_cast<char*>(appr_algo->get_linklist0(0) + 1),
+                      appr_algo->size_data_per_element_,
+                      reinterpret_cast<const char*>(graph_ptr),
+                      degree * sizeof(uint32_t),
+                      degree * sizeof(uint32_t),
+                      n_rows,
+                      raft::resource::get_cuda_stream(res));
 #pragma omp parallel for num_threads(num_threads)
     for (int64_t i = 0; i < n_rows; i++) {
       appr_algo->setListCount(appr_algo->get_linklist0(i), degree);
diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
index 887c9eb448..b9f20e3063 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
@@ -16,6 +16,7 @@
 #include <raft/core/nvtx.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/serialize.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 #include "../dataset_serialize.hpp"
 
@@ -156,15 +157,13 @@ void serialize_sector_aligned(raft::resources const& res,
   if (!dataset_strided) { RAFT_FAIL("Invalid dataset"); }
   auto d_data = dataset_strided->view();
   auto h_data = raft::make_host_matrix<T, int64_t>(npts, ndims);
-  auto stride = dataset_strided->stride();
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(h_data.data_handle(),
-                                  sizeof(T) * ndims,
-                                  d_data.data_handle(),
-                                  sizeof(T) * stride,
-                                  sizeof(T) * ndims,
-                                  npts,
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
+  raft::copy_matrix(h_data.data_handle(),
+                    ndims,
+                    d_data.data_handle(),
+                    dataset_strided->stride(),
+                    ndims,
+                    npts,
+                    raft::resource::get_cuda_stream(res));
   raft::resource::sync_stream(res);
 
   // buffers
diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh
index fc114dd215..cbe06f5ca4 100644
--- a/cpp/src/neighbors/detail/vpq_dataset.cuh
+++ b/cpp/src/neighbors/detail/vpq_dataset.cuh
@@ -20,6 +20,7 @@
 #include <raft/core/resource/cuda_stream_pool.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/linalg/map.cuh>
+#include <raft/util/cudart_utils.hpp>
 #include <raft/util/integer_utils.hpp>
 #include <raft/util/pow2_utils.cuh>
 #include <raft/util/vectorized.cuh>
@@ -56,14 +57,13 @@ auto subsample(raft::resources const& res,
   size_t trainset_ratio = dataset.extent(0) / n_samples;
   auto result = raft::make_device_matrix<value_type, index_type>(res, n_samples, dataset.extent(1));
 
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(result.data_handle(),
-                                  sizeof(value_type) * dim,
-                                  dataset.data_handle(),
-                                  sizeof(value_type) * dim * trainset_ratio,
-                                  sizeof(value_type) * dim,
-                                  n_samples,
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
+  raft::copy_matrix(result.data_handle(),
+                    dim,
+                    dataset.data_handle(),
+                    dim * trainset_ratio,
+                    dim,
+                    n_samples,
+                    raft::resource::get_cuda_stream(res));
   return result;
 }
 
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
index 06862c083d..229c703505 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
@@ -32,6 +32,7 @@
 #include <raft/linalg/norm.cuh>
 #include <raft/matrix/init.cuh>
 #include <raft/stats/histogram.cuh>
+#include <raft/util/cudart_utils.hpp>
 #include <raft/util/pow2_utils.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -411,14 +412,13 @@ inline auto build(raft::resources const& handle,
     rmm::device_uvector<T> trainset(
       n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle));
     // TODO: a proper sampling
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(),
-                                    sizeof(T) * index.dim(),
-                                    dataset,
-                                    sizeof(T) * index.dim() * trainset_ratio,
-                                    sizeof(T) * index.dim(),
-                                    n_rows_train,
-                                    cudaMemcpyDefault,
-                                    stream));
+    raft::copy_matrix(trainset.data(),
+                      index.dim(),
+                      dataset,
+                      index.dim() * trainset_ratio,
+                      index.dim(),
+                      n_rows_train,
+                      stream);
     auto trainset_const_view =
       raft::make_device_matrix_view<const T, IdxT>(trainset.data(), n_rows_train, index.dim());
     auto centers_view = raft::make_device_matrix_view<float, IdxT>(
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
index 05f83141ad..05c0176faa 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
@@ -44,6 +44,7 @@
 #include <raft/random/rng.cuh>
 #include <raft/stats/histogram.cuh>
 #include <raft/util/cuda_utils.cuh>
+#include <raft/util/cudart_utils.hpp>
 #include <raft/util/device_atomics.cuh>
 #include <raft/util/integer_utils.hpp>
 #include <raft/util/pow2_utils.cuh>
@@ -263,28 +264,15 @@ inline void pad_centers_with_norms(raft::resources const& res,
   // We rely on this to enable padded tensor gemm kernels during coarse search.
   cuvs::spatial::knn::detail::utils::memzero(padded_centers, n_lists * dim_ext, stream);
   // combine cluster_centers and their norms
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(padded_centers,
-                                  sizeof(float) * dim_ext,
-                                  centers,
-                                  sizeof(float) * dim,
-                                  sizeof(float) * dim,
-                                  n_lists,
-                                  cudaMemcpyDefault,
-                                  stream));
+  raft::copy_matrix(padded_centers, dim_ext, centers, dim, dim, n_lists, stream);
 
   rmm::device_uvector<float> center_norms(n_lists, stream);
   raft::linalg::norm<raft::linalg::L2Norm, raft::Apply::ALONG_ROWS>(
     res,
     raft::make_device_matrix_view<const float, uint32_t, raft::row_major>(centers, n_lists, dim),
     raft::make_device_vector_view<float, uint32_t>(center_norms.data(), n_lists));
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(padded_centers + dim,
-                                  sizeof(float) * dim_ext,
-                                  center_norms.data(),
-                                  sizeof(float),
-                                  sizeof(float),
-                                  n_lists,
-                                  cudaMemcpyDefault,
-                                  stream));
+  raft::copy_matrix(
+    padded_centers + dim, dim_ext, center_norms.data(), size_t(1), size_t(1), n_lists, stream);
 }
 
 template <typename IdxT>
@@ -1102,14 +1090,13 @@ void extend(raft::resources const& handle,
     // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy.
     rmm::device_uvector<float> cluster_centers(
       size_t(n_clusters) * size_t(index->dim()), stream, device_memory);
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(cluster_centers.data(),
-                                    sizeof(float) * index->dim(),
-                                    index->centers().data_handle(),
-                                    sizeof(float) * index->dim_ext(),
-                                    sizeof(float) * index->dim(),
-                                    n_clusters,
-                                    cudaMemcpyDefault,
-                                    stream));
+    raft::copy_matrix(cluster_centers.data(),
+                      index->dim(),
+                      index->centers().data_handle(),
+                      index->dim_ext(),
+                      index->dim(),
+                      n_clusters,
+                      stream);
     vec_batches.prefetch_next_batch();
     for (const auto& batch : vec_batches) {
       auto batch_data_view = raft::make_device_matrix_view<const T, internal_extents_t>(
@@ -1656,13 +1643,12 @@ inline void extract_centers(raft::resources const& res,
     cluster_centers.extent(1) == index.dim(),
     "Number of columns in the output buffer for cluster centers and index dim are different");
   auto stream = raft::resource::get_cuda_stream(res);
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(cluster_centers.data_handle(),
-                                  sizeof(float) * index.dim(),
-                                  index.centers().data_handle(),
-                                  sizeof(float) * index.dim_ext(),
-                                  sizeof(float) * index.dim(),
-                                  index.n_lists(),
-                                  cudaMemcpyDefault,
-                                  stream));
+  raft::copy_matrix(cluster_centers.data_handle(),
+                    index.dim(),
+                    index.centers().data_handle(),
+                    index.dim_ext(),
+                    index.dim(),
+                    index.n_lists(),
+                    stream);
 }
 }  // namespace cuvs::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu
index 54cc5fee87..2deab7a073 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu
@@ -9,6 +9,7 @@
 #include <cuvs/neighbors/common.hpp>
 #include <cuvs/neighbors/ivf_pq.hpp>
 #include <raft/core/mdspan_types.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 namespace cuvs::neighbors::ivf_pq {
 namespace helpers {
@@ -248,16 +249,7 @@ void make_rotation_matrix(raft::resources const& handle,
     float* mat = inplace ? rotation_matrix : buf.data();
     raft::random::normal(handle, rng, mat, n * n, 0.0f, 1.0f);
     raft::linalg::detail::qrGetQ_inplace(handle, mat, n, n, stream);
-    if (!inplace) {
-      RAFT_CUDA_TRY(cudaMemcpy2DAsync(rotation_matrix,
-                                      sizeof(float) * n_cols,
-                                      mat,
-                                      sizeof(float) * n,
-                                      sizeof(float) * n_cols,
-                                      n_rows,
-                                      cudaMemcpyDefault,
-                                      stream));
-    }
+    if (!inplace) { raft::copy_matrix(rotation_matrix, n_cols, mat, n, n_cols, n_rows, stream); }
   } else {
     uint32_t stride = n + 1;
     auto rotation_matrix_view =
diff --git a/cpp/src/preprocessing/quantize/detail/binary.cuh b/cpp/src/preprocessing/quantize/detail/binary.cuh
index 3c61aba562..fe8288a7cb 100644
--- a/cpp/src/preprocessing/quantize/detail/binary.cuh
+++ b/cpp/src/preprocessing/quantize/detail/binary.cuh
@@ -17,6 +17,7 @@
 #include <raft/random/rng.cuh>
 #include <raft/random/sample_without_replacement.cuh>
 #include <raft/stats/mean.cuh>
+#include <raft/util/cudart_utils.hpp>
 #include <thrust/execution_policy.h>
 #include <thrust/sort.h>
 
@@ -238,14 +239,13 @@ auto train(raft::resources const& res,
         thrust::sort(thrust::device, start_ptr, start_ptr + num_samples);
       }
 
-      RAFT_CUDA_TRY(cudaMemcpy2DAsync(threshold_ptr + dim_offset,
-                                      sizeof(T),
-                                      sampled_dataset_chunk.data_handle() + (num_samples - 1) / 2,
-                                      num_samples * sizeof(T),
-                                      sizeof(T),
-                                      dim_chunk,
-                                      cudaMemcpyDefault,
-                                      raft::resource::get_cuda_stream(res)));
+      raft::copy_matrix(threshold_ptr + dim_offset,
+                        size_t(1),
+                        sampled_dataset_chunk.data_handle() + (num_samples - 1) / 2,
+                        num_samples,
+                        size_t(1),
+                        dim_chunk,
+                        raft::resource::get_cuda_stream(res));
     }
   }
   return quantizer;
diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh
index e04681b974..77fb0ac4f9 100644
--- a/cpp/src/preprocessing/quantize/detail/pq.cuh
+++ b/cpp/src/preprocessing/quantize/detail/pq.cuh
@@ -13,6 +13,7 @@
 #include <cuvs/preprocessing/quantize/pq.hpp>
 #include <raft/core/operators.hpp>
 #include <raft/matrix/init.cuh>
+#include <raft/util/cudart_utils.hpp>
 
 #include "../../../cluster/kmeans_balanced.cuh"
 
@@ -99,14 +100,13 @@ auto train_pq_subspaces(
   }
 
   for (ix_t m = 0; m < pq_dim; m++) {
-    RAFT_CUDA_TRY(cudaMemcpy2DAsync(sub_dataset.data_handle(),
-                                    sizeof(MathT) * pq_len,
-                                    trainset_ptr + m * pq_len,
-                                    sizeof(MathT) * dim,
-                                    sizeof(MathT) * pq_len,
-                                    n_rows_train,
-                                    cudaMemcpyDefault,
-                                    raft::resource::get_cuda_stream(res)));
+    raft::copy_matrix(sub_dataset.data_handle(),
+                      pq_len,
+                      trainset_ptr + m * pq_len,
+                      dim,
+                      pq_len,
+                      n_rows_train,
+                      raft::resource::get_cuda_stream(res));
     auto pq_centers_subspace_view = raft::make_device_matrix_view<MathT, uint32_t, raft::row_major>(
       pq_centers.data_handle() + m * pq_n_centers * pq_len, pq_n_centers, pq_len);
     cuvs::neighbors::detail::train_pq_centers<MathT, ix_t>(