Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions cpp/include/cuvs/neighbors/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,13 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t
0,
out_array.size() * sizeof(value_type),
raft::resource::get_cuda_stream(res)));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(),
sizeof(value_type) * required_stride,
src.data_handle(),
sizeof(value_type) * src_stride,
sizeof(value_type) * src.extent(1),
src.extent(0),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(out_array.data_handle(),
required_stride,
src.data_handle(),
src_stride,
src.extent(1),
src.extent(0),
raft::resource::get_cuda_stream(res));

return std::make_unique<out_owning_type>(std::move(out_array), out_layout);
}
Expand Down Expand Up @@ -357,14 +356,13 @@ auto make_strided_dataset(
0,
out_array.size() * sizeof(value_type),
raft::resource::get_cuda_stream(res)));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(),
sizeof(value_type) * required_stride,
src.data_handle(),
sizeof(value_type) * src_stride,
sizeof(value_type) * src.extent(1),
src.extent(0),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(out_array.data_handle(),
required_stride,
src.data_handle(),
src_stride,
src.extent(1),
src.extent(0),
raft::resource::get_cuda_stream(res));

return std::make_unique<out_owning_type>(std::move(out_array), out_layout);
}
Expand Down
47 changes: 22 additions & 25 deletions cpp/src/neighbors/detail/cagra/add_nodes.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <raft/core/resources.hpp>
#include <raft/matrix/init.cuh>
#include <raft/stats/histogram.cuh>
#include <raft/util/cudart_utils.hpp>

#include <rmm/device_buffer.hpp>

Expand Down Expand Up @@ -93,14 +94,13 @@ void add_node_core(
for (const auto& batch : additional_dataset_batch) {
// Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search
// Create queries
RAFT_CUDA_TRY(cudaMemcpy2DAsync(queries.data_handle(),
sizeof(T) * dim,
batch.data(),
sizeof(T) * additional_dataset_view.stride(0),
sizeof(T) * dim,
batch.size(),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(handle)));
raft::copy_matrix(queries.data_handle(),
dim,
batch.data(),
additional_dataset_view.stride(0),
dim,
batch.size(),
raft::resource::get_cuda_stream(handle));

const auto queries_view = raft::make_device_matrix_view<const T, std::int64_t>(
queries.data_handle(), batch.size(), dim);
Expand Down Expand Up @@ -407,23 +407,20 @@ void extend_core(
// The padding area must be filled with zeros.!!!!!!!!!!!!!!!!!!!
memset(host_updated_dataset.data_handle(), 0, sizeof(T) * host_updated_dataset.size());

RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_updated_dataset.data_handle(),
sizeof(T) * stride,
strided_dset->view().data_handle(),
sizeof(T) * stride,
sizeof(T) * dim,
initial_dataset_size,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(handle)));
RAFT_CUDA_TRY(
cudaMemcpy2DAsync(host_updated_dataset.data_handle() + initial_dataset_size * stride,
sizeof(T) * stride,
additional_dataset.data_handle(),
sizeof(T) * additional_dataset.stride(0),
sizeof(T) * dim,
num_new_nodes,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(handle)));
raft::copy_matrix(host_updated_dataset.data_handle(),
stride,
strided_dset->view().data_handle(),
stride,
dim,
initial_dataset_size,
raft::resource::get_cuda_stream(handle));
raft::copy_matrix(host_updated_dataset.data_handle() + initial_dataset_size * stride,
stride,
additional_dataset.data_handle(),
additional_dataset.stride(0),
dim,
num_new_nodes,
raft::resource::get_cuda_stream(handle));

if (new_dataset_buffer_view.has_value()) {
updated_dataset_view = new_dataset_buffer_view.value();
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <raft/core/mdspan_types.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/serialize.hpp>
#include <raft/util/cudart_utils.hpp>

#include "../../../core/nvtx.hpp"
#include "../dataset_serialize.hpp"
Expand Down Expand Up @@ -169,14 +170,13 @@ void serialize_to_hnswlib(
static_cast<size_t>(dataset.extent(0)),
static_cast<size_t>(dataset.extent(1)));
host_dataset = raft::make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
sizeof(T) * host_dataset.extent(1),
dataset.data_handle(),
sizeof(T) * dataset.stride(0),
sizeof(T) * host_dataset.extent(1),
dataset.extent(0),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(host_dataset.data_handle(),
host_dataset.extent(1),
dataset.data_handle(),
dataset.stride(0),
host_dataset.extent(1),
dataset.extent(0),
raft::resource::get_cuda_stream(res));
raft::resource::sync_stream(res);
host_dataset_view = raft::make_const_mdspan(host_dataset.view());
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/neighbors/detail/cagra/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <raft/core/device_mdarray.hpp>
#include <raft/core/host_mdarray.hpp>
#include <raft/matrix/init.cuh>
#include <raft/util/cudart_utils.hpp>
#include <raft/util/integer_utils.hpp>

#include <rmm/resource_ref.hpp>
Expand Down Expand Up @@ -284,14 +285,13 @@ void copy_with_padding(
} else {
// copy with padding
raft::matrix::fill(res, dst.view(), T(0));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst.data_handle(),
sizeof(T) * dst.extent(1),
src.data_handle(),
sizeof(T) * src.extent(1),
sizeof(T) * src.extent(1),
src.extent(0),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(dst.data_handle(),
dst.extent(1),
src.data_handle(),
src.extent(1),
src.extent(1),
src.extent(0),
raft::resource::get_cuda_stream(res));
}
}
} // namespace cuvs::neighbors::cagra::detail
18 changes: 9 additions & 9 deletions cpp/src/neighbors/detail/dataset_serialize.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
Expand All @@ -9,6 +9,7 @@
#include <raft/core/host_mdarray.hpp>
#include <raft/core/resources.hpp>
#include <raft/core/serialize.hpp>
#include <raft/util/cudart_utils.hpp>

#include <raft/core/logger.hpp>

Expand Down Expand Up @@ -44,14 +45,13 @@ void serialize(const raft::resources& res,
// Remove padding before saving the dataset
auto src = dataset.view();
auto dst = raft::make_host_matrix<DataT, IdxT>(n_rows, dim);
RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst.data_handle(),
sizeof(DataT) * dim,
src.data_handle(),
sizeof(DataT) * stride,
sizeof(DataT) * dim,
n_rows,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(dst.data_handle(),
dim,
src.data_handle(),
stride,
dim,
n_rows,
raft::resource::get_cuda_stream(res));
raft::resource::sync_stream(res);
raft::serialize_mdspan(res, os, dst.view());
}
Expand Down
31 changes: 15 additions & 16 deletions cpp/src/neighbors/detail/hnsw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <raft/core/host_mdspan.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/pinned_mdarray.hpp>
#include <raft/util/cudart_utils.hpp>

#include <hnswlib/hnswalg.h>
#include <hnswlib/hnswlib.h>
Expand Down Expand Up @@ -233,14 +234,13 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
static_cast<size_t>(cagra_dataset.extent(1)));
host_dataset =
raft::make_host_matrix<T, int64_t>(cagra_dataset.extent(0), cagra_dataset.extent(1));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
sizeof(T) * host_dataset.extent(1),
cagra_dataset.data_handle(),
sizeof(T) * cagra_dataset.stride(0),
sizeof(T) * host_dataset.extent(1),
cagra_dataset.extent(0),
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(host_dataset.data_handle(),
host_dataset.extent(1),
cagra_dataset.data_handle(),
cagra_dataset.stride(0),
host_dataset.extent(1),
cagra_dataset.extent(0),
raft::resource::get_cuda_stream(res));
raft::resource::sync_stream(res);
host_dataset_view = host_dataset.view();
}
Expand Down Expand Up @@ -1048,14 +1048,13 @@ std::enable_if_t<hierarchy == HnswHierarchy::GPU, std::unique_ptr<index<T>>> fro
}
} else {
common::nvtx::range<common::nvtx::domain::cuvs> copy_scope("get_linklist0<device>");
RAFT_CUDA_TRY(cudaMemcpy2DAsync(appr_algo->get_linklist0(0) + 1,
appr_algo->size_data_per_element_,
graph_ptr,
degree * sizeof(uint32_t),
degree * sizeof(uint32_t),
n_rows,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(reinterpret_cast<char*>(appr_algo->get_linklist0(0) + 1),
appr_algo->size_data_per_element_,
reinterpret_cast<const char*>(graph_ptr),
degree * sizeof(uint32_t),
degree * sizeof(uint32_t),
n_rows,
raft::resource::get_cuda_stream(res));
#pragma omp parallel for num_threads(num_threads)
for (int64_t i = 0; i < n_rows; i++) {
appr_algo->setListCount(appr_algo->get_linklist0(i), degree);
Expand Down
17 changes: 8 additions & 9 deletions cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <raft/core/nvtx.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/serialize.hpp>
#include <raft/util/cudart_utils.hpp>

#include "../dataset_serialize.hpp"

Expand Down Expand Up @@ -156,15 +157,13 @@ void serialize_sector_aligned(raft::resources const& res,
if (!dataset_strided) { RAFT_FAIL("Invalid dataset"); }
auto d_data = dataset_strided->view();
auto h_data = raft::make_host_matrix<T, int64_t>(npts, ndims);
auto stride = dataset_strided->stride();
RAFT_CUDA_TRY(cudaMemcpy2DAsync(h_data.data_handle(),
sizeof(T) * ndims,
d_data.data_handle(),
sizeof(T) * stride,
sizeof(T) * ndims,
npts,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(h_data.data_handle(),
ndims,
d_data.data_handle(),
dataset_strided->stride(),
ndims,
npts,
raft::resource::get_cuda_stream(res));
raft::resource::sync_stream(res);

// buffers
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/neighbors/detail/vpq_dataset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <raft/core/resource/cuda_stream_pool.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/map.cuh>
#include <raft/util/cudart_utils.hpp>
#include <raft/util/integer_utils.hpp>
#include <raft/util/pow2_utils.cuh>
#include <raft/util/vectorized.cuh>
Expand Down Expand Up @@ -56,14 +57,13 @@ auto subsample(raft::resources const& res,
size_t trainset_ratio = dataset.extent(0) / n_samples;
auto result = raft::make_device_matrix<value_type, index_type>(res, n_samples, dataset.extent(1));

RAFT_CUDA_TRY(cudaMemcpy2DAsync(result.data_handle(),
sizeof(value_type) * dim,
dataset.data_handle(),
sizeof(value_type) * dim * trainset_ratio,
sizeof(value_type) * dim,
n_samples,
cudaMemcpyDefault,
raft::resource::get_cuda_stream(res)));
raft::copy_matrix(result.data_handle(),
dim,
dataset.data_handle(),
dim * trainset_ratio,
dim,
n_samples,
raft::resource::get_cuda_stream(res));
return result;
}

Expand Down
16 changes: 8 additions & 8 deletions cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <raft/linalg/norm.cuh>
#include <raft/matrix/init.cuh>
#include <raft/stats/histogram.cuh>
#include <raft/util/cudart_utils.hpp>
#include <raft/util/pow2_utils.cuh>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -411,14 +412,13 @@ inline auto build(raft::resources const& handle,
rmm::device_uvector<T> trainset(
n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle));
// TODO: a proper sampling
RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(),
sizeof(T) * index.dim(),
dataset,
sizeof(T) * index.dim() * trainset_ratio,
sizeof(T) * index.dim(),
n_rows_train,
cudaMemcpyDefault,
stream));
raft::copy_matrix(trainset.data(),
index.dim(),
dataset,
index.dim() * trainset_ratio,
index.dim(),
n_rows_train,
stream);
auto trainset_const_view =
raft::make_device_matrix_view<const T, IdxT>(trainset.data(), n_rows_train, index.dim());
auto centers_view = raft::make_device_matrix_view<float, IdxT>(
Expand Down
Loading
Loading