From e1e3f754a4ee702968dfb09f6096e6efd6624edd Mon Sep 17 00:00:00 2001
From: Bryce Allen
Date: Wed, 12 Jul 2023 16:54:09 -0400
Subject: [PATCH 1/3] ci: update to cuda 11.0.3, ubuntu 20.04

The cuda 10.2 ubuntu 18.04 image is no longer on Docker Hub.
---
 .github/workflows/ccpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 987687d1..87fbc1d2 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -194,7 +194,7 @@ jobs:
 
   build-cuda:
     runs-on: ubuntu-latest
-    container: nvidia/cuda:10.2-devel-ubuntu18.04
+    container: nvidia/cuda:11.0.3-devel-ubuntu20.04
     env:
       GTEST_VERSION: 1.10.0
       GTEST_ROOT: ${{ github.workspace }}/googletest
@@ -217,7 +217,7 @@ jobs:
           cmake -S googletest-release-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
           cmake --build build -t install
         env:
-          CXX: g++-7
+          CXX: g++
       - name: cmake thrust
         run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON
       - name: cmake thrust build

From f827c7d52d17519fa284535c7b3da42d71c4744a Mon Sep 17 00:00:00 2001
From: Bryce Allen
Date: Wed, 21 Jun 2023 15:01:11 -0400
Subject: [PATCH 2/3] tests: add test copy managed adapted to device

---
 tests/test_adapt.cxx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/test_adapt.cxx b/tests/test_adapt.cxx
index e358d2a3..1671f01f 100644
--- a/tests/test_adapt.cxx
+++ b/tests/test_adapt.cxx
@@ -28,6 +28,27 @@ TEST(adapt, adapt_complex)
   EXPECT_EQ(a[N * M - 1], (T{1., -1.}));
 }
 
+TEST(adapt, adapt_copy)
+{
+  constexpr int N = 3;
+
+  // managed allocation adapted
+  auto p_coeff =
+    gt::backend::gallocator<gt::space::managed>::allocate<double>(N);
+  auto coeff_adapt = gt::adapt<1>(p_coeff, gt::shape(N));
+
+  // host allocation
+  gt::gtensor<double, 1> coeff_gt(gt::shape(N));
+
+  // copy between managed and host
+  gt::copy(coeff_adapt, coeff_gt);
+
+  EXPECT_EQ(coeff_gt, coeff_adapt);
+
+  // clean up
+  gt::backend::gallocator<gt::space::managed>::deallocate(p_coeff);
+}
+
 TEST(adapt, adapt_device)
 {
   constexpr int N = 10;

From 897c8eeecb191e7593fed70027f9e62f93ebc607 Mon Sep 17 00:00:00 2001
From: Bryce Allen
Date: Wed, 21 Jun 2023 16:34:46 -0400
Subject: [PATCH 3/3] sycl: use sycl copy if either pointer is device accessible

---
 include/gtensor/backend_host.h        |  5 +++++
 include/gtensor/backend_sycl.h        | 21 ++++++++++++++-------
 include/gtensor/backend_sycl_device.h |  9 +++++++++
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/gtensor/backend_host.h b/include/gtensor/backend_host.h
index 49efa736..d9da2aad 100644
--- a/include/gtensor/backend_host.h
+++ b/include/gtensor/backend_host.h
@@ -101,6 +101,9 @@ struct selector
 };
 } // namespace allocator_impl
 
+// Note: SYCL has a custom host copy
+#ifndef GTENSOR_DEVICE_SYCL
+
 namespace copy_impl
 {
 template <typename InputPtr, typename OutputPtr>
@@ -116,6 +119,8 @@ inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
 }
 
 } // namespace copy_impl
+#endif // GTENSOR_DEVICE_SYCL
+
 namespace fill_impl
 {
 template <typename Ptr, typename T>

diff --git a/include/gtensor/backend_sycl.h b/include/gtensor/backend_sycl.h
index f23a8827..69a12991 100644
--- a/include/gtensor/backend_sycl.h
+++ b/include/gtensor/backend_sycl.h
@@ -2,6 +2,7 @@
 #ifndef GTENSOR_BACKEND_SYCL_H
 #define GTENSOR_BACKEND_SYCL_H
 
+#include <algorithm>
 #include <cstdlib>
 #include <exception>
 #include <iostream>
@@ -140,6 +141,9 @@ namespace copy_impl
 {
 template <typename InputPtr, typename OutputPtr>
 inline void sycl_copy_n(InputPtr in, size_type count, OutputPtr out)
 {
+  if (gt::raw_pointer_cast(in) == gt::raw_pointer_cast(out)) {
+    return;
+  }
   ::sycl::queue& q = gt::backend::sycl::get_queue();
   auto in_raw = gt::raw_pointer_cast(in);
   auto out_raw = gt::raw_pointer_cast(out);
@@ -179,14 +183,20 @@ inline void copy_n(gt::space::host tag_in, gt::space::sycl tag_out, InputPtr in,
   sycl_copy_n(in, count, out);
 }
 
-#if 0
 template <typename InputPtr, typename OutputPtr>
 inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
                    size_type count, OutputPtr out)
 {
-  sycl_copy_n(in, count, out);
+  if (in == out) {
+    return;
+  }
+  if (gt::backend::sycl::is_device_accessible(in) ||
+      gt::backend::sycl::is_device_accessible(out)) {
+    sycl_copy_n(in, count, out);
+  } else {
+    std::copy_n(in, count, out);
+  }
 }
-#endif
 
 } // namespace copy_impl
@@ -251,10 +261,7 @@ class backend_ops
   template <typename Ptr>
   static bool is_device_accessible(const Ptr ptr)
   {
-    auto& q = gt::backend::sycl::get_queue();
-    auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context());
-    return (alloc_type == ::sycl::usm::alloc::device ||
-            alloc_type == ::sycl::usm::alloc::shared);
+    return gt::backend::sycl::is_device_accessible(ptr);
   }
 
   template <typename Ptr>
diff --git a/include/gtensor/backend_sycl_device.h b/include/gtensor/backend_sycl_device.h
index cf0be422..eafd80af 100644
--- a/include/gtensor/backend_sycl_device.h
+++ b/include/gtensor/backend_sycl_device.h
@@ -410,6 +410,15 @@ inline void mem_info(size_t* free, size_t* total)
 
 #endif // GTENSOR_DEVICE_SYCL_L0
 
+template <typename Ptr>
+bool is_device_accessible(const Ptr ptr)
+{
+  auto& q = get_queue();
+  auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context());
+  return (alloc_type == ::sycl::usm::alloc::device ||
+          alloc_type == ::sycl::usm::alloc::shared);
+}
+
 } // namespace sycl
 
 } // namespace backend
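
For reference, the dispatch that patch 3 gives the SYCL host-to-host copy_n overload can be illustrated outside gtensor with a small standalone sketch: query the USM allocation kind of each pointer and route the copy through the SYCL queue when either side is device or shared memory, otherwise fall back to std::copy_n. This is a sketch only, not gtensor code; it assumes a SYCL 2020 implementation (the <sycl/sycl.hpp> header, sycl::get_pointer_type, queue::memcpy), and the names usm_device_accessible and copy_n_any are illustrative, not part of any library API.

// Standalone sketch (not gtensor code) of the "use sycl copy if either
// pointer is device accessible" idea from patch 3. T must be trivially
// copyable, since the device-capable path uses a byte-wise memcpy.
#include <algorithm>
#include <cstddef>
#include <sycl/sycl.hpp>

template <typename T>
bool usm_device_accessible(sycl::queue& q, const T* p)
{
  // get_pointer_type returns usm::alloc::unknown for plain host memory
  auto kind = sycl::get_pointer_type(p, q.get_context());
  return kind == sycl::usm::alloc::device || kind == sycl::usm::alloc::shared;
}

template <typename T>
void copy_n_any(sycl::queue& q, const T* in, std::size_t count, T* out)
{
  if (in == out) {
    return; // same range: nothing to do, and avoids an overlapping memcpy
  }
  if (usm_device_accessible(q, in) || usm_device_accessible(q, out)) {
    // at least one side may not be dereferenceable on the host,
    // so let the SYCL runtime perform the transfer
    q.memcpy(out, in, count * sizeof(T)).wait();
  } else {
    // both are plain host pointers: a regular host copy suffices
    std::copy_n(in, count, out);
  }
}

This either/or check appears to be what the new adapt_copy test in patch 2 exercises: a managed (USM shared) allocation adapted as a host-space span is copied into an ordinary host gtensor, so the copy lands in the host-to-host overload, which must detect the device-accessible side and use the queue copy rather than a plain std::copy_n.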