diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 987687d1..87fbc1d2 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -194,7 +194,7 @@ jobs: build-cuda: runs-on: ubuntu-latest - container: nvidia/cuda:10.2-devel-ubuntu18.04 + container: nvidia/cuda:11.0.3-devel-ubuntu20.04 env: GTEST_VERSION: 1.10.0 GTEST_ROOT: ${{ github.workspace }}/googletest @@ -217,7 +217,7 @@ jobs: cmake -S googletest-release-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }} cmake --build build -t install env: - CXX: g++-7 + CXX: g++ - name: cmake thrust run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON - name: cmake thrust build diff --git a/include/gtensor/backend_host.h b/include/gtensor/backend_host.h index 49efa736..d9da2aad 100644 --- a/include/gtensor/backend_host.h +++ b/include/gtensor/backend_host.h @@ -101,6 +101,9 @@ struct selector }; } // namespace allocator_impl +// Note: SYCL has a custom host copy +#ifndef GTENSOR_DEVICE_SYCL + namespace copy_impl { template @@ -116,6 +119,8 @@ inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in, } } // namespace copy_impl +#endif // GTENSOR_DEVICE_SYCL + namespace fill_impl { template diff --git a/include/gtensor/backend_sycl.h b/include/gtensor/backend_sycl.h index f23a8827..69a12991 100644 --- a/include/gtensor/backend_sycl.h +++ b/include/gtensor/backend_sycl.h @@ -2,6 +2,7 @@ #ifndef GTENSOR_BACKEND_SYCL_H #define GTENSOR_BACKEND_SYCL_H +#include #include #include #include @@ -140,6 +141,9 @@ namespace copy_impl template inline void sycl_copy_n(InputPtr in, size_type count, OutputPtr out) { + if (gt::raw_pointer_cast(in) == gt::raw_pointer_cast(out)) { + return; + } ::sycl::queue& q = gt::backend::sycl::get_queue(); auto in_raw = gt::raw_pointer_cast(in); auto out_raw = gt::raw_pointer_cast(out); @@ -179,14 +183,20 @@ inline void copy_n(gt::space::host tag_in, gt::space::sycl tag_out, InputPtr in, sycl_copy_n(in, count, out); } -#if 0 template inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in, size_type count, OutputPtr out) { - sycl_copy_n(in, count, out); + if (in == out) { + return; + } + if (gt::backend::sycl::is_device_accessible(in) || + gt::backend::sycl::is_device_accessible(out)) { + sycl_copy_n(in, count, out); + } else { + std::copy_n(in, count, out); + } } -#endif } // namespace copy_impl @@ -251,10 +261,7 @@ class backend_ops template static bool is_device_accessible(const Ptr ptr) { - auto& q = gt::backend::sycl::get_queue(); - auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context()); - return (alloc_type == ::sycl::usm::alloc::device || - alloc_type == ::sycl::usm::alloc::shared); + return gt::backend::sycl::is_device_accessible(ptr); } template diff --git a/include/gtensor/backend_sycl_device.h b/include/gtensor/backend_sycl_device.h index cf0be422..eafd80af 100644 --- a/include/gtensor/backend_sycl_device.h +++ b/include/gtensor/backend_sycl_device.h @@ -410,6 +410,15 @@ inline void mem_info(size_t* free, size_t* total) #endif // GTENSOR_DEVICE_SYCL_L0 +template +bool is_device_accessible(const Ptr ptr) +{ + auto& q = get_queue(); + auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context()); + return (alloc_type == ::sycl::usm::alloc::device || + alloc_type == ::sycl::usm::alloc::shared); +} + } // namespace sycl } // namespace backend diff --git a/tests/test_adapt.cxx b/tests/test_adapt.cxx index e358d2a3..1671f01f 100644 --- a/tests/test_adapt.cxx +++ b/tests/test_adapt.cxx @@ -28,6 +28,27 @@ TEST(adapt, adapt_complex) EXPECT_EQ(a[N * M - 1], (T{1., -1.})); } +TEST(adapt, adapt_copy) +{ + constexpr int N = 3; + + // managed allocation adapted + auto p_coeff = + gt::backend::gallocator::allocate(N); + auto coeff_adapt = gt::adapt<1>(p_coeff, gt::shape(N)); + + // host allocation + gt::gtensor coeff_gt(gt::shape(N)); + + // copy between managed and host + gt::copy(coeff_adapt, coeff_gt); + + EXPECT_EQ(coeff_gt, coeff_adapt); + + // clean up + gt::backend::gallocator::deallocate(p_coeff); +} + TEST(adapt, adapt_device) { constexpr int N = 10;