wdmapp · bd4 · Jul 12, 2023 · Jun 21, 2023 · Jun 21, 2023
diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
@@ -194,7 +194,7 @@ jobs:
 
   build-cuda:
     runs-on: ubuntu-latest
-    container: nvidia/cuda:10.2-devel-ubuntu18.04
+    container: nvidia/cuda:11.0.3-devel-ubuntu20.04
     env:
       GTEST_VERSION: 1.10.0
       GTEST_ROOT: ${{ github.workspace }}/googletest
@@ -217,7 +217,7 @@ jobs:
         cmake -S googletest-release-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
         cmake --build build -t install
       env:
-        CXX: g++-7
+        CXX: g++
     - name: cmake thrust
       run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON
     - name: cmake thrust build

diff --git a/include/gtensor/backend_host.h b/include/gtensor/backend_host.h
@@ -101,6 +101,9 @@ struct selector<T, gt::space::host_only>
 };
 } // namespace allocator_impl
 
+// Note: SYCL has a custom host copy
+#ifndef GTENSOR_DEVICE_SYCL
+
 namespace copy_impl
 {
 template <typename InputPtr, typename OutputPtr>
@@ -116,6 +119,8 @@ inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
 }
 } // namespace copy_impl
 
+#endif // GTENSOR_DEVICE_SYCL
+
 namespace fill_impl
 {
 template <typename Ptr, typename T>

diff --git a/include/gtensor/backend_sycl.h b/include/gtensor/backend_sycl.h
@@ -2,6 +2,7 @@
 #ifndef GTENSOR_BACKEND_SYCL_H
 #define GTENSOR_BACKEND_SYCL_H
 
+#include <algorithm>
 #include <cstdlib>
 #include <exception>
 #include <iostream>
@@ -140,6 +141,9 @@ namespace copy_impl
 template <typename InputPtr, typename OutputPtr>
 inline void sycl_copy_n(InputPtr in, size_type count, OutputPtr out)
 {
+  if (gt::raw_pointer_cast(in) == gt::raw_pointer_cast(out)) {
+    return;
+  }
   ::sycl::queue& q = gt::backend::sycl::get_queue();
   auto in_raw = gt::raw_pointer_cast(in);
   auto out_raw = gt::raw_pointer_cast(out);
@@ -179,14 +183,20 @@ inline void copy_n(gt::space::host tag_in, gt::space::sycl tag_out, InputPtr in,
   sycl_copy_n(in, count, out);
 }
 
-#if 0
 template <typename InputPtr, typename OutputPtr>
 inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
                    size_type count, OutputPtr out)
 {
-  sycl_copy_n(in, count, out);
+  if (in == out) {
+    return;
+  }
+  if (gt::backend::sycl::is_device_accessible(in) ||
+      gt::backend::sycl::is_device_accessible(out)) {
+    sycl_copy_n(in, count, out);
+  } else {
+    std::copy_n(in, count, out);
+  }
 }
-#endif
 
 } // namespace copy_impl
 
@@ -251,10 +261,7 @@ class backend_ops<gt::space::sycl>
   template <typename Ptr>
   static bool is_device_accessible(const Ptr ptr)
   {
-    auto& q = gt::backend::sycl::get_queue();
-    auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context());
-    return (alloc_type == ::sycl::usm::alloc::device ||
-            alloc_type == ::sycl::usm::alloc::shared);
+    return gt::backend::sycl::is_device_accessible(ptr);
   }
 
   template <typename Ptr>

diff --git a/include/gtensor/backend_sycl_device.h b/include/gtensor/backend_sycl_device.h
@@ -410,6 +410,15 @@ inline void mem_info(size_t* free, size_t* total)
 
 #endif // GTENSOR_DEVICE_SYCL_L0
 
+template <typename Ptr>
+bool is_device_accessible(const Ptr ptr)
+{
+  auto& q = get_queue();
+  auto alloc_type = ::sycl::get_pointer_type(ptr, q.get_context());
+  return (alloc_type == ::sycl::usm::alloc::device ||
+          alloc_type == ::sycl::usm::alloc::shared);
+}
+
 } // namespace sycl
 
 } // namespace backend

diff --git a/tests/test_adapt.cxx b/tests/test_adapt.cxx
@@ -28,6 +28,27 @@ TEST(adapt, adapt_complex)
   EXPECT_EQ(a[N * M - 1], (T{1., -1.}));
 }
 
+TEST(adapt, adapt_copy)
+{
+  constexpr int N = 3;
+
+  // managed allocation adapted
+  auto p_coeff =
+    gt::backend::gallocator<gt::space::clib_managed>::allocate<double>(N);
+  auto coeff_adapt = gt::adapt<1>(p_coeff, gt::shape(N));
+
+  // host allocation
+  gt::gtensor<double, 1> coeff_gt(gt::shape(N));
+
+  // copy between managed and host
+  gt::copy(coeff_adapt, coeff_gt);
+
+  EXPECT_EQ(coeff_gt, coeff_adapt);
+
+  // clean up
+  gt::backend::gallocator<gt::space::clib_managed>::deallocate<double>(p_coeff);
+}
+
 TEST(adapt, adapt_device)
 {
   constexpr int N = 10;