Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ccpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ jobs:

build-cuda:
runs-on: ubuntu-latest
container: nvidia/cuda:10.2-devel-ubuntu18.04
container: nvidia/cuda:11.0.3-devel-ubuntu20.04
env:
GTEST_VERSION: 1.10.0
GTEST_ROOT: ${{ github.workspace }}/googletest
Expand All @@ -217,7 +217,7 @@ jobs:
cmake -S googletest-release-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
cmake --build build -t install
env:
CXX: g++-7
CXX: g++
- name: cmake thrust
run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON
- name: cmake thrust build
Expand Down
5 changes: 5 additions & 0 deletions include/gtensor/backend_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ struct selector<T, gt::space::host_only>
};
} // namespace allocator_impl

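// Note: the SYCL backend defines its own host-to-host copy_n (see
// backend_sycl.h), so the generic version below is compiled out when
// GTENSOR_DEVICE_SYCL is defined.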
#ifndef GTENSOR_DEVICE_SYCL

namespace copy_impl
{
template <typename InputPtr, typename OutputPtr>
Expand All @@ -116,6 +119,8 @@ inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
}
} // namespace copy_impl

#endif // GTENSOR_DEVICE_SYCL

namespace fill_impl
{
template <typename Ptr, typename T>
Expand Down
21 changes: 14 additions & 7 deletions include/gtensor/backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#ifndef GTENSOR_BACKEND_SYCL_H
#define GTENSOR_BACKEND_SYCL_H

#include <algorithm>
#include <cstdlib>
#include <exception>
#include <iostream>
Expand Down Expand Up @@ -140,6 +141,9 @@ namespace copy_impl
template <typename InputPtr, typename OutputPtr>
inline void sycl_copy_n(InputPtr in, size_type count, OutputPtr out)
{
if (gt::raw_pointer_cast(in) == gt::raw_pointer_cast(out)) {
return;
}
::sycl::queue& q = gt::backend::sycl::get_queue();
auto in_raw = gt::raw_pointer_cast(in);
auto out_raw = gt::raw_pointer_cast(out);
Expand Down Expand Up @@ -179,14 +183,20 @@ inline void copy_n(gt::space::host tag_in, gt::space::sycl tag_out, InputPtr in,
sycl_copy_n(in, count, out);
}

#if 0
// Host-to-host copy_n for the SYCL build.
//
// A pointer tagged as host space may still refer to USM device/shared memory
// (tests/test_adapt.cxx adapts a managed allocation and copies it to a host
// gtensor), so the SYCL queue is used whenever either side is device
// accessible; otherwise this is an ordinary std::copy_n.
//
// NOTE(review): backend_host.h omits its generic host copy_n when
// GTENSOR_DEVICE_SYCL is defined, so this overload must be compiled in (not
// wrapped in `#if 0`) for host copies to resolve in SYCL builds.
//
// in     start of the source range
// count  number of elements to copy
// out    start of the destination range
template <typename InputPtr, typename OutputPtr>
inline void copy_n(gt::space::host tag_in, gt::space::host tag_out, InputPtr in,
                   size_type count, OutputPtr out)
{
  // Copying a range onto itself is a no-op.
  if (in == out) {
    return;
  }
  if (gt::backend::sycl::is_device_accessible(in) ||
      gt::backend::sycl::is_device_accessible(out)) {
    // At least one side is a USM device/shared allocation: go through SYCL.
    sycl_copy_n(in, count, out);
  } else {
    // Plain host memory on both sides: no need to involve the queue.
    std::copy_n(in, count, out);
  }
}
#endif

} // namespace copy_impl

Expand Down Expand Up @@ -251,10 +261,7 @@ class backend_ops<gt::space::sycl>
// Returns whether `ptr` is device accessible. Delegates to the shared
// gt::backend::sycl::is_device_accessible helper so the USM allocation-type
// query is implemented in exactly one place.
template <typename Ptr>
static bool is_device_accessible(const Ptr ptr)
{
  return gt::backend::sycl::is_device_accessible(ptr);
}

template <typename Ptr>
Expand Down
9 changes: 9 additions & 0 deletions include/gtensor/backend_sycl_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,15 @@ inline void mem_info(size_t* free, size_t* total)

#endif // GTENSOR_DEVICE_SYCL_L0

// Reports whether `ptr` is a USM allocation of kind `device` or `shared`,
// as classified by ::sycl::get_pointer_type in the context of the queue
// returned by get_queue(). Any other allocation kind yields false.
template <typename Ptr>
bool is_device_accessible(const Ptr ptr)
{
  const auto usm_kind =
    ::sycl::get_pointer_type(ptr, get_queue().get_context());
  if (usm_kind == ::sycl::usm::alloc::device) {
    return true;
  }
  return usm_kind == ::sycl::usm::alloc::shared;
}

} // namespace sycl

} // namespace backend
Expand Down
21 changes: 21 additions & 0 deletions tests/test_adapt.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,27 @@ TEST(adapt, adapt_complex)
EXPECT_EQ(a[N * M - 1], (T{1., -1.}));
}

// Checks gt::copy between an adapted managed allocation and a host gtensor.
TEST(adapt, adapt_copy)
{
  constexpr int N = 3;

  // managed allocation adapted
  auto p_coeff =
    gt::backend::gallocator<gt::space::clib_managed>::allocate<double>(N);
  auto coeff_adapt = gt::adapt<1>(p_coeff, gt::shape(N));

  // Give the source known values. Freshly allocated memory is indeterminate
  // and may hold NaNs, which never compare equal and would fail the check
  // below even after a correct copy.
  for (int i = 0; i < N; i++) {
    coeff_adapt(i) = 1.0 + i;
  }

  // host allocation
  gt::gtensor<double, 1> coeff_gt(gt::shape(N));

  // copy between managed and host
  gt::copy(coeff_adapt, coeff_gt);

  EXPECT_EQ(coeff_gt, coeff_adapt);

  // clean up
  gt::backend::gallocator<gt::space::clib_managed>::deallocate<double>(p_coeff);
}

TEST(adapt, adapt_device)
{
constexpr int N = 10;
Expand Down