From a9e4a6f875e7fe282c4e21af97a72ad97500015d Mon Sep 17 00:00:00 2001 From: Bryce Allen Date: Tue, 14 Mar 2023 10:05:40 -0400 Subject: [PATCH] fortran: add gpuAllocatorClearCache - requires a hack to clear cache for most common allocator types - does not work if thrust backend is enabled --- include/gtensor/allocator.h | 2 ++ include/gtensor/backend_common.h | 7 +++-- include/gtensor/gtensor.h | 49 +++++++++++++++++++++++++++++++ src/fortran/gpu_api.cxx | 2 ++ src/fortran/gpu_api_interface.F90 | 3 ++ 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/include/gtensor/allocator.h b/include/gtensor/allocator.h index fe989a05..a02b8b8d 100644 --- a/include/gtensor/allocator.h +++ b/include/gtensor/allocator.h @@ -99,6 +99,8 @@ struct caching_allocator : A GT_INLINE void construct(pointer) {} + // Note: thrust allocators have a non-static deallocate and this does not work + // if thrust is enabled static void clear_cache() { for (auto it = free_.begin(); it != free_.end(); it++) { diff --git a/include/gtensor/backend_common.h b/include/gtensor/backend_common.h index 85d39dc0..c44b82ff 100644 --- a/include/gtensor/backend_common.h +++ b/include/gtensor/backend_common.h @@ -267,8 +267,11 @@ struct wrap_allocator using pointer = gt::space_pointer; using size_type = gt::size_type; - pointer allocate(size_type n) { return pointer(A::template allocate(n)); } - void deallocate(pointer p, size_type n) + static pointer allocate(size_type n) + { + return pointer(A::template allocate(n)); + } + static void deallocate(pointer p, size_type n) { A::deallocate(gt::pointer_traits::get(p)); } diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 58da5183..20c35306 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -942,6 +942,55 @@ decltype(auto) host_mirror(E& e) return detail::host_mirror::run(e); } +// ====================================================================== +// allocator_clear_caches + +template +using caching_device_allocator = + gt::allocator::caching_allocator>; + +template +using caching_host_allocator = + gt::allocator::caching_allocator>; + +template +using caching_managed_allocator = + gt::allocator::caching_allocator>; + +// TODO: this is a hack, we should re-write caching_allocator to better +// support this use case, i.e. clearing cache after initialization +// and auto-parallelization but before main time loop in an app like +// GENE +inline void allocator_clear_caches() +{ + // Note: thrust allocators have a non-static deallocate +#if defined(GTENSOR_HAVE_DEVICE) && !defined(GTENSOR_USE_THRUST) + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator>::clear_cache(); + gt::caching_device_allocator>::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator>::clear_cache(); + gt::caching_managed_allocator>::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator>::clear_cache(); + gt::caching_host_allocator>::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); +#endif +} + } // namespace gt #endif diff --git a/src/fortran/gpu_api.cxx b/src/fortran/gpu_api.cxx index 5f9eddd0..f2bc74d4 100644 --- a/src/fortran/gpu_api.cxx +++ b/src/fortran/gpu_api.cxx @@ -196,3 +196,5 @@ extern "C" int gpuMemcpyAsync(void* dst, const void* src, size_t bytes, } #endif + +extern "C" void gpuAllocatorClearCache() { gt::allocator_clear_caches(); } diff --git a/src/fortran/gpu_api_interface.F90 b/src/fortran/gpu_api_interface.F90 index fa5141e0..a4780226 100644 --- a/src/fortran/gpu_api_interface.F90 +++ b/src/fortran/gpu_api_interface.F90 @@ -284,4 +284,7 @@ subroutine gpuDeviceGet(out_device_id) out_device_id = gt_backend_device_get() end subroutine gpuDeviceGet + subroutine gpuAllocatorClearCache() + end subroutine gpuAllocatorClearCache + end module gpu_api_m