diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5217bbaf..d6fbfe1f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,8 @@ add_subdirectory(mpi_runner) -set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Daint)") +set(OOMPH_TEST_LEAK_GPU_MEMORY + OFF + CACHE BOOL "Do not free memory (bug on Piz Daint)") # --------------------------------------------------------------------- # compile tests @@ -10,30 +12,57 @@ set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Da set(serial_tests test_unique_function test_unsafe_shared_ptr) # list of parallel tests to be executed -set(parallel_tests test_context test_send_recv test_send_multi test_cancel test_locality) -#test_tag_range) -if (OOMPH_ENABLE_BARRIER) - list(APPEND parallel_tests test_barrier) +set(parallel_tests test_context test_send_recv test_send_multi test_cancel + test_locality) + +# list of parallel tests that also have device code variants +if(HWMALLOC_ENABLE_DEVICE) + set(device_tests test_send_recv) +endif() + +# test_tag_range) +if(OOMPH_ENABLE_BARRIER) + list(APPEND parallel_tests test_barrier) endif() -# creates an object library (i.e. *.o file) +# creates an object library (i.e. *.o file), if DEVICE is specified, extra flags +# are added and the target name has a suffix function(compile_test t_) - set(t ${t_}_obj) - add_library(${t} OBJECT ${t_}.cpp) - oomph_target_compile_options(${t}) - if (OOMPH_TEST_LEAK_GPU_MEMORY) - target_compile_definitions(${t} PRIVATE OOMPH_TEST_LEAK_GPU_MEMORY) - endif() - target_link_libraries(${t} PRIVATE ext-gtest) - target_link_libraries(${t} PUBLIC oomph) + set(options DEVICE) + cmake_parse_arguments(CT "${options}" "" "" ${ARGN}) + set(source_filename_ "${t_}.cpp") + set(suffix_ "") + if(CT_DEVICE) + # Make a copy the input source file in the build directory, add a suffix + set(suffix_ "_device") + cmake_path(REPLACE_EXTENSION source_filename_ LAST_ONLY "${suffix_}.cpp" + OUTPUT_VARIABLE src_name_) + set(dst_file "${CMAKE_CURRENT_BINARY_DIR}/${src_name_}") + configure_file("${source_filename_}" "${dst_file}" COPYONLY) + set(source_filename_ "${dst_file}") + endif() + set(target_ ${t}${suffix_}_obj) + add_library(${target_} OBJECT ${source_filename_}) + oomph_target_compile_options(${target_}) + target_compile_definitions( + ${target_} + PRIVATE $<$:OOMPH_TEST_LEAK_GPU_MEMORY>) + target_compile_definitions( + ${target_} PRIVATE $<$:TEST_DEVICE_MODE_ONLY>) + target_link_libraries(${target_} PRIVATE ext-gtest) + target_link_libraries(${target_} PUBLIC oomph) endfunction() -# compile an object library for each test -# tests will be compiled only once and then linked against all enabled oomph backends +# compile an object library for each test tests will be compiled only once and +# then linked against all enabled oomph backends list(APPEND all_tests ${serial_tests} ${parallel_tests}) list(REMOVE_DUPLICATES all_tests) foreach(t ${all_tests}) - compile_test(${t}) + compile_test(${t}) + if(${t} IN_LIST device_tests) + # generate a second version of the obj file, but with DEVICE code enabled + compile_test(${t} DEVICE) + endif() endforeach() # --------------------------------------------------------------------- @@ -41,49 +70,55 @@ endforeach() # --------------------------------------------------------------------- function(reg_serial_test t) - add_executable(${t} $) - oomph_target_compile_options(${t}) - target_link_libraries(${t} PRIVATE ext-gtest) - target_link_libraries(${t} PRIVATE oomph_common) - add_test( - NAME ${t} - COMMAND $) + add_executable(${t} $) + oomph_target_compile_options(${t}) + target_link_libraries(${t} PRIVATE ext-gtest) + target_link_libraries(${t} PRIVATE oomph_common) + add_test(NAME ${t} COMMAND $) endfunction() foreach(t ${serial_tests}) - reg_serial_test(${t}) + reg_serial_test(${t}) endforeach() # creates an executable by linking to object file and to selected oomph backend function(reg_parallel_test t_ lib n) - set(t ${t_}_${lib}) - add_executable(${t} $) - oomph_target_compile_options(${t}) - target_link_libraries(${t} PRIVATE gtest_main_mpi) - target_link_libraries(${t} PRIVATE oomph_${lib}) - add_test( - NAME ${t} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} - $ ${MPIEXEC_POSTFLAGS}) - set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE) + set(t ${t_}_${lib}) + add_executable(${t} $) + oomph_target_compile_options(${t}) + target_link_libraries(${t} PRIVATE gtest_main_mpi) + target_link_libraries(${t} PRIVATE oomph_${lib}) + add_test(NAME ${t} + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} + ${MPIEXEC_PREFLAGS} $ ${MPIEXEC_POSTFLAGS}) + set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE) endfunction() -if (OOMPH_WITH_MPI) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} mpi 4) - endforeach() +if(OOMPH_WITH_MPI) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} mpi 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device mpi 4) + endforeach() endif() -if (OOMPH_WITH_UCX) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} ucx 4) - endforeach() +if(OOMPH_WITH_UCX) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} ucx 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device ucx 4) + endforeach() endif() -if (OOMPH_WITH_LIBFABRIC) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} libfabric 4) - endforeach() +if(OOMPH_WITH_LIBFABRIC) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} libfabric 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device libfabric 4) + endforeach() endif() add_subdirectory(bindings) diff --git a/test/test_send_recv.cpp b/test/test_send_recv.cpp index 0cfd1170..1326eecb 100644 --- a/test/test_send_recv.cpp +++ b/test/test_send_recv.cpp @@ -7,16 +7,21 @@ * Please, refer to the LICENSE file in the root directory. * SPDX-License-Identifier: BSD-3-Clause */ -#include +#ifdef TEST_DEVICE_MODE_ONLY +# ifdef HWMALLOC_ENABLE_DEVICE +# include +# endif +#endif + #include -#include "./mpi_runner/mpi_test_fixture.hpp" -#include -#include -#include +#include +// use this path because device version in build dir needs to find include #include +#include +#include "../test/mpi_runner/mpi_test_fixture.hpp" -#define NITERS 50 -#define SIZE 64 +#define NITERS 50 +#define SIZE 64 #define NTHREADS 4 std::vector> shared_received(NTHREADS); @@ -33,22 +38,22 @@ struct test_environment_base using tag_type = oomph::tag_type; using message = oomph::message_buffer; - oomph::context& ctxt; + oomph::context& ctxt; oomph::communicator comm; - rank_type speer_rank; - rank_type rpeer_rank; - int thread_id; - int num_threads; - tag_type tag; + rank_type speer_rank; + rank_type rpeer_rank; + int thread_id; + int num_threads; + tag_type tag; test_environment_base(oomph::context& c, int tid, int num_t) - : ctxt(c) - , comm(ctxt.get_communicator()) - , speer_rank((comm.rank() + 1) % comm.size()) - , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) - , thread_id(tid) - , num_threads(num_t) - , tag(tid) + : ctxt(c) + , comm(ctxt.get_communicator()) + , speer_rank((comm.rank() + 1) % comm.size()) + , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) + , thread_id(tid) + , num_threads(num_t) + , tag(tid) { } }; @@ -57,25 +62,26 @@ struct test_environment : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* ptr) { - if (user_alloc) return comm.make_buffer(ptr, size); + if (user_alloc) + return comm.make_buffer(ptr, size); else return comm.make_buffer(size); } std::vector raw_smsg; std::vector raw_rmsg; - message smsg; - message rmsg; + message smsg; + message rmsg; test_environment(oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) - : base(c, tid, num_t) - , raw_smsg(user_alloc ? size : 0) - , raw_rmsg(user_alloc ? size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) - , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) + : base(c, tid, num_t) + , raw_smsg(user_alloc ? size : 0) + , raw_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) + , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) { fill_send_buffer(); fill_recv_buffer(); @@ -104,10 +110,11 @@ struct test_environment_device : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* device_ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* device_ptr) { - if (user_alloc) return comm.make_device_buffer(device_ptr, size, 0); + if (user_alloc) + return comm.make_device_buffer(device_ptr, size, 0); else return comm.make_device_buffer(size, 0); } @@ -120,37 +127,37 @@ struct test_environment_device : public test_environment_base if (size) m_ptr = hwmalloc::device_malloc(size * sizeof(rank_type)); } device_allocation(device_allocation&& other) - : m_ptr{std::exchange(other.m_ptr, nullptr)} + : m_ptr{std::exchange(other.m_ptr, nullptr)} { } ~device_allocation() { -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY if (m_ptr) hwmalloc::device_free(m_ptr); -#endif +# endif } - rank_type* get() const noexcept { return (rank_type*)m_ptr; } + rank_type* get() const noexcept { return (rank_type*) m_ptr; } }; device_allocation raw_device_smsg; device_allocation raw_device_rmsg; - message smsg; - message rmsg; - - test_environment_device(oomph::context& c, std::size_t size, int tid, int num_t, - bool user_alloc) - : base(c, tid, num_t) -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY - , raw_device_smsg(user_alloc ? size : 0) - , raw_device_rmsg(user_alloc ? size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#else - , raw_device_smsg(size) - , raw_device_rmsg(size) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#endif + message smsg; + message rmsg; + + test_environment_device( + oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) + : base(c, tid, num_t) +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY + , raw_device_smsg(user_alloc ? size : 0) + , raw_device_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# else + , raw_device_smsg(size) + , raw_device_rmsg(size) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# endif { fill_send_buffer(); fill_recv_buffer(); @@ -178,9 +185,8 @@ struct test_environment_device : public test_environment_base }; #endif -template -void -launch_test(Func f) +template +void launch_test(Func f) { // single threaded { @@ -193,7 +199,7 @@ launch_test(Func f) // multi threaded { - oomph::context ctxt(MPI_COMM_WORLD, true); + oomph::context ctxt(MPI_COMM_WORLD, true); std::vector threads; threads.reserve(NTHREADS); reset_counters(); @@ -210,9 +216,9 @@ launch_test(Func f) // no callback // =========== -template -void -test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { Env env(ctxt, size, tid, num_threads, user_alloc); @@ -221,10 +227,7 @@ test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, { auto rreq = env.comm.recv(env.rmsg, env.rpeer_rank, env.tag); auto sreq = env.comm.send(env.smsg, env.speer_rank, env.tag); - while (!(rreq.is_ready() && sreq.is_ready())) - { - env.comm.progress(); - }; + while (!(rreq.is_ready() && sreq.is_ready())) { env.comm.progress(); }; EXPECT_TRUE(env.check_recv_buffer()); env.fill_recv_buffer(); } @@ -250,19 +253,19 @@ test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, } } -TEST_F(mpi_test_fixture, send_recv) -{ - launch_test(test_send_recv); -#if HWMALLOC_ENABLE_DEVICE - launch_test(test_send_recv); +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv) { launch_test(test_send_recv); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_device) { launch_test(test_send_recv); } +# endif #endif -} // callback: pass by l-value reference // =================================== -template -void -test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv_cb( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -270,8 +273,8 @@ test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threa Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; auto send_callback = [&](message const&, rank_type, tag_type) { ++sent; }; auto recv_callback = [&](message&, rank_type, tag_type) { ++received; }; @@ -317,20 +320,22 @@ test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threa EXPECT_EQ(sent, NITERS); } -TEST_F(mpi_test_fixture, send_recv_cb) +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv_cb) { launch_test(test_send_recv_cb); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_device) { - launch_test(test_send_recv_cb); -#if HWMALLOC_ENABLE_DEVICE launch_test(test_send_recv_cb); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership) // ======================================================= -template -void -test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -338,16 +343,14 @@ test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int nu Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = [&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { + auto recv_callback = [&](message msg, rank_type, tag_type) { ++received; env.rmsg = std::move(msg); }; @@ -393,20 +396,25 @@ test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int nu EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_disown) { launch_test(test_send_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_disown_device) +{ launch_test(test_send_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), shared recv // ==================================================================== -template -void -test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_shared_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -416,19 +424,18 @@ test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, thread_id = env.thread_id; - //volatile int received = 0; - volatile int sent = 0; + // volatile int received = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = [&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { - //std::cout << thread_id << " " << env.thread_id << std::endl; - //if (thread_id != env.thread_id) std::cout << "other thread picked up callback" << std::endl; - //else std::cout << "my thread picked up callback" << std::endl; + auto recv_callback = [&](message msg, rank_type, tag_type) { + // std::cout << thread_id << " " << env.thread_id << std::endl; + // if (thread_id != env.thread_id) std::cout << "other thread picked up + // callback" << std::endl; else std::cout << "my thread picked up callback" + // << std::endl; env.rmsg = std::move(msg); ++shared_received[env.thread_id]; }; @@ -475,20 +482,25 @@ test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_shared_recv_cb_disown) { launch_test(test_send_shared_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_shared_recv_cb_disown_device) +{ launch_test(test_send_shared_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by l-value reference, and resubmit // ================================================= -template -void -test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -496,13 +508,13 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message& msg, rank_type dst, tag_type tag) { @@ -513,8 +525,8 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void operator()(message& msg, rank_type src, tag_type tag) { @@ -531,20 +543,25 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int while (sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit) { launch_test(test_send_recv_cb_resubmit); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_device) +{ launch_test(test_send_recv_cb_resubmit); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), and resubmit // ===================================================================== -template -void -test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -552,13 +569,13 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message msg, rank_type dst, tag_type tag) { @@ -570,8 +587,8 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void operator()(message msg, rank_type src, tag_type tag) { @@ -590,10 +607,16 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti while (sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown) { launch_test(test_send_recv_cb_resubmit_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown_device) +{ launch_test(test_send_recv_cb_resubmit_disown); -#endif } +# endif +#endif