From d2943e643cf3df70676f0a9034b6df6cb1834dbc Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 8 Apr 2026 09:06:56 -0600 Subject: [PATCH 1/4] Add Graph exercise solution Signed-off-by: Christian Trott --- Exercises/graph/Solution/CMakeLists.txt | 6 + Exercises/graph/Solution/graph_solution.cpp | 161 ++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 Exercises/graph/Solution/CMakeLists.txt create mode 100644 Exercises/graph/Solution/graph_solution.cpp diff --git a/Exercises/graph/Solution/CMakeLists.txt b/Exercises/graph/Solution/CMakeLists.txt new file mode 100644 index 00000000..8c7e8b8f --- /dev/null +++ b/Exercises/graph/Solution/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.22) +project(KokkosTutorialExecGraph) +include(../../common.cmake) + +add_executable(graph graph_solution.cpp) +target_link_libraries(graph Kokkos::kokkos) diff --git a/Exercises/graph/Solution/graph_solution.cpp b/Exercises/graph/Solution/graph_solution.cpp new file mode 100644 index 00000000..a14207ef --- /dev/null +++ b/Exercises/graph/Solution/graph_solution.cpp @@ -0,0 +1,161 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +#include +#include + +template +constexpr bool is_view_v = false; + +template +constexpr bool is_view_v> = true; + +template +concept view = is_view_v; + +using policy_t = Kokkos::RangePolicy<>; + +template +void init(D data, P pack_ids) { + Kokkos::parallel_for("Init Data", policy_t(0, data.extent(0)), + KOKKOS_LAMBDA(int i) { data(i) = i; }); + Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); + Kokkos::fill_random(pack_ids, rand_pool64, data.extent(0)); +} + +// CUDA does not support auto return type from functions +// which create host device lambdas +template +struct pack_functor { + D data; + P pack_ids; + B buffer; + KOKKOS_FUNCTION void operator() (int i) const { + buffer(i) = data(pack_ids(i)); + } +}; + +template +auto pack(GraphNode node, D data, P pack_ids, B buffer) { + return node.then_parallel_for("Pack One", policy_t(0, pack_ids.extent(0)), + pack_functor{data, pack_ids, buffer}); +} + +template +struct copy_functor { + Dest d; + Src s; + KOKKOS_FUNCTION void operator() (int i) const { + d(i) = s(i); + } +}; + +template +auto transfer(GraphNode node, R recv, S send) { + auto temp_node = node.then_parallel_for("DeepCopy", policy_t(0, recv.extent(0)), + copy_functor{recv, send}); + return temp_node.then_host("HostThing", [=]() { printf("HostTransfer %p %p\n",recv.data(), send.data()); }); +} + +template +auto unpack(GraphNode node, D data, B buffer) { + return node.then_parallel_for("DeepCopy", policy_t(0, buffer.extent(0)), + copy_functor{data, buffer}); +} + +void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int num_repeat) { + Kokkos::View data("Data", num_elements + num_sendrecv); + Kokkos::View send_buffer("SendBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View recv_buffer("RecvBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View pack_ids("PackIDS", num_mpi_neighs, num_sendrecv); + 
init(data, pack_ids); + + Kokkos::Timer timer; + Kokkos::Experimental::Graph graph; + + for(int neigh = 0; neigh < num_mpi_neighs; neigh++) { + auto my_pack_ids = Kokkos::subview(pack_ids, neigh, Kokkos::ALL()); + auto send_buf = Kokkos::subview(send_buffer, neigh, Kokkos::ALL()); + auto recv_buf = Kokkos::subview(recv_buffer, neigh, Kokkos::ALL()); + auto node1 = pack(graph.root_node(), data, my_pack_ids, send_buf); + auto node2 = transfer(node1, recv_buf, send_buf); + auto node3 = unpack(node2, Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}), recv_buf); + } + graph.instantiate(); + Kokkos::fence(); + printf("Graph Create Done\n"); + + double time_create = timer.seconds(); + timer.reset(); + for(int r=0; r < num_repeat; r++) { + graph.submit(); + Kokkos::fence(); + } + double time = timer.seconds(); + printf("Graph Runtime: %lf %lf\n",time*1000, time_create*1000); +} + + +int main( int argc, char* argv[] ) +{ + int64_t N = 1000000; // number of elements + int neighs = 6; // number of neighbors + int num_send = 10000; // number of elements to send/recv + int nrepeat = 10; // number of repeats of the test + + // Read command line arguments. 
+ for ( int i = 0; i < argc; i++ ) { + if ( strcmp( argv[ i ], "-N" ) == 0 ) { + N = atoi( argv[ ++i ] ); + printf( " User N is %d\n", N ); + } + else if ( strcmp( argv[ i ], "-neighs" ) == 0 ) { + neighs = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nsend" ) == 0 ) { + num_send = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nrepeat" ) == 0 ) { + nrepeat = atoi( argv[ ++i ] ); + } + else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { + printf( " -N : number of elements (default: 1000000)\n" ); + printf( " -neighs : number of neighbors (default: 6)\n" ); + printf( " -nsend : number of send/recv elements (default: 10000)\n" ); + printf( " -nrepeat : number of repetitions (default: 10)\n" ); + printf( " -help (-h): print this message\n\n" ); + exit( 1 ); + } + } + + + Kokkos::initialize( argc, argv ); + { + printf("Execute with %li %i %i %i\n",N, neighs, num_send, nrepeat); + mpi_style_iteration(N, neighs, num_send, nrepeat); + } + Kokkos::finalize(); + + return 0; +} + From 427a04a486b89d1f3875070104b588981b52e7b2 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 8 Apr 2026 10:35:24 -0600 Subject: [PATCH 2/4] Adding begin state for the exercise Signed-off-by: Luc Berger-Vergiat --- Exercises/graph/Begin/CMakeLists.txt | 6 + Exercises/graph/Begin/CMakeLists.txt~ | 6 + Exercises/graph/Begin/graph_begin.cpp | 178 ++++++++++++++++++++ Exercises/graph/Begin/graph_begin.cpp~ | 161 ++++++++++++++++++ Exercises/graph/Solution/graph_solution.cpp | 4 +- 5 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 Exercises/graph/Begin/CMakeLists.txt create mode 100644 Exercises/graph/Begin/CMakeLists.txt~ create mode 100644 Exercises/graph/Begin/graph_begin.cpp create mode 100644 Exercises/graph/Begin/graph_begin.cpp~ diff --git a/Exercises/graph/Begin/CMakeLists.txt b/Exercises/graph/Begin/CMakeLists.txt new file mode 100644 index 00000000..2eb44af3 --- /dev/null +++ 
b/Exercises/graph/Begin/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.22) +project(KokkosTutorialExecGraph) +include(../../common.cmake) + +add_executable(graph graph_begin.cpp) +target_link_libraries(graph Kokkos::kokkos) diff --git a/Exercises/graph/Begin/CMakeLists.txt~ b/Exercises/graph/Begin/CMakeLists.txt~ new file mode 100644 index 00000000..8c7e8b8f --- /dev/null +++ b/Exercises/graph/Begin/CMakeLists.txt~ @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.22) +project(KokkosTutorialExecGraph) +include(../../common.cmake) + +add_executable(graph graph_solution.cpp) +target_link_libraries(graph Kokkos::kokkos) diff --git a/Exercises/graph/Begin/graph_begin.cpp b/Exercises/graph/Begin/graph_begin.cpp new file mode 100644 index 00000000..1e6e53c0 --- /dev/null +++ b/Exercises/graph/Begin/graph_begin.cpp @@ -0,0 +1,178 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +#include +#include + +template +constexpr bool is_view_v = false; + +template +constexpr bool is_view_v> = true; + +template +concept view = is_view_v; + +using policy_t = Kokkos::RangePolicy<>; + +template +void init(D data, P pack_ids) { + Kokkos::parallel_for("Init Data", policy_t(0, data.extent(0)), + KOKKOS_LAMBDA(int i) { data(i) = i; }); + Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); + Kokkos::fill_random(pack_ids, rand_pool64, data.extent(0)); +} + +// CUDA does not support auto return type from functions +// which create host device lambdas +template +struct pack_functor { + D data; + P pack_ids; + B buffer; + KOKKOS_FUNCTION void operator() (int i) const { + buffer(i) = data(pack_ids(i)); + } +}; + +template +auto pack(GraphNode node, D data, P pack_ids, B buffer) { + return node.then_parallel_for("Pack One", policy_t(0, pack_ids.extent(0)), + pack_functor{data, pack_ids, buffer}); +} + +template +struct copy_functor { + Dest d; + Src s; + KOKKOS_FUNCTION void operator() (int i) const { + d(i) = s(i); + } +}; + +template +auto transfer(GraphNode node, R recv, S send) { + auto temp_node = node.then_parallel_for("DeepCopy", policy_t(0, recv.extent(0)), + copy_functor{recv, send}); + return temp_node.then_host("HostThing", [=]() { printf("HostTransfer %p %p\n",recv.data(), send.data()); }); +} + +template +auto unpack(GraphNode node, D data, B buffer) { + return node.then_parallel_for("DeepCopy", policy_t(0, buffer.extent(0)), + copy_functor{data, buffer}); +} + +void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int num_repeat) { + Kokkos::View data("Data", num_elements + num_sendrecv); + Kokkos::View send_buffer("SendBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View recv_buffer("RecvBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View pack_ids("PackIDS", num_mpi_neighs, num_sendrecv); + 
+ init(data, pack_ids); + + Kokkos::Timer timer; + // EXERCISE: Create a Kokkos graph to capture work items + // Kokkos::Experimental::Graph graph; + + timer.reset(); + for(int r=0; r < num_repeat; r++) { + for(int neigh = 0; neigh < num_mpi_neighs; neigh++) { + // Create subviews for + auto my_pack_ids = Kokkos::subview(pack_ids, neigh, Kokkos::ALL()); + auto send_buf = Kokkos::subview(send_buffer, neigh, Kokkos::ALL()); + auto recv_buf = Kokkos::subview(recv_buffer, neigh, Kokkos::ALL()); + auto my_data = Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}); + + auto my_pack = pack_functor{data, my_pack_ids, send_buf}; + auto my_transfer = copy_functor{recv_buf, send_buf}; + auto my_unpack = copy_functor{my_data, recv_buf}; + + // EXERCISE: use pack, transfer and unpack functions to create graph nodes + // and remove unnecessary fence! + Kokkos::parallel_for("Pack", policy_t(0, pack_ids.extent(0)), my_pack); + Kokkos::parallel_for("Transfer", policy_t(0, recv_buf.extent(0)), my_transfer); + Kokkos::parallel_for("Unpack", policy_t(0, recv_buf.extent(0)), my_unpack); + Kokkos::fence(); + } + } + double time = timer.seconds(); + printf("Runtime: %lf \n",time*1000); + + // EXERCISE: instantiate the graph object + // Kokkos::fence(); + // printf("Graph Create Done\n"); + + // double time_create = timer.seconds(); + // timer.reset(); + // for(int r=0; r < num_repeat; r++) { + // EXERCISE: ask the graph to exectute its tasks + // Kokkos::fence(); + // } + // double time = timer.seconds(); + // printf("Graph Runtime: %lf %lf\n",time*1000, time_create*1000); +} + + +int main( int argc, char* argv[] ) +{ + int64_t N = 1000000; // number of elements + int neighs = 6; // number of neighbors + int num_send = 10000; // number of elements to send/recv + int nrepeat = 10; // number of repeats of the test + + // Read command line arguments. 
+ for ( int i = 0; i < argc; i++ ) { + if ( strcmp( argv[ i ], "-N" ) == 0 ) { + N = atoi( argv[ ++i ] ); + printf( " User N is %lld\n", N ); + } + else if ( strcmp( argv[ i ], "-neighs" ) == 0 ) { + neighs = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nsend" ) == 0 ) { + num_send = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nrepeat" ) == 0 ) { + nrepeat = atoi( argv[ ++i ] ); + } + else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { + printf( " -N : number of elements (default: 1000000)\n" ); + printf( " -neighs : number of neighbors (default: 6)\n" ); + printf( " -nsend : number of send/recv elements (default: 10000)\n" ); + printf( " -nrepeat : number of repetitions (default: 10)\n" ); + printf( " -help (-h): print this message\n\n" ); + exit( 1 ); + } + } + + + Kokkos::initialize( argc, argv ); + { + printf("Execute with %lld %i %i %i\n",N, neighs, num_send, nrepeat); + mpi_style_iteration(N, neighs, num_send, nrepeat); + } + Kokkos::finalize(); + + return 0; +} + diff --git a/Exercises/graph/Begin/graph_begin.cpp~ b/Exercises/graph/Begin/graph_begin.cpp~ new file mode 100644 index 00000000..a14207ef --- /dev/null +++ b/Exercises/graph/Begin/graph_begin.cpp~ @@ -0,0 +1,161 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +#include +#include + +template +constexpr bool is_view_v = false; + +template +constexpr bool is_view_v> = true; + +template +concept view = is_view_v; + +using policy_t = Kokkos::RangePolicy<>; + +template +void init(D data, P pack_ids) { + Kokkos::parallel_for("Init Data", policy_t(0, data.extent(0)), + KOKKOS_LAMBDA(int i) { data(i) = i; }); + Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); + Kokkos::fill_random(pack_ids, rand_pool64, data.extent(0)); +} + +// CUDA does not support auto return type from functions +// which create host device lambdas +template +struct pack_functor { + D data; + P pack_ids; + B buffer; + KOKKOS_FUNCTION void operator() (int i) const { + buffer(i) = data(pack_ids(i)); + } +}; + +template +auto pack(GraphNode node, D data, P pack_ids, B buffer) { + return node.then_parallel_for("Pack One", policy_t(0, pack_ids.extent(0)), + pack_functor{data, pack_ids, buffer}); +} + +template +struct copy_functor { + Dest d; + Src s; + KOKKOS_FUNCTION void operator() (int i) const { + d(i) = s(i); + } +}; + +template +auto transfer(GraphNode node, R recv, S send) { + auto temp_node = node.then_parallel_for("DeepCopy", policy_t(0, recv.extent(0)), + copy_functor{recv, send}); + return temp_node.then_host("HostThing", [=]() { printf("HostTransfer %p %p\n",recv.data(), send.data()); }); +} + +template +auto unpack(GraphNode node, D data, B buffer) { + return node.then_parallel_for("DeepCopy", policy_t(0, buffer.extent(0)), + copy_functor{data, buffer}); +} + +void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int num_repeat) { + Kokkos::View data("Data", num_elements + num_sendrecv); + Kokkos::View send_buffer("SendBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View recv_buffer("RecvBuf", num_mpi_neighs, num_sendrecv); + Kokkos::View pack_ids("PackIDS", num_mpi_neighs, num_sendrecv); + 
init(data, pack_ids); + + Kokkos::Timer timer; + Kokkos::Experimental::Graph graph; + + for(int neigh = 0; neigh < num_mpi_neighs; neigh++) { + auto my_pack_ids = Kokkos::subview(pack_ids, neigh, Kokkos::ALL()); + auto send_buf = Kokkos::subview(send_buffer, neigh, Kokkos::ALL()); + auto recv_buf = Kokkos::subview(recv_buffer, neigh, Kokkos::ALL()); + auto node1 = pack(graph.root_node(), data, my_pack_ids, send_buf); + auto node2 = transfer(node1, recv_buf, send_buf); + auto node3 = unpack(node2, Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}), recv_buf); + } + graph.instantiate(); + Kokkos::fence(); + printf("Graph Create Done\n"); + + double time_create = timer.seconds(); + timer.reset(); + for(int r=0; r < num_repeat; r++) { + graph.submit(); + Kokkos::fence(); + } + double time = timer.seconds(); + printf("Graph Runtime: %lf %lf\n",time*1000, time_create*1000); +} + + +int main( int argc, char* argv[] ) +{ + int64_t N = 1000000; // number of elements + int neighs = 6; // number of neighbors + int num_send = 10000; // number of elements to send/recv + int nrepeat = 10; // number of repeats of the test + + // Read command line arguments. 
+ for ( int i = 0; i < argc; i++ ) { + if ( strcmp( argv[ i ], "-N" ) == 0 ) { + N = atoi( argv[ ++i ] ); + printf( " User N is %d\n", N ); + } + else if ( strcmp( argv[ i ], "-neighs" ) == 0 ) { + neighs = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nsend" ) == 0 ) { + num_send = atoi( argv[ ++i ] ); + } + else if ( strcmp( argv[ i ], "-nrepeat" ) == 0 ) { + nrepeat = atoi( argv[ ++i ] ); + } + else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { + printf( " -N : number of elements (default: 1000000)\n" ); + printf( " -neighs : number of neighbors (default: 6)\n" ); + printf( " -nsend : number of send/recv elements (default: 10000)\n" ); + printf( " -nrepeat : number of repetitions (default: 10)\n" ); + printf( " -help (-h): print this message\n\n" ); + exit( 1 ); + } + } + + + Kokkos::initialize( argc, argv ); + { + printf("Execute with %li %i %i %i\n",N, neighs, num_send, nrepeat); + mpi_style_iteration(N, neighs, num_send, nrepeat); + } + Kokkos::finalize(); + + return 0; +} + diff --git a/Exercises/graph/Solution/graph_solution.cpp b/Exercises/graph/Solution/graph_solution.cpp index a14207ef..b28ed31b 100644 --- a/Exercises/graph/Solution/graph_solution.cpp +++ b/Exercises/graph/Solution/graph_solution.cpp @@ -127,7 +127,7 @@ int main( int argc, char* argv[] ) for ( int i = 0; i < argc; i++ ) { if ( strcmp( argv[ i ], "-N" ) == 0 ) { N = atoi( argv[ ++i ] ); - printf( " User N is %d\n", N ); + printf( " User N is %lld\n", N ); } else if ( strcmp( argv[ i ], "-neighs" ) == 0 ) { neighs = atoi( argv[ ++i ] ); @@ -151,7 +151,7 @@ int main( int argc, char* argv[] ) Kokkos::initialize( argc, argv ); { - printf("Execute with %li %i %i %i\n",N, neighs, num_send, nrepeat); + printf("Execute with %lld %i %i %i\n",N, neighs, num_send, nrepeat); mpi_style_iteration(N, neighs, num_send, nrepeat); } Kokkos::finalize(); From 8035fbd2fbf6b6f9438374bab71a90e5bb20182a Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 8 
Apr 2026 10:55:45 -0600 Subject: [PATCH 3/4] Update Graph example: add Begin Signed-off-by: Christian Trott --- Exercises/graph/Begin/graph_begin.cpp | 66 ++++++++++++--------- Exercises/graph/Solution/graph_solution.cpp | 8 +-- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/Exercises/graph/Begin/graph_begin.cpp b/Exercises/graph/Begin/graph_begin.cpp index 1e6e53c0..4ad9ee29 100644 --- a/Exercises/graph/Begin/graph_begin.cpp +++ b/Exercises/graph/Begin/graph_begin.cpp @@ -22,7 +22,7 @@ #include #include -#include +// EXERCISE: Include the right header! template constexpr bool is_view_v = false; @@ -55,9 +55,12 @@ struct pack_functor { } }; -template -auto pack(GraphNode node, D data, P pack_ids, B buffer) { - return node.then_parallel_for("Pack One", policy_t(0, pack_ids.extent(0)), +// EXERCISE: take graph nodes instead of passing in execution space instances +// What should these functions return now? +// Use simple unconstrained templates for the graph node +template +void pack(Exec exec, D data, P pack_ids, B buffer) { + Kokkos::parallel_for("Pack One", policy_t(exec, 0, pack_ids.extent(0)), pack_functor{data, pack_ids, buffer}); } @@ -70,16 +73,21 @@ struct copy_functor { } }; -template -auto transfer(GraphNode node, R recv, S send) { - auto temp_node = node.then_parallel_for("DeepCopy", policy_t(0, recv.extent(0)), +// EXERCISE: take graph nodes instead of passing in execution space instances +// What should these functions return now? +// Use simple unconstrained templates for the graph node +template +auto transfer(Exec exec, R recv, S send) { + Kokkos::parallel_for("DeepCopy", policy_t(exec, 0, recv.extent(0)), copy_functor{recv, send}); - return temp_node.then_host("HostThing", [=]() { printf("HostTransfer %p %p\n",recv.data(), send.data()); }); + // EXERCISE the following should become a host node! 
+ exec.fence(); + printf("HostTransfer %p %p\n",recv.data(), send.data()); } -template -auto unpack(GraphNode node, D data, B buffer) { - return node.then_parallel_for("DeepCopy", policy_t(0, buffer.extent(0)), +template +auto unpack(Exec exec, D data, B buffer) { + Kokkos::parallel_for("DeepCopy", policy_t(exec, 0, buffer.extent(0)), copy_functor{data, buffer}); } @@ -95,6 +103,8 @@ void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, // Kokkos::Experimental::Graph graph; timer.reset(); + // EXERCISE Start creating your graph here + // Do you need the repeat here? for(int r=0; r < num_repeat; r++) { for(int neigh = 0; neigh < num_mpi_neighs; neigh++) { // Create subviews for @@ -103,29 +113,27 @@ void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, auto recv_buf = Kokkos::subview(recv_buffer, neigh, Kokkos::ALL()); auto my_data = Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}); - auto my_pack = pack_functor{data, my_pack_ids, send_buf}; - auto my_transfer = copy_functor{recv_buf, send_buf}; - auto my_unpack = copy_functor{my_data, recv_buf}; - - // EXERCISE: use pack, transfer and unpack functions to create graph nodes - // and remove unnecessary fence! 
- Kokkos::parallel_for("Pack", policy_t(0, pack_ids.extent(0)), my_pack); - Kokkos::parallel_for("Transfer", policy_t(0, recv_buf.extent(0)), my_transfer); - Kokkos::parallel_for("Unpack", policy_t(0, recv_buf.extent(0)), my_unpack); - Kokkos::fence(); + Kokkos::DefaultExecutionSpace exec; + // EXERCISE: pass in graph nodes appropriately to connect functions + pack(exec, data, my_pack_ids, send_buf); + transfer(exec, recv_buf, send_buf); + unpack(exec, Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}), recv_buf); } } - double time = timer.seconds(); - printf("Runtime: %lf \n",time*1000); - // EXERCISE: instantiate the graph object // Kokkos::fence(); // printf("Graph Create Done\n"); + // EXERCISE: measure creation time here + double time = timer.seconds(); + printf("Runtime: %lf \n",time*1000); + + + // EXERCISE: ask the graph to execute its tasks // double time_create = timer.seconds(); // timer.reset(); // for(int r=0; r < num_repeat; r++) { - // EXERCISE: ask the graph to exectute its tasks + // EXERCISE: submit graph here! // Kokkos::fence(); // } // double time = timer.seconds(); @@ -135,9 +143,9 @@ void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int main( int argc, char* argv[] ) { - int64_t N = 1000000; // number of elements + int64_t N = 20000; // number of elements int neighs = 6; // number of neighbors - int num_send = 10000; // number of elements to send/recv + int num_send = 5000; // number of elements to send/recv int nrepeat = 10; // number of repeats of the test // Read command line arguments. 
@@ -156,9 +164,9 @@ int main( int argc, char* argv[] ) nrepeat = atoi( argv[ ++i ] ); } else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { - printf( " -N : number of elements (default: 1000000)\n" ); + printf( " -N : number of elements (default: 20000)\n" ); printf( " -neighs : number of neighbors (default: 6)\n" ); - printf( " -nsend : number of send/recv elements (default: 10000)\n" ); + printf( " -nsend : number of send/recv elements (default: 5000)\n" ); printf( " -nrepeat : number of repetitions (default: 10)\n" ); printf( " -help (-h): print this message\n\n" ); exit( 1 ); diff --git a/Exercises/graph/Solution/graph_solution.cpp b/Exercises/graph/Solution/graph_solution.cpp index b28ed31b..7820f6ba 100644 --- a/Exercises/graph/Solution/graph_solution.cpp +++ b/Exercises/graph/Solution/graph_solution.cpp @@ -118,9 +118,9 @@ void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int main( int argc, char* argv[] ) { - int64_t N = 1000000; // number of elements + int64_t N = 20000; // number of elements int neighs = 6; // number of neighbors - int num_send = 10000; // number of elements to send/recv + int num_send = 5000; // number of elements to send/recv int nrepeat = 10; // number of repeats of the test // Read command line arguments. 
@@ -139,9 +139,9 @@ int main( int argc, char* argv[] ) nrepeat = atoi( argv[ ++i ] ); } else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { - printf( " -N : number of elements (default: 1000000)\n" ); + printf( " -N : number of elements (default: 20000)\n" ); printf( " -neighs : number of neighbors (default: 6)\n" ); - printf( " -nsend : number of send/recv elements (default: 10000)\n" ); + printf( " -nsend : number of send/recv elements (default: 5000)\n" ); printf( " -nrepeat : number of repetitions (default: 10)\n" ); printf( " -help (-h): print this message\n\n" ); exit( 1 ); From fe099aca4fafb6a97300179b86ef5e7ecc4889c3 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 Apr 2026 11:25:23 -0600 Subject: [PATCH 4/4] Exercises/graph: remove vim temporary files Signed-off-by: Carl Pearson --- Exercises/graph/Begin/CMakeLists.txt~ | 6 - Exercises/graph/Begin/graph_begin.cpp~ | 161 ------------------------- 2 files changed, 167 deletions(-) delete mode 100644 Exercises/graph/Begin/CMakeLists.txt~ delete mode 100644 Exercises/graph/Begin/graph_begin.cpp~ diff --git a/Exercises/graph/Begin/CMakeLists.txt~ b/Exercises/graph/Begin/CMakeLists.txt~ deleted file mode 100644 index 8c7e8b8f..00000000 --- a/Exercises/graph/Begin/CMakeLists.txt~ +++ /dev/null @@ -1,6 +0,0 @@ -cmake_minimum_required(VERSION 3.22) -project(KokkosTutorialExecGraph) -include(../../common.cmake) - -add_executable(graph graph_solution.cpp) -target_link_libraries(graph Kokkos::kokkos) diff --git a/Exercises/graph/Begin/graph_begin.cpp~ b/Exercises/graph/Begin/graph_begin.cpp~ deleted file mode 100644 index a14207ef..00000000 --- a/Exercises/graph/Begin/graph_begin.cpp~ +++ /dev/null @@ -1,161 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include -#include -#include - -#include - -#include -#include - -template -constexpr bool is_view_v = false; - -template -constexpr bool is_view_v> = true; - -template -concept view = is_view_v; - -using policy_t = Kokkos::RangePolicy<>; - -template -void init(D data, P pack_ids) { - Kokkos::parallel_for("Init Data", policy_t(0, data.extent(0)), - KOKKOS_LAMBDA(int i) { data(i) = i; }); - Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); - Kokkos::fill_random(pack_ids, rand_pool64, data.extent(0)); -} - -// CUDA does not support auto return type from functions -// which create host device lambdas -template -struct pack_functor { - D data; - P pack_ids; - B buffer; - KOKKOS_FUNCTION void operator() (int i) const { - buffer(i) = data(pack_ids(i)); - } -}; - -template -auto pack(GraphNode node, D data, P pack_ids, B buffer) { - return node.then_parallel_for("Pack One", policy_t(0, pack_ids.extent(0)), - pack_functor{data, pack_ids, buffer}); -} - -template -struct copy_functor { - Dest d; - Src s; - KOKKOS_FUNCTION void operator() (int i) const { - d(i) = s(i); - } -}; - -template -auto transfer(GraphNode node, R recv, S send) { - auto temp_node = node.then_parallel_for("DeepCopy", policy_t(0, recv.extent(0)), - copy_functor{recv, send}); - return temp_node.then_host("HostThing", [=]() { printf("HostTransfer %p %p\n",recv.data(), send.data()); }); -} - -template -auto unpack(GraphNode node, D data, B buffer) { - return node.then_parallel_for("DeepCopy", policy_t(0, buffer.extent(0)), - copy_functor{data, buffer}); -} - -void mpi_style_iteration(int num_elements, int num_mpi_neighs, int num_sendrecv, int num_repeat) { - 
Kokkos::View data("Data", num_elements + num_sendrecv); - Kokkos::View send_buffer("SendBuf", num_mpi_neighs, num_sendrecv); - Kokkos::View recv_buffer("RecvBuf", num_mpi_neighs, num_sendrecv); - Kokkos::View pack_ids("PackIDS", num_mpi_neighs, num_sendrecv); - init(data, pack_ids); - - Kokkos::Timer timer; - Kokkos::Experimental::Graph graph; - - for(int neigh = 0; neigh < num_mpi_neighs; neigh++) { - auto my_pack_ids = Kokkos::subview(pack_ids, neigh, Kokkos::ALL()); - auto send_buf = Kokkos::subview(send_buffer, neigh, Kokkos::ALL()); - auto recv_buf = Kokkos::subview(recv_buffer, neigh, Kokkos::ALL()); - auto node1 = pack(graph.root_node(), data, my_pack_ids, send_buf); - auto node2 = transfer(node1, recv_buf, send_buf); - auto node3 = unpack(node2, Kokkos::subview(data, Kokkos::pair{num_elements, (int)data.extent(0)}), recv_buf); - } - graph.instantiate(); - Kokkos::fence(); - printf("Graph Create Done\n"); - - double time_create = timer.seconds(); - timer.reset(); - for(int r=0; r < num_repeat; r++) { - graph.submit(); - Kokkos::fence(); - } - double time = timer.seconds(); - printf("Graph Runtime: %lf %lf\n",time*1000, time_create*1000); -} - - -int main( int argc, char* argv[] ) -{ - int64_t N = 1000000; // number of elements - int neighs = 6; // number of neighbors - int num_send = 10000; // number of elements to send/recv - int nrepeat = 10; // number of repeats of the test - - // Read command line arguments. 
- for ( int i = 0; i < argc; i++ ) { - if ( strcmp( argv[ i ], "-N" ) == 0 ) { - N = atoi( argv[ ++i ] ); - printf( " User N is %d\n", N ); - } - else if ( strcmp( argv[ i ], "-neighs" ) == 0 ) { - neighs = atoi( argv[ ++i ] ); - } - else if ( strcmp( argv[ i ], "-nsend" ) == 0 ) { - num_send = atoi( argv[ ++i ] ); - } - else if ( strcmp( argv[ i ], "-nrepeat" ) == 0 ) { - nrepeat = atoi( argv[ ++i ] ); - } - else if ( ( strcmp( argv[ i ], "-h" ) == 0 ) || ( strcmp( argv[ i ], "-help" ) == 0 ) ) { - printf( " -N : number of elements (default: 1000000)\n" ); - printf( " -neighs : number of neighbors (default: 6)\n" ); - printf( " -nsend : number of send/recv elements (default: 10000)\n" ); - printf( " -nrepeat : number of repetitions (default: 10)\n" ); - printf( " -help (-h): print this message\n\n" ); - exit( 1 ); - } - } - - - Kokkos::initialize( argc, argv ); - { - printf("Execute with %li %i %i %i\n",N, neighs, num_send, nrepeat); - mpi_style_iteration(N, neighs, num_send, nrepeat); - } - Kokkos::finalize(); - - return 0; -} -