Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ To build Kokkos with OpenMP and CUDA backend, use:
cd extern/kokkos
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=../../../installs/kokkos -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -G Ninja
cmake .. -DCMAKE_INSTALL_PREFIX=../../../installs/kokkos -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -G Ninja
ninja install
```
For a complete instruction on installing Kokkos, see [Kokkos
Expand Down
19 changes: 11 additions & 8 deletions examples/hdiv/hdiv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,11 @@ void set_geo_spherical(const T alpha, const T R, GeoElemVec& elem_geo) {
T ulen = 2.0 / nx;

T Xloc[3 * ET::HEX_NVERTS];
auto HEX_VERTS_CART = ET::get_hex_verts_cart();
for (index_t ii = 0; ii < ET::HEX_NVERTS; ii++) {
T u = 1.0 * ET::HEX_VERTS_CART[ii][0];
T v = 1.0 * ET::HEX_VERTS_CART[ii][1];
T w = 1.0 * ET::HEX_VERTS_CART[ii][2];
T u = 1.0 * HEX_VERTS_CART[ii][0];
T v = 1.0 * HEX_VERTS_CART[ii][1];
T w = 1.0 * HEX_VERTS_CART[ii][2];

Xloc[3 * ii] = (2.0 * u - 1.0) * a;
Xloc[3 * ii + 1] = (2.0 * v - 1.0) * a;
Expand Down Expand Up @@ -247,6 +248,7 @@ void set_geo_spherical(const T alpha, const T R, GeoElemVec& elem_geo) {
}

// We know the 2-direction will be radial...
auto HEX_BOUND_VERTS = ET::get_hex_bound_verts();
for (index_t face = 0; face < ET::HEX_NBOUNDS; face++) {
for (index_t k = 0; k < nx; k++) {
for (index_t j = 0; j < nx; j++) {
Expand Down Expand Up @@ -274,9 +276,9 @@ void set_geo_spherical(const T alpha, const T R, GeoElemVec& elem_geo) {
T z0 = 0.0;

for (index_t jj = 0; jj < 4; jj++) {
x0 += N[jj] * Xloc[3 * ET::HEX_BOUND_VERTS[face][jj]];
y0 += N[jj] * Xloc[3 * ET::HEX_BOUND_VERTS[face][jj] + 1];
z0 += N[jj] * Xloc[3 * ET::HEX_BOUND_VERTS[face][jj] + 2];
x0 += N[jj] * Xloc[3 * HEX_BOUND_VERTS[face][jj]];
y0 += N[jj] * Xloc[3 * HEX_BOUND_VERTS[face][jj] + 1];
z0 += N[jj] * Xloc[3 * HEX_BOUND_VERTS[face][jj] + 2];
}
T norm = std::sqrt(x0 * x0 + y0 * y0 + z0 * z0);

Expand Down Expand Up @@ -603,11 +605,12 @@ void find_spherical_error(bool write_sphere = false) {
}

// For each face add the faces
auto HEX_BOUND_VERTS = ET::get_hex_bound_verts();
for (index_t face = 0; face < ET::HEX_NBOUNDS; face++) {
for (index_t i = 0; i < 4; i++) {
hex[(face + 1) * ET::HEX_NVERTS + i] = hex[ET::HEX_BOUND_VERTS[face][i]];
hex[(face + 1) * ET::HEX_NVERTS + i] = hex[HEX_BOUND_VERTS[face][i]];
hex[(face + 1) * ET::HEX_NVERTS + i + 4] =
hex_ext[ET::HEX_BOUND_VERTS[face][i]];
hex_ext[HEX_BOUND_VERTS[face][i]];
}
}

Expand Down
220 changes: 207 additions & 13 deletions examples/kokkos/scratchpad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include "Kokkos_UnorderedMap.hpp"
#include "a2dobjs.h"
#include "array.h"
#include "multiphysics/fequadrature.h"
#include "multiphysics/fespace.h"

using namespace std;
using T = double;
Expand Down Expand Up @@ -578,22 +580,214 @@ void test_cuda_axpy(int argc, char* argv[]) {
#endif
}

void subview() {
// constexpr A2D::index_t ndof_per_element = 0; // won't work
constexpr A2D::index_t ndof_per_element = 1; // ok
using ElementDofArray = A2D::MultiArrayNew<A2D::index_t* [ndof_per_element]>;
A2D::index_t nelems = 10;
ElementDofArray element_dof("element_dof", nelems);
A2D::index_t elem = 1;
auto elem_dof = Kokkos::subview(element_dof, elem, Kokkos::ALL);
}

void test_cuda_axpy_with_UVM(int argc, char* argv[]) {
#ifdef KOKKOS_ENABLE_CUDA
using ViewDevice_t =
Kokkos::View<T*, Kokkos::LayoutRight, Kokkos::CudaUVMSpace>;
if (argc == 1) {
std::printf("axpy\nusage: ./scratchpad N, where mat size = 2^N\n");
return;
}

// Allocate x and y on host
I N = pow(2, atoi(argv[1]));
I bytes = N * sizeof(T);
double Mbytes = (double)bytes / 1024 / 1024;

std::printf("Allocating x[%d] on host, size: %.2f MB\n", N, Mbytes);
std::printf("Allocating y[%d] on host, size: %.2f MB\n", N, Mbytes);

// Allocate x and y on device
ViewDevice_t x_device("x_device", N);
ViewDevice_t y_device("y_device", N);

for (I i = 0; i < N; i++) {
x_device(i) = 2.4;
y_device(i) = 0.1;
}

// Perform 100 axpy operations on device and time
Kokkos::Timer timer;
int repeat = 100;
T alpha = 0.01;
for (int i = 0; i < repeat; i++) {
auto loop_body = KOKKOS_LAMBDA(const I index) {
// T alpha = Alpha::get_alpha_array()[0]; // This doesn't work
T alpha = Alpha::value_array[0]; // This works
auto x_slice = Kokkos::subview(x_device, Kokkos::ALL);
y_device(index) += alpha * x_slice(index);
};
Kokkos::parallel_for("axpy", N, loop_body);
Kokkos::fence();
}

double elapse = timer.seconds();
printf("averaged time: %.8f ms\n", elapse * 1e3);

// Compute bandwidth:
// x is read once, y is read once and written once
double bandwidth = 3 * Mbytes / 1024 * repeat / elapse;
printf("averaged bandwidth: %.8f GB/s\n", bandwidth);

// Print values
int len = 10;
if (N < len) {
len = N;
}

for (I i = 0; i < len; i++) {
printf("x[%2d]: %8.2f y[%2d]: %8.2f\n", i, x_device(i), i, y_device(i));
}

// Check maximum error
T max_err = T(0);
T val = T(0);
for (I i = 0; i < N; i++) {
val = fabs(repeat * alpha * 2.4 + 0.1 - y_device(i));
if (val > max_err) {
max_err = val;
}
}

printf("Maximum error: %20.10e\n", max_err);
#endif
}

class ParallelVector {
#ifdef KOKKOS_ENABLE_CUDA
using MemSpace = Kokkos::CudaUVMSpace;
#else
using MemSpace = Kokkos::HostSpace;
#endif
using data_t = Kokkos::View<double*, Kokkos::LayoutRight, MemSpace>;

KOKKOS_FUNCTION void set_one_value(int i, double val) const { data(i) = val; }

public:
ParallelVector(int len) : len(len), data("data", len) {}

void set_values(double val) {
auto loop_body = KOKKOS_CLASS_LAMBDA(int i) { set_one_value(i, val); };

Kokkos::parallel_for("loop", len, loop_body);
Kokkos::fence();
}

private:
int len;
data_t data;
};

struct Head {
Head(double val) : val(val) {}
KOKKOS_FUNCTION double get_val() { return val; };

private:
double val;
};
struct Data {
Data(Head& head, double val, int id = 0) : val(val), id(id), head(head) {}
double val;
int id;
Head& head;
};

// This can build, won't work - ``it is generally not valid to have any pointer
// or reference members in the functor''
void test_cuda_functor_pass_by_ref() {
#ifdef KOKKOS_ENABLE_CUDA
Head head(5.6);
Data data(head, 4.2);
Kokkos::View<double*, Kokkos::LayoutRight, Kokkos::CudaUVMSpace> view("view",
10);
auto loop_body = [=] __device__(int i) { view(i) = data.head.get_val(); };
Kokkos::parallel_for("loop", 10, loop_body);
Kokkos::fence();

for (int i = 0; i < 10; i++) {
std::printf("view[%2d]: %.10f\n", i, view[i]);
}
#endif
}

void test_KOKKOS_ENABLE_CXX17() {
#ifdef KOKKOS_ENABLE_CXX17
printf("KOKKOS_ENABLE_CXX17 is defined\n");
#else
printf("KOKKOS_ENABLE_CXX17 is not defined\n");
#endif
}

void test_fespace() {
// Type and literals
using T = double;
constexpr A2D::index_t dim = 2;
constexpr A2D::index_t order = 3;

using FiniteElementSpace = A2D::FESpace<T, dim, A2D::H1Space<T, dim, dim>>;

// We loop over 10 elements in parallel
int nelems = 10;

#ifdef KOKKOS_ENABLE_CUDA
using MemSpace = Kokkos::CudaUVMSpace;
printf("using CUDAUVMSpace\n");
#else
using MemSpace = Kokkos::HostSpace;
#endif

Kokkos::View<T*, Kokkos::LayoutRight, MemSpace> probe("probe", nelems);

auto loop_body = KOKKOS_LAMBDA(int i) {
FiniteElementSpace cref;
A2D::H1Space<T, dim, dim>& s = cref.template get<0>();
// A2D::H1Space<T, dim, dim> s;
s.get_grad()(0, 0) = 1.2;
s.get_grad()(0, 0) *= 1.5 + 1.2;
probe(i) = s.get_grad()(0, 0);
};

Kokkos::parallel_for("loop", nelems, loop_body);
Kokkos::fence();

for (int i = 0; i < 10; i++) {
std::printf("probe[%2d]: %.10f\n", i, probe(i));
}
}

int main(int argc, char* argv[]) {
Kokkos::initialize();
{ // test_axpy(argc, argv);
// test_matvec(argc, argv);
// test_unordered_set();
// test_subview();
// test_sort();
// test_is_same_layout();
// test_complex();
// test_is_complex();
// test_view_is_allocated();
// test_smart_pointer_behavior();
// test_copy();
// test_parallel_for();
// test_matvec(argc, argv);
// test_unordered_set();
// test_subview();
// test_sort();
// test_is_same_layout();
// test_complex();
// test_is_complex();
// test_view_is_allocated();
// test_smart_pointer_behavior();
// test_copy();
// test_parallel_for();
// test_modify_view_from_const_lambda();
test_cuda_axpy(argc, argv);
// test_cuda_axpy(argc, argv);
// subview();
// test_cuda_axpy_with_UVM(argc, argv);
// ParallelVector pv(10);
// pv.set_values(4.2);
// test_cuda_functor_pass_by_ref();
// test_KOKKOS_ENABLE_CXX17();
test_fespace();
}
Kokkos::finalize();
}
}
12 changes: 6 additions & 6 deletions examples/parallel_element/parallel_element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class TopoElasticityAnalysis {
static constexpr int var_dim = spatial_dim;
static constexpr int block_size = var_dim;

using Vec_t = SolutionVector<T>;
using Vec_t = MultiArrayNew<T *>;
using BSRMat_t = BSRMat<T, block_size, block_size>;

using Quadrature = QuadGaussQuadrature<order>;
Expand Down Expand Up @@ -74,9 +74,9 @@ class TopoElasticityAnalysis {
mesh(conn),
geomesh(conn),
datamesh(conn),
sol(mesh.get_num_dof()),
geo(geomesh.get_num_dof()),
data(datamesh.get_num_dof()),
sol("sol", mesh.get_num_dof()),
geo("geo", geomesh.get_num_dof()),
data("data", datamesh.get_num_dof()),
elem_sol(mesh, sol),
elem_geo(geomesh, geo),
elem_data(datamesh, data) {
Expand Down Expand Up @@ -111,7 +111,7 @@ class TopoElasticityAnalysis {
N = 20;
}

Vec_t res(mesh.get_num_dof());
Vec_t res("res", mesh.get_num_dof());
ElemVec elem_res(mesh, res);
StopWatch watch;

Expand Down Expand Up @@ -166,7 +166,7 @@ int main(int argc, char *argv[]) {
MesherRect2D mesher(nx, ny, lx, ly);
bool randomize = true;
unsigned int seed = 0;
double fraction = 0.2;
double fraction = 0.1;
mesher.set_X_conn<index_t, double>(Xloc.data(), quad.data(), randomize,
seed, fraction);

Expand Down
12 changes: 4 additions & 8 deletions examples/spectral/spectral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void main_body(std::string type, int ar = 1, double h = 1.0,
using Quadrature = HexGaussQuadrature<degree + 1>;
using DataBasis = FEBasis<T>;
using GeoBasis = FEBasis<T, LagrangeH1HexBasis<T, spatial_dim, degree>>;
using DataElemVec = A2D::ElementVector_Serial<T, DataBasis, BasisVecType>;
using DataElemVec = A2D::ElementVector_Empty;
using GeoElemVec = A2D::ElementVector_Serial<T, GeoBasis, BasisVecType>;
using ElemVec = A2D::ElementVector_Serial<T, Basis, BasisVecType>;
using FE =
Expand All @@ -119,8 +119,7 @@ void main_body(std::string type, int ar = 1, double h = 1.0,
using LOrderDataBasis = FEBasis<T>;
using LOrderGeoBasis =
FEBasis<T, LagrangeH1HexBasis<T, spatial_dim, low_degree>>;
using LOrderDataElemVec =
A2D::ElementVector_Serial<T, LOrderDataBasis, BasisVecType>;
using LOrderDataElemVec = A2D::ElementVector_Empty;
using LOrderGeoElemVec =
A2D::ElementVector_Serial<T, LOrderGeoBasis, BasisVecType>;
using LOrderElemVec = A2D::ElementVector_Serial<T, LOrderBasis, BasisVecType>;
Expand Down Expand Up @@ -193,24 +192,21 @@ void main_body(std::string type, int ar = 1, double h = 1.0,

ElementMesh<Basis> mesh(conn);
ElementMesh<GeoBasis> geomesh(conn);
ElementMesh<DataBasis> datamesh(conn);

ElementMesh<LOrderBasis> lorder_mesh(mesh);
ElementMesh<LOrderGeoBasis> lorder_geomesh(geomesh);
ElementMesh<LOrderDataBasis> lorder_datamesh(datamesh);

SolutionVector<T> sol(mesh.get_num_dof());
SolutionVector<T> geo(geomesh.get_num_dof());
SolutionVector<T> data(datamesh.get_num_dof());

DataElemVec elem_data(datamesh, data);
DataElemVec elem_data;
GeoElemVec elem_geo(geomesh, geo);
ElemVec elem_sol(mesh, sol);

// Set the geometry from the node locations
set_geo_from_hex_nodes<GeoBasis>(nhex, hex, Xloc, elem_geo);

LOrderDataElemVec lorder_elem_data(lorder_datamesh, data);
LOrderDataElemVec lorder_elem_data;
LOrderGeoElemVec lorder_elem_geo(lorder_geomesh, geo);
LOrderElemVec lorder_elem_sol(lorder_mesh, sol);

Expand Down
Loading