From 3d9ac42874f1a95b79cbace279eb5f0f5fb841fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:22:04 -0500 Subject: [PATCH 01/82] Esirkepov Eq. 24, 31, 38 (wip) --- src/kernels/currents_deposit.hpp | 691 +++++++++++++++++++++---------- 1 file changed, 470 insertions(+), 221 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 98d00a9b..3e97f40b 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -42,6 +42,75 @@ namespace kernel { const array_t tag; const M metric; const real_t charge, inv_dt; + const int interpolation_order; + + private: + Inline void find_indices_and_S(const int i_pos, + array_t& indices, + array_t& S) { + + // find contributing indices + // ToDo: check if this is correct + const auto i_min = floor( + i_pos - (static_cast(interpolation_order) - ONE) * HALF); + + for (int i = 0; i <= interpolation_order; i++) { + indices[i] = i_min + i; + } + + if constexpr (interpolation_order == 1) { + const auto dx = static_cast(x - indices[0]); + S[0] = ONE - dx; + S[1] = dx; + } else if constexpr (interpolation_order == 2) { + // Esirkepov 2001, Eq. 
24 + const auto dx = static_cast(indices[1] - x); + S[0] = HALF * SQR(HALF + dx); + S[1] = static_cast(0.75) - SQR(dx); + S[2] = HALF * SQR(HALF - dx); + } else { + // throw error + } + } + + Inline void apply_shape_function(array_t& S0, + array_t& S1, + array_t& PS0, + array_t& PS1, + array_t& IS0, + array_t& IS1, + int* i_min, + int* i_max) { + + // check displacement + const auto shift_I = IS0[0] - IS1[0]; + + if (shift_I > 0) { + // positive shift in x1 direction + for (int i = 0; i <= interpolation_order; i++) { + S0[i] = PS0[i]; + S1[i + 1] = PS1[i]; + } + i_min = IS0[0]; + i_max = IS1[interpolation_order]; + } else if (shift_I < 0) { + // negative shift in x1 direction + for (int i = 0; i <= interpolation_order; i++) { + S0[i + 1] = PS0[i]; + S1[i] = PS1[i]; + } + i_min = IS1[0]; + i_max = IS0[interpolation_order]; + } else { + // no shift + for (int i = 0; i <= interpolation_order; i++) { + S0[i] = PS0[i]; + S1[i] = PS1[i]; + } + i_min = IS1[0]; + i_max = IS1[interpolation_order]; + } + } public: /** @@ -68,7 +137,8 @@ namespace kernel { const array_t& tag, const M& metric, real_t charge, - real_t dt) + const real_t dt, + int interpolation_order) : J { scatter_cur } , i1 { i1 } , i2 { i2 } @@ -90,7 +160,8 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } {} + , inv_dt { ONE / dt } + , interpolation_order { interpolation_order } {} /** * @brief Iteration of the loop over particles. 
@@ -143,241 +214,419 @@ namespace kernel { const real_t coeff { weight(p) * charge }; - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - - const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + - static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - - dx1_prev(p)) * - coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * - coeff * inv_dt }; - - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + // ToDo: interpolation_order as parameter + if constexpr (interpolation_order == 0) { + /* + Zig-zag deposit + */ + + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; 
- const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + + dxp_r_1 - dx1_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); - 
J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * - (ONE - Wx1_2) * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * - Wx1_2 * - Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) }; + const real_t Fx2_1 { 
(static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS 
+ 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + if constexpr (D == Dim::_2D) { + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, i2(p) 
+ N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + static_cast( + static_cast(i3(p) > i3_prev(p)) + + i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * + coeff * inv_dt }; + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * + coeff * inv_dt }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + 
N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + 
cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } } - } - } - }; + } else { + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing arrays: + - Shape functions in spatial directions for the particle position + before and after the current timestep. + S0x, S1x, S0y, S1y, S0z, S1z + - Indices this shape function contributes to + IS0, IS1 + - Value of the shape function at the cell positions + PS0, PS1 + - Density composition matrix + Wx, Wy, Wz + */ + + // shape function arrays at time 0 and 1 + vec_t PS0 { ZERO }; + vec_t PS1 { ZERO }; + // indices the shape function contributes to + vec_t IS0 { ZERO }; // ToDo: integer + vec_t IS1 { ZERO }; // ToDo: integer + + // minimum and maximum contributing indices + vec_t i_min { ZERO }; // ToDo: integer + vec_t i_max { ZERO }; // ToDo: integer + + if constexpr (D == Dim::_1D) { + // throw error + } else if constexpr (D == Dim::_2D) { + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + // define weight functions + vec_t Wx { ZERO }; + vec_t Wy { ZERO }; + vec_t Wz { ZERO }; + + /* + x - direction + */ + // shape function in x direction + vec_t S0x { ZERO }; + vec_t S1x { ZERO }; + + // find indices and define shape function + 
find_indices_and_PS(i1(p), IS0, PS0); + find_indices_and_PS(i1_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + + /* + y - direction + */ + // shape function in x direction + vec_t S0y { ZERO }; + vec_t S1y { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i2(p), IS0, PS0); + find_indices_and_PS(i2_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 38 + Wx[i][j] = HALF * (S1x[i] - S0x[i]) * (S0y[j] + S1y[j]); + Wy[i][j] = HALF * (S1x[i] + S0x[i]) * (S0y[j] - S1y[j]); + Wz[i][j] = THIRD * (S1y[j] * (HALF * S0x[i] + S1x[i]) + + S0y[j] * (HALF * S1x[i] + S0x[i])); + } + } + // ToDo: actual J update + + } else if constexpr (D == Dim::_3D) { + + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + + // define weight functions + vec_t + Wx { ZERO }; + vec_t + Wy { ZERO }; + vec_t + Wz { ZERO }; + + /* + x - direction + */ + // shape function in x direction + vec_t S0x { ZERO }; + vec_t S1x { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i1(p), IS0, PS0); + find_indices_and_PS(i1_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + + /* + y - direction + */ + // shape function in y direction + vec_t S0y { ZERO }; + vec_t S1y { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i2(p), IS0, PS0); + find_indices_and_PS(i2_prev(p), IS1, PS1); + + // 
apply shape function + apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + + /* + z - direction + */ + // shape function in z direction + vec_t S0z { ZERO }; + vec_t S1z { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i3(p), IS0, PS0); + find_indices_and_PS(i3_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0z, S1z, PS0, PS1, IS0, IS1, &i_min[2], &i_max[2]); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + for (int k = 0; k < interp_order + 2; ++k) { + // Esirkepov 2001, Eq. 31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } + + // ToDo: actual J update + } + }; -} // namespace kernel + } // namespace kernel #undef i_di_to_Xi From c984fe03cd36646eaaba0bacce970fff76cb4568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:31:11 -0500 Subject: [PATCH 02/82] added THIRD --- src/global/utils/numeric.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index cc1191b6..a2da7727 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -39,6 +39,7 @@ inline constexpr float FIVE = 5.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; +inline constexpr float THIRD = 0.333333f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -54,6 +55,7 @@ inline constexpr double FIVE = 5.0; inline constexpr double TWELVE = 12.0; inline 
constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; +inline constexpr double THIRD = 0.3333333333333333; inline constexpr double INV_2 = 0.5; inline constexpr double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; From 1be51852b9d8f01c1bb43db666bdef1759a9a5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:35:47 -0500 Subject: [PATCH 03/82] bugfix --- src/kernels/currents_deposit.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 3e97f40b..7f9136fe 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -615,7 +615,7 @@ namespace kernel { ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - Wz[i][j][k] = THIRD * (S1z[k] - S0[k]) * + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * (S0x[i] * S0y[j] + S1x[i] * S1y[j] + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); } From 180358ef5389e7a345d996d5ea88d5aab5d7cffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:36:00 -0500 Subject: [PATCH 04/82] formatting --- src/global/utils/numeric.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index a2da7727..9ff262ed 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -39,7 +39,7 @@ inline constexpr float FIVE = 5.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; -inline constexpr float THIRD = 0.333333f; +inline constexpr float THIRD = 0.333333f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; From b81f433c045aaf8af31f6d867d8b72a85224a194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 10 May 2025 19:43:06 -0500 Subject: [PATCH 05/82] redefine vectors to variables + explicit loop 
unrolling for 2D --- src/kernels/currents_deposit.hpp | 657 ++++++++++++++++++++++--------- 1 file changed, 468 insertions(+), 189 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 7f9136fe..841392f5 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,70 +45,101 @@ namespace kernel { const int interpolation_order; private: - Inline void find_indices_and_S(const int i_pos, - array_t& indices, - array_t& S) { - // find contributing indices - // ToDo: check if this is correct - const auto i_min = floor( - i_pos - (static_cast(interpolation_order) - ONE) * HALF); - - for (int i = 0; i <= interpolation_order; i++) { - indices[i] = i_min + i; - } - - if constexpr (interpolation_order == 1) { - const auto dx = static_cast(x - indices[0]); - S[0] = ONE - dx; - S[1] = dx; - } else if constexpr (interpolation_order == 2) { - // Esirkepov 2001, Eq. 24 - const auto dx = static_cast(indices[1] - x); - S[0] = HALF * SQR(HALF + dx); - S[1] = static_cast(0.75) - SQR(dx); - S[2] = HALF * SQR(HALF - dx); - } else { - // throw error - } - } - - Inline void apply_shape_function(array_t& S0, - array_t& S1, - array_t& PS0, - array_t& PS1, - array_t& IS0, - array_t& IS1, - int* i_min, - int* i_max) { - - // check displacement - const auto shift_I = IS0[0] - IS1[0]; - - if (shift_I > 0) { - // positive shift in x1 direction - for (int i = 0; i <= interpolation_order; i++) { - S0[i] = PS0[i]; - S1[i + 1] = PS1[i]; - } - i_min = IS0[0]; - i_max = IS1[interpolation_order]; - } else if (shift_I < 0) { - // negative shift in x1 direction - for (int i = 0; i <= interpolation_order; i++) { - S0[i + 1] = PS0[i]; - S1[i] = PS1[i]; - } - i_min = IS1[0]; - i_max = IS0[interpolation_order]; + Inline void shape_function(real_t* S0_0, + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + real_t* S1_3, + int* i_min, + int* const i_max int_t i, + const real_t dx, + const 
int_t i_prev, + const real_t dx_prev) { + + /* + Shape function per particle is a 4 element array. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let x be the particle position at the current timestep + Let * be the particle position at the previous timestep + + + (-1) 0 1 2 3 + ___________________________________ + | | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______| + | | x | x* | x* | * | // shift_i = 1 + |______|______|______|______|______| + | * | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______| + */ + + // find shift in indices + const auto shift_x { i_prev - i - (dx_prev - dx) }; + + // find indices and define shape function + if (shift_x > 0) { + /* + (-1) 0 1 2 3 + ___________________________________ + | | x | x* | x* | * | // shift_i = 1 + |______|______|______|______|______| + */ + ix_min = i_prev - 2; + ix_max = i + 2; + // shape function, ToDo: fix + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; + + S1_0 = ZERO; + S1_1 = HALF * SQR(HALF + dx); + S1_2 = static_cast(0.75) - SQR(dx); + S1_3 = HALF * SQR(HALF - dx); + } else if (shift_x < 0) { + /* + (-1) 0 1 2 3 + ___________________________________ + | * | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______| + */ + ix_min = i - 2; + ix_max = i_prev + 2; + // shape function, ToDo: fix + S0_0 = ZERO; + S0_1 = HALF * SQR(HALF + dx_prev); + S0_2 = static_cast(0.75) - SQR(dx_prev); + S0_3 = HALF * SQR(HALF - dx_prev); + + S1_0 = HALF * SQR(HALF + dx); + S1_1 = static_cast(0.75) - SQR(dx); + S1_2 = HALF * SQR(HALF - dx); + S1_3 = ZERO; } else { - // no shift - for (int i = 0; i <= interpolation_order; i++) { - S0[i] = PS0[i]; - S1[i] = PS1[i]; - } - i_min = IS1[0]; - i_max = IS1[interpolation_order]; + /* + (-1) 0 1 2 3 + ___________________________________ + | | x* | x* | x* 
| | // shift_i = 0 + |______|______|______|______|______| + */ + ix_min = i - 2; + ix_max = i + 2; + // shape function, ToDo: fix + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; + + S1_0 = HALF * SQR(HALF + dx); + S1_1 = static_cast(0.75) - SQR(dx); + S1_2 = HALF * SQR(HALF - dx); + S1_3 = ZERO; } } @@ -457,89 +488,134 @@ namespace kernel { Higher order charge conserving current deposition based on Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - We need to define the follwowing arrays: + We need to define the follwowing variable: - Shape functions in spatial directions for the particle position before and after the current timestep. - S0x, S1x, S0y, S1y, S0z, S1z - - Indices this shape function contributes to - IS0, IS1 - - Value of the shape function at the cell positions - PS0, PS1 + S0_*, S1_* - Density composition matrix - Wx, Wy, Wz + Wx_*, Wy_*, Wz_* */ - // shape function arrays at time 0 and 1 - vec_t PS0 { ZERO }; - vec_t PS1 { ZERO }; - // indices the shape function contributes to - vec_t IS0 { ZERO }; // ToDo: integer - vec_t IS1 { ZERO }; // ToDo: integer - - // minimum and maximum contributing indices - vec_t i_min { ZERO }; // ToDo: integer - vec_t i_max { ZERO }; // ToDo: integer - - if constexpr (D == Dim::_1D) { - // throw error - } else if constexpr (D == Dim::_2D) { - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // define weight functions - vec_t Wx { ZERO }; - vec_t Wy { ZERO }; - vec_t Wz { ZERO }; - - /* + /* x - direction - */ - // shape function in x direction - vec_t S0x { ZERO }; - vec_t S1x { ZERO }; + */ - // find indices and define shape function - find_indices_and_PS(i1(p), IS0, PS0); - 
find_indices_and_PS(i1_prev(p), IS1, PS1); + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3; + // indices of the shape function + uint ix_min, ix_max; + // find indices and define shape function + shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, + &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, + &ix_min, &ix_max, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); - // apply shape function - apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + if constexpr (D == Dim::_1D) { + // ToDo + } + else if constexpr (D == Dim::_2D) { /* y - direction */ - // shape function in x direction - vec_t S0y { ZERO }; - vec_t S1y { ZERO }; + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; // find indices and define shape function - find_indices_and_PS(i2(p), IS0, PS0); - find_indices_and_PS(i2_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 
38 - Wx[i][j] = HALF * (S1x[i] - S0x[i]) * (S0y[j] + S1y[j]); - Wy[i][j] = HALF * (S1x[i] + S0x[i]) * (S0y[j] - S1y[j]); - Wz[i][j] = THIRD * (S1y[j] * (HALF * S0x[i] + S1x[i]) + - S0y[j] * (HALF * S1x[i] + S0x[i])); - } - } - // ToDo: actual J update - - } else if constexpr (D == Dim::_3D) { + // Unrolled calculations for Wx + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = 
HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF 
* S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + // ToDo: check if this is what I need const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; @@ -548,79 +624,282 @@ namespace kernel { (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * - static_cast(INV_2) }; - - // define weight functions - vec_t - Wx { ZERO }; - vec_t - Wy { ZERO }; - vec_t - Wz { ZERO }; - - /* - x - direction - */ - // shape function in x direction - vec_t S0x { ZERO }; - vec_t S1x { ZERO }; - - // find indices and define shape function - find_indices_and_PS(i1(p), IS0, PS0); - find_indices_and_PS(i1_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + // ToDo: actual J update + auto J_acc = J.access(); + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 
39 + J_acc(N_GHOSTS + i_min[0] + i, + N_GHOSTS + i_min[1] + j, + cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; + } + } + } + else if constexpr (D == Dim::_3D) { /* y - direction */ - // shape function in y direction - vec_t S0y { ZERO }; - vec_t S1y { ZERO }; + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; // find indices and define shape function - find_indices_and_PS(i2(p), IS0, PS0); - find_indices_and_PS(i2_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); /* z - direction */ - // shape function in z direction - vec_t S0z { ZERO }; - vec_t S1z { ZERO }; + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + uint iz_min, iz_max; // find indices and define shape function - find_indices_and_PS(i3(p), IS0, PS0); - find_indices_and_PS(i3_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0z, S1z, PS0, PS1, IS0, IS1, &i_min[2], &i_max[2]); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - for (int k = 0; k < interp_order + 2; ++k) { - // Esirkepov 2001, Eq. 
31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - } - } - } + shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, + &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, + &iz_min, &iz_max, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + + // // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 2001, Eq. 31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } + + // Unrolled calculations for Wx, Wy, and Wz + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * 
S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 
* S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD 
* (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + 
S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); // ToDo: actual J update } From dfc7165c586c0d415ac8dc521373dac3905503f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 10 May 2025 19:48:27 -0500 Subject: [PATCH 06/82] moved interpolation order from variable to compiler directive --- src/kernels/currents_deposit.hpp | 1285 +++++++++++++++--------------- 1 file changed, 639 insertions(+), 646 
deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 841392f5..24dcf17e 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,20 +45,19 @@ namespace kernel { const int interpolation_order; private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - int* const i_max int_t i, - const real_t dx, - const int_t i_prev, - const real_t dx_prev) { + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + real_t* S1_3, + int* i_min, + int* const i_max int_t i, + const real_t dx, + const int_t i_prev, + const real_t dx_prev) { /* Shape function per particle is a 4 element array. @@ -168,8 +167,7 @@ namespace kernel { const array_t& tag, const M& metric, real_t charge, - const real_t dt, - int interpolation_order) + const real_t dt) : J { scatter_cur } , i1 { i1 } , i2 { i2 } @@ -191,8 +189,7 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } - , interpolation_order { interpolation_order } {} + , inv_dt { ONE / dt } {} /** * @brief Iteration of the loop over particles. 
@@ -246,665 +243,661 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter - if constexpr (interpolation_order == 0) { - /* - Zig-zag deposit - */ +#if (SHAPE_FUNCTION_ORDER == 1) + /* + Zig-zag deposit + */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + + static_cast( + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - + dx1_prev(p)) * + coeff * inv_dt }; + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * + coeff * inv_dt }; + + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx1_1 { INV_2 * 
(dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + - dxp_r_1 - dx1_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) }; + const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - + if constexpr (D == Dim::_2D) { const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + 
N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * + (ONE - Wx1_2) * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * + Wx1_2 * + Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * static_cast(INV_2) }; - - const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; - const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i3(p) > i3_prev(p)) + + 
i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * 
Wx1_2 * Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * - static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + - static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * - coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * - coeff * inv_dt }; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - 
J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; - } + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + 
J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + 
J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } - } else { - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ + } +#else // SHAPE_FUNCTION_ORDER + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. 
+ S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* + */ + + /* + x - direction + */ + + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3; + // indices of the shape function + uint ix_min, ix_max; + // find indices and define shape function + shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, + &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, + &ix_min, &ix_max, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { /* - x - direction + y - direction */ // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; + real_t S0y_0, S0y_1, S0y_2, S0y_3; // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; + real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint ix_min, ix_max; + uint iy_min, iy_max; // find indices and define shape function - shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, - &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, - &ix_min, &ix_max, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + + // Calculate weight function + // Unrolled calculations for Wx + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 
= HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * 
(S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; - if constexpr (D == Dim::_1D) { - // ToDo - } - else if constexpr (D == Dim::_2D) { - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min, iy_max; - // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - - // Calculate weight function - // Unrolled calculations for Wx - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - 
const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) 
+ - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; + // ToDo: actual J update + auto J_acc = J.access(); - // ToDo: actual J update - auto J_acc = 
J.access(); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 39 - J_acc(N_GHOSTS + i_min[0] + i, - N_GHOSTS + i_min[1] + j, - cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; - } + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 39 + J_acc(N_GHOSTS + i_min[0] + i, + N_GHOSTS + i_min[1] + j, + cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; } } - else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min, iy_max; - // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - - /* - z - direction - */ - - // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; - // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; - // indices of the shape function - uint iz_min, iz_max; - // find indices and define shape function - shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, - &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, - &iz_min, &iz_max, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - - // // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 
31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } - - // Unrolled calculations for Wx, Wy, and Wz - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + 
S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 
+ S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 
= THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * 
S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - // ToDo: actual J update - } - }; + } else if constexpr (D == Dim::_3D) { + /* + y - direction + */ + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; + // find indices and define shape function + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + + /* + z - direction + */ + + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + uint iz_min, iz_max; + // find indices and define shape function + shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, + &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, + &iz_min, &iz_max, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + + // // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 
2001, Eq. 31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } + + // Unrolled calculations for Wx, Wy, and Wz + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 
* S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * 
(S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const 
auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * 
+ ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + // ToDo: actual J update + }; +#endif // SHAPE_FUNCTION_ORDER } // namespace kernel #undef i_di_to_Xi From f1b8cd7c7732678b9a3d2690c1b13a1d39d88048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:43:45 -0500 Subject: [PATCH 07/82] first attempt at 2D current deposit with Esirkepov --- src/kernels/currents_deposit.hpp | 268 +++++++++++++++++++++---------- 1 file changed, 182 insertions(+), 86 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 24dcf17e..b7a0cb69 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,19 +45,19 @@ namespace kernel { const int interpolation_order; private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - int* const i_max int_t i, - const real_t dx, - const int_t i_prev, - const real_t dx_prev) { + Inline void shape_function(real_t* S0_0, + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + real_t* 
S1_3, + int* i_min, + const index_t i, + const real_t dx, + const index_t i_prev, + const real_t dx_prev) { /* Shape function per particle is a 4 element array. @@ -89,13 +89,12 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - ix_min = i_prev - 2; - ix_max = i + 2; + i_min = i_prev - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; S1_0 = ZERO; S1_1 = HALF * SQR(HALF + dx); @@ -108,13 +107,12 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - ix_min = i - 2; - ix_max = i_prev + 2; + i_min = i - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + dx_prev); - S0_2 = static_cast(0.75) - SQR(dx_prev); - S0_3 = HALF * SQR(HALF - dx_prev); + S0_0 = ZERO; + S0_1 = HALF * SQR(HALF + dx_prev); + S0_2 = static_cast(0.75) - SQR(dx_prev); + S0_3 = HALF * SQR(HALF - dx_prev); S1_0 = HALF * SQR(HALF + dx); S1_1 = static_cast(0.75) - SQR(dx); @@ -127,13 +125,12 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - ix_min = i - 2; - ix_max = i + 2; + i_min = i - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; S1_0 = HALF * SQR(HALF + dx); S1_1 = static_cast(0.75) - SQR(dx); @@ -497,17 +494,25 @@ namespace kernel { */ // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; + real_t S0x_0, S0x_1, S0x_2, S0x_3; // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, 
S1x_3; + real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function - uint ix_min, ix_max; + ncells_t ix_min; // find indices and define shape function - shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, - &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, - &ix_min, &ix_max, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + shape_function(&Sx0_0, + &Sx0_1, + &Sx0_2, + &Sx0_3, + &Sx1_0, + &Sx1_1, + &Sx1_2, + &Sx1_3, + &ix_min, + i1(p), + dx1(p), + i1_prev(p), + dx1_prev(p)); if constexpr (D == Dim::_1D) { // ToDo @@ -518,17 +523,25 @@ namespace kernel { */ // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; + real_t S0y_0, S0y_1, S0y_2, S0y_3; // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; + real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint iy_min, iy_max; + ncells_t iy_min; // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + shape_function(&Sy0_0, + &Sy0_1, + &Sy0_2, + &Sy0_3, + &Sy1_0, + &Sy1_1, + &Sy1_2, + &Sy1_3, + &iy_min, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); // Calculate weight function // Unrolled calculations for Wx @@ -622,15 +635,80 @@ namespace kernel { // ToDo: actual J update auto J_acc = J.access(); - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 39 - J_acc(N_GHOSTS + i_min[0] + i, - N_GHOSTS + i_min[1] + j, - cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; - } - } + // Esirkepov 2001, Eq. 
39 + /* + x - component + */ + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + 
J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + + /* + z - component, simulated direction + */ + const real_t QVz = vp[2] * coeff; + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + } else if constexpr (D == Dim::_3D) { /* y - direction @@ -643,11 +721,20 @@ namespace kernel { // indices of the shape function uint iy_min, iy_max; // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + shape_function(&Sy0_0, + &Sy0_1, + &Sy0_2, + &Sy0_3, + &Sy1_0, + &Sy1_1, + &Sy1_2, + &Sy1_3, + &iy_min, + &iy_max, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); /* z - direction @@ -660,31 +747,40 @@ namespace kernel { // indices of the shape function uint iz_min, iz_max; // find indices and define shape function - shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, - &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, - &iz_min, &iz_max, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - - // // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 
2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } + shape_function(&Sz0_0, + &Sz0_1, + &Sz0_2, + &Sz0_3, + &Sz1_0, + &Sz1_1, + &Sz1_2, + &Sz1_3, + &iz_min, + &iz_max, + i3(p), + dx3(p), + i3_prev(p), + dx3_prev(p)); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + for (int k = 0; k < interp_order + 2; ++k) { + // Esirkepov 2001, Eq. 31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } // Unrolled calculations for Wx, Wy, and Wz const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * From cb56279be440121cef766c4a704970e0810b107c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:50:36 -0500 Subject: [PATCH 08/82] more local calculation of weight functions --- src/kernels/currents_deposit.hpp | 137 ++++++++++++++++--------------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index b7a0cb69..2cfd679a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -543,8 +543,25 @@ namespace kernel { i2_prev(p), dx2_prev(p)); - 
// Calculate weight function - // Unrolled calculations for Wx + + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + // ToDo: actual J update + auto J_acc = J.access(); + + // Esirkepov 2001, Eq. 39 + /* + x - component + */ + // Calculate weight function - unrolled const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); @@ -565,6 +582,31 @@ namespace kernel { const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ // Unrolled calculations for Wy const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * 
(S0y_1 - S1y_1); @@ -586,6 +628,32 @@ namespace kernel { const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + + /* + z - component, simulated direction + */ // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + S0y_0 * (HALF * S1x_0 + S0x_0)); @@ -623,71 +691,6 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // ToDo: actual J update - auto J_acc = J.access(); - - // Esirkepov 2001, Eq. 
39 - /* - x - component - */ - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; - - /* - y - component - */ - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - 
J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; - - - /* - z - component, simulated direction - */ const real_t QVz = vp[2] * coeff; J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; From 9f50dea3ac9c8902ba6331052245f7db0a7a5121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:54:40 -0500 Subject: [PATCH 09/82] switch to row-major order --- src/kernels/currents_deposit.hpp | 86 ++++++++++++++++---------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 2cfd679a..81fafca5 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -543,8 +543,6 @@ namespace kernel { i2_prev(p), dx2_prev(p)); - - // ToDo: check if this is what I need const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * @@ -582,26 +580,26 @@ namespace kernel { const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx1) 
+= Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; /* @@ -628,28 +626,27 @@ namespace kernel { const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; 
J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; /* z - component, simulated direction @@ -691,25 +688,26 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const real_t QVz = vp[2] * coeff; + const real_t QVz = vp[2] * coeff; + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 2, 
iy_min + 2, cur::jx3) += QVz * Wz_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { From 6b32791b27146f5d82826de35d71d94bdee85afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 15:04:56 -0500 Subject: [PATCH 10/82] first attempt at current deposit jx1 in 3D --- src/kernels/currents_deposit.hpp | 91 ++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 81fafca5..8e6be3d9 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -720,7 +720,7 @@ namespace kernel { // shape function at current timestep real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint iy_min, iy_max; + uint iy_min; // find indices and define shape function shape_function(&Sy0_0, &Sy0_1, @@ -731,7 +731,6 @@ namespace kernel { &Sy1_2, &Sy1_3, &iy_min, - &iy_max, i2(p), dx2(p), i2_prev(p), @@ -746,7 +745,7 @@ namespace kernel { // shape function at current timestep real_t S1z_0, S1z_1, S1z_2, S1z_3; // indices of the shape function - uint iz_min, iz_max; + uint iz_min; // find indices and define shape function shape_function(&Sz0_0, &Sz0_1, @@ -757,7 +756,6 @@ namespace kernel { &Sz1_2, &Sz1_3, &iz_min, - &iz_max, i3(p), dx3(p), i3_prev(p), @@ -992,6 +990,91 @@ namespace kernel { ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + + J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 3, 
iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + // + // + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + // + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + // + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + // + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) 
+= Qdxdt * Wx_2_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + // + // + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + // + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + // + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + // + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + // + // + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, 
cur::jx1) += Qdxdt * Wx_1_2_3; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + // ToDo: actual J update }; #endif // SHAPE_FUNCTION_ORDER From bf788a9a88fbde5827a500d2ad49aa5fc9e7ea4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:06:32 -0500 Subject: [PATCH 11/82] more efficient memory access (this time for real) --- src/kernels/currents_deposit.hpp | 122 +++++++++++++++---------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 8e6be3d9..78e02f0e 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -691,23 +691,23 @@ namespace kernel { const real_t QVz = vp[2] * coeff; J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - 
J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { @@ -993,86 +993,86 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - 
J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; // // - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * 
Wx_1_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt* Wx_1_3_0; J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; // // - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + 
J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; // // - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - J_acc(ix_min + 1, iy_min + 
2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; // ToDo: actual J update From b6a2811c8e333a745e6c9d13e49d00400b0f52fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:35:49 -0500 Subject: [PATCH 12/82] first attempt at y/z deposit in 3D --- src/kernels/currents_deposit.hpp | 711 ++++++++++++++++++++++++++++--- 1 file changed, 652 insertions(+), 59 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 78e02f0e..a99241c3 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -500,14 +500,14 @@ namespace kernel { // indices of the shape function ncells_t ix_min; // find indices and define shape function - shape_function(&Sx0_0, - &Sx0_1, - &Sx0_2, - &Sx0_3, - &Sx1_0, - &Sx1_1, - &Sx1_2, - &Sx1_3, + shape_function(&S0x_0, + &S0x_1, + &S0x_2, + &S0x_3, + &S1x_0, + &S1x_1, + &S1x_2, + &S1x_3, &ix_min, i1(p), dx1(p), @@ -529,14 +529,14 @@ namespace kernel { // indices of the shape function ncells_t iy_min; // find indices and define shape function - shape_function(&Sy0_0, - &Sy0_1, - 
&Sy0_2, - &Sy0_3, - &Sy1_0, - &Sy1_1, - &Sy1_2, - &Sy1_3, + shape_function(&S0y_0, + &S0y_1, + &S0y_2, + &S0y_3, + &S1y_0, + &S1y_1, + &S1y_2, + &S1y_3, &iy_min, i2(p), dx2(p), @@ -695,7 +695,7 @@ namespace kernel { J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; @@ -722,14 +722,14 @@ namespace kernel { // indices of the shape function uint iy_min; // find indices and define shape function - shape_function(&Sy0_0, - &Sy0_1, - &Sy0_2, - &Sy0_3, - &Sy1_0, - &Sy1_1, - &Sy1_2, - &Sy1_3, + shape_function(&S0y_0, + &S0y_1, + &S0y_2, + &S0y_3, + &S1y_0, + &S1y_1, + &S1y_2, + &S1y_3, &iy_min, i2(p), dx2(p), @@ -747,14 +747,14 @@ namespace kernel { // indices of the shape function uint iz_min; // find indices and define shape function - shape_function(&Sz0_0, - &Sz0_1, - &Sz0_2, - &Sz0_3, - &Sz1_0, - &Sz1_1, - &Sz1_2, - &Sz1_3, + shape_function(&S0z_0, + &S0z_1, + &S0z_2, + &S0z_3, + &S1z_0, + &S1z_1, + &S1z_2, + &S1z_3, &iz_min, i3(p), dx3(p), @@ -992,33 +992,33 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min, iy_min + 
1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; // // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += 
Qdxdt * Wx_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; @@ -1028,13 +1028,13 @@ namespace kernel { J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt* Wx_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; // // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; @@ -1055,7 +1055,7 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; // // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; @@ -1070,12 +1070,605 @@ namespace kernel { J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * 
Wx_3_2_2; J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; - // ToDo: actual J update + /* + y-component + */ + // i = 0 + const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_3 + S1x_0 * 
S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto 
Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + 
+ HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_2_2 = 
THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const real_t Qdydt = coeff * inv_dt * dxp_r_2; + + J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // + // + J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + 
J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // + // + J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * 
Wy_2_2_3; + // + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // + // + J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + + /* + z - component + */ + const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 
+ + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + + const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + + const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + + const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + + // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 
+ + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + + const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + + const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + + const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 + const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_2 =
THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + + const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + + const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + + const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + + // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_1 =
THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + + const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + + const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + + const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + + const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + + J_acc(ix_min, iy_min, iz_min, cur::jx3) += 
Qdzdt * Wz_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // + // + J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // + J_acc(ix_min + 1, iy_min + 
3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // + // + J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // + // + J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + J_acc(ix_min + 3, 
iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; + // + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; }; #endif // SHAPE_FUNCTION_ORDER } // namespace kernel From bbd2b3c2cce8ddcdba13d9e083d67a018acffd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 13:27:12 -0500 Subject: [PATCH 13/82] memory reorder --- src/kernels/currents_deposit.hpp | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index a99241c3..789653ee 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -583,23 +583,23 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - - 
J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; /* @@ -629,23 +629,23 @@ namespace kernel { const real_t Qdydt = coeff * inv_dt * dyp_r_1; J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, 
iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; /* From 4f344eb73cdff2c8d537416d7e583854c5bbd0de Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 15 May 2025 17:14:49 -0400 Subject: [PATCH 14/82] esirkepov WIP --- extern/Kokkos | 2 +- input.example.toml | 49 +- src/engines/srpic.hpp | 63 +- src/framework/parameters.cpp | 12 +- src/framework/tests/comm_nompi.cpp | 2 - src/kernels/currents_deposit.hpp | 2929 ++++++++++++++-------------- src/kernels/faraday_mink.hpp | 160 +- src/kernels/tests/deposit.cpp | 42 +- src/kernels/tests/faraday_mink.cpp | 9 +- 9 files changed, 1670 insertions(+), 1598 deletions(-) diff --git a/extern/Kokkos b/extern/Kokkos index 175257a5..1b1383c6 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 +Subproject commit 1b1383c6001f3bfe9fe309ca923c2d786600cc79 diff --git a/input.example.toml b/input.example.toml index 629d4dac..80ab3e59 100644 --- a/input.example.toml +++ b/input.example.toml @@ -196,15 +196,35 @@ # @default: 0 current_filters = "" - [algorithms.toggles] - # Toggle for the field solver: - # @type bool + [algorithms.deposit] + # Enable the current deposition + # @type: bool # @default: true - fieldsolver = "" - # Toggle for the current deposition: - # @type bool + enable = "" + # Order of the particle shape function + # @type: int + # @default: 1 + order = "" + + # @TODO: fix fieldsolver params below + [algorithms.fieldsolver] + # Enable the EM fieldsolver + # @type: bool # @default: true - deposit = "" + enable = "" + # Yee - all 0.0 - default + # 1D + deltax = 
-0.065 + # 2D + deltay = -0.065 + betaxy = -0.065 + betayx = -0.065 + # 3D - not yet tested + deltaz = 0.0 + betaxz = 0.0 + betazx = 0.0 + betayz = 0.0 + betazy = 0.0 [algorithms.timestep] # Courant-Friedrichs-Lewy number: @@ -249,21 +269,6 @@ # @type: float: > 0 gamma_rad = "" - [algorithms.fieldsolver] - # Yee - all 0.0 - default - # 1D - deltax = -0.065 - # 2D - deltay = -0.065 - betaxy = -0.065 - betayx = -0.065 - # 3D - not yet tested - deltaz = 0.0 - betaxz = 0.0 - betazx = 0.0 - betayz = 0.0 - betazy = 0.0 - [particles] # Fiducial number of particles per cell: # @required diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 62cddd8d..91c84f65 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -76,9 +76,9 @@ namespace ntt { void step_forward(timer::Timers& timers, domain_t& dom) override { const auto fieldsolver_enabled = m_params.template get( - "algorithms.toggles.fieldsolver"); + "algorithms.fieldsolver.enable"); const auto deposit_enabled = m_params.template get( - "algorithms.toggles.deposit"); + "algorithms.deposit.enable"); const auto clear_interval = m_params.template get( "particles.clear_interval"); @@ -203,7 +203,7 @@ namespace ntt { "algorithms.fieldsolver.betayz"); const auto betazy = m_params.template get( "algorithms.fieldsolver.betazy"); - real_t coeff1, coeff2; + real_t coeff1, coeff2; if constexpr (M::Dim == Dim::_2D) { coeff1 = dT / SQR(dx); coeff2 = dT; @@ -508,6 +508,7 @@ namespace ntt { void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); + auto shape_order = params.template get("algorithms.deposit.order"); for (auto& species : domain.species) { if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0) or cmp::AlmostZero_host(species.charge())) { @@ -520,31 +521,37 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - 
kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, - species.i2, - species.i3, - species.i1_prev, - species.i2_prev, - species.i3_prev, - species.dx1, - species.dx2, - species.dx3, - species.dx1_prev, - species.dx2_prev, - species.dx3_prev, - species.ux1, - species.ux2, - species.ux3, - species.phi, - species.weight, - species.tag, - domain.mesh.metric, - (real_t)(species.charge()), - dt)); + if (shape_order == 1) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + domain.mesh.metric, (real_t)(species.charge()), dt)); + // clang-format on + } else if (shape_order == 2) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + domain.mesh.metric, (real_t)(species.charge()), dt)); + // clang-format on + } else { + raise::Error("Invalid shape order for current deposition", HERE); + } } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); } diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 6ec7f271..77ea029f 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -409,13 +409,15 @@ namespace ntt { "current_filters", defaults::current_filters)); - /* [algorithms.toggles] ------------------------------------------------- */ - set("algorithms.toggles.fieldsolver", - 
toml::find_or(toml_data, "algorithms", "toggles", "fieldsolver", true)); - set("algorithms.toggles.deposit", - toml::find_or(toml_data, "algorithms", "toggles", "deposit", true)); + /* [algorithms.deposit] ------------------------------------------------- */ + set("algorithms.deposit.enable", + toml::find_or(toml_data, "algorithms", "deposit", "enable", true)); + set("algorithms.deposit.order", + toml::find_or(toml_data, "algorithms", "deposit", "order", 1)); /* [algorithms.fieldsolver] --------------------------------------------- */ + set("algorithms.fieldsolver.enable", + toml::find_or(toml_data, "algorithms", "fieldsolver", "enable", true)); set("algorithms.fieldsolver.deltax", toml::find_or(toml_data, "algorithms", diff --git a/src/framework/tests/comm_nompi.cpp b/src/framework/tests/comm_nompi.cpp index f9581c1e..c7646ef0 100644 --- a/src/framework/tests/comm_nompi.cpp +++ b/src/framework/tests/comm_nompi.cpp @@ -7,8 +7,6 @@ #include "arch/kokkos_aliases.h" #include "utils/numeric.h" -#include "framework/domain/comm_mpi.hpp" - #include #include diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 789653ee..5ef52bba 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -26,7 +26,7 @@ namespace kernel { /** * @brief Algorithm for the current deposition */ - template + template class DepositCurrents_kernel { static_assert(M::is_metric, "M must be a metric class"); static constexpr auto D = M::Dim; @@ -42,23 +42,20 @@ namespace kernel { const array_t tag; const M metric; const real_t charge, inv_dt; - const int interpolation_order; - - private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - const index_t i, - const real_t dx, - const index_t i_prev, - const real_t dx_prev) { + Inline void shape_function_2nd(real_t& S0_0, + real_t& S0_1, + real_t& S0_2, + real_t& S0_3, + 
real_t& S1_0, + real_t& S1_1, + real_t& S1_2, + real_t& S1_3, + ncells_t& i_min, + const index_t& i, + const real_t& dx, + const index_t& i_prev, + const real_t& dx_prev) const { /* Shape function per particle is a 4 element array. We need to find which indices are contributing to the shape function @@ -79,7 +76,16 @@ namespace kernel { */ // find shift in indices - const auto shift_x { i_prev - i - (dx_prev - dx) }; + const auto dx_less_half = static_cast(dx < static_cast(0.5)); + const auto dx_prev_less_half = static_cast( + dx_prev < static_cast(0.5)); + const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; + + const real_t dx_prev_diff = static_cast(dx_prev) + + static_cast( + dx_prev < static_cast(0.5)); + const real_t dx_diff = static_cast(dx) + + static_cast(dx < static_cast(0.5)); // find indices and define shape function if (shift_x > 0) { @@ -89,17 +95,17 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + i_min = i_prev - dx_prev_less_half + N_GHOSTS; + + S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + dx); - S1_2 = static_cast(0.75) - SQR(dx); - S1_3 = HALF * SQR(HALF - dx); + S1_1 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_2 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_3 = HALF * SQR(HALF - dx_diff); } else if (shift_x < 0) { /* (-1) 0 1 2 3 @@ -107,16 +113,16 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + dx_prev); - S0_2 = static_cast(0.75) - SQR(dx_prev); - S0_3 = HALF * SQR(HALF 
- dx_prev); - - S1_0 = HALF * SQR(HALF + dx); - S1_1 = static_cast(0.75) - SQR(dx); - S1_2 = HALF * SQR(HALF - dx); + i_min = i - dx_less_half + N_GHOSTS; + + S0_0 = ZERO; + S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_2 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_3 = HALF * SQR(HALF - dx_prev_diff); + + S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; } else { /* @@ -125,16 +131,16 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; - - S1_0 = HALF * SQR(HALF + dx); - S1_1 = static_cast(0.75) - SQR(dx); - S1_2 = HALF * SQR(HALF - dx); + i_min = i - dx_less_half + N_GHOSTS; + + S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_3 = ZERO; + + S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; } } @@ -186,7 +192,12 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } {} + , inv_dt { ONE / dt } { + raise::ErrorIf( + (O == 2u and N_GHOSTS < 2), + "Order of interpolation is 2, but number of ghost cells is < 2", + HERE); + } /** * @brief Iteration of the loop over particles. 
@@ -240,1438 +251,1452 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter -#if (SHAPE_FUNCTION_ORDER == 1) - /* - Zig-zag deposit - */ + if constexpr (O == 1u) { + /* + Zig-zag deposit + */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - - const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + - static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - - dx1_prev(p)) * - coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * - coeff * inv_dt }; - - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - const real_t Wx2_1 { INV_2 * (dxp_r_2 + 
dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; - const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + + dxp_r_1 - dx1_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) 
+= Fx3_1 * Wx1_2 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * - (ONE - Wx1_2) * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * - Wx1_2 * - Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) 
}; + const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); 
- J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + if constexpr (D == Dim::_2D) { + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + 
J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + static_cast( + static_cast(i3(p) > i3_prev(p)) + + i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * + coeff * inv_dt }; + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * + coeff * inv_dt }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + 
N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 
1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } } - } -#else // SHAPE_FUNCTION_ORDER - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ - - /* - x - direction - */ - - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; - // indices of the shape function - ncells_t ix_min; - // find indices and define shape function - shape_function(&S0x_0, - &S0x_1, - &S0x_2, - &S0x_3, - &S1x_0, - &S1x_1, - &S1x_2, - &S1x_3, - &ix_min, - i1(p), - dx1(p), - i1_prev(p), - dx1_prev(p)); - - if constexpr (D == Dim::_1D) { - // ToDo - } else if constexpr (D == Dim::_2D) { - + } else if constexpr (O == 2u) { /* - y - direction + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. 
+ S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* */ - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - // find indices and define shape function - shape_function(&S0y_0, - &S0y_1, - &S0y_2, - &S0y_3, - &S1y_0, - &S1y_1, - &S1y_2, - &S1y_3, - &iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // ToDo: actual J update - auto J_acc = J.access(); - - // Esirkepov 2001, Eq. 39 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - - J_acc(ix_min, iy_min, 
cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; - - /* - y - component - */ - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) 
* (S0y_3 - S1y_3); - - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; - - /* - z - component, simulated direction - */ - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); 
- const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - const real_t QVz = vp[2] * coeff; - - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - - } else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min; - // find indices and define shape function - shape_function(&S0y_0, - 
&S0y_1, - &S0y_2, - &S0y_3, - &S1y_0, - &S1y_1, - &S1y_2, - &S1y_3, - &iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - /* - z - direction + x - direction */ // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; + real_t S0x_0, S0x_1, S0x_2, S0x_3; // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; + real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function - uint iz_min; + ncells_t ix_min; // find indices and define shape function - shape_function(&S0z_0, - &S0z_1, - &S0z_2, - &S0z_3, - &S1z_0, - &S1z_1, - &S1z_2, - &S1z_3, - &iz_min, - i3(p), - dx3(p), - i3_prev(p), - dx3_prev(p)); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - for (int k = 0; k < interp_order + 2; ++k) { - // Esirkepov 2001, Eq. 31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - } - } - } - - // Unrolled calculations for Wx, Wy, and Wz - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * 
(S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + 
S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + 
S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_3_0_3 = 
THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - - J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - // - 
J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; - J_acc(ix_min + 1, iy_min + 3, 
iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; - J_acc(ix_min + 3, 
iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + shape_function_2nd(S0x_0, + S0x_1, + S0x_2, + S0x_3, + S1x_0, + S1x_1, + S1x_2, + S1x_3, + ix_min, + i1(p), + dx1(p), + i1_prev(p), + dx1_prev(p)); - /* - y-component - */ - // i = 0 - const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * 
(S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_2_2 = THIRD * 
(S1y_2 - S0y_2) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * 
S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_2_1 = THIRD * (S1y_2 - 
S0y_2) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const real_t Qdydt = coeff * inv_dt * dxp_r_2; - - J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, 
cur::jx2) += Qdydt * Wy_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - J_acc(ix_min + 2, iy_min + 2, 
iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { - /* - z - component - */ - const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_2 
= THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - - const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - - const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - - const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - - // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_1 
= THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - - const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - - const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - - const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * 
(S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - // Unrolled loop for Wz[i][j][k] 
with i = 3 and interp_order + 2 = 4 - const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - - const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - - const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - - const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_3 = THIRD * 
(S1z_3 - S0z_3) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - - const real_t Qdzdt = coeff * inv_dt * dxp_r_3; - - J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * 
Wz_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) 
+= Qdzdt * Wz_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; - }; -#endif // SHAPE_FUNCTION_ORDER - } // namespace kernel + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + ncells_t iy_min; + // find indices and define shape function + shape_function_2nd(S0y_0, + S0y_1, + S0y_2, + S0y_3, + S1y_0, + S1y_1, + S1y_2, + S1y_3, + iy_min, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); + + // Esirkepov 2001, Eq. 
39 + /* + x - component + */ + // Calculate weight function - unrolled + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = 
HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + + const auto delta_x = static_cast(i1(p) == i1_prev(p)) * + static_cast(dx1(p) - dx1_prev(p)) + + static_cast(i1(p) == i1_prev(p) + 1) * + static_cast(dx1(p) + (1 - dx1_prev(p))) + + static_cast(i1(p) 
== i1_prev(p) - 1) * + static_cast((1 - dx1(p)) + dx1_prev(p)); + + const auto delta_y = static_cast(i2(p) == i2_prev(p)) * + static_cast(dx2(p) - dx2_prev(p)) + + static_cast(i2(p) == i2_prev(p) + 1) * + static_cast(dx2(p) + (1 - dx2_prev(p))) + + static_cast(i2(p) == i2_prev(p) - 1) * + static_cast((1 - dx2(p)) + dx2_prev(p)); + + const real_t Qdxdt = -coeff * inv_dt * delta_x; + const real_t Qdydt = -coeff * inv_dt * delta_y; + const real_t QVz = vp[2] * coeff; + + // @TODO + jx_local_0_0 = Qdxdt * Wx_0_0; + jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + + auto J_acc = J.access(); + + J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; + + // J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + // J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + // J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + // J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + // + // J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + // J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + // J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + // J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + // + // J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + // J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + // J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + // J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + // + // J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, 
cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + /* + z - component, simulated direction + */ + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + + } else if constexpr (D == Dim::_3D) { + // /* + // y - direction + // */ + // + // // shape function at previous timestep + // real_t S0y_0, S0y_1, S0y_2, S0y_3; + // // shape function at current timestep + // real_t S1y_0, S1y_1, S1y_2, S1y_3; + // // indices of the shape function + // uint iy_min; + // // find indices and define 
shape function + // shape_function_2nd(S0y_0, + // S0y_1, + // S0y_2, + // S0y_3, + // S1y_0, + // S1y_1, + // S1y_2, + // S1y_3, + // iy_min, + // i2(p), + // dx2(p), + // i2_prev(p), + // dx2_prev(p)); + // + // /* + // z - direction + // */ + // + // // shape function at previous timestep + // real_t S0z_0, S0z_1, S0z_2, S0z_3; + // // shape function at current timestep + // real_t S1z_0, S1z_1, S1z_2, S1z_3; + // // indices of the shape function + // uint iz_min; + // // find indices and define shape function + // shape_function_2nd(S0z_0, + // S0z_1, + // S0z_2, + // S0z_3, + // S1z_0, + // S1z_1, + // S1z_2, + // S1z_3, + // iz_min, + // i3(p), + // dx3(p), + // i3_prev(p), + // dx3_prev(p)); + // + // // Calculate weight function + // // for (int i = 0; i < interp_order + 2; ++i) { + // // for (int j = 0; j < interp_order + 2; ++j) { + // // for (int k = 0; k < interp_order + 2; ++k) { + // // // Esirkepov 2001, Eq. 31 + // // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + // // + // // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + // // + // // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // // } + // // } + // // } + // // + // // Unrolled calculations for Wx, Wy, and Wz + // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + 
+ // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + 
S0y_0 * S1z_1)); + // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto 
Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + 
// ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_0 + S1y_3 * 
S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += 
Qdxdt * Wx_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + // 
J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + // + // /* + // y-component + // */ + // // i = 0 + // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) 
* + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * 
S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_2_0_0 = THIRD * 
(S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF 
* (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_3_1 
= THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt * dxp_r_2; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += 
Qdydt * Wy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + // 
J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // 
+ // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * 
S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * 
+ // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // + // const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // + // const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // + // const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + 
S0y_3 * S1x_2)); + // const auto Wz_2_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * 
(S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // // 
+ // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, 
iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + } // dimension + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index 1112e56e..d92057c7 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -14,6 +14,7 @@ #include "arch/kokkos_aliases.h" #include "utils/error.h" +#include "utils/numeric.h" namespace kernel::mink { using namespace ntt; @@ -42,13 +43,21 @@ namespace kernel::mink { * ! 
2D: coeff1 = dt / dx^2, coeff2 = dt * ! 3D: coeff1 = dt / dx */ - Faraday_kernel(const ndfield_t& EB, real_t coeff1, real_t coeff2 - , real_t deltax, real_t deltay, real_t betaxy, real_t betayx - , real_t deltaz, real_t betaxz, real_t betazx, real_t betayz - , real_t betazy) + Faraday_kernel(const ndfield_t& EB, + real_t coeff1, + real_t coeff2, + real_t deltax = ZERO, + real_t deltay = ZERO, + real_t betaxy = ZERO, + real_t betayx = ZERO, + real_t deltaz = ZERO, + real_t betaxz = ZERO, + real_t betazx = ZERO, + real_t betayz = ZERO, + real_t betazy = ZERO) : EB { EB } , coeff1 { coeff1 } - , coeff2 { coeff2 } + , coeff2 { coeff2 } , deltax { deltax } , deltay { deltay } , betaxy { betaxy } @@ -59,17 +68,15 @@ namespace kernel::mink { , betayz { betayz } , betazy { betazy } {} - - Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - const auto alphax = ONE - THREE * deltax; - EB(i1, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) - + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); - EB(i1, em::bx3) += coeff1 * ( - - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) - - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + const auto alphax = ONE - THREE * deltax; + EB(i1, em::bx2) += coeff1 * + (+alphax * (EB(i1 + 1, em::ex3) - EB(i1, em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + EB(i1, em::bx3) += coeff1 * + (-alphax * (EB(i1 + 1, em::ex2) - EB(i1, em::ex2)) - + deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -79,25 +86,28 @@ namespace kernel::mink { if constexpr (D == Dim::_2D) { const auto alphax = ONE - TWO * betaxy - THREE * deltax; const auto alphay = ONE - TWO * betayx - THREE * deltay; - EB(i1, i2, em::bx1) += coeff1 * ( - - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) - - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) - - betayx * (EB(i1 + 
1, i2 + 1, em::ex3) - EB(i1 + 1, i2 , em::ex3)) - - betayx * (EB(i1 - 1, i2 + 1, em::ex3) - EB(i1 - 1, i2 , em::ex3))); - EB(i1, i2, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, i2 , em::ex3) - EB(i1 , i2 , em::ex3)) - + deltax * (EB(i1 + 2, i2 , em::ex3) - EB(i1 - 1, i2 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1 , i2 + 1, em::ex3)) - + betaxy * (EB(i1 + 1, i2 - 1, em::ex3) - EB(i1 , i2 - 1, em::ex3))); - EB(i1, i2, em::bx3) += coeff2 * ( - + alphay * (EB(i1 , i2 + 1, em::ex1) - EB(i1 , i2 , em::ex1)) - + deltay * (EB(i1 , i2 + 2, em::ex1) - EB(i1 , i2 - 1, em::ex1)) - + betayx * (EB(i1 + 1, i2 + 1, em::ex1) - EB(i1 + 1, i2 , em::ex1)) - + betayx * (EB(i1 - 1, i2 + 1, em::ex1) - EB(i1 - 1, i2 , em::ex1)) - - alphax * (EB(i1 + 1, i2 , em::ex2) - EB(i1 , i2 , em::ex2)) - - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) - - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); + EB(i1, i2, em::bx1) += + coeff1 * + (-alphay * (EB(i1, i2 + 1, em::ex3) - EB(i1, i2, em::ex3)) - + deltay * (EB(i1, i2 + 2, em::ex3) - EB(i1, i2 - 1, em::ex3)) - + betayx * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1 + 1, i2, em::ex3)) - + betayx * (EB(i1 - 1, i2 + 1, em::ex3) - EB(i1 - 1, i2, em::ex3))); + EB(i1, i2, em::bx2) += + coeff1 * + (+alphax * (EB(i1 + 1, i2, em::ex3) - EB(i1, i2, em::ex3)) + + deltax * (EB(i1 + 2, i2, em::ex3) - EB(i1 - 1, i2, em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1, i2 + 1, em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, em::ex3) - EB(i1, i2 - 1, em::ex3))); + EB(i1, i2, em::bx3) += + coeff2 * + (+alphay * (EB(i1, i2 + 1, em::ex1) - EB(i1, i2, em::ex1)) + + deltay * (EB(i1, i2 + 2, em::ex1) - EB(i1, i2 - 1, em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, em::ex1) - EB(i1 + 1, i2, em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, em::ex1) - EB(i1 - 1, i2, em::ex1)) - + alphax * (EB(i1 + 1, i2, em::ex2) - EB(i1, i2, em::ex2)) - + deltax * (EB(i1 + 2, 
i2, em::ex2) - EB(i1 - 1, i2, em::ex2)) - + betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1, i2 + 1, em::ex2)) - + betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1, i2 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D != 2"); @@ -109,45 +119,51 @@ namespace kernel::mink { const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; - EB(i1, i2, i3, em::bx1) += coeff1 * ( - + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 , em::ex2)) - + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) - + betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 + 1, i2 , i3 , em::ex2)) - + betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) - + betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) - + betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - - alphay * (EB(i1 , i2 + 1, i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) - - deltay * (EB(i1 , i2 + 2, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) - - betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 + 1, i2 , i3 , em::ex3)) - - betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) - - betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) - - betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex3) - EB(i1 , i2 , i3 - 1, em::ex3))); - EB(i1, i2, i3, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, i2 , i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) - + deltax * (EB(i1 + 2, i2 , i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 , i2 + 1, i3 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) - + betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) - + betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex3) - 
EB(i1 , i2 , i3 - 1, em::ex3)) - - alphaz * (EB(i1 , i2 , i3 + 1, em::ex1) - EB(i1 , i2 , i3 , em::ex1)) - - deltaz * (EB(i1 , i2 , i3 + 2, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) - - betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) - - betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) - - betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 + 1, i3 , em::ex1)) - - betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1))); - EB(i1, i2, i3, em::bx3) += coeff1 * ( - + alphay * (EB(i1 , i2 + 1, i3 , em::ex1) - EB(i1 , i2 , i3 , em::ex1)) - + deltay * (EB(i1 , i2 + 2, i3 , em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1)) - + betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) - + betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) - + betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 , i3 + 1, em::ex1)) - + betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) - - alphax * (EB(i1 + 1, i2 , i3 , em::ex2) - EB(i1 , i2 , i3 , em::ex2)) - - deltax * (EB(i1 + 2, i2 , i3 , em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) - - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); + EB(i1, i2, i3, em::bx1) += + coeff1 * + (+alphaz * (EB(i1, i2, i3 + 1, em::ex2) - EB(i1, i2, i3, em::ex2)) + + deltaz * (EB(i1, i2, i3 + 2, em::ex2) - EB(i1, i2, i3 - 1, em::ex2)) + + betazx * (EB(i1 + 1, i2, i3 + 1, em::ex2) - EB(i1 + 1, i2, i3, em::ex2)) + + betazx * (EB(i1 - 1, i2, i3 + 1, em::ex2) - EB(i1 - 1, i2, i3, em::ex2)) + + betazy * (EB(i1, i2 + 1, i3 + 1, em::ex2) - EB(i1, i2 + 1, i3, em::ex2)) + + betazy * (EB(i1, i2 - 1, i3 + 1, em::ex2) - EB(i1, i2 - 1, i3, em::ex2)) - + alphay * (EB(i1, 
i2 + 1, i3, em::ex3) - EB(i1, i2, i3, em::ex3)) - + deltay * (EB(i1, i2 + 2, i3, em::ex3) - EB(i1, i2 - 1, i3, em::ex3)) - + betayx * (EB(i1 + 1, i2 + 1, i3, em::ex3) - EB(i1 + 1, i2, i3, em::ex3)) - + betayx * (EB(i1 - 1, i2 + 1, i3, em::ex3) - EB(i1 - 1, i2, i3, em::ex3)) - + betayz * (EB(i1, i2 + 1, i3 + 1, em::ex3) - EB(i1, i2, i3 + 1, em::ex3)) - + betayz * + (EB(i1, i2 + 1, i3 - 1, em::ex3) - EB(i1, i2, i3 - 1, em::ex3))); + EB(i1, i2, i3, em::bx2) += + coeff1 * + (+alphax * (EB(i1 + 1, i2, i3, em::ex3) - EB(i1, i2, i3, em::ex3)) + + deltax * (EB(i1 + 2, i2, i3, em::ex3) - EB(i1 - 1, i2, i3, em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, i3, em::ex3) - EB(i1, i2 + 1, i3, em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, i3, em::ex3) - EB(i1, i2 - 1, i3, em::ex3)) + + betaxz * (EB(i1 + 1, i2, i3 + 1, em::ex3) - EB(i1, i2, i3 + 1, em::ex3)) + + betaxz * (EB(i1 + 1, i2, i3 - 1, em::ex3) - EB(i1, i2, i3 - 1, em::ex3)) - + alphaz * (EB(i1, i2, i3 + 1, em::ex1) - EB(i1, i2, i3, em::ex1)) - + deltaz * (EB(i1, i2, i3 + 2, em::ex1) - EB(i1, i2, i3 - 1, em::ex1)) - + betazx * (EB(i1 + 1, i2, i3 + 1, em::ex1) - EB(i1 + 1, i2, i3, em::ex1)) - + betazx * (EB(i1 - 1, i2, i3 + 1, em::ex1) - EB(i1 - 1, i2, i3, em::ex1)) - + betazy * (EB(i1, i2 + 1, i3 + 1, em::ex1) - EB(i1, i2 + 1, i3, em::ex1)) - + betazy * + (EB(i1, i2 - 1, i3 + 1, em::ex1) - EB(i1, i2 - 1, i3, em::ex1))); + EB(i1, i2, i3, em::bx3) += + coeff1 * + (+alphay * (EB(i1, i2 + 1, i3, em::ex1) - EB(i1, i2, i3, em::ex1)) + + deltay * (EB(i1, i2 + 2, i3, em::ex1) - EB(i1, i2 - 1, i3, em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, i3, em::ex1) - EB(i1 + 1, i2, i3, em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, i3, em::ex1) - EB(i1 - 1, i2, i3, em::ex1)) + + betayz * (EB(i1, i2 + 1, i3 + 1, em::ex1) - EB(i1, i2, i3 + 1, em::ex1)) + + betayz * (EB(i1, i2 + 1, i3 - 1, em::ex1) - EB(i1, i2, i3 - 1, em::ex1)) - + alphax * (EB(i1 + 1, i2, i3, em::ex2) - EB(i1, i2, i3, em::ex2)) - + deltax * (EB(i1 + 2, i2, i3, em::ex2) - EB(i1 - 1, i2, i3, 
em::ex2)) - + betaxy * (EB(i1 + 1, i2 + 1, i3, em::ex2) - EB(i1, i2 + 1, i3, em::ex2)) - + betaxy * (EB(i1 + 1, i2 - 1, i3, em::ex2) - EB(i1, i2 - 1, i3, em::ex2)) - + betaxz * (EB(i1 + 1, i2, i3 + 1, em::ex2) - EB(i1, i2, i3 + 1, em::ex2)) - + betaxz * + (EB(i1 + 1, i2, i3 - 1, em::ex2) - EB(i1, i2, i3 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index e6967eb1..d64e4bb2 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -124,7 +124,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - kernel::DepositCurrents_kernel(J_scat, + kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -136,31 +136,49 @@ void testDeposit(const std::vector& res, Kokkos::Experimental::contribute(J, J_scat); - real_t SumDivJ { 0.0 }; + const auto range = Kokkos::MDRangePolicy>( + { N_GHOSTS, N_GHOSTS }, + { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); + + real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; Kokkos::parallel_reduce( "SumDivJ", - Kokkos::MDRangePolicy>({ N_GHOSTS, N_GHOSTS }, - { nx1 + N_GHOSTS, nx2 + N_GHOSTS }), + range, Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1) - J(i - 1, j, cur::jx1) + J(i, j, cur::jx2) - J(i, j - 1, cur::jx2); }, SumDivJ); + Kokkos::parallel_reduce( + "SumJx", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, + SumJx); + + Kokkos::parallel_reduce( + "SumJy", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, + SumJy); + auto J_h = Kokkos::create_mirror_view(J); Kokkos::deep_copy(J_h, J); if (not cmp::AlmostZero(SumDivJ)) { throw std::logic_error("DepositCurrents_kernel::SumDivJ != 0"); } - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), - "DepositCurrents_kernel::Jx1 
is incorrect"); - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), - "DepositCurrents_kernel::Jx2 is incorrect"); - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy1, "", acc), - "DepositCurrents_kernel::Jy1 is incorrect"); - errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy2, "", acc), - "DepositCurrents_kernel::Jy2 is incorrect"); + + std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; + std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), + // "DepositCurrents_kernel::Jx1 is incorrect"); + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), + // "DepositCurrents_kernel::Jx2 is incorrect"); + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy1, "", acc), + // "DepositCurrents_kernel::Jy1 is incorrect"); + // errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy2, "", acc), + // "DepositCurrents_kernel::Jy2 is incorrect"); } auto main(int argc, char* argv[]) -> int { diff --git a/src/kernels/tests/faraday_mink.cpp b/src/kernels/tests/faraday_mink.cpp index 74c2b9b1..7394d9c0 100644 --- a/src/kernels/tests/faraday_mink.cpp +++ b/src/kernels/tests/faraday_mink.cpp @@ -4,6 +4,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "metrics/minkowski.h" @@ -108,7 +109,7 @@ void testFaraday(const std::vector& res) { const real_t sx = constant::TWO_PI, sy = 4.0 * constant::PI; const auto metric = Minkowski { res, - {{ ZERO, sx }, { ZERO, sy }} + { { ZERO, sx }, { ZERO, sy } } }; auto emfield = ndfield_t { "emfield", res[0] + 2 * N_GHOSTS, @@ -116,7 +117,7 @@ void testFaraday(const std::vector& res) { const std::size_t i1min = N_GHOSTS, i1max = res[0] + N_GHOSTS; const std::size_t i2min = N_GHOSTS, i2max = res[1] + N_GHOSTS; const auto range = CreateRangePolicy({ i1min, 
i2min }, - { i1max, i2max }); + { i1max, i2max }); const auto range_ext = CreateRangePolicy( { 0, 0 }, { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS }); @@ -212,7 +213,7 @@ void testFaraday(const std::vector& res) { sz = constant::TWO_PI; const auto metric = Minkowski { res, - {{ ZERO, sx }, { ZERO, sy }, { ZERO, sz }} + { { ZERO, sx }, { ZERO, sy }, { ZERO, sz } } }; auto emfield = ndfield_t { "emfield", res[0] + 2 * N_GHOSTS, @@ -222,7 +223,7 @@ void testFaraday(const std::vector& res) { const std::size_t i2min = N_GHOSTS, i2max = res[1] + N_GHOSTS; const std::size_t i3min = N_GHOSTS, i3max = res[2] + N_GHOSTS; const auto range = CreateRangePolicy({ i1min, i2min, i3min }, - { i1max, i2max, i3max }); + { i1max, i2max, i3max }); const auto range_ext = CreateRangePolicy( { 0, 0, 0 }, { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS, res[2] + 2 * N_GHOSTS }); From 1e437bab05c042dfab0082eec0e15fb18e0d2c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 16:42:02 -0500 Subject: [PATCH 15/82] added missing recursive J update --- src/kernels/currents_deposit.hpp | 601 +++++-------------------------- 1 file changed, 88 insertions(+), 513 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5ef52bba..761ae8ab 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -654,59 +654,101 @@ namespace kernel { const real_t Qdxdt = -coeff * inv_dt * delta_x; const real_t Qdydt = -coeff * inv_dt * delta_y; - const real_t QVz = vp[2] * coeff; - - // @TODO - jx_local_0_0 = Qdxdt * Wx_0_0; - jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + const real_t QVz = -coeff * vp[2]; + + // Esirkepov - Eq. 
32 + // x-component + const auto jx_local_0_0 = -Qdxdt * Wx_0_0; + const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + const auto jx_local_2_0 = jx_local_1_0 - Qdxdt * Wx_2_0; + const auto jx_local_3_0 = jx_local_2_0 - Qdxdt * Wx_3_0; + + const auto jx_local_0_1 = -Qdxdt * Wx_0_1; + const auto jx_local_1_1 = jx_local_0_1 - Qdxdt * Wx_1_1; + const auto jx_local_2_1 = jx_local_1_1 - Qdxdt * Wx_2_1; + const auto jx_local_3_1 = jx_local_2_1 - Qdxdt * Wx_3_1; + + const auto jx_local_0_2 = -Qdxdt * Wx_0_2; + const auto jx_local_1_2 = jx_local_0_2 - Qdxdt * Wx_1_2; + const auto jx_local_2_2 = jx_local_1_2 - Qdxdt * Wx_2_2; + const auto jx_local_3_2 = jx_local_2_2 - Qdxdt * Wx_3_2; + + const auto jx_local_0_3 = -Qdxdt * Wx_0_3; + const auto jx_local_1_3 = jx_local_0_3 - Qdxdt * Wx_1_3; + const auto jx_local_2_3 = jx_local_1_3 - Qdxdt * Wx_2_3; + const auto jx_local_3_3 = jx_local_2_3 - Qdxdt * Wx_3_3; + + // y-component + const auto jy_local_0_0 = -Qdydt * Wy_0_0; + const auto jy_local_1_0 = jy_local_0_0 - Qdydt * Wy_1_0; + const auto jy_local_2_0 = jy_local_1_0 - Qdydt * Wy_2_0; + const auto jy_local_3_0 = jy_local_2_0 - Qdydt * Wy_3_0; + + const auto jy_local_0_1 = -Qdydt * Wy_0_1; + const auto jy_local_1_1 = jy_local_0_1 - Qdydt * Wy_1_1; + const auto jy_local_2_1 = jy_local_1_1 - Qdydt * Wy_2_1; + const auto jy_local_3_1 = jy_local_2_1 - Qdydt * Wy_3_1; + + const auto jy_local_0_2 = -Qdydt * Wy_0_2; + const auto jy_local_1_2 = jy_local_0_2 - Qdydt * Wy_1_2; + const auto jy_local_2_2 = jy_local_1_2 - Qdydt * Wy_2_2; + const auto jy_local_3_2 = jy_local_2_2 - Qdydt * Wy_3_2; + + const auto jy_local_0_3 = -Qdydt * Wy_0_3; + const auto jy_local_1_3 = jy_local_0_3 - Qdydt * Wy_1_3; + const auto jy_local_2_3 = jy_local_1_3 - Qdydt * Wy_2_3; + const auto jy_local_3_3 = jy_local_2_3 - Qdydt * Wy_3_3; + /* + Current update + */ auto J_acc = J.access(); + /* + x - component + */ J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += 
jx_local_1_0; - - // J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - // J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - // J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - // J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - // - // J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - // J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - // J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - // J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - // - // J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - // J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - // J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - // J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - // - // J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + J_acc(ix_min, iy_min + 1, cur::jx1) += jx_local_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_local_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_local_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_local_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_local_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_local_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_local_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_local_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_local_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_local_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += jx_local_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_local_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_local_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_local_3_3; /* y - component */ - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, 
cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + J_acc(ix_min, iy_min, cur::jx2) += jy_local_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_local_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_local_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += jy_local_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_local_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_local_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_local_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_local_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_local_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_local_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_local_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_local_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_local_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_local_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_local_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_local_3_3; /* z - component, simulated direction @@ -1233,471 +1275,4 @@ namespace kernel { // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); 
- // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * 
S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt * dxp_r_2; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - // J_acc(ix_min, iy_min + 2, 
iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // // - // 
J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min 
+ 1, cur::jx2) += Qdydt * Wy_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * 
(S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // 
HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto 
Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // 
const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // - // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, 
cur::jx3) += Qdzdt * Wz_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - // J_acc(ix_min + 2, iy_min, 
iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += 
Qdzdt * Wz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; - } // dimension - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } - } - }; -} // namespace kernel - -#undef i_di_to_Xi - -#endif // KERNELS_CURRENTS_DEPOSIT_HPP + \ No newline at end of file From 51a4f69d0cfdec84d73332a43ae0dbae4c42e38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:38:41 -0500 Subject: [PATCH 16/82] fix comment --- src/kernels/currents_deposit.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 761ae8ab..bccd8bac 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -61,8 +61,8 @@ namespace kernel { We need to find which indices are contributing to the shape function For this we first compute the indices of the particle position - Let x be the particle position at the current timestep - Let * be the particle position at the previous timestep + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep (-1) 0 1 2 3 From cae1eef0be99c0dd8ab439aca9770eac26a32a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 19:07:55 -0500 Subject: [PATCH 17/82] fix accidental deletion of commented lines --- src/kernels/currents_deposit.hpp | 469 ++++++++++++++++++++++++++++++- 1 file changed, 468 insertions(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index bccd8bac..5f2a6e05 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -1275,4 +1275,471 @@ namespace kernel { // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * // (S0x_2 * S0z_0 + 
S1x_2 * S1z_0 + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - \ No newline at end of file + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * 
S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt * dxp_r_2; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + // 
J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * 
Wy_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // // + // J_acc(ix_min + 
3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // + // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const 
auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + 
// const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // 
(S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) 
* + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // // + // J_acc(ix_min, 
iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + // 
J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, 
cur::jx3) += Qdzdt * Wz_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + } // dimension + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } + } + }; +} // namespace kernel + +#undef i_di_to_Xi + +#endif // KERNELS_CURRENTS_DEPOSIT_HPP \ No newline at end of file From b87500e842528335d5b5e48636d46681fbaf7460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 16 May 2025 10:54:57 -0500 Subject: [PATCH 18/82] fix in y current deposit --- src/kernels/currents_deposit.hpp | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5f2a6e05..0ea46774 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -124,7 +124,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; - } else { + } else if (shift_x == 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -142,6 +142,8 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; + } else { + raise::Error("Invalid shift in indices", HERE); } } @@ -652,11 +654,11 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = -coeff * inv_dt * delta_x; - const real_t Qdydt = -coeff * inv_dt * delta_y; - const real_t QVz = -coeff * vp[2]; + const real_t Qdxdt = coeff * inv_dt * delta_x; + const real_t Qdydt = coeff * inv_dt * delta_y; + const real_t QVz = coeff * vp[2]; - // Esirkepov - Eq. 32 + // Esirkepov - Eq. 
39 // x-component const auto jx_local_0_0 = -Qdxdt * Wx_0_0; const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; @@ -680,24 +682,24 @@ namespace kernel { // y-component const auto jy_local_0_0 = -Qdydt * Wy_0_0; - const auto jy_local_1_0 = jy_local_0_0 - Qdydt * Wy_1_0; - const auto jy_local_2_0 = jy_local_1_0 - Qdydt * Wy_2_0; - const auto jy_local_3_0 = jy_local_2_0 - Qdydt * Wy_3_0; - - const auto jy_local_0_1 = -Qdydt * Wy_0_1; - const auto jy_local_1_1 = jy_local_0_1 - Qdydt * Wy_1_1; - const auto jy_local_2_1 = jy_local_1_1 - Qdydt * Wy_2_1; - const auto jy_local_3_1 = jy_local_2_1 - Qdydt * Wy_3_1; - - const auto jy_local_0_2 = -Qdydt * Wy_0_2; - const auto jy_local_1_2 = jy_local_0_2 - Qdydt * Wy_1_2; - const auto jy_local_2_2 = jy_local_1_2 - Qdydt * Wy_2_2; - const auto jy_local_3_2 = jy_local_2_2 - Qdydt * Wy_3_2; - - const auto jy_local_0_3 = -Qdydt * Wy_0_3; - const auto jy_local_1_3 = jy_local_0_3 - Qdydt * Wy_1_3; - const auto jy_local_2_3 = jy_local_1_3 - Qdydt * Wy_2_3; - const auto jy_local_3_3 = jy_local_2_3 - Qdydt * Wy_3_3; + const auto jy_local_0_1 = jy_local_0_0 - Qdydt * Wy_0_1; + const auto jy_local_0_2 = jy_local_0_1 - Qdydt * Wy_0_2; + const auto jy_local_0_3 = jy_local_0_2 - Qdydt * Wy_0_3; + + const auto jy_local_1_0 = -Qdydt * Wy_1_0; + const auto jy_local_1_1 = jy_local_1_0 - Qdydt * Wy_1_1; + const auto jy_local_1_2 = jy_local_1_1 - Qdydt * Wy_1_2; + const auto jy_local_1_3 = jy_local_1_2 - Qdydt * Wy_1_3; + + const auto jy_local_2_0 = -Qdydt * Wy_2_0; + const auto jy_local_2_1 = jy_local_2_0 - Qdydt * Wy_2_1; + const auto jy_local_2_2 = jy_local_2_1 - Qdydt * Wy_2_2; + const auto jy_local_2_3 = jy_local_2_2 - Qdydt * Wy_2_3; + + const auto jy_local_3_0 = -Qdydt * Wy_3_0; + const auto jy_local_3_1 = jy_local_3_0 - Qdydt * Wy_3_1; + const auto jy_local_3_2 = jy_local_3_1 - Qdydt * Wy_3_2; + const auto jy_local_3_3 = jy_local_3_2 - Qdydt * Wy_3_3; /* Current update From f56afef035d075cac1f92f750581624495ba40da Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 16 May 2025 14:11:47 -0500 Subject: [PATCH 19/82] bugfix in parameter access --- src/engines/srpic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 91c84f65..850355a6 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -508,7 +508,7 @@ namespace ntt { void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); - auto shape_order = params.template get("algorithms.deposit.order"); + auto shape_order = m_params.template get("algorithms.deposit.order"); for (auto& species : domain.species) { if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0) or cmp::AlmostZero_host(species.charge())) { From 63dc8a9949d6048e1d525cb4a12397c6ebaa14a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 09:16:07 -0500 Subject: [PATCH 20/82] updates to J update indexing --- src/kernels/currents_deposit.hpp | 227 +++++++++++++++---------------- 1 file changed, 111 insertions(+), 116 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0ea46774..be59328d 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,6 +52,7 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, + real_t& update_x2, const index_t& i, const real_t& dx, const index_t& i_prev, @@ -79,6 +80,7 @@ namespace kernel { const auto dx_less_half = static_cast(dx < static_cast(0.5)); const auto dx_prev_less_half = static_cast( dx_prev < static_cast(0.5)); + const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; const real_t dx_prev_diff = static_cast(dx_prev) + @@ -95,7 +97,8 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - dx_prev_less_half + N_GHOSTS; + i_min = i_prev - dx_prev_less_half + N_GHOSTS; 
+ update_x2 = ONE; S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); @@ -113,7 +116,8 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; + i_min = i - dx_less_half + N_GHOSTS; + update_x2 = ONE; S0_0 = ZERO; S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); @@ -131,7 +135,8 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; + i_min = i - dx_less_half + N_GHOSTS; + update_x2 = ZERO; S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); @@ -513,20 +518,15 @@ namespace kernel { real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function ncells_t ix_min; + real_t update_x2; // find indices and define shape function - shape_function_2nd(S0x_0, - S0x_1, - S0x_2, - S0x_3, - S1x_0, - S1x_1, - S1x_2, - S1x_3, - ix_min, - i1(p), - dx1(p), - i1_prev(p), - dx1_prev(p)); + // clang-format off + shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, + S1x_0, S1x_1, S1x_2, S1x_3, + ix_min, update_x2, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + // clang-format on if constexpr (D == Dim::_1D) { // ToDo @@ -542,21 +542,16 @@ namespace kernel { real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function ncells_t iy_min; + real_t update_y2; // find indices and define shape function - shape_function_2nd(S0y_0, - S0y_1, - S0y_2, - S0y_3, - S1y_0, - S1y_1, - S1y_2, - S1y_3, - iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - + // clang-format off + shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + S1y_0, S1y_1, S1y_2, S1y_3, + iy_min, update_y2, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + // Esirkepov 2001, Eq. 
39 /* x - component @@ -654,52 +649,52 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = coeff * inv_dt * delta_x; - const real_t Qdydt = coeff * inv_dt * delta_y; - const real_t QVz = coeff * vp[2]; + const real_t Qdxdt = -coeff; // * inv_dt * delta_x; + const real_t Qdydt = -coeff; // * inv_dt * delta_y; + const real_t QVz = -coeff * vp[2]; // Esirkepov - Eq. 39 // x-component - const auto jx_local_0_0 = -Qdxdt * Wx_0_0; - const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; - const auto jx_local_2_0 = jx_local_1_0 - Qdxdt * Wx_2_0; - const auto jx_local_3_0 = jx_local_2_0 - Qdxdt * Wx_3_0; - - const auto jx_local_0_1 = -Qdxdt * Wx_0_1; - const auto jx_local_1_1 = jx_local_0_1 - Qdxdt * Wx_1_1; - const auto jx_local_2_1 = jx_local_1_1 - Qdxdt * Wx_2_1; - const auto jx_local_3_1 = jx_local_2_1 - Qdxdt * Wx_3_1; - - const auto jx_local_0_2 = -Qdxdt * Wx_0_2; - const auto jx_local_1_2 = jx_local_0_2 - Qdxdt * Wx_1_2; - const auto jx_local_2_2 = jx_local_1_2 - Qdxdt * Wx_2_2; - const auto jx_local_3_2 = jx_local_2_2 - Qdxdt * Wx_3_2; - - const auto jx_local_0_3 = -Qdxdt * Wx_0_3; - const auto jx_local_1_3 = jx_local_0_3 - Qdxdt * Wx_1_3; - const auto jx_local_2_3 = jx_local_1_3 - Qdxdt * Wx_2_3; - const auto jx_local_3_3 = jx_local_2_3 - Qdxdt * Wx_3_3; + const auto jx_0_0 = -Qdxdt * Wx_0_0; + const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; + const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; + const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; + + const auto jx_0_1 = -Qdxdt * Wx_0_1; + const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; + const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; + const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; + + const auto jx_0_2 = -Qdxdt * Wx_0_2; + const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; + const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; + const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; + + const auto jx_0_3 = -Qdxdt * Wx_0_3; + const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; + const auto 
jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; // y-component - const auto jy_local_0_0 = -Qdydt * Wy_0_0; - const auto jy_local_0_1 = jy_local_0_0 - Qdydt * Wy_0_1; - const auto jy_local_0_2 = jy_local_0_1 - Qdydt * Wy_0_2; - const auto jy_local_0_3 = jy_local_0_2 - Qdydt * Wy_0_3; - - const auto jy_local_1_0 = -Qdydt * Wy_1_0; - const auto jy_local_1_1 = jy_local_1_0 - Qdydt * Wy_1_1; - const auto jy_local_1_2 = jy_local_1_1 - Qdydt * Wy_1_2; - const auto jy_local_1_3 = jy_local_1_2 - Qdydt * Wy_1_3; - - const auto jy_local_2_0 = -Qdydt * Wy_2_0; - const auto jy_local_2_1 = jy_local_2_0 - Qdydt * Wy_2_1; - const auto jy_local_2_2 = jy_local_2_1 - Qdydt * Wy_2_2; - const auto jy_local_2_3 = jy_local_2_2 - Qdydt * Wy_2_3; - - const auto jy_local_3_0 = -Qdydt * Wy_3_0; - const auto jy_local_3_1 = jy_local_3_0 - Qdydt * Wy_3_1; - const auto jy_local_3_2 = jy_local_3_1 - Qdydt * Wy_3_2; - const auto jy_local_3_3 = jy_local_3_2 - Qdydt * Wy_3_3; + const auto jy_0_0 = -Qdydt * Wy_0_0; + const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; + const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; + const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; + + const auto jy_1_0 = -Qdydt * Wy_1_0; + const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; + const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; + const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; + + const auto jy_2_0 = -Qdydt * Wy_2_0; + const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; + const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; + + const auto jy_3_0 = -Qdydt * Wy_3_0; + const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; + const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; + const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; /* Current update @@ -709,48 +704,48 @@ namespace kernel { /* x - component */ - J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += jx_local_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_local_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += 
jx_local_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_local_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_local_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_local_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_local_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_local_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_local_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_local_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += jx_local_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_local_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_local_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_local_3_3; + J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += update_y2 * jx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += update_y2 * jx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += update_x2 * jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += update_x2 * jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; + + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x3 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x3 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x3 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x3 * jx_3_3; /* y - component */ - J_acc(ix_min, iy_min, cur::jx2) += jy_local_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_local_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_local_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += jy_local_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_local_1_0; - J_acc(ix_min + 1, 
iy_min + 1, cur::jx2) += jy_local_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_local_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_local_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_local_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_local_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_local_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_local_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_local_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_local_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_local_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_local_3_3; + J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += update_x2 * jy_3_0; + + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += update_x2 * jy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += update_y2 * jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += update_y2 * jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += update_y2 * jy_2_2; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += update_y2 * update_x2 * jy_3_2; + + // J_acc(ix_min, iy_min + 3, cur::jx2) += update_y3 * jy_0_3; + // J_acc(ix_min + 1, iy_min + 3, cur::jx2) += update_y3 * jy_1_3; + // J_acc(ix_min + 2, iy_min + 3, cur::jx2) += update_y3 * jy_2_3; + // J_acc(ix_min + 3, iy_min + 3, cur::jx2) += update_x3 * jy_3_3; /* z - component, simulated direction @@ -758,22 +753,22 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_0_3; J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; 
J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_1_3; J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_2_3; - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { // /* From 822cb9652aa3943faf632b6e49659c28ab130f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 12:24:50 -0500 Subject: [PATCH 21/82] fixed shift calculation --- src/kernels/currents_deposit.hpp | 90 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index be59328d..ace98f13 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,11 +52,11 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, - real_t& update_x2, + real_t& update_i2, const index_t& i, - const real_t& dx, + const real_t& di, const index_t& i_prev, - const real_t& dx_prev) const { + const real_t& di_prev) const { /* Shape function per particle is a 4 element array. 
We need to find which indices are contributing to the shape function @@ -77,17 +77,18 @@ namespace kernel { */ // find shift in indices - const auto dx_less_half = static_cast(dx < static_cast(0.5)); - const auto dx_prev_less_half = static_cast( - dx_prev < static_cast(0.5)); + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); - const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; + const auto shift_x = (i - di_less_half) - (i_prev - di_prev_less_half); - const real_t dx_prev_diff = static_cast(dx_prev) + - static_cast( - dx_prev < static_cast(0.5)); - const real_t dx_diff = static_cast(dx) + - static_cast(dx < static_cast(0.5)); + // find the minimum index of the shape function + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function + const auto i_center_prev = static_cast(i_min + 1 - i_prev); + const auto i_center = static_cast(i_min + 1 - i); // find indices and define shape function if (shift_x > 0) { @@ -97,18 +98,17 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - dx_prev_less_half + N_GHOSTS; - update_x2 = ONE; + update_i2 = ONE; - S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_2 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_3 = HALF * SQR(HALF - dx_diff); + S1_1 = HALF * SQR(HALF + (i_center - di)); + S1_2 = static_cast(0.75) - SQR(i_center - di); + S1_3 = HALF * SQR(HALF - (i_center - di)); } else if (shift_x < 0) { /* (-1) 0 1 2 3 @@ -116,17 +116,16 @@ namespace kernel { | * | x* 
| x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; - update_x2 = ONE; + update_i2 = ONE; S0_0 = ZERO; - S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_2 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_3 = HALF * SQR(HALF - dx_prev_diff); + S0_1 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_2 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_3 = HALF * SQR(HALF - (i_center_prev - di_prev)); - S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_2 = HALF * SQR(HALF - dx_diff); + S1_0 = HALF * SQR(HALF + (i_center - di)); + S1_1 = static_cast(0.75) - SQR(i_center - di); + S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; } else if (shift_x == 0) { /* @@ -135,21 +134,23 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; - update_x2 = ZERO; + update_i2 = ZERO; - S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); S0_3 = ZERO; - S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_2 = HALF * SQR(HALF - dx_diff); + S1_0 = HALF * SQR(HALF + (i_center - di)); + S1_1 = static_cast(0.75) - SQR(i_center - di); + S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; } else { raise::Error("Invalid shift in indices", HERE); } + + // account for ghost cells here to shorten J update expression + i_min += N_GHOSTS; } public: @@ -551,7 +552,7 @@ namespace kernel { i2(p), dx2(p), i2_prev(p), dx2_prev(p)); // clang-format on - + // Esirkepov 2001, Eq. 
39 /* x - component @@ -649,8 +650,8 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = -coeff; // * inv_dt * delta_x; - const real_t Qdydt = -coeff; // * inv_dt * delta_y; + const real_t Qdxdt = -coeff; + const real_t Qdydt = -coeff; const real_t QVz = -coeff * vp[2]; // Esirkepov - Eq. 39 @@ -719,10 +720,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x3 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x3 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x3 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x3 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component @@ -768,7 +769,8 @@ namespace kernel { J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * QVz * Wz_3_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * + QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { // /* From 4ebb9944233984a244399a4ca62e21abb2c9d544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 15:22:34 -0500 Subject: [PATCH 22/82] bugfixes --- src/kernels/currents_deposit.hpp | 42 +++++++++++--------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ace98f13..6be22025 100644 --- 
a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -81,17 +81,17 @@ namespace kernel { const int di_prev_less_half = static_cast( di_prev < static_cast(0.5)); - const auto shift_x = (i - di_less_half) - (i_prev - di_prev_less_half); + const auto shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); // find the minimum index of the shape function i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); // center index of the shape function - const auto i_center_prev = static_cast(i_min + 1 - i_prev); - const auto i_center = static_cast(i_min + 1 - i); + const auto i_center_prev = static_cast(1 - di_prev_less_half); + const auto i_center = static_cast(1 - di_less_half); // find indices and define shape function - if (shift_x > 0) { + if (shift_i > 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -109,7 +109,7 @@ namespace kernel { S1_1 = HALF * SQR(HALF + (i_center - di)); S1_2 = static_cast(0.75) - SQR(i_center - di); S1_3 = HALF * SQR(HALF - (i_center - di)); - } else if (shift_x < 0) { + } else if (shift_i < 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -127,7 +127,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(i_center - di); S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; - } else if (shift_x == 0) { + } else if (shift_i == 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -553,7 +553,7 @@ namespace kernel { i2_prev(p), dx2_prev(p)); // clang-format on - // Esirkepov 2001, Eq. 39 + // Esirkepov 2001, Eq. 
38 /* x - component */ @@ -636,23 +636,9 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const auto delta_x = static_cast(i1(p) == i1_prev(p)) * - static_cast(dx1(p) - dx1_prev(p)) + - static_cast(i1(p) == i1_prev(p) + 1) * - static_cast(dx1(p) + (1 - dx1_prev(p))) + - static_cast(i1(p) == i1_prev(p) - 1) * - static_cast((1 - dx1(p)) + dx1_prev(p)); - - const auto delta_y = static_cast(i2(p) == i2_prev(p)) * - static_cast(dx2(p) - dx2_prev(p)) + - static_cast(i2(p) == i2_prev(p) + 1) * - static_cast(dx2(p) + (1 - dx2_prev(p))) + - static_cast(i2(p) == i2_prev(p) - 1) * - static_cast((1 - dx2(p)) + dx2_prev(p)); - - const real_t Qdxdt = -coeff; - const real_t Qdydt = -coeff; - const real_t QVz = -coeff * vp[2]; + const real_t Qdxdt = -coeff * inv_dt; + const real_t Qdydt = -coeff * inv_dt; + const real_t QVz = -coeff * inv_dt * vp[2]; // Esirkepov - Eq. 39 // x-component @@ -720,10 +706,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component From 246b3e92bb5af3e923843555cb0d11406d8e5f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 08:58:18 -0500 Subject: [PATCH 23/82] simplification and bugfix in Wy --- src/kernels/currents_deposit.hpp | 93 ++++++++++++++++---------------- 1 file changed, 47 insertions(+), 46 
deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 6be22025..ca18052c 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -87,8 +87,9 @@ namespace kernel { i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); // center index of the shape function - const auto i_center_prev = static_cast(1 - di_prev_less_half); - const auto i_center = static_cast(1 - di_less_half); + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center = static_cast(1 - di_less_half) - di; // find indices and define shape function if (shift_i > 0) { @@ -100,15 +101,15 @@ namespace kernel { */ update_i2 = ONE; - S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_0 = HALF * SQR(HALF + di_center_prev); + S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + (i_center - di)); - S1_2 = static_cast(0.75) - SQR(i_center - di); - S1_3 = HALF * SQR(HALF - (i_center - di)); + S1_1 = HALF * SQR(HALF + di_center); + S1_2 = static_cast(0.75) - SQR(di_center); + S1_3 = HALF * SQR(HALF - di_center); } else if (shift_i < 0) { /* (-1) 0 1 2 3 @@ -119,13 +120,13 @@ namespace kernel { update_i2 = ONE; S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_2 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_3 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_1 = HALF * SQR(HALF + di_center_prev); + S0_2 = static_cast(0.75) - SQR(di_center_prev); + S0_3 = HALF * SQR(HALF - di_center_prev); - S1_0 = HALF * SQR(HALF + (i_center - di)); - S1_1 = static_cast(0.75) - SQR(i_center - di); - S1_2 = HALF * SQR(HALF - (i_center - di)); + S1_0 = HALF * SQR(HALF + di_center); + S1_1 = static_cast(0.75) - SQR(di_center); + S1_2 = HALF * SQR(HALF - 
di_center); S1_3 = ZERO; } else if (shift_i == 0) { /* @@ -136,14 +137,14 @@ namespace kernel { */ update_i2 = ZERO; - S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_0 = HALF * SQR(HALF + di_center_prev); + S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; - S1_0 = HALF * SQR(HALF + (i_center - di)); - S1_1 = static_cast(0.75) - SQR(i_center - di); - S1_2 = HALF * SQR(HALF - (i_center - di)); + S1_0 = HALF * SQR(HALF + di_center); + S1_1 = static_cast(0.75) - SQR(di_center); + S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { raise::Error("Invalid shift in indices", HERE); @@ -579,25 +580,25 @@ namespace kernel { const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); + 
const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + @@ -636,9 +637,9 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const real_t Qdxdt = -coeff * inv_dt; - const real_t Qdydt = -coeff * inv_dt; - const real_t QVz = -coeff * inv_dt * vp[2]; + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; // Esirkepov - Eq. 
39 // x-component @@ -706,10 +707,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component From 23011799bbb8026dbcb77d3c9bed139eda52ad0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 09:20:21 -0500 Subject: [PATCH 24/82] bugfix in case comparison --- src/kernels/currents_deposit.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ca18052c..ccfe72fe 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -92,7 +92,7 @@ namespace kernel { const auto di_center = static_cast(1 - di_less_half) - di; // find indices and define shape function - if (shift_i > 0) { + if (shift_i == 1) { /* (-1) 0 1 2 3 ___________________________________ @@ -110,7 +110,7 @@ namespace kernel { S1_1 = HALF * SQR(HALF + di_center); S1_2 = static_cast(0.75) - SQR(di_center); S1_3 = HALF * SQR(HALF - di_center); - } else if (shift_i < 0) { + } else if (shift_i == -1) { /* (-1) 0 1 2 3 ___________________________________ @@ -128,6 +128,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; + } else if (shift_i == 0) { /* (-1) 0 1 2 3 From eb8c58e2e88bdeadd80bce06c9c9c7c1ff66a6ca Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 14:07:18 -0500 Subject: [PATCH 25/82] switch off formatting for large B updates --- src/kernels/faraday_mink.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index bea6be93..cf6844a9 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -70,13 +70,15 @@ namespace kernel::mink { Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - const auto alphax = ONE - THREE * deltax; + const auto alphax = ONE - THREE * deltax; + // clang-format off EB(i1, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) - + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); EB(i1, em::bx3) += coeff1 * ( - - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) - - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) + - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -86,7 +88,7 @@ namespace kernel::mink { if constexpr (D == Dim::_2D) { const auto alphax = ONE - TWO * betaxy - THREE * deltax; const auto alphay = ONE - TWO * betayx - THREE * deltay; - + // clang-format off EB(i1, i2, em::bx1) += coeff1 * ( - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) @@ -106,7 +108,7 @@ namespace kernel::mink { - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); - + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D != 
2"); } @@ -117,7 +119,7 @@ namespace kernel::mink { const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; - + // clang-format off EB(i1, i2, i3, em::bx1) += coeff1 * ( + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 , em::ex2)) + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) @@ -157,6 +159,7 @@ namespace kernel::mink { - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } From 03aebb57be793fd35967f11d440145e0b8c31ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 17:52:24 -0500 Subject: [PATCH 26/82] optimizations and prep for 3rd order deposit --- src/kernels/currents_deposit.hpp | 575 ++++++++++++++++++++++++++++--- 1 file changed, 523 insertions(+), 52 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ccfe72fe..fc87a1b0 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,7 +52,7 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, - real_t& update_i2, + bool& update_i2, const index_t& i, const real_t& di, const index_t& i_prev, @@ -81,7 +81,7 @@ namespace kernel { const int di_prev_less_half = static_cast( di_prev < static_cast(0.5)); - const auto shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); // find the minimum index of the shape function i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); @@ -99,7 +99,7 @@ namespace kernel { | | 
x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - update_i2 = ONE; + update_i2 = true; S0_0 = HALF * SQR(HALF + di_center_prev); S0_1 = static_cast(0.75) - SQR(di_center_prev); @@ -117,7 +117,7 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - update_i2 = ONE; + update_i2 = true; S0_0 = ZERO; S0_1 = HALF * SQR(HALF + di_center_prev); @@ -136,7 +136,7 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - update_i2 = ZERO; + update_i2 = false; S0_0 = HALF * SQR(HALF + di_center_prev); S0_1 = static_cast(0.75) - SQR(di_center_prev); @@ -155,6 +155,143 @@ namespace kernel { i_min += N_GHOSTS; } + Inline void shape_function_3rd(real_t& S0_0, + real_t& S0_1, + real_t& S0_2, + real_t& S0_3, + real_t& S0_4, + real_t& S1_0, + real_t& S1_1, + real_t& S1_2, + real_t& S1_3, + real_t& S1_4, + ncells_t& i_min, + bool& update_i3, + const index_t& i, + const real_t& di, + const index_t& i_prev, + const real_t& di_prev) const { + /* + Shape function per particle is a 4 element array. 
+ We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + (-1) 0 1 2 3 4 + __________________________________________ + | | x* | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______|______| + | | x | x* | x* | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + | * | x* | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + // find shift in indices + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); + + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + + // find the minimum index of the shape function + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center_prev2 = SQR(di_center_prev); + const auto di_center_prev3 = di_center_prev2 * di_center_prev; + + const auto di_center = static_cast(1 - di_less_half) - di; + const auto di_center2 = SQR(di_center); + const auto di_center3 = di_center2 * di_center; + + // find indices and define shape function + if (shift_i == 1) { + /* + (-1) 0 1 2 3 4 + __________________________________________ + | | x | x* | x* | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + */ + update_i3 = true; + + S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; + S0_2 = static_cast(1 / 6) + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_3 = static_cast(1 / 6) * di_center_prev3; + S0_4 = ZERO; + + S1_0 = ZERO; + S1_1 = static_cast(1 / 6) * (ONE - di_center3) 
- + HALF * (di_center - di_center2); + S1_2 = static_cast(2 / 3) - di_center2 + HALF * di_center3; + S1_3 = static_cast(1 / 6) + + HALF * (di_center + di_center2 - di_center3); + S1_4 = static_cast(1 / 6) * di_center3; + } else if (shift_i == -1) { + /* + (-1) 0 1 2 3 4 + _________________________________________ + | * | x* | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______|_____| + */ + update_i3 = true; + + S0_0 = ZERO; + S0_1 = static_cast(1 / 6) * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_2 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; + S0_3 = static_cast(1 / 6) + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_4 = static_cast(1 / 6) * di_center_prev3; + + S1_0 = static_cast(1 / 6) * (ONE - di_center3) - + HALF * (di_center - di_center2); + S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; + S1_2 = static_cast(1 / 6) + + HALF * (di_center + di_center2 - di_center3); + S1_3 = static_cast(1 / 6) * di_center3; + S1_4 = ZERO; + + } else if (shift_i == 0) { + /* + (-1) 0 1 2 3 4 + __________________________________________ + | | x* | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______|______| + */ + update_i3 = false; + + S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; + S0_2 = static_cast(1 / 6) + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_3 = static_cast(1 / 6) * di_center_prev3; + S0_4 = ZERO; + + S1_0 = static_cast(1 / 6) * (ONE - di_center3) - + HALF * (di_center - di_center2); + S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; + S1_2 = static_cast(1 / 6) + + HALF * (di_center + di_center2 - di_center3); + S1_3 = static_cast(1 / 6) * di_center3; + S1_4 = ZERO; + } else { + raise::Error("Invalid shift in indices", HERE); + } + + // account for ghost cells here to shorten J update 
expression + i_min += N_GHOSTS; + } + public: /** * @brief explicit constructor. @@ -521,7 +658,7 @@ namespace kernel { real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function ncells_t ix_min; - real_t update_x2; + bool update_x2; // find indices and define shape function // clang-format off shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, @@ -545,7 +682,7 @@ namespace kernel { real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function ncells_t iy_min; - real_t update_y2; + bool update_y2; // find indices and define shape function // clang-format off shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, @@ -575,31 +712,22 @@ namespace kernel { const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - // Unrolled calculations for Wy const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * 
(S1y_2 - S0y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + @@ -647,43 +775,35 @@ namespace kernel { const auto jx_0_0 = -Qdxdt * Wx_0_0; const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; const auto jx_0_1 = -Qdxdt * Wx_0_1; const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; const auto jx_0_2 = -Qdxdt * Wx_0_2; const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; const auto jx_0_3 = -Qdxdt * Wx_0_3; const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; - const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; // y-component const auto jy_0_0 = -Qdydt * Wy_0_0; const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; const auto jy_1_0 = -Qdydt * Wy_1_0; const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; const auto jy_2_0 = -Qdydt * Wy_2_0; const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; - const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; const auto jy_3_0 = -Qdydt * Wy_3_0; const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; /* Current update @@ -696,22 +816,25 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += update_y2 * jx_0_3; J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; J_acc(ix_min + 1, 
iy_min + 2, cur::jx1) += jx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += update_y2 * jx_1_3; - J_acc(ix_min + 2, iy_min, cur::jx1) += update_x2 * jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += update_x2 * jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; + if (update_x2) { + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; + } + + if (update_y2) { + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; + } - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + if (update_x2 && update_y2) { + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; + } /* y - component @@ -719,46 +842,54 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += update_x2 * jy_3_0; J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += update_x2 * jy_3_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += update_y2 * jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += update_y2 * jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += update_y2 * jy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += update_y2 * update_x2 * jy_3_2; + if (update_x2) { + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; + } - // J_acc(ix_min, iy_min + 3, cur::jx2) += update_y3 * jy_0_3; - // J_acc(ix_min + 1, iy_min + 3, cur::jx2) 
+= update_y3 * jy_1_3; - // J_acc(ix_min + 2, iy_min + 3, cur::jx2) += update_y3 * jy_2_3; - // J_acc(ix_min + 3, iy_min + 3, cur::jx2) += update_x3 * jy_3_3; + if (update_y2) { + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; + } + if (update_x2 && update_y2) { + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; + } /* z - component, simulated direction */ J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_0_3; J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_1_3; J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_2_3; - J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * - QVz * Wz_3_3; + if (update_x2) { + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + } + + if (update_y2) { + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + } + if (update_x2 && update_y2) { + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + } } else if constexpr (D == Dim::_3D) { // /* @@ 
-1720,6 +1851,346 @@ namespace kernel { // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; } // dimension + + } else if constexpr (O == 3u) { + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. + S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* + */ + + /* + x - direction + */ + + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3, S0x_4; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3, S1x_4; + // indices of the shape function + ncells_t ix_min; + bool update_x3; + // find indices and define shape function + // clang-format off + shape_function_3rd(S0x_0, S0x_1, S0x_2, S0x_3, S0x_4, + S1x_0, S1x_1, S1x_2, S1x_3, S1x_4, + ix_min, update_x3, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + // clang-format on + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { + + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3, S0y_4; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3, S1y_4; + // indices of the shape function + ncells_t iy_min; + bool update_y3; + // find indices and define shape function + // clang-format off + shape_function_3rd(S0y_0, S0y_1, S0y_2, S0y_3, S0y_4, + S1y_0, S1y_1, S1y_2, S1y_3, S1y_4, + iy_min, update_y3, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + + // Esirkepov 2001, Eq. 
38 + /* + x - component + */ + // Calculate weight function - unrolled + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + const auto Wx_0_4 = HALF * (S1x_0 - S0x_0) * (S0y_4 + S1y_4); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + const auto Wx_1_4 = HALF * (S1x_1 - S0x_1) * (S0y_4 + S1y_4); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + const auto Wx_2_4 = HALF * (S1x_2 - S0x_2) * (S0y_4 + S1y_4); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + const auto Wx_3_4 = HALF * (S1x_3 - S0x_3) * (S0y_4 + S1y_4); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); + const auto Wy_2_1 = HALF 
* (S1x_2 + S0x_2) * (S1y_1 - S0y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); + + const auto Wy_4_0 = HALF * (S1x_4 + S0x_4) * (S1y_0 - S0y_0); + const auto Wy_4_1 = HALF * (S1x_4 + S0x_4) * (S1y_1 - S0y_1); + const auto Wy_4_2 = HALF * (S1x_4 + S0x_4) * (S1y_2 - S0y_2); + const auto Wy_4_3 = HALF * (S1x_4 + S0x_4) * (S1y_3 - S0y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_4 = THIRD * (S1y_4 * (HALF * S0x_0 + S1x_0) + + S0y_4 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_4 = THIRD * (S1y_4 * (HALF * S0x_1 + S1x_1) + + S0y_4 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + 
S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_4 = THIRD * (S1y_4 * (HALF * S0x_2 + S1x_2) + + S0y_4 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_4 = THIRD * (S1y_4 * (HALF * S0x_3 + S1x_3) + + S0y_4 * (HALF * S1x_3 + S0x_3)); + + const auto Wz_4_0 = THIRD * (S1y_0 * (HALF * S0x_4 + S1x_4) + + S0y_0 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_1 = THIRD * (S1y_1 * (HALF * S0x_4 + S1x_4) + + S0y_1 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_2 = THIRD * (S1y_2 * (HALF * S0x_4 + S1x_4) + + S0y_2 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_3 = THIRD * (S1y_3 * (HALF * S0x_4 + S1x_4) + + S0y_3 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_4 = THIRD * (S1y_4 * (HALF * S0x_4 + S1x_4) + + S0y_4 * (HALF * S1x_4 + S0x_4)); + + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; + + // Esirkepov - Eq. 
39 + // x-component + const auto jx_0_0 = -Qdxdt * Wx_0_0; + const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; + const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; + const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; + + const auto jx_0_1 = -Qdxdt * Wx_0_1; + const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; + const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; + const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; + + const auto jx_0_2 = -Qdxdt * Wx_0_2; + const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; + const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; + const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; + + const auto jx_0_3 = -Qdxdt * Wx_0_3; + const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; + const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; + + const auto jx_0_4 = -Qdxdt * Wx_0_4; + const auto jx_1_4 = jx_0_4 - Qdxdt * Wx_1_4; + const auto jx_2_4 = jx_1_4 - Qdxdt * Wx_2_4; + const auto jx_3_4 = jx_2_4 - Qdxdt * Wx_3_4; + + // y-component + const auto jy_0_0 = -Qdydt * Wy_0_0; + const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; + const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; + const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; + + const auto jy_1_0 = -Qdydt * Wy_1_0; + const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; + const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; + const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; + + const auto jy_2_0 = -Qdydt * Wy_2_0; + const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; + const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; + + const auto jy_3_0 = -Qdydt * Wy_3_0; + const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; + const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; + const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; + + const auto jy_4_0 = -Qdydt * Wy_4_0; + const auto jy_4_1 = jy_4_0 - Qdydt * Wy_4_1; + const auto jy_4_2 = jy_4_1 - Qdydt * Wy_4_2; + const auto jy_4_3 = jy_4_2 - Qdydt * Wy_4_3; + + /* + Current update + */ + auto J_acc = J.access(); + + /* + x - component + */ + J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; + J_acc(ix_min, 
iy_min + 1, cur::jx1) += jx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; + + if (update_x3) { + J_acc(ix_min + 3, iy_min, cur::jx1) += jx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_3_3; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 4, cur::jx1) += jx_0_4; + J_acc(ix_min + 1, iy_min + 4, cur::jx1) += jx_1_4; + J_acc(ix_min + 2, iy_min + 4, cur::jx1) += jx_2_4; + } + + if (update_x3 && update_y3) { + J_acc(ix_min + 3, iy_min + 4, cur::jx1) += jx_3_4; + } + + /* + y - component + */ + J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; + + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; + + if (update_x3) { + J_acc(ix_min + 4, iy_min, cur::jx2) += jy_4_0; + J_acc(ix_min + 4, iy_min + 1, cur::jx2) += jy_4_1; + J_acc(ix_min + 4, iy_min + 2, cur::jx2) += jy_4_2; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 3, cur::jx2) += jy_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_1_3; + 
J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_2_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_3_3; + } + + if (update_x3 && update_y3) { + J_acc(ix_min + 4, iy_min + 3, cur::jx2) += jy_4_3; + } + /* + z - component, simulated direction + */ + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + + if (update_x3) { + J_acc(ix_min + 4, iy_min, cur::jx3) += QVz * Wz_4_0; + J_acc(ix_min + 4, iy_min + 1, cur::jx3) += QVz * Wz_4_1; + J_acc(ix_min + 4, iy_min + 2, cur::jx3) += QVz * Wz_4_2; + J_acc(ix_min + 4, iy_min + 3, cur::jx3) += QVz * Wz_4_3; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 4, cur::jx3) += QVz * Wz_0_4; + J_acc(ix_min + 1, iy_min + 4, cur::jx3) += QVz * Wz_1_4; + J_acc(ix_min + 2, iy_min + 4, cur::jx3) += QVz * Wz_2_4; + J_acc(ix_min + 3, iy_min + 4, cur::jx3) += QVz * Wz_3_4; + } + if (update_x3 && update_y3) { + J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; + } + } // dim } else { // order raise::KernelError(HERE, "Unsupported interpolation order"); } From bd2f333295ddc56e9a0cf52efb9058799304e254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 
22:27:22 -0500 Subject: [PATCH 27/82] update 3D 2nd order deposit --- src/kernels/currents_deposit.hpp | 2000 ++++++++++++++++-------------- 1 file changed, 1046 insertions(+), 954 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index fc87a1b0..0a024f71 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -892,964 +892,1056 @@ namespace kernel { } } else if constexpr (D == Dim::_3D) { - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0y_0, S0y_1, S0y_2, S0y_3; - // // shape function at current timestep - // real_t S1y_0, S1y_1, S1y_2, S1y_3; - // // indices of the shape function - // uint iy_min; - // // find indices and define shape function - // shape_function_2nd(S0y_0, - // S0y_1, - // S0y_2, - // S0y_3, - // S1y_0, - // S1y_1, - // S1y_2, - // S1y_3, - // iy_min, - // i2(p), - // dx2(p), - // i2_prev(p), - // dx2_prev(p)); - // - // /* - // z - direction - // */ - // - // // shape function at previous timestep - // real_t S0z_0, S0z_1, S0z_2, S0z_3; - // // shape function at current timestep - // real_t S1z_0, S1z_1, S1z_2, S1z_3; - // // indices of the shape function - // uint iz_min; - // // find indices and define shape function - // shape_function_2nd(S0z_0, - // S0z_1, - // S0z_2, - // S0z_3, - // S1z_0, - // S1z_1, - // S1z_2, - // S1z_3, - // iz_min, - // i3(p), - // dx3(p), - // i3_prev(p), - // dx3_prev(p)); - // - // // Calculate weight function - // // for (int i = 0; i < interp_order + 2; ++i) { - // // for (int j = 0; j < interp_order + 2; ++j) { - // // for (int k = 0; k < interp_order + 2; ++k) { - // // // Esirkepov 2001, Eq. 
31 - // // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - // // - // // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - // // - // // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // // } - // // } - // // } - // // - // // Unrolled calculations for Wx, Wy, and Wz - // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_0_2_2 = THIRD * (S1x_0 - 
S0x_0) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_0 + 
S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * 
S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // 
const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, 
cur::jx1) += Qdxdt * Wx_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min 
+ 3, cur::jx1) += Qdxdt * Wx_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; - 
// J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; - // - // /* - // y-component - // */ - // // i = 0 - // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto 
Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_0 + 
S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // 
const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * 
S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt * dxp_r_2; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += 
Qdydt * Wy_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, 
iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - // 
J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * 
S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // 
HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 
= THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const 
auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + ncells_t iy_min; + bool update_y2; + // find indices and define shape function + // clang-format off + shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + S1y_0, S1y_1, S1y_2, S1y_3, + iy_min, update_y2, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + + /* + z - direction + */ + + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + ncells_t iz_min; + bool update_z2; + // find indices and define shape function + // clang-format off + shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, + S1z_0, S1z_1, S1z_2, S1z_3, + iz_min, update_z2, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + // clang-format on + + // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 2001, Eq. 
31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } // - // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + + // Unrolled calculations for Wx, Wy, and Wz + // clang-format off + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + 
S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 
+ S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 
= THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * 
S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * 
S1y_3 + S0y_3 * S1z_3)); + + const real_t Qdxdt = coeff * inv_dt; + + const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; + const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; + const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; + const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; + const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; + const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; + const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; + const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; + const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; + const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; + const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; + const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; + + const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; + const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; + const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; + const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; + const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; + const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; + const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; + const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; + const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; + const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; + const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; + const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; + + const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; + const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; + const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; + const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; + const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; + const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; + const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; + const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; + const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; + const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; + const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; + const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; + + const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; + const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; + const auto jx_2_0_3 = 
jx_1_0_3 - Qdxdt * Wx_2_0_3; + const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; + const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; + const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; + const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; + const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; + const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; + const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; + const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; + const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; + + /* + y-component + */ + const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 
+ + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_2_3 = 
THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * 
(S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_2_2 = THIRD * 
(S1y_2 - S0y_2) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const real_t Qdydt = coeff * inv_dt; + + const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; + const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; + const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; + const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; + const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; + const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; + const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; + const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; + const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; + const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; + const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; + const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; + + const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; + const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; + const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; + const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; + const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; + const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; + const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; + const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; + const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; + const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; + const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; + const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; + + const auto 
jy_0_0_2 = - Qdydt * Wy_0_0_2; + const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; + const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; + const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; + const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; + const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; + const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; + const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; + const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; + const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; + const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; + const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; + + const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; + const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; + const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; + const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; + const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; + const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; + const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; + const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; + const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; + const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; + const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; + const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; + + /* + z - component + */ + const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + + const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + + const 
auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + + const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + + // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + + const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + + const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + + 
const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + + // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 + const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + + const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + + const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + + const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + 
+ // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + + const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + + const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + + const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + + const real_t Qdzdt = coeff * inv_dt; + + const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; + const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; + const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; + const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; + const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; + const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; + const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; 
+ const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; + const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; + const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; + const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; + const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; + + const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; + const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; + const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; + const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; + const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; + const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; + const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; + const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; + const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; + const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; + const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; + const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; + + const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; + const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; + const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; + const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; + const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; + const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; + const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; + const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; + const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; + const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; + const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; + const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; + + const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; + const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; + const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; + const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; + const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; + const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; + const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; + const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; + const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; + const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; + const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * 
Wz_3_3_1; + const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; + + + /* + Current update + */ + auto J_acc = J.access(); + + J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; + + if (update_x2) + { + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; + + if (update_y2) + { + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; 
+ J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; + } + + if (update_z2) + { + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; + + if (update_y2) + { + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; + } + } + } // - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, 
iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * 
Wz_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, 
cur::jx1) += jx_1_0_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; + } + } + + + /* + y-component + */ + J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; + + if (update_z2) + { + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; + J_acc(ix_min + 3, iy_min + 1, iz_min + 
3, cur::jx2) += jy_3_1_3; + } + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; + + if (update_z2) + { + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; + } + } + + if (update_z2) + { + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; + } + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; + } + + /* + z-component + */ + J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; + J_acc(ix_min + 1, 
iy_min, iz_min, cur::jx3) += jz_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; + J_acc(ix_min + 1, iy_min + 1, 
iz_min + 2, cur::jx3) += jz_1_1_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; + + if (update_y2) + { + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; + } + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; + } + } + // clang-format on } // dimension } else if constexpr (O == 3u) { From 7e52c03a44dd0bbdea196bb6ebea0184621e14b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 22:31:05 -0500 Subject: [PATCH 28/82] removed redundant terms --- src/kernels/currents_deposit.hpp | 106 +------------------------------ 1 file changed, 1 insertion(+), 105 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0a024f71..257e3e8f 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -1110,58 +1110,6 @@ namespace kernel { const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto 
Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); const real_t Qdxdt = coeff * inv_dt; @@ -1259,19 +1207,6 @@ namespace kernel { (S0x_0 * S0z_3 + S1x_0 * S1z_3 + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_3_1 = THIRD * 
(S1y_3 - S0y_3) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_1 * S0z_0 + S1x_1 * S1z_0 + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); @@ -1310,20 +1245,7 @@ namespace kernel { const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * (S0x_1 * S0z_3 + S1x_1 * S1z_3 + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_2 * S0z_0 + S1x_2 * S1z_0 + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); @@ -1363,19 +1285,6 @@ namespace kernel { (S0x_2 * S0z_3 + S1x_2 * S1z_3 + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_3 * S0z_0 + S1x_3 * S1z_0 + HALF * 
(S0z_0 * S1x_3 + S0x_3 * S1z_0)); @@ -1415,19 +1324,6 @@ namespace kernel { (S0x_3 * S0z_3 + S1x_3 * S1z_3 + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - const real_t Qdydt = coeff * inv_dt; const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; From f1684262b0291663245092e6a3cd7b760f14df5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 23 May 2025 15:08:14 -0500 Subject: [PATCH 29/82] 2nd order particle pusher --- src/kernels/particle_pusher_sr.hpp | 485 +++++++++++++++++++++++++++++ 1 file changed, 485 insertions(+) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 831d070e..2d89f587 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -476,6 +476,9 @@ namespace kernel::sr { bool is_gca { false }; getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + //getInterpFlds2nd(p, ei, bi); + metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); if (cooling != 0) { @@ -1090,6 +1093,488 @@ namespace kernel::sr { } } + Inline void getInterpFlds2nd(index_t& p, + vec_t& e0, + vec_t& b0) const { + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + + // Compute weights for second-order interpolation + real_t w0 = HALF * SQR(HALF - dx1_); + real_t w1 = static_cast(0.75) - SQR(dx1_); + real_t w2 = HALF * SQR(HALF + dx1_); + + // Ex1 (dual grid) + real_t c0 = 
EB(i - 1, em::ex1); // First grid point + real_t c1 = EB(i, em::ex1); // Second grid point + real_t c2 = EB(i + 1, em::ex1); // Third grid point + e0[0] = c0 * w0 + c1 * w1 + c2 * w2; + + // Ex2 (primal grid) + c0 = EB(i - 1, em::ex2); // First grid point + c1 = EB(i, em::ex2); // Second grid point + c2 = EB(i + 1, em::ex2); // Third grid point + e0[1] = c0 * w0 + c1 * w1 + c2 * w2; + + // Ex3 (primal grid) + c0 = EB(i - 1, em::ex3); // First grid point + c1 = EB(i, em::ex3); // Second grid point + c2 = EB(i + 1, em::ex3); // Third grid point + e0[2] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx1 (primal grid) + c0 = EB(i - 1, em::bx1); // First grid point + c1 = EB(i, em::bx1); // Second grid point + c2 = EB(i + 1, em::bx1); // Third grid point + b0[0] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx2 (dual grid) + c0 = EB(i - 2, em::bx2); // First grid point + c1 = EB(i - 1, em::bx2); // Second grid point + c2 = EB(i, em::bx2); // Third grid point + b0[1] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx3 (dual grid) + c0 = EB(i - 2, em::bx3); // First grid point + c1 = EB(i - 1, em::bx3); // Second grid point + c2 = EB(i, em::bx3); // Third grid point + b0[2] = c0 * w0 + c1 * w1 + c2 * w2; + + } else if constexpr (D == Dim::_2D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // Compute weights for second-order interpolation + real_t w0x = HALF * SQR(HALF - dx1_); + real_t w1x = static_cast(0.75) - SQR(dx1_); + real_t w2x = HALF * SQR(HALF + dx1_); + + real_t w0y = HALF * SQR(HALF - dx2_); + real_t w1y = static_cast(0.75) - SQR(dx2_); + real_t w2y = HALF * SQR(HALF + dx2_); + + // Ex1 + // Interpolate --- (dual, primal) + real_t c000 = EB(i - 1, j - 1, em::ex1); + real_t c100 = EB(i, j - 1, em::ex1); + real_t c200 = EB(i + 1, j - 1, em::ex1); + real_t c010 = EB(i - 1, j, em::ex1); + real_t c110 = EB(i, j, em::ex1); + real_t c210 = EB(i + 1, j, 
em::ex1); + real_t c020 = EB(i - 1, j + 1, em::ex1); + real_t c120 = EB(i, j + 1, em::ex1); + real_t c220 = EB(i + 1, j + 1, em::ex1); + + real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; + real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; + real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Ex2 + // Interpolate --- (primal, dual) + c000 = EB(i - 1, j - 1, em::ex2); + c100 = EB(i, j - 1, em::ex2); + c200 = EB(i + 1, j - 1, em::ex2); + c010 = EB(i - 1, j, em::ex2); + c110 = EB(i, j, em::ex2); + c210 = EB(i + 1, j, em::ex2); + c020 = EB(i - 1, j + 1, em::ex2); + c120 = EB(i, j + 1, em::ex2); + c220 = EB(i + 1, j + 1, em::ex2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Ex3 + // Interpolate --- (primal, primal) + c000 = EB(i - 1, j - 1, em::ex3); + c100 = EB(i, j - 1, em::ex3); + c200 = EB(i + 1, j - 1, em::ex3); + c010 = EB(i - 1, j, em::ex3); + c110 = EB(i, j, em::ex3); + c210 = EB(i + 1, j, em::ex3); + c020 = EB(i - 1, j + 1, em::ex3); + c120 = EB(i, j + 1, em::ex3); + c220 = EB(i + 1, j + 1, em::ex3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx1 + // Interpolate --- (primal, dual) + c000 = EB(i - 1, j - 1, em::bx1); + c100 = EB(i, j - 1, em::bx1); + c200 = EB(i + 1, j - 1, em::bx1); + c010 = EB(i - 1, j, em::bx1); + c110 = EB(i, j, em::bx1); + c210 = EB(i + 1, j, em::bx1); + c020 = EB(i - 1, j + 1, em::bx1); + c120 = EB(i, j + 1, em::bx1); + c220 = EB(i + 1, j + 1, em::bx1); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx2 + // Interpolate --- (dual, primal) + c000 = EB(i - 1, j - 1, em::bx2); + 
c100 = EB(i, j - 1, em::bx2); + c200 = EB(i + 1, j - 1, em::bx2); + c010 = EB(i - 1, j, em::bx2); + c110 = EB(i, j, em::bx2); + c210 = EB(i + 1, j, em::bx2); + c020 = EB(i - 1, j + 1, em::bx2); + c120 = EB(i, j + 1, em::bx2); + c220 = EB(i + 1, j + 1, em::bx2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx3 + // Interpolate --- (dual, dual) + c000 = EB(i - 1, j - 1, em::bx3); + c100 = EB(i, j - 1, em::bx3); + c200 = EB(i + 1, j - 1, em::bx3); + c010 = EB(i - 1, j, em::bx3); + c110 = EB(i, j, em::bx3); + c210 = EB(i + 1, j, em::bx3); + c020 = EB(i - 1, j + 1, em::bx3); + c120 = EB(i, j + 1, em::bx3); + c220 = EB(i + 1, j + 1, em::bx3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + + } else if constexpr (D == Dim::_3D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // Compute weights for second-order interpolation + real_t w0x = HALF * SQR(HALF - dx1_); + real_t w1x = static_cast(0.75) - SQR(dx1_); + real_t w2x = HALF * SQR(HALF + dx1_); + + real_t w0y = HALF * SQR(HALF - dx2_); + real_t w1y = static_cast(0.75) - SQR(dx2_); + real_t w2y = HALF * SQR(HALF + dx2_); + + real_t w0z = HALF * SQR(HALF - dx3_); + real_t w1z = static_cast(0.75) - SQR(dx3_); + real_t w2z = HALF * SQR(HALF + dx3_); + + // Ex1 + // Interpolate --- (dual, primal, primal) + real_t c000 = EB(i - 1, j - 1, k - 1, em::ex1); + real_t c100 = EB(i, j - 1, k - 1, em::ex1); + real_t c200 = EB(i + 1, j - 1, k - 1, em::ex1); + real_t c010 = EB(i - 1, j, k - 1, em::ex1); + real_t c110 = EB(i, j, k - 1, 
em::ex1); + real_t c210 = EB(i + 1, j, k - 1, em::ex1); + real_t c020 = EB(i - 1, j + 1, k - 1, em::ex1); + real_t c120 = EB(i, j + 1, k - 1, em::ex1); + real_t c220 = EB(i + 1, j + 1, k - 1, em::ex1); + + real_t c001 = EB(i - 1, j - 1, k, em::ex1); + real_t c101 = EB(i, j - 1, k, em::ex1); + real_t c201 = EB(i + 1, j - 1, k, em::ex1); + real_t c011 = EB(i - 1, j, k, em::ex1); + real_t c111 = EB(i, j, k, em::ex1); + real_t c211 = EB(i + 1, j, k, em::ex1); + real_t c021 = EB(i - 1, j + 1, k, em::ex1); + real_t c121 = EB(i, j + 1, k, em::ex1); + real_t c221 = EB(i + 1, j + 1, k, em::ex1); + + real_t c002 = EB(i - 1, j - 1, k + 1, em::ex1); + real_t c102 = EB(i, j - 1, k + 1, em::ex1); + real_t c202 = EB(i + 1, j - 1, k + 1, em::ex1); + real_t c012 = EB(i - 1, j, k + 1, em::ex1); + real_t c112 = EB(i, j, k + 1, em::ex1); + real_t c212 = EB(i + 1, j, k + 1, em::ex1); + real_t c022 = EB(i - 1, j + 1, k + 1, em::ex1); + real_t c122 = EB(i, j + 1, k + 1, em::ex1); + real_t c222 = EB(i + 1, j + 1, k + 1, em::ex1); + + real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; + real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; + real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; + real_t c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + real_t c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + real_t c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Ex2 + // Interpolate -- (primal, dual, primal) + c000 = EB(i - 1, j - 1, k - 1, em::ex2); + c100 = EB(i, j - 1, k - 1, em::ex2); + c200 = EB(i + 1, j - 1, k - 1, em::ex2); + c010 = EB(i - 1, j, k - 1, em::ex2); + c110 = EB(i, j, k - 1, em::ex2); + c210 = EB(i + 1, j, k - 1, em::ex2); + c020 = EB(i - 1, j + 1, k - 1, em::ex2); + c120 = EB(i, j + 1, 
k - 1, em::ex2); + c220 = EB(i + 1, j + 1, k - 1, em::ex2); + + c001 = EB(i - 1, j - 1, k, em::ex2); + c101 = EB(i, j - 1, k, em::ex2); + c201 = EB(i + 1, j - 1, k, em::ex2); + c011 = EB(i - 1, j, k, em::ex2); + c111 = EB(i, j, k, em::ex2); + c211 = EB(i + 1, j, k, em::ex2); + c021 = EB(i - 1, j + 1, k, em::ex2); + c121 = EB(i, j + 1, k, em::ex2); + c221 = EB(i + 1, j + 1, k, em::ex2); + + c002 = EB(i - 1, j - 1, k + 1, em::ex2); + c102 = EB(i, j - 1, k + 1, em::ex2); + c202 = EB(i + 1, j - 1, k + 1, em::ex2); + c012 = EB(i - 1, j, k + 1, em::ex2); + c112 = EB(i, j, k + 1, em::ex2); + c212 = EB(i + 1, j, k + 1, em::ex2); + c022 = EB(i - 1, j + 1, k + 1, em::ex2); + c122 = EB(i, j + 1, k + 1, em::ex2); + c222 = EB(i + 1, j + 1, k + 1, em::ex2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Ex3 + // Interpolate -- (primal, primal, dual) + c000 = EB(i - 1, j - 1, k - 1, em::ex3); + c100 = EB(i, j - 1, k - 1, em::ex3); + c200 = EB(i + 1, j - 1, k - 1, em::ex3); + c010 = EB(i - 1, j, k - 1, em::ex3); + c110 = EB(i, j, k - 1, em::ex3); + c210 = EB(i + 1, j, k - 1, em::ex3); + c020 = EB(i - 1, j + 1, k - 1, em::ex3); + c120 = EB(i, j + 1, k - 1, em::ex3); + c220 = EB(i + 1, j + 1, k - 1, em::ex3); + + c001 = EB(i - 1, j - 1, k, em::ex3); + c101 = EB(i, j - 1, k, em::ex3); + c201 = EB(i + 1, j - 1, k, em::ex3); + c011 = EB(i - 1, j, k, em::ex3); + c111 = EB(i, j, k, em::ex3); + c211 = EB(i + 1, j, k, em::ex3); + c021 = EB(i - 1, j + 1, k, 
em::ex3); + c121 = EB(i, j + 1, k, em::ex3); + c221 = EB(i + 1, j + 1, k, em::ex3); + + c002 = EB(i - 1, j - 1, k + 1, em::ex3); + c102 = EB(i, j - 1, k + 1, em::ex3); + c202 = EB(i + 1, j - 1, k + 1, em::ex3); + c012 = EB(i - 1, j, k + 1, em::ex3); + c112 = EB(i, j, k + 1, em::ex3); + c212 = EB(i + 1, j, k + 1, em::ex3); + c022 = EB(i - 1, j + 1, k + 1, em::ex3); + c122 = EB(i, j + 1, k + 1, em::ex3); + c222 = EB(i + 1, j + 1, k + 1, em::ex3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx1 + // Interpolate -- (primal, dual, dual) + c000 = EB(i - 1, j - 1, k - 1, em::bx1); + c100 = EB(i, j - 1, k - 1, em::bx1); + c200 = EB(i + 1, j - 1, k - 1, em::bx1); + c010 = EB(i - 1, j, k - 1, em::bx1); + c110 = EB(i, j, k - 1, em::bx1); + c210 = EB(i + 1, j, k - 1, em::bx1); + c020 = EB(i - 1, j + 1, k - 1, em::bx1); + c120 = EB(i, j + 1, k - 1, em::bx1); + c220 = EB(i + 1, j + 1, k - 1, em::bx1); + + c001 = EB(i - 1, j - 1, k, em::bx1); + c101 = EB(i, j - 1, k, em::bx1); + c201 = EB(i + 1, j - 1, k, em::bx1); + c011 = EB(i - 1, j, k, em::bx1); + c111 = EB(i, j, k, em::bx1); + c211 = EB(i + 1, j, k, em::bx1); + c021 = EB(i - 1, j + 1, k, em::bx1); + c121 = EB(i, j + 1, k, em::bx1); + c221 = EB(i + 1, j + 1, k, em::bx1); + + c002 = EB(i - 1, j - 1, k + 1, em::bx1); + c102 = EB(i, j - 1, k + 1, em::bx1); + c202 = EB(i + 1, j - 1, k + 1, em::bx1); + c012 = EB(i - 1, j, k + 1, em::bx1); + c112 = EB(i, j, k + 1, em::bx1); + c212 = EB(i + 1, j, k + 
1, em::bx1); + c022 = EB(i - 1, j + 1, k + 1, em::bx1); + c122 = EB(i, j + 1, k + 1, em::bx1); + c222 = EB(i + 1, j + 1, k + 1, em::bx1); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx2 + // Interpolate -- (dual, primal, dual) + c000 = EB(i - 1, j - 1, k - 1, em::bx2); + c100 = EB(i, j - 1, k - 1, em::bx2); + c200 = EB(i + 1, j - 1, k - 1, em::bx2); + c010 = EB(i - 1, j, k - 1, em::bx2); + c110 = EB(i, j, k - 1, em::bx2); + c210 = EB(i + 1, j, k - 1, em::bx2); + c020 = EB(i - 1, j + 1, k - 1, em::bx2); + c120 = EB(i, j + 1, k - 1, em::bx2); + c220 = EB(i + 1, j + 1, k - 1, em::bx2); + + c001 = EB(i - 1, j - 1, k, em::bx2); + c101 = EB(i, j - 1, k, em::bx2); + c201 = EB(i + 1, j - 1, k, em::bx2); + c011 = EB(i - 1, j, k, em::bx2); + c111 = EB(i, j, k, em::bx2); + c211 = EB(i + 1, j, k, em::bx2); + c021 = EB(i - 1, j + 1, k, em::bx2); + c121 = EB(i, j + 1, k, em::bx2); + c221 = EB(i + 1, j + 1, k, em::bx2); + + c002 = EB(i - 1, j - 1, k + 1, em::bx2); + c102 = EB(i, j - 1, k + 1, em::bx2); + c202 = EB(i + 1, j - 1, k + 1, em::bx2); + c012 = EB(i - 1, j, k + 1, em::bx2); + c112 = EB(i, j, k + 1, em::bx2); + c212 = EB(i + 1, j, k + 1, em::bx2); + c022 = EB(i - 1, j + 1, k + 1, em::bx2); + c122 = EB(i, j + 1, k + 1, em::bx2); + c222 = EB(i + 1, j + 1, k + 1, em::bx2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * 
w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx3 + // Interpolate -- (dual, dual, primal) + c000 = EB(i - 1, j - 1, k - 1, em::bx3); + c100 = EB(i, j - 1, k - 1, em::bx3); + c200 = EB(i + 1, j - 1, k - 1, em::bx3); + c010 = EB(i - 1, j, k - 1, em::bx3); + c110 = EB(i, j, k - 1, em::bx3); + c210 = EB(i + 1, j, k - 1, em::bx3); + c020 = EB(i - 1, j + 1, k - 1, em::bx3); + c120 = EB(i, j + 1, k - 1, em::bx3); + c220 = EB(i + 1, j + 1, k - 1, em::bx3); + + c001 = EB(i - 1, j - 1, k, em::bx3); + c101 = EB(i, j - 1, k, em::bx3); + c201 = EB(i + 1, j - 1, k, em::bx3); + c011 = EB(i - 1, j, k, em::bx3); + c111 = EB(i, j, k, em::bx3); + c211 = EB(i + 1, j, k, em::bx3); + c021 = EB(i - 1, j + 1, k, em::bx3); + c121 = EB(i, j + 1, k, em::bx3); + c221 = EB(i + 1, j + 1, k, em::bx3); + + c002 = EB(i - 1, j - 1, k + 1, em::bx3); + c102 = EB(i, j - 1, k + 1, em::bx3); + c202 = EB(i + 1, j - 1, k + 1, em::bx3); + c012 = EB(i - 1, j, k + 1, em::bx3); + c112 = EB(i, j, k + 1, em::bx3); + c212 = EB(i + 1, j, k + 1, em::bx3); + c022 = EB(i - 1, j + 1, k + 1, em::bx3); + c122 = EB(i, j + 1, k + 1, em::bx3); + c222 = EB(i + 1, j + 1, k + 1, em::bx3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 
* w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + } + } + // Extra Inline void boundaryConditions(index_t& p, coord_t& xp) const { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { From 6eabea9cb9be45c70e5c5e7be0fe61c7670187cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 24 May 2025 18:11:30 -0500 Subject: [PATCH 30/82] bugfix in Jz --- src/kernels/currents_deposit.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 257e3e8f..76743f44 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -768,7 +768,7 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt; const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; + const real_t QVz = coeff * vp[2]; // Esirkepov - Eq. 39 // x-component From 6a9ff16e5d7ec3bb2793f6f90b78de0a7805f5b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 26 May 2025 13:27:32 -0500 Subject: [PATCH 31/82] update to deposit test --- src/kernels/tests/deposit.cpp | 406 +++++++++++++++++++++++++++++----- 1 file changed, 353 insertions(+), 53 deletions(-) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index d64e4bb2..bdfbb26a 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -81,12 +81,16 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - const int i0 = 4, j0 = 4; + const int i0 = 3, j0 = 3; + const int i0f = 3, j0f = 3; + const real_t uz = 0.5; - const prtldx_t dxi = 0.53, dxf = 0.47; - const prtldx_t dyi = 0.34, dyf = 0.52; - const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; - const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + // const prtldx_t dxi = 0.53, dxf = 0.47; + // const prtldx_t dyi = 0.34, dyf = 
0.52; + const prtldx_t dxi = 0.65, dxf = 0.65; + const prtldx_t dyi = 0.65, dyf = 0.65; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0f + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0f + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); @@ -103,20 +107,33 @@ void testDeposit(const std::vector& res, const real_t Fy1 = (yr - yi); const real_t Fy2 = (yf - yr); + const real_t Fz1 = HALF * uz / math::sqrt(1.0 + uz * uz); + const real_t Fz2 = HALF * uz / math::sqrt(1.0 + uz * uz); + const real_t Jx1 = Fx1 * (1 - Wy1) + Fx2 * (1 - Wy2); const real_t Jx2 = Fx1 * Wy1 + Fx2 * Wy2; const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - put_value(i1, i0, 0); - put_value(i2, j0, 0); + const real_t Jz = Fz1 * (1 - Wx1) + Fz2 * (1 - Wy1) + + Fz1 * Wx1 * (1 - Wy1) + + Fz1 * (1 - Wx1) * Wy1 + + Fz1 * Wx1 * Wy1 + + Fz2 * (1 - Wx2) * (1 - Wy2) + + Fz2 * Wx2 * (1 - Wy2) + + Fz2 * (1 - Wx2) * Wy2 + + Fz2 * Wx2 * Wy2; + + put_value(i1, i0f, 0); + put_value(i2, j0f, 0); put_value(i1_prev, i0, 0); put_value(i2_prev, j0, 0); put_value(dx1, dxf, 0); put_value(dx2, dyf, 0); put_value(dx1_prev, dxi, 0); put_value(dx2_prev, dyi, 0); + put_value(ux3, uz, 0); put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); @@ -124,7 +141,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - kernel::DepositCurrents_kernel(J_scat, + kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -140,7 +157,7 @@ void testDeposit(const std::vector& res, { N_GHOSTS, N_GHOSTS }, { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); - real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; + real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO, SumJz = ZERO; Kokkos::parallel_reduce( "SumDivJ", range, @@ -162,6 +179,12 @@ void testDeposit(const std::vector& res, Lambda(const int i, const int j, real_t& sum) { sum += 
J(i, j, cur::jx2); }, SumJy); + Kokkos::parallel_reduce( + "SumJy", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx3); }, + SumJz); + auto J_h = Kokkos::create_mirror_view(J); Kokkos::deep_copy(J_h, J); @@ -171,6 +194,7 @@ void testDeposit(const std::vector& res, std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; + std::cout << "SumJz: " << SumJz << " expected " << Jz << std::endl; // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), // "DepositCurrents_kernel::Jx1 is incorrect"); // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), @@ -181,6 +205,282 @@ void testDeposit(const std::vector& res, // "DepositCurrents_kernel::Jy2 is incorrect"); } +// void ind_pond(real_t Rcoord, int* Iindices, real_t* Rpond) { + +// // Assuming interp_order is an integer and Rcoord is a double +// int i_min = std::floor(Rcoord - HALF); + +// // Populate Iindices +// for (int i = 0; i < 3; ++i) { +// Iindices[i] = i_min + i; +// } + +// // Eq. 
24 +// Rpond[0] = 0.5 * std::pow(0.5 + (static_cast(Iindices[1]) - Rcoord), 2); +// Rpond[1] = 0.75 - std::pow(static_cast(Iindices[1]) - Rcoord, 2); +// Rpond[2] = 0.5 * std::pow(0.5 - (static_cast(Iindices[1]) - Rcoord), 2); +// } + +// template +// void testDeposit_2nd(const std::vector& res, +// const boundaries_t& ext, +// const std::map& params = {}, +// const real_t acc = ONE) { +// static_assert(M::Dim == 2); +// errorIf(res.size() != M::Dim, "res.size() != M::Dim"); +// using namespace ntt; + +// M metric { res, ext, params }; + +// const auto nx1 = res[0]; +// const auto nx2 = res[1]; + +// ndfield_t J { "J", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; +// array_t i1 { "i1", 10 }; +// array_t i2 { "i2", 10 }; +// array_t i3 { "i3", 10 }; +// array_t i1_prev { "i1_prev", 10 }; +// array_t i2_prev { "i2_prev", 10 }; +// array_t i3_prev { "i3_prev", 10 }; +// array_t dx1 { "dx1", 10 }; +// array_t dx2 { "dx2", 10 }; +// array_t dx3 { "dx3", 10 }; +// array_t dx1_prev { "dx1_prev", 10 }; +// array_t dx2_prev { "dx2_prev", 10 }; +// array_t dx3_prev { "dx3_prev", 10 }; +// array_t ux1 { "ux1", 10 }; +// array_t ux2 { "ux2", 10 }; +// array_t ux3 { "ux3", 10 }; +// array_t phi { "phi", 10 }; +// array_t weight { "weight", 10 }; +// array_t tag { "tag", 10 }; +// const real_t charge { 1.0 }, inv_dt { 1.0 }; + +// const int i0 = 4, j0 = 4; + +// // initial and final positions +// const prtldx_t dxi = 0.53, dxf = 0.47; +// const prtldx_t dyi = 0.34, dyf = 0.52; +// const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; +// const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + +// // const real_t xr = 0.5 * (xi + xf); +// // const real_t yr = 0.5 * (yi + yf); + +// // const real_t Wx1 = 0.5 * (xi + xr) - (real_t)i0; +// // const real_t Wx2 = 0.5 * (xf + xr) - (real_t)i0; + +// // const real_t Wy1 = 0.5 * (yi + yr) - (real_t)j0; +// // const real_t Wy2 = 0.5 * (yf + yr) - (real_t)j0; + +// // const real_t Fx1 = (xr - xi); +// 
// const real_t Fx2 = (xf - xr); + +// // const real_t Fy1 = (yr - yi); +// // const real_t Fy2 = (yf - yr); + +// // const real_t Jx1 = Fx1 * (1 - Wy1) + Fx2 * (1 - Wy2); +// // const real_t Jx2 = Fx1 * Wy1 + Fx2 * Wy2; + +// // const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); +// // const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; + +// // Define interp_order +// constexpr int interp_order = 2; +// const real_t aux_jx = 1.0; +// const real_t aux_jy = 1.0; +// const real_t aux_jz = 1.0; + +// // Arrays with size (interp_order + 1) +// std::array ISx1, ISx2; +// std::array PondSx1, PondSx2; +// std::array ISy1, ISy2; +// std::array PondSy1, PondSy2; + +// // 2D arrays with size (interp_order + 2) x (interp_order + 2) +// std::array, interp_order + 2> WEsirkx, +// WEsirky, WEsirkz; +// std::array, interp_order + 2> jx_local, +// jy_local; + +// std::array, 10> jx, jy, jz; +// std::fill(jx.begin(), jx.end(), 0.0); +// std::fill(jy.begin(), jy.end(), 0.0); +// std::fill(jz.begin(), jz.end(), 0.0); +// // 1D arrays with size (interp_order + 2) +// std::array Sx2, Sx1, Sy2, Sy1; + +// // Interpolation coefficients +// ind_pond(xi, &ISx1, &PondSx1); +// ind_pond(xf, &ISx2, &PondSx2); +// ind_pond(yi, &ISy1, &PondSy1); +// ind_pond(yf, &ISy2, &PondSy2); + +// int min_x, max_x; +// int min_y, max_y; + +// // Esirkepov coefficients W +// int shift_Ix = ISx2[0] - ISx1[0]; +// std::fill(Sx2.begin(), Sx2.end(), 0.0); +// std::fill(Sx1.begin(), Sx1.end(), 0.0); + +// if (shift_Ix == 0) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin()); +// std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin()); +// min_x = ISx2[0]; +// max_x = ISx2[interp_order]; +// } else if (shift_Ix == 1) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin() + 1); +// std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin()); +// min_x = ISx1[0]; +// max_x = ISx2[interp_order]; +// } else if (shift_Ix == -1) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin()); +// 
std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin() + 1); +// min_x = ISx2[0]; +// max_x = ISx1[interp_order]; +// } + +// int shift_Iy = ISy2[0] - ISy1[0]; +// std::fill(Sy2.begin(), Sy2.end(), 0.0); +// std::fill(Sy1.begin(), Sy1.end(), 0.0); + +// if (shift_Iy == 0) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin()); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin()); +// min_y = ISy2[0]; +// max_y = ISy2[interp_order]; +// } else if (shift_Iy == 1) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin() + 1); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin()); +// min_y = ISy1[0]; +// max_y = ISy2[interp_order]; +// } else if (shift_Iy == -1) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin()); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin() + 1); +// min_y = ISy2[0]; +// max_y = ISy1[interp_order]; +// } + +// for (int i = 0; i < interp_order + 2; ++i) { +// for (int j = 0; j < interp_order + 2; ++j) { +// WEsirkx[i][j] = 0.5 * (Sx2[i] - Sx1[i]) * (Sy2[j] + Sy1[j]); +// WEsirky[i][j] = 0.5 * (Sx2[i] + Sx1[i]) * (Sy2[j] - Sy1[j]); +// WEsirkz[i][j] = THIRD * (Sy2[j] * (0.5 * Sx1[i] + Sx2[i]) + +// Sy1[j] * (0.5 * Sx2[i] + Sx1[i])); +// } +// } + +// // Current deposition jx +// for (int j = 0; j < interp_order + 2; ++j) { +// jx_local[0][j] = -aux_jx * WEsirkx[0][j]; +// } +// for (int i = 1; i < interp_order + 2; ++i) { +// for (int j = 0; j < interp_order + 2; ++j) { +// jx_local[i][j] = jx_local[i - 1][j] - aux_jx * WEsirkx[i][j]; +// } +// } +// for (int i = 0; i < max_x - min_x; ++i) { +// for (int j = 0; j < max_y - min_y + 1; ++j) { +// jx[min_x + i][min_y + j] += jx_local[i][j]; +// } +// } + +// // Current deposition jy +// for (int i = 0; i < interp_order + 2; ++i) { +// jy_local[i][0] = -aux_jy * WEsirky[i][0]; +// } +// for (int j = 1; j < interp_order + 2; ++j) { +// for (int i = 0; i < interp_order + 2; ++i) { +// jy_local[i][j] = jy_local[i][j - 1] - aux_jy * WEsirky[i][j]; +// } +// } +// for 
(int i = 0; i < max_x - min_x + 1; ++i) { +// for (int j = 0; j < max_y - min_y; ++j) { +// jy[min_x + i][min_y + j] += jy_local[i][j]; +// } +// } + +// // Current deposition jz +// for (int i = 0; i < max_x - min_x + 1; ++i) { +// for (int j = 0; j < max_y - min_y + 1; ++j) { +// jz[min_x + i][min_y + j] += aux_jz * WEsirkz[i][j]; +// } +// } + +// // define particle positions +// put_value(i1, i0, 0); +// put_value(i2, j0, 0); +// put_value(i1_prev, i0, 0); +// put_value(i2_prev, j0, 0); +// put_value(dx1, dxf, 0); +// put_value(dx2, dyf, 0); +// put_value(dx1_prev, dxi, 0); +// put_value(dx2_prev, dyi, 0); +// put_value(weight, 1.0, 0); +// put_value(tag, ParticleTag::alive, 0); + +// auto J_scat = Kokkos::Experimental::create_scatter_view(J); + +// // clang-format off +// Kokkos::parallel_for("CurrentsDeposit", 10, +// kernel::DepositCurrents_kernel(J_scat, +// i1, i2, i3, +// i1_prev, i2_prev, i3_prev, +// dx1, dx2, dx3, +// dx1_prev, dx2_prev, dx3_prev, +// ux1, ux2, ux3, +// phi, weight, tag, +// metric, charge, inv_dt)); +// // clang-format on + +// Kokkos::Experimental::contribute(J, J_scat); + +// const auto range = Kokkos::MDRangePolicy>( +// { N_GHOSTS, N_GHOSTS }, +// { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); + +// real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; +// Kokkos::parallel_reduce( +// "SumDivJ", +// range, +// Lambda(const int i, const int j, real_t& sum) { +// sum += J(i, j, cur::jx1) - J(i - 1, j, cur::jx1) + J(i, j, cur::jx2) - +// J(i, j - 1, cur::jx2); +// }, +// SumDivJ); + +// Kokkos::parallel_reduce( +// "SumJx", +// range, +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, +// SumJx); + +// Kokkos::parallel_reduce( +// "SumJy", +// range, +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, +// SumJy); + +// auto J_h = Kokkos::create_mirror_view(J); +// Kokkos::deep_copy(J_h, J); + +// if (not cmp::AlmostZero(SumDivJ)) { +// throw 
std::logic_error("DepositCurrents_kernel::SumDivJ != 0"); +// } + +// // std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; +// // std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), jx[i0][j0], "", acc), +// "DepositCurrents_kernel::Jx1 is incorrect"); +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), jx[i0][j0+1], "", acc), +// "DepositCurrents_kernel::Jx2 is incorrect"); +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), jy[i0][j0], "", acc), +// "DepositCurrents_kernel::Jy1 is incorrect"); +// errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), jy[i0][j0+1], "", acc), +// "DepositCurrents_kernel::Jy2 is incorrect"); +// } + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); @@ -197,50 +497,50 @@ auto main(int argc, char* argv[]) -> int { {}, 500); - testDeposit, SimEngine::SRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - {}, - 500); - - testDeposit, SimEngine::SRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "r0", 0.0 }, { "h", 0.25 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "a", 0.9 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "r0", 0.0 }, { "h", 0.25 }, { "a", 0.9 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "a", 0.9 } }, - 500); + // testDeposit, SimEngine::SRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // {}, + // 500); + + // testDeposit, SimEngine::SRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "r0", 0.0 }, { "h", 0.25 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "a", 0.9 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + 
// { { "r0", 0.0 }, { "h", 0.25 }, { "a", 0.9 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "a", 0.9 } }, + // 500); } catch (std::exception& e) { std::cerr << e.what() << std::endl; From 56763b8998aea724641c6dae4e556ac1f47cb319 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 26 May 2025 14:28:11 -0400 Subject: [PATCH 32/82] kernel error --- src/kernels/currents_deposit.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 76743f44..0b7409f4 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -148,7 +148,7 @@ namespace kernel { S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { - raise::Error("Invalid shift in indices", HERE); + raise::KernelError(HERE, "Invalid shift in indices"); } // account for ghost cells here to shorten J update expression @@ -285,7 +285,7 @@ namespace kernel { S1_3 = static_cast(1 / 6) * di_center3; S1_4 = ZERO; } else { - raise::Error("Invalid shift in indices", HERE); + raise::KernelError(HERE, "Invalid shift in indices"); } // account for ghost cells here to shorten J update expression @@ -862,7 +862,7 @@ namespace kernel { J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; } /* - z - component, simulated direction + z - component, unsimulated direction */ J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; @@ -931,7 +931,7 @@ namespace kernel { i3(p), dx3(p), i3_prev(p), dx3_prev(p)); // clang-format on - + // Calculate weight function // for (int i = 0; i < interp_order + 2; ++i) { // for (int j = 0; j < interp_order + 2; ++j) { @@ -2188,4 +2188,4 @@ namespace kernel { #undef i_di_to_Xi -#endif // KERNELS_CURRENTS_DEPOSIT_HPP \ No newline at end of file +#endif // KERNELS_CURRENTS_DEPOSIT_HPP From 84fcb6c372bb228e43b9b05910e6037d2d354bc7 Mon Sep 17 00:00:00 2001 From: hayk 
Date: Mon, 26 May 2025 14:40:35 -0400 Subject: [PATCH 33/82] deposit test fixed --- src/kernels/tests/deposit.cpp | 38 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index bdfbb26a..3ccb4999 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -81,14 +81,14 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - const int i0 = 3, j0 = 3; - const int i0f = 3, j0f = 3; - const real_t uz = 0.5; - - // const prtldx_t dxi = 0.53, dxf = 0.47; - // const prtldx_t dyi = 0.34, dyf = 0.52; - const prtldx_t dxi = 0.65, dxf = 0.65; - const prtldx_t dyi = 0.65, dyf = 0.65; + const int i0 = 4, j0 = 4; + const int i0f = 3, j0f = 3; + const real_t uz = 2.5; + + // const prtldx_t dxi = 0.53, dxf = 0.47; + // const prtldx_t dyi = 0.34, dyf = 0.52; + const prtldx_t dxi = 0.65, dxf = 0.99; + const prtldx_t dyi = 0.65, dyf = 0.80; const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0f + (real_t)dxf; const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0f + (real_t)dyf; @@ -116,14 +116,10 @@ void testDeposit(const std::vector& res, const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - const real_t Jz = Fz1 * (1 - Wx1) + Fz2 * (1 - Wy1) + - Fz1 * Wx1 * (1 - Wy1) + - Fz1 * (1 - Wx1) * Wy1 + - Fz1 * Wx1 * Wy1 + - Fz2 * (1 - Wx2) * (1 - Wy2) + - Fz2 * Wx2 * (1 - Wy2) + - Fz2 * (1 - Wx2) * Wy2 + - Fz2 * Wx2 * Wy2; + const real_t Jz = Fz1 * (1 - Wx1) * (1 - Wy1) + Fz1 * Wx1 * (1 - Wy1) + + Fz1 * (1 - Wx1) * Wy1 + Fz1 * Wx1 * Wy1 + + Fz2 * (1 - Wx2) * (1 - Wy2) + Fz2 * Wx2 * (1 - Wy2) + + Fz2 * (1 - Wx2) * Wy2 + Fz2 * Wx2 * Wy2; put_value(i1, i0f, 0); put_value(i2, j0f, 0); @@ -141,7 +137,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - kernel::DepositCurrents_kernel(J_scat, + 
kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -453,14 +449,14 @@ void testDeposit(const std::vector& res, // Kokkos::parallel_reduce( // "SumJx", // range, -// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, -// SumJx); +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); +// }, SumJx); // Kokkos::parallel_reduce( // "SumJy", // range, -// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, -// SumJy); +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); +// }, SumJy); // auto J_h = Kokkos::create_mirror_view(J); // Kokkos::deep_copy(J_h, J); From 9b8985d4916d4f5056cd161dba91838046934df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 14:34:07 -0500 Subject: [PATCH 34/82] fixes to pusher --- src/kernels/particle_pusher_sr.hpp | 883 ++++++++++++++++------------- 1 file changed, 475 insertions(+), 408 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 2d89f587..7ccd06a8 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,9 +475,9 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - //getInterpFlds2nd(p, ei, bi); + // getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + getInterpFlds2nd(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -1100,46 +1100,59 @@ namespace kernel::sr { const int i { i1(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; + + // direct interpolation of staggered grid + // primal = i, dual = i+ind + const int ind = 
static_cast(static_cast(dx1_ + HALF)); + // Compute weights for second-order interpolation - real_t w0 = HALF * SQR(HALF - dx1_); - real_t w1 = static_cast(0.75) - SQR(dx1_); - real_t w2 = HALF * SQR(HALF + dx1_); + // primal + const auto wp0 = HALF * SQR(HALF - dx1_); + const auto wp1 = static_cast(0.75) - SQR(dx1_); + const auto wp2 = HALF * SQR(HALF + dx1_); + // dual - ToDo! + const auto wd0 = HALF * SQR(HALF - dx1_); + const auto wd1 = static_cast(0.75) - SQR(dx1_); + const auto wd2 = HALF * SQR(HALF + dx1_); // Ex1 (dual grid) - real_t c0 = EB(i - 1, em::ex1); // First grid point - real_t c1 = EB(i, em::ex1); // Second grid point - real_t c2 = EB(i + 1, em::ex1); // Third grid point - e0[0] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex1_0 = EB(ind + i - 1, em::ex1); + const auto ex1_1 = EB(ind + i, em::ex1); + const auto ex1_2 = EB(ind + i + 1, em::ex1); + e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; // Ex2 (primal grid) - c0 = EB(i - 1, em::ex2); // First grid point - c1 = EB(i, em::ex2); // Second grid point - c2 = EB(i + 1, em::ex2); // Third grid point - e0[1] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex2_0 = EB(i - 1, em::ex2); + const auto ex2_1 = EB(i, em::ex2); + const auto ex2_2 = EB(i + 1, em::ex2); + e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; // Ex3 (primal grid) - c0 = EB(i - 1, em::ex3); // First grid point - c1 = EB(i, em::ex3); // Second grid point - c2 = EB(i + 1, em::ex3); // Third grid point - e0[2] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex3_0 = EB(i - 1, em::ex3); + const auto ex3_1 = EB(i, em::ex3); // Second grid point + const auto ex3_2 = EB(i + 1, em::ex3); + e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; // Bx1 (primal grid) - c0 = EB(i - 1, em::bx1); // First grid point - c1 = EB(i, em::bx1); // Second grid point - c2 = EB(i + 1, em::bx1); // Third grid point - b0[0] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx1_0 = EB(i - 1, em::bx1); + const auto bx1_1 = EB(i, em::bx1); // Second grid point + const auto bx1_2 = 
EB(i + 1, em::bx1); + b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; // Bx2 (dual grid) - c0 = EB(i - 2, em::bx2); // First grid point - c1 = EB(i - 1, em::bx2); // Second grid point - c2 = EB(i, em::bx2); // Third grid point - b0[1] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx2_0 = EB(ind + i - 2, em::bx2); + const auto bx2_1 = EB(ind + i - 1, em::bx2); // Second grid point + const auto bx2_2 = EB(ind + i, em::bx2); + b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; // Bx3 (dual grid) - c0 = EB(i - 2, em::bx3); // First grid point - c1 = EB(i - 1, em::bx3); // Second grid point - c2 = EB(i, em::bx3); // Third grid point - b0[2] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx3_0 = EB(ind + i - 2, em::bx3); + const auto bx3_1 = EB(ind + i - 1, em::bx3); // Second grid point + const auto bx3_2 = EB(ind + i, em::bx3); + b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; } else if constexpr (D == Dim::_2D) { const int i { i1(p) + static_cast(N_GHOSTS) }; @@ -1147,116 +1160,149 @@ namespace kernel::sr { const auto dx1_ { static_cast(dx1(p)) }; const auto dx2_ { static_cast(dx2(p)) }; - // Compute weights for second-order interpolation - real_t w0x = HALF * SQR(HALF - dx1_); - real_t w1x = static_cast(0.75) - SQR(dx1_); - real_t w2x = HALF * SQR(HALF + dx1_); + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; - real_t w0y = HALF * SQR(HALF - dx2_); - real_t w1y = static_cast(0.75) - SQR(dx2_); - real_t w2y = HALF * SQR(HALF + dx2_); + const int dx2_less_half = static_cast(dx2_ < + static_cast(0.5)); + const auto dx2_center = static_cast(dx2_less_half) - dx2_; + + // direct interpolation of staggered grid + // primal = i, dual = i+ind + const int indx = static_cast(static_cast(dx1_ + HALF)); + const int indy = static_cast(static_cast(dx2_ + HALF)); + + // Compute weights for second-order interpolation + // primal + const auto w0px = HALF * SQR(HALF + dx1_center); + const auto w1px = 
static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0py = HALF * SQR(HALF + dx2_center); + const auto w1py = static_cast(0.75) - SQR(dx2_center); + const auto w2py = HALF * SQR(HALF - dx2_center); + + // dual - ToDo! + const auto w0dx = HALF * SQR(HALF + dx1_center); + const auto w1dx = static_cast(0.75) - SQR(dx1_center); + const auto w2dx = HALF * SQR(HALF - dx1_center); + const auto w0dy = HALF * SQR(HALF + dx2_center); + const auto w1dy = static_cast(0.75) - SQR(dx2_center); + const auto w2dy = HALF * SQR(HALF - dx2_center); // Ex1 // Interpolate --- (dual, primal) - real_t c000 = EB(i - 1, j - 1, em::ex1); - real_t c100 = EB(i, j - 1, em::ex1); - real_t c200 = EB(i + 1, j - 1, em::ex1); - real_t c010 = EB(i - 1, j, em::ex1); - real_t c110 = EB(i, j, em::ex1); - real_t c210 = EB(i + 1, j, em::ex1); - real_t c020 = EB(i - 1, j + 1, em::ex1); - real_t c120 = EB(i, j + 1, em::ex1); - real_t c220 = EB(i + 1, j + 1, em::ex1); - - real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; - real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; - real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex1_000 = EB(indx + i - 1, j - 1, em::ex1); + const auto ex1_100 = EB(indx + i, j - 1, em::ex1); + const auto ex1_200 = EB(indx + i + 1, j - 1, em::ex1); + const auto ex1_010 = EB(indx + i - 1, j, em::ex1); + const auto ex1_110 = EB(indx + i, j, em::ex1); + const auto ex1_210 = EB(indx + i + 1, j, em::ex1); + const auto ex1_020 = EB(indx + i - 1, j + 1, em::ex1); + const auto ex1_120 = EB(indx + i, j + 1, em::ex1); + const auto ex1_220 = EB(indx + i + 1, j + 1, em::ex1); + // clang-format on + + const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; + const auto ex1_1 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; + const auto ex1_2 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; + e0[0] = ex1_0 * w0py + ex1_1 * w1py + ex1_2 * w2py; // Ex2 // 
Interpolate --- (primal, dual) - c000 = EB(i - 1, j - 1, em::ex2); - c100 = EB(i, j - 1, em::ex2); - c200 = EB(i + 1, j - 1, em::ex2); - c010 = EB(i - 1, j, em::ex2); - c110 = EB(i, j, em::ex2); - c210 = EB(i + 1, j, em::ex2); - c020 = EB(i - 1, j + 1, em::ex2); - c120 = EB(i, j + 1, em::ex2); - c220 = EB(i + 1, j + 1, em::ex2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex2_000 = EB(i - 1, indy + j - 1, em::ex2); + const auto ex2_100 = EB(i, indy + j - 1, em::ex2); + const auto ex2_200 = EB(i + 1, indy + j - 1, em::ex2); + const auto ex2_010 = EB(i - 1, indy + j, em::ex2); + const auto ex2_110 = EB(i, indy + j, em::ex2); + const auto ex2_210 = EB(i + 1, indy + j, em::ex2); + const auto ex2_020 = EB(i - 1, indy + j + 1, em::ex2); + const auto ex2_120 = EB(i, indy + j + 1, em::ex2); + const auto ex2_220 = EB(i + 1, indy + j + 1, em::ex2); + // clang-format on + + const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; + const auto ex2_1 = ex2_010 * w0px + ex2_110 * w1px + ex2_210 * w2px; + const auto ex2_2 = ex2_020 * w0px + ex2_120 * w1px + ex2_220 * w2px; + e0[1] = ex2_0 * w0dy + ex2_1 * w1dy + ex2_2 * w2dy; // Ex3 // Interpolate --- (primal, primal) - c000 = EB(i - 1, j - 1, em::ex3); - c100 = EB(i, j - 1, em::ex3); - c200 = EB(i + 1, j - 1, em::ex3); - c010 = EB(i - 1, j, em::ex3); - c110 = EB(i, j, em::ex3); - c210 = EB(i + 1, j, em::ex3); - c020 = EB(i - 1, j + 1, em::ex3); - c120 = EB(i, j + 1, em::ex3); - c220 = EB(i + 1, j + 1, em::ex3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex3_000 = EB(i - 1, j - 1, em::ex3); + const auto ex3_100 = EB(i, j - 1, em::ex3); + const auto ex3_200 = EB(i + 1, j - 1, 
em::ex3); + const auto ex3_010 = EB(i - 1, j, em::ex3); + const auto ex3_110 = EB(i, j, em::ex3); + const auto ex3_210 = EB(i + 1, j, em::ex3); + const auto ex3_020 = EB(i - 1, j + 1, em::ex3); + const auto ex3_120 = EB(i, j + 1, em::ex3); + const auto ex3_220 = EB(i + 1, j + 1, em::ex3); + // clang-format on + + const auto ex3_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; + const auto ex3_1 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; + const auto ex3_2 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; + e0[2] = ex3_0 * w0py + ex3_1 * w1py + ex3_2 * w2py; // Bx1 // Interpolate --- (primal, dual) - c000 = EB(i - 1, j - 1, em::bx1); - c100 = EB(i, j - 1, em::bx1); - c200 = EB(i + 1, j - 1, em::bx1); - c010 = EB(i - 1, j, em::bx1); - c110 = EB(i, j, em::bx1); - c210 = EB(i + 1, j, em::bx1); - c020 = EB(i - 1, j + 1, em::bx1); - c120 = EB(i, j + 1, em::bx1); - c220 = EB(i + 1, j + 1, em::bx1); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx1_000 = EB(i - 1, indy + j - 1, em::bx1); + const auto bx1_100 = EB(i, indy + j - 1, em::bx1); + const auto bx1_200 = EB(i + 1, indy + j - 1, em::bx1); + const auto bx1_010 = EB(i - 1, indy + j, em::bx1); + const auto bx1_110 = EB(i, indy + j, em::bx1); + const auto bx1_210 = EB(i + 1, indy + j, em::bx1); + const auto bx1_020 = EB(i - 1, indy + j + 1, em::bx1); + const auto bx1_120 = EB(i, indy + j + 1, em::bx1); + const auto bx1_220 = EB(i + 1, indy + j + 1, em::bx1); + // clang-format on + + const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; + const auto bx1_1 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; + const auto bx1_2 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; + b0[0] = bx1_0 * w0dy + bx1_1 * w1dy + bx1_2 * w2dy; // Bx2 // Interpolate --- (dual, primal) - c000 = EB(i - 1, j - 1, em::bx2); - c100 = EB(i, j - 1, 
em::bx2); - c200 = EB(i + 1, j - 1, em::bx2); - c010 = EB(i - 1, j, em::bx2); - c110 = EB(i, j, em::bx2); - c210 = EB(i + 1, j, em::bx2); - c020 = EB(i - 1, j + 1, em::bx2); - c120 = EB(i, j + 1, em::bx2); - c220 = EB(i + 1, j + 1, em::bx2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx2_000 = EB(indx + i - 1, j - 1, em::bx2); + const auto bx2_100 = EB(indx + i, j - 1, em::bx2); + const auto bx2_200 = EB(indx + i + 1, j - 1, em::bx2); + const auto bx2_010 = EB(indx + i - 1, j, em::bx2); + const auto bx2_110 = EB(indx + i, j, em::bx2); + const auto bx2_210 = EB(indx + i + 1, j, em::bx2); + const auto bx2_020 = EB(indx + i - 1, j + 1, em::bx2); + const auto bx2_120 = EB(indx + i, j + 1, em::bx2); + const auto bx2_220 = EB(indx + i + 1, j + 1, em::bx2); + // clang-format on + + const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; + const auto bx2_1 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; + const auto bx2_2 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; + b0[1] = bx2_0 * w0py + bx2_1 * w1py + bx2_2 * w2py; // Bx3 // Interpolate --- (dual, dual) - c000 = EB(i - 1, j - 1, em::bx3); - c100 = EB(i, j - 1, em::bx3); - c200 = EB(i + 1, j - 1, em::bx3); - c010 = EB(i - 1, j, em::bx3); - c110 = EB(i, j, em::bx3); - c210 = EB(i + 1, j, em::bx3); - c020 = EB(i - 1, j + 1, em::bx3); - c120 = EB(i, j + 1, em::bx3); - c220 = EB(i + 1, j + 1, em::bx3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx3_000 = EB(indx + i - 1, indy + j - 1, em::bx3); + const auto bx3_100 = EB(indx + i, indy + j - 1, em::bx3); + const auto bx3_200 = EB(indx + i + 1, indy + j - 1, em::bx3); + const auto bx3_010 = EB(indx + i - 1, indy + j, 
em::bx3); + const auto bx3_110 = EB(indx + i, indy + j, em::bx3); + const auto bx3_210 = EB(indx + i + 1, indy + j, em::bx3); + const auto bx3_020 = EB(indx + i - 1, indy + j + 1, em::bx3); + const auto bx3_120 = EB(indx + i, indy + j + 1, em::bx3); + const auto bx3_220 = EB(indx + i + 1, indy + j + 1, em::bx3); + // clang-format on + + const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; + const auto bx3_1 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; + const auto bx3_2 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; + b0[2] = bx3_0 * w0dy + bx3_1 * w1dy + bx3_2 * w2dy; } else if constexpr (D == Dim::_3D) { const int i { i1(p) + static_cast(N_GHOSTS) }; @@ -1266,312 +1312,333 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; - // Compute weights for second-order interpolation - real_t w0x = HALF * SQR(HALF - dx1_); - real_t w1x = static_cast(0.75) - SQR(dx1_); - real_t w2x = HALF * SQR(HALF + dx1_); - - real_t w0y = HALF * SQR(HALF - dx2_); - real_t w1y = static_cast(0.75) - SQR(dx2_); - real_t w2y = HALF * SQR(HALF + dx2_); + // direct interpolation of staggered grid + // primal = i, dual = i+ind + const int indx = static_cast(static_cast(dx1_ + HALF)); + const int indy = static_cast(static_cast(dx2_ + HALF)); + const int indz = static_cast(static_cast(dx3_ + HALF)); - real_t w0z = HALF * SQR(HALF - dx3_); - real_t w1z = static_cast(0.75) - SQR(dx3_); - real_t w2z = HALF * SQR(HALF + dx3_); + // Compute weights for second-order interpolation + // primal + const auto w0px = HALF * SQR(HALF - dx1_); + const auto w1px = static_cast(0.75) - SQR(dx1_); + const auto w2px = HALF * SQR(HALF + dx1_); + const auto w0py = HALF * SQR(HALF - dx2_); + const auto w1py = static_cast(0.75) - SQR(dx2_); + const auto w2py = HALF * SQR(HALF + dx2_); + const auto w0pz = HALF * SQR(HALF - dx3_); + const auto w1pz = static_cast(0.75) - SQR(dx3_); + const auto w2pz = HALF * SQR(HALF + dx3_); + // dual + 
const auto w0dx = HALF * SQR(HALF - dx1_); + const auto w1dx = static_cast(0.75) - SQR(dx1_); + const auto w2dx = HALF * SQR(HALF + dx1_); + const auto w0dy = HALF * SQR(HALF - dx2_); + const auto w1dy = static_cast(0.75) - SQR(dx2_); + const auto w2dy = HALF * SQR(HALF + dx2_); + const auto w0dz = HALF * SQR(HALF - dx3_); + const auto w1dz = static_cast(0.75) - SQR(dx3_); + const auto w2dz = HALF * SQR(HALF + dx3_); // Ex1 // Interpolate --- (dual, primal, primal) - real_t c000 = EB(i - 1, j - 1, k - 1, em::ex1); - real_t c100 = EB(i, j - 1, k - 1, em::ex1); - real_t c200 = EB(i + 1, j - 1, k - 1, em::ex1); - real_t c010 = EB(i - 1, j, k - 1, em::ex1); - real_t c110 = EB(i, j, k - 1, em::ex1); - real_t c210 = EB(i + 1, j, k - 1, em::ex1); - real_t c020 = EB(i - 1, j + 1, k - 1, em::ex1); - real_t c120 = EB(i, j + 1, k - 1, em::ex1); - real_t c220 = EB(i + 1, j + 1, k - 1, em::ex1); - - real_t c001 = EB(i - 1, j - 1, k, em::ex1); - real_t c101 = EB(i, j - 1, k, em::ex1); - real_t c201 = EB(i + 1, j - 1, k, em::ex1); - real_t c011 = EB(i - 1, j, k, em::ex1); - real_t c111 = EB(i, j, k, em::ex1); - real_t c211 = EB(i + 1, j, k, em::ex1); - real_t c021 = EB(i - 1, j + 1, k, em::ex1); - real_t c121 = EB(i, j + 1, k, em::ex1); - real_t c221 = EB(i + 1, j + 1, k, em::ex1); - - real_t c002 = EB(i - 1, j - 1, k + 1, em::ex1); - real_t c102 = EB(i, j - 1, k + 1, em::ex1); - real_t c202 = EB(i + 1, j - 1, k + 1, em::ex1); - real_t c012 = EB(i - 1, j, k + 1, em::ex1); - real_t c112 = EB(i, j, k + 1, em::ex1); - real_t c212 = EB(i + 1, j, k + 1, em::ex1); - real_t c022 = EB(i - 1, j + 1, k + 1, em::ex1); - real_t c122 = EB(i, j + 1, k + 1, em::ex1); - real_t c222 = EB(i + 1, j + 1, k + 1, em::ex1); - - real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; - real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; - real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; - real_t c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x 
+ c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - real_t c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - real_t c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex1_000 = EB(indx + i - 1, j - 1, k - 1, em::ex1); + const auto ex1_100 = EB(indx + i, j - 1, k - 1, em::ex1); + const auto ex1_200 = EB(indx + i + 1, j - 1, k - 1, em::ex1); + const auto ex1_010 = EB(indx + i - 1, j, k - 1, em::ex1); + const auto ex1_110 = EB(indx + i, j, k - 1, em::ex1); + const auto ex1_210 = EB(indx + i + 1, j, k - 1, em::ex1); + const auto ex1_020 = EB(indx + i - 1, j + 1, k - 1, em::ex1); + const auto ex1_120 = EB(indx + i, j + 1, k - 1, em::ex1); + const auto ex1_220 = EB(indx + i + 1, j + 1, k - 1, em::ex1); + + const auto ex1_001 = EB(indx + i - 1, j - 1, k, em::ex1); + const auto ex1_101 = EB(indx + i, j - 1, k, em::ex1); + const auto ex1_201 = EB(indx + i + 1, j - 1, k, em::ex1); + const auto ex1_011 = EB(indx + i - 1, j, k, em::ex1); + const auto ex1_111 = EB(indx + i, j, k, em::ex1); + const auto ex1_211 = EB(indx + i + 1, j, k, em::ex1); + const auto ex1_021 = EB(indx + i - 1, j + 1, k, em::ex1); + const auto ex1_121 = EB(indx + i, j + 1, k, em::ex1); + const auto ex1_221 = EB(indx + i + 1, j + 1, k, em::ex1); + + const auto ex1_002 = EB(indx + i - 1, j - 1, k + 1, em::ex1); + const auto ex1_102 = EB(indx + i, j - 1, k + 1, em::ex1); + const auto ex1_202 = EB(indx + i + 1, j - 1, k + 1, em::ex1); + const auto ex1_012 = EB(indx + i - 1, j, k + 1, em::ex1); + const auto ex1_112 = EB(indx + i, j, k + 1, em::ex1); + const auto ex1_212 = EB(indx + i + 1, j, k + 1, em::ex1); + const auto ex1_022 = EB(indx + i - 1, j + 1, k + 1, em::ex1); + const auto ex1_122 = EB(indx + i, j + 1, k + 1, em::ex1); + const auto ex1_222 = EB(indx + i + 1, j + 1, k + 1, em::ex1); + // 
clang-format on + + const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; + const auto ex1_1_0 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; + const auto ex1_2_0 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; + const auto ex1_0_1 = ex1_001 * w0dx + ex1_101 * w1dx + ex1_201 * w2dx; + const auto ex1_1_1 = ex1_011 * w0dx + ex1_111 * w1dx + ex1_211 * w2dx; + const auto ex1_2_1 = ex1_021 * w0dx + ex1_121 * w1dx + ex1_221 * w2dx; + const auto ex1_0_2 = ex1_002 * w0dx + ex1_102 * w1dx + ex1_202 * w2dx; + const auto ex1_1_2 = ex1_012 * w0dx + ex1_112 * w1dx + ex1_212 * w2dx; + const auto ex1_2_2 = ex1_022 * w0dx + ex1_122 * w1dx + ex1_222 * w2dx; + + const auto ex1_00 = ex1_0_0 * w0py + ex1_1_0 * w1py + ex1_2_0 * w2py; + const auto ex1_01 = ex1_0_1 * w0py + ex1_1_1 * w1py + ex1_2_1 * w2py; + const auto ex1_02 = ex1_0_2 * w0py + ex1_1_2 * w1py + ex1_2_2 * w2py; + + e0[0] = ex1_00 * w0pz + ex1_01 * w1pz + ex1_02 * w2pz; // Ex2 // Interpolate -- (primal, dual, primal) - c000 = EB(i - 1, j - 1, k - 1, em::ex2); - c100 = EB(i, j - 1, k - 1, em::ex2); - c200 = EB(i + 1, j - 1, k - 1, em::ex2); - c010 = EB(i - 1, j, k - 1, em::ex2); - c110 = EB(i, j, k - 1, em::ex2); - c210 = EB(i + 1, j, k - 1, em::ex2); - c020 = EB(i - 1, j + 1, k - 1, em::ex2); - c120 = EB(i, j + 1, k - 1, em::ex2); - c220 = EB(i + 1, j + 1, k - 1, em::ex2); - - c001 = EB(i - 1, j - 1, k, em::ex2); - c101 = EB(i, j - 1, k, em::ex2); - c201 = EB(i + 1, j - 1, k, em::ex2); - c011 = EB(i - 1, j, k, em::ex2); - c111 = EB(i, j, k, em::ex2); - c211 = EB(i + 1, j, k, em::ex2); - c021 = EB(i - 1, j + 1, k, em::ex2); - c121 = EB(i, j + 1, k, em::ex2); - c221 = EB(i + 1, j + 1, k, em::ex2); - - c002 = EB(i - 1, j - 1, k + 1, em::ex2); - c102 = EB(i, j - 1, k + 1, em::ex2); - c202 = EB(i + 1, j - 1, k + 1, em::ex2); - c012 = EB(i - 1, j, k + 1, em::ex2); - c112 = EB(i, j, k + 1, em::ex2); - c212 = EB(i + 1, j, k + 1, em::ex2); - c022 = EB(i - 1, j + 1, k + 1, em::ex2); - c122 = EB(i, j + 1, k 
+ 1, em::ex2); - c222 = EB(i + 1, j + 1, k + 1, em::ex2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex2_000 = EB(i - 1, indy + j - 1, k - 1, em::ex2); + const auto ex2_100 = EB(i, indy + j - 1, k - 1, em::ex2); + const auto ex2_200 = EB(i + 1, indy + j - 1, k - 1, em::ex2); + const auto ex2_010 = EB(i - 1, indy + j, k - 1, em::ex2); + const auto ex2_110 = EB(i, indy + j, k - 1, em::ex2); + const auto ex2_210 = EB(i + 1, indy + j, k - 1, em::ex2); + const auto ex2_020 = EB(i - 1, indy + j + 1, k - 1, em::ex2); + const auto ex2_120 = EB(i, indy + j + 1, k - 1, em::ex2); + const auto ex2_220 = EB(i + 1, indy + j + 1, k - 1, em::ex2); + + const auto ex2_001 = EB(i - 1, indy + j - 1, k, em::ex2); + const auto ex2_101 = EB(i, indy + j - 1, k, em::ex2); + const auto ex2_201 = EB(i + 1, indy + j - 1, k, em::ex2); + const auto ex2_011 = EB(i - 1, indy + j, k, em::ex2); + const auto ex2_111 = EB(i, indy + j, k, em::ex2); + const auto ex2_211 = EB(i + 1, indy + j, k, em::ex2); + const auto ex2_021 = EB(i - 1, indy + j + 1, k, em::ex2); + const auto ex2_121 = EB(i, indy + j + 1, k, em::ex2); + const auto ex2_221 = EB(i + 1, indy + j + 1, k, em::ex2); + + const auto ex2_002 = EB(i - 1, indy + j - 1, k + 1, em::ex2); + const auto ex2_102 = EB(i, indy + j - 1, k + 1, em::ex2); + const auto ex2_202 = EB(i + 1, indy + j - 1, k + 1, em::ex2); + const auto ex2_012 = EB(i - 1, indy + j, k + 1, em::ex2); + const auto ex2_112 
= EB(i, indy + j, k + 1, em::ex2); + const auto ex2_212 = EB(i + 1, indy + j, k + 1, em::ex2); + const auto ex2_022 = EB(i - 1, indy + j + 1, k + 1, em::ex2); + const auto ex2_122 = EB(i, indy + j + 1, k + 1, em::ex2); + const auto ex2_222 = EB(i + 1, indy + j + 1, k + 1, em::ex2); + // clang-format on + + const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex1_200 * w2px; + const auto ex2_1_0 = ex2_010 * w0px + ex2_110 * w1px + ex1_210 * w2px; + const auto ex2_2_0 = ex2_020 * w0px + ex2_120 * w1px + ex1_220 * w2px; + const auto ex2_0_1 = ex2_001 * w0px + ex2_101 * w1px + ex2_201 * w2px; + const auto ex2_1_1 = ex2_011 * w0px + ex2_111 * w1px + ex2_211 * w2px; + const auto ex2_2_1 = ex2_021 * w0px + ex2_121 * w1px + ex2_221 * w2px; + const auto ex2_0_2 = ex2_002 * w0px + ex2_102 * w1px + ex2_202 * w2px; + const auto ex2_1_2 = ex2_012 * w0px + ex2_112 * w1px + ex2_212 * w2px; + const auto ex2_2_2 = ex2_022 * w0px + ex2_122 * w1px + ex2_222 * w2px; + + const auto ex2_00 = ex2_0_0 * w0dy + ex2_1_0 * w1dy + ex2_2_0 * w2dy; + const auto ex2_01 = ex2_0_1 * w0dy + ex2_1_1 * w1dy + ex2_2_1 * w2dy; + const auto ex2_02 = ex2_0_2 * w0dy + ex2_1_2 * w1dy + ex2_2_2 * w2dy; + + e0[1] = ex2_00 * w0pz + ex2_01 * w1pz + ex2_02 * w2pz; // Ex3 // Interpolate -- (primal, primal, dual) - c000 = EB(i - 1, j - 1, k - 1, em::ex3); - c100 = EB(i, j - 1, k - 1, em::ex3); - c200 = EB(i + 1, j - 1, k - 1, em::ex3); - c010 = EB(i - 1, j, k - 1, em::ex3); - c110 = EB(i, j, k - 1, em::ex3); - c210 = EB(i + 1, j, k - 1, em::ex3); - c020 = EB(i - 1, j + 1, k - 1, em::ex3); - c120 = EB(i, j + 1, k - 1, em::ex3); - c220 = EB(i + 1, j + 1, k - 1, em::ex3); - - c001 = EB(i - 1, j - 1, k, em::ex3); - c101 = EB(i, j - 1, k, em::ex3); - c201 = EB(i + 1, j - 1, k, em::ex3); - c011 = EB(i - 1, j, k, em::ex3); - c111 = EB(i, j, k, em::ex3); - c211 = EB(i + 1, j, k, em::ex3); - c021 = EB(i - 1, j + 1, k, em::ex3); - c121 = EB(i, j + 1, k, em::ex3); - c221 = EB(i + 1, j + 1, k, em::ex3); - - c002 = EB(i - 
1, j - 1, k + 1, em::ex3); - c102 = EB(i, j - 1, k + 1, em::ex3); - c202 = EB(i + 1, j - 1, k + 1, em::ex3); - c012 = EB(i - 1, j, k + 1, em::ex3); - c112 = EB(i, j, k + 1, em::ex3); - c212 = EB(i + 1, j, k + 1, em::ex3); - c022 = EB(i - 1, j + 1, k + 1, em::ex3); - c122 = EB(i, j + 1, k + 1, em::ex3); - c222 = EB(i + 1, j + 1, k + 1, em::ex3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex3_000 = EB(i - 1, j - 1, indz + k - 1, em::ex3); + const auto ex3_100 = EB(i, j - 1, indz + k - 1, em::ex3); + const auto ex3_200 = EB(i + 1, j - 1, indz + k - 1, em::ex3); + const auto ex3_010 = EB(i - 1, j, indz + k - 1, em::ex3); + const auto ex3_110 = EB(i, j, indz + k - 1, em::ex3); + const auto ex3_210 = EB(i + 1, j, indz + k - 1, em::ex3); + const auto ex3_020 = EB(i - 1, j + 1, indz + k - 1, em::ex3); + const auto ex3_120 = EB(i, j + 1, indz + k - 1, em::ex3); + const auto ex3_220 = EB(i + 1, j + 1, indz + k - 1, em::ex3); + + const auto ex3_001 = EB(i - 1, j - 1, indz + k, em::ex3); + const auto ex3_101 = EB(i, j - 1, indz + k, em::ex3); + const auto ex3_201 = EB(i + 1, j - 1, indz + k, em::ex3); + const auto ex3_011 = EB(i - 1, j, indz + k, em::ex3); + const auto ex3_111 = EB(i, j, indz + k, em::ex3); + const auto ex3_211 = EB(i + 1, j, indz + k, em::ex3); + const auto ex3_021 = EB(i - 1, j + 1, indz + k, em::ex3); + const auto ex3_121 = EB(i, j + 1, indz + k, em::ex3); + const auto ex3_221 = EB(i + 1, j + 1, 
indz + k, em::ex3); + + const auto ex3_002 = EB(i - 1, j - 1, indz + k + 1, em::ex3); + const auto ex3_102 = EB(i, j - 1, indz + k + 1, em::ex3); + const auto ex3_202 = EB(i + 1, j - 1, indz + k + 1, em::ex3); + const auto ex3_012 = EB(i - 1, j, indz + k + 1, em::ex3); + const auto ex3_112 = EB(i, j, indz + k + 1, em::ex3); + const auto ex3_212 = EB(i + 1, j, indz + k + 1, em::ex3); + const auto ex3_022 = EB(i - 1, j + 1, indz + k + 1, em::ex3); + const auto ex3_122 = EB(i, j + 1, indz + k + 1, em::ex3); + const auto ex3_222 = EB(i + 1, j + 1, indz + k + 1, em::ex3); + // clang-format on + + const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; + const auto ex3_1_0 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; + const auto ex3_2_0 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; + const auto ex3_0_1 = ex3_001 * w0px + ex3_101 * w1px + ex3_201 * w2px; + const auto ex3_1_1 = ex3_011 * w0px + ex3_111 * w1px + ex3_211 * w2px; + const auto ex3_2_1 = ex3_021 * w0px + ex3_121 * w1px + ex3_221 * w2px; + const auto ex3_0_2 = ex3_002 * w0px + ex3_102 * w1px + ex3_202 * w2px; + const auto ex3_1_2 = ex3_012 * w0px + ex3_112 * w1px + ex3_212 * w2px; + const auto ex3_2_2 = ex3_022 * w0px + ex3_122 * w1px + ex3_222 * w2px; + + const auto ex3_00 = ex3_0_0 * w0py + ex3_1_0 * w1py + ex3_2_0 * w2py; + const auto ex3_01 = ex3_0_1 * w0py + ex3_1_1 * w1py + ex3_2_1 * w2py; + const auto ex3_02 = ex3_0_2 * w0py + ex3_1_2 * w1py + ex3_2_2 * w2py; + + e0[2] = ex3_00 * w0dz + ex3_01 * w1dz + ex3_02 * w2dz; // Bx1 // Interpolate -- (primal, dual, dual) - c000 = EB(i - 1, j - 1, k - 1, em::bx1); - c100 = EB(i, j - 1, k - 1, em::bx1); - c200 = EB(i + 1, j - 1, k - 1, em::bx1); - c010 = EB(i - 1, j, k - 1, em::bx1); - c110 = EB(i, j, k - 1, em::bx1); - c210 = EB(i + 1, j, k - 1, em::bx1); - c020 = EB(i - 1, j + 1, k - 1, em::bx1); - c120 = EB(i, j + 1, k - 1, em::bx1); - c220 = EB(i + 1, j + 1, k - 1, em::bx1); - - c001 = EB(i - 1, j - 1, k, em::bx1); - c101 = EB(i, j - 
1, k, em::bx1); - c201 = EB(i + 1, j - 1, k, em::bx1); - c011 = EB(i - 1, j, k, em::bx1); - c111 = EB(i, j, k, em::bx1); - c211 = EB(i + 1, j, k, em::bx1); - c021 = EB(i - 1, j + 1, k, em::bx1); - c121 = EB(i, j + 1, k, em::bx1); - c221 = EB(i + 1, j + 1, k, em::bx1); - - c002 = EB(i - 1, j - 1, k + 1, em::bx1); - c102 = EB(i, j - 1, k + 1, em::bx1); - c202 = EB(i + 1, j - 1, k + 1, em::bx1); - c012 = EB(i - 1, j, k + 1, em::bx1); - c112 = EB(i, j, k + 1, em::bx1); - c212 = EB(i + 1, j, k + 1, em::bx1); - c022 = EB(i - 1, j + 1, k + 1, em::bx1); - c122 = EB(i, j + 1, k + 1, em::bx1); - c222 = EB(i + 1, j + 1, k + 1, em::bx1); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_100 = EB(i, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_010 = EB(i - 1, indy + j, indz + k - 1, em::bx1); + const auto bx1_110 = EB(i, indy + j, indz + k - 1, em::bx1); + const auto bx1_210 = EB(i + 1, indy + j, indz + k - 1, em::bx1); + const auto bx1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::bx1); + const auto bx1_120 = EB(i, indy + j + 1, indz + k - 1, em::bx1); + const auto bx1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::bx1); + + const auto bx1_001 = EB(i - 1, indy + j - 1, indz + k, em::bx1); + const auto bx1_101 = EB(i, indy + j - 1, indz + k, em::bx1); + const auto 
bx1_201 = EB(i + 1, indy + j - 1, indz + k, em::bx1); + const auto bx1_011 = EB(i - 1, indy + j, indz + k, em::bx1); + const auto bx1_111 = EB(i, indy + j, indz + k, em::bx1); + const auto bx1_211 = EB(i + 1, indy + j, indz + k, em::bx1); + const auto bx1_021 = EB(i - 1, indy + j + 1, indz + k, em::bx1); + const auto bx1_121 = EB(i, indy + j + 1, indz + k, em::bx1); + const auto bx1_221 = EB(i + 1, indy + j + 1, indz + k, em::bx1); + + const auto bx1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_102 = EB(i, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_012 = EB(i - 1, indy + j, indz + k + 1, em::bx1); + const auto bx1_112 = EB(i, indy + j, indz + k + 1, em::bx1); + const auto bx1_212 = EB(i + 1, indy + j, indz + k + 1, em::bx1); + const auto bx1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_122 = EB(i, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::bx1); + // clang-format on + + const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; + const auto bx1_1_0 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; + const auto bx1_2_0 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; + const auto bx1_0_1 = bx1_001 * w0px + bx1_101 * w1px + bx1_201 * w2px; + const auto bx1_1_1 = bx1_011 * w0px + bx1_111 * w1px + bx1_211 * w2px; + const auto bx1_2_1 = bx1_021 * w0px + bx1_121 * w1px + bx1_221 * w2px; + const auto bx1_0_2 = bx1_002 * w0px + bx1_102 * w1px + bx1_202 * w2px; + const auto bx1_1_2 = bx1_012 * w0px + bx1_112 * w1px + bx1_212 * w2px; + const auto bx1_2_2 = bx1_022 * w0px + bx1_122 * w1px + bx1_222 * w2px; + + const auto bx1_00 = bx1_0_0 * w0dy + bx1_1_0 * w1dy + bx1_2_0 * w2dy; + const auto bx1_01 = bx1_0_1 * w0dy + bx1_1_1 * w1dy + bx1_2_1 * w2dy; + const auto bx1_02 = bx1_0_2 * w0dy + bx1_1_2 * w1dy + bx1_2_2 * w2dy; + + b0[0] = bx1_00 * w0dz + bx1_01 * w1dz + 
bx1_02 * w2dz; // Bx2 // Interpolate -- (dual, primal, dual) - c000 = EB(i - 1, j - 1, k - 1, em::bx2); - c100 = EB(i, j - 1, k - 1, em::bx2); - c200 = EB(i + 1, j - 1, k - 1, em::bx2); - c010 = EB(i - 1, j, k - 1, em::bx2); - c110 = EB(i, j, k - 1, em::bx2); - c210 = EB(i + 1, j, k - 1, em::bx2); - c020 = EB(i - 1, j + 1, k - 1, em::bx2); - c120 = EB(i, j + 1, k - 1, em::bx2); - c220 = EB(i + 1, j + 1, k - 1, em::bx2); - - c001 = EB(i - 1, j - 1, k, em::bx2); - c101 = EB(i, j - 1, k, em::bx2); - c201 = EB(i + 1, j - 1, k, em::bx2); - c011 = EB(i - 1, j, k, em::bx2); - c111 = EB(i, j, k, em::bx2); - c211 = EB(i + 1, j, k, em::bx2); - c021 = EB(i - 1, j + 1, k, em::bx2); - c121 = EB(i, j + 1, k, em::bx2); - c221 = EB(i + 1, j + 1, k, em::bx2); - - c002 = EB(i - 1, j - 1, k + 1, em::bx2); - c102 = EB(i, j - 1, k + 1, em::bx2); - c202 = EB(i + 1, j - 1, k + 1, em::bx2); - c012 = EB(i - 1, j, k + 1, em::bx2); - c112 = EB(i, j, k + 1, em::bx2); - c212 = EB(i + 1, j, k + 1, em::bx2); - c022 = EB(i - 1, j + 1, k + 1, em::bx2); - c122 = EB(i, j + 1, k + 1, em::bx2); - c222 = EB(i + 1, j + 1, k + 1, em::bx2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::bx2); + const auto bx2_100 = EB(indx + i, j - 1, indz + k - 1, em::bx2); + const auto bx2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::bx2); + const auto bx2_010 = EB(indx + i - 1, j, indz + k - 1, em::bx2); + 
const auto bx2_110 = EB(indx + i, j, indz + k - 1, em::bx2); + const auto bx2_210 = EB(indx + i + 1, j, indz + k - 1, em::bx2); + const auto bx2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::bx2); + const auto bx2_120 = EB(indx + i, j + 1, indz + k - 1, em::bx2); + const auto bx2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::bx2); + + const auto bx2_001 = EB(indx + i - 1, j - 1, indz + k, em::bx2); + const auto bx2_101 = EB(indx + i, j - 1, indz + k, em::bx2); + const auto bx2_201 = EB(indx + i + 1, j - 1, indz + k, em::bx2); + const auto bx2_011 = EB(indx + i - 1, j, indz + k, em::bx2); + const auto bx2_111 = EB(indx + i, j, indz + k, em::bx2); + const auto bx2_211 = EB(indx + i + 1, j, indz + k, em::bx2); + const auto bx2_021 = EB(indx + i - 1, j + 1, indz + k, em::bx2); + const auto bx2_121 = EB(indx + i, j + 1, indz + k, em::bx2); + const auto bx2_221 = EB(indx + i + 1, j + 1, indz + k, em::bx2); + + const auto bx2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::bx2); + const auto bx2_102 = EB(indx + i, j - 1, indz + k + 1, em::bx2); + const auto bx2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::bx2); + const auto bx2_012 = EB(indx + i - 1, j, indz + k + 1, em::bx2); + const auto bx2_112 = EB(indx + i, j, indz + k + 1, em::bx2); + const auto bx2_212 = EB(indx + i + 1, j, indz + k + 1, em::bx2); + const auto bx2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::bx2); + const auto bx2_122 = EB(indx + i, j + 1, indz + k + 1, em::bx2); + const auto bx2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::bx2); + // clang-format on + + const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; + const auto bx2_1_0 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; + const auto bx2_2_0 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; + const auto bx2_0_1 = bx2_001 * w0dx + bx2_101 * w1dx + bx2_201 * w2dx; + const auto bx2_1_1 = bx2_011 * w0dx + bx2_111 * w1dx + bx2_211 * w2dx; + const auto bx2_2_1 = bx2_021 * w0dx + bx2_121 * w1dx + bx2_221 * w2dx; + 
const auto bx2_0_2 = bx2_002 * w0dx + bx2_102 * w1dx + bx2_202 * w2dx; + const auto bx2_1_2 = bx2_012 * w0dx + bx2_112 * w1dx + bx2_212 * w2dx; + const auto bx2_2_2 = bx2_022 * w0dx + bx2_122 * w1dx + bx2_222 * w2dx; + + const auto bx2_00 = bx2_0_0 * w0py + bx2_1_0 * w1py + bx2_2_0 * w2py; + const auto bx2_01 = bx2_0_1 * w0py + bx2_1_1 * w1py + bx2_2_1 * w2py; + const auto bx2_02 = bx2_0_2 * w0py + bx2_1_2 * w1py + bx2_2_2 * w2py; + + b0[1] = bx2_00 * w0dz + bx2_01 * w1dz + bx2_02 * w2dz; // Bx3 // Interpolate -- (dual, dual, primal) - c000 = EB(i - 1, j - 1, k - 1, em::bx3); - c100 = EB(i, j - 1, k - 1, em::bx3); - c200 = EB(i + 1, j - 1, k - 1, em::bx3); - c010 = EB(i - 1, j, k - 1, em::bx3); - c110 = EB(i, j, k - 1, em::bx3); - c210 = EB(i + 1, j, k - 1, em::bx3); - c020 = EB(i - 1, j + 1, k - 1, em::bx3); - c120 = EB(i, j + 1, k - 1, em::bx3); - c220 = EB(i + 1, j + 1, k - 1, em::bx3); - - c001 = EB(i - 1, j - 1, k, em::bx3); - c101 = EB(i, j - 1, k, em::bx3); - c201 = EB(i + 1, j - 1, k, em::bx3); - c011 = EB(i - 1, j, k, em::bx3); - c111 = EB(i, j, k, em::bx3); - c211 = EB(i + 1, j, k, em::bx3); - c021 = EB(i - 1, j + 1, k, em::bx3); - c121 = EB(i, j + 1, k, em::bx3); - c221 = EB(i + 1, j + 1, k, em::bx3); - - c002 = EB(i - 1, j - 1, k + 1, em::bx3); - c102 = EB(i, j - 1, k + 1, em::bx3); - c202 = EB(i + 1, j - 1, k + 1, em::bx3); - c012 = EB(i - 1, j, k + 1, em::bx3); - c112 = EB(i, j, k + 1, em::bx3); - c212 = EB(i + 1, j, k + 1, em::bx3); - c022 = EB(i - 1, j + 1, k + 1, em::bx3); - c122 = EB(i, j + 1, k + 1, em::bx3); - c222 = EB(i + 1, j + 1, k + 1, em::bx3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - 
c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::bx3); + const auto bx3_100 = EB(indx + i, indy + j - 1, k - 1, em::bx3); + const auto bx3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::bx3); + const auto bx3_010 = EB(indx + i - 1, indy + j, k - 1, em::bx3); + const auto bx3_110 = EB(indx + i, indy + j, k - 1, em::bx3); + const auto bx3_210 = EB(indx + i + 1, indy + j, k - 1, em::bx3); + const auto bx3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::bx3); + const auto bx3_120 = EB(indx + i, indy + j + 1, k - 1, em::bx3); + const auto bx3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::bx3); + + const auto bx3_001 = EB(indx + i - 1, indy + j - 1, k, em::bx3); + const auto bx3_101 = EB(indx + i, indy + j - 1, k, em::bx3); + const auto bx3_201 = EB(indx + i + 1, indy + j - 1, k, em::bx3); + const auto bx3_011 = EB(indx + i - 1, indy + j, k, em::bx3); + const auto bx3_111 = EB(indx + i, indy + j, k, em::bx3); + const auto bx3_211 = EB(indx + i + 1, indy + j, k, em::bx3); + const auto bx3_021 = EB(indx + i - 1, indy + j + 1, k, em::bx3); + const auto bx3_121 = EB(indx + i, indy + j + 1, k, em::bx3); + const auto bx3_221 = EB(indx + i + 1, indy + j + 1, k, em::bx3); + + const auto bx3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::bx3); + const auto bx3_102 = EB(indx + i, indy + j - 1, k + 1, em::bx3); + const auto bx3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::bx3); + const auto bx3_012 = EB(indx + i - 1, indy + j, k + 1, em::bx3); + const auto bx3_112 = EB(indx + i, indy + j, k + 1, em::bx3); + const auto bx3_212 = EB(indx + i + 1, indy + j, k + 1, em::bx3); + const auto bx3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::bx3); + const auto bx3_122 = EB(indx + i, indy + j + 1, k + 1, em::bx3); + const auto bx3_222 = EB(indx + i + 1, indy + j + 1, k + 1, 
em::bx3); + // clang-format on + + const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; + const auto bx3_1_0 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; + const auto bx3_2_0 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; + const auto bx3_0_1 = bx3_001 * w0dx + bx3_101 * w1dx + bx3_201 * w2dx; + const auto bx3_1_1 = bx3_011 * w0dx + bx3_111 * w1dx + bx3_211 * w2dx; + const auto bx3_2_1 = bx3_021 * w0dx + bx3_121 * w1dx + bx3_221 * w2dx; + const auto bx3_0_2 = bx3_002 * w0dx + bx3_102 * w1dx + bx3_202 * w2dx; + const auto bx3_1_2 = bx3_012 * w0dx + bx3_112 * w1dx + bx3_212 * w2dx; + const auto bx3_2_2 = bx3_022 * w0dx + bx3_122 * w1dx + bx3_222 * w2dx; + + const auto bx3_00 = bx3_0_0 * w0dy + bx3_1_0 * w1dy + bx3_2_0 * w2dy; + const auto bx3_01 = bx3_0_1 * w0dy + bx3_1_1 * w1dy + bx3_2_1 * w2dy; + const auto bx3_02 = bx3_0_2 * w0dy + bx3_1_2 * w1dy + bx3_2_2 * w2dy; + + b0[2] = bx3_00 * w0pz + bx3_01 * w1pz + bx3_02 * w2pz; } } From c6b9cc80b335f5e5fdf8dd7bf44c276fde672445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 16:30:36 -0500 Subject: [PATCH 35/82] bugfixes --- src/kernels/particle_pusher_sr.hpp | 566 +++++++++++++++-------------- 1 file changed, 284 insertions(+), 282 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 7ccd06a8..c9dd8377 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1105,53 +1105,54 @@ namespace kernel::sr { const auto dx1_center = static_cast(dx1_less_half) - dx1_; // direct interpolation of staggered grid - // primal = i, dual = i+ind - const int ind = static_cast(static_cast(dx1_ + HALF)); + // primal = i+ind, dual = i + const int indx = static_cast(static_cast(dx1_ + HALF)); // Compute weights for second-order interpolation // primal - const auto wp0 = HALF * SQR(HALF - dx1_); - const auto wp1 = static_cast(0.75) - SQR(dx1_); - const auto wp2 = HALF * SQR(HALF + dx1_); - 
// dual - ToDo! - const auto wd0 = HALF * SQR(HALF - dx1_); - const auto wd1 = static_cast(0.75) - SQR(dx1_); - const auto wd2 = HALF * SQR(HALF + dx1_); + const auto w0px = HALF * SQR(HALF + dx1_center); + const auto w1px = static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + + // dual + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; // Ex1 (dual grid) - const auto ex1_0 = EB(ind + i - 1, em::ex1); - const auto ex1_1 = EB(ind + i, em::ex1); - const auto ex1_2 = EB(ind + i + 1, em::ex1); + const auto ex1_0 = EB(i - 1, em::ex1); + const auto ex1_1 = EB(i, em::ex1); + const auto ex1_2 = EB(i + 1, em::ex1); e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; // Ex2 (primal grid) - const auto ex2_0 = EB(i - 1, em::ex2); - const auto ex2_1 = EB(i, em::ex2); - const auto ex2_2 = EB(i + 1, em::ex2); + const auto ex2_0 = EB(indx + i - 1, em::ex2); + const auto ex2_1 = EB(indx + i, em::ex2); + const auto ex2_2 = EB(indx + i + 1, em::ex2); e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; // Ex3 (primal grid) - const auto ex3_0 = EB(i - 1, em::ex3); - const auto ex3_1 = EB(i, em::ex3); // Second grid point - const auto ex3_2 = EB(i + 1, em::ex3); + const auto ex3_0 = EB(indx + i - 1, em::ex3); + const auto ex3_1 = EB(indx + i, em::ex3); + const auto ex3_2 = EB(indx + i + 1, em::ex3); e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; // Bx1 (primal grid) - const auto bx1_0 = EB(i - 1, em::bx1); - const auto bx1_1 = EB(i, em::bx1); // Second grid point - const auto bx1_2 = EB(i + 1, em::bx1); + const auto bx1_0 = EB(indx + i - 1, em::bx1); + const auto bx1_1 = EB(indx + i, em::bx1); + const auto bx1_2 = EB(indx + i + 1, em::bx1); b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; // Bx2 (dual grid) - const auto bx2_0 = EB(ind + i - 2, em::bx2); - const auto bx2_1 = EB(ind + i - 1, em::bx2); // Second grid point - const auto bx2_2 = EB(ind + i, em::bx2); + const auto bx2_0 = 
EB(i - 1, em::bx2); + const auto bx2_1 = EB(i, em::bx2); + const auto bx2_2 = EB(i + 1, em::bx2); b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; // Bx3 (dual grid) - const auto bx3_0 = EB(ind + i - 2, em::bx3); - const auto bx3_1 = EB(ind + i - 1, em::bx3); // Second grid point - const auto bx3_2 = EB(ind + i, em::bx3); + const auto bx3_0 = EB(i - 1, em::bx3); + const auto bx3_1 = EB(i, em::bx3); + const auto bx3_2 = EB(i + 1, em::bx3); b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; } else if constexpr (D == Dim::_2D) { @@ -1169,7 +1170,7 @@ namespace kernel::sr { const auto dx2_center = static_cast(dx2_less_half) - dx2_; // direct interpolation of staggered grid - // primal = i, dual = i+ind + // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); const int indy = static_cast(static_cast(dx2_ + HALF)); @@ -1182,26 +1183,26 @@ namespace kernel::sr { const auto w1py = static_cast(0.75) - SQR(dx2_center); const auto w2py = HALF * SQR(HALF - dx2_center); - // dual - ToDo! 
- const auto w0dx = HALF * SQR(HALF + dx1_center); - const auto w1dx = static_cast(0.75) - SQR(dx1_center); - const auto w2dx = HALF * SQR(HALF - dx1_center); - const auto w0dy = HALF * SQR(HALF + dx2_center); - const auto w1dy = static_cast(0.75) - SQR(dx2_center); - const auto w2dy = HALF * SQR(HALF - dx2_center); + // dual (the three weights along each axis sum to one) + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; + const auto w0dy = HALF * SQR(ONE - dx2_); + const auto w2dy = HALF * SQR(dx2_); + const auto w1dy = ONE - w0dy - w2dy; // Ex1 // Interpolate --- (dual, primal) // clang-format off - const auto ex1_000 = EB(indx + i - 1, j - 1, em::ex1); - const auto ex1_100 = EB(indx + i, j - 1, em::ex1); - const auto ex1_200 = EB(indx + i + 1, j - 1, em::ex1); - const auto ex1_010 = EB(indx + i - 1, j, em::ex1); - const auto ex1_110 = EB(indx + i, j, em::ex1); - const auto ex1_210 = EB(indx + i + 1, j, em::ex1); - const auto ex1_020 = EB(indx + i - 1, j + 1, em::ex1); - const auto ex1_120 = EB(indx + i, j + 1, em::ex1); - const auto ex1_220 = EB(indx + i + 1, j + 1, em::ex1); + const auto ex1_000 = EB(i - 1, indy + j - 1, em::ex1); + const auto ex1_100 = EB(i, indy + j - 1, em::ex1); + const auto ex1_200 = EB(i + 1, indy + j - 1, em::ex1); + const auto ex1_010 = EB(i - 1, indy + j, em::ex1); + const auto ex1_110 = EB(i, indy + j, em::ex1); + const auto ex1_210 = EB(i + 1, indy + j, em::ex1); + const auto ex1_020 = EB(i - 1, indy + j + 1, em::ex1); + const auto ex1_120 = EB(i, indy + j + 1, em::ex1); + const auto ex1_220 = EB(i + 1, indy + j + 1, em::ex1); // clang-format on const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; @@ -1212,15 +1213,15 @@ namespace kernel::sr { // Ex2 // Interpolate --- (primal, dual) // clang-format off - const auto ex2_000 = EB(i - 1, indy + j - 1, em::ex2); - const auto ex2_100 = EB(i, indy + j - 1, em::ex2); - const auto ex2_200 = EB(i + 1, indy + j - 1, em::ex2); - const auto ex2_010 = EB(i 
- 1, indy + j, em::ex2); - const auto ex2_110 = EB(i, indy + j, em::ex2); - const auto ex2_210 = EB(i + 1, indy + j, em::ex2); - const auto ex2_020 = EB(i - 1, indy + j + 1, em::ex2); - const auto ex2_120 = EB(i, indy + j + 1, em::ex2); - const auto ex2_220 = EB(i + 1, indy + j + 1, em::ex2); + const auto ex2_000 = EB(indx + i - 1, j - 1, em::ex2); + const auto ex2_100 = EB(indx + i, j - 1, em::ex2); + const auto ex2_200 = EB(indx + i + 1, j - 1, em::ex2); + const auto ex2_010 = EB(indx + i - 1, j, em::ex2); + const auto ex2_110 = EB(indx + i, j, em::ex2); + const auto ex2_210 = EB(indx + i + 1, j, em::ex2); + const auto ex2_020 = EB(indx + i - 1, j + 1, em::ex2); + const auto ex2_120 = EB(indx + i, j + 1, em::ex2); + const auto ex2_220 = EB(indx + i + 1, j + 1, em::ex2); // clang-format on const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; @@ -1231,15 +1232,15 @@ namespace kernel::sr { // Ex3 // Interpolate --- (primal, primal) // clang-format off - const auto ex3_000 = EB(i - 1, j - 1, em::ex3); - const auto ex3_100 = EB(i, j - 1, em::ex3); - const auto ex3_200 = EB(i + 1, j - 1, em::ex3); - const auto ex3_010 = EB(i - 1, j, em::ex3); - const auto ex3_110 = EB(i, j, em::ex3); - const auto ex3_210 = EB(i + 1, j, em::ex3); - const auto ex3_020 = EB(i - 1, j + 1, em::ex3); - const auto ex3_120 = EB(i, j + 1, em::ex3); - const auto ex3_220 = EB(i + 1, j + 1, em::ex3); + const auto ex3_000 = EB(indx + i - 1, indy + j - 1, em::ex3); + const auto ex3_100 = EB(indx + i, indy + j - 1, em::ex3); + const auto ex3_200 = EB(indx + i + 1, indy + j - 1, em::ex3); + const auto ex3_010 = EB(indx + i - 1, indy + j, em::ex3); + const auto ex3_110 = EB(indx + i, indy + j, em::ex3); + const auto ex3_210 = EB(indx + i + 1, indy + j, em::ex3); + const auto ex3_020 = EB(indx + i - 1, indy + j + 1, em::ex3); + const auto ex3_120 = EB(indx + i, indy + j + 1, em::ex3); + const auto ex3_220 = EB(indx + i + 1, indy + j + 1, em::ex3); // clang-format on const auto ex3_0 = 
ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; @@ -1250,15 +1251,15 @@ namespace kernel::sr { // Bx1 // Interpolate --- (primal, dual) // clang-format off - const auto bx1_000 = EB(i - 1, indy + j - 1, em::bx1); - const auto bx1_100 = EB(i, indy + j - 1, em::bx1); - const auto bx1_200 = EB(i + 1, indy + j - 1, em::bx1); - const auto bx1_010 = EB(i - 1, indy + j, em::bx1); - const auto bx1_110 = EB(i, indy + j, em::bx1); - const auto bx1_210 = EB(i + 1, indy + j, em::bx1); - const auto bx1_020 = EB(i - 1, indy + j + 1, em::bx1); - const auto bx1_120 = EB(i, indy + j + 1, em::bx1); - const auto bx1_220 = EB(i + 1, indy + j + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, indy + j - 1, em::bx1); + const auto bx1_100 = EB(indx + i, indy + j - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, indy + j - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, indy + j, em::bx1); + const auto bx1_110 = EB(indx + i, indy + j, em::bx1); + const auto bx1_210 = EB(indx + i + 1, indy + j, em::bx1); + const auto bx1_020 = EB(indx + i - 1, indy + j + 1, em::bx1); + const auto bx1_120 = EB(indx + i, indy + j + 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, indy + j + 1, em::bx1); // clang-format on const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; @@ -1269,15 +1270,15 @@ namespace kernel::sr { // Bx2 // Interpolate --- (dual, primal) // clang-format off - const auto bx2_000 = EB(indx + i - 1, j - 1, em::bx2); - const auto bx2_100 = EB(indx + i, j - 1, em::bx2); - const auto bx2_200 = EB(indx + i + 1, j - 1, em::bx2); - const auto bx2_010 = EB(indx + i - 1, j, em::bx2); - const auto bx2_110 = EB(indx + i, j, em::bx2); - const auto bx2_210 = EB(indx + i + 1, j, em::bx2); - const auto bx2_020 = EB(indx + i - 1, j + 1, em::bx2); - const auto bx2_120 = EB(indx + i, j + 1, em::bx2); - const auto bx2_220 = EB(indx + i + 1, j + 1, em::bx2); + const auto bx2_000 = EB(i - 1, indy + j - 1, em::bx2); + const auto bx2_100 = EB(i, indy + j - 1, em::bx2); + 
const auto bx2_200 = EB(i + 1, indy + j - 1, em::bx2); + const auto bx2_010 = EB(i - 1, indy + j, em::bx2); + const auto bx2_110 = EB(i, indy + j, em::bx2); + const auto bx2_210 = EB(i + 1, indy + j, em::bx2); + const auto bx2_020 = EB(i - 1, indy + j + 1, em::bx2); + const auto bx2_120 = EB(i, indy + j + 1, em::bx2); + const auto bx2_220 = EB(i + 1, indy + j + 1, em::bx2); // clang-format on const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; @@ -1288,15 +1289,15 @@ namespace kernel::sr { // Bx3 // Interpolate --- (dual, dual) // clang-format off - const auto bx3_000 = EB(indx + i - 1, indy + j - 1, em::bx3); - const auto bx3_100 = EB(indx + i, indy + j - 1, em::bx3); - const auto bx3_200 = EB(indx + i + 1, indy + j - 1, em::bx3); - const auto bx3_010 = EB(indx + i - 1, indy + j, em::bx3); - const auto bx3_110 = EB(indx + i, indy + j, em::bx3); - const auto bx3_210 = EB(indx + i + 1, indy + j, em::bx3); - const auto bx3_020 = EB(indx + i - 1, indy + j + 1, em::bx3); - const auto bx3_120 = EB(indx + i, indy + j + 1, em::bx3); - const auto bx3_220 = EB(indx + i + 1, indy + j + 1, em::bx3); + const auto bx3_000 = EB(i - 1, j - 1, em::bx3); + const auto bx3_100 = EB(i, j - 1, em::bx3); + const auto bx3_200 = EB(i + 1, j - 1, em::bx3); + const auto bx3_010 = EB(i - 1, j, em::bx3); + const auto bx3_110 = EB(i, j, em::bx3); + const auto bx3_210 = EB(i + 1, j, em::bx3); + const auto bx3_020 = EB(i - 1, j + 1, em::bx3); + const auto bx3_120 = EB(i, j + 1, em::bx3); + const auto bx3_220 = EB(i + 1, j + 1, em::bx3); // clang-format on const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; @@ -1312,66 +1313,77 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; + + const int dx2_less_half = static_cast(dx2_ < + static_cast(0.5)); + const auto dx2_center = 
static_cast(dx2_less_half) - dx2_; + + const int dx3_less_half = static_cast(dx3_ < + static_cast(0.5)); + const auto dx3_center = static_cast(dx3_less_half) - dx3_; + // direct interpolation of staggered grid - // primal = i, dual = i+ind + // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); const int indy = static_cast(static_cast(dx2_ + HALF)); const int indz = static_cast(static_cast(dx3_ + HALF)); // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF - dx1_); - const auto w1px = static_cast(0.75) - SQR(dx1_); - const auto w2px = HALF * SQR(HALF + dx1_); - const auto w0py = HALF * SQR(HALF - dx2_); - const auto w1py = static_cast(0.75) - SQR(dx2_); - const auto w2py = HALF * SQR(HALF + dx2_); - const auto w0pz = HALF * SQR(HALF - dx3_); - const auto w1pz = static_cast(0.75) - SQR(dx3_); - const auto w2pz = HALF * SQR(HALF + dx3_); + const auto w0px = HALF * SQR(HALF + dx1_center); + const auto w1px = static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0py = HALF * SQR(HALF + dx2_center); + const auto w1py = static_cast(0.75) - SQR(dx2_center); + const auto w2py = HALF * SQR(HALF - dx2_center); + const auto w0pz = HALF * SQR(HALF + dx3_center); + const auto w1pz = static_cast(0.75) - SQR(dx3_center); + const auto w2pz = HALF * SQR(HALF - dx3_center); + // dual - const auto w0dx = HALF * SQR(HALF - dx1_); - const auto w1dx = static_cast(0.75) - SQR(dx1_); - const auto w2dx = HALF * SQR(HALF + dx1_); - const auto w0dy = HALF * SQR(HALF - dx2_); - const auto w1dy = static_cast(0.75) - SQR(dx2_); - const auto w2dy = HALF * SQR(HALF + dx2_); - const auto w0dz = HALF * SQR(HALF - dx3_); - const auto w1dz = static_cast(0.75) - SQR(dx3_); - const auto w2dz = HALF * SQR(HALF + dx3_); + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; + const auto w0dy = HALF * SQR(ONE - dx2_); + 
const auto w2dy = HALF * SQR(dx2_); + const auto w1dy = ONE - w0dx - w2dy; + const auto w0dz = HALF * SQR(ONE - dx3_); + const auto w2dz = HALF * SQR(dx3_); + const auto w1dz = ONE - w0dx - w2dy; // Ex1 // Interpolate --- (dual, primal, primal) // clang-format off - const auto ex1_000 = EB(indx + i - 1, j - 1, k - 1, em::ex1); - const auto ex1_100 = EB(indx + i, j - 1, k - 1, em::ex1); - const auto ex1_200 = EB(indx + i + 1, j - 1, k - 1, em::ex1); - const auto ex1_010 = EB(indx + i - 1, j, k - 1, em::ex1); - const auto ex1_110 = EB(indx + i, j, k - 1, em::ex1); - const auto ex1_210 = EB(indx + i + 1, j, k - 1, em::ex1); - const auto ex1_020 = EB(indx + i - 1, j + 1, k - 1, em::ex1); - const auto ex1_120 = EB(indx + i, j + 1, k - 1, em::ex1); - const auto ex1_220 = EB(indx + i + 1, j + 1, k - 1, em::ex1); - - const auto ex1_001 = EB(indx + i - 1, j - 1, k, em::ex1); - const auto ex1_101 = EB(indx + i, j - 1, k, em::ex1); - const auto ex1_201 = EB(indx + i + 1, j - 1, k, em::ex1); - const auto ex1_011 = EB(indx + i - 1, j, k, em::ex1); - const auto ex1_111 = EB(indx + i, j, k, em::ex1); - const auto ex1_211 = EB(indx + i + 1, j, k, em::ex1); - const auto ex1_021 = EB(indx + i - 1, j + 1, k, em::ex1); - const auto ex1_121 = EB(indx + i, j + 1, k, em::ex1); - const auto ex1_221 = EB(indx + i + 1, j + 1, k, em::ex1); - - const auto ex1_002 = EB(indx + i - 1, j - 1, k + 1, em::ex1); - const auto ex1_102 = EB(indx + i, j - 1, k + 1, em::ex1); - const auto ex1_202 = EB(indx + i + 1, j - 1, k + 1, em::ex1); - const auto ex1_012 = EB(indx + i - 1, j, k + 1, em::ex1); - const auto ex1_112 = EB(indx + i, j, k + 1, em::ex1); - const auto ex1_212 = EB(indx + i + 1, j, k + 1, em::ex1); - const auto ex1_022 = EB(indx + i - 1, j + 1, k + 1, em::ex1); - const auto ex1_122 = EB(indx + i, j + 1, k + 1, em::ex1); - const auto ex1_222 = EB(indx + i + 1, j + 1, k + 1, em::ex1); + const auto ex1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::ex1); + const auto ex1_100 = EB(i, indy + j - 
1, indz + k - 1, em::ex1); + const auto ex1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::ex1); + const auto ex1_010 = EB(i - 1, indy + j, indz + k - 1, em::ex1); + const auto ex1_110 = EB(i, indy + j, indz + k - 1, em::ex1); + const auto ex1_210 = EB(i + 1, indy + j, indz + k - 1, em::ex1); + const auto ex1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_120 = EB(i, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_001 = EB(i - 1, indy + j - 1, indz + k, em::ex1); + const auto ex1_101 = EB(i, indy + j - 1, indz + k, em::ex1); + const auto ex1_201 = EB(i + 1, indy + j - 1, indz + k, em::ex1); + const auto ex1_011 = EB(i - 1, indy + j, indz + k, em::ex1); + const auto ex1_111 = EB(i, indy + j, indz + k, em::ex1); + const auto ex1_211 = EB(i + 1, indy + j, indz + k, em::ex1); + const auto ex1_021 = EB(i - 1, indy + j + 1, indz + k, em::ex1); + const auto ex1_121 = EB(i, indy + j + 1, indz + k, em::ex1); + const auto ex1_221 = EB(i + 1, indy + j + 1, indz + k, em::ex1); + const auto ex1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_102 = EB(i, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_012 = EB(i - 1, indy + j, indz + k + 1, em::ex1); + const auto ex1_112 = EB(i, indy + j, indz + k + 1, em::ex1); + const auto ex1_212 = EB(i + 1, indy + j, indz + k + 1, em::ex1); + const auto ex1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::ex1); + const auto ex1_122 = EB(i, indy + j + 1, indz + k + 1, em::ex1); + const auto ex1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::ex1); // clang-format on const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; @@ -1393,35 +1405,33 @@ namespace kernel::sr { // Ex2 // Interpolate -- (primal, dual, primal) // clang-format off - const auto ex2_000 = EB(i - 1, indy + j - 1, k - 1, em::ex2); - const auto ex2_100 = EB(i, 
indy + j - 1, k - 1, em::ex2); - const auto ex2_200 = EB(i + 1, indy + j - 1, k - 1, em::ex2); - const auto ex2_010 = EB(i - 1, indy + j, k - 1, em::ex2); - const auto ex2_110 = EB(i, indy + j, k - 1, em::ex2); - const auto ex2_210 = EB(i + 1, indy + j, k - 1, em::ex2); - const auto ex2_020 = EB(i - 1, indy + j + 1, k - 1, em::ex2); - const auto ex2_120 = EB(i, indy + j + 1, k - 1, em::ex2); - const auto ex2_220 = EB(i + 1, indy + j + 1, k - 1, em::ex2); - - const auto ex2_001 = EB(i - 1, indy + j - 1, k, em::ex2); - const auto ex2_101 = EB(i, indy + j - 1, k, em::ex2); - const auto ex2_201 = EB(i + 1, indy + j - 1, k, em::ex2); - const auto ex2_011 = EB(i - 1, indy + j, k, em::ex2); - const auto ex2_111 = EB(i, indy + j, k, em::ex2); - const auto ex2_211 = EB(i + 1, indy + j, k, em::ex2); - const auto ex2_021 = EB(i - 1, indy + j + 1, k, em::ex2); - const auto ex2_121 = EB(i, indy + j + 1, k, em::ex2); - const auto ex2_221 = EB(i + 1, indy + j + 1, k, em::ex2); - - const auto ex2_002 = EB(i - 1, indy + j - 1, k + 1, em::ex2); - const auto ex2_102 = EB(i, indy + j - 1, k + 1, em::ex2); - const auto ex2_202 = EB(i + 1, indy + j - 1, k + 1, em::ex2); - const auto ex2_012 = EB(i - 1, indy + j, k + 1, em::ex2); - const auto ex2_112 = EB(i, indy + j, k + 1, em::ex2); - const auto ex2_212 = EB(i + 1, indy + j, k + 1, em::ex2); - const auto ex2_022 = EB(i - 1, indy + j + 1, k + 1, em::ex2); - const auto ex2_122 = EB(i, indy + j + 1, k + 1, em::ex2); - const auto ex2_222 = EB(i + 1, indy + j + 1, k + 1, em::ex2); + const auto ex2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::ex2); + const auto ex2_100 = EB(indx + i, j - 1, indz + k - 1, em::ex2); + const auto ex2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::ex2); + const auto ex2_010 = EB(indx + i - 1, j, indz + k - 1, em::ex2); + const auto ex2_110 = EB(indx + i, j, indz + k - 1, em::ex2); + const auto ex2_210 = EB(indx + i + 1, j, indz + k - 1, em::ex2); + const auto ex2_020 = EB(indx + i - 1, j + 1, indz + k - 1, 
em::ex2); + const auto ex2_120 = EB(indx + i, j + 1, indz + k - 1, em::ex2); + const auto ex2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::ex2); + const auto ex2_001 = EB(indx + i - 1, j - 1, indz + k, em::ex2); + const auto ex2_101 = EB(indx + i, j - 1, indz + k, em::ex2); + const auto ex2_201 = EB(indx + i + 1, j - 1, indz + k, em::ex2); + const auto ex2_011 = EB(indx + i - 1, j, indz + k, em::ex2); + const auto ex2_111 = EB(indx + i, j, indz + k, em::ex2); + const auto ex2_211 = EB(indx + i + 1, j, indz + k, em::ex2); + const auto ex2_021 = EB(indx + i - 1, j + 1, indz + k, em::ex2); + const auto ex2_121 = EB(indx + i, j + 1, indz + k, em::ex2); + const auto ex2_221 = EB(indx + i + 1, j + 1, indz + k, em::ex2); + const auto ex2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::ex2); + const auto ex2_102 = EB(indx + i, j - 1, indz + k + 1, em::ex2); + const auto ex2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::ex2); + const auto ex2_012 = EB(indx + i - 1, j, indz + k + 1, em::ex2); + const auto ex2_112 = EB(indx + i, j, indz + k + 1, em::ex2); + const auto ex2_212 = EB(indx + i + 1, j, indz + k + 1, em::ex2); + const auto ex2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::ex2); + const auto ex2_122 = EB(indx + i, j + 1, indz + k + 1, em::ex2); + const auto ex2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::ex2); // clang-format on const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex1_200 * w2px; @@ -1443,35 +1453,33 @@ namespace kernel::sr { // Ex3 // Interpolate -- (primal, primal, dual) // clang-format off - const auto ex3_000 = EB(i - 1, j - 1, indz + k - 1, em::ex3); - const auto ex3_100 = EB(i, j - 1, indz + k - 1, em::ex3); - const auto ex3_200 = EB(i + 1, j - 1, indz + k - 1, em::ex3); - const auto ex3_010 = EB(i - 1, j, indz + k - 1, em::ex3); - const auto ex3_110 = EB(i, j, indz + k - 1, em::ex3); - const auto ex3_210 = EB(i + 1, j, indz + k - 1, em::ex3); - const auto ex3_020 = EB(i - 1, j + 1, indz + k - 1, em::ex3); - const auto ex3_120 = 
EB(i, j + 1, indz + k - 1, em::ex3); - const auto ex3_220 = EB(i + 1, j + 1, indz + k - 1, em::ex3); - - const auto ex3_001 = EB(i - 1, j - 1, indz + k, em::ex3); - const auto ex3_101 = EB(i, j - 1, indz + k, em::ex3); - const auto ex3_201 = EB(i + 1, j - 1, indz + k, em::ex3); - const auto ex3_011 = EB(i - 1, j, indz + k, em::ex3); - const auto ex3_111 = EB(i, j, indz + k, em::ex3); - const auto ex3_211 = EB(i + 1, j, indz + k, em::ex3); - const auto ex3_021 = EB(i - 1, j + 1, indz + k, em::ex3); - const auto ex3_121 = EB(i, j + 1, indz + k, em::ex3); - const auto ex3_221 = EB(i + 1, j + 1, indz + k, em::ex3); - - const auto ex3_002 = EB(i - 1, j - 1, indz + k + 1, em::ex3); - const auto ex3_102 = EB(i, j - 1, indz + k + 1, em::ex3); - const auto ex3_202 = EB(i + 1, j - 1, indz + k + 1, em::ex3); - const auto ex3_012 = EB(i - 1, j, indz + k + 1, em::ex3); - const auto ex3_112 = EB(i, j, indz + k + 1, em::ex3); - const auto ex3_212 = EB(i + 1, j, indz + k + 1, em::ex3); - const auto ex3_022 = EB(i - 1, j + 1, indz + k + 1, em::ex3); - const auto ex3_122 = EB(i, j + 1, indz + k + 1, em::ex3); - const auto ex3_222 = EB(i + 1, j + 1, indz + k + 1, em::ex3); + const auto ex3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::ex3); + const auto ex3_100 = EB(indx + i, indy + j - 1, k - 1, em::ex3); + const auto ex3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::ex3); + const auto ex3_010 = EB(indx + i - 1, indy + j, k - 1, em::ex3); + const auto ex3_110 = EB(indx + i, indy + j, k - 1, em::ex3); + const auto ex3_210 = EB(indx + i + 1, indy + j, k - 1, em::ex3); + const auto ex3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::ex3); + const auto ex3_120 = EB(indx + i, indy + j + 1, k - 1, em::ex3); + const auto ex3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::ex3); + const auto ex3_001 = EB(indx + i - 1, indy + j - 1, k, em::ex3); + const auto ex3_101 = EB(indx + i, indy + j - 1, k, em::ex3); + const auto ex3_201 = EB(indx + i + 1, indy + j - 1, k, em::ex3); + const auto 
ex3_011 = EB(indx + i - 1, indy + j, k, em::ex3); + const auto ex3_111 = EB(indx + i, indy + j, k, em::ex3); + const auto ex3_211 = EB(indx + i + 1, indy + j, k, em::ex3); + const auto ex3_021 = EB(indx + i - 1, indy + j + 1, k, em::ex3); + const auto ex3_121 = EB(indx + i, indy + j + 1, k, em::ex3); + const auto ex3_221 = EB(indx + i + 1, indy + j + 1, k, em::ex3); + const auto ex3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::ex3); + const auto ex3_102 = EB(indx + i, indy + j - 1, k + 1, em::ex3); + const auto ex3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::ex3); + const auto ex3_012 = EB(indx + i - 1, indy + j, k + 1, em::ex3); + const auto ex3_112 = EB(indx + i, indy + j, k + 1, em::ex3); + const auto ex3_212 = EB(indx + i + 1, indy + j, k + 1, em::ex3); + const auto ex3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::ex3); + const auto ex3_122 = EB(indx + i, indy + j + 1, k + 1, em::ex3); + const auto ex3_222 = EB(indx + i + 1, indy + j + 1, k + 1, em::ex3); // clang-format on const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; @@ -1493,35 +1501,33 @@ namespace kernel::sr { // Bx1 // Interpolate -- (primal, dual, dual) // clang-format off - const auto bx1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_100 = EB(i, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_010 = EB(i - 1, indy + j, indz + k - 1, em::bx1); - const auto bx1_110 = EB(i, indy + j, indz + k - 1, em::bx1); - const auto bx1_210 = EB(i + 1, indy + j, indz + k - 1, em::bx1); - const auto bx1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::bx1); - const auto bx1_120 = EB(i, indy + j + 1, indz + k - 1, em::bx1); - const auto bx1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::bx1); - - const auto bx1_001 = EB(i - 1, indy + j - 1, indz + k, em::bx1); - const auto bx1_101 = EB(i, indy + j - 1, indz + k, em::bx1); - const auto bx1_201 = EB(i + 1, indy + j - 1, indz + k, 
em::bx1); - const auto bx1_011 = EB(i - 1, indy + j, indz + k, em::bx1); - const auto bx1_111 = EB(i, indy + j, indz + k, em::bx1); - const auto bx1_211 = EB(i + 1, indy + j, indz + k, em::bx1); - const auto bx1_021 = EB(i - 1, indy + j + 1, indz + k, em::bx1); - const auto bx1_121 = EB(i, indy + j + 1, indz + k, em::bx1); - const auto bx1_221 = EB(i + 1, indy + j + 1, indz + k, em::bx1); - - const auto bx1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_102 = EB(i, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_012 = EB(i - 1, indy + j, indz + k + 1, em::bx1); - const auto bx1_112 = EB(i, indy + j, indz + k + 1, em::bx1); - const auto bx1_212 = EB(i + 1, indy + j, indz + k + 1, em::bx1); - const auto bx1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::bx1); - const auto bx1_122 = EB(i, indy + j + 1, indz + k + 1, em::bx1); - const auto bx1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, j - 1, k - 1, em::bx1); + const auto bx1_100 = EB(indx + i, j - 1, k - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, j - 1, k - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, j, k - 1, em::bx1); + const auto bx1_110 = EB(indx + i, j, k - 1, em::bx1); + const auto bx1_210 = EB(indx + i + 1, j, k - 1, em::bx1); + const auto bx1_020 = EB(indx + i - 1, j + 1, k - 1, em::bx1); + const auto bx1_120 = EB(indx + i, j + 1, k - 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, j + 1, k - 1, em::bx1); + const auto bx1_001 = EB(indx + i - 1, j - 1, k, em::bx1); + const auto bx1_101 = EB(indx + i, j - 1, k, em::bx1); + const auto bx1_201 = EB(indx + i + 1, j - 1, k, em::bx1); + const auto bx1_011 = EB(indx + i - 1, j, k, em::bx1); + const auto bx1_111 = EB(indx + i, j, k, em::bx1); + const auto bx1_211 = EB(indx + i + 1, j, k, em::bx1); + const auto bx1_021 = EB(indx + i - 1, j + 1, k, em::bx1); + const auto bx1_121 = EB(indx + i, j 
+ 1, k, em::bx1); + const auto bx1_221 = EB(indx + i + 1, j + 1, k, em::bx1); + const auto bx1_002 = EB(indx + i - 1, j - 1, k + 1, em::bx1); + const auto bx1_102 = EB(indx + i, j - 1, k + 1, em::bx1); + const auto bx1_202 = EB(indx + i + 1, j - 1, k + 1, em::bx1); + const auto bx1_012 = EB(indx + i - 1, j, k + 1, em::bx1); + const auto bx1_112 = EB(indx + i, j, k + 1, em::bx1); + const auto bx1_212 = EB(indx + i + 1, j, k + 1, em::bx1); + const auto bx1_022 = EB(indx + i - 1, j + 1, k + 1, em::bx1); + const auto bx1_122 = EB(indx + i, j + 1, k + 1, em::bx1); + const auto bx1_222 = EB(indx + i + 1, j + 1, k + 1, em::bx1); // clang-format on const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; @@ -1543,35 +1549,33 @@ namespace kernel::sr { // Bx2 // Interpolate -- (dual, primal, dual) // clang-format off - const auto bx2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::bx2); - const auto bx2_100 = EB(indx + i, j - 1, indz + k - 1, em::bx2); - const auto bx2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::bx2); - const auto bx2_010 = EB(indx + i - 1, j, indz + k - 1, em::bx2); - const auto bx2_110 = EB(indx + i, j, indz + k - 1, em::bx2); - const auto bx2_210 = EB(indx + i + 1, j, indz + k - 1, em::bx2); - const auto bx2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::bx2); - const auto bx2_120 = EB(indx + i, j + 1, indz + k - 1, em::bx2); - const auto bx2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::bx2); - - const auto bx2_001 = EB(indx + i - 1, j - 1, indz + k, em::bx2); - const auto bx2_101 = EB(indx + i, j - 1, indz + k, em::bx2); - const auto bx2_201 = EB(indx + i + 1, j - 1, indz + k, em::bx2); - const auto bx2_011 = EB(indx + i - 1, j, indz + k, em::bx2); - const auto bx2_111 = EB(indx + i, j, indz + k, em::bx2); - const auto bx2_211 = EB(indx + i + 1, j, indz + k, em::bx2); - const auto bx2_021 = EB(indx + i - 1, j + 1, indz + k, em::bx2); - const auto bx2_121 = EB(indx + i, j + 1, indz + k, em::bx2); - const auto bx2_221 = EB(indx + i + 
1, j + 1, indz + k, em::bx2); - - const auto bx2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::bx2); - const auto bx2_102 = EB(indx + i, j - 1, indz + k + 1, em::bx2); - const auto bx2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::bx2); - const auto bx2_012 = EB(indx + i - 1, j, indz + k + 1, em::bx2); - const auto bx2_112 = EB(indx + i, j, indz + k + 1, em::bx2); - const auto bx2_212 = EB(indx + i + 1, j, indz + k + 1, em::bx2); - const auto bx2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::bx2); - const auto bx2_122 = EB(indx + i, j + 1, indz + k + 1, em::bx2); - const auto bx2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::bx2); + const auto bx2_000 = EB(i - 1, indy + j - 1, k - 1, em::bx2); + const auto bx2_100 = EB(i, indy + j - 1, k - 1, em::bx2); + const auto bx2_200 = EB(i + 1, indy + j - 1, k - 1, em::bx2); + const auto bx2_010 = EB(i - 1, indy + j, k - 1, em::bx2); + const auto bx2_110 = EB(i, indy + j, k - 1, em::bx2); + const auto bx2_210 = EB(i + 1, indy + j, k - 1, em::bx2); + const auto bx2_020 = EB(i - 1, indy + j + 1, k - 1, em::bx2); + const auto bx2_120 = EB(i, indy + j + 1, k - 1, em::bx2); + const auto bx2_220 = EB(i + 1, indy + j + 1, k - 1, em::bx2); + const auto bx2_001 = EB(i - 1, indy + j - 1, k, em::bx2); + const auto bx2_101 = EB(i, indy + j - 1, k, em::bx2); + const auto bx2_201 = EB(i + 1, indy + j - 1, k, em::bx2); + const auto bx2_011 = EB(i - 1, indy + j, k, em::bx2); + const auto bx2_111 = EB(i, indy + j, k, em::bx2); + const auto bx2_211 = EB(i + 1, indy + j, k, em::bx2); + const auto bx2_021 = EB(i - 1, indy + j + 1, k, em::bx2); + const auto bx2_121 = EB(i, indy + j + 1, k, em::bx2); + const auto bx2_221 = EB(i + 1, indy + j + 1, k, em::bx2); + const auto bx2_002 = EB(i - 1, indy + j - 1, k + 1, em::bx2); + const auto bx2_102 = EB(i, indy + j - 1, k + 1, em::bx2); + const auto bx2_202 = EB(i + 1, indy + j - 1, k + 1, em::bx2); + const auto bx2_012 = EB(i - 1, indy + j, k + 1, em::bx2); + const auto bx2_112 = EB(i, indy + 
j, k + 1, em::bx2); + const auto bx2_212 = EB(i + 1, indy + j, k + 1, em::bx2); + const auto bx2_022 = EB(i - 1, indy + j + 1, k + 1, em::bx2); + const auto bx2_122 = EB(i, indy + j + 1, k + 1, em::bx2); + const auto bx2_222 = EB(i + 1, indy + j + 1, k + 1, em::bx2); // clang-format on const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; @@ -1593,35 +1597,33 @@ namespace kernel::sr { // Bx3 // Interpolate -- (dual, dual, primal) // clang-format off - const auto bx3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::bx3); - const auto bx3_100 = EB(indx + i, indy + j - 1, k - 1, em::bx3); - const auto bx3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::bx3); - const auto bx3_010 = EB(indx + i - 1, indy + j, k - 1, em::bx3); - const auto bx3_110 = EB(indx + i, indy + j, k - 1, em::bx3); - const auto bx3_210 = EB(indx + i + 1, indy + j, k - 1, em::bx3); - const auto bx3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::bx3); - const auto bx3_120 = EB(indx + i, indy + j + 1, k - 1, em::bx3); - const auto bx3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::bx3); - - const auto bx3_001 = EB(indx + i - 1, indy + j - 1, k, em::bx3); - const auto bx3_101 = EB(indx + i, indy + j - 1, k, em::bx3); - const auto bx3_201 = EB(indx + i + 1, indy + j - 1, k, em::bx3); - const auto bx3_011 = EB(indx + i - 1, indy + j, k, em::bx3); - const auto bx3_111 = EB(indx + i, indy + j, k, em::bx3); - const auto bx3_211 = EB(indx + i + 1, indy + j, k, em::bx3); - const auto bx3_021 = EB(indx + i - 1, indy + j + 1, k, em::bx3); - const auto bx3_121 = EB(indx + i, indy + j + 1, k, em::bx3); - const auto bx3_221 = EB(indx + i + 1, indy + j + 1, k, em::bx3); - - const auto bx3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::bx3); - const auto bx3_102 = EB(indx + i, indy + j - 1, k + 1, em::bx3); - const auto bx3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::bx3); - const auto bx3_012 = EB(indx + i - 1, indy + j, k + 1, em::bx3); - const auto bx3_112 = EB(indx + i, indy + j, k + 1, 
em::bx3); - const auto bx3_212 = EB(indx + i + 1, indy + j, k + 1, em::bx3); - const auto bx3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::bx3); - const auto bx3_122 = EB(indx + i, indy + j + 1, k + 1, em::bx3); - const auto bx3_222 = EB(indx + i + 1, indy + j + 1, k + 1, em::bx3); + const auto bx3_000 = EB(i - 1, j - 1, indz + k - 1, em::bx3); + const auto bx3_100 = EB(i, j - 1, indz + k - 1, em::bx3); + const auto bx3_200 = EB(i + 1, j - 1, indz + k - 1, em::bx3); + const auto bx3_010 = EB(i - 1, j, indz + k - 1, em::bx3); + const auto bx3_110 = EB(i, j, indz + k - 1, em::bx3); + const auto bx3_210 = EB(i + 1, j, indz + k - 1, em::bx3); + const auto bx3_020 = EB(i - 1, j + 1, indz + k - 1, em::bx3); + const auto bx3_120 = EB(i, j + 1, indz + k - 1, em::bx3); + const auto bx3_220 = EB(i + 1, j + 1, indz + k - 1, em::bx3); + const auto bx3_001 = EB(i - 1, j - 1, indz + k, em::bx3); + const auto bx3_101 = EB(i, j - 1, indz + k, em::bx3); + const auto bx3_201 = EB(i + 1, j - 1, indz + k, em::bx3); + const auto bx3_011 = EB(i - 1, j, indz + k, em::bx3); + const auto bx3_111 = EB(i, j, indz + k, em::bx3); + const auto bx3_211 = EB(i + 1, j, indz + k, em::bx3); + const auto bx3_021 = EB(i - 1, j + 1, indz + k, em::bx3); + const auto bx3_121 = EB(i, j + 1, indz + k, em::bx3); + const auto bx3_221 = EB(i + 1, j + 1, indz + k, em::bx3); + const auto bx3_002 = EB(i - 1, j - 1, indz + k + 1, em::bx3); + const auto bx3_102 = EB(i, j - 1, indz + k + 1, em::bx3); + const auto bx3_202 = EB(i + 1, j - 1, indz + k + 1, em::bx3); + const auto bx3_012 = EB(i - 1, j, indz + k + 1, em::bx3); + const auto bx3_112 = EB(i, j, indz + k + 1, em::bx3); + const auto bx3_212 = EB(i + 1, j, indz + k + 1, em::bx3); + const auto bx3_022 = EB(i - 1, j + 1, indz + k + 1, em::bx3); + const auto bx3_122 = EB(i, j + 1, indz + k + 1, em::bx3); + const auto bx3_222 = EB(i + 1, j + 1, indz + k + 1, em::bx3); // clang-format on const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; 
From 1d4b074f836870c1b56e3e3704030fc1471bde51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 16:36:54 -0500 Subject: [PATCH 36/82] bugfixes for variable names --- src/kernels/particle_pusher_sr.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index c9dd8377..e6e7224d 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1110,50 +1110,50 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0p = HALF * SQR(HALF + dx1_center); + const auto w1p = static_cast(0.75) - SQR(dx1_center); + const auto w2p = HALF * SQR(HALF - dx1_center); // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; + const auto w0d = HALF * SQR(ONE - dx1_); + const auto w2d = HALF * SQR(dx1_); + const auto w1d = ONE - w0d - w2d; // Ex1 (dual grid) const auto ex1_0 = EB(i - 1, em::ex1); const auto ex1_1 = EB(i, em::ex1); const auto ex1_2 = EB(i + 1, em::ex1); - e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; + e0[0] = ex1_0 * w0d + ex1_1 * w1d + ex1_2 * w2d; // Ex2 (primal grid) const auto ex2_0 = EB(indx + i - 1, em::ex2); const auto ex2_1 = EB(indx + i, em::ex2); const auto ex2_2 = EB(indx + i + 1, em::ex2); - e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; + e0[1] = ex2_0 * w0p + ex2_1 * w1p + ex2_2 * w2p; // Ex3 (primal grid) const auto ex3_0 = EB(indx + i - 1, em::ex3); const auto ex3_1 = EB(indx + i, em::ex3); const auto ex3_2 = EB(indx + i + 1, em::ex3); - e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; + e0[2] = ex3_0 * w0p + ex3_1 * w1p + ex3_2 * w2p; // Bx1 (primal grid) const auto bx1_0 = EB(indx + i - 1, em::bx1); 
const auto bx1_1 = EB(indx + i, em::bx1); const auto bx1_2 = EB(indx + i + 1, em::bx1); - b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; + b0[0] = bx1_0 * w0p + bx1_1 * w1p + bx1_2 * w2p; // Bx2 (dual grid) const auto bx2_0 = EB(i - 1, em::bx2); const auto bx2_1 = EB(i, em::bx2); const auto bx2_2 = EB(i + 1, em::bx2); - b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; + b0[1] = bx2_0 * w0d + bx2_1 * w1d + bx2_2 * w2d; // Bx3 (dual grid) const auto bx3_0 = EB(i - 1, em::bx3); const auto bx3_1 = EB(i, em::bx3); const auto bx3_2 = EB(i + 1, em::bx3); - b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; + b0[2] = bx3_0 * w0d + bx3_1 * w1d + bx3_2 * w2d; } else if constexpr (D == Dim::_2D) { const int i { i1(p) + static_cast(N_GHOSTS) }; From b551c10abdc19138aa9e76312b9f47680245810b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 7 Jun 2025 15:27:38 -0500 Subject: [PATCH 37/82] bugfix --- src/kernels/particle_pusher_sr.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index e6e7224d..a8984de2 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1189,7 +1189,7 @@ namespace kernel::sr { const auto w1dx = ONE - w0dx - w2dx; const auto w0dy = HALF * SQR(ONE - dx2_); const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dx - w2dy; + const auto w1dy = ONE - w0dy - w2dy; // Ex1 // Interpolate --- (dual, primal) @@ -1349,10 +1349,10 @@ namespace kernel::sr { const auto w1dx = ONE - w0dx - w2dx; const auto w0dy = HALF * SQR(ONE - dx2_); const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dx - w2dy; + const auto w1dy = ONE - w0dy - w2dy; const auto w0dz = HALF * SQR(ONE - dx3_); const auto w2dz = HALF * SQR(dx3_); - const auto w1dz = ONE - w0dx - w2dy; + const auto w1dz = ONE - w0dz - w2dz; // Ex1 // Interpolate --- (dual, primal, primal) From a8aa8bbae81789fb94669c8611cb12483b418a78 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sun, 8 Jun 2025 22:42:55 -0500 Subject: [PATCH 38/82] bugfix --- src/kernels/particle_pusher_sr.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index a8984de2..2d756931 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1251,15 +1251,15 @@ namespace kernel::sr { // Bx1 // Interpolate --- (primal, dual) // clang-format off - const auto bx1_000 = EB(indx + i - 1, indy + j - 1, em::bx1); - const auto bx1_100 = EB(indx + i, indy + j - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, indy + j - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, indy + j, em::bx1); - const auto bx1_110 = EB(indx + i, indy + j, em::bx1); - const auto bx1_210 = EB(indx + i + 1, indy + j, em::bx1); - const auto bx1_020 = EB(indx + i - 1, indy + j + 1, em::bx1); - const auto bx1_120 = EB(indx + i, indy + j + 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, indy + j + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, j - 1, em::bx1); + const auto bx1_100 = EB(indx + i, j - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, j - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, j, em::bx1); + const auto bx1_110 = EB(indx + i, j, em::bx1); + const auto bx1_210 = EB(indx + i + 1, j, em::bx1); + const auto bx1_020 = EB(indx + i - 1, j + 1, em::bx1); + const auto bx1_120 = EB(indx + i, j + 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, j + 1, em::bx1); // clang-format on const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; From 77cdd68b6fa4c5c49f38777362d749cf7fa77ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 14 Jul 2025 14:42:03 -0500 Subject: [PATCH 39/82] fix 2nd order shape function in pusher --- src/kernels/particle_pusher_sr.hpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff 
--git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 2d756931..e379e96f 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,7 +475,7 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // getInterpFlds(p, ei, bi); + //getInterpFlds(p, ei, bi); // ToDo: Better way to call this getInterpFlds2nd(p, ei, bi); @@ -1161,14 +1161,6 @@ namespace kernel::sr { const auto dx1_ { static_cast(dx1(p)) }; const auto dx2_ { static_cast(dx2(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - - const int dx2_less_half = static_cast(dx2_ < - static_cast(0.5)); - const auto dx2_center = static_cast(dx2_less_half) - dx2_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); @@ -1176,12 +1168,14 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); - const auto w0py = HALF * SQR(HALF + dx2_center); - const auto w1py = static_cast(0.75) - SQR(dx2_center); - const auto w2py = HALF * SQR(HALF - dx2_center); + const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1px = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2px = ONE - w0px - w1px; + const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); + const auto w1py = static_cast(0.75) - + SQR(dx2_ - static_cast(indy)); + const auto w2py = ONE - w0py - w1py; // dual const auto w0dx = HALF * SQR(ONE - dx1_); From 8c60b0179d07a092fb0eb4be9365d9b2146e4dc9 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 17 Jul 2025 09:40:33 -0500 Subject: [PATCH 40/82] fix second order weight functions in 1D and 3D --- 
src/kernels/particle_pusher_sr.hpp | 44 +++++++++++------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index e379e96f..826da86e 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1100,19 +1100,16 @@ namespace kernel::sr { const int i { i1(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); // Compute weights for second-order interpolation // primal - const auto w0p = HALF * SQR(HALF + dx1_center); - const auto w1p = static_cast(0.75) - SQR(dx1_center); - const auto w2p = HALF * SQR(HALF - dx1_center); + const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1p = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2p = ONE - w0p - w1p; // dual const auto w0d = HALF * SQR(ONE - dx1_); @@ -1307,18 +1304,6 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - - const int dx2_less_half = static_cast(dx2_ < - static_cast(0.5)); - const auto dx2_center = static_cast(dx2_less_half) - dx2_; - - const int dx3_less_half = static_cast(dx3_ < - static_cast(0.5)); - const auto dx3_center = static_cast(dx3_less_half) - dx3_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); @@ -1327,15 +1312,18 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - 
SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); - const auto w0py = HALF * SQR(HALF + dx2_center); - const auto w1py = static_cast(0.75) - SQR(dx2_center); - const auto w2py = HALF * SQR(HALF - dx2_center); - const auto w0pz = HALF * SQR(HALF + dx3_center); - const auto w1pz = static_cast(0.75) - SQR(dx3_center); - const auto w2pz = HALF * SQR(HALF - dx3_center); + const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1px = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2px = ONE - w0px - w1px; + const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); + const auto w1py = static_cast(0.75) - + SQR(dx2_ - static_cast(indy)); + const auto w2py = ONE - w0py - w1py; + const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); + const auto w1pz = static_cast(0.75) - + SQR(dx3_ - static_cast(indz)); + const auto w2pz = ONE - w0pz - w1pz; // dual const auto w0dx = HALF * SQR(ONE - dx1_); From 2732659a33d72c2a453d1c15410b74c2f5df04c3 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 17 Jul 2025 09:50:45 -0500 Subject: [PATCH 41/82] introduced `THREE_FOURTHS` to shorten expression in pusher and deposit for second order shape function --- src/global/utils/numeric.h | 2 ++ src/kernels/currents_deposit.hpp | 12 ++++++------ src/kernels/particle_pusher_sr.hpp | 18 ++++++------------ 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 9ff262ed..fd1ddc65 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -40,6 +40,7 @@ inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; inline constexpr float THIRD = 0.333333f; +inline constexpr float THREE_FOURTHS = 0.75f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -56,6 +57,7 @@ inline constexpr double TWELVE = 12.0; inline constexpr double 
ZERO = 0.0; inline constexpr double HALF = 0.5; inline constexpr double THIRD = 0.3333333333333333; +inline constexpr double THREE_FOURTHS = 0.75; inline constexpr double INV_2 = 0.5; inline constexpr double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0b7409f4..1f508a9a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -102,13 +102,13 @@ namespace kernel { update_i2 = true; S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_1 = THREE_FOURTHS - SQR(di_center_prev); S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = ZERO; S1_1 = HALF * SQR(HALF + di_center); - S1_2 = static_cast(0.75) - SQR(di_center); + S1_2 = THREE_FOURTHS - SQR(di_center); S1_3 = HALF * SQR(HALF - di_center); } else if (shift_i == -1) { /* @@ -121,11 +121,11 @@ namespace kernel { S0_0 = ZERO; S0_1 = HALF * SQR(HALF + di_center_prev); - S0_2 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = THREE_FOURTHS - SQR(di_center_prev); S0_3 = HALF * SQR(HALF - di_center_prev); S1_0 = HALF * SQR(HALF + di_center); - S1_1 = static_cast(0.75) - SQR(di_center); + S1_1 = THREE_FOURTHS - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; @@ -139,12 +139,12 @@ namespace kernel { update_i2 = false; S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_1 = THREE_FOURTHS - SQR(di_center_prev); S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = HALF * SQR(HALF + di_center); - S1_1 = static_cast(0.75) - SQR(di_center); + S1_1 = THREE_FOURTHS - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 826da86e..980acca5 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1107,8 +1107,7 @@ namespace kernel::sr { // 
Compute weights for second-order interpolation // primal const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1p = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1p = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2p = ONE - w0p - w1p; // dual @@ -1166,12 +1165,10 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2px = ONE - w0px - w1px; const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = static_cast(0.75) - - SQR(dx2_ - static_cast(indy)); + const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); const auto w2py = ONE - w0py - w1py; // dual @@ -1313,16 +1310,13 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2px = ONE - w0px - w1px; const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = static_cast(0.75) - - SQR(dx2_ - static_cast(indy)); + const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); const auto w2py = ONE - w0py - w1py; const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); - const auto w1pz = static_cast(0.75) - - SQR(dx3_ - static_cast(indz)); + const auto w1pz = THREE_FOURTHS - SQR(dx3_ - static_cast(indz)); const auto w2pz = ONE - w0pz - w1pz; // dual From 82ddfad0328e4d7772b2ecfdd7a08a110f6e86ad Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 28 Jul 2025 18:36:41 -0500 Subject: [PATCH 42/82] first step for esirkepov with arbitrary order (wip) --- src/kernels/currents_deposit.hpp | 660 
+++++++++++++++++++++++++++++-- 1 file changed, 633 insertions(+), 27 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1f508a9a..40ca405f 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -292,6 +292,395 @@ namespace kernel { i_min += N_GHOSTS; } + Inline void W(real_t* _S, real_t x) const { + + if constexpr (O == 2) { + + _S[0] = HALF * SQR(HALF - x); + _S[1] = THREE_FOURTHS - SQR(x); + _S[2] = HALF * SQR(HALF + x); + + } else if constexpr (O == 3) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + + _S[0] = static_cast(1 / 6) * (ONE - x3) - HALF * SQR(x - x2); + _S[1] = static_cast(2 / 3) - x2 + HALF * x3; + _S[2] = static_cast(1 / 6) + HALF * (x + x2 + x3); + _S[3] = static_cast(1 / 6) * x3; + + } else if constexpr (O == 4) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + + _S[0] = static_cast(1 / 384) - static_cast(1 / 48) * x + + static_cast(1 / 16) * x2 - + static_cast(1 / 12) * x3 + + static_cast(1 / 24) * x4; + _S[1] = static_cast(19 / 96) - static_cast(11 / 24) * x + + static_cast(1 / 4) * x2 + + static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; + _S[2] = static_cast(115 / 192) - static_cast(5 / 8) * x2 + + static_cast(1 / 4) * x4; + _S[3] = static_cast(19 / 96) + static_cast(11 / 24) * x + + static_cast(1 / 4) * x2 - + static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; + _S[4] = static_cast(1 / 384) + static_cast(1 / 48) * x + + static_cast(1 / 16) * x2 + + static_cast(1 / 12) * x3 + + static_cast(1 / 24) * x4; + + } else if constexpr (O == 5) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + + _S[0] = static_cast(1.0 / 46080.0) - + static_cast(1.0 / 3840.0) * x + + static_cast(1.0 / 384.0) * x2 - + static_cast(1.0 / 96.0) * x3 + + static_cast(1.0 / 72.0) * x4 - + static_cast(1.0 / 144.0) * x5 + + static_cast(1.0 / 720.0) * x6; + + _S[1] = 
static_cast(13.0 / 9216.0) - + static_cast(11.0 / 768.0) * x + + static_cast(1.0 / 48.0) * x2 + + static_cast(5.0 / 72.0) * x3 - + static_cast(1.0 / 8.0) * x4 + + static_cast(5.0 / 144.0) * x5 - + static_cast(1.0 / 144.0) * x6; + + _S[2] = static_cast(115.0 / 768.0) - + static_cast(5.0 / 24.0) * x2 + + static_cast(1.0 / 8.0) * x4 - + static_cast(1.0 / 72.0) * x6; + + _S[3] = static_cast(115.0 / 768.0) - + static_cast(5.0 / 24.0) * x2 + + static_cast(1.0 / 8.0) * x4 - + static_cast(1.0 / 72.0) * x6; + + _S[4] = static_cast(13.0 / 9216.0) + + static_cast(11.0 / 768.0) * x + + static_cast(1.0 / 48.0) * x2 - + static_cast(5.0 / 72.0) * x3 - + static_cast(1.0 / 8.0) * x4 - + static_cast(5.0 / 144.0) * x5 - + static_cast(1.0 / 144.0) * x6; + + _S[5] = static_cast(1.0 / 46080.0) + + static_cast(1.0 / 3840.0) * x + + static_cast(1.0 / 384.0) * x2 + + static_cast(1.0 / 96.0) * x3 + + static_cast(1.0 / 72.0) * x4 + + static_cast(1.0 / 144.0) * x5 + + static_cast(1.0 / 720.0) * x6; + + } else if constexpr (O == 6) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + + _S[0] = static_cast(1.0 / 40320.0) - + static_cast(1.0 / 4480.0) * x + + static_cast(1.0 / 640.0) * x2 - + static_cast(1.0 / 192.0) * x3 + + static_cast(1.0 / 144.0) * x4 - + static_cast(1.0 / 288.0) * x5 + + static_cast(1.0 / 1440.0) * x6; + + _S[1] = static_cast(1.0 / 1344.0) - + static_cast(1.0 / 160.0) * x + + static_cast(5.0 / 192.0) * x2 - + static_cast(1.0 / 48.0) * x3 - + static_cast(1.0 / 48.0) * x4 + + static_cast(5.0 / 288.0) * x5 - + static_cast(1.0 / 288.0) * x6; + + _S[2] = static_cast(17.0 / 336.0) - + static_cast(5.0 / 48.0) * x2 + + static_cast(1.0 / 12.0) * x4 - + static_cast(1.0 / 144.0) * x6; + + _S[3] = static_cast(151.0 / 252.0) - + static_cast(35.0 / 48.0) * x2 + + static_cast(5.0 / 12.0) * x4 - + static_cast(1.0 / 36.0) * x6; + + _S[4] = static_cast(17.0 / 336.0) - + static_cast(5.0 / 48.0) * x2 + + 
static_cast(1.0 / 12.0) * x4 - + static_cast(1.0 / 144.0) * x6; + + _S[5] = static_cast(1.0 / 1344.0) + + static_cast(1.0 / 160.0) * x + + static_cast(5.0 / 192.0) * x2 + + static_cast(1.0 / 48.0) * x3 - + static_cast(1.0 / 48.0) * x4 - + static_cast(5.0 / 288.0) * x5 - + static_cast(1.0 / 288.0) * x6; + + _S[6] = static_cast(1.0 / 40320.0) + + static_cast(1.0 / 4480.0) * x + + static_cast(1.0 / 640.0) * x2 + + static_cast(1.0 / 192.0) * x3 + + static_cast(1.0 / 144.0) * x4 + + static_cast(1.0 / 288.0) * x5 + + static_cast(1.0 / 1440.0) * x6; + + } else if constexpr (O == 7) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + const auto x7 = x4 * x3; + + _S[0] = static_cast(1.0 / 645120.0) - + static_cast(1.0 / 64512.0) * x + + static_cast(1.0 / 9216.0) * x2 - + static_cast(1.0 / 3072.0) * x3 + + static_cast(1.0 / 2304.0) * x4 - + static_cast(1.0 / 4608.0) * x5 + + static_cast(1.0 / 23040.0) * x6 - + static_cast(1.0 / 161280.0) * x7; + + _S[1] = static_cast(1.0 / 9216.0) - + static_cast(5.0 / 4608.0) * x + + static_cast(35.0 / 9216.0) * x2 - + static_cast(7.0 / 768.0) * x3 - + static_cast(7.0 / 1152.0) * x4 + + static_cast(35.0 / 4608.0) * x5 - + static_cast(5.0 / 4608.0) * x6 + + static_cast(1.0 / 9216.0) * x7; + + _S[2] = static_cast(25.0 / 1536.0) - + static_cast(35.0 / 768.0) * x2 + + static_cast(7.0 / 192.0) * x4 - + static_cast(1.0 / 96.0) * x6; + + _S[3] = static_cast(245.0 / 384.0) - + static_cast(245.0 / 192.0) * x2 + + static_cast(49.0 / 48.0) * x4 - + static_cast(7.0 / 72.0) * x6; + + _S[4] = _S[3]; // symmetry + + _S[5] = _S[2]; // symmetry + + _S[6] = static_cast(1 / 9216) + static_cast(5 / 4608) * x + + static_cast(35 / 9216) * x2 + + static_cast(7 / 768) * x3 - + static_cast(7 / 1152) * x4 - + static_cast(35 / 4608) * x5 - + static_cast(5 / 4608) * x6 - + static_cast(1 / 9216) * x7; + + _S[7] = static_cast(1 / 645120) + + static_cast(1 / 64512) * x + + static_cast(1 
/ 9216) * x2 + + static_cast(1 / 3072) * x3 + + static_cast(1 / 2304) * x4 + + static_cast(1 / 4608) * x5 + + static_cast(1 / 23040) * x6 + + static_cast(1 / 161280) * x7; + + } else if constexpr (O == 8) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + const auto x7 = x4 * x3; + const auto x8 = x4 * x4; + + _S[0] = static_cast(1.0 / 10321920.0) - + static_cast(1.0 / 1146880.0) * x + + static_cast(1.0 / 161280.0) * x2 - + static_cast(1.0 / 53760.0) * x3 + + static_cast(1.0 / 43008.0) * x4 - + static_cast(1.0 / 96768.0) * x5 + + static_cast(1.0 / 645120.0) * x6 - + static_cast(1.0 / 1032192.0) * x7 + + static_cast(1.0 / 4134528.0) * x8; + + _S[1] = static_cast(1.0 / 129024.0) - + static_cast(1.0 / 14336.0) * x + + static_cast(17.0 / 43008.0) * x2 - + static_cast(17.0 / 21504.0) * x3 + + static_cast(17.0 / 21504.0) * x4 - + static_cast(17.0 / 43008.0) * x5 + + static_cast(1.0 / 14336.0) * x6 - + static_cast(1.0 / 129024.0) * x7 + + static_cast(1.0 / 1032192.0) * x8; + + _S[2] = static_cast(361.0 / 64512.0) - + static_cast(153.0 / 14336.0) * x2 + + static_cast(51.0 / 14336.0) * x4 - + static_cast(17.0 / 43008.0) * x6 + + static_cast(1.0 / 1032192.0) * x8; + + _S[3] = static_cast(3061.0 / 16128.0) - + static_cast(170.0 / 1792.0) * x2 + + static_cast(34.0 / 1536.0) * x4 - + static_cast(17.0 / 16128.0) * x6; + + _S[4] = static_cast(257135.0 / 32256.0) - + static_cast(1785.0 / 896.0) * x2 + + static_cast(255.0 / 256.0) * x4 - + static_cast(85.0 / 1152.0) * x6; + + _S[5] = _S[3]; // symmetry + + _S[6] = _S[2]; // symmetry + + _S[7] = static_cast(1 / 129024) + + static_cast(1 / 14336) * x + + static_cast(17 / 43008) * x2 + + static_cast(17 / 21504) * x3 + + static_cast(17 / 21504) * x4 + + static_cast(17 / 43008) * x5 + + static_cast(1 / 14336) * x6 + + static_cast(1 / 129024) * x7 + + static_cast(1 / 1032192) * x8; + + _S[8] = static_cast(1 / 10321920) + + static_cast(1 / 1146880) * x 
+ + static_cast(1 / 161280) * x2 + + static_cast(1 / 53760) * x3 + + static_cast(1 / 43008) * x4 + + static_cast(1 / 96768) * x5 + + static_cast(1 / 645120) * x6 + + static_cast(1 / 1032192) * x7 + + static_cast(1 / 4134528) * x8; + + } else { + raise::KernelError(HERE, "Invalid order of shape function!"); + } + } + + Inline void shape_function_Nth(real_t* S0, + real_t* S1, + ncells_t& i_min, + const index_t& i, + const real_t& di, + const index_t& i_prev, + const real_t& di_prev) const { + /* + Shape function per particle is a O+1 element array. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + (-1) 0 1 ... N N+1 + __________________________________________ + | | x* | x* | // | x* | | // shift_i = 0 + |______|______|______|______|______|______| + | | x | x* | // | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + | * | x* | x* | // | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + // find shift in indices + // ToDo: fix + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); + + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + + // find the minimum index of the shape function -> ToDo! + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function -> ToDo! + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center = static_cast(1 - di_less_half) - di; + // ToDo: end fix + + real_t _S0[O+1], _S1[O+1]; + // apply shape function + W(_S0, di_center_prev); + W(_S1, di_center); + + // find indices and define shape function + if (shift_i == 1) { + /* + (-1) 0 1 ... 
N N+1 + __________________________________________ + | | x | x* | // | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + */ + + for (int j = 0; j < O; j++) { + S0[j] = _S0[j]; + } + S0[O + 1] = ZERO; + + S1[0] = ZERO; + for (int j = 0; j < O; j++) { + S1[j+1] = _S1[j]; + } + + } else if (shift_i == -1) { + /* + (-1) 0 1 ... N N+1 + __________________________________________ + | * | x* | x* | // | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + S0[0] = ZERO; + for (int j = 0; j < O; j++) { + S0[j+1] = _S0[j]; + } + + for (int j = 0; j < O; j++) { + S1[j] = _S1[j]; + } + S1[O+1] = ZERO; + + } else if (shift_i == 0) { + /* + (-1) 0 1 ... N N+1 + __________________________________________ + | | x* | x* | // | x* | | // shift_i = 0 + |______|______|______|______|______|______| + */ + + for (int j = 0; j < O; j++) { + S0[j] = _S0[j]; + } + S0[O + 1] = ZERO; + + for (int j = 0; j < O; j++) { + S1[j] = _S1[j]; + } + S1[O + 1] = ZERO; + } else { + raise::KernelError(HERE, "Invalid shift in indices"); + } + + // account for ghost cells here to shorten J update expression + i_min += N_GHOSTS; + } + public: /** * @brief explicit constructor. @@ -932,27 +1321,6 @@ namespace kernel { i3_prev(p), dx3_prev(p)); // clang-format on - // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 
31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - // - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - // - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } - // - // Unrolled calculations for Wx, Wy, and Wz // clang-format off const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * @@ -2178,13 +2546,251 @@ namespace kernel { if (update_x3 && update_y3) { J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; } - } // dim - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); + + } // dim -> ToDo: 3D! + + } else if constexpr (O > 3u) { + + // shape function in dim1 -> always required + real_t S0x[O + 2], S1x[O + 2]; + // indices of the shape function + ncells_t ix_min; + + // ToDo: Call shape function + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { + + // shape function in dim2 + real_t S0y[O + 2], S1y[O + 2]; + // indices of the shape function + ncells_t iy_min; + + // ToDo: Call shape function + + // define weight tensors + real_t Wx[O + 1][O + 1]; + real_t Wy[O + 1][O + 1]; + real_t Wz[O + 1][O + 1]; + +// Calculate weight function +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + // Esirkepov 2001, Eq. 
38 + Wx[i][j] = (S1x[i] - S0x[i]) * (S0y[j] + HALF * (S1y[j] - S0y[j])); + + Wy[i][j] = (S1y[i] - S0y[i]) * (S0y[j] + HALF * (S1x[j] - S0x[j])); + + Wz[i][j] = S0x[i] * S0y[j] + HALF * (S1x[i] - S1x[i]) * S0y[j] + + HALF * S0x[i] * (S1y[j] - S0y[j]) + + THIRD * (S1x[i] - S0x[i]) * (S1y[j] - S0y[j]); + } + } + + // contribution within the shape function stencil + real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; + + // prefactors to j update + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; + + // Calculate current contribution + + // jx +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jx[0][j] = -Qdxdt * Wx[0][j]; + } + +#pragma unroll + for (int i = 1; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jx[i][j] = jx[i - 1][j] - Qdxdt * Wx[i][j]; + } + } + + // jy +#pragma unroll + for (int i = 0; i < O + 2; ++i) { + jy[i][0] = -Qdydt * Wy[i][0]; + } + +#pragma unroll + for (int j = 1; j < O + 2; ++j) { +#pragma unroll + for (int i = 0; i < O + 2; ++i) { + jy[i][j] = jy[i][j - 1] - Qdydt * Wy[i][j]; + } + } + + // jz +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jz[i][j] = QVz * Wz[i][j]; + } + } + + /* + Current update + */ + auto J_acc = J.access(); + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + J_acc(ix_min + i, iy_min + j, cur::jx1) += jx[i][j]; + J_acc(ix_min + i, iy_min + j, cur::jx2) += jy[i][j]; + J_acc(ix_min + i, iy_min + j, cur::jx3) += jz[i][j]; + } + } + + } else if constexpr (D == Dim::_3D) { + // shape function in dim2 + real_t S0y[O + 2], S1y[O + 2]; + // indices of the shape function + ncells_t iy_min; + + // ToDo: Call shape function + + // shape function in dim3 + real_t S0z[O + 2], S1z[O + 2]; + // indices of the shape function + ncells_t iz_min; + + // ToDo: Call shape function + + // define weight tensors + real_t 
Wx[O + 1][O + 1][O + 1]; + real_t Wy[O + 1][O + 1][O + 1]; + real_t Wz[O + 1][O + 1][O + 1]; + +// Calculate weight function +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + // Esirkepov 2001, Eq. 31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } + + // contribution within the shape function stencil + real_t jx[O + 2][O + 2][O + 2], jy[O + 2][O + 2][O + 2], + jz[O + 2][O + 2][O + 2]; + + // prefactors to j update + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t Qdzdt = coeff * inv_dt; + + // Calculate current contribution + + // jx +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jx[0][j][k] = -Qdxdt * Wx[0][j][k]; + } + } + +#pragma unroll + for (int i = 1; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; j < O + 2; ++k) { + jx[i][j][k] = jx[i - 1][j][k] - Qdxdt * Wx[i][j][k]; + } + } + } + + // jy +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jy[i][0][k] = -Qdydt * Wy[i][0][k]; + } + } + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 1; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jy[i][j][k] = jy[i][j - 1][k] - Qdydt * Wy[i][j][k]; + } + } + } + + // jz +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jy[i][j][0] = -Qdydt * Wy[i][j][0]; + } + } + +#pragma unroll + for 
(int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 1; k < O + 2; ++k) { + jz[i][j][k] = jz[i][j][k - 1] - Qdzdt * Wz[i][j][k]; + } + } + } + + /* + Current update + */ + auto J_acc = J.access(); + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 1; k < O + 2; ++k) { + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx1) += jx[i][j][k]; + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx2) += jy[i][j][k]; + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx3) += jz[i][j][k]; + } + } + } + } + + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } } - } - }; -} // namespace kernel + }; + } // namespace kernel #undef i_di_to_Xi From 90f5019c0bfcd18badac18f5647c1c0bab33e8b3 Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 3 Aug 2025 11:13:52 -0400 Subject: [PATCH 43/82] new esirkepov (WIP, not yet working properly) --- src/kernels/currents_deposit.hpp | 2562 ++++++++++++++-------------- src/kernels/particle_pusher_sr.hpp | 10 +- src/kernels/particle_shapes.hpp | 107 ++ 3 files changed, 1395 insertions(+), 1284 deletions(-) create mode 100644 src/kernels/particle_shapes.hpp diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 40ca405f..1feb7ba4 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -14,8 +14,11 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/error.h" #include "utils/numeric.h" +#include "particle_shapes.hpp" + #include #define i_di_to_Xi(I, DI) static_cast((I)) + static_cast((DI)) @@ -43,117 +46,117 @@ namespace kernel { const M metric; const real_t charge, inv_dt; - Inline void shape_function_2nd(real_t& S0_0, - real_t& S0_1, - real_t& S0_2, - real_t& S0_3, - real_t& S1_0, - real_t& S1_1, - real_t& S1_2, - real_t& S1_3, - ncells_t& i_min, - bool& update_i2, - const index_t& i, - const real_t& di, - 
const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a 4 element array. - We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 2 3 - ___________________________________ - | | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______| - | | x | x* | x* | * | // shift_i = 1 - |______|______|______|______|______| - | * | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______| - */ - - // find shift in indices - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center = static_cast(1 - di_less_half) - di; - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 2 3 - ___________________________________ - | | x | x* | x* | * | // shift_i = 1 - |______|______|______|______|______| - */ - update_i2 = true; - - S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = THREE_FOURTHS - SQR(di_center_prev); - S0_2 = HALF * SQR(HALF - di_center_prev); - S0_3 = ZERO; - - S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + di_center); - S1_2 = THREE_FOURTHS - SQR(di_center); - S1_3 = HALF * SQR(HALF - di_center); - } else if (shift_i == -1) { - /* - (-1) 0 1 2 3 - ___________________________________ - | * | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______| - */ - update_i2 = true; - - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + di_center_prev); - S0_2 = 
THREE_FOURTHS - SQR(di_center_prev); - S0_3 = HALF * SQR(HALF - di_center_prev); - - S1_0 = HALF * SQR(HALF + di_center); - S1_1 = THREE_FOURTHS - SQR(di_center); - S1_2 = HALF * SQR(HALF - di_center); - S1_3 = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 2 3 - ___________________________________ - | | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______| - */ - update_i2 = false; - - S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = THREE_FOURTHS - SQR(di_center_prev); - S0_2 = HALF * SQR(HALF - di_center_prev); - S0_3 = ZERO; - - S1_0 = HALF * SQR(HALF + di_center); - S1_1 = THREE_FOURTHS - SQR(di_center); - S1_2 = HALF * SQR(HALF - di_center); - S1_3 = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update expression - i_min += N_GHOSTS; - } + // Inline void shape_function_2nd(real_t& S0_0, + // real_t& S0_1, + // real_t& S0_2, + // real_t& S0_3, + // real_t& S1_0, + // real_t& S1_1, + // real_t& S1_2, + // real_t& S1_3, + // ncells_t& i_min, + // bool& update_i2, + // const index_t& i, + // const real_t& di, + // const index_t& i_prev, + // const real_t& di_prev) const { + // /* + // Shape function per particle is a 4 element array. 
+ // We need to find which indices are contributing to the shape function + // For this we first compute the indices of the particle position + // + // Let * be the particle position at the current timestep + // Let x be the particle position at the previous timestep + // + // + // (-1) 0 1 2 3 + // ___________________________________ + // | | x* | x* | x* | | // shift_i = 0 + // |______|______|______|______|______| + // | | x | x* | x* | * | // shift_i = 1 + // |______|______|______|______|______| + // | * | x* | x* | x | | // shift_i = -1 + // |______|______|______|______|______| + // */ + // + // // find shift in indices + // const int di_less_half = static_cast(di < static_cast(0.5)); + // const int di_prev_less_half = static_cast( + // di_prev < static_cast(0.5)); + // + // const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + // + // // find the minimum index of the shape function + // i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + // + // // center index of the shape function + // const auto di_center_prev = static_cast(1 - di_prev_less_half) - + // di_prev; + // const auto di_center = static_cast(1 - di_less_half) - di; + // + // // find indices and define shape function + // if (shift_i == 1) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | | x | x* | x* | * | // shift_i = 1 + // |______|______|______|______|______| + // */ + // update_i2 = true; + // + // S0_0 = HALF * SQR(HALF + di_center_prev); + // S0_1 = THREE_FOURTHS - SQR(di_center_prev); + // S0_2 = HALF * SQR(HALF - di_center_prev); + // S0_3 = ZERO; + // + // S1_0 = ZERO; + // S1_1 = HALF * SQR(HALF + di_center); + // S1_2 = THREE_FOURTHS - SQR(di_center); + // S1_3 = HALF * SQR(HALF - di_center); + // } else if (shift_i == -1) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | * | x* | x* | x | | // shift_i = -1 + // |______|______|______|______|______| + // */ + // update_i2 = true; + // + // S0_0 = 
ZERO; + // S0_1 = HALF * SQR(HALF + di_center_prev); + // S0_2 = THREE_FOURTHS - SQR(di_center_prev); + // S0_3 = HALF * SQR(HALF - di_center_prev); + // + // S1_0 = HALF * SQR(HALF + di_center); + // S1_1 = THREE_FOURTHS - SQR(di_center); + // S1_2 = HALF * SQR(HALF - di_center); + // S1_3 = ZERO; + // + // } else if (shift_i == 0) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | | x* | x* | x* | | // shift_i = 0 + // |______|______|______|______|______| + // */ + // update_i2 = false; + // + // S0_0 = HALF * SQR(HALF + di_center_prev); + // S0_1 = THREE_FOURTHS - SQR(di_center_prev); + // S0_2 = HALF * SQR(HALF - di_center_prev); + // S0_3 = ZERO; + // + // S1_0 = HALF * SQR(HALF + di_center); + // S1_1 = THREE_FOURTHS - SQR(di_center); + // S1_2 = HALF * SQR(HALF - di_center); + // S1_3 = ZERO; + // } else { + // raise::KernelError(HERE, "Invalid shift in indices"); + // } + // + // // account for ghost cells here to shorten J update expression + // i_min += N_GHOSTS; + // } Inline void shape_function_3rd(real_t& S0_0, real_t& S0_1, @@ -614,7 +617,7 @@ namespace kernel { const auto di_center = static_cast(1 - di_less_half) - di; // ToDo: end fix - real_t _S0[O+1], _S1[O+1]; + real_t _S0[O + 1], _S1[O + 1]; // apply shape function W(_S0, di_center_prev); W(_S1, di_center); @@ -635,7 +638,7 @@ namespace kernel { S1[0] = ZERO; for (int j = 0; j < O; j++) { - S1[j+1] = _S1[j]; + S1[j + 1] = _S1[j]; } } else if (shift_i == -1) { @@ -648,13 +651,13 @@ namespace kernel { S0[0] = ZERO; for (int j = 0; j < O; j++) { - S0[j+1] = _S0[j]; + S0[j + 1] = _S0[j]; } for (int j = 0; j < O; j++) { S1[j] = _S1[j]; } - S1[O+1] = ZERO; + S1[O + 1] = ZERO; } else if (shift_i == 0) { /* @@ -1026,1185 +1029,1182 @@ namespace kernel { } } else if constexpr (O == 2u) { /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + * Higher order charge conserving current 
deposition based on + * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + **/ - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ + // iS -> shape function for init position + // fS -> shape function for final position - /* - x - direction - */ + // shape function at staggered points (one coeff is always ZERO) + int i1_minH; + real_t iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3; + real_t fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3; + + // shape function at integer points (one coeff is always ZERO) + int i1_min; + real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; + real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; - // indices of the shape function - ncells_t ix_min; - bool update_x2; - // find indices and define shape function // clang-format off - shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, - S1x_0, S1x_1, S1x_2, S1x_3, - ix_min, update_x2, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_minH, + iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3, + fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_min, + iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); // clang-format on if constexpr (D == Dim::_1D) { - // ToDo + raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { - /* - y - direction - */ + // shape function at staggered points (one coeff is always ZERO) + int i2_minH; + real_t iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3; + real_t fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3; + + // shape function at integer points 
(one coeff is always ZERO) + int i2_min; + real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; + real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - bool update_y2; - // find indices and define shape function // clang-format off - shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - S1y_0, S1y_1, S1y_2, S1y_3, - iy_min, update_y2, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + i2(p), static_cast(dx2(p)), + i2_minH, + iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3, + fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + i2(p), static_cast(dx2(p)), + i2_min, + iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); // clang-format on - - // Esirkepov 2001, Eq. 38 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); 
- const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = 
THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * vp[2]; - - // Esirkepov - Eq. 39 - // x-component - const auto jx_0_0 = -Qdxdt * Wx_0_0; - const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; - const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - - const auto jx_0_1 = -Qdxdt * Wx_0_1; - const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; - const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - - const auto jx_0_2 = -Qdxdt * Wx_0_2; - const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; - const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - - const auto jx_0_3 = -Qdxdt * Wx_0_3; - const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; - const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + // x1-components + const auto Wx1_00 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_0 + iS_x2_0); + const auto Wx1_01 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_1 + iS_x2_1); + const auto Wx1_02 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_2 + iS_x2_2); + const auto Wx1_03 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_10 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_0 + iS_x2_0); + const auto Wx1_11 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_1 + iS_x2_1); + const auto Wx1_12 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_2 + iS_x2_2); + const auto Wx1_13 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_20 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_0 + iS_x2_0); + const auto Wx1_21 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_1 + iS_x2_1); + const auto Wx1_22 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_2 + iS_x2_2); + const auto Wx1_23 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_30 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_0 + iS_x2_0); + const auto Wx1_31 = HALF * (fS_x1H_3 - 
iS_x1H_3) * (fS_x2_1 + iS_x2_1); + const auto Wx1_32 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_2 + iS_x2_2); + const auto Wx1_33 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_3 + iS_x2_3); + + // x2-components + const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_3 - iS_x2H_3); + + // x3-components + const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + + const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_11 = THIRD * 
(fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + + const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + + const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + + // x1-component + const auto jx1_00 = Wx1_00; + const auto jx1_10 = jx1_00 + Wx1_10; + const auto jx1_20 = jx1_10 + Wx1_20; + const auto jx1_30 = jx1_20 + Wx1_30; + + const auto jx1_01 = Wx1_01; + const auto jx1_11 = jx1_01 + Wx1_11; + const auto jx1_21 = jx1_11 + Wx1_21; + const auto jx1_31 = jx1_21 + Wx1_31; + + const auto jx1_02 = Wx1_02; + const auto jx1_12 = jx1_02 + Wx1_12; + const auto jx1_22 = jx1_12 + Wx1_22; + const auto jx1_32 = jx1_22 + Wx1_32; + + const auto jx1_03 = Wx1_03; + const auto jx1_13 = jx1_03 + Wx1_13; + const auto jx1_23 = jx1_13 + Wx1_23; + const auto jx1_33 = jx1_23 + Wx1_33; // y-component - const auto jy_0_0 = -Qdydt * Wy_0_0; - const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; - const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - - const auto jy_1_0 = -Qdydt * Wy_1_0; - 
const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; - const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - - const auto jy_2_0 = -Qdydt * Wy_2_0; - const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; - const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jx2_00 = Wx2_00; + const auto jx2_01 = jx2_00 + Wx2_01; + const auto jx2_02 = jx2_01 + Wx2_02; + const auto jx2_03 = jx2_02 + Wx2_03; + + const auto jx2_10 = Wx2_10; + const auto jx2_11 = jx2_10 + Wx2_11; + const auto jx2_12 = jx2_11 + Wx2_12; + const auto jx2_13 = jx2_12 + Wx2_13; + + const auto jx2_20 = Wx2_20; + const auto jx2_21 = jx2_20 + Wx2_21; + const auto jx2_22 = jx2_21 + Wx2_22; + const auto jx2_23 = jx2_22 + Wx2_23; + + const auto jx2_30 = Wx2_30; + const auto jx2_31 = jx2_30 + Wx2_31; + const auto jx2_32 = jx2_31 + Wx2_32; + const auto jx2_33 = jx2_32 + Wx2_33; + + i1_minH += N_GHOSTS; + i1_min += N_GHOSTS; + i2_minH += N_GHOSTS; + i2_min += N_GHOSTS; + + // @TODO: not sure about the signs here + const real_t Qdx1dt = -coeff * inv_dt; + const real_t Qdx2dt = -coeff * inv_dt; + const real_t QVx3 = coeff * vp[2]; - const auto jy_3_0 = -Qdydt * Wy_3_0; - const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; - const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - - /* - Current update - */ auto J_acc = J.access(); - /* - x - component - */ - J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; - - if (update_x2) { - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; - } - - if (update_y2) { - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; - J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; - } - - if (update_x2 && update_y2) { - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; - } - - /* - y - 
component - */ - J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - - if (update_x2) { - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; - } - - if (update_y2) { - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; - } - - if (update_x2 && update_y2) { - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; - } - /* - z - component, unsimulated direction - */ - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - - if (update_x2) { - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - } - - if (update_y2) { - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - } - if (update_x2 && update_y2) { - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - } - + // x1-currents + J_acc(i1_minH + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + J_acc(i1_minH + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + J_acc(i1_minH + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + J_acc(i1_minH + 0, i2_min 
+ 3, cur::jx1) += Qdx1dt * jx1_03; + + J_acc(i1_minH + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + J_acc(i1_minH + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + J_acc(i1_minH + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + J_acc(i1_minH + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + J_acc(i1_minH + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + J_acc(i1_minH + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + J_acc(i1_minH + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + J_acc(i1_minH + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + J_acc(i1_minH + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + J_acc(i1_minH + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + J_acc(i1_minH + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + J_acc(i1_minH + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + + // x2-currents + J_acc(i1_min + 0, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_00; + J_acc(i1_min + 0, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_01; + J_acc(i1_min + 0, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_02; + J_acc(i1_min + 0, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_03; + + J_acc(i1_min + 1, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_10; + J_acc(i1_min + 1, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_11; + J_acc(i1_min + 1, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_12; + J_acc(i1_min + 1, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_13; + + J_acc(i1_min + 2, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_20; + J_acc(i1_min + 2, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_21; + J_acc(i1_min + 2, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_22; + J_acc(i1_min + 2, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_23; + + J_acc(i1_min + 3, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_30; + J_acc(i1_min + 3, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_31; + J_acc(i1_min + 3, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_32; + J_acc(i1_min + 3, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_33; + + // x3-currents + J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; + J_acc(i1_min + 0, i2_min + 
2, cur::jx3) += QVx3 * Wx3_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; } else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - bool update_y2; - // find indices and define shape function - // clang-format off - shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - S1y_0, S1y_1, S1y_2, S1y_3, - iy_min, update_y2, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - // clang-format on - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; - // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; - // indices of the shape function - ncells_t iz_min; - bool update_z2; - // find indices and define shape function - // clang-format off - shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, - S1z_0, S1z_1, S1z_2, S1z_3, - iz_min, update_z2, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - // clang-format on - - // Unrolled calculations for Wx, Wy, and Wz - // clang-format off - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * 
(S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const 
auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - 
((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF 
* (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const real_t Qdxdt = coeff * inv_dt; - - const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; - const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; - const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; - const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; - const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; - const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; - const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; - const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; - const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; - const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; - const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; - const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; - - const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; - const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; - const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; - const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; - const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; - const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; - const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; - const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; - const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; - const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; - const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; - const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; - - const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; - const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; - const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; - const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; - const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; - const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; - const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; - const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; - const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; - const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; - const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; - const auto jx_2_3_2 = jx_1_3_2 - 
Qdxdt * Wx_2_3_2; - - const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; - const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; - const auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; - const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; - const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; - const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; - const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; - const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; - const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; - const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; - const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; - const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; - - /* - y-component - */ - const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_2 + 
S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto 
Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + 
- HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const real_t Qdydt = coeff * inv_dt; - - const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; - const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; - const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; - const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; - const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; - const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; - const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; - const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; - const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; - const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; - const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; - const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; - - const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; - const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; - const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; - const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; - const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; - const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; - const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; - const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; - const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; - const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; - const auto jy_3_1_1 = jy_3_0_1 - Qdydt * 
Wy_3_1_1; - const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; - - const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; - const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; - const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; - const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; - const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; - const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; - const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; - const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; - const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; - const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; - const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; - const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; - - const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; - const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; - const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; - const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; - const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; - const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; - const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; - const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; - const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; - const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; - const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; - const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; - - /* - z - component - */ - const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - - const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * 
S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - - const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - - const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - - // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - - const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - - const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 
* S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - - const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - - // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 - const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - - const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - - const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - - const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * - 
(S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - - // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - - const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - - const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - - const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - - const real_t Qdzdt = coeff * inv_dt; - - const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; - const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; - const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; - const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; - const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; - const auto 
jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; - const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; - const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; - const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; - const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; - const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; - const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; - - const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; - const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; - const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; - const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; - const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; - const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; - const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; - const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; - const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; - const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; - const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; - const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; - - const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; - const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; - const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; - const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; - const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; - const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; - const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; - const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; - const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; - const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; - const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; - const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; - - const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; - const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; - const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; - const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; - const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; - const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; - const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; - const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; - const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; - 
const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; - const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * Wz_3_3_1; - const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; - - - /* - Current update - */ - auto J_acc = J.access(); - - J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; - - if (update_x2) - { - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; - - if (update_y2) - { - J_acc(ix_min + 2, iy_min + 3, iz_min, 
cur::jx1) += jx_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; - } - - if (update_z2) - { - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; - - if (update_y2) - { - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; - } - } - } + // /* + // y - direction + // */ // - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; - } - } - - - /* - y-component - */ - J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += 
jy_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; - - if (update_z2) - { - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; - } - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; - J_acc(ix_min + 3, iy_min 
+ 2, iz_min + 2, cur::jx2) += jy_3_2_2; - - if (update_z2) - { - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; - } - } - - if (update_z2) - { - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; - } - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; - } - - /* - z-component - */ - J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; - J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += 
jz_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; - - if (update_y2) - { - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; - } - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, 
cur::jx3) += jz_2_3_2; - } - } + // // shape function at previous timestep + // real_t S0y_0, S0y_1, S0y_2, S0y_3; + // // shape function at current timestep + // real_t S1y_0, S1y_1, S1y_2, S1y_3; + // // indices of the shape function + // ncells_t iy_min; + // bool update_y2; + // // find indices and define shape function + // // clang-format off + // shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + // S1y_0, S1y_1, S1y_2, S1y_3, + // iy_min, update_y2, + // i2(p), dx2(p), + // i2_prev(p), dx2_prev(p)); + // // clang-format on + // + // /* + // z - direction + // */ + // + // // shape function at previous timestep + // real_t S0z_0, S0z_1, S0z_2, S0z_3; + // // shape function at current timestep + // real_t S1z_0, S1z_1, S1z_2, S1z_3; + // // indices of the shape function + // ncells_t iz_min; + // bool update_z2; + // // find indices and define shape function + // // clang-format off + // shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, + // S1z_0, S1z_1, S1z_2, S1z_3, + // iz_min, update_z2, + // i3(p), dx3(p), + // i3_prev(p), dx3_prev(p)); + // // clang-format on + // + // // Unrolled calculations for Wx, Wy, and Wz + // // clang-format off + // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 *
S1z_1)); + // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_1_1_0 
= THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + // 
((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const real_t Qdxdt = coeff * inv_dt; + // + // const auto jx_0_0_0 = - Qdxdt * 
Wx_0_0_0; + // const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; + // const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; + // const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; + // const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; + // const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; + // const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; + // const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; + // const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; + // const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; + // const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; + // const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; + // + // const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; + // const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; + // const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; + // const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; + // const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; + // const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; + // const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; + // const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; + // const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; + // const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; + // const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; + // const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; + // + // const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; + // const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; + // const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; + // const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; + // const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; + // const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; + // const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; + // const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; + // const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; + // const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; + // const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; + // const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; + // + // const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; + // const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; + // const 
auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; + // const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; + // const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; + // const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; + // const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; + // const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; + // const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; + // const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; + // const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; + // const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; + // + // /* + // y-component + // */ + // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + // 
(S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + 
S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - 
S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt; + // + // const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; + // const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; + // const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; + // const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; + // const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; + // const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; + // const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; + // const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; + // const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; + // const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; + // const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; + // const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; + // + // const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; + // const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; + // 
const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; + // const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; + // const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; + // const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; + // const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; + // const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; + // const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; + // const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; + // const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; + // const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; + // + // const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; + // const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; + // const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; + // const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; + // const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; + // const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; + // const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; + // const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; + // const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; + // const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; + // const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; + // const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; + // + // const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; + // const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; + // const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; + // const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; + // const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; + // const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; + // const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; + // const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; + // const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; + // const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; + // const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; + // const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const 
auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // + // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * 
S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 + // const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // + // const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 
* S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // + // const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // + // const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + 
// HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt; + // + // const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; + // const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; + // const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; + // const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; + // const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; + // const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; + // const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; + // const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; + // const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; + // const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; + // const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; + // const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; + // + // const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; + // const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; + // const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; + // const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; + // const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; + // const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; + // const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; + // const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; + // const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; + // const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; + // const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * 
Wz_1_3_1; + // const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; + // + // const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; + // const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; + // const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; + // const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; + // const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; + // const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; + // const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; + // const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; + // const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; + // const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; + // const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; + // const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; + // + // const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; + // const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; + // const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; + // const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; + // const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; + // const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; + // const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; + // const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; + // const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; + // const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; + // const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * Wz_3_3_1; + // const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; + // + // + // /* + // Current update + // */ + // auto J_acc = J.access(); + // + // J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; 
+ // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; + // + // if (update_y2) + // { + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; + // } + // + // if (update_z2) + // { + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; + // + // if (update_y2) + // { + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; + // } + // } + // } + // // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) 
+= jx_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; + // } + // } + // + // + // /* + // y-component + // */ + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; + // 
J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; + // + // if (update_z2) + // { + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; + // } + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; + // + // if (update_z2) + // { + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; + // } + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; + // } + // } + // + 
// if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; + // } + // + // /* + // z-component + // */ + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += 
jz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; + // + // if (update_y2) + // { + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; + // } + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; + // } + // } // clang-format on } // dimension @@ -2784,13 +2784,13 @@ namespace kernel { } } } - - } else 
{ // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } + + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); } - }; - } // namespace kernel + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 980acca5..7d267ce9 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,9 +475,13 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - //getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - getInterpFlds2nd(p, ei, bi); + // getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + // getInterpFlds2nd(p, ei, bi); + for (auto i { 0u }; i < 3u; ++i) { + ei[i] = ZERO; + bi[i] = ZERO; + } metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp new file mode 100644 index 00000000..97b5bde4 --- /dev/null +++ b/src/kernels/particle_shapes.hpp @@ -0,0 +1,107 @@ +/** + * @file kernels/particle_shapes.hpp + * @brief Functions to compute particle shapes at specific locations on the grid. 
+ * @implements: + * - order_2<> -> void + * @namespaces: + * - prtl_shape:: + */ + +#ifndef KERNELS_PARTICLE_SHAPES_HPP +#define KERNELS_PARTICLE_SHAPES_HPP + +#include "global.h" + +#include "utils/error.h" +#include "utils/numeric.h" + +namespace prtl_shape { + + template + Inline void order_2nd(const int& i, + const real_t& di, + int& i_min, + real_t& S0, + real_t& S1, + real_t& S2) { + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 1; + S0 = HALF * SQR(HALF - di); + S1 = THREE_FOURTHS - SQR(di); + S2 = ONE - S0 - S1; + } else { + i_min = i; + S0 = HALF * SQR(THREE_FOURTHS - di); + S2 = HALF * SQR(di - HALF); + S1 = ONE - S0 - S2; + } + } else { // compute at i + 1/2 positions + i_min = i - 1; + S1 = HALF + di - SQR(di); + S2 = HALF * SQR(di); + S0 = ONE - S1 - S2; + } + } + + template + Inline void for_deposit_2nd(const int& i_init, + const real_t& di_init, + const int& i_fin, + const real_t& di_fin, + int& i_min, + real_t& iS_0, + real_t& iS_1, + real_t& iS_2, + real_t& iS_3, + real_t& fS_0, + real_t& fS_1, + real_t& fS_2, + real_t& fS_3) { + int i_init_min, i_fin_min; + + real_t iS_0_, iS_1_, iS_2_; + real_t fS_0_, fS_1_, fS_2_; + + order_2nd(i_init, di_init, i_init_min, iS_0_, iS_1_, iS_2_); + order_2nd(i_fin, di_fin, i_fin_min, fS_0_, fS_1_, fS_2_); + + if (i_init_min < i_fin_min) { + i_min = i_init_min; + iS_0 = iS_0_; + iS_1 = iS_1_; + iS_2 = iS_2_; + iS_3 = ZERO; + + fS_0 = ZERO; + fS_1 = iS_0_; + fS_2 = iS_1_; + fS_3 = iS_2_; + } else if (i_init_min > i_fin_min) { + i_min = i_fin_min; + iS_0 = ZERO; + iS_1 = iS_0_; + iS_2 = iS_1_; + iS_3 = iS_2_; + + fS_0 = iS_0_; + fS_1 = iS_1_; + fS_2 = iS_2_; + fS_3 = ZERO; + } else { + i_min = i_init_min; + iS_0 = iS_0_; + iS_1 = iS_1_; + iS_2 = iS_2_; + iS_3 = ZERO; + + fS_0 = iS_0_; + fS_1 = iS_1_; + fS_2 = iS_2_; + fS_3 = ZERO; + } + } + +} // namespace prtl_shape + +#endif // KERNELS_PARTICLE_SHAPES_HPP From 5aa4814efef205273659f2b33080f84b8a2885b1 Mon Sep 17 00:00:00 
2001 From: haykh Date: Tue, 5 Aug 2025 16:38:54 -0400 Subject: [PATCH 44/82] shape func fix --- src/kernels/particle_shapes.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 97b5bde4..bff8853a 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -32,7 +32,7 @@ namespace prtl_shape { S2 = ONE - S0 - S1; } else { i_min = i; - S0 = HALF * SQR(THREE_FOURTHS - di); + S0 = HALF * SQR(static_cast(1.5) - di); S2 = HALF * SQR(di - HALF); S1 = ONE - S0 - S2; } From 7099a8813d0c85ab8ca8be7e893f2f16420fedbb Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Tue, 5 Aug 2025 23:40:17 -0500 Subject: [PATCH 45/82] generalized shape function to 5th order, cleanup and removal of staggered grid call --- src/global/utils/numeric.h | 2 + src/kernels/currents_deposit.hpp | 714 ++++++++----------------------- src/kernels/particle_shapes.hpp | 275 ++++++++++-- 3 files changed, 400 insertions(+), 591 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index fd1ddc65..63f23d3e 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -36,6 +36,7 @@ inline constexpr float TWO = 2.0f; inline constexpr float THREE = 3.0f; inline constexpr float FOUR = 4.0f; inline constexpr float FIVE = 5.0f; +inline constexpr float SIX = 6.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; @@ -53,6 +54,7 @@ inline constexpr double TWO = 2.0; inline constexpr double THREE = 3.0; inline constexpr double FOUR = 4.0; inline constexpr double FIVE = 5.0; +inline constexpr double SIX = 6.0; inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1feb7ba4..492dec5c 100644 --- a/src/kernels/currents_deposit.hpp +++ 
b/src/kernels/currents_deposit.hpp @@ -295,395 +295,6 @@ namespace kernel { i_min += N_GHOSTS; } - Inline void W(real_t* _S, real_t x) const { - - if constexpr (O == 2) { - - _S[0] = HALF * SQR(HALF - x); - _S[1] = THREE_FOURTHS - SQR(x); - _S[2] = HALF * SQR(HALF + x); - - } else if constexpr (O == 3) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - - _S[0] = static_cast(1 / 6) * (ONE - x3) - HALF * SQR(x - x2); - _S[1] = static_cast(2 / 3) - x2 + HALF * x3; - _S[2] = static_cast(1 / 6) + HALF * (x + x2 + x3); - _S[3] = static_cast(1 / 6) * x3; - - } else if constexpr (O == 4) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - - _S[0] = static_cast(1 / 384) - static_cast(1 / 48) * x + - static_cast(1 / 16) * x2 - - static_cast(1 / 12) * x3 + - static_cast(1 / 24) * x4; - _S[1] = static_cast(19 / 96) - static_cast(11 / 24) * x + - static_cast(1 / 4) * x2 + - static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; - _S[2] = static_cast(115 / 192) - static_cast(5 / 8) * x2 + - static_cast(1 / 4) * x4; - _S[3] = static_cast(19 / 96) + static_cast(11 / 24) * x + - static_cast(1 / 4) * x2 - - static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; - _S[4] = static_cast(1 / 384) + static_cast(1 / 48) * x + - static_cast(1 / 16) * x2 + - static_cast(1 / 12) * x3 + - static_cast(1 / 24) * x4; - - } else if constexpr (O == 5) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - - _S[0] = static_cast(1.0 / 46080.0) - - static_cast(1.0 / 3840.0) * x + - static_cast(1.0 / 384.0) * x2 - - static_cast(1.0 / 96.0) * x3 + - static_cast(1.0 / 72.0) * x4 - - static_cast(1.0 / 144.0) * x5 + - static_cast(1.0 / 720.0) * x6; - - _S[1] = static_cast(13.0 / 9216.0) - - static_cast(11.0 / 768.0) * x + - static_cast(1.0 / 48.0) * x2 + - static_cast(5.0 / 72.0) * x3 - - static_cast(1.0 / 8.0) * x4 + - static_cast(5.0 / 144.0) * x5 - - static_cast(1.0 / 144.0) * x6; - - _S[2] = 
static_cast(115.0 / 768.0) - - static_cast(5.0 / 24.0) * x2 + - static_cast(1.0 / 8.0) * x4 - - static_cast(1.0 / 72.0) * x6; - - _S[3] = static_cast(115.0 / 768.0) - - static_cast(5.0 / 24.0) * x2 + - static_cast(1.0 / 8.0) * x4 - - static_cast(1.0 / 72.0) * x6; - - _S[4] = static_cast(13.0 / 9216.0) + - static_cast(11.0 / 768.0) * x + - static_cast(1.0 / 48.0) * x2 - - static_cast(5.0 / 72.0) * x3 - - static_cast(1.0 / 8.0) * x4 - - static_cast(5.0 / 144.0) * x5 - - static_cast(1.0 / 144.0) * x6; - - _S[5] = static_cast(1.0 / 46080.0) + - static_cast(1.0 / 3840.0) * x + - static_cast(1.0 / 384.0) * x2 + - static_cast(1.0 / 96.0) * x3 + - static_cast(1.0 / 72.0) * x4 + - static_cast(1.0 / 144.0) * x5 + - static_cast(1.0 / 720.0) * x6; - - } else if constexpr (O == 6) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - - _S[0] = static_cast(1.0 / 40320.0) - - static_cast(1.0 / 4480.0) * x + - static_cast(1.0 / 640.0) * x2 - - static_cast(1.0 / 192.0) * x3 + - static_cast(1.0 / 144.0) * x4 - - static_cast(1.0 / 288.0) * x5 + - static_cast(1.0 / 1440.0) * x6; - - _S[1] = static_cast(1.0 / 1344.0) - - static_cast(1.0 / 160.0) * x + - static_cast(5.0 / 192.0) * x2 - - static_cast(1.0 / 48.0) * x3 - - static_cast(1.0 / 48.0) * x4 + - static_cast(5.0 / 288.0) * x5 - - static_cast(1.0 / 288.0) * x6; - - _S[2] = static_cast(17.0 / 336.0) - - static_cast(5.0 / 48.0) * x2 + - static_cast(1.0 / 12.0) * x4 - - static_cast(1.0 / 144.0) * x6; - - _S[3] = static_cast(151.0 / 252.0) - - static_cast(35.0 / 48.0) * x2 + - static_cast(5.0 / 12.0) * x4 - - static_cast(1.0 / 36.0) * x6; - - _S[4] = static_cast(17.0 / 336.0) - - static_cast(5.0 / 48.0) * x2 + - static_cast(1.0 / 12.0) * x4 - - static_cast(1.0 / 144.0) * x6; - - _S[5] = static_cast(1.0 / 1344.0) + - static_cast(1.0 / 160.0) * x + - static_cast(5.0 / 192.0) * x2 + - static_cast(1.0 / 48.0) * x3 - - static_cast(1.0 / 48.0) * x4 - - 
static_cast(5.0 / 288.0) * x5 - - static_cast(1.0 / 288.0) * x6; - - _S[6] = static_cast(1.0 / 40320.0) + - static_cast(1.0 / 4480.0) * x + - static_cast(1.0 / 640.0) * x2 + - static_cast(1.0 / 192.0) * x3 + - static_cast(1.0 / 144.0) * x4 + - static_cast(1.0 / 288.0) * x5 + - static_cast(1.0 / 1440.0) * x6; - - } else if constexpr (O == 7) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - const auto x7 = x4 * x3; - - _S[0] = static_cast(1.0 / 645120.0) - - static_cast(1.0 / 64512.0) * x + - static_cast(1.0 / 9216.0) * x2 - - static_cast(1.0 / 3072.0) * x3 + - static_cast(1.0 / 2304.0) * x4 - - static_cast(1.0 / 4608.0) * x5 + - static_cast(1.0 / 23040.0) * x6 - - static_cast(1.0 / 161280.0) * x7; - - _S[1] = static_cast(1.0 / 9216.0) - - static_cast(5.0 / 4608.0) * x + - static_cast(35.0 / 9216.0) * x2 - - static_cast(7.0 / 768.0) * x3 - - static_cast(7.0 / 1152.0) * x4 + - static_cast(35.0 / 4608.0) * x5 - - static_cast(5.0 / 4608.0) * x6 + - static_cast(1.0 / 9216.0) * x7; - - _S[2] = static_cast(25.0 / 1536.0) - - static_cast(35.0 / 768.0) * x2 + - static_cast(7.0 / 192.0) * x4 - - static_cast(1.0 / 96.0) * x6; - - _S[3] = static_cast(245.0 / 384.0) - - static_cast(245.0 / 192.0) * x2 + - static_cast(49.0 / 48.0) * x4 - - static_cast(7.0 / 72.0) * x6; - - _S[4] = _S[3]; // symmetry - - _S[5] = _S[2]; // symmetry - - _S[6] = static_cast(1 / 9216) + static_cast(5 / 4608) * x + - static_cast(35 / 9216) * x2 + - static_cast(7 / 768) * x3 - - static_cast(7 / 1152) * x4 - - static_cast(35 / 4608) * x5 - - static_cast(5 / 4608) * x6 - - static_cast(1 / 9216) * x7; - - _S[7] = static_cast(1 / 645120) + - static_cast(1 / 64512) * x + - static_cast(1 / 9216) * x2 + - static_cast(1 / 3072) * x3 + - static_cast(1 / 2304) * x4 + - static_cast(1 / 4608) * x5 + - static_cast(1 / 23040) * x6 + - static_cast(1 / 161280) * x7; - - } else if constexpr (O == 8) { - - const auto x2 = x * x; - 
const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - const auto x7 = x4 * x3; - const auto x8 = x4 * x4; - - _S[0] = static_cast(1.0 / 10321920.0) - - static_cast(1.0 / 1146880.0) * x + - static_cast(1.0 / 161280.0) * x2 - - static_cast(1.0 / 53760.0) * x3 + - static_cast(1.0 / 43008.0) * x4 - - static_cast(1.0 / 96768.0) * x5 + - static_cast(1.0 / 645120.0) * x6 - - static_cast(1.0 / 1032192.0) * x7 + - static_cast(1.0 / 4134528.0) * x8; - - _S[1] = static_cast(1.0 / 129024.0) - - static_cast(1.0 / 14336.0) * x + - static_cast(17.0 / 43008.0) * x2 - - static_cast(17.0 / 21504.0) * x3 + - static_cast(17.0 / 21504.0) * x4 - - static_cast(17.0 / 43008.0) * x5 + - static_cast(1.0 / 14336.0) * x6 - - static_cast(1.0 / 129024.0) * x7 + - static_cast(1.0 / 1032192.0) * x8; - - _S[2] = static_cast(361.0 / 64512.0) - - static_cast(153.0 / 14336.0) * x2 + - static_cast(51.0 / 14336.0) * x4 - - static_cast(17.0 / 43008.0) * x6 + - static_cast(1.0 / 1032192.0) * x8; - - _S[3] = static_cast(3061.0 / 16128.0) - - static_cast(170.0 / 1792.0) * x2 + - static_cast(34.0 / 1536.0) * x4 - - static_cast(17.0 / 16128.0) * x6; - - _S[4] = static_cast(257135.0 / 32256.0) - - static_cast(1785.0 / 896.0) * x2 + - static_cast(255.0 / 256.0) * x4 - - static_cast(85.0 / 1152.0) * x6; - - _S[5] = _S[3]; // symmetry - - _S[6] = _S[2]; // symmetry - - _S[7] = static_cast(1 / 129024) + - static_cast(1 / 14336) * x + - static_cast(17 / 43008) * x2 + - static_cast(17 / 21504) * x3 + - static_cast(17 / 21504) * x4 + - static_cast(17 / 43008) * x5 + - static_cast(1 / 14336) * x6 + - static_cast(1 / 129024) * x7 + - static_cast(1 / 1032192) * x8; - - _S[8] = static_cast(1 / 10321920) + - static_cast(1 / 1146880) * x + - static_cast(1 / 161280) * x2 + - static_cast(1 / 53760) * x3 + - static_cast(1 / 43008) * x4 + - static_cast(1 / 96768) * x5 + - static_cast(1 / 645120) * x6 + - static_cast(1 / 1032192) * x7 + - static_cast(1 / 4134528) * x8; - - } 
else { - raise::KernelError(HERE, "Invalid order of shape function!"); - } - } - - Inline void shape_function_Nth(real_t* S0, - real_t* S1, - ncells_t& i_min, - const index_t& i, - const real_t& di, - const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a O+1 element array. - We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 ... N N+1 - __________________________________________ - | | x* | x* | // | x* | | // shift_i = 0 - |______|______|______|______|______|______| - | | x | x* | // | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - | * | x* | x* | // | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - // find shift in indices - // ToDo: fix - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function -> ToDo! - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function -> ToDo! - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center = static_cast(1 - di_less_half) - di; - // ToDo: end fix - - real_t _S0[O + 1], _S1[O + 1]; - // apply shape function - W(_S0, di_center_prev); - W(_S1, di_center); - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 ... 
N N+1 - __________________________________________ - | | x | x* | // | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - */ - - for (int j = 0; j < O; j++) { - S0[j] = _S0[j]; - } - S0[O + 1] = ZERO; - - S1[0] = ZERO; - for (int j = 0; j < O; j++) { - S1[j + 1] = _S1[j]; - } - - } else if (shift_i == -1) { - /* - (-1) 0 1 ... N N+1 - __________________________________________ - | * | x* | x* | // | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - S0[0] = ZERO; - for (int j = 0; j < O; j++) { - S0[j + 1] = _S0[j]; - } - - for (int j = 0; j < O; j++) { - S1[j] = _S1[j]; - } - S1[O + 1] = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 ... N N+1 - __________________________________________ - | | x* | x* | // | x* | | // shift_i = 0 - |______|______|______|______|______|______| - */ - - for (int j = 0; j < O; j++) { - S0[j] = _S0[j]; - } - S0[O + 1] = ZERO; - - for (int j = 0; j < O; j++) { - S1[j] = _S1[j]; - } - S1[O + 1] = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update expression - i_min += N_GHOSTS; - } - public: /** * @brief explicit constructor. 
@@ -1036,96 +647,76 @@ namespace kernel { // iS -> shape function for init position // fS -> shape function for final position - // shape function at staggered points (one coeff is always ZERO) - int i1_minH; - real_t iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3; - real_t fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3; - // shape function at integer points (one coeff is always ZERO) int i1_min; real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; // clang-format off - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_minH, - iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3, - fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3); - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_min, - iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_min, + iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); // clang-format on if constexpr (D == Dim::_1D) { raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { - // shape function at staggered points (one coeff is always ZERO) - int i2_minH; - real_t iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3; - real_t fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3; - // shape function at integer points (one coeff is always ZERO) int i2_min; real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; // clang-format off - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_minH, - iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3, - fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3); - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_min, - iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + 
i2(p), static_cast(dx2(p)), + i2_min, + iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); // clang-format on // x1-components - const auto Wx1_00 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_0 + iS_x2_0); - const auto Wx1_01 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_1 + iS_x2_1); - const auto Wx1_02 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_2 + iS_x2_2); - const auto Wx1_03 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_10 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_0 + iS_x2_0); - const auto Wx1_11 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_1 + iS_x2_1); - const auto Wx1_12 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_2 + iS_x2_2); - const auto Wx1_13 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_20 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_0 + iS_x2_0); - const auto Wx1_21 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_1 + iS_x2_1); - const auto Wx1_22 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_2 + iS_x2_2); - const auto Wx1_23 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_30 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_0 + iS_x2_0); - const auto Wx1_31 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_1 + iS_x2_1); - const auto Wx1_32 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_2 + iS_x2_2); - const auto Wx1_33 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_3 + iS_x2_3); + const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); + const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); + const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); + const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); + const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); + const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); + const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); + 
const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); + const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); + const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); + const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); + const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); + const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); // x2-components - const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_3 - iS_x2H_3); + const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); + const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); + const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); + const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); 
+ + const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); + const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); + const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); + const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + + const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); + const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); + const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); + const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + + const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); + const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); + const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); + const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); // x3-components const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + @@ -1206,9 +797,7 @@ namespace kernel { const auto jx2_32 = jx2_31 + Wx2_32; const auto jx2_33 = jx2_32 + Wx2_33; - i1_minH += N_GHOSTS; i1_min += N_GHOSTS; - i2_minH += N_GHOSTS; i2_min += N_GHOSTS; // @TODO: not sure about the signs here @@ -1219,46 +808,46 @@ namespace kernel { auto J_acc = J.access(); // x1-currents - J_acc(i1_minH + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - J_acc(i1_minH + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - J_acc(i1_minH + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - J_acc(i1_minH + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - J_acc(i1_minH + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - J_acc(i1_minH + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; - J_acc(i1_minH + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - J_acc(i1_minH + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - J_acc(i1_minH + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - J_acc(i1_minH + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - J_acc(i1_minH + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - J_acc(i1_minH + 2, 
i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - J_acc(i1_minH + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - J_acc(i1_minH + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - J_acc(i1_minH + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - J_acc(i1_minH + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; // x2-currents - J_acc(i1_min + 0, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_00; - J_acc(i1_min + 0, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_01; - J_acc(i1_min + 0, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_02; - J_acc(i1_min + 0, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_03; - - J_acc(i1_min + 1, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_10; - J_acc(i1_min + 1, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_11; - J_acc(i1_min + 1, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_12; - J_acc(i1_min + 1, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_13; - - J_acc(i1_min + 2, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_20; - J_acc(i1_min + 2, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_21; - J_acc(i1_min + 2, i2_minH + 2, cur::jx2) += Qdx2dt * 
jx2_22; - J_acc(i1_min + 2, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_23; - - J_acc(i1_min + 3, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_30; - J_acc(i1_min + 3, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_31; - J_acc(i1_min + 3, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_32; - J_acc(i1_min + 3, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_33; + J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; + J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; // x3-currents J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; @@ -1280,6 +869,7 @@ namespace kernel { J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + } else if constexpr (D == Dim::_3D) { // /* // y - direction @@ -2549,30 +2139,44 @@ namespace kernel { } // dim -> ToDo: 3D! 
- } else if constexpr (O > 3u) { + } else if constexpr ((O > 3u) && (O < 5u)) { // shape function in dim1 -> always required - real_t S0x[O + 2], S1x[O + 2]; + real_t iS_x1[O + 2], fS_x1[O + 2]; // indices of the shape function - ncells_t ix_min; + ncells_t i1_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i1_prev(p), + static_cast(dx1_prev(p)), + i1(p), + static_cast(dx1(p)), + i1_min, + iS_x1, + fS_x1); if constexpr (D == Dim::_1D) { // ToDo } else if constexpr (D == Dim::_2D) { - // shape function in dim2 - real_t S0y[O + 2], S1y[O + 2]; + // shape function in dim1 -> always required + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t iy_min; + ncells_t i2_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i2_prev(p), + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + iS_x2, + fS_x2); // define weight tensors - real_t Wx[O + 1][O + 1]; - real_t Wy[O + 1][O + 1]; - real_t Wz[O + 1][O + 1]; + real_t Wx[O + 2][O + 2]; + real_t Wy[O + 2][O + 2]; + real_t Wz[O + 2][O + 2]; // Calculate weight function #pragma unroll @@ -2580,51 +2184,54 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 - Wx[i][j] = (S1x[i] - S0x[i]) * (S0y[j] + HALF * (S1y[j] - S0y[j])); + Wx[i][j] = (fS_x1[i] - iS_x1[i]) * + (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); - Wy[i][j] = (S1y[i] - S0y[i]) * (S0y[j] + HALF * (S1x[j] - S0x[j])); + Wy[i][j] = (fS_x2[j] - iS_x2[j]) * + (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); - Wz[i][j] = S0x[i] * S0y[j] + HALF * (S1x[i] - S1x[i]) * S0y[j] + - HALF * S0x[i] * (S1y[j] - S0y[j]) + - THIRD * (S1x[i] - S0x[i]) * (S1y[j] - S0y[j]); + Wz[i][j] = iS_x1[i] * iS_x2[j] + + HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + + HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + + THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); } } // contribution within the shape function stencil real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; - // prefactors to j update - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; + // prefactors for j update + const real_t Qdx1dt = -coeff * inv_dt; + const real_t Qdx2dt = -coeff * inv_dt; + const real_t QVx3 = coeff * vp[2]; // Calculate current contribution // jx #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[0][j] = -Qdxdt * Wx[0][j]; + jx[0][j] = Wx[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[i][j] = jx[i - 1][j] - Qdxdt * Wx[i][j]; + jx[i][j] = jx[i - 1][j] + Wx[i][j]; } } // jy #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][0] = -Qdydt * Wy[i][0]; + jy[i][0] = Wy[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][j] = jy[i][j - 1] - Qdydt * Wy[i][j]; + jy[i][j] = jy[i][j - 1] + Wy[i][j]; } } @@ -2633,39 +2240,56 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jz[i][j] = QVz * Wz[i][j]; + jz[i][j] = Wz[i][j]; } } + // account for ghost cells + i1_min += N_GHOSTS; + i2_min += N_GHOSTS; + /* Current update - */ + */ auto J_acc = J.access(); 
#pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - J_acc(ix_min + i, iy_min + j, cur::jx1) += jx[i][j]; - J_acc(ix_min + i, iy_min + j, cur::jx2) += jy[i][j]; - J_acc(ix_min + i, iy_min + j, cur::jx3) += jz[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jy[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jz[i][j]; } } } else if constexpr (D == Dim::_3D) { // shape function in dim2 - real_t S0y[O + 2], S1y[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t iy_min; - - // ToDo: Call shape function + ncells_t i2_min; + // call shape function + prtl_shape::for_deposit(i2_prev(p), + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + iS_x2, + fS_x2); // shape function in dim3 - real_t S0z[O + 2], S1z[O + 2]; + real_t iS_x3[O + 2], fS_x3[O + 2]; // indices of the shape function - ncells_t iz_min; + ncells_t i3_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i3_prev(p), + static_cast(dx3_prev(p)), + i3(p), + static_cast(dx3(p)), + i3_min, + iS_x3, + fS_x3); // define weight tensors real_t Wx[O + 1][O + 1][O + 1]; @@ -2680,17 +2304,17 @@ namespace kernel { #pragma unroll for (int k = 0; k < O + 2; ++k) { // Esirkepov 2001, Eq. 
31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + Wx[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * + ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + + HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + Wy[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * + (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + + HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + Wz[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * + (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + + HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); } } } @@ -2777,9 +2401,9 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx1) += jx[i][j][k]; - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx2) += jy[i][j][k]; - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx3) += jz[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jy[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jz[i][j][k]; } } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index bff8853a..7a0af8b7 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,34 +17,155 @@ namespace prtl_shape { - template - Inline void order_2nd(const int& i, - const real_t& di, - int& i_min, - real_t& S0, - real_t& S1, - real_t& S2) { - if constexpr (not STAGGERED) { // compute at i positions - if (di < HALF) { + template + Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { + if constexpr (O == 2u) { + if constexpr (not STAGGERED) { // compute at i positions + if (di < 
HALF) { + i_min = i - 1; + S[0] = HALF * SQR(HALF - di); + S[1] = THREE_FOURTHS - SQR(di); + S[2] = ONE - S[0] - S[1]; + } else { + i_min = i; + S[0] = HALF * SQR(static_cast(1.5) - di); + S[2] = HALF * SQR(di - HALF); + S[1] = ONE - S[0] - S[2]; + } + } else { // compute at i + 1/2 positions i_min = i - 1; - S0 = HALF * SQR(HALF - di); - S1 = THREE_FOURTHS - SQR(di); - S2 = ONE - S0 - S1; - } else { - i_min = i; - S0 = HALF * SQR(static_cast(1.5) - di); - S2 = HALF * SQR(di - HALF); - S1 = ONE - S0 - S2; - } - } else { // compute at i + 1/2 positions - i_min = i - 1; - S1 = HALF + di - SQR(di); - S2 = HALF * SQR(di); - S0 = ONE - S1 - S2; + S[1] = THREE_FOURTHS - SQR(di - HALF); + S[2] = HALF * SQR(di); + S[0] = ONE - S[1] - S[2]; + } // staggered + } else if constexpr (O == 3u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 2; + S[0] = HALF * THIRD * CUBE(ONE - di); + S[3] = HALF * THIRD * CUBE(di); + S[1] = HALF * THIRD * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[2] = ONE - S[0] - S[1] - S[3]; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 2; + S[0] = HALF * THIRD * CUBE(HALF - di); + S[3] = HALF * THIRD * CUBE(HALF + di); + S[1] = HALF * THIRD * + (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); + S[2] = ONE - S[0] - S[1] - S[3]; + } else { + i_min = i - 1; + S[0] = HALF * THIRD * CUBE(HALF + di); + S[3] = HALF * THIRD * CUBE(HALF + di); + S[1] = HALF * THIRD * + (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[2] = ONE - S[0] - S[1] - S[3]; + } + } // staggered + } else if constexpr (O == 4u) { + // 1/25 * ( 5/2 - |x|)^4 |x| < 3/2 + // S(x) = 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 2; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(HALF - di)); + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(HALF + di)); + S[1] = FIVE * INV_8 - SQR(ONE + di) + + static_cast(32 / 45) * 
CUBE(ONE + di) - + static_cast(98 / 675) * SQR(SQR(ONE + di)); + S[2] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + static_cast(98 / 675) * SQR(SQR(di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } else { + i_min = i - 1; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(THREE * HALF - di)); + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di - HALF)); + S[1] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + static_cast(98 / 675) * SQR(SQR(di)); + S[2] = FIVE * INV_8 - SQR(ONE - di) + + static_cast(32 / 45) * CUBE(ONE - di) - + static_cast(98 / 675) * SQR(SQR(ONE - di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } + } else { // compute at i + 1/2 positions + i_min = i - 2; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(ONE - di)); // + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di)); // + S[1] = FIVE * INV_8 - SQR(HALF + di) + + static_cast(32 / 45) * CUBE(HALF + di) - + static_cast(98 / 675) * SQR(SQR(HALF + di)); + S[2] = FIVE * INV_8 - SQR(HALF - di) + + static_cast(32 / 45) * CUBE(HALF - di) - + static_cast(98 / 675) * SQR(SQR(HALF - di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } // staggered + } else if constexpr (O == 5u) { + // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 + // S(x) = 1/135 * (3 - |x|)^5 2 ≤ |x| < 3 + // 0.0 |x| ≥ 3 + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 2; + S[0] = static_cast(1 / 135) * SQR(CUBE(ONE - di)); // + S[1] = static_cast(3 / 5) - SQR(ONE + di) + + static_cast(5 / 6) * CUBE(ONE + di) - + static_cast(19 / 72) * SQR(SQR(ONE + di)) + + static_cast(13 / 432) * SQR(CUBE(ONE + di)); + S[2] = static_cast(3 / 5) - SQR(di) + + static_cast(5 / 6) * CUBE(di) - + static_cast(19 / 72) * SQR(SQR(di)) + + static_cast(13 / 432) * SQR(CUBE(di)); + S[3] = static_cast(3 / 5) - SQR(ONE - di) + + static_cast(5 / 6) * CUBE(ONE - di) - + static_cast(19 / 72) * SQR(SQR(ONE - di)) + + static_cast(13 / 432) * SQR(CUBE(ONE - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(di)); + S[3] = ONE - S[0] - 
S[1] - S[2] - S[4]; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 3; + S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); + S[1] = static_cast(3 / 5) - SQR(static_cast(1.5) + di) + + static_cast(5 / 6) * CUBE(static_cast(1.5) + di) - + static_cast(19 / 72) * + SQR(SQR(static_cast(1.5) + di)) + + static_cast(13 / 432) * + SQR(CUBE(static_cast(1.5) + di)); + S[2] = static_cast(3 / 5) - SQR(HALF + di) + + static_cast(5 / 6) * CUBE(HALF + di) - + static_cast(19 / 72) * SQR(SQR(HALF + di)) + + static_cast(13 / 432) * SQR(CUBE(HALF + di)); + S[3] = static_cast(3 / 5) - SQR(HALF - di) + + static_cast(5 / 6) * CUBE(HALF - di) - + static_cast(19 / 72) * SQR(SQR(HALF - di)) + + static_cast(13 / 432) * SQR(CUBE(HALF - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(HALF + di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } else { + i_min = i - 2; + S[0] = static_cast(1 / 135) * + SQR(CUBE(static_cast(1.5) - di)); + S[1] = static_cast(3 / 5) - SQR(HALF + di) + + static_cast(5 / 6) * CUBE(HALF + di) - + static_cast(19 / 72) * SQR(SQR(HALF + di)) + + static_cast(13 / 432) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3 / 5) - SQR(di - HALF) + + static_cast(5 / 6) * CUBE(di - HALF) - + static_cast(19 / 72) * SQR(SQR(di - HALF)) + + static_cast(13 / 432) * SQR(CUBE(di - HALF)); + S[3] = static_cast(3 / 5) - SQR(static_cast(1.5) - di) + + static_cast(5 / 6) * CUBE(static_cast(1.5) - di) - + static_cast(19 / 72) * + SQR(SQR(static_cast(1.5) - di)) + + static_cast(13 / 432) * + SQR(CUBE(static_cast(1.5) - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } + } // staggered } } - template Inline void for_deposit_2nd(const int& i_init, const real_t& di_init, const int& i_fin, @@ -60,48 +181,110 @@ namespace prtl_shape { real_t& fS_3) { int i_init_min, i_fin_min; - real_t iS_0_, iS_1_, iS_2_; - real_t fS_0_, fS_1_, fS_2_; + real_t iS_[3], fS_[3]; - order_2nd(i_init, di_init, i_init_min, iS_0_, iS_1_, 
iS_2_); - order_2nd(i_fin, di_fin, i_fin_min, fS_0_, fS_1_, fS_2_); + order(i_init, di_init, i_init_min, iS_); + order(i_fin, di_fin, i_fin_min, fS_); if (i_init_min < i_fin_min) { i_min = i_init_min; - iS_0 = iS_0_; - iS_1 = iS_1_; - iS_2 = iS_2_; + iS_0 = iS_[0]; + iS_1 = iS_[1]; + iS_2 = iS_[2]; iS_3 = ZERO; fS_0 = ZERO; - fS_1 = iS_0_; - fS_2 = iS_1_; - fS_3 = iS_2_; + fS_1 = iS_[0]; + fS_2 = iS_[1]; + fS_3 = iS_[2]; } else if (i_init_min > i_fin_min) { i_min = i_fin_min; iS_0 = ZERO; - iS_1 = iS_0_; - iS_2 = iS_1_; - iS_3 = iS_2_; + iS_1 = iS_[0]; + iS_2 = iS_[1]; + iS_3 = iS_[2]; - fS_0 = iS_0_; - fS_1 = iS_1_; - fS_2 = iS_2_; + fS_0 = iS_[0]; + fS_1 = iS_[1]; + fS_2 = iS_[2]; fS_3 = ZERO; } else { i_min = i_init_min; - iS_0 = iS_0_; - iS_1 = iS_1_; - iS_2 = iS_2_; + iS_0 = iS_[0]; + iS_1 = iS_[1]; + iS_2 = iS_[2]; iS_3 = ZERO; - fS_0 = iS_0_; - fS_1 = iS_1_; - fS_2 = iS_2_; + fS_0 = iS_[0]; + fS_1 = iS_[1]; + fS_2 = iS_[2]; fS_3 = ZERO; } } + template + Inline void for_deposit(const int& i_init, + const real_t& di_init, + const int& i_fin, + const real_t& di_fin, + int& i_min, + real_t* iS, + real_t* fS) { + + int i_init_min, i_fin_min; + + real_t iS_[O + 1], fS_[O + 1]; + + order(i_init, di_init, i_init_min, iS_); + order(i_fin, di_fin, i_fin_min, fS_); + + if (i_init_min < i_fin_min) { + i_min = i_init_min; + +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j] = iS_[j]; + } + iS[O + 1] = ZERO; + + fS[0] = ZERO; +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j + 1] = fS_[j]; + } + + } else if (i_init_min > i_fin_min) { + i_min = i_fin_min; + + iS[0] = ZERO; +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j + 1] = iS_[j]; + } + +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j] = fS_[j]; + } + fS[O + 1] = ZERO; + + } else { + i_min = i_init_min; + +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j] = iS_[j]; + } + iS[O + 1] = ZERO; + +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j] = fS_[j]; + } + fS[O + 1] = ZERO; + } + } + } 
// namespace prtl_shape #endif // KERNELS_PARTICLE_SHAPES_HPP From 9f6f318d95395d68337b4ad84f32257f9dfdc896 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 10:57:08 -0500 Subject: [PATCH 46/82] improved comments and cleanup --- src/kernels/currents_deposit.hpp | 249 ------------------------------- src/kernels/particle_shapes.hpp | 118 +++++++++++---- 2 files changed, 86 insertions(+), 281 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 492dec5c..a4d62aa8 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -46,255 +46,6 @@ namespace kernel { const M metric; const real_t charge, inv_dt; - // Inline void shape_function_2nd(real_t& S0_0, - // real_t& S0_1, - // real_t& S0_2, - // real_t& S0_3, - // real_t& S1_0, - // real_t& S1_1, - // real_t& S1_2, - // real_t& S1_3, - // ncells_t& i_min, - // bool& update_i2, - // const index_t& i, - // const real_t& di, - // const index_t& i_prev, - // const real_t& di_prev) const { - // /* - // Shape function per particle is a 4 element array. 
- // We need to find which indices are contributing to the shape function - // For this we first compute the indices of the particle position - // - // Let * be the particle position at the current timestep - // Let x be the particle position at the previous timestep - // - // - // (-1) 0 1 2 3 - // ___________________________________ - // | | x* | x* | x* | | // shift_i = 0 - // |______|______|______|______|______| - // | | x | x* | x* | * | // shift_i = 1 - // |______|______|______|______|______| - // | * | x* | x* | x | | // shift_i = -1 - // |______|______|______|______|______| - // */ - // - // // find shift in indices - // const int di_less_half = static_cast(di < static_cast(0.5)); - // const int di_prev_less_half = static_cast( - // di_prev < static_cast(0.5)); - // - // const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - // - // // find the minimum index of the shape function - // i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - // - // // center index of the shape function - // const auto di_center_prev = static_cast(1 - di_prev_less_half) - - // di_prev; - // const auto di_center = static_cast(1 - di_less_half) - di; - // - // // find indices and define shape function - // if (shift_i == 1) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | | x | x* | x* | * | // shift_i = 1 - // |______|______|______|______|______| - // */ - // update_i2 = true; - // - // S0_0 = HALF * SQR(HALF + di_center_prev); - // S0_1 = THREE_FOURTHS - SQR(di_center_prev); - // S0_2 = HALF * SQR(HALF - di_center_prev); - // S0_3 = ZERO; - // - // S1_0 = ZERO; - // S1_1 = HALF * SQR(HALF + di_center); - // S1_2 = THREE_FOURTHS - SQR(di_center); - // S1_3 = HALF * SQR(HALF - di_center); - // } else if (shift_i == -1) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | * | x* | x* | x | | // shift_i = -1 - // |______|______|______|______|______| - // */ - // update_i2 = true; - // - // S0_0 = 
ZERO; - // S0_1 = HALF * SQR(HALF + di_center_prev); - // S0_2 = THREE_FOURTHS - SQR(di_center_prev); - // S0_3 = HALF * SQR(HALF - di_center_prev); - // - // S1_0 = HALF * SQR(HALF + di_center); - // S1_1 = THREE_FOURTHS - SQR(di_center); - // S1_2 = HALF * SQR(HALF - di_center); - // S1_3 = ZERO; - // - // } else if (shift_i == 0) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | | x* | x* | x* | | // shift_i = 0 - // |______|______|______|______|______| - // */ - // update_i2 = false; - // - // S0_0 = HALF * SQR(HALF + di_center_prev); - // S0_1 = THREE_FOURTHS - SQR(di_center_prev); - // S0_2 = HALF * SQR(HALF - di_center_prev); - // S0_3 = ZERO; - // - // S1_0 = HALF * SQR(HALF + di_center); - // S1_1 = THREE_FOURTHS - SQR(di_center); - // S1_2 = HALF * SQR(HALF - di_center); - // S1_3 = ZERO; - // } else { - // raise::KernelError(HERE, "Invalid shift in indices"); - // } - // - // // account for ghost cells here to shorten J update expression - // i_min += N_GHOSTS; - // } - - Inline void shape_function_3rd(real_t& S0_0, - real_t& S0_1, - real_t& S0_2, - real_t& S0_3, - real_t& S0_4, - real_t& S1_0, - real_t& S1_1, - real_t& S1_2, - real_t& S1_3, - real_t& S1_4, - ncells_t& i_min, - bool& update_i3, - const index_t& i, - const real_t& di, - const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a 4 element array. 
- We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 2 3 4 - __________________________________________ - | | x* | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______|______| - | | x | x* | x* | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - | * | x* | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - // find shift in indices - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center_prev2 = SQR(di_center_prev); - const auto di_center_prev3 = di_center_prev2 * di_center_prev; - - const auto di_center = static_cast(1 - di_less_half) - di; - const auto di_center2 = SQR(di_center); - const auto di_center3 = di_center2 * di_center; - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 2 3 4 - __________________________________________ - | | x | x* | x* | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - */ - update_i3 = true; - - S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_2 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_3 = static_cast(1 / 6) * di_center_prev3; - S0_4 = ZERO; - - S1_0 = ZERO; - S1_1 = static_cast(1 / 6) * (ONE - di_center3) 
- - HALF * (di_center - di_center2); - S1_2 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_3 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_4 = static_cast(1 / 6) * di_center3; - } else if (shift_i == -1) { - /* - (-1) 0 1 2 3 4 - _________________________________________ - | * | x* | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______|_____| - */ - update_i3 = true; - - S0_0 = ZERO; - S0_1 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_2 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_3 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_4 = static_cast(1 / 6) * di_center_prev3; - - S1_0 = static_cast(1 / 6) * (ONE - di_center3) - - HALF * (di_center - di_center2); - S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_2 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_3 = static_cast(1 / 6) * di_center3; - S1_4 = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 2 3 4 - __________________________________________ - | | x* | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______|______| - */ - update_i3 = false; - - S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_2 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_3 = static_cast(1 / 6) * di_center_prev3; - S0_4 = ZERO; - - S1_0 = static_cast(1 / 6) * (ONE - di_center3) - - HALF * (di_center - di_center2); - S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_2 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_3 = static_cast(1 / 6) * di_center3; - S1_4 = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update 
expression - i_min += N_GHOSTS; - } - public: /** * @brief explicit constructor. diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 7a0af8b7..c793ee67 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -20,6 +20,9 @@ namespace prtl_shape { template Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { if constexpr (O == 2u) { + // 3/4 - |x|^2 |x| < 1/2 + // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 + // 0.0 |x| ≥ 3/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 1; @@ -28,7 +31,7 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * SQR(static_cast(1.5) - di); + S[0] = HALF * SQR(static_cast(3 / 2) - di); S[2] = HALF * SQR(di - HALF); S[1] = ONE - S[0] - S[2]; } @@ -39,25 +42,29 @@ namespace prtl_shape { S[0] = ONE - S[1] - S[2]; } // staggered } else if constexpr (O == 3u) { + // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 + // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 + // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = HALF * THIRD * CUBE(ONE - di); - S[3] = HALF * THIRD * CUBE(di); - S[1] = HALF * THIRD * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[2] = ONE - S[0] - S[1] - S[3]; + S[0] = static_cast(1 / 6) * CUBE(ONE - di); + S[3] = static_cast(1 / 6) * CUBE(di); + S[1] = static_cast(1 / 6) * + (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = HALF * THIRD * CUBE(HALF - di); - S[3] = HALF * THIRD * CUBE(HALF + di); - S[1] = HALF * THIRD * + S[0] = static_cast(1 / 6) * CUBE(HALF - di); + S[3] = static_cast(1 / 6) * CUBE(HALF + di); + S[1] = static_cast(1 / 6) * (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = HALF * THIRD * CUBE(HALF + di); - S[3] = HALF * THIRD * CUBE(HALF + 
di); - S[1] = HALF * THIRD * + S[0] = static_cast(1 / 6) * CUBE(HALF + di); + S[3] = static_cast(1 / 6) * CUBE(HALF + di); + S[1] = static_cast(1 / 6) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -69,33 +76,35 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(HALF - di)); - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(HALF + di)); - S[1] = FIVE * INV_8 - SQR(ONE + di) + + S[0] = static_cast(1 / 25) * SQR(SQR(HALF - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(HALF + di)); + S[1] = static_cast(5 / 8) - SQR(ONE + di) + static_cast(32 / 45) * CUBE(ONE + di) - static_cast(98 / 675) * SQR(SQR(ONE + di)); - S[2] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + S[2] = static_cast(5 / 8) - SQR(di) + + static_cast(32 / 45) * CUBE(di) - static_cast(98 / 675) * SQR(SQR(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 1; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(THREE * HALF - di)); - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di - HALF)); - S[1] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + S[0] = static_cast(1 / 25) * SQR(SQR(THREE * HALF - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(di - HALF)); + S[1] = static_cast(5 / 8) - SQR(di) + + static_cast(32 / 45) * CUBE(di) - static_cast(98 / 675) * SQR(SQR(di)); - S[2] = FIVE * INV_8 - SQR(ONE - di) + + S[2] = static_cast(5 / 8) - SQR(ONE - di) + static_cast(32 / 45) * CUBE(ONE - di) - static_cast(98 / 675) * SQR(SQR(ONE - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(ONE - di)); // - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di)); // - S[1] = FIVE * INV_8 - SQR(HALF + di) + + S[0] = static_cast(1 / 25) * SQR(SQR(ONE - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(di)); + S[1] = static_cast(5 / 8) - SQR(HALF + di) + static_cast(32 / 45) * CUBE(HALF + di) 
- static_cast(98 / 675) * SQR(SQR(HALF + di)); - S[2] = FIVE * INV_8 - SQR(HALF - di) + + S[2] = static_cast(5 / 8) - SQR(HALF - di) + static_cast(32 / 45) * CUBE(HALF - di) - static_cast(98 / 675) * SQR(SQR(HALF - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; @@ -125,12 +134,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3 / 5) - SQR(static_cast(1.5) + di) + - static_cast(5 / 6) * CUBE(static_cast(1.5) + di) - + S[1] = static_cast(3 / 5) - + SQR(static_cast(3 / 2) + di) + + static_cast(5 / 6) * + CUBE(static_cast(3 / 2) + di) - static_cast(19 / 72) * - SQR(SQR(static_cast(1.5) + di)) + + SQR(SQR(static_cast(3 / 2) + di)) + static_cast(13 / 432) * - SQR(CUBE(static_cast(1.5) + di)); + SQR(CUBE(static_cast(3 / 2) + di)); S[2] = static_cast(3 / 5) - SQR(HALF + di) + static_cast(5 / 6) * CUBE(HALF + di) - static_cast(19 / 72) * SQR(SQR(HALF + di)) + @@ -144,7 +155,7 @@ namespace prtl_shape { } else { i_min = i - 2; S[0] = static_cast(1 / 135) * - SQR(CUBE(static_cast(1.5) - di)); + SQR(CUBE(static_cast(3 / 2) - di)); S[1] = static_cast(3 / 5) - SQR(HALF + di) + static_cast(5 / 6) * CUBE(HALF + di) - static_cast(19 / 72) * SQR(SQR(HALF + di)) + @@ -153,12 +164,14 @@ namespace prtl_shape { static_cast(5 / 6) * CUBE(di - HALF) - static_cast(19 / 72) * SQR(SQR(di - HALF)) + static_cast(13 / 432) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3 / 5) - SQR(static_cast(1.5) - di) + - static_cast(5 / 6) * CUBE(static_cast(1.5) - di) - + S[3] = static_cast(3 / 5) - + SQR(static_cast(3 / 2) - di) + + static_cast(5 / 6) * + CUBE(static_cast(3 / 2) - di) - static_cast(19 / 72) * - SQR(SQR(static_cast(1.5) - di)) + + SQR(SQR(static_cast(3 / 2) - di)) + static_cast(13 / 432) * - SQR(CUBE(static_cast(1.5) - di)); + SQR(CUBE(static_cast(3 / 2) - di)); S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } @@ -179,6 +192,27 @@ namespace prtl_shape { real_t& fS_1, 
real_t& fS_2, real_t& fS_3) { + + /* + The second order shape function per particle is a 4 element array + where the shape function contributes to only 3 elements. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + 0 1 2 3 + ____________________________ + | x* | x* | x* | | // i_init_min = i_fin_min + |______|______|______|______| + | x | x* | x* | * | // i_init_min < i_fin_min + |______|______|______|______| + | * | x* | x* | x | // i_init_min > i_fin_min + |______|______|______|______| + */ + int i_init_min, i_fin_min; real_t iS_[3], fS_[3]; @@ -231,6 +265,26 @@ namespace prtl_shape { real_t* iS, real_t* fS) { + /* + The N-th order shape function per particle is a N+2 element array + where the shape function contributes to only N+1 elements. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + 0 1 (...) N N+1 + ___________________________________ + | x* | x* | ... | x* | | // i_init_min = i_fin_min + |______|______|______|______|______| + | x | x* | ... | x* | * | // i_init_min < i_fin_min + |______|______|______|______|______| + | * | x* | ... 
| x* | x | // i_init_min > i_fin_min + |______|______|______|______|______| + */ + int i_init_min, i_fin_min; real_t iS_[O + 1], fS_[O + 1]; From 12210687806b5462b87f7519b9a5c0e2e84ff803 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 14:21:39 -0500 Subject: [PATCH 47/82] cleanup and updates to generalized version (wip) --- src/kernels/currents_deposit.hpp | 1898 ++++++------------------------ src/kernels/particle_shapes.hpp | 27 +- 2 files changed, 354 insertions(+), 1571 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index a4d62aa8..b363b381 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -389,1513 +389,249 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr (O == 2u) { - /* - * Higher order charge conserving current deposition based on - * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - **/ - - // iS -> shape function for init position - // fS -> shape function for final position - - // shape function at integer points (one coeff is always ZERO) - int i1_min; - real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; - real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - - // clang-format off - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_min, - iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); - // clang-format on - - if constexpr (D == Dim::_1D) { - raise::KernelNotImplementedError(HERE); - } else if constexpr (D == Dim::_2D) { - - // shape function at integer points (one coeff is always ZERO) - int i2_min; - real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; - real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - - // clang-format off - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_min, - iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); - // clang-format on - // 
x1-components - const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); - const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); - const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); - const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); - const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); - const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); - const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); - const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); - const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); - const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); - const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); - const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); - const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - - // x2-components - const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); - const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); - const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); - const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); - const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); - const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); - const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); - const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); - const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); - const 
auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); - const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); - const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); - const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - - // x3-components - const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - - const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - - const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - - const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + 
fS_x1_3) + - iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - - // x1-component - const auto jx1_00 = Wx1_00; - const auto jx1_10 = jx1_00 + Wx1_10; - const auto jx1_20 = jx1_10 + Wx1_20; - const auto jx1_30 = jx1_20 + Wx1_30; - - const auto jx1_01 = Wx1_01; - const auto jx1_11 = jx1_01 + Wx1_11; - const auto jx1_21 = jx1_11 + Wx1_21; - const auto jx1_31 = jx1_21 + Wx1_31; - - const auto jx1_02 = Wx1_02; - const auto jx1_12 = jx1_02 + Wx1_12; - const auto jx1_22 = jx1_12 + Wx1_22; - const auto jx1_32 = jx1_22 + Wx1_32; - - const auto jx1_03 = Wx1_03; - const auto jx1_13 = jx1_03 + Wx1_13; - const auto jx1_23 = jx1_13 + Wx1_23; - const auto jx1_33 = jx1_23 + Wx1_33; - - // y-component - const auto jx2_00 = Wx2_00; - const auto jx2_01 = jx2_00 + Wx2_01; - const auto jx2_02 = jx2_01 + Wx2_02; - const auto jx2_03 = jx2_02 + Wx2_03; - - const auto jx2_10 = Wx2_10; - const auto jx2_11 = jx2_10 + Wx2_11; - const auto jx2_12 = jx2_11 + Wx2_12; - const auto jx2_13 = jx2_12 + Wx2_13; - - const auto jx2_20 = Wx2_20; - const auto jx2_21 = jx2_20 + Wx2_21; - const auto jx2_22 = jx2_21 + Wx2_22; - const auto jx2_23 = jx2_22 + Wx2_23; - - const auto jx2_30 = Wx2_30; - const auto jx2_31 = jx2_30 + Wx2_31; - const auto jx2_32 = jx2_31 + Wx2_32; - const auto jx2_33 = jx2_32 + Wx2_33; - - i1_min += N_GHOSTS; - i2_min += N_GHOSTS; - - // @TODO: not sure about the signs here - const real_t Qdx1dt = -coeff * inv_dt; - const real_t Qdx2dt = -coeff * inv_dt; - const real_t QVx3 = coeff * vp[2]; - - auto J_acc = J.access(); - - // x1-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - J_acc(i1_min + 1, i2_min + 1, 
cur::jx1) += Qdx1dt * jx1_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - - // x2-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; - J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - - // x3-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; - 
J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - - } else if constexpr (D == Dim::_3D) { - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0y_0, S0y_1, S0y_2, S0y_3; - // // shape function at current timestep - // real_t S1y_0, S1y_1, S1y_2, S1y_3; - // // indices of the shape function - // ncells_t iy_min; - // bool update_y2; - // // find indices and define shape function - // // clang-format off - // shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - // S1y_0, S1y_1, S1y_2, S1y_3, - // iy_min, update_y2, - // i2(p), dx2(p), - // i2_prev(p), dx2_prev(p)); - // // clang-format on - // - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0z_0, S0z_1, S0z_2, S0z_3; - // // shape function at current timestep - // real_t S1z_0, S1z_1, S1z_2, S1z_3; - // // indices of the shape function - // ncells_t iz_min; - // bool update_z2; - // // find indices and define shape function - // // clang-format off - // shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, - // S1z_0, S1z_1, S1z_2, S1z_3, - // iz_min, update_z2, - // i3(p), dx3(p), - // i3_prev(p), dx3_prev(p)); - // // clang-format on - // - // // Unrolled calculations for Wx, Wy, and Wz - // // clang-format off - // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + 
S0y_0 * S1z_0)); - // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto 
Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_1_3_1 = THIRD * (S1x_1 - 
S0x_1) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_3 + S1y_2 
* S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const real_t Qdxdt = coeff * inv_dt; - // - // const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; - // const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; - // const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; - // const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; - // const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; - // const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; - // const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; - // const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; - // const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; - // const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; - // const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; - // const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; - // - // const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; - // const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; - // const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; - // const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; - // const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; - // const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; - // const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; - // const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; - // const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; - // const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; - // const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; - // const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; - // - // const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; 
- // const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; - // const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; - // const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; - // const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; - // const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; - // const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; - // const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; - // const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; - // const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; - // const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; - // const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; - // - // const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; - // const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; - // const auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; - // const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; - // const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; - // const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; - // const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; - // const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; - // const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; - // const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; - // const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; - // const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; - // - // /* - // y-component - // */ - // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_1_1 = THIRD * (S1y_1 
- S0y_1) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * 
(S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = 
THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt; - // - // const auto jy_0_0_0 = - Qdydt * 
Wy_0_0_0; - // const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; - // const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; - // const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; - // const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; - // const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; - // const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; - // const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; - // const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; - // const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; - // const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; - // const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; - // - // const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; - // const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; - // const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; - // const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; - // const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; - // const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; - // const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; - // const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; - // const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; - // const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; - // const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; - // const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; - // - // const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; - // const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; - // const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; - // const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; - // const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; - // const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; - // const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; - // const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; - // const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; - // const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; - // const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; - // const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; - // - // const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; - // const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; - // const 
auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; - // const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; - // const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; - // const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; - // const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; - // const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; - // const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; - // const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; - // const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; - // const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * 
- // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // // Unrolled loop for 
Wz[i][j][k] with i = 2 and interp_order + 2 = 4 - // const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // - // const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // - // const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // - // const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + 
S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // - // const real_t Qdzdt = coeff * inv_dt; - // - // const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; - // const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; - // const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; - // const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; - // const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; - // const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; - // const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; - // const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; - // const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; - // const auto 
jz_0_3_0 = - Qdzdt * Wz_0_3_0; - // const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; - // const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; - // - // const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; - // const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; - // const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; - // const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; - // const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; - // const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; - // const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; - // const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; - // const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; - // const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; - // const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; - // const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; - // - // const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; - // const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; - // const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; - // const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; - // const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; - // const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; - // const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; - // const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; - // const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; - // const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; - // const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; - // const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; - // - // const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; - // const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; - // const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; - // const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; - // const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; - // const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; - // const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; - // const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; - // const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; - // const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; - // const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * 
Wz_3_3_1; - // const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; - // - // - // /* - // Current update - // */ - // auto J_acc = J.access(); - // - // J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; - // - // if (update_y2) - // { - // 
J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; - // } - // - // if (update_z2) - // { - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; - // - // if (update_y2) - // { - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; - // } - // } - // } - // // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; - // } - // } - // - // - // /* - // y-component - // */ - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; - // J_acc(ix_min, 
iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; - // - // if (update_z2) - // { - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; - // } - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, 
iz_min + 1, cur::jx2) += jy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; - // - // if (update_z2) - // { - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; - // } - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; - // } - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; - // } - // - // /* - // z-component - // */ - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; - // J_acc(ix_min + 2, iy_min, 
iz_min, cur::jx3) += jz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, 
cur::jx3) += jz_2_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; - // - // if (update_y2) - // { - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; - // } - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; - // } - // } - // clang-format on - } // dimension - - } else if constexpr (O == 3u) { - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. 
- S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ - - /* - x - direction - */ - - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3, S0x_4; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3, S1x_4; - // indices of the shape function - ncells_t ix_min; - bool update_x3; - // find indices and define shape function - // clang-format off - shape_function_3rd(S0x_0, S0x_1, S0x_2, S0x_3, S0x_4, - S1x_0, S1x_1, S1x_2, S1x_3, S1x_4, - ix_min, update_x3, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); - // clang-format on - - if constexpr (D == Dim::_1D) { - // ToDo - } else if constexpr (D == Dim::_2D) { - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3, S0y_4; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3, S1y_4; - // indices of the shape function - ncells_t iy_min; - bool update_y3; - // find indices and define shape function - // clang-format off - shape_function_3rd(S0y_0, S0y_1, S0y_2, S0y_3, S0y_4, - S1y_0, S1y_1, S1y_2, S1y_3, S1y_4, - iy_min, update_y3, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - // clang-format on - - // Esirkepov 2001, Eq. 
38 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - const auto Wx_0_4 = HALF * (S1x_0 - S0x_0) * (S0y_4 + S1y_4); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - const auto Wx_1_4 = HALF * (S1x_1 - S0x_1) * (S0y_4 + S1y_4); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - const auto Wx_2_4 = HALF * (S1x_2 - S0x_2) * (S0y_4 + S1y_4); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - const auto Wx_3_4 = HALF * (S1x_3 - S0x_3) * (S0y_4 + S1y_4); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); - const auto Wy_2_1 = HALF 
* (S1x_2 + S0x_2) * (S1y_1 - S0y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); - - const auto Wy_4_0 = HALF * (S1x_4 + S0x_4) * (S1y_0 - S0y_0); - const auto Wy_4_1 = HALF * (S1x_4 + S0x_4) * (S1y_1 - S0y_1); - const auto Wy_4_2 = HALF * (S1x_4 + S0x_4) * (S1y_2 - S0y_2); - const auto Wy_4_3 = HALF * (S1x_4 + S0x_4) * (S1y_3 - S0y_3); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_4 = THIRD * (S1y_4 * (HALF * S0x_0 + S1x_0) + - S0y_4 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_4 = THIRD * (S1y_4 * (HALF * S0x_1 + S1x_1) + - S0y_4 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + 
S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_4 = THIRD * (S1y_4 * (HALF * S0x_2 + S1x_2) + - S0y_4 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_4 = THIRD * (S1y_4 * (HALF * S0x_3 + S1x_3) + - S0y_4 * (HALF * S1x_3 + S0x_3)); - - const auto Wz_4_0 = THIRD * (S1y_0 * (HALF * S0x_4 + S1x_4) + - S0y_0 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_1 = THIRD * (S1y_1 * (HALF * S0x_4 + S1x_4) + - S0y_1 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_2 = THIRD * (S1y_2 * (HALF * S0x_4 + S1x_4) + - S0y_2 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_3 = THIRD * (S1y_3 * (HALF * S0x_4 + S1x_4) + - S0y_3 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_4 = THIRD * (S1y_4 * (HALF * S0x_4 + S1x_4) + - S0y_4 * (HALF * S1x_4 + S0x_4)); - - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; - - // Esirkepov - Eq. 
39 - // x-component - const auto jx_0_0 = -Qdxdt * Wx_0_0; - const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; - const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; - - const auto jx_0_1 = -Qdxdt * Wx_0_1; - const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; - const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; - - const auto jx_0_2 = -Qdxdt * Wx_0_2; - const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; - const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; - - const auto jx_0_3 = -Qdxdt * Wx_0_3; - const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; - const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; - const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; - - const auto jx_0_4 = -Qdxdt * Wx_0_4; - const auto jx_1_4 = jx_0_4 - Qdxdt * Wx_1_4; - const auto jx_2_4 = jx_1_4 - Qdxdt * Wx_2_4; - const auto jx_3_4 = jx_2_4 - Qdxdt * Wx_3_4; - - // y-component - const auto jy_0_0 = -Qdydt * Wy_0_0; - const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; - const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; - - const auto jy_1_0 = -Qdydt * Wy_1_0; - const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; - const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; - - const auto jy_2_0 = -Qdydt * Wy_2_0; - const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; - const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; - const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; - - const auto jy_3_0 = -Qdydt * Wy_3_0; - const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; - const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; - - const auto jy_4_0 = -Qdydt * Wy_4_0; - const auto jy_4_1 = jy_4_0 - Qdydt * Wy_4_1; - const auto jy_4_2 = jy_4_1 - Qdydt * Wy_4_2; - const auto jy_4_3 = jy_4_2 - Qdydt * Wy_4_3; - - /* - Current update - */ - auto J_acc = J.access(); - - /* - x - component - */ - J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; - J_acc(ix_min, 
iy_min + 1, cur::jx1) += jx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; - - if (update_x3) { - J_acc(ix_min + 3, iy_min, cur::jx1) += jx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_3_3; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 4, cur::jx1) += jx_0_4; - J_acc(ix_min + 1, iy_min + 4, cur::jx1) += jx_1_4; - J_acc(ix_min + 2, iy_min + 4, cur::jx1) += jx_2_4; - } - - if (update_x3 && update_y3) { - J_acc(ix_min + 3, iy_min + 4, cur::jx1) += jx_3_4; - } - - /* - y - component - */ - J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; - - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; - - if (update_x3) { - J_acc(ix_min + 4, iy_min, cur::jx2) += jy_4_0; - J_acc(ix_min + 4, iy_min + 1, cur::jx2) += jy_4_1; - J_acc(ix_min + 4, iy_min + 2, cur::jx2) += jy_4_2; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 3, cur::jx2) += jy_0_3; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_1_3; - 
J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_2_3; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_3_3; - } - - if (update_x3 && update_y3) { - J_acc(ix_min + 4, iy_min + 3, cur::jx2) += jy_4_3; - } - /* - z - component, simulated direction - */ - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - - if (update_x3) { - J_acc(ix_min + 4, iy_min, cur::jx3) += QVz * Wz_4_0; - J_acc(ix_min + 4, iy_min + 1, cur::jx3) += QVz * Wz_4_1; - J_acc(ix_min + 4, iy_min + 2, cur::jx3) += QVz * Wz_4_2; - J_acc(ix_min + 4, iy_min + 3, cur::jx3) += QVz * Wz_4_3; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 4, cur::jx3) += QVz * Wz_0_4; - J_acc(ix_min + 1, iy_min + 4, cur::jx3) += QVz * Wz_1_4; - J_acc(ix_min + 2, iy_min + 4, cur::jx3) += QVz * Wz_2_4; - J_acc(ix_min + 3, iy_min + 4, cur::jx3) += QVz * Wz_3_4; - } - if (update_x3 && update_y3) { - J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; - } - - } // dim -> ToDo: 3D! 
- } else if constexpr ((O > 3u) && (O < 5u)) { + // } else if constexpr (O == 2u) { + // /* + // * Higher order charge conserving current deposition based on + // * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + // **/ + + // // iS -> shape function for init position + // // fS -> shape function for final position + + // // shape function at integer points (one coeff is always ZERO) + // int i1_min; + // real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; + // real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; + + // // clang-format off + // prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + // i1(p), static_cast(dx1(p)), + // i1_min, + // iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + // fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); + // // clang-format on + + // if constexpr (D == Dim::_1D) { + // raise::KernelNotImplementedError(HERE); + // } else if constexpr (D == Dim::_2D) { + + // // shape function at integer points (one coeff is always ZERO) + // int i2_min; + // real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; + // real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; + + // // clang-format off + // prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + // i2(p), static_cast(dx2(p)), + // i2_min, + // iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + // fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); + // // clang-format on + // // x1-components + // const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_20 = 
HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); + + // // x2-components + // const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); + + // // x3-components + // const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * 
iS_x1_0 + fS_x1_0) + + // iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + + // const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + + // const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + + // const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + + // // x1-component + // const auto jx1_00 = Wx1_00; + // const auto jx1_10 = jx1_00 + Wx1_10; + // const auto jx1_20 = jx1_10 + Wx1_20; + // const auto jx1_30 = jx1_20 + Wx1_30; + + // const auto jx1_01 = Wx1_01; + // const auto jx1_11 = jx1_01 + Wx1_11; + // const auto jx1_21 = jx1_11 
+ Wx1_21; + // const auto jx1_31 = jx1_21 + Wx1_31; + + // const auto jx1_02 = Wx1_02; + // const auto jx1_12 = jx1_02 + Wx1_12; + // const auto jx1_22 = jx1_12 + Wx1_22; + // const auto jx1_32 = jx1_22 + Wx1_32; + + // const auto jx1_03 = Wx1_03; + // const auto jx1_13 = jx1_03 + Wx1_13; + // const auto jx1_23 = jx1_13 + Wx1_23; + // const auto jx1_33 = jx1_23 + Wx1_33; + + // // y-component + // const auto jx2_00 = Wx2_00; + // const auto jx2_01 = jx2_00 + Wx2_01; + // const auto jx2_02 = jx2_01 + Wx2_02; + // const auto jx2_03 = jx2_02 + Wx2_03; + + // const auto jx2_10 = Wx2_10; + // const auto jx2_11 = jx2_10 + Wx2_11; + // const auto jx2_12 = jx2_11 + Wx2_12; + // const auto jx2_13 = jx2_12 + Wx2_13; + + // const auto jx2_20 = Wx2_20; + // const auto jx2_21 = jx2_20 + Wx2_21; + // const auto jx2_22 = jx2_21 + Wx2_22; + // const auto jx2_23 = jx2_22 + Wx2_23; + + // const auto jx2_30 = Wx2_30; + // const auto jx2_31 = jx2_30 + Wx2_31; + // const auto jx2_32 = jx2_31 + Wx2_32; + // const auto jx2_33 = jx2_32 + Wx2_33; + + // i1_min += N_GHOSTS; + // i2_min += N_GHOSTS; + + // // @TODO: not sure about the signs here + // const real_t Qdx1dt = -coeff * inv_dt; + // const real_t Qdx2dt = -coeff * inv_dt; + // const real_t QVx3 = coeff * vp[2]; + + // auto J_acc = J.access(); + + // // x1-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + // J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + // J_acc(i1_min + 2, 
i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + + // // x2-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + + // J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; + // J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; + + // // x3-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + + // J_acc(i1_min + 2, i2_min + 
0, cur::jx3) += QVx3 * Wx3_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; + // J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + + // } else if constexpr (D == Dim::_3D) { + // raise::KernelNotImplementedError(HERE); + // } // dimension + + } else if constexpr ((O > 1u) && (O < 6u)) { // shape function in dim1 -> always required - real_t iS_x1[O + 2], fS_x1[O + 2]; + real_t iS_x1[O + 2], fS_x1[O + 2]; // indices of the shape function - ncells_t i1_min; + int i1_min, i1_max; // call shape function prtl_shape::for_deposit(i1_prev(p), @@ -1903,31 +639,34 @@ namespace kernel { i1(p), static_cast(dx1(p)), i1_min, + i1_max, iS_x1, fS_x1); if constexpr (D == Dim::_1D) { // ToDo + raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { // shape function in dim1 -> always required - real_t iS_x2[O + 2], fS_x2[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t i2_min; + int i2_min, i2_max; // call shape function prtl_shape::for_deposit(i2_prev(p), - static_cast(dx2_prev(p)), - i2(p), - static_cast(dx2(p)), - i2_min, - iS_x2, - fS_x2); + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + i2_max, + iS_x2, + fS_x2); // define weight tensors - real_t Wx[O + 2][O + 2]; - real_t Wy[O + 2][O + 2]; - real_t Wz[O + 2][O + 2]; + real_t Wx1[O + 2][O + 2]; + real_t Wx2[O + 2][O + 2]; + real_t Wx3[O + 2][O + 2]; // Calculate weight function #pragma unroll @@ -1935,21 +674,28 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 - Wx[i][j] = (fS_x1[i] - iS_x1[i]) * - (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + Wx1[i][j] = (fS_x1[i] - iS_x1[i]) * + (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + + Wx2[i][j] = (fS_x2[j] - iS_x2[j]) * + (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + + Wx3[i][j] = iS_x1[i] * iS_x2[j] + + HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + + HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + + THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wy[i][j] = (fS_x2[j] - iS_x2[j]) * - (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + // Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wz[i][j] = iS_x1[i] * iS_x2[j] + - HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + - HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + - THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + // Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + + // Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x2[j]) + + // iS_x2[j] * (HALF * fS_x2[j] + iS_x2[i])); } } // contribution within the shape function stencil - real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; + real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2], jx3[O + 2][O + 2]; // prefactors for j update const real_t Qdx1dt = -coeff * inv_dt; @@ -1958,80 +704,95 @@ namespace kernel { // Calculate current contribution - // jx + // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[0][j] = Wx[0][j]; + jx1[0][j] = Wx1[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[i][j] = jx[i - 1][j] + Wx[i][j]; + jx1[i][j] = jx1[i - 1][j] + Wx1[i][j]; } } - // jy + // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][0] = Wy[i][0]; + jx2[i][0] = Wx2[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][j] = jy[i][j - 1] + Wy[i][j]; + jx2[i][j] = jx2[i][j - 1] + Wx2[i][j]; } } - // jz + // jx3 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - 
jz[i][j] = Wz[i][j]; + jx3[i][j] = Wx3[i][j]; } } // account for ghost cells i1_min += N_GHOSTS; i2_min += N_GHOSTS; + i1_max += N_GHOSTS; + i2_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + const int di_x2 = i2_max - i2_min; /* Current update */ auto J_acc = J.access(); -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx[i][j]; - J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jy[i][j]; - J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jz[i][j]; + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; + } + } + + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; + } + } + + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; } } } else if constexpr (D == Dim::_3D) { // shape function in dim2 - real_t iS_x2[O + 2], fS_x2[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t i2_min; + int i2_min, i2_max; // call shape function prtl_shape::for_deposit(i2_prev(p), static_cast(dx2_prev(p)), i2(p), static_cast(dx2(p)), i2_min, + i2_max, iS_x2, fS_x2); // shape function in dim3 - real_t iS_x3[O + 2], fS_x3[O + 2]; + real_t iS_x3[O + 2], fS_x3[O + 2]; // indices of the shape function - ncells_t i3_min; + int i3_min, i3_max; // call shape function prtl_shape::for_deposit(i3_prev(p), @@ -2039,13 +800,14 @@ namespace kernel { i3(p), static_cast(dx3(p)), i3_min, + i3_max, iS_x3, fS_x3); // define weight tensors - real_t Wx[O + 1][O + 1][O + 1]; - real_t Wy[O + 1][O + 1][O + 1]; - real_t Wz[O + 1][O + 1][O + 1]; + real_t Wx1[O + 2][O + 2][O + 2]; + real_t Wx2[O + 2][O + 2][O + 2]; + real_t Wx3[O + 2][O + 2][O + 2]; // 
Calculate weight function #pragma unroll @@ -2055,24 +817,24 @@ namespace kernel { #pragma unroll for (int k = 0; k < O + 2; ++k) { // Esirkepov 2001, Eq. 31 - Wx[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * - ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + - HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); + Wx1[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * + ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + + HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); - Wy[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * - (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + - HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); + Wx2[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * + (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + + HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); - Wz[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * - (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + - HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); + Wx3[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * + (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + + HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); } } } // contribution within the shape function stencil - real_t jx[O + 2][O + 2][O + 2], jy[O + 2][O + 2][O + 2], - jz[O + 2][O + 2][O + 2]; + real_t jx1[O + 2][O + 2][O + 2], jx2[O + 2][O + 2][O + 2], + jx3[O + 2][O + 2][O + 2]; // prefactors to j update const real_t Qdxdt = coeff * inv_dt; @@ -2081,12 +843,12 @@ namespace kernel { // Calculate current contribution - // jx + // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - jx[0][j][k] = -Qdxdt * Wx[0][j][k]; + jx1[0][j][k] = -Qdxdt * Wx1[0][j][k]; } } @@ -2096,17 +858,17 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 0; j < O + 2; ++k) { - jx[i][j][k] = jx[i - 1][j][k] - Qdxdt * Wx[i][j][k]; + jx1[i][j][k] = jx1[i - 1][j][k] - Qdxdt * Wx1[i][j][k]; } } } - // jy + // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - 
jy[i][0][k] = -Qdydt * Wy[i][0][k]; + jx2[i][0][k] = -Qdydt * Wx2[i][0][k]; } } @@ -2116,17 +878,17 @@ namespace kernel { for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - jy[i][j][k] = jy[i][j - 1][k] - Qdydt * Wy[i][j][k]; + jx2[i][j][k] = jx2[i][j - 1][k] - Qdydt * Wx2[i][j][k]; } } } - // jz + // jx3 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jy[i][j][0] = -Qdydt * Wy[i][j][0]; + jx2[i][j][0] = -Qdydt * Wx2[i][j][0]; } } @@ -2136,7 +898,7 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - jz[i][j][k] = jz[i][j][k - 1] - Qdzdt * Wz[i][j][k]; + jx3[i][j][k] = jx3[i][j][k - 1] - Qdzdt * Wx3[i][j][k]; } } } @@ -2152,9 +914,9 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jy[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jz[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx1[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jx2[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jx3[i][j][k]; } } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index c793ee67..776271c7 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -19,7 +19,25 @@ namespace prtl_shape { template Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { - if constexpr (O == 2u) { + if constexpr (O == 1u) { + // S(x) = 1 - |x| |x| < 1 + // 0.0 |x| ≥ 1 + if constexpr (not STAGGERED) { // compute at i positions + i_min = i; + S[0] = ONE - di; + S[1] = di; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 1; + S[0] = HALF - di; + S[1] = ONE - S[0]; + } else { + i_min = i; + S[1] = static_cast(1.5) - di; + S[0] = ONE - 
S[1]; + } + } // staggered + } else if constexpr (O == 2u) { // 3/4 - |x|^2 |x| < 1/2 // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 // 0.0 |x| ≥ 3/2 @@ -194,7 +212,7 @@ namespace prtl_shape { real_t& fS_3) { /* - The second order shape function per particle is a 4 element array + The second order shape function per particle is a 4 element array where the shape function contributes to only 3 elements. We need to find which indices are contributing to the shape function For this we first compute the indices of the particle position @@ -262,6 +280,7 @@ namespace prtl_shape { const int& i_fin, const real_t& di_fin, int& i_min, + int& i_max, real_t* iS, real_t* fS) { @@ -294,6 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; + i_max = i_fin_min + O + 1; #pragma unroll for (int j = 0; j < O; j++) { @@ -309,6 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; + i_max = i_init_min + O + 1; iS[0] = ZERO; #pragma unroll @@ -324,6 +345,7 @@ namespace prtl_shape { } else { i_min = i_init_min; + i_max = i_min + O; #pragma unroll for (int j = 0; j < O; j++) { @@ -338,7 +360,6 @@ namespace prtl_shape { fS[O + 1] = ZERO; } } - } // namespace prtl_shape #endif // KERNELS_PARTICLE_SHAPES_HPP From 618578330414cd653cea41d99c4ecc89dcaca3ca Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 14:39:00 -0500 Subject: [PATCH 48/82] bugfix --- src/kernels/particle_shapes.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 776271c7..fa3649db 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -313,7 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; - i_max = i_fin_min + O + 1; + i_max = i_fin_min + O; #pragma unroll for (int j = 0; j < O; j++) { @@ -329,7 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; - i_max 
= i_init_min + O + 1; + i_max = i_init_min + O; iS[0] = ZERO; #pragma unroll From d55240d8f8c21582c1d2b4fd5cd0e08f13947336 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 6 Aug 2025 15:53:11 -0400 Subject: [PATCH 49/82] fixed old zigzag --- src/kernels/currents_deposit.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index b363b381..bab0bd15 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -156,7 +156,6 @@ namespace kernel { /* Zig-zag deposit */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; @@ -247,7 +246,7 @@ namespace kernel { cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); J_acc(i1_prev(p) + N_GHOSTS + 1, i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); J_acc(i1_prev(p) + N_GHOSTS, i2_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; From 87a80c2c61553ff877d01c48bd7185a707c3e67a Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 7 Aug 2025 14:57:21 -0400 Subject: [PATCH 50/82] esirkepov ranges fixed --- pgens/streaming/pgen.hpp | 11 + src/engines/srpic.hpp | 52 ++-- src/kernels/currents_deposit.hpp | 490 +++++++++++++++---------------- src/kernels/particle_shapes.hpp | 18 +- 4 files changed, 289 insertions(+), 282 deletions(-) diff --git a/pgens/streaming/pgen.hpp b/pgens/streaming/pgen.hpp index ee14712d..a08204ac 100644 --- a/pgens/streaming/pgen.hpp +++ b/pgens/streaming/pgen.hpp @@ -103,6 +103,17 @@ namespace user { domain, injector, densities[n / 2]); + // for (auto& i : { n, n + 1 }) { + // auto& ux2 = domain.species[i].ux2; + // auto& ux3 = domain.species[i].ux3; + // Kokkos::parallel_for( + // "Remove_ux2ux3", + // domain.species[i].npart(), + // Lambda(index_t p) { + // ux2(p) = ZERO; + // ux3(p) = ZERO; + // }); + // } } } }; diff --git a/src/engines/srpic.hpp 
b/src/engines/srpic.hpp index 83c4e9bd..b63415a0 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -507,6 +507,26 @@ namespace ntt { } } + template + void deposit_with(const Particles& species, + const M& metric, + const scatter_ndfield_t& scatter_cur, + real_t dt) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + metric, (real_t)(species.charge()), dt)); + // clang-format on + } + void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); @@ -523,34 +543,12 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - if (shape_order == 1) { - // clang-format off - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, species.i2, species.i3, - species.i1_prev, species.i2_prev, species.i3_prev, - species.dx1, species.dx2, species.dx3, - species.dx1_prev, species.dx2_prev, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.phi, species.weight, species.tag, - domain.mesh.metric, (real_t)(species.charge()), dt)); - // clang-format on + if (shape_order == 0) { + deposit_with<0u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 1) { + deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 2) { - // clang-format off - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, species.i2, species.i3, - species.i1_prev, species.i2_prev, species.i3_prev, - species.dx1, species.dx2, 
species.dx3, - species.dx1_prev, species.dx2_prev, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.phi, species.weight, species.tag, - domain.mesh.metric, (real_t)(species.charge()), dt)); - // clang-format on + deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index d8348ed0..51610800 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -166,7 +166,7 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter - if constexpr (O == 1u) { + if constexpr (O == 0u) { /* Zig-zag deposit */ @@ -191,7 +191,6 @@ namespace kernel { auto J_acc = J.access(); - // tuple_t dxp_r; if constexpr (D == Dim::_1D) { const real_t Fx2_1 { HALF * vp[1] * coeff }; const real_t Fx2_2 { HALF * vp[1] * coeff }; @@ -402,244 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - - // } else if constexpr (O == 2u) { - // /* - // * Higher order charge conserving current deposition based on - // * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - // **/ - - // // iS -> shape function for init position - // // fS -> shape function for final position - - // // shape function at integer points (one coeff is always ZERO) - // int i1_min; - // real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; - // real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - - // // clang-format off - // prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - // i1(p), static_cast(dx1(p)), - // i1_min, - // iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - // fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); - // // clang-format on - - // if constexpr (D == Dim::_1D) { - // raise::KernelNotImplementedError(HERE); - // } else if constexpr (D == Dim::_2D) { - - // // shape function at integer points (one coeff is always ZERO) - // int 
i2_min; - // real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; - // real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - - // // clang-format off - // prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - // i2(p), static_cast(dx2(p)), - // i2_min, - // iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - // fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); - // // clang-format on - // // x1-components - // const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - - // // x2-components - // const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); - // const auto 
Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - - // // x3-components - // const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - - // const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - - // const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); - // const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_1 * (HALF * fS_x1_2 + 
iS_x1_2)); - // const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); - // const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - - // const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - - // // x1-component - // const auto jx1_00 = Wx1_00; - // const auto jx1_10 = jx1_00 + Wx1_10; - // const auto jx1_20 = jx1_10 + Wx1_20; - // const auto jx1_30 = jx1_20 + Wx1_30; - - // const auto jx1_01 = Wx1_01; - // const auto jx1_11 = jx1_01 + Wx1_11; - // const auto jx1_21 = jx1_11 + Wx1_21; - // const auto jx1_31 = jx1_21 + Wx1_31; - - // const auto jx1_02 = Wx1_02; - // const auto jx1_12 = jx1_02 + Wx1_12; - // const auto jx1_22 = jx1_12 + Wx1_22; - // const auto jx1_32 = jx1_22 + Wx1_32; - - // const auto jx1_03 = Wx1_03; - // const auto jx1_13 = jx1_03 + Wx1_13; - // const auto jx1_23 = jx1_13 + Wx1_23; - // const auto jx1_33 = jx1_23 + Wx1_33; - - // // y-component - // const auto jx2_00 = Wx2_00; - // const auto jx2_01 = jx2_00 + Wx2_01; - // const auto jx2_02 = jx2_01 + Wx2_02; - // const auto jx2_03 = jx2_02 + Wx2_03; - - // const auto jx2_10 = Wx2_10; - // const auto jx2_11 = jx2_10 + Wx2_11; - // const auto jx2_12 = jx2_11 + Wx2_12; - // const auto jx2_13 = jx2_12 + Wx2_13; - - // const auto jx2_20 = Wx2_20; - // const auto jx2_21 = jx2_20 + Wx2_21; - // const auto jx2_22 = jx2_21 + Wx2_22; - // const auto jx2_23 = jx2_22 + Wx2_23; - - // const auto jx2_30 = Wx2_30; - // const auto jx2_31 = jx2_30 + Wx2_31; - // const auto jx2_32 = jx2_31 + 
Wx2_32; - // const auto jx2_33 = jx2_32 + Wx2_33; - - // i1_min += N_GHOSTS; - // i2_min += N_GHOSTS; - - // // @TODO: not sure about the signs here - // const real_t Qdx1dt = -coeff * inv_dt; - // const real_t Qdx2dt = -coeff * inv_dt; - // const real_t QVx3 = coeff * vp[2]; - - // auto J_acc = J.access(); - - // // x1-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - // J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - - // // x2-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; - // 
J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - - // // x3-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; - // J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - - // } else if constexpr (D == Dim::_3D) { - // raise::KernelNotImplementedError(HERE); - // } // dimension - - } else if constexpr ((O > 1u) && (O < 6u)) { + } else if constexpr ((O >= 1u) and (O <= 5u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; @@ -769,20 +531,20 @@ namespace kernel { */ auto J_acc = J.access(); - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { 
J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; } } - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; } } - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; } } @@ -943,5 +705,241 @@ namespace kernel { } // namespace kernel #undef i_di_to_Xi +// +// } else if constexpr (O == 2u) { +// /* +// * Higher order charge conserving current deposition based on +// * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract +// **/ + +// // iS -> shape function for init position +// // fS -> shape function for final position + +// // shape function at integer points (one coeff is always ZERO) +// int i1_min; +// real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; +// real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; + +// // clang-format off +// prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), +// i1(p), static_cast(dx1(p)), +// i1_min, +// iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, +// fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); +// // clang-format on + +// if constexpr (D == Dim::_1D) { +// raise::KernelNotImplementedError(HERE); +// } else if constexpr (D == Dim::_2D) { + +// // shape function at integer points (one coeff is always ZERO) +// int i2_min; +// real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; +// real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; + +// // clang-format off +// prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), +// i2(p), static_cast(dx2(p)), +// i2_min, +// iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, +// fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); +// // clang-format on +// // x1-components +// const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 
+ iS_x2_1); +// const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); + +// // x2-components +// const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_30 = HALF 
* (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); + +// // x3-components +// const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + +// const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + +// const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + +// const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); +// const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); +// const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_2 * (HALF * 
fS_x1_3 + iS_x1_3)); +// const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + +// // x1-component +// const auto jx1_00 = Wx1_00; +// const auto jx1_10 = jx1_00 + Wx1_10; +// const auto jx1_20 = jx1_10 + Wx1_20; +// const auto jx1_30 = jx1_20 + Wx1_30; + +// const auto jx1_01 = Wx1_01; +// const auto jx1_11 = jx1_01 + Wx1_11; +// const auto jx1_21 = jx1_11 + Wx1_21; +// const auto jx1_31 = jx1_21 + Wx1_31; + +// const auto jx1_02 = Wx1_02; +// const auto jx1_12 = jx1_02 + Wx1_12; +// const auto jx1_22 = jx1_12 + Wx1_22; +// const auto jx1_32 = jx1_22 + Wx1_32; + +// const auto jx1_03 = Wx1_03; +// const auto jx1_13 = jx1_03 + Wx1_13; +// const auto jx1_23 = jx1_13 + Wx1_23; +// const auto jx1_33 = jx1_23 + Wx1_33; + +// // y-component +// const auto jx2_00 = Wx2_00; +// const auto jx2_01 = jx2_00 + Wx2_01; +// const auto jx2_02 = jx2_01 + Wx2_02; +// const auto jx2_03 = jx2_02 + Wx2_03; + +// const auto jx2_10 = Wx2_10; +// const auto jx2_11 = jx2_10 + Wx2_11; +// const auto jx2_12 = jx2_11 + Wx2_12; +// const auto jx2_13 = jx2_12 + Wx2_13; + +// const auto jx2_20 = Wx2_20; +// const auto jx2_21 = jx2_20 + Wx2_21; +// const auto jx2_22 = jx2_21 + Wx2_22; +// const auto jx2_23 = jx2_22 + Wx2_23; + +// const auto jx2_30 = Wx2_30; +// const auto jx2_31 = jx2_30 + Wx2_31; +// const auto jx2_32 = jx2_31 + Wx2_32; +// const auto jx2_33 = jx2_32 + Wx2_33; + +// i1_min += N_GHOSTS; +// i2_min += N_GHOSTS; + +// // @TODO: not sure about the signs here +// const real_t Qdx1dt = -coeff * inv_dt; +// const real_t Qdx2dt = -coeff * inv_dt; +// const real_t QVx3 = coeff * vp[2]; + +// auto J_acc = J.access(); + +// // x1-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; +// J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + +// J_acc(i1_min + 1, i2_min + 0, 
cur::jx1) += Qdx1dt * jx1_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + +// // x2-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; +// J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + +// J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; + +// // x3-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; +// 
J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + +// J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + +// } else if constexpr (D == Dim::_3D) { +// raise::KernelNotImplementedError(HERE); +// } // dimension #endif // KERNELS_CURRENTS_DEPOSIT_HPP diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index fa3649db..7d626c9d 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -18,7 +18,7 @@ namespace prtl_shape { template - Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { + Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { // S(x) = 1 - |x| |x| < 1 // 0.0 |x| ≥ 1 @@ -281,8 +281,8 @@ namespace prtl_shape { const real_t& di_fin, int& i_min, int& i_max, - real_t* iS, - real_t* fS) { + real_t iS[O + 2], + real_t fS[O + 2]) { /* The N-th order shape function per particle is a N+2 element array @@ -316,14 +316,14 @@ namespace prtl_shape { i_max = i_fin_min + O; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j] = iS_[j]; } iS[O + 1] = ZERO; fS[0] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j + 1] = fS_[j]; } @@ -333,12 +333,12 @@ namespace prtl_shape { 
iS[0] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j + 1] = iS_[j]; } #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j] = fS_[j]; } fS[O + 1] = ZERO; @@ -348,13 +348,13 @@ namespace prtl_shape { i_max = i_min + O; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j] = iS_[j]; } iS[O + 1] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j] = fS_[j]; } fS[O + 1] = ZERO; From 877e9f54b4395582e744817bf7c42de615a0d205 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Tue, 12 Aug 2025 18:17:30 -0500 Subject: [PATCH 51/82] generalized field interpolation to arbitrary order (wip) --- src/kernels/particle_pusher_sr.hpp | 1308 +++++++++++----------------- 1 file changed, 522 insertions(+), 786 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index dff92677..873f488c 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -22,6 +22,8 @@ #include "utils/error.h" #include "utils/numeric.h" +#include "particle_shapes.hpp" + #if defined(MPI_ENABLED) #include "arch/mpi_tags.h" #endif @@ -30,7 +32,9 @@ /* Local macros */ /* -------------------------------------------------------------------------- */ #define from_Xi_to_i(XI, I) \ - { I = static_cast((XI + 1)) - 1; } + { \ + I = static_cast((XI + 1)) - 1; \ + } #define from_Xi_to_i_di(XI, I, DI) \ { \ @@ -473,9 +477,9 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - // getInterpFlds2nd(p, ei, bi); + // field interpolation 1st-6th order + //getInterpFlds(p, ei, bi); + for (auto i { 0u }; i < 3u; ++i) { ei[i] = ZERO; bi[i] = ZERO; @@ -834,791 +838,523 @@ namespace kernel::sr { Inline void getInterpFlds(index_t& p, vec_t& e0, vec_t& b0) const { - if constexpr (D == 
Dim::_1D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - - // first order - real_t c0, c1; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - - // Ex1 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::ex1); - c1 = EB(i + indx, em::ex1); - e0[0] = c0 * pondmx + c1 * pondpx; - // Ex2 - // Interpolate --- (primal) - c0 = EB(i, em::ex2); - c1 = EB(i + 1, em::ex2); - e0[1] = c0 * ponpmx + c1 * ponppx; - // Ex3 - // Interpolate --- (primal) - c0 = EB(i, em::ex3); - c1 = EB(i + 1, em::ex3); - e0[2] = c0 * ponpmx + c1 * ponppx; - // Bx1 - // Interpolate --- (primal) - c0 = EB(i, em::bx1); - c1 = EB(i + 1, em::bx1); - b0[0] = c0 * ponpmx + c1 * ponppx; - // Bx2 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::bx2); - c1 = EB(i + indx, em::bx2); - b0[1] = c0 * pondmx + c1 * pondpx; - // Bx3 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::bx3); - c1 = EB(i + indx, em::bx3); - b0[2] = c0 * pondmx + c1 * pondpx; - } else if constexpr (D == Dim::_2D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - int indy = static_cast(dx2_ + HALF); - - // first order - real_t c000, c100, c010, c110, c00, c10; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - real_t ponpmy = ONE - dx2_; - real_t ponppy = dx2_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); - real_t pondpy = ONE - pondmy; - - // Ex1 - // Interpolate --- (dual, primal) - c000 = EB(i - 1 + indx, j, em::ex1); - c100 = EB(i + indx, j, em::ex1); - c010 = EB(i - 1 + indx, j 
+ 1, em::ex1); - c110 = EB(i + indx, j + 1, em::ex1); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - e0[0] = c00 * ponpmy + c10 * ponppy; - // Ex2 - // Interpolate -- (primal, dual) - c000 = EB(i, j - 1 + indy, em::ex2); - c100 = EB(i + 1, j - 1 + indy, em::ex2); - c010 = EB(i, j + indy, em::ex2); - c110 = EB(i + 1, j + indy, em::ex2); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - e0[1] = c00 * pondmy + c10 * pondpy; - // Ex3 - // Interpolate -- (primal, primal) - c000 = EB(i, j, em::ex3); - c100 = EB(i + 1, j, em::ex3); - c010 = EB(i, j + 1, em::ex3); - c110 = EB(i + 1, j + 1, em::ex3); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - e0[2] = c00 * ponpmy + c10 * ponppy; - - // Bx1 - // Interpolate -- (primal, dual) - c000 = EB(i, j - 1 + indy, em::bx1); - c100 = EB(i + 1, j - 1 + indy, em::bx1); - c010 = EB(i, j + indy, em::bx1); - c110 = EB(i + 1, j + indy, em::bx1); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - b0[0] = c00 * pondmy + c10 * pondpy; - // Bx2 - // Interpolate -- (dual, primal) - c000 = EB(i - 1 + indx, j, em::bx2); - c100 = EB(i + indx, j, em::bx2); - c010 = EB(i - 1 + indx, j + 1, em::bx2); - c110 = EB(i + indx, j + 1, em::bx2); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - b0[1] = c00 * ponpmy + c10 * ponppy; - // Bx3 - // Interpolate -- (dual, dual) - c000 = EB(i - 1 + indx, j - 1 + indy, em::bx3); - c100 = EB(i + indx, j - 1 + indy, em::bx3); - c010 = EB(i - 1 + indx, j + indy, em::bx3); - c110 = EB(i + indx, j + indy, em::bx3); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - b0[2] = c00 * pondmy + c10 * pondpy; - } else if constexpr (D == Dim::_3D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const 
auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - int indy = static_cast(dx2_ + HALF); - int indz = static_cast(dx3_ + HALF); - - // first order - real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, - c11, c0, c1; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - real_t ponpmy = ONE - dx2_; - real_t ponppy = dx2_; - real_t ponpmz = ONE - dx3_; - real_t ponppz = dx3_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); - real_t pondpy = ONE - pondmy; - real_t pondmz = static_cast(indz + ONE) - (dx3_ + HALF); - real_t pondpz = ONE - pondmz; - - // Ex1 - // Interpolate --- (dual, primal, primal) - c000 = EB(i - 1 + indx, j, k, em::ex1); - c100 = EB(i + indx, j, k, em::ex1); - c010 = EB(i - 1 + indx, j + 1, k, em::ex1); - c110 = EB(i + indx, j + 1, k, em::ex1); - c001 = EB(i - 1 + indx, j, k + 1, em::ex1); - c101 = EB(i + indx, j, k + 1, em::ex1); - c011 = EB(i - 1 + indx, j + 1, k + 1, em::ex1); - c111 = EB(i + indx, j + 1, k + 1, em::ex1); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - e0[0] = c0 * ponpmz + c1 * ponppz; - // Ex2 - // Interpolate -- (primal, dual, primal) - c000 = EB(i, j - 1 + indy, k, em::ex2); - c100 = EB(i + 1, j - 1 + indy, k, em::ex2); - c010 = EB(i, j + indy, k, em::ex2); - c110 = EB(i + 1, j + indy, k, em::ex2); - c001 = EB(i, j - 1 + indy, k + 1, em::ex2); - c101 = EB(i + 1, j - 1 + indy, k + 1, em::ex2); - c011 = EB(i, j + indy, k + 1, em::ex2); - c111 = EB(i + 1, j + indy, k + 1, em::ex2); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * pondmy + c10 * pondpy; - c01 = c001 * ponpmx + c101 * 
ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * pondmy + c11 * pondpy; - e0[1] = c0 * ponpmz + c1 * ponppz; - // Ex3 - // Interpolate -- (primal, primal, dual) - c000 = EB(i, j, k - 1 + indz, em::ex3); - c100 = EB(i + 1, j, k - 1 + indz, em::ex3); - c010 = EB(i, j + 1, k - 1 + indz, em::ex3); - c110 = EB(i + 1, j + 1, k - 1 + indz, em::ex3); - c001 = EB(i, j, k + indz, em::ex3); - c101 = EB(i + 1, j, k + indz, em::ex3); - c011 = EB(i, j + 1, k + indz, em::ex3); - c111 = EB(i + 1, j + 1, k + indz, em::ex3); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * ponpmx + c101 * ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * ponpmy + c11 * ponppy; - e0[2] = c0 * pondmz + c1 * pondpz; - - // Bx1 - // Interpolate -- (primal, dual, dual) - c000 = EB(i, j - 1 + indy, k - 1 + indz, em::bx1); - c100 = EB(i + 1, j - 1 + indy, k - 1 + indz, em::bx1); - c010 = EB(i, j + indy, k - 1 + indz, em::bx1); - c110 = EB(i + 1, j + indy, k - 1 + indz, em::bx1); - c001 = EB(i, j - 1 + indy, k + indz, em::bx1); - c101 = EB(i + 1, j - 1 + indy, k + indz, em::bx1); - c011 = EB(i, j + indy, k + indz, em::bx1); - c111 = EB(i + 1, j + indy, k + indz, em::bx1); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * pondmy + c10 * pondpy; - c01 = c001 * ponpmx + c101 * ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * pondmy + c11 * pondpy; - b0[0] = c0 * pondmz + c1 * pondpz; - // Bx2 - // Interpolate -- (dual, primal, dual) - c000 = EB(i - 1 + indx, j, k - 1 + indz, em::bx2); - c100 = EB(i + indx, j, k - 1 + indz, em::bx2); - c010 = EB(i - 1 + indx, j + 1, k - 1 + indz, em::bx2); - c110 = EB(i + indx, j + 1, k - 1 + indz, em::bx2); - c001 = EB(i - 1 + indx, j, k + indz, em::bx2); - c101 = EB(i + indx, j, k + indz, em::bx2); - c011 = EB(i - 1 + indx, j + 1, k + indz, em::bx2); - c111 = EB(i + indx, j + 1, k + indz, em::bx2); - c00 = c000 * pondmx + 
c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - b0[1] = c0 * pondmz + c1 * pondpz; - // Bx3 - // Interpolate -- (dual, dual, primal) - c000 = EB(i - 1 + indx, j - 1 + indy, k, em::bx3); - c100 = EB(i + indx, j - 1 + indy, k, em::bx3); - c010 = EB(i - 1 + indx, j + indy, k, em::bx3); - c110 = EB(i + indx, j + indy, k, em::bx3); - c001 = EB(i - 1 + indx, j - 1 + indy, k + 1, em::bx3); - c101 = EB(i + indx, j - 1 + indy, k + 1, em::bx3); - c011 = EB(i - 1 + indx, j + indy, k + 1, em::bx3); - c111 = EB(i + indx, j + indy, k + 1, em::bx3); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - b0[2] = c0 * ponpmz + c1 * ponppz; - } - } - Inline void getInterpFlds2nd(index_t& p, - vec_t& e0, - vec_t& b0) const { - if constexpr (D == Dim::_1D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1p = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2p = ONE - w0p - w1p; - - // dual - const auto w0d = HALF * SQR(ONE - dx1_); - const auto w2d = HALF * SQR(dx1_); - const auto w1d = ONE - w0d - w2d; - - // Ex1 (dual grid) - const auto ex1_0 = EB(i - 1, em::ex1); - const auto ex1_1 = EB(i, em::ex1); - const auto ex1_2 = EB(i + 1, em::ex1); - e0[0] = ex1_0 * w0d + ex1_1 * w1d + ex1_2 * w2d; - - // Ex2 (primal grid) - const auto ex2_0 = EB(indx + i - 1, em::ex2); - const auto ex2_1 = EB(indx + i, em::ex2); - const auto ex2_2 = 
EB(indx + i + 1, em::ex2); - e0[1] = ex2_0 * w0p + ex2_1 * w1p + ex2_2 * w2p; - - // Ex3 (primal grid) - const auto ex3_0 = EB(indx + i - 1, em::ex3); - const auto ex3_1 = EB(indx + i, em::ex3); - const auto ex3_2 = EB(indx + i + 1, em::ex3); - e0[2] = ex3_0 * w0p + ex3_1 * w1p + ex3_2 * w2p; - - // Bx1 (primal grid) - const auto bx1_0 = EB(indx + i - 1, em::bx1); - const auto bx1_1 = EB(indx + i, em::bx1); - const auto bx1_2 = EB(indx + i + 1, em::bx1); - b0[0] = bx1_0 * w0p + bx1_1 * w1p + bx1_2 * w2p; - - // Bx2 (dual grid) - const auto bx2_0 = EB(i - 1, em::bx2); - const auto bx2_1 = EB(i, em::bx2); - const auto bx2_2 = EB(i + 1, em::bx2); - b0[1] = bx2_0 * w0d + bx2_1 * w1d + bx2_2 * w2d; - - // Bx3 (dual grid) - const auto bx3_0 = EB(i - 1, em::bx3); - const auto bx3_1 = EB(i, em::bx3); - const auto bx3_2 = EB(i + 1, em::bx3); - b0[2] = bx3_0 * w0d + bx3_1 * w1d + bx3_2 * w2d; + // ToDo: implement template in srpic.hpp + const unsigned int O = 2u; + + // ToDo: change to 1u! + if constexpr (O == 0u) { + + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + + // first order + real_t c0, c1; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + + // Ex1 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::ex1); + c1 = EB(i + indx, em::ex1); + e0[0] = c0 * pondmx + c1 * pondpx; + // Ex2 + // Interpolate --- (primal) + c0 = EB(i, em::ex2); + c1 = EB(i + 1, em::ex2); + e0[1] = c0 * ponpmx + c1 * ponppx; + // Ex3 + // Interpolate --- (primal) + c0 = EB(i, em::ex3); + c1 = EB(i + 1, em::ex3); + e0[2] = c0 * ponpmx + c1 * ponppx; + // Bx1 + // Interpolate --- (primal) + c0 = EB(i, em::bx1); + c1 = EB(i + 1, em::bx1); + b0[0] = c0 * ponpmx + c1 * ponppx; + // Bx2 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx2); + 
c1 = EB(i + indx, em::bx2); + b0[1] = c0 * pondmx + c1 * pondpx; + // Bx3 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx3); + c1 = EB(i + indx, em::bx3); + b0[2] = c0 * pondmx + c1 * pondpx; + } else if constexpr (D == Dim::_2D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + + // first order + real_t c000, c100, c010, c110, c00, c10; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + + // Ex1 + // Interpolate --- (dual, primal) + c000 = EB(i - 1 + indx, j, em::ex1); + c100 = EB(i + indx, j, em::ex1); + c010 = EB(i - 1 + indx, j + 1, em::ex1); + c110 = EB(i + indx, j + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + e0[0] = c00 * ponpmy + c10 * ponppy; + // Ex2 + // Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::ex2); + c100 = EB(i + 1, j - 1 + indy, em::ex2); + c010 = EB(i, j + indy, em::ex2); + c110 = EB(i + 1, j + indy, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[1] = c00 * pondmy + c10 * pondpy; + // Ex3 + // Interpolate -- (primal, primal) + c000 = EB(i, j, em::ex3); + c100 = EB(i + 1, j, em::ex3); + c010 = EB(i, j + 1, em::ex3); + c110 = EB(i + 1, j + 1, em::ex3); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[2] = c00 * ponpmy + c10 * ponppy; + + // Bx1 + // Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::bx1); + c100 = EB(i + 1, j - 1 + indy, em::bx1); + c010 = EB(i, j + indy, em::bx1); + c110 = 
EB(i + 1, j + indy, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + b0[0] = c00 * pondmy + c10 * pondpy; + // Bx2 + // Interpolate -- (dual, primal) + c000 = EB(i - 1 + indx, j, em::bx2); + c100 = EB(i + indx, j, em::bx2); + c010 = EB(i - 1 + indx, j + 1, em::bx2); + c110 = EB(i + indx, j + 1, em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[1] = c00 * ponpmy + c10 * ponppy; + // Bx3 + // Interpolate -- (dual, dual) + c000 = EB(i - 1 + indx, j - 1 + indy, em::bx3); + c100 = EB(i + indx, j - 1 + indy, em::bx3); + c010 = EB(i - 1 + indx, j + indy, em::bx3); + c110 = EB(i + indx, j + indy, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[2] = c00 * pondmy + c10 * pondpy; + } else if constexpr (D == Dim::_3D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + int indz = static_cast(dx3_ + HALF); + + // first order + real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, + c11, c0, c1; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + real_t ponpmz = ONE - dx3_; + real_t ponppz = dx3_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + real_t pondmz = static_cast(indz + ONE) - (dx3_ + HALF); + real_t pondpz = ONE - pondmz; + + // Ex1 + // Interpolate --- (dual, primal, primal) + c000 = EB(i - 1 + indx, j, k, em::ex1); + c100 = EB(i + indx, j, k, em::ex1); + c010 = EB(i - 1 + indx, j + 1, k, 
em::ex1); + c110 = EB(i + indx, j + 1, k, em::ex1); + c001 = EB(i - 1 + indx, j, k + 1, em::ex1); + c101 = EB(i + indx, j, k + 1, em::ex1); + c011 = EB(i - 1 + indx, j + 1, k + 1, em::ex1); + c111 = EB(i + indx, j + 1, k + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[0] = c0 * ponpmz + c1 * ponppz; + // Ex2 + // Interpolate -- (primal, dual, primal) + c000 = EB(i, j - 1 + indy, k, em::ex2); + c100 = EB(i + 1, j - 1 + indy, k, em::ex2); + c010 = EB(i, j + indy, k, em::ex2); + c110 = EB(i + 1, j + indy, k, em::ex2); + c001 = EB(i, j - 1 + indy, k + 1, em::ex2); + c101 = EB(i + 1, j - 1 + indy, k + 1, em::ex2); + c011 = EB(i, j + indy, k + 1, em::ex2); + c111 = EB(i + 1, j + indy, k + 1, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + e0[1] = c0 * ponpmz + c1 * ponppz; + // Ex3 + // Interpolate -- (primal, primal, dual) + c000 = EB(i, j, k - 1 + indz, em::ex3); + c100 = EB(i + 1, j, k - 1 + indz, em::ex3); + c010 = EB(i, j + 1, k - 1 + indz, em::ex3); + c110 = EB(i + 1, j + 1, k - 1 + indz, em::ex3); + c001 = EB(i, j, k + indz, em::ex3); + c101 = EB(i + 1, j, k + indz, em::ex3); + c011 = EB(i, j + 1, k + indz, em::ex3); + c111 = EB(i + 1, j + 1, k + indz, em::ex3); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[2] = c0 * pondmz + c1 * pondpz; + + // Bx1 + // Interpolate -- (primal, dual, dual) + c000 = EB(i, j - 1 + indy, k - 1 + indz, em::bx1); + c100 = EB(i + 1, j - 1 + indy, k - 1 + indz, 
em::bx1); + c010 = EB(i, j + indy, k - 1 + indz, em::bx1); + c110 = EB(i + 1, j + indy, k - 1 + indz, em::bx1); + c001 = EB(i, j - 1 + indy, k + indz, em::bx1); + c101 = EB(i + 1, j - 1 + indy, k + indz, em::bx1); + c011 = EB(i, j + indy, k + indz, em::bx1); + c111 = EB(i + 1, j + indy, k + indz, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + b0[0] = c0 * pondmz + c1 * pondpz; + // Bx2 + // Interpolate -- (dual, primal, dual) + c000 = EB(i - 1 + indx, j, k - 1 + indz, em::bx2); + c100 = EB(i + indx, j, k - 1 + indz, em::bx2); + c010 = EB(i - 1 + indx, j + 1, k - 1 + indz, em::bx2); + c110 = EB(i + indx, j + 1, k - 1 + indz, em::bx2); + c001 = EB(i - 1 + indx, j, k + indz, em::bx2); + c101 = EB(i + indx, j, k + indz, em::bx2); + c011 = EB(i - 1 + indx, j + 1, k + indz, em::bx2); + c111 = EB(i + indx, j + 1, k + indz, em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[1] = c0 * pondmz + c1 * pondpz; + // Bx3 + // Interpolate -- (dual, dual, primal) + c000 = EB(i - 1 + indx, j - 1 + indy, k, em::bx3); + c100 = EB(i + indx, j - 1 + indy, k, em::bx3); + c010 = EB(i - 1 + indx, j + indy, k, em::bx3); + c110 = EB(i + indx, j + indy, k, em::bx3); + c001 = EB(i - 1 + indx, j - 1 + indy, k + 1, em::bx3); + c101 = EB(i + indx, j - 1 + indy, k + 1, em::bx3); + c011 = EB(i - 1 + indx, j + indy, k + 1, em::bx3); + c111 = EB(i + indx, j + indy, k + 1, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[2] = c0 * 
ponpmz + c1 * ponppz; + } + } else if constexpr ((O >= 1u) and (O <= 5u)) { + + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + // primal and dual shape function + real_t Sp[O + 1], Sd[O + 1]; + // minimum contributing cells + int ip_min, id_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, Sp); + + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, Sd); + + // Ex1 -- dual + e0[0] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[0] += Sd[idx1] * EB(id_min + idx1, em::ex1); + } - } else if constexpr (D == Dim::_2D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - const int indy = static_cast(static_cast(dx2_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2px = ONE - w0px - w1px; - const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); - const auto w2py = ONE - w0py - w1py; - - // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; - const auto w0dy = HALF * SQR(ONE - dx2_); - const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dy - w2dy; - - // Ex1 - // Interpolate --- (dual, primal) - // clang-format off - const auto ex1_000 = EB(i - 1, indy + j - 1, em::ex1); - const auto ex1_100 = EB(i, indy + j - 1, em::ex1); - const auto ex1_200 = EB(i + 1, indy + j - 1, em::ex1); - const auto ex1_010 = EB(i - 1, indy + j, em::ex1); - 
const auto ex1_110 = EB(i, indy + j, em::ex1); - const auto ex1_210 = EB(i + 1, indy + j, em::ex1); - const auto ex1_020 = EB(i - 1, indy + j + 1, em::ex1); - const auto ex1_120 = EB(i, indy + j + 1, em::ex1); - const auto ex1_220 = EB(i + 1, indy + j + 1, em::ex1); - // clang-format on - - const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; - const auto ex1_1 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; - const auto ex1_2 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; - e0[0] = ex1_0 * w0py + ex1_1 * w1py + ex1_2 * w2py; - - // Ex2 - // Interpolate --- (primal, dual) - // clang-format off - const auto ex2_000 = EB(indx + i - 1, j - 1, em::ex2); - const auto ex2_100 = EB(indx + i, j - 1, em::ex2); - const auto ex2_200 = EB(indx + i + 1, j - 1, em::ex2); - const auto ex2_010 = EB(indx + i - 1, j, em::ex2); - const auto ex2_110 = EB(indx + i, j, em::ex2); - const auto ex2_210 = EB(indx + i + 1, j, em::ex2); - const auto ex2_020 = EB(indx + i - 1, j + 1, em::ex2); - const auto ex2_120 = EB(indx + i, j + 1, em::ex2); - const auto ex2_220 = EB(indx + i + 1, j + 1, em::ex2); - // clang-format on - - const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; - const auto ex2_1 = ex2_010 * w0px + ex2_110 * w1px + ex2_210 * w2px; - const auto ex2_2 = ex2_020 * w0px + ex2_120 * w1px + ex2_220 * w2px; - e0[1] = ex2_0 * w0dy + ex2_1 * w1dy + ex2_2 * w2dy; - - // Ex3 - // Interpolate --- (primal, primal) - // clang-format off - const auto ex3_000 = EB(indx + i - 1, indy + j - 1, em::ex3); - const auto ex3_100 = EB(indx + i, indy + j - 1, em::ex3); - const auto ex3_200 = EB(indx + i + 1, indy + j - 1, em::ex3); - const auto ex3_010 = EB(indx + i - 1, indy + j, em::ex3); - const auto ex3_110 = EB(indx + i, indy + j, em::ex3); - const auto ex3_210 = EB(indx + i + 1, indy + j, em::ex3); - const auto ex3_020 = EB(indx + i - 1, indy + j + 1, em::ex3); - const auto ex3_120 = EB(indx + i, indy + j + 1, em::ex3); - const auto ex3_220 = EB(indx + i 
+ 1, indy + j + 1, em::ex3); - // clang-format on - - const auto ex3_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; - const auto ex3_1 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; - const auto ex3_2 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; - e0[2] = ex3_0 * w0py + ex3_1 * w1py + ex3_2 * w2py; - - // Bx1 - // Interpolate --- (primal, dual) - // clang-format off - const auto bx1_000 = EB(indx + i - 1, j - 1, em::bx1); - const auto bx1_100 = EB(indx + i, j - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, j - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, j, em::bx1); - const auto bx1_110 = EB(indx + i, j, em::bx1); - const auto bx1_210 = EB(indx + i + 1, j, em::bx1); - const auto bx1_020 = EB(indx + i - 1, j + 1, em::bx1); - const auto bx1_120 = EB(indx + i, j + 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, j + 1, em::bx1); - // clang-format on - - const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; - const auto bx1_1 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; - const auto bx1_2 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; - b0[0] = bx1_0 * w0dy + bx1_1 * w1dy + bx1_2 * w2dy; - - // Bx2 - // Interpolate --- (dual, primal) - // clang-format off - const auto bx2_000 = EB(i - 1, indy + j - 1, em::bx2); - const auto bx2_100 = EB(i, indy + j - 1, em::bx2); - const auto bx2_200 = EB(i + 1, indy + j - 1, em::bx2); - const auto bx2_010 = EB(i - 1, indy + j, em::bx2); - const auto bx2_110 = EB(i, indy + j, em::bx2); - const auto bx2_210 = EB(i + 1, indy + j, em::bx2); - const auto bx2_020 = EB(i - 1, indy + j + 1, em::bx2); - const auto bx2_120 = EB(i, indy + j + 1, em::bx2); - const auto bx2_220 = EB(i + 1, indy + j + 1, em::bx2); - // clang-format on - - const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; - const auto bx2_1 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; - const auto bx2_2 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; - b0[1] = bx2_0 * w0py + bx2_1 * w1py 
+ bx2_2 * w2py; - - // Bx3 - // Interpolate --- (dual, dual) - // clang-format off - const auto bx3_000 = EB(i - 1, j - 1, em::bx3); - const auto bx3_100 = EB(i, j - 1, em::bx3); - const auto bx3_200 = EB(i + 1, j - 1, em::bx3); - const auto bx3_010 = EB(i - 1, j, em::bx3); - const auto bx3_110 = EB(i, j, em::bx3); - const auto bx3_210 = EB(i + 1, j, em::bx3); - const auto bx3_020 = EB(i - 1, j + 1, em::bx3); - const auto bx3_120 = EB(i, j + 1, em::bx3); - const auto bx3_220 = EB(i + 1, j + 1, em::bx3); - // clang-format on - - const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; - const auto bx3_1 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; - const auto bx3_2 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; - b0[2] = bx3_0 * w0dy + bx3_1 * w1dy + bx3_2 * w2dy; + // Ex2 -- primal + e0[1] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[1] += Sp[idx1] * EB(ip_min + idx1, em::ex2); + } - } else if constexpr (D == Dim::_3D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - const int indy = static_cast(static_cast(dx2_ + HALF)); - const int indz = static_cast(static_cast(dx3_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2px = ONE - w0px - w1px; - const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); - const auto w2py = ONE - w0py - w1py; - const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); - const auto w1pz = 
THREE_FOURTHS - SQR(dx3_ - static_cast(indz)); - const auto w2pz = ONE - w0pz - w1pz; - - // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; - const auto w0dy = HALF * SQR(ONE - dx2_); - const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dy - w2dy; - const auto w0dz = HALF * SQR(ONE - dx3_); - const auto w2dz = HALF * SQR(dx3_); - const auto w1dz = ONE - w0dz - w2dz; - - // Ex1 - // Interpolate --- (dual, primal, primal) - // clang-format off - const auto ex1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_100 = EB(i, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_010 = EB(i - 1, indy + j, indz + k - 1, em::ex1); - const auto ex1_110 = EB(i, indy + j, indz + k - 1, em::ex1); - const auto ex1_210 = EB(i + 1, indy + j, indz + k - 1, em::ex1); - const auto ex1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_120 = EB(i, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_001 = EB(i - 1, indy + j - 1, indz + k, em::ex1); - const auto ex1_101 = EB(i, indy + j - 1, indz + k, em::ex1); - const auto ex1_201 = EB(i + 1, indy + j - 1, indz + k, em::ex1); - const auto ex1_011 = EB(i - 1, indy + j, indz + k, em::ex1); - const auto ex1_111 = EB(i, indy + j, indz + k, em::ex1); - const auto ex1_211 = EB(i + 1, indy + j, indz + k, em::ex1); - const auto ex1_021 = EB(i - 1, indy + j + 1, indz + k, em::ex1); - const auto ex1_121 = EB(i, indy + j + 1, indz + k, em::ex1); - const auto ex1_221 = EB(i + 1, indy + j + 1, indz + k, em::ex1); - const auto ex1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_102 = EB(i, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_012 = EB(i - 1, indy + j, indz + k + 1, 
em::ex1); - const auto ex1_112 = EB(i, indy + j, indz + k + 1, em::ex1); - const auto ex1_212 = EB(i + 1, indy + j, indz + k + 1, em::ex1); - const auto ex1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::ex1); - const auto ex1_122 = EB(i, indy + j + 1, indz + k + 1, em::ex1); - const auto ex1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::ex1); - // clang-format on - - const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; - const auto ex1_1_0 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; - const auto ex1_2_0 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; - const auto ex1_0_1 = ex1_001 * w0dx + ex1_101 * w1dx + ex1_201 * w2dx; - const auto ex1_1_1 = ex1_011 * w0dx + ex1_111 * w1dx + ex1_211 * w2dx; - const auto ex1_2_1 = ex1_021 * w0dx + ex1_121 * w1dx + ex1_221 * w2dx; - const auto ex1_0_2 = ex1_002 * w0dx + ex1_102 * w1dx + ex1_202 * w2dx; - const auto ex1_1_2 = ex1_012 * w0dx + ex1_112 * w1dx + ex1_212 * w2dx; - const auto ex1_2_2 = ex1_022 * w0dx + ex1_122 * w1dx + ex1_222 * w2dx; - - const auto ex1_00 = ex1_0_0 * w0py + ex1_1_0 * w1py + ex1_2_0 * w2py; - const auto ex1_01 = ex1_0_1 * w0py + ex1_1_1 * w1py + ex1_2_1 * w2py; - const auto ex1_02 = ex1_0_2 * w0py + ex1_1_2 * w1py + ex1_2_2 * w2py; - - e0[0] = ex1_00 * w0pz + ex1_01 * w1pz + ex1_02 * w2pz; - - // Ex2 - // Interpolate -- (primal, dual, primal) - // clang-format off - const auto ex2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::ex2); - const auto ex2_100 = EB(indx + i, j - 1, indz + k - 1, em::ex2); - const auto ex2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::ex2); - const auto ex2_010 = EB(indx + i - 1, j, indz + k - 1, em::ex2); - const auto ex2_110 = EB(indx + i, j, indz + k - 1, em::ex2); - const auto ex2_210 = EB(indx + i + 1, j, indz + k - 1, em::ex2); - const auto ex2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::ex2); - const auto ex2_120 = EB(indx + i, j + 1, indz + k - 1, em::ex2); - const auto ex2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::ex2); - 
const auto ex2_001 = EB(indx + i - 1, j - 1, indz + k, em::ex2); - const auto ex2_101 = EB(indx + i, j - 1, indz + k, em::ex2); - const auto ex2_201 = EB(indx + i + 1, j - 1, indz + k, em::ex2); - const auto ex2_011 = EB(indx + i - 1, j, indz + k, em::ex2); - const auto ex2_111 = EB(indx + i, j, indz + k, em::ex2); - const auto ex2_211 = EB(indx + i + 1, j, indz + k, em::ex2); - const auto ex2_021 = EB(indx + i - 1, j + 1, indz + k, em::ex2); - const auto ex2_121 = EB(indx + i, j + 1, indz + k, em::ex2); - const auto ex2_221 = EB(indx + i + 1, j + 1, indz + k, em::ex2); - const auto ex2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::ex2); - const auto ex2_102 = EB(indx + i, j - 1, indz + k + 1, em::ex2); - const auto ex2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::ex2); - const auto ex2_012 = EB(indx + i - 1, j, indz + k + 1, em::ex2); - const auto ex2_112 = EB(indx + i, j, indz + k + 1, em::ex2); - const auto ex2_212 = EB(indx + i + 1, j, indz + k + 1, em::ex2); - const auto ex2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::ex2); - const auto ex2_122 = EB(indx + i, j + 1, indz + k + 1, em::ex2); - const auto ex2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::ex2); - // clang-format on - - const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex1_200 * w2px; - const auto ex2_1_0 = ex2_010 * w0px + ex2_110 * w1px + ex1_210 * w2px; - const auto ex2_2_0 = ex2_020 * w0px + ex2_120 * w1px + ex1_220 * w2px; - const auto ex2_0_1 = ex2_001 * w0px + ex2_101 * w1px + ex2_201 * w2px; - const auto ex2_1_1 = ex2_011 * w0px + ex2_111 * w1px + ex2_211 * w2px; - const auto ex2_2_1 = ex2_021 * w0px + ex2_121 * w1px + ex2_221 * w2px; - const auto ex2_0_2 = ex2_002 * w0px + ex2_102 * w1px + ex2_202 * w2px; - const auto ex2_1_2 = ex2_012 * w0px + ex2_112 * w1px + ex2_212 * w2px; - const auto ex2_2_2 = ex2_022 * w0px + ex2_122 * w1px + ex2_222 * w2px; - - const auto ex2_00 = ex2_0_0 * w0dy + ex2_1_0 * w1dy + ex2_2_0 * w2dy; - const auto ex2_01 = ex2_0_1 * w0dy + ex2_1_1 * w1dy 
+ ex2_2_1 * w2dy; - const auto ex2_02 = ex2_0_2 * w0dy + ex2_1_2 * w1dy + ex2_2_2 * w2dy; - - e0[1] = ex2_00 * w0pz + ex2_01 * w1pz + ex2_02 * w2pz; - - // Ex3 - // Interpolate -- (primal, primal, dual) - // clang-format off - const auto ex3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::ex3); - const auto ex3_100 = EB(indx + i, indy + j - 1, k - 1, em::ex3); - const auto ex3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::ex3); - const auto ex3_010 = EB(indx + i - 1, indy + j, k - 1, em::ex3); - const auto ex3_110 = EB(indx + i, indy + j, k - 1, em::ex3); - const auto ex3_210 = EB(indx + i + 1, indy + j, k - 1, em::ex3); - const auto ex3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::ex3); - const auto ex3_120 = EB(indx + i, indy + j + 1, k - 1, em::ex3); - const auto ex3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::ex3); - const auto ex3_001 = EB(indx + i - 1, indy + j - 1, k, em::ex3); - const auto ex3_101 = EB(indx + i, indy + j - 1, k, em::ex3); - const auto ex3_201 = EB(indx + i + 1, indy + j - 1, k, em::ex3); - const auto ex3_011 = EB(indx + i - 1, indy + j, k, em::ex3); - const auto ex3_111 = EB(indx + i, indy + j, k, em::ex3); - const auto ex3_211 = EB(indx + i + 1, indy + j, k, em::ex3); - const auto ex3_021 = EB(indx + i - 1, indy + j + 1, k, em::ex3); - const auto ex3_121 = EB(indx + i, indy + j + 1, k, em::ex3); - const auto ex3_221 = EB(indx + i + 1, indy + j + 1, k, em::ex3); - const auto ex3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::ex3); - const auto ex3_102 = EB(indx + i, indy + j - 1, k + 1, em::ex3); - const auto ex3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::ex3); - const auto ex3_012 = EB(indx + i - 1, indy + j, k + 1, em::ex3); - const auto ex3_112 = EB(indx + i, indy + j, k + 1, em::ex3); - const auto ex3_212 = EB(indx + i + 1, indy + j, k + 1, em::ex3); - const auto ex3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::ex3); - const auto ex3_122 = EB(indx + i, indy + j + 1, k + 1, em::ex3); - const auto ex3_222 = EB(indx + i + 
1, indy + j + 1, k + 1, em::ex3); - // clang-format on - - const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; - const auto ex3_1_0 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; - const auto ex3_2_0 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; - const auto ex3_0_1 = ex3_001 * w0px + ex3_101 * w1px + ex3_201 * w2px; - const auto ex3_1_1 = ex3_011 * w0px + ex3_111 * w1px + ex3_211 * w2px; - const auto ex3_2_1 = ex3_021 * w0px + ex3_121 * w1px + ex3_221 * w2px; - const auto ex3_0_2 = ex3_002 * w0px + ex3_102 * w1px + ex3_202 * w2px; - const auto ex3_1_2 = ex3_012 * w0px + ex3_112 * w1px + ex3_212 * w2px; - const auto ex3_2_2 = ex3_022 * w0px + ex3_122 * w1px + ex3_222 * w2px; - - const auto ex3_00 = ex3_0_0 * w0py + ex3_1_0 * w1py + ex3_2_0 * w2py; - const auto ex3_01 = ex3_0_1 * w0py + ex3_1_1 * w1py + ex3_2_1 * w2py; - const auto ex3_02 = ex3_0_2 * w0py + ex3_1_2 * w1py + ex3_2_2 * w2py; - - e0[2] = ex3_00 * w0dz + ex3_01 * w1dz + ex3_02 * w2dz; - - // Bx1 - // Interpolate -- (primal, dual, dual) - // clang-format off - const auto bx1_000 = EB(indx + i - 1, j - 1, k - 1, em::bx1); - const auto bx1_100 = EB(indx + i, j - 1, k - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, j - 1, k - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, j, k - 1, em::bx1); - const auto bx1_110 = EB(indx + i, j, k - 1, em::bx1); - const auto bx1_210 = EB(indx + i + 1, j, k - 1, em::bx1); - const auto bx1_020 = EB(indx + i - 1, j + 1, k - 1, em::bx1); - const auto bx1_120 = EB(indx + i, j + 1, k - 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, j + 1, k - 1, em::bx1); - const auto bx1_001 = EB(indx + i - 1, j - 1, k, em::bx1); - const auto bx1_101 = EB(indx + i, j - 1, k, em::bx1); - const auto bx1_201 = EB(indx + i + 1, j - 1, k, em::bx1); - const auto bx1_011 = EB(indx + i - 1, j, k, em::bx1); - const auto bx1_111 = EB(indx + i, j, k, em::bx1); - const auto bx1_211 = EB(indx + i + 1, j, k, em::bx1); - const auto bx1_021 = EB(indx + i - 1, j 
+ 1, k, em::bx1); - const auto bx1_121 = EB(indx + i, j + 1, k, em::bx1); - const auto bx1_221 = EB(indx + i + 1, j + 1, k, em::bx1); - const auto bx1_002 = EB(indx + i - 1, j - 1, k + 1, em::bx1); - const auto bx1_102 = EB(indx + i, j - 1, k + 1, em::bx1); - const auto bx1_202 = EB(indx + i + 1, j - 1, k + 1, em::bx1); - const auto bx1_012 = EB(indx + i - 1, j, k + 1, em::bx1); - const auto bx1_112 = EB(indx + i, j, k + 1, em::bx1); - const auto bx1_212 = EB(indx + i + 1, j, k + 1, em::bx1); - const auto bx1_022 = EB(indx + i - 1, j + 1, k + 1, em::bx1); - const auto bx1_122 = EB(indx + i, j + 1, k + 1, em::bx1); - const auto bx1_222 = EB(indx + i + 1, j + 1, k + 1, em::bx1); - // clang-format on - - const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; - const auto bx1_1_0 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; - const auto bx1_2_0 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; - const auto bx1_0_1 = bx1_001 * w0px + bx1_101 * w1px + bx1_201 * w2px; - const auto bx1_1_1 = bx1_011 * w0px + bx1_111 * w1px + bx1_211 * w2px; - const auto bx1_2_1 = bx1_021 * w0px + bx1_121 * w1px + bx1_221 * w2px; - const auto bx1_0_2 = bx1_002 * w0px + bx1_102 * w1px + bx1_202 * w2px; - const auto bx1_1_2 = bx1_012 * w0px + bx1_112 * w1px + bx1_212 * w2px; - const auto bx1_2_2 = bx1_022 * w0px + bx1_122 * w1px + bx1_222 * w2px; - - const auto bx1_00 = bx1_0_0 * w0dy + bx1_1_0 * w1dy + bx1_2_0 * w2dy; - const auto bx1_01 = bx1_0_1 * w0dy + bx1_1_1 * w1dy + bx1_2_1 * w2dy; - const auto bx1_02 = bx1_0_2 * w0dy + bx1_1_2 * w1dy + bx1_2_2 * w2dy; - - b0[0] = bx1_00 * w0dz + bx1_01 * w1dz + bx1_02 * w2dz; - - // Bx2 - // Interpolate -- (dual, primal, dual) - // clang-format off - const auto bx2_000 = EB(i - 1, indy + j - 1, k - 1, em::bx2); - const auto bx2_100 = EB(i, indy + j - 1, k - 1, em::bx2); - const auto bx2_200 = EB(i + 1, indy + j - 1, k - 1, em::bx2); - const auto bx2_010 = EB(i - 1, indy + j, k - 1, em::bx2); - const auto bx2_110 = EB(i, indy 
+ j, k - 1, em::bx2); - const auto bx2_210 = EB(i + 1, indy + j, k - 1, em::bx2); - const auto bx2_020 = EB(i - 1, indy + j + 1, k - 1, em::bx2); - const auto bx2_120 = EB(i, indy + j + 1, k - 1, em::bx2); - const auto bx2_220 = EB(i + 1, indy + j + 1, k - 1, em::bx2); - const auto bx2_001 = EB(i - 1, indy + j - 1, k, em::bx2); - const auto bx2_101 = EB(i, indy + j - 1, k, em::bx2); - const auto bx2_201 = EB(i + 1, indy + j - 1, k, em::bx2); - const auto bx2_011 = EB(i - 1, indy + j, k, em::bx2); - const auto bx2_111 = EB(i, indy + j, k, em::bx2); - const auto bx2_211 = EB(i + 1, indy + j, k, em::bx2); - const auto bx2_021 = EB(i - 1, indy + j + 1, k, em::bx2); - const auto bx2_121 = EB(i, indy + j + 1, k, em::bx2); - const auto bx2_221 = EB(i + 1, indy + j + 1, k, em::bx2); - const auto bx2_002 = EB(i - 1, indy + j - 1, k + 1, em::bx2); - const auto bx2_102 = EB(i, indy + j - 1, k + 1, em::bx2); - const auto bx2_202 = EB(i + 1, indy + j - 1, k + 1, em::bx2); - const auto bx2_012 = EB(i - 1, indy + j, k + 1, em::bx2); - const auto bx2_112 = EB(i, indy + j, k + 1, em::bx2); - const auto bx2_212 = EB(i + 1, indy + j, k + 1, em::bx2); - const auto bx2_022 = EB(i - 1, indy + j + 1, k + 1, em::bx2); - const auto bx2_122 = EB(i, indy + j + 1, k + 1, em::bx2); - const auto bx2_222 = EB(i + 1, indy + j + 1, k + 1, em::bx2); - // clang-format on - - const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; - const auto bx2_1_0 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; - const auto bx2_2_0 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; - const auto bx2_0_1 = bx2_001 * w0dx + bx2_101 * w1dx + bx2_201 * w2dx; - const auto bx2_1_1 = bx2_011 * w0dx + bx2_111 * w1dx + bx2_211 * w2dx; - const auto bx2_2_1 = bx2_021 * w0dx + bx2_121 * w1dx + bx2_221 * w2dx; - const auto bx2_0_2 = bx2_002 * w0dx + bx2_102 * w1dx + bx2_202 * w2dx; - const auto bx2_1_2 = bx2_012 * w0dx + bx2_112 * w1dx + bx2_212 * w2dx; - const auto bx2_2_2 = bx2_022 * w0dx + bx2_122 * w1dx 
+ bx2_222 * w2dx; - - const auto bx2_00 = bx2_0_0 * w0py + bx2_1_0 * w1py + bx2_2_0 * w2py; - const auto bx2_01 = bx2_0_1 * w0py + bx2_1_1 * w1py + bx2_2_1 * w2py; - const auto bx2_02 = bx2_0_2 * w0py + bx2_1_2 * w1py + bx2_2_2 * w2py; - - b0[1] = bx2_00 * w0dz + bx2_01 * w1dz + bx2_02 * w2dz; - - // Bx3 - // Interpolate -- (dual, dual, primal) - // clang-format off - const auto bx3_000 = EB(i - 1, j - 1, indz + k - 1, em::bx3); - const auto bx3_100 = EB(i, j - 1, indz + k - 1, em::bx3); - const auto bx3_200 = EB(i + 1, j - 1, indz + k - 1, em::bx3); - const auto bx3_010 = EB(i - 1, j, indz + k - 1, em::bx3); - const auto bx3_110 = EB(i, j, indz + k - 1, em::bx3); - const auto bx3_210 = EB(i + 1, j, indz + k - 1, em::bx3); - const auto bx3_020 = EB(i - 1, j + 1, indz + k - 1, em::bx3); - const auto bx3_120 = EB(i, j + 1, indz + k - 1, em::bx3); - const auto bx3_220 = EB(i + 1, j + 1, indz + k - 1, em::bx3); - const auto bx3_001 = EB(i - 1, j - 1, indz + k, em::bx3); - const auto bx3_101 = EB(i, j - 1, indz + k, em::bx3); - const auto bx3_201 = EB(i + 1, j - 1, indz + k, em::bx3); - const auto bx3_011 = EB(i - 1, j, indz + k, em::bx3); - const auto bx3_111 = EB(i, j, indz + k, em::bx3); - const auto bx3_211 = EB(i + 1, j, indz + k, em::bx3); - const auto bx3_021 = EB(i - 1, j + 1, indz + k, em::bx3); - const auto bx3_121 = EB(i, j + 1, indz + k, em::bx3); - const auto bx3_221 = EB(i + 1, j + 1, indz + k, em::bx3); - const auto bx3_002 = EB(i - 1, j - 1, indz + k + 1, em::bx3); - const auto bx3_102 = EB(i, j - 1, indz + k + 1, em::bx3); - const auto bx3_202 = EB(i + 1, j - 1, indz + k + 1, em::bx3); - const auto bx3_012 = EB(i - 1, j, indz + k + 1, em::bx3); - const auto bx3_112 = EB(i, j, indz + k + 1, em::bx3); - const auto bx3_212 = EB(i + 1, j, indz + k + 1, em::bx3); - const auto bx3_022 = EB(i - 1, j + 1, indz + k + 1, em::bx3); - const auto bx3_122 = EB(i, j + 1, indz + k + 1, em::bx3); - const auto bx3_222 = EB(i + 1, j + 1, indz + k + 1, em::bx3); - // 
clang-format on - - const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; - const auto bx3_1_0 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; - const auto bx3_2_0 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; - const auto bx3_0_1 = bx3_001 * w0dx + bx3_101 * w1dx + bx3_201 * w2dx; - const auto bx3_1_1 = bx3_011 * w0dx + bx3_111 * w1dx + bx3_211 * w2dx; - const auto bx3_2_1 = bx3_021 * w0dx + bx3_121 * w1dx + bx3_221 * w2dx; - const auto bx3_0_2 = bx3_002 * w0dx + bx3_102 * w1dx + bx3_202 * w2dx; - const auto bx3_1_2 = bx3_012 * w0dx + bx3_112 * w1dx + bx3_212 * w2dx; - const auto bx3_2_2 = bx3_022 * w0dx + bx3_122 * w1dx + bx3_222 * w2dx; - - const auto bx3_00 = bx3_0_0 * w0dy + bx3_1_0 * w1dy + bx3_2_0 * w2dy; - const auto bx3_01 = bx3_0_1 * w0dy + bx3_1_1 * w1dy + bx3_2_1 * w2dy; - const auto bx3_02 = bx3_0_2 * w0dy + bx3_1_2 * w1dy + bx3_2_2 * w2dy; - - b0[2] = bx3_00 * w0pz + bx3_01 * w1pz + bx3_02 * w2pz; + // Ex3 -- primal + e0[2] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[2] += Sp[idx1] * EB(ip_min + idx1, em::ex3); + } + + // Bx1 -- primal + b0[0] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[0] += Sp[idx1] * EB(ip_min + idx1, em::bx1); + } + + // Bx2 -- dual + b0[1] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[1] += Sd[idx1] * EB(id_min + idx1, em::bx2); + } + + // Bx3 -- dual + b0[2] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[2] += Sd[idx1] * EB(id_min + idx1, em::bx3); + } + + } else if constexpr (D == Dim::_2D) { + + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // primal and dual shape function + real_t S1p[O + 1], S1d[O + 1]; + real_t S2p[O + 1], S2d[O + 1]; + // minimum contributing cells + int ip_min, id_min; + int jp_min, jd_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, S1p); + 
prtl_shape::order(j, dx2_, jp_min, S2p); + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, S1d); + prtl_shape::order(j, dx2_, jd_min, S2d); + + // Ex1 -- dual, primal + e0[0] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::ex1); + } + e0[0] += c0 * S2p[idx2]; + } + + // Ex2 -- primal, dual + e0[1] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::ex2); + } + e0[1] += c0 * S2d[idx2]; + } + + // Ex3 -- primal, primal + e0[2] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, em::ex3); + } + e0[2] += c0 * S2p[idx2]; + } + + // Bx1 -- primal, dual + b0[0] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::bx1); + } + b0[0] += c0 * S2d[idx2]; + } + + // Bx2 -- dual, primal + b0[1] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::bx2); + } + b0[1] += c0 * S2p[idx2]; + } + + // Bx3 -- dual, dual + b0[2] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, em::bx3); + } + b0[2] += c0 * S2d[idx2]; + } + + } else if constexpr (D == Dim::_3D) { + + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // primal 
and dual shape function + real_t S1p[O + 1], S1d[O + 1]; + real_t S2p[O + 1], S2d[O + 1]; + real_t S3p[O + 1], S3d[O + 1]; + + // minimum contributing cells + int ip_min, id_min; + int jp_min, jd_min; + int kp_min, kd_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, S1p); + prtl_shape::order(j, dx2_, jp_min, S2p); + prtl_shape::order(k, dx3_, kp_min, S3p); + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, S1d); + prtl_shape::order(j, dx2_, jd_min, S2d); + prtl_shape::order(k, dx3_, kd_min, S3d); + + // Ex1 -- dual, primal, primal + e0[0] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jp_min + idx2, kp_min + idx3, em::ex1); + } + c0 += c00 * S2p[idx2]; + } + e0[0] += c0 * S3p[idx3]; + } + + // Ex2 -- primal, dual, primal + e0[1] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jd_min + idx2, kp_min + idx3, em::ex2); + } + c0 += c00 * S2d[idx2]; + } + e0[1] += c0 * S3p[idx3]; + } + + // Ex3 -- primal, primal, dual + e0[2] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jp_min + idx2, kd_min + idx3, em::ex3); + } + c0 += c00 * S2p[idx2]; + } + e0[2] += c0 * S3d[idx3]; + } + + // Bx1 -- primal, dual, dual + b0[0] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jd_min + idx2, kd_min + idx3, em::bx1); + } + c0 += c00 * 
S2d[idx2]; + } + b0[0] += c0 * S3d[idx3]; + } + + // Bx2 -- dual, primal, dual + b0[1] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jp_min + idx2, kd_min + idx3, em::bx2); + } + c0 += c00 * S2p[idx2]; + } + b0[1] += c0 * S3d[idx3]; + } + + // Bx3 -- dual, dual, primal + b0[2] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jd_min + idx2, kp_min + idx3, em::bx3); + } + c0 += c00 * S2d[idx2]; + } + b0[2] += c0 * S3p[idx3]; + } + } } } From e1274d50248723e9f92b568cbebdeaf77a282ab7 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 13 Aug 2025 18:49:14 -0500 Subject: [PATCH 52/82] bugfixes for indexing --- src/kernels/currents_deposit.hpp | 54 ++++++++++---------------------- src/kernels/particle_shapes.hpp | 4 +-- 2 files changed, 19 insertions(+), 39 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 51610800..cb8862f3 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -448,33 +448,22 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - // Esirkepov 2001, Eq. 38 - Wx1[i][j] = (fS_x1[i] - iS_x1[i]) * - (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + // Esirkepov 2001, Eq. 
38 (simplified) + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = (fS_x2[j] - iS_x2[j]) * - (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = iS_x1[i] * iS_x2[j] + - HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + - HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + - THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - - // Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - - // Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - - // Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x2[j]) + - // iS_x2[j] * (HALF * fS_x2[j] + iS_x2[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } // contribution within the shape function stencil - real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2], jx3[O + 2][O + 2]; + real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2]; // prefactors for j update - const real_t Qdx1dt = -coeff * inv_dt; - const real_t Qdx2dt = -coeff * inv_dt; + const real_t Qdx1dt = coeff * inv_dt; + const real_t Qdx2dt = coeff * inv_dt; const real_t QVx3 = coeff * vp[2]; // Calculate current contribution @@ -482,37 +471,28 @@ namespace kernel { // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx1[0][j] = Wx1[0][j]; + jx1[0][j] = -Qdx1dt * Wx1[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx1[i][j] = jx1[i - 1][j] + Wx1[i][j]; + jx1[i][j] = jx1[i - 1][j] - Qdx1dt * Wx1[i][j]; } } // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { - jx2[i][0] = Wx2[i][0]; + jx2[i][0] = -Qdx2dt * Wx2[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jx2[i][j] = jx2[i][j - 1] + Wx2[i][j]; - } - } - - // jx3 -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { - jx3[i][j] = Wx3[i][j]; + jx2[i][j] = jx2[i][j - 1] - 
Qdx2dt * Wx2[i][j]; } } @@ -531,21 +511,21 @@ namespace kernel { */ auto J_acc = J.access(); - for (int i = 0; i <= di_x1; ++i) { + for (int i = 0; i < di_x1; ++i) { for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx1) += jx1[i][j]; } } for (int i = 0; i <= di_x1; ++i) { - for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx2) += jx2[i][j]; } } for (int i = 0; i <= di_x1; ++i) { for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * Wx3[i][j]; } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 7d626c9d..c35642d5 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -313,7 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; - i_max = i_fin_min + O; + i_max = i_min + O + 1; #pragma unroll for (int j = 0; j < O + 1; j++) { @@ -329,7 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; - i_max = i_init_min + O; + i_max = i_min + O + 1; iS[0] = ZERO; #pragma unroll From eae6a13513e64d0ad6fed9b23cc86188b38435f0 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 14 Aug 2025 17:57:28 -0500 Subject: [PATCH 53/82] add remaining shape_order cases --- src/engines/srpic.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index b63415a0..a99d33d6 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -549,6 +549,12 @@ namespace ntt { deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 2) { deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 3) { + deposit_with<3u>(species, domain.mesh.metric, 
scatter_cur, dt); + } else if (shape_order == 4) { + deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 5) { + deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } From acfd1361d35b8e707d1619b7e50094521923365b Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 14 Aug 2025 17:57:43 -0500 Subject: [PATCH 54/82] bugfix for 3D deposit --- src/kernels/currents_deposit.hpp | 298 +++++-------------------------- 1 file changed, 43 insertions(+), 255 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index cb8862f3..040503bc 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -449,12 +449,12 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 38 (simplified) - Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + - iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } @@ -643,7 +643,7 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx2[i][j][0] = -Qdydt * Wx2[i][j][0]; + jx3[i][j][0] = -Qdydt * Wx3[i][j][0]; } } @@ -658,268 +658,56 @@ namespace kernel { } } + // account for ghost cells + i1_min += N_GHOSTS; + i2_min += N_GHOSTS; + i3_min += N_GHOSTS; + i1_max += N_GHOSTS; + i2_max += N_GHOSTS; + i3_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + const int di_x2 = i2_max - i2_min; + const int di_x3 = i3_max - i3_min; + /* 
Current update */ auto J_acc = J.access(); -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { -#pragma unroll - for (int k = 1; k < O + 2; ++k) { - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx1[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jx2[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jx3[i][j][k]; + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { + for (int k = 0; k <= di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx1) += jx1[i][j][k]; + } + } + } + + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + for (int k = 0; k <= di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx2) += jx2[i][j][k]; + } + } + } + + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { + for (int k = 0; k < di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx3) += jx3[i][j][k]; } } } } - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } } - } - }; -} // namespace kernel + }; + } // namespace kernel #undef i_di_to_Xi -// -// } else if constexpr (O == 2u) { -// /* -// * Higher order charge conserving current deposition based on -// * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract -// **/ - -// // iS -> shape function for init position -// // fS -> shape function for final position - -// // shape function at integer points (one coeff is always ZERO) -// int i1_min; -// real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; -// real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - -// // clang-format off -// prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), -// i1(p), static_cast(dx1(p)), -// i1_min, -// iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, -// fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); -// // clang-format on - -// if constexpr (D == Dim::_1D) { -// 
raise::KernelNotImplementedError(HERE); -// } else if constexpr (D == Dim::_2D) { - -// // shape function at integer points (one coeff is always ZERO) -// int i2_min; -// real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; -// real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - -// // clang-format off -// prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), -// i2(p), static_cast(dx2(p)), -// i2_min, -// iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, -// fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); -// // clang-format on -// // x1-components -// const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - -// // x2-components -// const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_03 = HALF * 
(fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - -// // x3-components -// const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - -// const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - -// const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); 
-// const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); -// const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); -// const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - -// const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - -// // x1-component -// const auto jx1_00 = Wx1_00; -// const auto jx1_10 = jx1_00 + Wx1_10; -// const auto jx1_20 = jx1_10 + Wx1_20; -// const auto jx1_30 = jx1_20 + Wx1_30; - -// const auto jx1_01 = Wx1_01; -// const auto jx1_11 = jx1_01 + Wx1_11; -// const auto jx1_21 = jx1_11 + Wx1_21; -// const auto jx1_31 = jx1_21 + Wx1_31; - -// const auto jx1_02 = Wx1_02; -// const auto jx1_12 = jx1_02 + Wx1_12; -// const auto jx1_22 = jx1_12 + Wx1_22; -// const auto jx1_32 = jx1_22 + Wx1_32; - -// const auto jx1_03 = Wx1_03; -// const auto jx1_13 = jx1_03 + Wx1_13; -// const auto jx1_23 = jx1_13 + Wx1_23; -// const auto jx1_33 = jx1_23 + Wx1_33; - -// // y-component -// const auto jx2_00 = Wx2_00; -// const auto jx2_01 = jx2_00 + Wx2_01; -// const auto jx2_02 = jx2_01 + Wx2_02; -// const auto jx2_03 = jx2_02 + Wx2_03; - -// const auto jx2_10 = Wx2_10; -// const auto jx2_11 = jx2_10 + Wx2_11; -// const auto jx2_12 = jx2_11 + Wx2_12; -// const auto jx2_13 = jx2_12 + Wx2_13; - -// const auto jx2_20 = Wx2_20; -// const auto jx2_21 = jx2_20 + Wx2_21; -// const auto jx2_22 = jx2_21 + Wx2_22; -// const auto jx2_23 = jx2_22 + Wx2_23; - -// const auto jx2_30 = Wx2_30; -// const auto 
jx2_31 = jx2_30 + Wx2_31; -// const auto jx2_32 = jx2_31 + Wx2_32; -// const auto jx2_33 = jx2_32 + Wx2_33; - -// i1_min += N_GHOSTS; -// i2_min += N_GHOSTS; - -// // @TODO: not sure about the signs here -// const real_t Qdx1dt = -coeff * inv_dt; -// const real_t Qdx2dt = -coeff * inv_dt; -// const real_t QVx3 = coeff * vp[2]; - -// auto J_acc = J.access(); - -// // x1-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - -// // x2-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx2) += 
Qdx2dt * jx2_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - -// // x3-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - -// } else if constexpr (D == Dim::_3D) { -// raise::KernelNotImplementedError(HERE); -// } // dimension #endif // KERNELS_CURRENTS_DEPOSIT_HPP From bd72a7e0422ce9ef09d4e3c68e5295f71940a52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 29 Aug 2025 14:55:39 -0500 Subject: [PATCH 55/82] bugfix in first order shape function --- src/kernels/particle_shapes.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp 
b/src/kernels/particle_shapes.hpp index c35642d5..10e2ddaa 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -33,8 +33,8 @@ namespace prtl_shape { S[1] = ONE - S[0]; } else { i_min = i; - S[1] = static_cast(1.5) - di; - S[0] = ONE - S[1]; + S[0] = static_cast(1.5) - di; + S[1] = ONE - S[0]; } } // staggered } else if constexpr (O == 2u) { From 6568008f0761be6ba3b284fffc0cd890cca4949c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 29 Aug 2025 14:56:01 -0500 Subject: [PATCH 56/82] cleanup --- src/kernels/particle_pusher_sr.hpp | 45 +++++++++++++----------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 873f488c..bf4cfd2d 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -478,12 +478,7 @@ namespace kernel::sr { bool is_gca { false }; // field interpolation 1st-6th order - //getInterpFlds(p, ei, bi); - - for (auto i { 0u }; i < 3u; ++i) { - ei[i] = ZERO; - bi[i] = ZERO; - } + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -840,7 +835,7 @@ namespace kernel::sr { vec_t& b0) const { // ToDo: implement template in srpic.hpp - const unsigned int O = 2u; + const unsigned int O = 1u; // ToDo: change to 1u! 
if constexpr (O == 0u) { @@ -1180,7 +1175,7 @@ namespace kernel::sr { // Ex1 -- dual, primal e0[0] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::ex1); } @@ -1190,7 +1185,7 @@ namespace kernel::sr { // Ex2 -- primal, dual e0[1] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::ex2); } @@ -1200,7 +1195,7 @@ namespace kernel::sr { // Ex3 -- primal, primal e0[2] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, em::ex3); } @@ -1210,7 +1205,7 @@ namespace kernel::sr { // Bx1 -- primal, dual b0[0] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::bx1); } @@ -1220,7 +1215,7 @@ namespace kernel::sr { // Bx2 -- dual, primal b0[1] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::bx2); } @@ -1230,7 +1225,7 @@ namespace kernel::sr { // Bx3 -- dual, dual b0[2] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, em::bx3); } @@ -1268,9 +1263,9 @@ namespace kernel::sr { // Ex1 -- dual, primal, primal e0[0] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, 
kp_min + idx3, em::ex1); @@ -1283,9 +1278,9 @@ namespace kernel::sr { // Ex2 -- primal, dual, primal e0[1] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, kp_min + idx3, em::ex2); @@ -1298,9 +1293,9 @@ namespace kernel::sr { // Ex3 -- primal, primal, dual e0[2] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, kd_min + idx3, em::ex3); @@ -1313,9 +1308,9 @@ namespace kernel::sr { // Bx1 -- primal, dual, dual b0[0] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, kd_min + idx3, em::bx1); @@ -1328,9 +1323,9 @@ namespace kernel::sr { // Bx2 -- dual, primal, dual b0[1] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, kd_min + idx3, em::bx2); @@ -1343,9 +1338,9 @@ namespace kernel::sr { // Bx3 -- dual, dual, primal b0[2] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, kp_min + idx3, em::bx3); From ffde338cff176188024f0f6f09441ca1ed7615ed Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 9 Sep 2025 16:48:56 -0500 Subject: [PATCH 57/82] bugfix in static_cast --- src/kernels/particle_shapes.hpp | 238 +++++++++++--------------------- 1 file changed, 81 insertions(+), 157 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 10e2ddaa..0ad83b10 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -49,15 +49,15 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * SQR(static_cast(3 / 2) - di); - S[2] = HALF * SQR(di - HALF); - S[1] = ONE - S[0] - S[2]; + S[0] = HALF * SQR(static_cast(3.0 / 2.0) - di); + S[1] = THREE_FOURTHS - SQR(ONE - di); + S[2] = ONE - S[0] - S[1]; } } else { // compute at i + 1/2 positions i_min = i - 1; - S[1] = THREE_FOURTHS - SQR(di - HALF); - S[2] = HALF * SQR(di); - S[0] = ONE - S[1] - S[2]; + S[0] = HALF * SQR(ONE - di); + S[2] = HALF * SQR(di); + S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 @@ -65,24 +65,24 @@ namespace prtl_shape { // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1 / 6) * CUBE(ONE - di); - S[3] = static_cast(1 / 6) * CUBE(di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1 / 6) * CUBE(HALF - di); - S[3] = static_cast(1 / 6) * CUBE(HALF + di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1 / 6) * 
CUBE(HALF + di); - S[3] = static_cast(1 / 6) * CUBE(HALF + di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -94,37 +94,37 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1 / 25) * SQR(SQR(HALF - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(HALF + di)); - S[1] = static_cast(5 / 8) - SQR(ONE + di) + - static_cast(32 / 45) * CUBE(ONE + di) - - static_cast(98 / 675) * SQR(SQR(ONE + di)); - S[2] = static_cast(5 / 8) - SQR(di) + - static_cast(32 / 45) * CUBE(di) - - static_cast(98 / 675) * SQR(SQR(di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); + S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + + static_cast(32.0 / 45.0) * CUBE(ONE + di) - + static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); + S[2] = static_cast(5.0 / 8.0) - SQR(di) + + static_cast(32.0 / 45.0) * CUBE(di) - + static_cast(98.0 / 675.0) * SQR(SQR(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 1; - S[0] = static_cast(1 / 25) * SQR(SQR(THREE * HALF - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(di - HALF)); - S[1] = static_cast(5 / 8) - SQR(di) + - static_cast(32 / 45) * CUBE(di) - - static_cast(98 / 675) * SQR(SQR(di)); - S[2] = static_cast(5 / 8) - SQR(ONE - di) + - static_cast(32 / 45) * CUBE(ONE - di) - - static_cast(98 / 675) * SQR(SQR(ONE - di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(THREE * HALF - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[1] = static_cast(5.0 / 8.0) - SQR(di) + + static_cast(32.0 / 45.0) * CUBE(di) - + static_cast(98.0 / 675.0) * SQR(SQR(di)); + S[2] = static_cast(5.0 / 8.0) - SQR(ONE - di) + + static_cast(32.0 / 45.0) * CUBE(ONE - di) - + static_cast(98.0 / 675.0) * 
SQR(SQR(ONE - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = static_cast(1 / 25) * SQR(SQR(ONE - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(di)); - S[1] = static_cast(5 / 8) - SQR(HALF + di) + - static_cast(32 / 45) * CUBE(HALF + di) - - static_cast(98 / 675) * SQR(SQR(HALF + di)); - S[2] = static_cast(5 / 8) - SQR(HALF - di) + - static_cast(32 / 45) * CUBE(HALF - di) - - static_cast(98 / 675) * SQR(SQR(HALF - di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); + S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + + static_cast(32.0 / 45.0) * CUBE(HALF + di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); + S[2] = static_cast(5.0 / 8.0) - SQR(HALF - di) + + static_cast(32.0 / 45.0) * CUBE(HALF - di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } // staggered } else if constexpr (O == 5u) { @@ -133,146 +133,70 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1 / 135) * SQR(CUBE(ONE - di)); // - S[1] = static_cast(3 / 5) - SQR(ONE + di) + - static_cast(5 / 6) * CUBE(ONE + di) - - static_cast(19 / 72) * SQR(SQR(ONE + di)) + - static_cast(13 / 432) * SQR(CUBE(ONE + di)); - S[2] = static_cast(3 / 5) - SQR(di) + - static_cast(5 / 6) * CUBE(di) - - static_cast(19 / 72) * SQR(SQR(di)) + - static_cast(13 / 432) * SQR(CUBE(di)); - S[3] = static_cast(3 / 5) - SQR(ONE - di) + - static_cast(5 / 6) * CUBE(ONE - di) - - static_cast(19 / 72) * SQR(SQR(ONE - di)) + - static_cast(13 / 432) * SQR(CUBE(ONE - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(di)); + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(ONE - di)); // + S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + + static_cast(5.0 / 6.0) * CUBE(ONE + di) - + static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(ONE + di)); + S[2] = 
static_cast(3.0 / 5.0) - SQR(di) + + static_cast(5.0 / 6.0) * CUBE(di) - + static_cast(19.0 / 72.0) * SQR(SQR(di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di)); + S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + + static_cast(5.0 / 6.0) * CUBE(ONE - di) - + static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(ONE - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3 / 5) - + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); + S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(3 / 2) + di) + - static_cast(5 / 6) * + static_cast(5.0 / 6.0) * CUBE(static_cast(3 / 2) + di) - - static_cast(19 / 72) * + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(3 / 2) + di)) + - static_cast(13 / 432) * + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(3 / 2) + di)); - S[2] = static_cast(3 / 5) - SQR(HALF + di) + - static_cast(5 / 6) * CUBE(HALF + di) - - static_cast(19 / 72) * SQR(SQR(HALF + di)) + - static_cast(13 / 432) * SQR(CUBE(HALF + di)); - S[3] = static_cast(3 / 5) - SQR(HALF - di) + - static_cast(5 / 6) * CUBE(HALF - di) - - static_cast(19 / 72) * SQR(SQR(HALF - di)) + - static_cast(13 / 432) * SQR(CUBE(HALF - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[3] = static_cast(3.0 / 5.0) - SQR(HALF - di) + + static_cast(5.0 / 6.0) * CUBE(HALF - di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 2; - S[0] = static_cast(1 / 135) * + S[0] = static_cast(1.0 / 135.0) * 
SQR(CUBE(static_cast(3 / 2) - di)); - S[1] = static_cast(3 / 5) - SQR(HALF + di) + - static_cast(5 / 6) * CUBE(HALF + di) - - static_cast(19 / 72) * SQR(SQR(HALF + di)) + - static_cast(13 / 432) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3 / 5) - SQR(di - HALF) + - static_cast(5 / 6) * CUBE(di - HALF) - - static_cast(19 / 72) * SQR(SQR(di - HALF)) + - static_cast(13 / 432) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3 / 5) - + S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + + static_cast(5.0 / 6.0) * CUBE(di - HALF) - + static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); + S[3] = static_cast(3.0 / 5.0) - SQR(static_cast(3 / 2) - di) + - static_cast(5 / 6) * + static_cast(5.0 / 6.0) * CUBE(static_cast(3 / 2) - di) - - static_cast(19 / 72) * + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(3 / 2) - di)) + - static_cast(13 / 432) * + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(3 / 2) - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } // staggered } } - Inline void for_deposit_2nd(const int& i_init, - const real_t& di_init, - const int& i_fin, - const real_t& di_fin, - int& i_min, - real_t& iS_0, - real_t& iS_1, - real_t& iS_2, - real_t& iS_3, - real_t& fS_0, - real_t& fS_1, - real_t& fS_2, - real_t& fS_3) { - - /* - The second order shape function per particle is a 4 element array - where the shape function contributes to only 3 elements. 
- We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - 0 1 2 3 - ____________________________ - | x* | x* | x* | | // i_init_min = i_fin_min - |______|______|______|______| - | x | x* | x* | * | // i_init_min < i_fin_min - |______|______|______|______| - | * | x* | x* | x | // i_init_min > i_fin_min - |______|______|______|______| - */ - - int i_init_min, i_fin_min; - - real_t iS_[3], fS_[3]; - - order(i_init, di_init, i_init_min, iS_); - order(i_fin, di_fin, i_fin_min, fS_); - - if (i_init_min < i_fin_min) { - i_min = i_init_min; - iS_0 = iS_[0]; - iS_1 = iS_[1]; - iS_2 = iS_[2]; - iS_3 = ZERO; - - fS_0 = ZERO; - fS_1 = iS_[0]; - fS_2 = iS_[1]; - fS_3 = iS_[2]; - } else if (i_init_min > i_fin_min) { - i_min = i_fin_min; - iS_0 = ZERO; - iS_1 = iS_[0]; - iS_2 = iS_[1]; - iS_3 = iS_[2]; - - fS_0 = iS_[0]; - fS_1 = iS_[1]; - fS_2 = iS_[2]; - fS_3 = ZERO; - } else { - i_min = i_init_min; - iS_0 = iS_[0]; - iS_1 = iS_[1]; - iS_2 = iS_[2]; - iS_3 = ZERO; - - fS_0 = iS_[0]; - fS_1 = iS_[1]; - fS_2 = iS_[2]; - fS_3 = ZERO; - } - } template Inline void for_deposit(const int& i_init, From dbb6b42d5f192d911c91b9fda6d1321f5cc37dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 17:28:08 -0500 Subject: [PATCH 58/82] bugfix in 3rd order shape function --- src/kernels/particle_shapes.hpp | 143 ++++++++++++++++---------------- 1 file changed, 70 insertions(+), 73 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0ad83b10..d0e318f1 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -60,72 +60,75 @@ namespace prtl_shape { S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { - // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 + // 1/6 * ( 4 - 6 * 
|x|^2 + 3 * |x|^3) |x| < 1 // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); + (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered } else if constexpr (O == 4u) { - // 1/25 * ( 5/2 - |x|)^4 |x| < 3/2 - // S(x) = 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 3/2 ≤ |x| < 5/2 + // 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 |x| < 3/2 + // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 // 0.0 |x| ≥ 5/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + static_cast(32.0 / 45.0) * CUBE(ONE + di) - static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(di) + + S[2] = static_cast(5.0 / 8.0) - SQR(di) + static_cast(32.0 / 45.0) * CUBE(di) - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = ONE - S[0] - S[1] - S[2] - 
S[4]; + S[3] = static_cast(5.0 / 8.0) - SQR(ONE - di) + + static_cast(32.0 / 45.0) * CUBE(ONE - di) - + static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(THREE * HALF - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(static_cast(1.5) - di)); S[1] = static_cast(5.0 / 8.0) - SQR(di) + static_cast(32.0 / 45.0) * CUBE(di) - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[2] = static_cast(5.0 / 8.0) - SQR(ONE - di) + - static_cast(32.0 / 45.0) * CUBE(ONE - di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + + static_cast(32.0 / 45.0) * CUBE(TWO - di) - + static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } } else { // compute at i + 1/2 positions - i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); - S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + - static_cast(32.0 / 45.0) * CUBE(HALF + di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(HALF - di) + - static_cast(32.0 / 45.0) * CUBE(HALF - di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF - di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + i_min = i - 2; + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); + S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + + static_cast(32.0 / 45.0) * CUBE(HALF + di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); + S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + + static_cast(32.0 / 45.0) * CUBE(TWO - di) - + static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } // staggered } else 
if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -133,65 +136,59 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(ONE - di)); // + S[0] = static_cast(1.0 / 135.0) * SQR(SQR(ONE + di))*(ONE - di); S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + static_cast(5.0 / 6.0) * CUBE(ONE + di) - static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(ONE + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di) + + static_cast(13.0 / 432.0) * SQR(SQR(ONE + di))*(ONE + di); + S[2] = static_cast(3.0 / 5.0) - SQR(di) + static_cast(5.0 / 6.0) * CUBE(di) - static_cast(19.0 / 72.0) * SQR(SQR(di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di)); - S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + + static_cast(13.0 / 432.0) * SQR(SQR(di)) * di; + S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + static_cast(5.0 / 6.0) * CUBE(ONE - di) - static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(ONE - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + static_cast(13.0 / 432.0) * SQR(SQR(ONE - di))*(ONE - di); + S[4] = static_cast(3.0 / 5.0) - SQR(TWO - di) + + static_cast(5.0 / 6.0) * CUBE(TWO - di) - + static_cast(19.0 / 72.0) * SQR(SQR(TWO - di)) + + static_cast(13.0 / 432.0) * SQR(SQR(TWO - di))*(TWO - di); + S[5] = static_cast(1.0 / 135.0) * SQR(SQR(di))*di; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3.0 / 5.0) - - SQR(static_cast(3 / 2) + di) + - static_cast(5.0 / 6.0) * - CUBE(static_cast(3 / 2) + di) - - static_cast(19.0 / 72.0) * - SQR(SQR(static_cast(3 / 2) + di)) + - static_cast(13.0 / 432.0) * - SQR(CUBE(static_cast(3 / 2) + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + 
di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[3] = static_cast(3.0 / 5.0) - SQR(HALF - di) + - static_cast(5.0 / 6.0) * CUBE(HALF - di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) + di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) + di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) - di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) - di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; } else { i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * - SQR(CUBE(static_cast(3 / 2) - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + - static_cast(5.0 / 6.0) * CUBE(di - HALF) - - static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3.0 / 5.0) - - SQR(static_cast(3 / 2) - di) + - static_cast(5.0 / 6.0) * - CUBE(static_cast(3 / 2) - di) - - static_cast(19.0 / 72.0) * - SQR(SQR(static_cast(3 / 2) - di)) + - static_cast(13.0 / 432.0) * - SQR(CUBE(static_cast(3 / 2) - di)); - S[5] = static_cast(1.0 / 
135.0) * SQR(CUBE(di - HALF)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(static_cast(1.5) - di)); + S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + + static_cast(5.0 / 6.0) * CUBE(di - HALF) - + static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); + S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(2.5) - di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(2.5) - di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(2.5) - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(2.5) - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; } } // staggered } From ba443748baccaf484c36c3fe1ce727399d0179b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:01:35 -0500 Subject: [PATCH 59/82] shape function up-to 8th order --- src/engines/srpic.hpp | 6 + src/kernels/currents_deposit.hpp | 2 +- src/kernels/particle_shapes.hpp | 349 ++++++++++++++++++++++++------- 3 files changed, 278 insertions(+), 79 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index a99d33d6..9fc4b7fc 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -555,6 +555,12 @@ namespace ntt { deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 5) { deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 6) { + deposit_with<6u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 7) { + deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 8) { + deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for 
current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 040503bc..c0991650 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -401,7 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 5u)) { + } else if constexpr ((O >= 1u) and (O <= 8u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index d0e318f1..2ebf87ac 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,6 +17,172 @@ namespace prtl_shape { + Inline real_t S4(real_t x) + { + if (x < HALF) + { + return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + + INV_4 * SQR(SQR(x)); + } else if (x < static_cast(1.5)) + { + return static_cast(55.0/96.0) + static_cast(5.0/24.0) * x - + static_cast(5.0 / 4.0) * SQR(x) + static_cast(5.0 / 6.0) * CUBE(x) - + static_cast(1.0 / 6.0) * SQR(SQR(x)); + } else if (x < static_cast(2.5)) + { + return static_cast(625.0/384.0) - static_cast(125.0/48.0) * x + + static_cast(25.0 / 16.0) * SQR(x) - static_cast(5.0 / 12.0) * CUBE(x) + + static_cast(1.0 / 24.0) * SQR(SQR(x)); + } else { + return ZERO; + } + } + + Inline real_t S5(real_t x) + { + if (x <= ONE) + { + return static_cast(11.0 / 20.0) - HALF * SQR(x) + + INV_4 * SQR(SQR(x)) - static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); + } else if (x < TWO) + { + return static_cast(17.0/40.0) + FIVE * INV_8 * x - + static_cast(7.0) * INV_4 * SQR(x) + FIVE * INV_4 * CUBE(x) - + THREE * INV_8 * SQR(SQR(x)) + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); + } else if (x < THREE) + { + return static_cast(81.0/40.0) - static_cast(27.0/8.0) * x + + static_cast(9.0) * INV_4 * SQR(x) - THREE_FOURTHS * CUBE(x) + + INV_8 * SQR(SQR(x)) - static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); + } else { + return ZERO; + } + } + + Inline real_t 
S6(real_t x) + { + if (x <= HALF) { + return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(1.5)) { + return static_cast(7861.0/15360.0) - static_cast(7.0/768.0) * x - + static_cast(91.0/256.0) * SQR(x) - static_cast(35.0/288.0) * CUBE(x) + + static_cast(21.0/64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 48.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(2.5)) { + return static_cast(1379.0/7680.0) + + static_cast(1267.0/960.0) * x - + static_cast(329.0/128.0) * SQR(x) + + static_cast(133.0/72.0) * CUBE(x) - + static_cast(21.0/32.0) * SQR(SQR(x)) + + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 120.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(3.5)) { + return static_cast(117649.0/46080.0) - + static_cast(16807.0/3840.0) * x + + static_cast(2401.0/768.0) * SQR(x) - + static_cast(343.0/288.0) * CUBE(x) + + static_cast(49.0/192.0) * SQR(SQR(x)) - + static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 720.0) * SQR(SQR(x)) * SQR(x); + } else { + return ZERO; + } + } + + Inline real_t S7(real_t x) + { + if (x < ONE) { + return static_cast(151.0) / static_cast(315.0) - + THIRD * SQR(x) + + static_cast(1.0) / static_cast(9.0) * SQR(SQR(x)) - + static_cast(1.0) / static_cast(36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0) / static_cast(144.0) * SQR(SQR(x)) * CUBE(x); + } else if (x <= TWO) { + return static_cast(103.0)/static_cast(210.0) - + static_cast(7.0)/static_cast(90.0) * x - + static_cast(1.0)/static_cast(10.0) * SQR(x) - + static_cast(7.0)/static_cast(18.0) * CUBE(x) + + HALF * SQR(SQR(x)) - + static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + + static_cast(1.0) / static_cast(20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0) / static_cast(270.0) * SQR(SQR(x)) * CUBE(x); + } else if (x < THREE) { + return 
static_cast(217.0)/static_cast(90.0) * x - + static_cast(23.0)/static_cast(6.0) * SQR(x) + + static_cast(49.0)/static_cast(18.0) * CUBE(x) - + static_cast(19.0)/static_cast(18.0) * SQR(SQR(x)) + + static_cast(7.0)/static_cast(30.0) * CUBE(x) * SQR(x) - + static_cast(1.0)/static_cast(36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0)/static_cast(720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0)/static_cast(630.0); + } else if (x < FOUR) { + return static_cast(1024.0)/static_cast(315.0) - + static_cast(256.0)/static_cast(45.0) * x + + static_cast(64.0)/static_cast(15.0) * SQR(x) - + static_cast(16.0)/static_cast(9.0) * CUBE(x) + + static_cast(4.0)/static_cast(9.0) * SQR(SQR(x)) - + static_cast(1.0)/static_cast(15.0) * CUBE(x) * SQR(x) + + static_cast(1.0)/static_cast(180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0)/static_cast(5040.0) * SQR(SQR(x)) * CUBE(x); + } else { + return ZERO; + } + } + + Inline real_t S8(real_t x) + { + if (x < HALF) { + return static_cast(259723.0 / 573440.0) - + static_cast(289.0 / 1024.0) * SQR(x) + + static_cast(43.0 / 512.0) * SQR(SQR(x)) - + static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); + } else if (x <= static_cast(1.5)) { + return static_cast(64929.0/143360.0) + + static_cast(1.0/5120.0) * x - + static_cast(363.0/1280.0) * SQR(x) + + static_cast(7.0/1280.0) * CUBE(x) + + static_cast(9.0/128.0) * SQR(SQR(x)) + + static_cast(7.0 / 320.0) * CUBE(x) * SQR(x) - + static_cast(3.0 / 80.0) * SQR(CUBE(x)) + + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 720.0) * SQR(SQR(SQR(x))); + } else if (x < static_cast(2.5)) { + return static_cast(145167.0/286720.0) - + static_cast(1457.0/5120.0) * x + + static_cast(195.0/512.0) * SQR(x) - + static_cast(1127.0/1280.0) * CUBE(x) + + static_cast(207.0/256.0) * SQR(SQR(x)) - + static_cast(119.0 / 320.0) * CUBE(x) * SQR(x) + + static_cast(3.0 / 32.0) * SQR(CUBE(x)) - + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) + + 
static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))); + } else if (x < static_cast(3.5)) { + return static_cast(146051.0/35840.0) * x - + static_cast(1465.0/256.0) * SQR(x) + + static_cast(5123.0/1280.0) * CUBE(x) - + static_cast(209.0/128.0) * SQR(SQR(x)) + + static_cast(131.0 / 320.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 16.0) * SQR(CUBE(x)) + + static_cast(3.0 / 560.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 5040.0) * SQR(SQR(SQR(x))) - + static_cast(122729.0/143360.0); + } else if (x < static_cast(4.5)) { + return static_cast(4782969.0/1146880.0) - + static_cast(531441.0/71680.0) * x + + static_cast(59049.0/10240.0) * SQR(x) - + static_cast(6561.0/2560.0) * CUBE(x) + + static_cast(729.0/1024.0) * SQR(SQR(x)) - + static_cast(81.0 / 640.0) * CUBE(x) * SQR(x) + + static_cast(9.0 / 640.0) * SQR(CUBE(x)) - + static_cast(1.0 / 1120.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(1.0 / 40320.0) * SQR(SQR(SQR(x))); + } else { + return ZERO; + } + } + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { @@ -92,43 +258,36 @@ namespace prtl_shape { // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 // 0.0 |x| ≥ 5/2 if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + - static_cast(32.0 / 45.0) * CUBE(ONE + di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(di) + - static_cast(32.0 / 45.0) * CUBE(di) - - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = static_cast(5.0 / 8.0) - SQR(ONE - di) + - static_cast(32.0 / 45.0) * CUBE(ONE - di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], 
S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } else { i_min = i - 1; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(static_cast(1.5) - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(di) + - static_cast(32.0 / 45.0) * CUBE(di) - - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + - static_cast(32.0 / 45.0) * CUBE(TWO - di) - - static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + - static_cast(32.0 / 45.0) * CUBE(HALF + di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); - S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + - static_cast(32.0 / 45.0) * CUBE(TWO - di) - - static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); + } // staggered } else if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -136,61 +295,95 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(SQR(ONE + di))*(ONE - di); - S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + - static_cast(5.0 / 6.0) * CUBE(ONE + di) - - static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + - static_cast(13.0 / 
432.0) * SQR(SQR(ONE + di))*(ONE + di); - S[2] = static_cast(3.0 / 5.0) - SQR(di) + - static_cast(5.0 / 6.0) * CUBE(di) - - static_cast(19.0 / 72.0) * SQR(SQR(di)) + - static_cast(13.0 / 432.0) * SQR(SQR(di)) * di; - S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + - static_cast(5.0 / 6.0) * CUBE(ONE - di) - - static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + - static_cast(13.0 / 432.0) * SQR(SQR(ONE - di))*(ONE - di); - S[4] = static_cast(3.0 / 5.0) - SQR(TWO - di) + - static_cast(5.0 / 6.0) * CUBE(TWO - di) - - static_cast(19.0 / 72.0) * SQR(SQR(TWO - di)) + - static_cast(13.0 / 432.0) * SQR(SQR(TWO - di))*(TWO - di); - S[5] = static_cast(1.0 / 135.0) * SQR(SQR(di))*di; + + for (int n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4], S[5]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4] + S[5]); } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) + di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) + di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) - di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) - di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; + + for (int n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } + } else { + i_min = i - 2; + for (int 
n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } + } + } // staggered + } else if constexpr (O == 6u) { + if constexpr (not STAGGERED) { // compute at i positions + + if (di < HALF) { + i_min = i - 3; + + for (int n = 0; n < 7; n++) { + S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); + } } else { i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(static_cast(1.5) - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + - static_cast(5.0 / 6.0) * CUBE(di - HALF) - - static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); - S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(2.5) - di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(2.5) - di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(2.5) - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(2.5) - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; + + for (int n = 0; n < 5; n++) { + S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); + } } + } else { // compute at i + 1/2 positions + i_min = i - 3; + + for (int n = 0; n < 5; n++) { + S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } } // staggered + } else if constexpr (O == 7u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 3; + + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 4; + + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } else { + i_min = i - 2; + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } + } + } // staggered + 
} else if constexpr (O == 8u) { + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 4; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); + } + } else { + i_min = i - 3; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); + } + } + } else { // compute at i + 1/2 positions + i_min = i - 4; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } // staggered + } else { + //ERROR("Interpolation order not implemented"); } } From 13a85226e3717dbd2c618b056df262276e8e7d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:08:12 -0500 Subject: [PATCH 60/82] bugfix, loop unroll pragma and error handling fix --- src/kernels/particle_shapes.hpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 2ebf87ac..8b6e6111 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -262,6 +262,7 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -271,6 +272,7 @@ namespace prtl_shape { } else { i_min = i - 1; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } @@ -281,6 +283,7 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } @@ -296,6 +299,7 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; + #pragma unroll for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -306,11 +310,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; + #pragma unroll for (int n = 0; n < 6; 
n++) { S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } } else { i_min = i - 2; + + #pragma unroll for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } @@ -322,12 +329,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; + #pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); } } else { i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -335,6 +344,7 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 3; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } @@ -343,6 +353,7 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions i_min = i - 3; + #pragma unroll for (int n = 0; n < 8; n++) { S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); } @@ -354,7 +365,9 @@ namespace prtl_shape { S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } else { - i_min = i - 2; + i_min = i - 3; + + #pragma unroll for (int n = 0; n < 8; n++) { S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } @@ -365,12 +378,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 4; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { i_min = i - 3; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); } @@ -378,12 +393,13 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 4; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } // staggered } else { - //ERROR("Interpolation order not implemented"); + raise::KernelError(HERE, "Unsupported interpolation order"); } } From af001b7f00058a03047cf813df03b59e309ebf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 
10 Sep 2025 23:15:01 -0500 Subject: [PATCH 61/82] remove print --- src/kernels/particle_shapes.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 8b6e6111..0ad3d0fd 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -266,9 +266,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } else { i_min = i - 1; @@ -276,9 +273,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } } else { // compute at i + 1/2 positions i_min = i - 2; @@ -287,10 +281,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); - } // staggered } else if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -303,9 +293,6 @@ namespace prtl_shape { for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4], S[5]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4] + S[5]); } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; From 9b3bcd5d4384ee14a104a5b722a81d6557edd35f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:28:30 -0500 Subject: [PATCH 62/82] bugfixes --- src/kernels/particle_shapes.hpp | 39 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 
deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0ad3d0fd..46300cf0 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,7 +17,7 @@ namespace prtl_shape { - Inline real_t S4(real_t x) + Inline real_t S4(const real_t x) { if (x < HALF) { @@ -38,7 +38,7 @@ namespace prtl_shape { } } - Inline real_t S5(real_t x) + Inline real_t S5(const real_t x) { if (x <= ONE) { @@ -59,16 +59,21 @@ namespace prtl_shape { } } - Inline real_t S6(real_t x) + Inline real_t S6(const real_t x) { if (x <= HALF) { - return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + - static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x); + return static_cast(5887.0 / 11520.0) - + static_cast(77.0 / 192.0) * SQR(x) + + static_cast(7.0 / 48.0) * SQR(SQR(x)) - + static_cast(1.0 / 36.0) * SQR(CUBE(x)); } else if (x < static_cast(1.5)) { - return static_cast(7861.0/15360.0) - static_cast(7.0/768.0) * x - - static_cast(91.0/256.0) * SQR(x) - static_cast(35.0/288.0) * CUBE(x) + - static_cast(21.0/64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + - static_cast(1.0 / 48.0) * SQR(SQR(x)) * SQR(x); + return static_cast(7861.0/15360.0) - + static_cast(7.0/768.0) * x - + static_cast(91.0/256.0) * SQR(x) - + static_cast(35.0/288.0) * CUBE(x) + + static_cast(21.0/64.0) * SQR(SQR(x)) - + static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 48.0) * SQR(CUBE(x)); } else if (x < static_cast(2.5)) { return static_cast(1379.0/7680.0) + static_cast(1267.0/960.0) * x - @@ -76,7 +81,7 @@ namespace prtl_shape { static_cast(133.0/72.0) * CUBE(x) - static_cast(21.0/32.0) * SQR(SQR(x)) + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - - static_cast(1.0 / 120.0) * SQR(SQR(x)) * SQR(x); + static_cast(1.0 / 120.0) * SQR(CUBE(x)); } else if (x < static_cast(3.5)) { return static_cast(117649.0/46080.0) - static_cast(16807.0/3840.0) * x + @@ -84,13 +89,13 @@ namespace 
prtl_shape { static_cast(343.0/288.0) * CUBE(x) + static_cast(49.0/192.0) * SQR(SQR(x)) - static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + - static_cast(1.0 / 720.0) * SQR(SQR(x)) * SQR(x); + static_cast(1.0 / 720.0) * SQR(CUBE(x)); } else { return ZERO; } } - Inline real_t S7(real_t x) + Inline real_t S7(const real_t x) { if (x < ONE) { return static_cast(151.0) / static_cast(315.0) - @@ -106,7 +111,7 @@ namespace prtl_shape { HALF * SQR(SQR(x)) - static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + static_cast(1.0) / static_cast(20.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0) / static_cast(270.0) * SQR(SQR(x)) * CUBE(x); + static_cast(1.0) / static_cast(240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { return static_cast(217.0)/static_cast(90.0) * x - static_cast(23.0)/static_cast(6.0) * SQR(x) + @@ -130,7 +135,7 @@ namespace prtl_shape { } } - Inline real_t S8(real_t x) + Inline real_t S8(const real_t x) { if (x < HALF) { return static_cast(259723.0 / 573440.0) - @@ -324,15 +329,15 @@ namespace prtl_shape { i_min = i - 2; #pragma unroll - for (int n = 0; n < 5; n++) { - S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); + for (int n = 0; n < 7; n++) { + S[n] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } } } else { // compute at i + 1/2 positions i_min = i - 3; #pragma unroll - for (int n = 0; n < 5; n++) { + for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } } // staggered From 7741cc59379cbc48ed470986842b5ebbe04f443f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 01:40:31 -0500 Subject: [PATCH 63/82] 9th order shape function --- src/engines/srpic.hpp | 2 + src/kernels/currents_deposit.hpp | 2 +- src/kernels/particle_shapes.hpp | 136 +++++++++++++++++++++++++------ 3 files changed, 112 insertions(+), 28 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 9fc4b7fc..c0ba54c3 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ 
-561,6 +561,8 @@ namespace ntt { deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 8) { deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 9) { + deposit_with<9u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index c0991650..f8c8607a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -401,7 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 8u)) { + } else if constexpr ((O >= 1u) and (O <= 9u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 46300cf0..12f126a9 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -98,38 +98,38 @@ namespace prtl_shape { Inline real_t S7(const real_t x) { if (x < ONE) { - return static_cast(151.0) / static_cast(315.0) - + return static_cast(151.0/315.0) - THIRD * SQR(x) + - static_cast(1.0) / static_cast(9.0) * SQR(SQR(x)) - - static_cast(1.0) / static_cast(36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0) / static_cast(144.0) * SQR(SQR(x)) * CUBE(x); + static_cast(1.0/9.0) * SQR(SQR(x)) - + static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0/144.0) * SQR(SQR(x)) * CUBE(x); } else if (x <= TWO) { - return static_cast(103.0)/static_cast(210.0) - - static_cast(7.0)/static_cast(90.0) * x - - static_cast(1.0)/static_cast(10.0) * SQR(x) - - static_cast(7.0)/static_cast(18.0) * CUBE(x) + + return static_cast(103.0/210.0) - + static_cast(7.0/90.0) * x - + static_cast(1.0/10.0) * SQR(x) - + static_cast(7.0/18.0) * CUBE(x) + HALF * SQR(SQR(x)) - - static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + - static_cast(1.0) / static_cast(20.0) * 
SQR(SQR(x)) * SQR(x) - - static_cast(1.0) / static_cast(240.0) * SQR(SQR(x)) * CUBE(x); + static_cast(7.0/30.0) * CUBE(x) * SQR(x) + + static_cast(1.0/20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0/240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { - return static_cast(217.0)/static_cast(90.0) * x - - static_cast(23.0)/static_cast(6.0) * SQR(x) + - static_cast(49.0)/static_cast(18.0) * CUBE(x) - - static_cast(19.0)/static_cast(18.0) * SQR(SQR(x)) + - static_cast(7.0)/static_cast(30.0) * CUBE(x) * SQR(x) - - static_cast(1.0)/static_cast(36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0)/static_cast(720.0) * SQR(SQR(x)) * CUBE(x) - - static_cast(139.0)/static_cast(630.0); + return static_cast(217.0/90.0) * x - + static_cast(23.0/6.0) * SQR(x) + + static_cast(49.0/18.0) * CUBE(x) - + static_cast(19.0/18.0) * SQR(SQR(x)) + + static_cast(7.0/30.0) * CUBE(x) * SQR(x) - + static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0/720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0/630.0); } else if (x < FOUR) { - return static_cast(1024.0)/static_cast(315.0) - - static_cast(256.0)/static_cast(45.0) * x + - static_cast(64.0)/static_cast(15.0) * SQR(x) - - static_cast(16.0)/static_cast(9.0) * CUBE(x) + - static_cast(4.0)/static_cast(9.0) * SQR(SQR(x)) - - static_cast(1.0)/static_cast(15.0) * CUBE(x) * SQR(x) + - static_cast(1.0)/static_cast(180.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0)/static_cast(5040.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(1024.0/315.0) - + static_cast(256.0/45.0) * x + + static_cast(64.0/15.0) * SQR(x) - + static_cast(16.0/9.0) * CUBE(x) + + static_cast(4.0/9.0) * SQR(SQR(x)) - + static_cast(1.0/15.0) * CUBE(x) * SQR(x) + + static_cast(1.0/180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0/5040.0) * SQR(SQR(x)) * CUBE(x); } else { return ZERO; } @@ -188,6 +188,64 @@ namespace prtl_shape { } } + Inline real_t S9(const real_t x) + { + if (x <= ONE) { + return static_cast(15619.0 / 36288.0) - + static_cast(35.0 / 144.0) * SQR(x) + 
+ static_cast(19.0 / 288.0) * SQR(SQR(x)) - + static_cast(5.0 / 432.0) * SQR(CUBE(x)) + + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 2880.0) * SQR(SQR(SQR(x))) * x; + } else if (x < TWO) { + return static_cast(7799.0/18144.0) + + static_cast(1.0/192.0) * x - + static_cast(19.0/72.0) * SQR(x) + + static_cast(7.0/144.0) * CUBE(x) - + static_cast(1.0/144.0) * SQR(SQR(x)) + + static_cast(7.0 / 96.0) * CUBE(x) * SQR(x) - + static_cast(13.0 / 216.0) * SQR(CUBE(x)) + + static_cast(1.0 / 48.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 288.0) * SQR(SQR(SQR(x))) + + static_cast(1.0 / 4320.0) * CUBE(CUBE(x)); + } else if (x <= THREE) { + return static_cast(1553.0/2592.0) - + static_cast(339.0/448.0) * x + + static_cast(635.0/504.0) * SQR(x) - + static_cast(83.0/48.0) * CUBE(x) + + static_cast(191.0/144.0) * SQR(SQR(x)) - + static_cast(19.0 / 32.0) * CUBE(x) * SQR(x) + + static_cast(35.0 / 216.0) * SQR(CUBE(x)) - + static_cast(3.0 / 112.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(5.0 / 2016.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 10080.0) * CUBE(CUBE(x)); + } else if (x < FOUR) { + return static_cast(5883.0/896.0) * x - + static_cast(2449.0/288.0) * SQR(x) + + static_cast(563.0/96.0) * CUBE(x) - + static_cast(1423.0/576.0) * SQR(SQR(x)) + + static_cast(43.0/64.0) * CUBE(x) * SQR(x) - + static_cast(103.0/864.0) * SQR(CUBE(x)) + + static_cast(3.0 / 224.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 1152.0) * SQR(SQR(SQR(x))) + + static_cast(1.0 / 40320.0) * CUBE(CUBE(x)) - + static_cast(133663.0/72576.0); + } else if (x < FIVE) { + return static_cast(390625.0/72576.0) - + static_cast(78125.0/8064.0) * x + + static_cast(15625.0/2016.0) * SQR(x) - + static_cast(3125.0/864.0) * CUBE(x) + + static_cast(625.0/576.0) * SQR(SQR(x)) - + static_cast(125.0 / 576.0) * CUBE(x) * SQR(x) + + static_cast(25.0 / 864.0) * SQR(CUBE(x)) - + static_cast(5.0 / 2016.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(1.0 / 8064.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 
362880.0) * CUBE(CUBE(x)); + } else { + return ZERO; + } + } + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { @@ -390,6 +448,30 @@ namespace prtl_shape { S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } // staggered + } else if constexpr (O == 9u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 4; + + #pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 5; + + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + } + } else { + i_min = i - 4; + + #pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } + } // staggered } else { raise::KernelError(HERE, "Unsupported interpolation order"); } From e0d39ee0a579a92f845707bd3f37ae7095e7a0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 10:55:49 -0700 Subject: [PATCH 64/82] applied formatting, improved comments and error handling --- src/kernels/currents_deposit.hpp | 18 +- src/kernels/particle_shapes.hpp | 494 +++++++++++++++++++------------ 2 files changed, 316 insertions(+), 196 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index f8c8607a..1299be69 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -449,12 +449,12 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 (simplified) - Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + - iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } @@ -701,12 +701,12 @@ namespace kernel { } } - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } - }; - } // namespace kernel + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 12f126a9..0109ba94 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,77 +17,101 @@ namespace prtl_shape { - Inline real_t S4(const real_t x) - { - if (x < HALF) - { - return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + - INV_4 * SQR(SQR(x)); - } else if (x < static_cast(1.5)) - { - return static_cast(55.0/96.0) + static_cast(5.0/24.0) * x - - static_cast(5.0 / 4.0) * SQR(x) + static_cast(5.0 / 6.0) * CUBE(x) - + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on + Inline real_t S4(const real_t x) { + if (x < HALF) { + return static_cast(115.0 / 192.0) - + static_cast(5.0 / 8.0) * SQR(x) + INV_4 * SQR(SQR(x)); + } else if (x < static_cast(1.5)) { + return static_cast(55.0 / 96.0) + + static_cast(5.0 / 24.0) * x - + 
static_cast(5.0 / 4.0) * SQR(x) + + static_cast(5.0 / 6.0) * CUBE(x) - static_cast(1.0 / 6.0) * SQR(SQR(x)); - } else if (x < static_cast(2.5)) - { - return static_cast(625.0/384.0) - static_cast(125.0/48.0) * x + - static_cast(25.0 / 16.0) * SQR(x) - static_cast(5.0 / 12.0) * CUBE(x) + + } else if (x < static_cast(2.5)) { + return static_cast(625.0 / 384.0) - + static_cast(125.0 / 48.0) * x + + static_cast(25.0 / 16.0) * SQR(x) - + static_cast(5.0 / 12.0) * CUBE(x) + static_cast(1.0 / 24.0) * SQR(SQR(x)); } else { return ZERO; } } - Inline real_t S5(const real_t x) - { - if (x <= ONE) - { + // clang-format off + // S5(x) = + // 11/20 - (1/2) * |x|^2 + (1/4) * |x|^4 - (1/12) * |x|^5 if |x| ≤ 1 + // 17/40 + (5/8) * |x| - (7/4) * |x|^2 + (5/4) * |x|^3 - (3/8) * |x|^4 + (1/24) * |x|^5 if 1 < |x| < 2 + // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 + // 0.0 if |x| > 3 + // clang-format on + Inline real_t S5(const real_t x) { + if (x <= ONE) { return static_cast(11.0 / 20.0) - HALF * SQR(x) + - INV_4 * SQR(SQR(x)) - static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); - } else if (x < TWO) - { - return static_cast(17.0/40.0) + FIVE * INV_8 * x - - static_cast(7.0) * INV_4 * SQR(x) + FIVE * INV_4 * CUBE(x) - - THREE * INV_8 * SQR(SQR(x)) + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); - } else if (x < THREE) - { - return static_cast(81.0/40.0) - static_cast(27.0/8.0) * x + - static_cast(9.0) * INV_4 * SQR(x) - THREE_FOURTHS * CUBE(x) + - INV_8 * SQR(SQR(x)) - static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); + INV_4 * SQR(SQR(x)) - + static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); + } else if (x < TWO) { + return static_cast(17.0 / 40.0) + static_cast(5.0 / 8.0) * x - + static_cast(7.0 / 4.0) * SQR(x) + + static_cast(5.0 / 4.0) * CUBE(x) - + static_cast(3.0 / 8.0) * SQR(SQR(x)) + + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); + } else if (x < THREE) { + return static_cast(81.0 / 40.0) - + static_cast(27.0 / 8.0) * x + + 
static_cast(9.0 / 4.0) * SQR(x) - THREE_FOURTHS * CUBE(x) + + INV_8 * SQR(SQR(x)) - + static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); } else { return ZERO; } } - Inline real_t S6(const real_t x) - { + // clang-format off + // S6(x) = + // 5887/11520 - (77/192) * |x|^2 + (7/48) * |x|^4 - (1/36) * |x|^6 if |x| ≤ 1/2 + // 7861/15360 - (7/768) * |x| - (91/256) * |x|^2 - (35/288) * |x|^3 + (21/64) * |x|^4 + // - (7/48) * |x|^5 + (1/48) * |x|^6 if 1/2 < |x| < 3/2 + // 1379/7680 + (1267/960) * |x| - (329/128) * |x|^2 + (133/72) * |x|^3 + // - (21/32) * |x|^4 + (7/60) * |x|^5 - (1/120) * |x|^6 if 3/2 ≤ |x| < 5/2 + // 117649/46080 - (16807/3840) * |x| + (2401/768) * |x|^2 - (343/288) * |x|^3 + // + (49/192) * |x|^4 - (7/240) * |x|^5 + (1/720) * |x|^6 if 5/2 ≤ |x| < 7/2 + // 0.0 if |x| ≥ 7/2 + // clang-format on + Inline real_t S6(const real_t x) { if (x <= HALF) { - return static_cast(5887.0 / 11520.0) - + return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + - static_cast(7.0 / 48.0) * SQR(SQR(x)) - + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(CUBE(x)); } else if (x < static_cast(1.5)) { - return static_cast(7861.0/15360.0) - - static_cast(7.0/768.0) * x - - static_cast(91.0/256.0) * SQR(x) - - static_cast(35.0/288.0) * CUBE(x) + - static_cast(21.0/64.0) * SQR(SQR(x)) - + return static_cast(7861.0 / 15360.0) - + static_cast(7.0 / 768.0) * x - + static_cast(91.0 / 256.0) * SQR(x) - + static_cast(35.0 / 288.0) * CUBE(x) + + static_cast(21.0 / 64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + static_cast(1.0 / 48.0) * SQR(CUBE(x)); } else if (x < static_cast(2.5)) { - return static_cast(1379.0/7680.0) + - static_cast(1267.0/960.0) * x - - static_cast(329.0/128.0) * SQR(x) + - static_cast(133.0/72.0) * CUBE(x) - - static_cast(21.0/32.0) * SQR(SQR(x)) + + return static_cast(1379.0 / 7680.0) + + static_cast(1267.0 / 960.0) * x - + static_cast(329.0 / 128.0) * SQR(x) + + static_cast(133.0 / 72.0) * CUBE(x) - + 
static_cast(21.0 / 32.0) * SQR(SQR(x)) + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - static_cast(1.0 / 120.0) * SQR(CUBE(x)); } else if (x < static_cast(3.5)) { - return static_cast(117649.0/46080.0) - - static_cast(16807.0/3840.0) * x + - static_cast(2401.0/768.0) * SQR(x) - - static_cast(343.0/288.0) * CUBE(x) + - static_cast(49.0/192.0) * SQR(SQR(x)) - + return static_cast(117649.0 / 46080.0) - + static_cast(16807.0 / 3840.0) * x + + static_cast(2401.0 / 768.0) * SQR(x) - + static_cast(343.0 / 288.0) * CUBE(x) + + static_cast(49.0 / 192.0) * SQR(SQR(x)) - static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + static_cast(1.0 / 720.0) * SQR(CUBE(x)); } else { @@ -95,90 +119,110 @@ namespace prtl_shape { } } - Inline real_t S7(const real_t x) - { + // clang-format off + // S7(x) = + // 151/315 - (1/3) * |x|^2 + (1/9) * |x|^4 - (1/36) * |x|^6 + (1/144) * |x|^7 if |x| < 1 + // 103/210 - (7/90) * |x| - (1/10) * |x|^2 - (7/18) * |x|^3 + (1/2) * |x|^4 + // - (7/30) * |x|^5 + (1/20) * |x|^6 - (1/240) * |x|^7 if 1 ≤ |x| ≤ 2 + // (217/90) * |x| - (23/6) * |x|^2 + (49/18) * |x|^3 - (19/18) * |x|^4 + // + (7/30) * |x|^5 - (1/36) * |x|^6 + (1/720) * |x|^7 - (139/630) if 2 < |x| < 3 + // 1024/315 - (256/45) * |x| + (64/15) * |x|^2 - (16/9) * |x|^3 + (4/9) * |x|^4 + // - (1/15) * |x|^5 + (1/180) * |x|^6 - (1/5040) * |x|^7 if 3 ≤ |x| < 4 + // 0.0 if |x| ≥ 4 + // clang-format on + Inline real_t S7(const real_t x) { if (x < ONE) { - return static_cast(151.0/315.0) - - THIRD * SQR(x) + - static_cast(1.0/9.0) * SQR(SQR(x)) - - static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0/144.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(151.0 / 315.0) - THIRD * SQR(x) + + static_cast(1.0 / 9.0) * SQR(SQR(x)) - + static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 144.0) * SQR(SQR(x)) * CUBE(x); } else if (x <= TWO) { - return static_cast(103.0/210.0) - - static_cast(7.0/90.0) * x - - static_cast(1.0/10.0) * SQR(x) - - static_cast(7.0/18.0) * CUBE(x) + - HALF * 
SQR(SQR(x)) - - static_cast(7.0/30.0) * CUBE(x) * SQR(x) + - static_cast(1.0/20.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0/240.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(103.0 / 210.0) - + static_cast(7.0 / 90.0) * x - + static_cast(1.0 / 10.0) * SQR(x) - + static_cast(7.0 / 18.0) * CUBE(x) + HALF * SQR(SQR(x)) - + static_cast(7.0 / 30.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0 / 240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { - return static_cast(217.0/90.0) * x - - static_cast(23.0/6.0) * SQR(x) + - static_cast(49.0/18.0) * CUBE(x) - - static_cast(19.0/18.0) * SQR(SQR(x)) + - static_cast(7.0/30.0) * CUBE(x) * SQR(x) - - static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0/720.0) * SQR(SQR(x)) * CUBE(x) - - static_cast(139.0/630.0); + return static_cast(217.0 / 90.0) * x - + static_cast(23.0 / 6.0) * SQR(x) + + static_cast(49.0 / 18.0) * CUBE(x) - + static_cast(19.0 / 18.0) * SQR(SQR(x)) + + static_cast(7.0 / 30.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0 / 630.0); } else if (x < FOUR) { - return static_cast(1024.0/315.0) - - static_cast(256.0/45.0) * x + - static_cast(64.0/15.0) * SQR(x) - - static_cast(16.0/9.0) * CUBE(x) + - static_cast(4.0/9.0) * SQR(SQR(x)) - - static_cast(1.0/15.0) * CUBE(x) * SQR(x) + - static_cast(1.0/180.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0/5040.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(1024.0 / 315.0) - + static_cast(256.0 / 45.0) * x + + static_cast(64.0 / 15.0) * SQR(x) - + static_cast(16.0 / 9.0) * CUBE(x) + + static_cast(4.0 / 9.0) * SQR(SQR(x)) - + static_cast(1.0 / 15.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0 / 5040.0) * SQR(SQR(x)) * CUBE(x); } else { return ZERO; } } - Inline real_t S8(const real_t x) - { + // clang-format off + // S8(x) = + // 259723/573440 - (289/1024) * |x|^2 + 
(43/512) * |x|^4 - (1/64) * |x|^6 + (1/576) * |x|^8 if |x| < 1/2 + // 64929/143360 + (1/5120) * |x| - (363/1280) * |x|^2 + (7/1280) * |x|^3 + (9/128) * |x|^4 + // + (7/320) * |x|^5 - (3/80) * |x|^6 + (1/80) * |x|^7 - (1/720) * |x|^8 if 1/2 ≤ |x| ≤ 3/2 + // 145167/286720 - (1457/5120) * |x| + (195/512) * |x|^2 - (1127/1280) * |x|^3 + (207/256) * |x|^4 + // - (119/320) * |x|^5 + (3/32) * |x|^6 - (1/80) * |x|^7 + (1/1440) * |x|^8 if 3/2 < |x| < 2.5 + // (146051/35840) * |x| - (1465/256) * |x|^2 + (5123/1280) * |x|^3 - (209/128) * |x|^4 + // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 + // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 + // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 + // 0.0 + // clang-format on + Inline real_t S8(const real_t x) { if (x < HALF) { - return static_cast(259723.0 / 573440.0) - + return static_cast(259723.0 / 573440.0) - static_cast(289.0 / 1024.0) * SQR(x) + - static_cast(43.0 / 512.0) * SQR(SQR(x)) - + static_cast(43.0 / 512.0) * SQR(SQR(x)) - static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); } else if (x <= static_cast(1.5)) { - return static_cast(64929.0/143360.0) + - static_cast(1.0/5120.0) * x - - static_cast(363.0/1280.0) * SQR(x) + - static_cast(7.0/1280.0) * CUBE(x) + - static_cast(9.0/128.0) * SQR(SQR(x)) + + return static_cast(64929.0 / 143360.0) + + static_cast(1.0 / 5120.0) * x - + static_cast(363.0 / 1280.0) * SQR(x) + + static_cast(7.0 / 1280.0) * CUBE(x) + + static_cast(9.0 / 128.0) * SQR(SQR(x)) + static_cast(7.0 / 320.0) * CUBE(x) * SQR(x) - static_cast(3.0 / 80.0) * SQR(CUBE(x)) + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 720.0) * SQR(SQR(SQR(x))); } else if (x < static_cast(2.5)) { - return static_cast(145167.0/286720.0) - - static_cast(1457.0/5120.0) * x + - 
static_cast(195.0/512.0) * SQR(x) - - static_cast(1127.0/1280.0) * CUBE(x) + - static_cast(207.0/256.0) * SQR(SQR(x)) - + return static_cast(145167.0 / 286720.0) - + static_cast(1457.0 / 5120.0) * x + + static_cast(195.0 / 512.0) * SQR(x) - + static_cast(1127.0 / 1280.0) * CUBE(x) + + static_cast(207.0 / 256.0) * SQR(SQR(x)) - static_cast(119.0 / 320.0) * CUBE(x) * SQR(x) + static_cast(3.0 / 32.0) * SQR(CUBE(x)) - static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) + static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))); } else if (x < static_cast(3.5)) { - return static_cast(146051.0/35840.0) * x - - static_cast(1465.0/256.0) * SQR(x) + - static_cast(5123.0/1280.0) * CUBE(x) - - static_cast(209.0/128.0) * SQR(SQR(x)) + + return static_cast(146051.0 / 35840.0) * x - + static_cast(1465.0 / 256.0) * SQR(x) + + static_cast(5123.0 / 1280.0) * CUBE(x) - + static_cast(209.0 / 128.0) * SQR(SQR(x)) + static_cast(131.0 / 320.0) * CUBE(x) * SQR(x) - static_cast(1.0 / 16.0) * SQR(CUBE(x)) + static_cast(3.0 / 560.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 5040.0) * SQR(SQR(SQR(x))) - - static_cast(122729.0/143360.0); + static_cast(122729.0 / 143360.0); } else if (x < static_cast(4.5)) { - return static_cast(4782969.0/1146880.0) - - static_cast(531441.0/71680.0) * x + - static_cast(59049.0/10240.0) * SQR(x) - - static_cast(6561.0/2560.0) * CUBE(x) + - static_cast(729.0/1024.0) * SQR(SQR(x)) - + return static_cast(4782969.0 / 1146880.0) - + static_cast(531441.0 / 71680.0) * x + + static_cast(59049.0 / 10240.0) * SQR(x) - + static_cast(6561.0 / 2560.0) * CUBE(x) + + static_cast(729.0 / 1024.0) * SQR(SQR(x)) - static_cast(81.0 / 640.0) * CUBE(x) * SQR(x) + static_cast(9.0 / 640.0) * SQR(CUBE(x)) - static_cast(1.0 / 1120.0) * SQR(SQR(x)) * CUBE(x) + @@ -188,54 +232,66 @@ namespace prtl_shape { } } - Inline real_t S9(const real_t x) - { + // clang-format off + // S9(x) = + // 15619/36288 - (35/144) * |x|^2 + (19/288) * |x|^4 - (5/432) * |x|^6 + (1/576) * |x|^8 - (1/2880) * |x|^9 if |x| ≤ 1 + 
// 7799/18144 + (1/192) * |x| - (19/72) * |x|^2 + (7/144) * |x|^3 - (1/144) * |x|^4 + (7/96) * |x|^5 + // - (13/216) * |x|^6 + (1/48) * |x|^7 - (1/288) * |x|^8 + (1/4320) * |x|^9 if 1 < |x| < 2 + // 1553/2592 - (339/448) * |x| + (635/504) * |x|^2 - (83/48) * |x|^3 + (191/144) * |x|^4 - (19/32) * |x|^5 + // + (35/216) * |x|^6 - (3/112) * |x|^7 + (5/2016) * |x|^8 - (1/10080) * |x|^9 if 2 ≤ |x| < 3 + // (5883/896) * |x| - (2449/288) * |x|^2 + (563/96) * |x|^3 - (1423/576) * |x|^4 + (43/64) * |x|^5 + // - (103/864) * |x|^6 + (3/224) * |x|^7 - (1/1152) * |x|^8 + (1/40320) * |x|^9 - (133663/72576) if 3 ≤ |x| < 4 + // 390625/72576 - (78125/8064) * |x| + (15625/2016) * |x|^2 - (3125/864) * |x|^3 + (625/576) * |x|^4 + // - (125/576) * |x|^5 + (25/864) * |x|^6 - (5/2016) * |x|^7 + (1/8064) * |x|^8 - (1/362880) * |x|^9 if 4 ≤ |x| < 5 + // 0.0 if |x| ≥ 5 + // clang-format on + Inline real_t S9(const real_t x) { if (x <= ONE) { - return static_cast(15619.0 / 36288.0) - + return static_cast(15619.0 / 36288.0) - static_cast(35.0 / 144.0) * SQR(x) + - static_cast(19.0 / 288.0) * SQR(SQR(x)) - + static_cast(19.0 / 288.0) * SQR(SQR(x)) - static_cast(5.0 / 432.0) * SQR(CUBE(x)) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))) - static_cast(1.0 / 2880.0) * SQR(SQR(SQR(x))) * x; } else if (x < TWO) { - return static_cast(7799.0/18144.0) + - static_cast(1.0/192.0) * x - - static_cast(19.0/72.0) * SQR(x) + - static_cast(7.0/144.0) * CUBE(x) - - static_cast(1.0/144.0) * SQR(SQR(x)) + + return static_cast(7799.0 / 18144.0) + + static_cast(1.0 / 192.0) * x - + static_cast(19.0 / 72.0) * SQR(x) + + static_cast(7.0 / 144.0) * CUBE(x) - + static_cast(1.0 / 144.0) * SQR(SQR(x)) + static_cast(7.0 / 96.0) * CUBE(x) * SQR(x) - static_cast(13.0 / 216.0) * SQR(CUBE(x)) + static_cast(1.0 / 48.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 288.0) * SQR(SQR(SQR(x))) + static_cast(1.0 / 4320.0) * CUBE(CUBE(x)); } else if (x <= THREE) { - return static_cast(1553.0/2592.0) - - static_cast(339.0/448.0) * x + 
- static_cast(635.0/504.0) * SQR(x) - - static_cast(83.0/48.0) * CUBE(x) + - static_cast(191.0/144.0) * SQR(SQR(x)) - + return static_cast(1553.0 / 2592.0) - + static_cast(339.0 / 448.0) * x + + static_cast(635.0 / 504.0) * SQR(x) - + static_cast(83.0 / 48.0) * CUBE(x) + + static_cast(191.0 / 144.0) * SQR(SQR(x)) - static_cast(19.0 / 32.0) * CUBE(x) * SQR(x) + static_cast(35.0 / 216.0) * SQR(CUBE(x)) - static_cast(3.0 / 112.0) * SQR(SQR(x)) * CUBE(x) + static_cast(5.0 / 2016.0) * SQR(SQR(SQR(x))) - static_cast(1.0 / 10080.0) * CUBE(CUBE(x)); } else if (x < FOUR) { - return static_cast(5883.0/896.0) * x - - static_cast(2449.0/288.0) * SQR(x) + - static_cast(563.0/96.0) * CUBE(x) - - static_cast(1423.0/576.0) * SQR(SQR(x)) + - static_cast(43.0/64.0) * CUBE(x) * SQR(x) - - static_cast(103.0/864.0) * SQR(CUBE(x)) + + return static_cast(5883.0 / 896.0) * x - + static_cast(2449.0 / 288.0) * SQR(x) + + static_cast(563.0 / 96.0) * CUBE(x) - + static_cast(1423.0 / 576.0) * SQR(SQR(x)) + + static_cast(43.0 / 64.0) * CUBE(x) * SQR(x) - + static_cast(103.0 / 864.0) * SQR(CUBE(x)) + static_cast(3.0 / 224.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 1152.0) * SQR(SQR(SQR(x))) + static_cast(1.0 / 40320.0) * CUBE(CUBE(x)) - - static_cast(133663.0/72576.0); + static_cast(133663.0 / 72576.0); } else if (x < FIVE) { - return static_cast(390625.0/72576.0) - - static_cast(78125.0/8064.0) * x + - static_cast(15625.0/2016.0) * SQR(x) - - static_cast(3125.0/864.0) * CUBE(x) + - static_cast(625.0/576.0) * SQR(SQR(x)) - + return static_cast(390625.0 / 72576.0) - + static_cast(78125.0 / 8064.0) * x + + static_cast(15625.0 / 2016.0) * SQR(x) - + static_cast(3125.0 / 864.0) * CUBE(x) + + static_cast(625.0 / 576.0) * SQR(SQR(x)) - static_cast(125.0 / 576.0) * CUBE(x) * SQR(x) + static_cast(25.0 / 864.0) * SQR(CUBE(x)) - static_cast(5.0 / 2016.0) * SQR(SQR(x)) * CUBE(x) + @@ -284,9 +340,9 @@ namespace prtl_shape { } } else { // compute at i + 1/2 positions i_min = i - 1; - S[0] = HALF * 
SQR(ONE - di); - S[2] = HALF * SQR(di); - S[1] = ONE - S[0] - S[2]; + S[0] = HALF * SQR(ONE - di); + S[2] = HALF * SQR(di); + S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^3) |x| < 1 @@ -297,7 +353,7 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { @@ -305,179 +361,243 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); - S[1] = static_cast(1.0 / 6.0) * + S[0] = static_cast(1.0 / 6.0) * + CUBE(static_cast(1.5) - di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); + S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered } else if constexpr (O == 4u) { - // 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 |x| < 3/2 - // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 - // 0.0 |x| ≥ 5/2 + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions - + if (di < HALF) { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di 
- static_cast(n))); } } else { i_min = i - 1; - #pragma unroll +#pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 2; + i_min = i - 2; - #pragma unroll - for (int n = 0; n < 5; n++) { - S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 5; n++) { + S[i] = S4( + Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 5u) { - // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 - // S(x) = 1/135 * (3 - |x|)^5 2 ≤ |x| < 3 - // 0.0 |x| ≥ 3 + // clang-format off + // S5(x) = + // 11/20 - (1/2) * |x|^2 + (1/4) * |x|^4 - (1/12) * |x|^5 if |x| ≤ 1 + // 17/40 + (5/8) * |x| - (7/4) * |x|^2 + (5/4) * |x|^3 - (3/8) * |x|^4 + (1/24) * |x|^5 if 1 < |x| < 2 + // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 + // 0.0 if |x| > 3 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); + S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs( + static_cast(2.5) + di - static_cast(n))); } } else { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs( + static_cast(1.5) + di - static_cast(n))); } } } // staggered } else if constexpr (O == 6u) { + // clang-format off + // S6(x) = + // 5887/11520 - (77/192) * |x|^2 + (7/48) * |x|^4 - (1/36) * |x|^6 if |x| ≤ 1/2 + // 7861/15360 - (7/768) * |x| - (91/256) * |x|^2 - (35/288) 
* |x|^3 + (21/64) * |x|^4 + // - (7/48) * |x|^5 + (1/48) * |x|^6 if 1/2 < |x| < 3/2 + // 1379/7680 + (1267/960) * |x| - (329/128) * |x|^2 + (133/72) * |x|^3 + // - (21/32) * |x|^4 + (7/60) * |x|^5 - (1/120) * |x|^6 if 3/2 ≤ |x| < 5/2 + // 117649/46080 - (16807/3840) * |x| + (2401/768) * |x|^2 - (343/288) * |x|^3 + // + (49/192) * |x|^4 - (7/240) * |x|^5 + (1/720) * |x|^6 if 5/2 ≤ |x| < 7/2 + // 0.0 if |x| ≥ 7/2 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions - + if (di < HALF) { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); } } else { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 3; + i_min = i - 3; - #pragma unroll - for (int n = 0; n < 7; n++) { - S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 7; n++) { + S[n] = S6( + Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 7u) { + // clang-format off + // S7(x) = + // 151/315 - (1/3) * |x|^2 + (1/9) * |x|^4 - (1/36) * |x|^6 + (1/144) * |x|^7 if |x| < 1 + // 103/210 - (7/90) * |x| - (1/10) * |x|^2 - (7/18) * |x|^3 + (1/2) * |x|^4 + // - (7/30) * |x|^5 + (1/20) * |x|^6 - (1/240) * |x|^7 if 1 ≤ |x| ≤ 2 + // (217/90) * |x| - (23/6) * |x|^2 + (49/18) * |x|^3 - (19/18) * |x|^4 + // + (7/30) * |x|^5 - (1/36) * |x|^6 + (1/720) * |x|^7 - (139/630) if 2 < |x| < 3 + // 1024/315 - (256/45) * |x| + (64/15) * |x|^2 - (16/9) * |x|^3 + (4/9) * |x|^4 + // - (1/15) * |x|^5 + (1/180) * |x|^6 - (1/5040) * |x|^7 if 3 ≤ |x| < 4 + // 0.0 if |x| ≥ 4 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 3; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); + S[n] = 
S7(Kokkos::fabs(THREE + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 4; for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + S[n] = S7(Kokkos::fabs( + static_cast(3.5) + di - static_cast(n))); } } else { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + S[n] = S7(Kokkos::fabs( + static_cast(2.5) + di - static_cast(n))); } } } // staggered } else if constexpr (O == 8u) { + // clang-format off + // S8(x) = + // 259723/573440 - (289/1024) * |x|^2 + (43/512) * |x|^4 - (1/64) * |x|^6 + (1/576) * |x|^8 if |x| < 1/2 + // 64929/143360 + (1/5120) * |x| - (363/1280) * |x|^2 + (7/1280) * |x|^3 + (9/128) * |x|^4 + // + (7/320) * |x|^5 - (3/80) * |x|^6 + (1/80) * |x|^7 - (1/720) * |x|^8 if 1/2 ≤ |x| ≤ 3/2 + // 145167/286720 - (1457/5120) * |x| + (195/512) * |x|^2 - (1127/1280) * |x|^3 + (207/256) * |x|^4 + // - (119/320) * |x|^5 + (3/32) * |x|^6 - (1/80) * |x|^7 + (1/1440) * |x|^8 if 3/2 < |x| < 2.5 + // (146051/35840) * |x| - (1465/256) * |x|^2 + (5123/1280) * |x|^3 - (209/128) * |x|^4 + // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 + // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 + // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 + // 0.0 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 4; - #pragma unroll +#pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 4; + i_min = i - 4; - #pragma unroll - for (int n = 
0; n < 9; n++) { - S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 9; n++) { + S[n] = S8( + Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 9u) { + // clang-format off + // S9(x) = + // 15619/36288 - (35/144) * |x|^2 + (19/288) * |x|^4 - (5/432) * |x|^6 + (1/576) * |x|^8 - (1/2880) * |x|^9 if |x| ≤ 1 + // 7799/18144 + (1/192) * |x| - (19/72) * |x|^2 + (7/144) * |x|^3 - (1/144) * |x|^4 + (7/96) * |x|^5 + // - (13/216) * |x|^6 + (1/48) * |x|^7 - (1/288) * |x|^8 + (1/4320) * |x|^9 if 1 < |x| < 2 + // 1553/2592 - (339/448) * |x| + (635/504) * |x|^2 - (83/48) * |x|^3 + (191/144) * |x|^4 - (19/32) * |x|^5 + // + (35/216) * |x|^6 - (3/112) * |x|^7 + (5/2016) * |x|^8 - (1/10080) * |x|^9 if 2 ≤ |x| < 3 + // (5883/896) * |x| - (2449/288) * |x|^2 + (563/96) * |x|^3 - (1423/576) * |x|^4 + (43/64) * |x|^5 + // - (103/864) * |x|^6 + (3/224) * |x|^7 - (1/1152) * |x|^8 + (1/40320) * |x|^9 - (133663/72576) if 3 ≤ |x| < 4 + // 390625/72576 - (78125/8064) * |x| + (15625/2016) * |x|^2 - (3125/864) * |x|^3 + (625/576) * |x|^4 + // - (125/576) * |x|^5 + (25/864) * |x|^6 - (5/2016) * |x|^7 + (1/8064) * |x|^8 - (1/362880) * |x|^9 if 4 ≤ |x| < 5 + // 0.0 if |x| ≥ 5 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 4; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); + S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 5; for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + S[n] = S9(Kokkos::fabs( + static_cast(4.5) + di - static_cast(n))); } } else { i_min = i - 4; - #pragma unroll +#pragma unroll for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + S[n] = S9(Kokkos::fabs( + static_cast(3.5) + di - 
static_cast(n))); } } } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } } - template Inline void for_deposit(const int& i_init, const real_t& di_init, From aa500e3dd2003891c5145c8525e40c5582621eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 15:32:06 -0500 Subject: [PATCH 65/82] first attempt to give shape function order at compile time --- CMakeLists.txt | 14 ++++++++++++++ cmake/config.cmake | 15 +++++++++++++++ cmake/defaults.cmake | 3 +++ cmake/report.cmake | 12 ++++++++++++ src/engines/srpic.hpp | 26 +++----------------------- src/global/global.h | 8 +++++++- src/kernels/particle_pusher_sr.hpp | 12 +++++------- src/kernels/particle_shapes.hpp | 1 - 8 files changed, 59 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cff5b41..1afa1eef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,11 @@ set(DEBUG set(precision ${default_precision} CACHE STRING "Precision") + +set(shapefunction + ${default_shapefunction} + CACHE STRING "Shape function") + set(pgen ${default_pgen} CACHE STRING "Problem generator") @@ -75,6 +80,14 @@ set(precisions "single" "double" CACHE STRING "Precisions") +set(shapefunctions + "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" + CACHE STRING "Shape functions") + +set(deposits + "Zig-zag" "Esirkepov" + CACHE STRING "Deposits") + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # @@ -92,6 +105,7 @@ include_directories(${plog_SRC}/include) # -------------------------------- Main code ------------------------------- # set_precision(${precision}) +set_shapefunction(${shapefunction}) if("${Kokkos_DEVICES}" MATCHES "CUDA") add_compile_options("-D CUDA_ENABLED") set(DEVICE_ENABLED ON) diff --git a/cmake/config.cmake 
b/cmake/config.cmake index 97ed658e..cf2941ab 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -16,6 +16,21 @@ function(set_precision precision_name) endif() endfunction() +# -------------------------------- Shape function ------------------------------- # +function(set_shapefunction shapefunction) + list(FIND shapefunctions ${shapefunction} SHAPEFUNCTION_FOUND) + + if(${SHAPEFUNCTION_FOUND} EQUAL -1) + message( + FATAL_ERROR + "Invalid shape function order: ${shapefunction}\nValid options are: ${shapefunctions}" + ) + endif() + + add_compile_options("-DN_ORDER=${shapefunction}") + +endfunction() + # ---------------------------- Problem generator --------------------------- # function(set_problem_generator pgen_name) if(pgen_name STREQUAL ".") diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index 2bfa9a61..9e3da4cb 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -19,6 +19,9 @@ set(default_engine set(default_precision "single" CACHE INTERNAL "Default precision") +set(default_shapefunction + 0 + CACHE INTERNAL "Default shape function") set(default_pgen "." 
CACHE INTERNAL "Default problem generator") diff --git a/cmake/report.cmake b/cmake/report.cmake index 33443d29..397d656d 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -37,6 +37,15 @@ printchoices( "${Blue}" PRECISION_REPORT 46) +printchoices( + "Shapefunction Order" + "shapefunction" + "${shapefunctions}" + ${shapefunction} + ${default_shapefunction} + "${Blue}" + SHAPEFUNCTION_REPORT + 46) printchoices( "Output" "output" @@ -113,6 +122,9 @@ string( ${PRECISION_REPORT} "\n" " " + ${SHAPEFUNCTION_REPORT} + "\n" + " " ${OUTPUT_REPORT} "\n") diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index c0ba54c3..50d42e3c 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -543,29 +543,9 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - if (shape_order == 0) { - deposit_with<0u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 1) { - deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 2) { - deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 3) { - deposit_with<3u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 4) { - deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 5) { - deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 6) { - deposit_with<6u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 7) { - deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 8) { - deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 9) { - deposit_with<9u>(species, domain.mesh.metric, scatter_cur, dt); - } else { - raise::Error("Invalid shape order for current deposition", HERE); - } + + deposit_with(species, domain.mesh.metric, scatter_cur, dt); + } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); } 
diff --git a/src/global/global.h b/src/global/global.h index adffcf6e..52a4feae 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -113,7 +113,13 @@ namespace files { namespace ntt { - inline constexpr std::size_t N_GHOSTS = 2; +#ifndef N_ORDER +#define N_ORDER 0 +inline constexpr std::size_t N_GHOSTS = 2; +#else // N_ORDER + inline constexpr std::size_t N_GHOSTS = static_cast((N_ORDER + 1)/2) + 1; +#endif // N_ORDER + // Coordinate shift to account for ghost cells #define COORD(I) \ (static_cast(static_cast((I)) - static_cast(N_GHOSTS))) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index bf4cfd2d..f981e058 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -477,8 +477,8 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // field interpolation 1st-6th order - getInterpFlds(p, ei, bi); + // field interpolation 0th-9th order + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -830,14 +830,12 @@ namespace kernel::sr { } } + template Inline void getInterpFlds(index_t& p, vec_t& e0, vec_t& b0) const { - // ToDo: implement template in srpic.hpp - const unsigned int O = 1u; - - // ToDo: change to 1u! 
+ // Zig-zag interpolation if constexpr (O == 0u) { if constexpr (D == Dim::_1D) { @@ -1099,7 +1097,7 @@ namespace kernel::sr { c1 = c01 * ponpmy + c11 * ponppy; b0[2] = c0 * ponpmz + c1 * ponppz; } - } else if constexpr ((O >= 1u) and (O <= 5u)) { + } else if constexpr (O >= 1u) { if constexpr (D == Dim::_1D) { const int i { i1(p) + static_cast(N_GHOSTS) }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0109ba94..da2f5169 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -612,7 +612,6 @@ namespace prtl_shape { The N-th order shape function per particle is a N+2 element array where the shape function contributes to only N+1 elements. We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position Let * be the particle position at the current timestep Let x be the particle position at the previous timestep From 578cec2e3a5e47181a6846f166e00e9b05a158e8 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 11 Sep 2025 13:45:42 -0700 Subject: [PATCH 66/82] cmake for shape funcs --- CMakeLists.txt | 16 +++++++++------- cmake/config.cmake | 19 +++++++------------ cmake/report.cmake | 10 +++++----- src/engines/srpic.hpp | 3 +-- src/global/global.h | 14 ++++++++------ 5 files changed, 30 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1afa1eef..5c3b2710 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,14 +80,16 @@ set(precisions "single" "double" CACHE STRING "Precisions") -set(shapefunctions - "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" - CACHE STRING "Shape functions") - -set(deposits - "Zig-zag" "Esirkepov" +set(deposit + "zigzag" "esirkepov" CACHE STRING "Deposits") +if(${deposit} STREQUAL "esirkepov") + set(shape_order + 1 + CACHE INTEGER "Shape functions") +endif() + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests 
------------------------------ # @@ -105,7 +107,7 @@ include_directories(${plog_SRC}/include) # -------------------------------- Main code ------------------------------- # set_precision(${precision}) -set_shapefunction(${shapefunction}) +set_shape_order(${shape_order}) if("${Kokkos_DEVICES}" MATCHES "CUDA") add_compile_options("-D CUDA_ENABLED") set(DEVICE_ENABLED ON) diff --git a/cmake/config.cmake b/cmake/config.cmake index cf2941ab..8324957b 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -16,19 +16,14 @@ function(set_precision precision_name) endif() endfunction() -# -------------------------------- Shape function ------------------------------- # -function(set_shapefunction shapefunction) - list(FIND shapefunctions ${shapefunction} SHAPEFUNCTION_FOUND) - - if(${SHAPEFUNCTION_FOUND} EQUAL -1) - message( - FATAL_ERROR - "Invalid shape function order: ${shapefunction}\nValid options are: ${shapefunctions}" - ) +# ------------------------------- Shape function --------------------------- # +function(set_shape_order shape_order) + if(${deposit} STREQUAL "esirkepov") + if(${shape_order} GREATER 9) + message(FATAL_ERROR "Shape order must be between 1 and 9") + endif() + add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() - - add_compile_options("-DN_ORDER=${shapefunction}") - endfunction() # ---------------------------- Problem generator --------------------------- # diff --git a/cmake/report.cmake b/cmake/report.cmake index 397d656d..94ba9f65 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -38,11 +38,11 @@ printchoices( PRECISION_REPORT 46) printchoices( - "Shapefunction Order" - "shapefunction" - "${shapefunctions}" - ${shapefunction} - ${default_shapefunction} + "Shape order" + "shape_order" + "" + ${shape_order} + "1" "${Blue}" SHAPEFUNCTION_REPORT 46) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 50d42e3c..ef626360 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -543,9 +543,8 @@ namespace ntt 
{ species.npart(), (double)species.charge()), HERE); - - deposit_with(species, domain.mesh.metric, scatter_cur, dt); + deposit_with(species, domain.mesh.metric, scatter_cur, dt); } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); } diff --git a/src/global/global.h b/src/global/global.h index 52a4feae..f6d5b13d 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -113,12 +113,14 @@ namespace files { namespace ntt { -#ifndef N_ORDER -#define N_ORDER 0 -inline constexpr std::size_t N_GHOSTS = 2; -#else // N_ORDER - inline constexpr std::size_t N_GHOSTS = static_cast((N_ORDER + 1)/2) + 1; -#endif // N_ORDER +#if !defined(SHAPE_ORDER) + #define SHAPE_ORDER 0 + inline constexpr std::size_t N_GHOSTS = 2; +#else // SHAPE_ORDER + inline constexpr std::size_t N_GHOSTS = static_cast( + (SHAPE_ORDER + 1) / 2) + + 1; +#endif // SHAPE_ORDER // Coordinate shift to account for ghost cells #define COORD(I) \ From a4f75de9b415be4bd73ee9f56aa800ea7d24c5db Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 11 Sep 2025 16:18:57 -0500 Subject: [PATCH 67/82] fix cmake options for deposit and shape order --- CMakeLists.txt | 20 ++++++++++++++------ cmake/defaults.cmake | 9 ++++++--- cmake/report.cmake | 16 ++++++++++++++-- src/kernels/particle_pusher_sr.hpp | 2 +- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c3b2710..76a52f29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,12 @@ set(precision ${default_precision} CACHE STRING "Precision") -set(shapefunction - ${default_shapefunction} +set(deposit + ${default_deposit} + CACHE STRING "Deposit") + +set(shape_order + ${default_shape_order} CACHE STRING "Shape function") set(pgen @@ -80,16 +84,20 @@ set(precisions "single" "double" CACHE STRING "Precisions") -set(deposit +set(deposits "zigzag" "esirkepov" CACHE STRING "Deposits") -if(${deposit} STREQUAL "esirkepov") +if(${deposit} STREQUAL "zigzag") set(shape_order - 1 - CACHE INTEGER "Shape 
functions") + ${default_shape_order} + CACHE STRING "Shape functions") endif() +set(shape_orders + "1" "2" "3" "4" "5" "6" "7" "8" "9" + CACHE STRING "Shape orders") + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index 9e3da4cb..fb879001 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -19,9 +19,12 @@ set(default_engine set(default_precision "single" CACHE INTERNAL "Default precision") -set(default_shapefunction - 0 - CACHE INTERNAL "Default shape function") +set(default_deposit + "zigzag" + CACHE INTERNAL "Default deposit") +set(default_shape_order + 1 + CACHE INTERNAL "Default shape function order") set(default_pgen "." CACHE INTERNAL "Default problem generator") diff --git a/cmake/report.cmake b/cmake/report.cmake index 94ba9f65..8f62ac17 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -37,12 +37,21 @@ printchoices( "${Blue}" PRECISION_REPORT 46) +printchoices( + "Deposit" + "deposit" + "${deposits}" + ${deposit} + ${default_deposit} + "${Blue}" + DEPOSIT_REPORT + 46) printchoices( "Shape order" "shape_order" - "" + "${shape_orders}" ${shape_order} - "1" + ${default_shape_order} "${Blue}" SHAPEFUNCTION_REPORT 46) @@ -122,6 +131,9 @@ string( ${PRECISION_REPORT} "\n" " " + ${DEPOSIT_REPORT} + "\n" + " " ${SHAPEFUNCTION_REPORT} "\n" " " diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index f981e058..2707bb80 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -478,7 +478,7 @@ namespace kernel::sr { bool is_gca { false }; // field interpolation 0th-9th order - getInterpFlds(p, ei, bi); + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); From 7c192020b16f4789a6a647a4cc7366464eca403a Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Fri, 12 Sep 
2025 18:59:28 -0500 Subject: [PATCH 68/82] bugfix in 3rd order shape function i_min for not staggered case --- src/kernels/particle_shapes.hpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index da2f5169..d35c77d6 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -323,9 +323,9 @@ namespace prtl_shape { } } // staggered } else if constexpr (O == 2u) { - // 3/4 - |x|^2 |x| < 1/2 + // 3/4 - |x|^2 |x| < 1/2 // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 - // 0.0 |x| ≥ 3/2 + // 0.0 |x| ≥ 3/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 1; @@ -345,30 +345,29 @@ namespace prtl_shape { S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { - // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^3) |x| < 1 - // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 - // 0.0 |x| ≥ 2 + // 2/3 - x^2 + 1/2 * x^3 |x| < 1 + // S(x) = 1/6 * (2 - |x|)^3 1 ≤ |x| < 2 + // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions - i_min = i - 2; + i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); - S[2] = ONE - S[0] - S[1] - S[3]; + S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + HALF * CUBE(HALF + di); S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * 
SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF - di) + + HALF * CUBE(HALF - di); S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -527,7 +526,7 @@ namespace prtl_shape { // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 - // 0.0 + // 0.0 // clang-format on if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { From 58950c5bc7481371408c22a2802d1a2ed157316f Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 17 Sep 2025 18:07:47 -0500 Subject: [PATCH 69/82] simplifications for 3rd order shape functions --- src/global/utils/numeric.h | 6 ++- src/kernels/particle_shapes.hpp | 73 +++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 3b7b9f55..856ccb83 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -41,7 +41,8 @@ inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; inline constexpr float THIRD = 0.333333f; -inline constexpr float THREE_FOURTHS = 0.75f; +inline constexpr float THREE_FOURTHS = 0.75f; +inline constexpr float THREE_HALFS = 1.5f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -59,7 +60,8 @@ inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; inline constexpr double THIRD = 0.3333333333333333; -inline constexpr double THREE_FOURTHS = 0.75; +inline constexpr double THREE_FOURTHS = 0.75; +inline constexpr float THREE_HALFS = 1.5; inline constexpr double INV_2 = 0.5; inline constexpr 
double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index d35c77d6..ddf85266 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,6 +17,23 @@ namespace prtl_shape { + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on + Inline real_t S3(const real_t x) { + if (x < ONE) { + return static_cast(2.0 / 3.0) - SQR(x) + HALF * CUBE(x); + } else if (x < TWO) { + return static_cast(4.0 / 3.0) - TWO * x + SQR(x) - + static_cast(1.0 / 6.0) * CUBE(x); + } else { + return ZERO; + } + } + // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 @@ -27,7 +44,7 @@ namespace prtl_shape { if (x < HALF) { return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + INV_4 * SQR(SQR(x)); - } else if (x < static_cast(1.5)) { + } else if (x < THREE_HALFS) { return static_cast(55.0 / 96.0) + static_cast(5.0 / 24.0) * x - static_cast(5.0 / 4.0) * SQR(x) + @@ -51,6 +68,7 @@ namespace prtl_shape { // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 // 0.0 if |x| > 3 // clang-format on + Inline real_t S5(const real_t x) { if (x <= ONE) { return static_cast(11.0 / 20.0) - HALF * SQR(x) + @@ -90,7 +108,7 @@ namespace prtl_shape { static_cast(77.0 / 192.0) * SQR(x) + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(CUBE(x)); - } else if (x < static_cast(1.5)) { + } else if (x < THREE_HALFS) { return static_cast(7861.0 / 15360.0) - static_cast(7.0 / 768.0) * x - static_cast(91.0 / 256.0) * SQR(x) - @@ -187,7 +205,7 @@ namespace 
prtl_shape { static_cast(43.0 / 512.0) * SQR(SQR(x)) - static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); - } else if (x <= static_cast(1.5)) { + } else if (x <= THREE_HALFS) { return static_cast(64929.0 / 143360.0) + static_cast(1.0 / 5120.0) * x - static_cast(363.0 / 1280.0) * SQR(x) + @@ -318,7 +336,7 @@ namespace prtl_shape { S[1] = ONE - S[0]; } else { i_min = i; - S[0] = static_cast(1.5) - di; + S[0] = THREE_HALFS - di; S[1] = ONE - S[0]; } } // staggered @@ -334,7 +352,7 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * SQR(static_cast(3.0 / 2.0) - di); + S[0] = HALF * SQR(THREE_HALFS - di); S[1] = THREE_FOURTHS - SQR(ONE - di); S[2] = ONE - S[0] - S[1]; } @@ -352,26 +370,49 @@ namespace prtl_shape { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(FOUR - di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + - HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(2.0 / 3.0) - SQR(THREE_HALFS + di) + + HALF * CUBE(THREE_HALFS + di); + S[3] = static_cast(1.0 / 6.0) * + CUBE(static_cast(3.5) - di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * - CUBE(static_cast(1.5) - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF - di) + + S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); + S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + HALF * CUBE(HALF - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); + S[3] = static_cast(1.0 / 6.0) * + CUBE(static_cast(2.5) - di); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered + + // if constexpr (not STAGGERED) { // compute at i positions + // 
i_min = i - 1; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); + // } + // } else { // compute at i + 1/2 positions + // if (di < HALF) { + // i_min = i - 2; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); + // } + // } else { + // i_min = i - 1; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); + // } + // } + // } // staggered } else if constexpr (O == 4u) { // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 @@ -401,8 +442,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 5; n++) { - S[i] = S4( - Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + S[i] = S4(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); } } // staggered } else if constexpr (O == 5u) { @@ -434,8 +474,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs( - static_cast(1.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); } } } // staggered From 5025936fca03c5e3ea5be30262ba8e948209c635 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 18 Sep 2025 21:50:42 -0500 Subject: [PATCH 70/82] bugfix and optimisation for S3 --- src/kernels/particle_shapes.hpp | 34 ++++++--------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index ddf85266..2615b431 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -370,49 +370,27 @@ namespace prtl_shape { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(FOUR - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 
2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(THREE_HALFS + di) + - HALF * CUBE(THREE_HALFS + di); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + HALF * CUBE(HALF + di); S[3] = static_cast(1.0 / 6.0) * - CUBE(static_cast(3.5) - di); + CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + - HALF * CUBE(HALF - di); + HALF * CUBE(di - HALF); S[3] = static_cast(1.0 / 6.0) * - CUBE(static_cast(2.5) - di); + CUBE(HALF - di); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered - - // if constexpr (not STAGGERED) { // compute at i positions - // i_min = i - 1; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); - // } - // } else { // compute at i + 1/2 positions - // if (di < HALF) { - // i_min = i - 2; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); - // } - // } else { - // i_min = i - 1; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); - // } - // } - // } // staggered } else if constexpr (O == 4u) { // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 From 554fcc2de265acc30d00c2674901f198f62dda20 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 22 Sep 2025 17:19:37 -0500 Subject: [PATCH 71/82] bugfix in 4th order shape function --- src/kernels/particle_shapes.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 2615b431..5eada861 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -420,7 +420,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 5; n++) { - S[i] = S4(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); + S[n] = S4(Kokkos::fabs(THREE_HALFS 
+ di - static_cast(n))); } } // staggered } else if constexpr (O == 5u) { From 8faa0ee679894e4cb9e2f525cfd81e31f144cf75 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 22 Sep 2025 18:30:15 -0500 Subject: [PATCH 72/82] bugfix in 3D indexing --- src/kernels/currents_deposit.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1299be69..f44b54ce 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -606,13 +606,13 @@ namespace kernel { jx1[0][j][k] = -Qdxdt * Wx1[0][j][k]; } } - + #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { #pragma unroll - for (int k = 0; j < O + 2; ++k) { + for (int k = 0; k < O + 2; ++k) { jx1[i][j][k] = jx1[i - 1][j][k] - Qdxdt * Wx1[i][j][k]; } } @@ -699,8 +699,8 @@ namespace kernel { } } } - } + } // dim } else { // order raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } From f4faa870b1332df0c2c92c1c510d2918e04fa2d6 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 8 Oct 2025 15:12:55 -0500 Subject: [PATCH 73/82] 10th order shape function --- src/kernels/currents_deposit.hpp | 4 +- src/kernels/particle_shapes.hpp | 116 ++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5fae8767..f6e8579c 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -402,7 +402,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 9u)) { + } else if constexpr ((O >= 1u) and (O <= 10u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; @@ -703,7 +703,7 @@ namespace kernel { } // dim } else { // order - raise::KernelError(HERE, "Unsupported interpolation order. 
O > 9 not supported. Seriously. What are you even doing here?"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); } } }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 5eada861..8814a33b 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -320,6 +320,79 @@ namespace prtl_shape { } } + inline real_t S10(const real_t x) { + if (x < HALF) { + return static_cast(381773117.0 / 928972800.0) + - static_cast(156409.0 / 737280.0) * SQR(x) + + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) + - static_cast(583.0 / 69120.0) * SQR(CUBE(x)) + + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) + - static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < THREE_HALFS) { + return static_cast(152709293.0 / 371589120.0) + - static_cast(11.0 / 4423680.0) * x + - static_cast(62557.0 / 294912.0) * SQR(x) + - static_cast(11.0 / 92160.0) * CUBE(x) + + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) + - static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) + - static_cast(187.0 / 27648.0) * SQR(CUBE(x)) + - static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x + + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x <= static_cast(2.5)) { + return static_cast(37690169.0 / 92897280.0) + + static_cast(135311.0 / 3870720.0) * x + - static_cast(163603.0 / 516096.0) * SQR(x) + + static_cast(7513.0 / 40320.0) * CUBE(x) + - static_cast(4543.0 / 27648.0) * SQR(SQR(x)) + + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) + - static_cast(715.0 / 6912.0) * SQR(CUBE(x)) + + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x + - static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x + - static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(3.5)) { + return 
static_cast(623786977.0 / 743178240.0) + - static_cast(11695211.0 / 6881280.0) * x + + static_cast(1654543.0 / 589824.0) * SQR(x) + - static_cast(1352153.0 / 430080.0) * CUBE(x) + + static_cast(479281.0 / 221184.0) * SQR(SQR(x)) + - static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) + + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) + - static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x + + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(4.5)) { + return static_cast(-1241720381.0 / 371589120.0) + + static_cast(237959711.0 / 23224320.0) * x + - static_cast(3702215.0 / 294912.0) * SQR(x) + + static_cast(2070343.0 / 241920.0) * CUBE(x) + - static_cast(407429.0 / 110592.0) * SQR(SQR(x)) + + static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) + - static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) + + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x + - static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x + - static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(5.5)) { + return static_cast(25937424601.0 / 3715891200.0) + - static_cast(2357947691.0 / 185794560.0) * x + + static_cast(214358881.0 / 20643840.0) * SQR(x) + - static_cast(19487171.0 / 3870720.0) * CUBE(x) + + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) + - static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) + + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) + - static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x + + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); + } else { + return ZERO; + } +} + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { @@ -609,8 +682,49 @@ namespace prtl_shape { } } } // staggered + } else if 
constexpr (O == 10u) { + // clang-format off + // S10(x) = + // 381773117/928972800 - (156409/737280) * |x|^2 + (14597/276480) * |x|^4 - (583/69120) * |x|^6 + (11/11520) * |x|^8 - (1/14400) * |x|^10 if |x| ≤ 0.5 + // 152709293/371589120 - (11/4423680) * |x| - (62557/294912) * |x|^2 - (11/92160) * |x|^3 + (5885/110592) * |x|^4 - (77/76800) * |x|^5 - + // (187/27648) * |x|^6 - (11/5760) * |x|^7 + (11/4608) * |x|^8 - (11/17280) * |x|^9 + (1/17280) * |x|^10 if 0.5 < |x| ≤ 1.5 + // 37690169/92897280 + (135311/3870720) * |x| - (163603/516096) * |x|^2 + (7513/40320) * |x|^3 - (4543/27648) * |x|^4 + // + (1661/9600) * |x|^5 - (715/6912) * |x|^6 + (11/315) * |x|^7 - (55/8064) * |x|^8 + (11/15120) * |x|^9 - (1/30240) * |x|^10 if 1.5 < |x| ≤ 2.5 + // 623786977/743178240 - (11695211/6881280) * |x| + (1654543/589824) * |x|^2 - (1352153/430080) * |x|^3 + (479281/221184) * |x|^4 + // - (48433/51200) * |x|^5 + (14905/55296) * |x|^6 - (451/8960) * |x|^7 + (55/9216) * |x|^8 - (11/26880) * |x|^9 + (1/80640) * |x|^10 if 2.5 < |x| ≤ 3.5 + // -1241720381/371589120 + (237959711/23224320) * |x| - (3702215/294912) * |x|^2 + (2070343/241920) * |x|^3 - (407429/110592) * |x|^4 + // + (61061/57600) * |x|^5 - (5753/27648) * |x|^6 + (209/7560) * |x|^7 - (11/4608) * |x|^8 + (11/90720) * |x|^9 - (1/362880) * |x|^10 if 3.5 < |x| ≤ 4.5 + // 25937424601/3715891200 - (2357947691/185794560) * |x| + (214358881/20643840) * |x|^2 - (19487171/3870720) * |x|^3 + (1771561/1105920) * |x|^4 + // - (161051/460800) * |x|^5 + (14641/276480) * |x|^6 - (1331/241920) * |x|^7 + (121/322560) * |x|^8 - (11/725760) * |x|^9 + (1/3628800) * |x|^10 if 4.5 < |x| ≤ 5.5 + // 0.0 otherwise + // clang-format on + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(FIVE + di - static_cast(n))); + } + } else { + i_min = i - 4; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(FOUR + di - 
static_cast(n))); + } + } + } else { // compute at i + 1/2 positions + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(static_cast(4.5) + + di - static_cast(n))); + } + } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); } } From 2a23da1ddf243caa32c039b55d9aab5365d6e382 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 11:53:23 -0500 Subject: [PATCH 74/82] updated cmake options for 10th order --- cmake/config.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 8324957b..43899ee4 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -19,8 +19,8 @@ endfunction() # ------------------------------- Shape function --------------------------- # function(set_shape_order shape_order) if(${deposit} STREQUAL "esirkepov") - if(${shape_order} GREATER 9) - message(FATAL_ERROR "Shape order must be between 1 and 9") + if(${shape_order} GREATER 10) + message(FATAL_ERROR "Shape order must be between 1 and 10") endif() add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() From 0e9320367259f614abdc79e6a4b6d120d29cbee8 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 12:35:49 -0500 Subject: [PATCH 75/82] entity now goes to 11! 
--- CMakeLists.txt | 2 +- cmake/config.cmake | 4 +- src/kernels/currents_deposit.hpp | 5 +- src/kernels/particle_shapes.hpp | 292 ++++++++++++++++++++++--------- 4 files changed, 219 insertions(+), 84 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76a52f29..48e5689b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ if(${deposit} STREQUAL "zigzag") endif() set(shape_orders - "1" "2" "3" "4" "5" "6" "7" "8" "9" + "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" CACHE STRING "Shape orders") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index 43899ee4..e9b0de39 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -19,8 +19,8 @@ endfunction() # ------------------------------- Shape function --------------------------- # function(set_shape_order shape_order) if(${deposit} STREQUAL "esirkepov") - if(${shape_order} GREATER 10) - message(FATAL_ERROR "Shape order must be between 1 and 10") + if(${shape_order} GREATER 11) + message(FATAL_ERROR "Shape order must be between 1 and 11.") endif() add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index f6e8579c..18955e79 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -703,7 +703,10 @@ namespace kernel { } // dim } else { // order - raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); + raise::KernelError( + HERE, + "Unsupported interpolation order. O > 11 not supported. Seriously. " + "What are you even doing here? 
Entity already goes to 11!"); } } }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 8814a33b..c5d5748d 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -321,77 +321,156 @@ namespace prtl_shape { } inline real_t S10(const real_t x) { - if (x < HALF) { - return static_cast(381773117.0 / 928972800.0) - - static_cast(156409.0 / 737280.0) * SQR(x) - + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) - - static_cast(583.0 / 69120.0) * SQR(CUBE(x)) - + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) - - static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < THREE_HALFS) { - return static_cast(152709293.0 / 371589120.0) - - static_cast(11.0 / 4423680.0) * x - - static_cast(62557.0 / 294912.0) * SQR(x) - - static_cast(11.0 / 92160.0) * CUBE(x) - + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) - - static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) - - static_cast(187.0 / 27648.0) * SQR(CUBE(x)) - - static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x - + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x <= static_cast(2.5)) { - return static_cast(37690169.0 / 92897280.0) - + static_cast(135311.0 / 3870720.0) * x - - static_cast(163603.0 / 516096.0) * SQR(x) - + static_cast(7513.0 / 40320.0) * CUBE(x) - - static_cast(4543.0 / 27648.0) * SQR(SQR(x)) - + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) - - static_cast(715.0 / 6912.0) * SQR(CUBE(x)) - + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x - - static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x - - static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(3.5)) { - return static_cast(623786977.0 / 743178240.0) - - static_cast(11695211.0 / 6881280.0) * x - + static_cast(1654543.0 / 589824.0) * SQR(x) - - static_cast(1352153.0 / 430080.0) * CUBE(x) - 
+ static_cast(479281.0 / 221184.0) * SQR(SQR(x)) - - static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) - + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) - - static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x - + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(4.5)) { - return static_cast(-1241720381.0 / 371589120.0) - + static_cast(237959711.0 / 23224320.0) * x - - static_cast(3702215.0 / 294912.0) * SQR(x) - + static_cast(2070343.0 / 241920.0) * CUBE(x) - - static_cast(407429.0 / 110592.0) * SQR(SQR(x)) - + static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) - - static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) - + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x - - static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x - - static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(5.5)) { - return static_cast(25937424601.0 / 3715891200.0) - - static_cast(2357947691.0 / 185794560.0) * x - + static_cast(214358881.0 / 20643840.0) * SQR(x) - - static_cast(19487171.0 / 3870720.0) * CUBE(x) - + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) - - static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) - + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) - - static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x - + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); - } else { - return ZERO; + if (x < HALF) { + return static_cast(381773117.0 / 928972800.0) - + static_cast(156409.0 / 737280.0) * SQR(x) + + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) - + static_cast(583.0 / 69120.0) * SQR(CUBE(x)) + + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < THREE_HALFS) { + return 
static_cast(152709293.0 / 371589120.0) - + static_cast(11.0 / 4423680.0) * x - + static_cast(62557.0 / 294912.0) * SQR(x) - + static_cast(11.0 / 92160.0) * CUBE(x) + + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) - + static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) - + static_cast(187.0 / 27648.0) * SQR(CUBE(x)) - + static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x + + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x <= static_cast(2.5)) { + return static_cast(37690169.0 / 92897280.0) + + static_cast(135311.0 / 3870720.0) * x - + static_cast(163603.0 / 516096.0) * SQR(x) + + static_cast(7513.0 / 40320.0) * CUBE(x) - + static_cast(4543.0 / 27648.0) * SQR(SQR(x)) + + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) - + static_cast(715.0 / 6912.0) * SQR(CUBE(x)) + + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x - + static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(3.5)) { + return static_cast(623786977.0 / 743178240.0) - + static_cast(11695211.0 / 6881280.0) * x + + static_cast(1654543.0 / 589824.0) * SQR(x) - + static_cast(1352153.0 / 430080.0) * CUBE(x) + + static_cast(479281.0 / 221184.0) * SQR(SQR(x)) - + static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) + + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) - + static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x + + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(4.5)) { + return static_cast(-1241720381.0 / 371589120.0) + + static_cast(237959711.0 / 23224320.0) * x - + static_cast(3702215.0 / 294912.0) * SQR(x) + + static_cast(2070343.0 / 241920.0) * CUBE(x) - + static_cast(407429.0 / 110592.0) * SQR(SQR(x)) + + 
static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) - + static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) + + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x - + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(5.5)) { + return static_cast(25937424601.0 / 3715891200.0) - + static_cast(2357947691.0 / 185794560.0) * x + + static_cast(214358881.0 / 20643840.0) * SQR(x) - + static_cast(19487171.0 / 3870720.0) * CUBE(x) + + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) - + static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) + + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) - + static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x + + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); + } else { + return ZERO; + } + } + + inline real_t S11(const real_t x) { + if (x < ONE) { + return static_cast(655177.0 / 1663200.0) - + static_cast(809.0 / 4320.0) * SQR(x) + + static_cast(31.0 / 720.0) * SQR(SQR(x)) - + static_cast(23.0 / 3600.0) * CUBE(SQR(x)) + + static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 86400.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x <= TWO) { + return static_cast(65521.0 / 166320.0) - + static_cast(11.0 / 50400.0) * x - + static_cast(563.0 / 3024.0) * SQR(x) - + static_cast(11.0 / 3360.0) * CUBE(x) + + static_cast(25.0 / 504.0) * SQR(SQR(x)) - + static_cast(11.0 / 1200.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 360.0) * SQR(CUBE(x)) - + static_cast(11.0 / 1680.0) * SQR(CUBE(x)) * x + + static_cast(1.0 / 252.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 10080.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 6720.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 120960.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < THREE) { 
+ return static_cast(61297.0 / 166320.0) + + static_cast(781.0 / 5600.0) * x - + static_cast(1619.0 / 3024.0) * SQR(x) + + static_cast(583.0 / 1120.0) * CUBE(x) - + static_cast(239.0 / 504.0) * SQR(SQR(x)) + + static_cast(143.0 / 400.0) * CUBE(x) * SQR(x) - + static_cast(13.0 / 72.0) * SQR(CUBE(x)) + + static_cast(33.0 / 560.0) * SQR(CUBE(x)) * x - + static_cast(25.0 / 2016.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 6720.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 8064.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 241920.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x <= FOUR) { + return static_cast(894727.0 / 665280.0) - + static_cast(38533.0 / 11200.0) * x + + static_cast(9385.0 / 1728.0) * SQR(x) - + static_cast(12199.0 / 2240.0) * CUBE(x) + + static_cast(1009.0 / 288.0) * SQR(SQR(x)) - + static_cast(1199.0 / 800.0) * CUBE(x) * SQR(x) + + static_cast(631.0 / 1440.0) * SQR(CUBE(x)) - + static_cast(99.0 / 1120.0) * SQR(CUBE(x)) * x + + static_cast(7.0 / 576.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 10080.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 725760.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < FIVE) { + return -static_cast(18595037.0 / 3326400.0) + + static_cast(4726777.0 / 302400.0) * x - + static_cast(1113317.0 / 60480.0) * SQR(x) + + static_cast(250657.0 / 20160.0) * CUBE(x) - + static_cast(54797.0 / 10080.0) * SQR(SQR(x)) + + static_cast(11737.0 / 7200.0) * CUBE(x) * SQR(x) - + static_cast(2477.0 / 7200.0) * SQR(CUBE(x)) + + static_cast(517.0 / 10080.0) * SQR(CUBE(x)) * x - + static_cast(107.0 / 20160.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 30240.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 67200.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < SIX) { + return static_cast(17496.0 / 1925.0) - + static_cast(2916.0 / 175.0) * x + + static_cast(486.0 / 35.0) * SQR(x) - + static_cast(243.0 / 35.0) * 
CUBE(x) + + static_cast(81.0 / 35.0) * SQR(SQR(x)) - + static_cast(27.0 / 50.0) * CUBE(x) * SQR(x) + + static_cast(9.0 / 100.0) * SQR(CUBE(x)) - + static_cast(3.0 / 280.0) * SQR(CUBE(x)) * x + + static_cast(1.0 / 1120.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 20160.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 604800.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 39916800.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else { + return ZERO; + } } -} template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { @@ -451,16 +530,14 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * - CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + HALF * CUBE(di - HALF); - S[3] = static_cast(1.0 / 6.0) * - CUBE(HALF - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered @@ -703,14 +780,14 @@ namespace prtl_shape { i_min = i - 5; #pragma unroll - for (int n = 0; n < 10; n++) { + for (int n = 0; n < 11; n++) { S[n] = S10(Kokkos::fabs(FIVE + di - static_cast(n))); } } else { i_min = i - 4; #pragma unroll - for (int n = 0; n < 10; n++) { + for (int n = 0; n < 11; n++) { S[n] = S10(Kokkos::fabs(FOUR + di - static_cast(n))); } } @@ -718,13 +795,68 @@ namespace prtl_shape { i_min = i - 5; #pragma unroll - for (int n = 0; n < 10; n++) { - S[n] = S10(Kokkos::fabs(static_cast(4.5) + - di - static_cast(n))); + for (int n = 0; n < 11; n++) { + S[n] = S10( + Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + } + } // staggered + } else if constexpr (O == 11u) { + // clang-format off + // S11(x) = + // 655177/1663200 - (809/4320) * |x|^2 + (31/720) * |x|^4 - (23/3600) * |x|^6 + // + (1/1440) * |x|^8 - 
(1/14400) * |x|^10 + (1/86400) * |x|^11 if |x| < 1 + // 65521/166320 - (11/50400) * |x| - (563/3024) * |x|^2 - (11/3360) * |x|^3 + // + (25/504) * |x|^4 - (11/1200) * |x|^5 + (1/360) * |x|^6 + // - (11/1680) * |x|^7 + (1/252) * |x|^8 - (11/10080) * |x|^9 + // + (1/6720) * |x|^10 - (1/120960) * |x|^11 if 1 ≤ |x| ≤ 2 + // 61297/166320 + (781/5600) * |x| - (1619/3024) * |x|^2 + (583/1120) * |x|^3 + // - (239/504) * |x|^4 + (143/400) * |x|^5 - (13/72) * |x|^6 + // + (33/560) * |x|^7 - (25/2016) * |x|^8 + (11/6720) * |x|^9 + // - (1/8064) * |x|^10 + (1/241920) * |x|^11 if 2 < |x| < 3 + // 894727/665280 - (38533/11200) * |x| + (9385/1728) * |x|^2 - (12199/2240) * |x|^3 + // + (1009/288) * |x|^4 - (1199/800) * |x|^5 + (631/1440) * |x|^6 + // - (99/1120) * |x|^7 + (7/576) * |x|^8 - (11/10080) * |x|^9 + // + (1/17280) * |x|^10 - (1/725760) * |x|^11 if 3 ≤ |x| ≤ 4 + // -18595037/3326400 + (4726777/302400) * |x| - (1113317/60480) * |x|^2 + (250657/20160) * |x|^3 + // - (54797/10080) * |x|^4 + (11737/7200) * |x|^5 - (2477/7200) * |x|^6 + // + (517/10080) * |x|^7 - (107/20160) * |x|^8 + (11/30240) * |x|^9 + // - (1/67200) * |x|^10 + (1/3628800) * |x|^11 if 4 < |x| < 5 + // 17496/1925 - (2916/175) * |x| + (486/35) * |x|^2 - (243/35) * |x|^3 + // + (81/35) * |x|^4 - (27/50) * |x|^5 + (9/100) * |x|^6 + // - (3/280) * |x|^7 + (1/1120) * |x|^8 - (1/20160) * |x|^9 + // + (1/604800) * |x|^10 - (1/39916800) * |x|^11 if 5 ≤ |x| < 6 + // 0.0 otherwise + // clang-format on + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs(FIVE + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 6; + + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs( + static_cast(5.5) + di - static_cast(n))); + } + } else { + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs( + static_cast(4.5) + di - static_cast(n))); 
+ } } } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); + raise::KernelError( + HERE, + "Unsupported interpolation order. O > 11 not supported. Seriously. " + "What are you even doing here? Entity already goes to 11!"); } } From 65cbba694e68a4d69600d5ef8ccb65738be56a6c Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 14:16:59 -0500 Subject: [PATCH 76/82] revert to unoptimized version for S3 for further testing --- src/kernels/particle_shapes.hpp | 53 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index c5d5748d..ded47ec5 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -518,27 +518,52 @@ namespace prtl_shape { // 2/3 - x^2 + 1/2 * x^3 |x| < 1 // S(x) = 1/6 * (2 - |x|)^3 1 ≤ |x| < 2 // 0.0 |x| ≥ 2 + // if constexpr (not STAGGERED) { // compute at i positions + // i_min = i - 1; + // S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); + // S[3] = static_cast(1.0 / 6.0) * CUBE(di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } else { // compute at i + 1/2 positions + // if (di < HALF) { + // i_min = i - 2; + // S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + // HALF * CUBE(HALF + di); + // S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } else { + // i_min = i - 1; + // S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + + // HALF * CUBE(di - HALF); + // S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } + // } // staggered if constexpr (not STAGGERED) { // compute at i positions i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); - 
S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); + } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + - HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs( + static_cast(1.5) + di - static_cast(n))); + } } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); - S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + - HALF * CUBE(di - HALF); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); + } } } // staggered } else if constexpr (O == 4u) { From 4adee23d9e00579eb4ecd10a70302f53a8b01ef7 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 22 Oct 2025 16:21:36 -0500 Subject: [PATCH 77/82] temporary bugfix --- src/framework/parameters.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index a14aee99..7a35f1c8 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -416,6 +416,9 @@ namespace ntt { toml::find_or(toml_data, "algorithms", "deposit", "order", 1)); /* [algorithms.fieldsolver] --------------------------------------------- */ + set("algorithms.fieldsolver.enable", + toml::find_or(toml_data, "algorithms", "fieldsolver", "enable", true)); + set("algorithms.fieldsolver.delta_x", toml::find_or(toml_data, "algorithms", From 3848cc3e7b0ec61eca3870d8186d7604b6f1f116 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 22 Oct 2025 18:41:32 -0500 Subject: [PATCH 78/82] 
Esirkepov 1D --- src/kernels/currents_deposit.hpp | 62 ++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 18955e79..88d6edc0 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -420,8 +420,64 @@ namespace kernel { fS_x1); if constexpr (D == Dim::_1D) { - // ToDo - raise::KernelNotImplementedError(HERE); + // define weight vectors + real_t Wx1[O + 2]; + real_t Wx23[O + 2]; + + // first seperate + Wx1[0] = fS_x1[0]; + Wx23[0] = HALF * fS_x1[0]; + // last seperate + Wx1[O + 1] = -iS_x1[O + 1]; + Wx23[O + 1] = HALF * iS_x1[O + 1]; + + // Calculate weight function +#pragma unroll + for (int i = 1; i < O + 1; ++i) { + // Esirkepov 2001, Eq. 38 for 1D case + Wx1[i] = fS_x1[i] - iS_x1[i - 1]; + Wx23[i] = HALF * (fS_x1[i] + iS_x1[i - 1]); + } + + // contribution within the shape function stencil + real_t jx1[O + 2]; + + // prefactors for j update + const real_t Qdx1dt = coeff * inv_dt; + const real_t QVx2 = coeff * vp[1]; + const real_t QVx3 = coeff * vp[2]; + + // Calculate current contribution + jx1[0] = -Qdx1dt * Wx1[0]; +#pragma unroll + for (int i = 1; i < O + 2; ++i) { + jx1[i] = jx1[i - 1] - Qdx1dt * Wx1[i]; + } + + // account for ghost cells + i1_min += N_GHOSTS; + i1_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + + /* + Current update + */ + auto J_acc = J.access(); + + for (int i = 0; i < di_x1; ++i) { + J_acc(i1_min + i, cur::jx1) += jx1[i]; + } + + for (int i = 0; i <= di_x1; ++i) { + J_acc(i1_min + i, cur::jx2) += QVx2 * Wx23[i]; + } + + for (int i = 0; i <= di_x1; ++i) { + J_acc(i1_min + i, cur::jx3) += QVx3 * Wx23[i]; + } + } else if constexpr (D == Dim::_2D) { // shape function in dim1 -> always required @@ -607,7 +663,7 @@ namespace kernel { jx1[0][j][k] = -Qdxdt * Wx1[0][j][k]; } } - + #pragma unroll for (int i = 1; i < O + 2; ++i) { 
#pragma unroll From 417f03c009e082aae79d24fe4ffe07f17e197333 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 30 Oct 2025 14:16:25 -0400 Subject: [PATCH 79/82] reduced 1d from 2d deposit --- src/kernels/currents_deposit.hpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 88d6edc0..c37d1ea5 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -424,19 +424,12 @@ namespace kernel { real_t Wx1[O + 2]; real_t Wx23[O + 2]; - // first seperate - Wx1[0] = fS_x1[0]; - Wx23[0] = HALF * fS_x1[0]; - // last seperate - Wx1[O + 1] = -iS_x1[O + 1]; - Wx23[O + 1] = HALF * iS_x1[O + 1]; - // Calculate weight function #pragma unroll - for (int i = 1; i < O + 1; ++i) { + for (int i = 0; i < O + 2; ++i) { // Esirkepov 2001, Eq. 38 for 1D case - Wx1[i] = fS_x1[i] - iS_x1[i - 1]; - Wx23[i] = HALF * (fS_x1[i] + iS_x1[i - 1]); + Wx1[i] = fS_x1[i] - iS_x1[i]; + Wx23[i] = HALF * (fS_x1[i] + iS_x1[i]); } // contribution within the shape function stencil From 65450c382b7ccae217457508d5812690a3b08a02 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 3 Nov 2025 15:40:38 -0500 Subject: [PATCH 80/82] 11th order + printing fixed --- CMakeLists.txt | 2 +- cmake/styling.cmake | 31 +++++++++++++++++-------------- src/kernels/currents_deposit.hpp | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48e5689b..260c3e87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ if(${deposit} STREQUAL "zigzag") endif() set(shape_orders - "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" + "1;2;3;4;5;6;7;8;9;10;11" CACHE STRING "Shape orders") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) diff --git a/cmake/styling.cmake b/cmake/styling.cmake index 5f1e4a7a..daae19c6 100644 --- a/cmake/styling.cmake +++ b/cmake/styling.cmake @@ -140,12 +140,6 @@ function( else() padto("${rstring}" " " ${Padding} 
rstring) - set(new_choices ${Choices}) - foreach(ch IN LISTS new_choices) - string(REPLACE ${ch} "${Dim}${ch}${ColorReset}" new_choices - "${new_choices}") - endforeach() - set(Choices ${new_choices}) if(${Value} STREQUAL "ON") set(col ${Green}) elseif(${Value} STREQUAL "OFF") @@ -153,14 +147,23 @@ function( else() set(col ${Color}) endif() - if(NOT "${Value}" STREQUAL "") - string(REPLACE ${Value} "${col}${Value}${ColorReset}" Choices - "${Choices}") - endif() - if(NOT "${Default}" STREQUAL "") - string(REPLACE ${Default} "${Underline}${Default}${ColorReset}" Choices - "${Choices}") - endif() + set(new_choices "") + foreach(ch IN LISTS Choices) + set(elem "${ch}") + if((NOT "${Value}" STREQUAL "") AND (${ch} STREQUAL ${Value})) + set(elem "${col}${ch}${ColorReset}") + else() + set(elem "${Dim}${ch}${ColorReset}") + endif() + if((NOT "${Default}" STREQUAL "") AND (${ch} STREQUAL ${Default})) + set(elem "${Underline}${elem}${ColorReset}") + endif() + string(APPEND new_choices "${elem};") + endforeach() + string(LENGTH "${new_choices}" nlen) + math(EXPR nlen "${nlen} - 1") + string(SUBSTRING "${new_choices}" 0 ${nlen} new_choices) + set(Choices ${new_choices}) string(REPLACE ";" "/" Choices "${Choices}") string(APPEND rstring "${Choices}") endif() diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index c37d1ea5..0fbab19f 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -402,7 +402,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 10u)) { + } else if constexpr ((O >= 1u) and (O <= 11u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; From 34ddd52c3f842ba80c57d7e877a392892958383e Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 26 Nov 2025 15:41:38 +0000 Subject: [PATCH 81/82] add output of deposit scheme and order to info file --- src/engines/engine_printer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) 
diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index cbfde930..793ba96e 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -305,6 +305,12 @@ namespace ntt { params.template get("simulation.name").c_str()); add_param(report, 4, "Problem generator", "%s", pgen.c_str()); add_param(report, 4, "Engine", "%s", SimEngine(S).to_string()); + #if SHAPE_ORDER == 0 + add_param(report, 4, "Deposit", "%s", "zigzag"); +#else + add_param(report, 4, "Deposit", "%s", "esirkepov"); + add_param(report, 4, "Interpolation order", "%i", SHAPE_ORDER); +#endif add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); add_param(report, 4, "Timestep [dt]", "%.3e", dt); add_param(report, 4, "Runtime", "%.3e [%d steps]", runtime, max_steps); From 6a079313aa05e018cfedfd0da1f8c0ae731a0f42 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Sun, 30 Nov 2025 15:23:31 +0000 Subject: [PATCH 82/82] fix order of printing for more consistency --- src/engines/engine_printer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 793ba96e..91863abf 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -305,13 +305,13 @@ namespace ntt { params.template get("simulation.name").c_str()); add_param(report, 4, "Problem generator", "%s", pgen.c_str()); add_param(report, 4, "Engine", "%s", SimEngine(S).to_string()); - #if SHAPE_ORDER == 0 + add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); +#if SHAPE_ORDER == 0 add_param(report, 4, "Deposit", "%s", "zigzag"); #else add_param(report, 4, "Deposit", "%s", "esirkepov"); add_param(report, 4, "Interpolation order", "%i", SHAPE_ORDER); #endif - add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); add_param(report, 4, "Timestep [dt]", "%.3e", dt); add_param(report, 4, "Runtime", "%.3e [%d steps]", runtime, max_steps); report += "\n";