From 90b6b2749c95da860c8027e9319b7c16b8e8c220 Mon Sep 17 00:00:00 2001 From: Christopher Albert Date: Fri, 26 Dec 2025 19:31:09 +0100 Subject: [PATCH 1/3] interpolate: add masked batch spline evaluation --- src/interpolate/batch_interpolate.f90 | 7 +- src/interpolate/batch_interpolate_1d.f90 | 71 ++++++++ src/interpolate/batch_interpolate_3d.f90 | 24 ++- src/interpolate/interpolate.f90 | 6 +- test/CMakeLists.txt | 4 + test/source/test_batch_interpolate_mask.f90 | 182 ++++++++++++++++++++ 6 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 test/source/test_batch_interpolate_mask.f90 diff --git a/src/interpolate/batch_interpolate.f90 b/src/interpolate/batch_interpolate.f90 index 1330f4bb..72fb928f 100644 --- a/src/interpolate/batch_interpolate.f90 +++ b/src/interpolate/batch_interpolate.f90 @@ -16,6 +16,7 @@ module batch_interpolate evaluate_batch_splines_1d_many_der, & evaluate_batch_splines_1d_many_der2, & evaluate_batch_splines_1d_many_der3, & + evaluate_batch_splines_1d_many_der3_mask, & evaluate_batch_splines_1d_der, & evaluate_batch_splines_1d_der2, & evaluate_batch_splines_1d_der3 @@ -40,7 +41,8 @@ module batch_interpolate evaluate_batch_splines_3d_many, & evaluate_batch_splines_3d_many_resident, & evaluate_batch_splines_3d_many_der, & - evaluate_batch_splines_3d_many_der2 + evaluate_batch_splines_3d_many_der2, & + evaluate_batch_splines_3d_many_der2_mask implicit none @@ -56,6 +58,7 @@ module batch_interpolate public :: evaluate_batch_splines_1d_many, evaluate_batch_splines_1d_many_resident public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2, evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der3_mask public :: evaluate_batch_splines_1d_der, evaluate_batch_splines_1d_der2 public :: evaluate_batch_splines_1d_der3 @@ -77,5 +80,5 @@ module batch_interpolate public :: evaluate_batch_splines_3d_der2_rmix public :: evaluate_batch_splines_3d_many, evaluate_batch_splines_3d_many_resident public :: evaluate_batch_splines_3d_many_der, evaluate_batch_splines_3d_many_der2 - + public :: evaluate_batch_splines_3d_many_der2_mask end module batch_interpolate diff --git a/src/interpolate/batch_interpolate_1d.f90 b/src/interpolate/batch_interpolate_1d.f90 index e98731cd..edb370e4 100644 --- a/src/interpolate/batch_interpolate_1d.f90 +++ b/src/interpolate/batch_interpolate_1d.f90 @@ -34,6 +34,7 @@ module batch_interpolate_1d public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2 public :: evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der3_mask contains @@ -708,6 +709,76 @@ subroutine evaluate_batch_splines_1d_many_der3(spl, x, y_batch, dy_batch, d2y_ba end do end subroutine evaluate_batch_splines_1d_many_der3 + subroutine evaluate_batch_splines_1d_many_der3_mask(spl, x, mask, y_batch, dy_batch, & + d2y_batch, d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), d3y_batch(:, :) + + integer :: ipt, iq, k, idx, npts, nq, N + real(dp) :: xj, x_norm, x_local, x_min, h_step, period + + npts = size(x) + if (size(mask) /= npts) then + error stop "evaluate_batch_splines_1d_many_der3_mask: mask size mismatch" + end if + + nq = spl%num_quantities + N = spl%order + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(spl%num_points - 1, dp) + + do ipt = 1, npts + if (.not. mask(ipt)) cycle + + if (spl%periodic) then + xj = modulo(x(ipt) - x_min, period) + x_min + else + xj = x(ipt) + end if + x_norm = (xj - x_min)/h_step + idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 + x_local = (x_norm - real(idx - 1, dp))*h_step + + do iq = 1, nq + y_batch(iq, ipt) = spl%coeff(iq, N, idx) + dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) + d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) + d3y_batch(iq, ipt) = N*(N - 1)*(N - 2)*spl%coeff(iq, N, idx) + end do + + do k = N - 1, 3, -1 + do iq = 1, nq + d3y_batch(iq, ipt) = k*(k - 1)*(k - 2)*spl%coeff(iq, k, idx) + & + x_local*d3y_batch(iq, ipt) + end do + end do + + do k = N - 1, 2, -1 + do iq = 1, nq + d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & + x_local*d2y_batch(iq, ipt) + end do + end do + + do k = N - 1, 1, -1 + do iq = 1, nq + dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & + x_local*dy_batch(iq, ipt) + end do + end do + + do k = N - 1, 0, -1 + do iq = 1, nq + y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & + x_local*y_batch(iq, ipt) + end do + end do + end do + end subroutine evaluate_batch_splines_1d_many_der3_mask + subroutine evaluate_batch_splines_1d_der(spl, x, y_batch, dy_batch) type(BatchSplineData1D), intent(in) :: spl real(dp), intent(in) :: x diff --git a/src/interpolate/batch_interpolate_3d.f90 b/src/interpolate/batch_interpolate_3d.f90 index d35707f3..2566f3f3 100644 --- a/src/interpolate/batch_interpolate_3d.f90 +++ b/src/interpolate/batch_interpolate_3d.f90 @@ -38,7 +38,7 @@ module batch_interpolate_3d public :: evaluate_batch_splines_3d_many_resident public :: evaluate_batch_splines_3d_many_der public :: evaluate_batch_splines_3d_many_der2 - + public :: evaluate_batch_splines_3d_many_der2_mask contains subroutine construct_batch_splines_3d(x_min, x_max, y_batch, order, periodic, spl) @@ -1064,6 +1064,28 @@ subroutine evaluate_batch_splines_3d_many_der2(spl, x, y_batch, dy_batch, d2y_ba end do end subroutine evaluate_batch_splines_3d_many_der2 + subroutine evaluate_batch_splines_3d_many_der2_mask(spl, x, mask, y_batch, dy_batch, d2y_batch) + type(BatchSplineData3D), intent(in) :: spl + real(dp), intent(in) :: x(:,:) ! (3, npts) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:,:) ! (nq, npts) + real(dp), intent(inout) :: dy_batch(:,:,:) ! (3, nq, npts) + real(dp), intent(inout) :: d2y_batch(:,:,:) ! (6, nq, npts) + + integer :: ipt, npts + + npts = size(x, 2) + if (size(mask) /= npts) then + error stop "evaluate_batch_splines_3d_many_der2_mask: mask size mismatch" + end if + + do ipt = 1, npts + if (.not. mask(ipt)) cycle + call evaluate_batch_splines_3d_der2_core(spl, x(:, ipt), & + y_batch(:, ipt), dy_batch(:, :, ipt), d2y_batch(:, :, ipt)) + end do + end subroutine evaluate_batch_splines_3d_many_der2_mask + subroutine evaluate_batch_splines_3d_der(spl, x, y_batch, dy_batch) type(BatchSplineData3D), intent(in) :: spl real(dp), intent(in) :: x(3) diff --git a/src/interpolate/interpolate.f90 b/src/interpolate/interpolate.f90 index 309b2d90..5f4404be 100644 --- a/src/interpolate/interpolate.f90 +++ b/src/interpolate/interpolate.f90 @@ -20,6 +20,7 @@ module interpolate evaluate_batch_splines_1d_many_der, & evaluate_batch_splines_1d_many_der2, & evaluate_batch_splines_1d_many_der3, & + evaluate_batch_splines_1d_many_der3_mask, & evaluate_batch_splines_1d_der, & evaluate_batch_splines_1d_der2, & evaluate_batch_splines_1d_der3 @@ -34,7 +35,8 @@ module interpolate evaluate_batch_splines_3d_many, & evaluate_batch_splines_3d_many_resident, & evaluate_batch_splines_3d_many_der, & - evaluate_batch_splines_3d_many_der2 + evaluate_batch_splines_3d_many_der2, & + evaluate_batch_splines_3d_many_der2_mask implicit none @@ -53,6 +55,7 @@ module interpolate public :: evaluate_batch_splines_1d_many, evaluate_batch_splines_1d_many_resident public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2, evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der3_mask public :: evaluate_batch_splines_1d_der, evaluate_batch_splines_1d_der2 public :: evaluate_batch_splines_1d_der3 public :: evaluate_batch_splines_2d, evaluate_batch_splines_2d_der @@ -61,6 +64,7 @@ module interpolate evaluate_batch_splines_3d_der2, evaluate_batch_splines_3d_der2_rmix public :: evaluate_batch_splines_3d_many, evaluate_batch_splines_3d_many_resident public :: evaluate_batch_splines_3d_many_der, evaluate_batch_splines_3d_many_der2 + public :: evaluate_batch_splines_3d_many_der2_mask ! Single-quantity non-batch spline types and routines public :: SplineData1D, SplineData2D, SplineData3D diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 32d82149..5612fa64 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -159,6 +159,9 @@ target_link_libraries(test_batch_interpolate_rmix.x ${COMMON_LIBS}) add_executable(test_batch_interpolate_oracle.x source/test_batch_interpolate_oracle.f90) target_link_libraries(test_batch_interpolate_oracle.x ${COMMON_LIBS}) +add_executable(test_batch_interpolate_mask.x source/test_batch_interpolate_mask.f90) +target_link_libraries(test_batch_interpolate_mask.x ${COMMON_LIBS}) + add_executable(test_batch_eval_oracle.x source/test_batch_eval_oracle.f90 source/spline_eval_reference.f90) @@ -218,6 +221,7 @@ add_test(NAME test_batch_interpolate COMMAND test_batch_interpolate.x) add_test(NAME test_batch_interpolate_many COMMAND test_batch_interpolate_many.x) add_test(NAME test_batch_interpolate_rmix COMMAND test_batch_interpolate_rmix.x) add_test(NAME test_batch_interpolate_oracle COMMAND test_batch_interpolate_oracle.x) +add_test(NAME test_batch_interpolate_mask COMMAND test_batch_interpolate_mask.x) add_test(NAME test_batch_eval_oracle COMMAND test_batch_eval_oracle.x) add_test(NAME test_spline_performance COMMAND test_spline_performance.x) add_test(NAME test_collision_freqs COMMAND test_collision_freqs.x) diff --git a/test/source/test_batch_interpolate_mask.f90 b/test/source/test_batch_interpolate_mask.f90 new file mode 100644 index 00000000..142207c3 --- /dev/null +++ b/test/source/test_batch_interpolate_mask.f90 @@ -0,0 +1,182 @@ +program test_batch_interpolate_mask + use, intrinsic :: iso_fortran_env, only: dp => real64 + use batch_interpolate, only: BatchSplineData1D, BatchSplineData3D + use batch_interpolate, only: construct_batch_splines_1d, destroy_batch_splines_1d + use batch_interpolate, only: construct_batch_splines_3d, destroy_batch_splines_3d + use batch_interpolate, only: evaluate_batch_splines_1d_many_der3 + use batch_interpolate, only: evaluate_batch_splines_3d_many_der2 + use batch_interpolate, only: evaluate_batch_splines_1d_many_der3_mask + use batch_interpolate, only: evaluate_batch_splines_3d_many_der2_mask + implicit none + + call test_1d_der3_mask() + call test_3d_der2_mask() + +contains + + subroutine assert_close(name, a, b, tol) + character(len=*), intent(in) :: name + real(dp), intent(in) :: a(:) + real(dp), intent(in) :: b(:) + real(dp), intent(in) :: tol + real(dp) :: diff + + diff = maxval(abs(a - b)) + if (diff > tol) then + write (*, *) "FAIL", trim(name), "diff", diff, "tol", tol + error stop 1 + end if + end subroutine assert_close + + + subroutine assert_unchanged(name, a, sentinel) + character(len=*), intent(in) :: name + real(dp), intent(in) :: a(:) + real(dp), intent(in) :: sentinel + + if (any(a /= sentinel)) then + write (*, *) "FAIL", trim(name), "unexpected overwrite" + error stop 1 + end if + end subroutine assert_unchanged + + + subroutine test_1d_der3_mask() + integer, parameter :: order = 5 + integer, parameter :: ngrid = 32 + integer, parameter :: nq = 3 + integer, parameter :: npts = 17 + logical, parameter :: periodic = .true. + real(dp), parameter :: x_min = 0.0d0 + real(dp), parameter :: x_max = 2.0d0 + real(dp), parameter :: tol = 1.0d-12 + real(dp), parameter :: sentinel = -999.0d0 + + type(BatchSplineData1D) :: spl + real(dp) :: x_grid(ngrid) + real(dp) :: y_grid(ngrid, nq) + real(dp) :: x_eval(npts) + logical :: mask(npts) + + real(dp) :: y_full(nq, npts), dy_full(nq, npts), d2_full(nq, npts), d3_full(nq, npts) + real(dp) :: y_mask(nq, npts), dy_mask(nq, npts), d2_mask(nq, npts), d3_mask(nq, npts) + integer :: i, iq + + do i = 1, ngrid + x_grid(i) = x_min + (x_max - x_min) * real(i - 1, dp) / real(ngrid - 1, dp) + end do + do iq = 1, nq + do i = 1, ngrid + y_grid(i, iq) = cos(x_grid(i) + 0.1d0 * real(iq - 1, dp)) + end do + end do + call construct_batch_splines_1d(x_min, x_max, y_grid, order, periodic, spl) + + do i = 1, npts + x_eval(i) = x_min + 0.73d0 * real(i - 1, dp) + mask(i) = mod(i, 2) == 1 + end do + + call evaluate_batch_splines_1d_many_der3(spl, x_eval, y_full, dy_full, d2_full, d3_full) + + y_mask = sentinel + dy_mask = sentinel + d2_mask = sentinel + d3_mask = sentinel + call evaluate_batch_splines_1d_many_der3_mask(spl, x_eval, mask, y_mask, dy_mask, & + d2_mask, d3_mask) + + do i = 1, npts + if (mask(i)) then + call assert_close("1d_y", y_full(:, i), y_mask(:, i), tol) + call assert_close("1d_dy", dy_full(:, i), dy_mask(:, i), tol) + call assert_close("1d_d2", d2_full(:, i), d2_mask(:, i), tol) + call assert_close("1d_d3", d3_full(:, i), d3_mask(:, i), tol) + else + call assert_unchanged("1d_y_sentinel", y_mask(:, i), sentinel) + call assert_unchanged("1d_dy_sentinel", dy_mask(:, i), sentinel) + call assert_unchanged("1d_d2_sentinel", d2_mask(:, i), sentinel) + call assert_unchanged("1d_d3_sentinel", d3_mask(:, i), sentinel) + end if + end do + + call destroy_batch_splines_1d(spl) + end subroutine test_1d_der3_mask + + + subroutine test_3d_der2_mask() + integer, parameter :: order(3) = [5, 5, 5] + integer, parameter :: ngrid(3) = [16, 12, 10] + integer, parameter :: nq = 2 + integer, parameter :: npts = 19 + logical, parameter :: periodic(3) = [.false., .true., .true.] + real(dp), parameter :: x_min(3) = [0.0d0, 0.0d0, 0.0d0] + real(dp), parameter :: x_max(3) = [1.0d0, 2.0d0, 3.0d0] + real(dp), parameter :: tol = 1.0d-12 + real(dp), parameter :: sentinel = -999.0d0 + + type(BatchSplineData3D) :: spl + real(dp) :: y_grid(ngrid(1), ngrid(2), ngrid(3), nq) + real(dp) :: x_eval(3, npts) + logical :: mask(npts) + + real(dp) :: y_full(nq, npts), y_mask(nq, npts) + real(dp) :: dy_full(3, nq, npts), dy_mask(3, nq, npts) + real(dp) :: d2_full(6, nq, npts), d2_mask(6, nq, npts) + integer :: i1, i2, i3, iq, ip + + do iq = 1, nq + do i3 = 1, ngrid(3) + do i2 = 1, ngrid(2) + do i1 = 1, ngrid(1) + y_grid(i1, i2, i3, iq) = & + cos(x_min(1) + real(i1 - 1, dp) * (x_max(1) - x_min(1)) / & + real(ngrid(1) - 1, dp)) * & + cos(x_min(2) + real(i2 - 1, dp) * (x_max(2) - x_min(2)) / & + real(ngrid(2) - 1, dp)) * & + cos(x_min(3) + real(i3 - 1, dp) * (x_max(3) - x_min(3)) / & + real(ngrid(3) - 1, dp)) * & + (1.0d0 + 0.07d0 * real(iq - 1, dp)) + end do + end do + end do + end do + + call construct_batch_splines_3d(x_min, x_max, y_grid, order, periodic, spl) + + do ip = 1, npts + x_eval(1, ip) = x_min(1) + (x_max(1) - x_min(1)) * real(ip - 1, dp) / real(npts, dp) + x_eval(2, ip) = x_min(2) + (x_max(2) - x_min(2)) * real(2 * ip - 1, dp) / & + real(2 * npts, dp) + x_eval(3, ip) = x_min(3) + (x_max(3) - x_min(3)) * real(3 * ip - 1, dp) / & + real(3 * npts, dp) + mask(ip) = mod(ip, 3) /= 0 + end do + + call evaluate_batch_splines_3d_many_der2(spl, x_eval, y_full, dy_full, d2_full) + + y_mask = sentinel + dy_mask = sentinel + d2_mask = sentinel + call evaluate_batch_splines_3d_many_der2_mask(spl, x_eval, mask, y_mask, dy_mask, d2_mask) + + do ip = 1, npts + if (mask(ip)) then + call assert_close("3d_y", y_full(:, ip), y_mask(:, ip), tol) + call assert_close("3d_dy", reshape(dy_full(:, :, ip), [3 * nq]), & + reshape(dy_mask(:, :, ip), [3 * nq]), tol) + call assert_close("3d_d2", reshape(d2_full(:, :, ip), [6 * nq]), & + reshape(d2_mask(:, :, ip), [6 * nq]), tol) + else + call assert_unchanged("3d_y_sentinel", y_mask(:, ip), sentinel) + call assert_unchanged("3d_dy_sentinel", reshape(dy_mask(:, :, ip), [3 * nq]), & + sentinel) + call assert_unchanged("3d_d2_sentinel", reshape(d2_mask(:, :, ip), [6 * nq]), & + sentinel) + end if + end do + + call destroy_batch_splines_3d(spl) + end subroutine test_3d_der2_mask + +end program test_batch_interpolate_mask From c064cae9f54c95e2dd2e1242a57284ccda97b7ee Mon Sep 17 00:00:00 2001 From: Christopher Albert Date: Fri, 26 Dec 2025 19:33:07 +0100 Subject: [PATCH 2/3] interpolate: add masked 1d der2 eval --- src/interpolate/batch_interpolate.f90 | 2 + src/interpolate/batch_interpolate_1d.f90 | 62 +++++++++++++++++++++ src/interpolate/interpolate.f90 | 2 + test/source/test_batch_interpolate_mask.f90 | 18 ++++++ 4 files changed, 84 insertions(+) diff --git a/src/interpolate/batch_interpolate.f90 b/src/interpolate/batch_interpolate.f90 index 72fb928f..bc550033 100644 --- a/src/interpolate/batch_interpolate.f90 +++ b/src/interpolate/batch_interpolate.f90 @@ -16,6 +16,7 @@ module batch_interpolate evaluate_batch_splines_1d_many_der, & evaluate_batch_splines_1d_many_der2, & evaluate_batch_splines_1d_many_der3, & + evaluate_batch_splines_1d_many_der2_mask, & evaluate_batch_splines_1d_many_der3_mask, & evaluate_batch_splines_1d_der, & evaluate_batch_splines_1d_der2, & @@ -58,6 +59,7 @@ module batch_interpolate public :: evaluate_batch_splines_1d_many, evaluate_batch_splines_1d_many_resident public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2, evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der2_mask public :: evaluate_batch_splines_1d_many_der3_mask public :: evaluate_batch_splines_1d_der, evaluate_batch_splines_1d_der2 public :: evaluate_batch_splines_1d_der3 diff --git a/src/interpolate/batch_interpolate_1d.f90 b/src/interpolate/batch_interpolate_1d.f90 index edb370e4..50803d69 100644 --- a/src/interpolate/batch_interpolate_1d.f90 +++ b/src/interpolate/batch_interpolate_1d.f90 @@ -34,6 +34,7 @@ module batch_interpolate_1d public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2 public :: evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der2_mask public :: evaluate_batch_splines_1d_many_der3_mask contains @@ -646,6 +647,67 @@ subroutine evaluate_batch_splines_1d_many_der2(spl, x, y_batch, dy_batch, d2y_ba end do end subroutine evaluate_batch_splines_1d_many_der2 + subroutine evaluate_batch_splines_1d_many_der2_mask(spl, x, mask, y_batch, dy_batch, d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) + + integer :: ipt, iq, k, idx, npts, nq, N + real(dp) :: xj, x_norm, x_local, x_min, h_step, period + + npts = size(x) + if (size(mask) /= npts) then + error stop "evaluate_batch_splines_1d_many_der2_mask: mask size mismatch" + end if + + nq = spl%num_quantities + N = spl%order + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(spl%num_points - 1, dp) + + do ipt = 1, npts + if (.not. mask(ipt)) cycle + + if (spl%periodic) then + xj = modulo(x(ipt) - x_min, period) + x_min + else + xj = x(ipt) + end if + x_norm = (xj - x_min)/h_step + idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 + x_local = (x_norm - real(idx - 1, dp))*h_step + + do iq = 1, nq + y_batch(iq, ipt) = spl%coeff(iq, N, idx) + dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) + d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) + end do + + do k = N - 1, 2, -1 + do iq = 1, nq + d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & + x_local*d2y_batch(iq, ipt) + end do + end do + + do k = N - 1, 1, -1 + do iq = 1, nq + dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & + x_local*dy_batch(iq, ipt) + end do + end do + + do k = N - 1, 0, -1 + do iq = 1, nq + y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & + x_local*y_batch(iq, ipt) + end do + end do + end do + end subroutine evaluate_batch_splines_1d_many_der2_mask + subroutine evaluate_batch_splines_1d_many_der3(spl, x, y_batch, dy_batch, d2y_batch, & d3y_batch) type(BatchSplineData1D), intent(in) :: spl diff --git a/src/interpolate/interpolate.f90 b/src/interpolate/interpolate.f90 index 5f4404be..1df385b0 100644 --- a/src/interpolate/interpolate.f90 +++ b/src/interpolate/interpolate.f90 @@ -20,6 +20,7 @@ module interpolate evaluate_batch_splines_1d_many_der, & evaluate_batch_splines_1d_many_der2, & evaluate_batch_splines_1d_many_der3, & + evaluate_batch_splines_1d_many_der2_mask, & evaluate_batch_splines_1d_many_der3_mask, & evaluate_batch_splines_1d_der, & evaluate_batch_splines_1d_der2, & @@ -55,6 +56,7 @@ module interpolate public :: evaluate_batch_splines_1d_many, evaluate_batch_splines_1d_many_resident public :: evaluate_batch_splines_1d_many_der public :: evaluate_batch_splines_1d_many_der2, evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der2_mask public :: evaluate_batch_splines_1d_many_der3_mask public :: evaluate_batch_splines_1d_der, evaluate_batch_splines_1d_der2 public :: evaluate_batch_splines_1d_der3 diff --git a/test/source/test_batch_interpolate_mask.f90 b/test/source/test_batch_interpolate_mask.f90 index 142207c3..24256da3 100644 --- a/test/source/test_batch_interpolate_mask.f90 +++ b/test/source/test_batch_interpolate_mask.f90 @@ -4,7 +4,9 @@ program test_batch_interpolate_mask use batch_interpolate, only: construct_batch_splines_1d, destroy_batch_splines_1d use batch_interpolate, only: construct_batch_splines_3d, destroy_batch_splines_3d use batch_interpolate, only: evaluate_batch_splines_1d_many_der3 + use batch_interpolate, only: evaluate_batch_splines_1d_many_der2 use batch_interpolate, only: evaluate_batch_splines_3d_many_der2 + use batch_interpolate, only: evaluate_batch_splines_1d_many_der2_mask use batch_interpolate, only: evaluate_batch_splines_1d_many_der3_mask use batch_interpolate, only: evaluate_batch_splines_3d_many_der2_mask implicit none @@ -60,6 +62,8 @@ subroutine test_1d_der3_mask() real(dp) :: y_full(nq, npts), dy_full(nq, npts), d2_full(nq, npts), d3_full(nq, npts) real(dp) :: y_mask(nq, npts), dy_mask(nq, npts), d2_mask(nq, npts), d3_mask(nq, npts) + real(dp) :: y2_full(nq, npts), dy2_full(nq, npts), d22_full(nq, npts) + real(dp) :: y2_mask(nq, npts), dy2_mask(nq, npts), d22_mask(nq, npts) integer :: i, iq do i = 1, ngrid @@ -79,6 +83,14 @@ subroutine test_1d_der3_mask() call evaluate_batch_splines_1d_many_der3(spl, x_eval, y_full, dy_full, d2_full, d3_full) + call evaluate_batch_splines_1d_many_der2(spl, x_eval, y2_full, dy2_full, d22_full) + + y2_mask = sentinel + dy2_mask = sentinel + d22_mask = sentinel + call evaluate_batch_splines_1d_many_der2_mask(spl, x_eval, mask, y2_mask, dy2_mask, & + d22_mask) + y_mask = sentinel dy_mask = sentinel d2_mask = sentinel @@ -88,11 +100,17 @@ subroutine test_1d_der3_mask() do i = 1, npts if (mask(i)) then + call assert_close("1d_y2", y2_full(:, i), y2_mask(:, i), tol) + call assert_close("1d_dy2", dy2_full(:, i), dy2_mask(:, i), tol) + call assert_close("1d_d22", d22_full(:, i), d22_mask(:, i), tol) call assert_close("1d_y", y_full(:, i), y_mask(:, i), tol) call assert_close("1d_dy", dy_full(:, i), dy_mask(:, i), tol) call assert_close("1d_d2", d2_full(:, i), d2_mask(:, i), tol) call assert_close("1d_d3", d3_full(:, i), d3_mask(:, i), tol) else + call assert_unchanged("1d_y2_sentinel", y2_mask(:, i), sentinel) + call assert_unchanged("1d_dy2_sentinel", dy2_mask(:, i), sentinel) + call assert_unchanged("1d_d22_sentinel", d22_mask(:, i), sentinel) call assert_unchanged("1d_y_sentinel", y_mask(:, i), sentinel) call assert_unchanged("1d_dy_sentinel", dy_mask(:, i), sentinel) call assert_unchanged("1d_d2_sentinel", d2_mask(:, i), sentinel) From 8d49ccb6d0e3c36b3e26cf02c2dfc570b98319a1 Mon Sep 17 00:00:00 2001 From: Christopher Albert Date: Fri, 26 Dec 2025 20:50:46 +0100 Subject: [PATCH 3/3] interpolate: add GPU-style masked batch evaluation Add masked batch spline evaluation for 2D, plus OpenACC resident kernels for 1D der2/der3 and 3D der2, using unified per-point cores. Also export MAX_BATCH_* constants and extend mask tests to cover 2D. --- src/interpolate/batch_interpolate.f90 | 13 +- src/interpolate/batch_interpolate_1d.f90 | 1801 +++++++++---------- src/interpolate/batch_interpolate_types.f90 | 117 +- src/interpolate/interpolate.f90 | 132 +- test/source/test_batch_interpolate_mask.f90 | 142 +- 5 files changed, 1144 insertions(+), 1061 deletions(-) diff --git a/src/interpolate/batch_interpolate.f90 b/src/interpolate/batch_interpolate.f90 index bc550033..38f5c83a 100644 --- a/src/interpolate/batch_interpolate.f90 +++ b/src/interpolate/batch_interpolate.f90 @@ -29,7 +29,10 @@ module batch_interpolate evaluate_batch_splines_2d, & evaluate_batch_splines_2d_der, & evaluate_batch_splines_2d_many, & - evaluate_batch_splines_2d_many_resident + evaluate_batch_splines_2d_many_mask, & + evaluate_batch_splines_2d_many_resident, & + evaluate_batch_splines_2d_many_der, & + evaluate_batch_splines_2d_many_der_mask use batch_interpolate_3d, only: construct_batch_splines_3d, & construct_batch_splines_3d_lines, & construct_batch_splines_3d_resident, & @@ -38,7 +41,6 @@ module batch_interpolate evaluate_batch_splines_3d, & evaluate_batch_splines_3d_der, & evaluate_batch_splines_3d_der2, & - evaluate_batch_splines_3d_der2_rmix, & evaluate_batch_splines_3d_many, & evaluate_batch_splines_3d_many_resident, & evaluate_batch_splines_3d_many_der, & @@ -70,7 +72,10 @@ module batch_interpolate public :: construct_batch_splines_2d_resident public :: construct_batch_splines_2d_resident_device public :: evaluate_batch_splines_2d, evaluate_batch_splines_2d_der - public :: evaluate_batch_splines_2d_many, evaluate_batch_splines_2d_many_resident + public :: evaluate_batch_splines_2d_many, evaluate_batch_splines_2d_many_mask + public :: evaluate_batch_splines_2d_many_resident + public :: evaluate_batch_splines_2d_many_der, & + evaluate_batch_splines_2d_many_der_mask ! Re-export 3D routines from batch_interpolate_3d public :: construct_batch_splines_3d, destroy_batch_splines_3d @@ -79,8 +84,8 @@ module batch_interpolate public :: construct_batch_splines_3d_resident_device public :: evaluate_batch_splines_3d, evaluate_batch_splines_3d_der public :: evaluate_batch_splines_3d_der2 - public :: evaluate_batch_splines_3d_der2_rmix public :: evaluate_batch_splines_3d_many, evaluate_batch_splines_3d_many_resident public :: evaluate_batch_splines_3d_many_der, evaluate_batch_splines_3d_many_der2 public :: evaluate_batch_splines_3d_many_der2_mask + end module batch_interpolate diff --git a/src/interpolate/batch_interpolate_1d.f90 b/src/interpolate/batch_interpolate_1d.f90 index 50803d69..030bf170 100644 --- a/src/interpolate/batch_interpolate_1d.f90 +++ b/src/interpolate/batch_interpolate_1d.f90 @@ -1,978 +1,971 @@ module batch_interpolate_1d - use, intrinsic :: iso_fortran_env, only: dp => real64 - use batch_interpolate_types, only: BatchSplineData1D - use spline_build_lines, only: spl_three_per_line, spl_three_reg_line, & - spl_four_per_line, spl_four_reg_line, & - spl_five_per_line, spl_five_reg_line - use spl_three_to_five_sub, only: spl_per, spl_reg + use, intrinsic :: iso_fortran_env, only: dp => real64 + use batch_interpolate_types, only: BatchSplineData1D + use spline_build_lines, only: spl_three_per_line, spl_three_reg_line, & + spl_four_per_line, spl_four_reg_line, & + spl_five_per_line, spl_five_reg_line + use spl_three_to_five_sub, only: spl_per, spl_reg #ifdef _OPENACC - use openacc, only: acc_is_present + use openacc, only: acc_is_present #endif - implicit none - private + implicit none + private - ! Export batch spline construction/destruction routines - public :: construct_batch_splines_1d - public :: construct_batch_splines_1d_lines - public :: construct_batch_splines_1d_resident - public :: construct_batch_splines_1d_resident_device - public :: destroy_batch_splines_1d + ! Export batch spline construction/destruction routines + public :: construct_batch_splines_1d + public :: construct_batch_splines_1d_lines + public :: construct_batch_splines_1d_resident + public :: construct_batch_splines_1d_resident_device + public :: destroy_batch_splines_1d #ifdef LIBNEO_ENABLE_SPLINE_ORACLE - public :: construct_batch_splines_1d_legacy + public :: construct_batch_splines_1d_legacy #endif - ! Export batch spline evaluation routines - public :: evaluate_batch_splines_1d - public :: evaluate_batch_splines_1d_single - public :: evaluate_batch_splines_1d_many - public :: evaluate_batch_splines_1d_many_resident - public :: evaluate_batch_splines_1d_der - public :: evaluate_batch_splines_1d_der2 - public :: evaluate_batch_splines_1d_der3 - public :: evaluate_batch_splines_1d_many_der - public :: evaluate_batch_splines_1d_many_der2 - public :: evaluate_batch_splines_1d_many_der3 - public :: evaluate_batch_splines_1d_many_der2_mask - public :: evaluate_batch_splines_1d_many_der3_mask + ! Export batch spline evaluation routines + public :: evaluate_batch_splines_1d + public :: evaluate_batch_splines_1d_single + public :: evaluate_batch_splines_1d_many + public :: evaluate_batch_splines_1d_many_resident + public :: evaluate_batch_splines_1d_der + public :: evaluate_batch_splines_1d_der2 + public :: evaluate_batch_splines_1d_der3 + public :: evaluate_batch_splines_1d_many_der + public :: evaluate_batch_splines_1d_many_der2 + public :: evaluate_batch_splines_1d_many_der3 + public :: evaluate_batch_splines_1d_many_der2_mask + public :: evaluate_batch_splines_1d_many_der3_mask contains - subroutine construct_batch_splines_1d(x_min, x_max, y_batch, order, periodic, spl) - real(dp), intent(in) :: x_min, x_max - real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl + subroutine evaluate_batch_splines_1d_der2_core(spl, x, y, dy, d2y) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y(:), dy(:), d2y(:) - call construct_batch_splines_1d_legacy(x_min, x_max, y_batch, order, periodic, spl) + integer :: iq, k, idx, nq, N + real(dp) :: xj, x_norm, x_local, x_min, h_step, period #ifdef _OPENACC - ! Map only the allocatable component, not the whole derived type - ! This is compatible with both gfortran and nvfortran - !$acc enter data copyin(spl%coeff) + !$acc routine seq #endif - end subroutine construct_batch_splines_1d - - subroutine construct_batch_splines_1d_legacy(x_min, x_max, y_batch, order, periodic, spl) - real(dp), intent(in) :: x_min, x_max - real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl - - integer :: iq, n_points, n_quantities, istat - real(dp), dimension(:, :), allocatable :: splcoe_temp - - n_points = size(y_batch, 1) - n_quantities = size(y_batch, 2) - - ! Validate input - if (n_points < 2) then - error stop "construct_batch_splines_1d: Need at least 2 points" - end if - if (n_quantities < 1) then - error stop "construct_batch_splines_1d: Need at least 1 quantity" - end if - if (order < 3 .or. order > 5) then - error stop "construct_batch_splines_1d: Order must be between 3 and 5" - end if - - ! Store metadata - spl%order = order - spl%num_points = n_points - spl%periodic = periodic - spl%x_min = x_min - spl%h_step = (x_max - x_min)/dble(n_points - 1) - spl%inv_h_step = 1.0_dp/spl%h_step - spl%period = spl%h_step*real(n_points - 1, dp) - spl%num_quantities = n_quantities - - ! Allocate batch coefficients with cache-friendly layout - allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d: Allocation failed for coeff" - end if - - ! Temporary array for single spline construction - allocate (splcoe_temp(0:order, n_points), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d: Allocation failed for splcoe_temp" - end if - - ! Process each quantity - do iq = 1, n_quantities - ! Set the data values - splcoe_temp(0, :) = y_batch(:, iq) - - ! Construct spline coefficients - if (periodic) then - call spl_per(order, n_points, spl%h_step, splcoe_temp) - else - call spl_reg(order, n_points, spl%h_step, splcoe_temp) - end if - - ! Store coefficients in batch array with quantities first - spl%coeff(iq, :, :) = splcoe_temp(:, :) - end do - - deallocate (splcoe_temp) - end subroutine construct_batch_splines_1d_legacy - - subroutine construct_batch_splines_1d_lines(x_min, x_max, y_batch, order, periodic, spl) - real(dp), intent(in) :: x_min, x_max - real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl - - integer :: iq, n_points, n_quantities, istat - real(dp), allocatable :: work(:, :, :) - - n_points = size(y_batch, 1) - n_quantities = size(y_batch, 2) - - if (n_points < 2) then - error stop "construct_batch_splines_1d_lines: Need at least 2 points" - end if - if (n_quantities < 1) then - error stop "construct_batch_splines_1d_lines: Need at least 1 quantity" - end if - if (order < 3 .or. order > 5) then - error stop "construct_batch_splines_1d_lines: Order must be between 3 and 5" - end if - if (order == 3) then - if (periodic .and. n_points < 3) then - error stop "construct_batch_splines_1d_lines: Need at least 3 points "// & - "for periodic order=3" - end if - else if (order == 4) then - if (n_points < 5) then - error stop "construct_batch_splines_1d_lines: Need at least 5 points for order=4" - end if - else - if (n_points < 6) then - error stop "construct_batch_splines_1d_lines: Need at least 6 points for order=5" - end if - end if - - spl%order = order - spl%num_points = n_points - spl%periodic = periodic - spl%x_min = x_min - spl%h_step = (x_max - x_min)/dble(n_points - 1) - spl%inv_h_step = 1.0_dp/spl%h_step - spl%period = spl%h_step*real(n_points - 1, dp) - spl%num_quantities = n_quantities - - allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d_lines: Allocation failed for coeff" - end if - - allocate (work(n_points, n_quantities, 0:order), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d_lines: Allocation failed for work" - end if - - do iq = 1, n_quantities - work(:, iq, 0) = y_batch(:, iq) - end do + nq = spl%num_quantities + N = spl%order + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(spl%num_points - 1, dp) + + if (spl%periodic) then + xj = modulo(x - x_min, period) + x_min + else + xj = x + end if + x_norm = (xj - x_min)/h_step + idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 + x_local = (x_norm - real(idx - 1, dp))*h_step + + do iq = 1, nq + y(iq) = spl%coeff(iq, N, idx) + dy(iq) = real(N, dp)*spl%coeff(iq, N, idx) + d2y(iq) = real(N*(N - 1), dp)*spl%coeff(iq, N, idx) + end do + + do k = N - 1, 2, -1 + do iq = 1, nq + d2y(iq) = real(k*(k - 1), dp)*spl%coeff(iq, k, idx) + & + x_local*d2y(iq) + end do + end do -#ifdef _OPENMP -!$omp parallel do default(none) shared(work, n_points, n_quantities, order, periodic, & -!$omp& spl) private(iq) + do k = N - 1, 1, -1 + do iq = 1, nq + dy(iq) = real(k, dp)*spl%coeff(iq, k, idx) + x_local*dy(iq) + end do + end do + + do k = N - 1, 0, -1 + do iq = 1, nq + y(iq) = spl%coeff(iq, k, idx) + x_local*y(iq) + end do + end do + end subroutine evaluate_batch_splines_1d_der2_core + + subroutine evaluate_batch_splines_1d_der3_core(spl, x, y, dy, d2y, d3y) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y(:), dy(:), d2y(:), d3y(:) + + integer :: iq, k, idx, nq, N + real(dp) :: xj, x_norm, x_local, x_min, h_step, period + +#ifdef _OPENACC + !$acc routine seq +#endif + nq = spl%num_quantities + N = spl%order + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(spl%num_points - 1, dp) + + if (spl%periodic) then + xj = modulo(x - x_min, period) + x_min + else + xj = x + end if + x_norm = (xj - x_min)/h_step + idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 + x_local = (x_norm - real(idx - 1, dp))*h_step + + do iq = 1, nq + y(iq) = spl%coeff(iq, N, idx) + dy(iq) = real(N, dp)*spl%coeff(iq, N, idx) + d2y(iq) = real(N*(N - 1), dp)*spl%coeff(iq, N, idx) + d3y(iq) = real(N*(N - 1)*(N - 2), dp)*spl%coeff(iq, N, idx) + end do + + do k = N - 1, 3, -1 + do iq = 1, nq + d3y(iq) = real(k*(k - 1)*(k - 2), dp)*spl%coeff(iq, k, idx) + & + x_local*d3y(iq) + end do + end do + + do k = N - 1, 2, -1 + do iq = 1, nq + d2y(iq) = real(k*(k - 1), dp)*spl%coeff(iq, k, idx) + & + x_local*d2y(iq) + end do + end do + + do k = N - 1, 1, -1 + do iq = 1, nq + dy(iq) = real(k, dp)*spl%coeff(iq, k, idx) + x_local*dy(iq) + end do + end do + + do k = N - 1, 0, -1 + do iq = 1, nq + y(iq) = spl%coeff(iq, k, idx) + x_local*y(iq) + end do + end do + end subroutine evaluate_batch_splines_1d_der3_core + +#ifdef _OPENACC + subroutine evaluate_batch_splines_1d_many_der2_resident(spl, x, y_batch, dy_batch, & + d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + !$acc parallel loop present(spl%coeff, x, y_batch, dy_batch, d2y_batch) + do ipt = 1, npts + call evaluate_batch_splines_1d_der2_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt)) + end do + !$acc end parallel loop + end subroutine evaluate_batch_splines_1d_many_der2_resident + + subroutine evaluate_batch_splines_1d_many_der2_mask_resident(spl, x, & + mask, y_batch, & + dy_batch, d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + !$acc parallel loop present(spl%coeff, x, mask, y_batch, dy_batch, d2y_batch) + do ipt = 1, npts + if (.not. mask(ipt)) cycle + call evaluate_batch_splines_1d_der2_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt)) + end do + !$acc end parallel loop + end subroutine evaluate_batch_splines_1d_many_der2_mask_resident + + subroutine evaluate_batch_splines_1d_many_der3_resident(spl, x, y_batch, dy_batch, & + d2y_batch, d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), & + d3y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + !$acc parallel loop present(spl%coeff, x, y_batch, dy_batch, d2y_batch, & + !$acc& d3y_batch) + do ipt = 1, npts + call evaluate_batch_splines_1d_der3_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt), & + d3y_batch(:, ipt)) + end do + !$acc end parallel loop + end subroutine evaluate_batch_splines_1d_many_der3_resident + + subroutine evaluate_batch_splines_1d_many_der3_mask_resident(spl, x, & + mask, y_batch, & + dy_batch, d2y_batch, & + d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), & + d3y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + !$acc parallel loop present(spl%coeff, x, mask, y_batch, dy_batch, d2y_batch, & + !$acc& d3y_batch) + do ipt = 1, npts + if (.not. mask(ipt)) cycle + call evaluate_batch_splines_1d_der3_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt), & + d3y_batch(:, ipt)) + end do + !$acc end parallel loop + end subroutine evaluate_batch_splines_1d_many_der3_mask_resident #endif - do iq = 1, n_quantities - if (order == 3) then + + subroutine construct_batch_splines_1d(x_min, x_max, y_batch, order, periodic, spl) + real(dp), intent(in) :: x_min, x_max + real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + + call construct_batch_splines_1d_legacy(x_min, x_max, y_batch, order, & + periodic, spl) + +#ifdef _OPENACC + ! Map only the allocatable component, not the whole derived type + ! This is compatible with both gfortran and nvfortran + !$acc enter data copyin(spl%coeff) +#endif + end subroutine construct_batch_splines_1d + + subroutine construct_batch_splines_1d_legacy(x_min, x_max, y_batch, order, & + periodic, spl) + real(dp), intent(in) :: x_min, x_max + real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + + integer :: iq, n_points, n_quantities, istat + real(dp), dimension(:, :), allocatable :: splcoe_temp + + n_points = size(y_batch, 1) + n_quantities = size(y_batch, 2) + + ! Validate input + if (n_points < 2) then + error stop "construct_batch_splines_1d: Need at least 2 points" + end if + if (n_quantities < 1) then + error stop "construct_batch_splines_1d: Need at least 1 quantity" + end if + if (order < 3 .or. order > 5) then + error stop "construct_batch_splines_1d: Order must be between 3 and 5" + end if + + ! Store metadata + spl%order = order + spl%num_points = n_points + spl%periodic = periodic + spl%x_min = x_min + spl%h_step = (x_max - x_min)/dble(n_points - 1) + spl%num_quantities = n_quantities + + ! Allocate batch coefficients with cache-friendly layout + allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) + if (istat /= 0) then + error stop "construct_batch_splines_1d: Allocation failed for coeff" + end if + + ! Temporary array for single spline construction + allocate (splcoe_temp(0:order, n_points), stat=istat) + if (istat /= 0) then + error stop "construct_batch_splines_1d: Allocation failed for splcoe_temp" + end if + + ! Process each quantity + do iq = 1, n_quantities + ! Set the data values + splcoe_temp(0, :) = y_batch(:, iq) + + ! Construct spline coefficients if (periodic) then - call spl_three_per_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3)) + call spl_per(order, n_points, spl%h_step, splcoe_temp) else - call spl_three_reg_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3)) + call spl_reg(order, n_points, spl%h_step, splcoe_temp) end if - else if (order == 4) then - if (periodic) then - call spl_four_per_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4)) - else - call spl_four_reg_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4)) + + ! Store coefficients in batch array with quantities first + spl%coeff(iq, :, :) = splcoe_temp(:, :) + end do + + deallocate (splcoe_temp) + end subroutine construct_batch_splines_1d_legacy + + subroutine construct_batch_splines_1d_lines(x_min, x_max, y_batch, order, & + periodic, spl) + real(dp), intent(in) :: x_min, x_max + real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + + integer :: iq, n_points, n_quantities, istat + real(dp), allocatable :: work(:, :, :) + + n_points = size(y_batch, 1) + n_quantities = size(y_batch, 2) + + if (n_points < 2) then + error stop "construct_batch_splines_1d_lines: Need at least 2 points" + end if + if (n_quantities < 1) then + error stop "construct_batch_splines_1d_lines: Need at least 1 quantity" + end if + if (order < 3 .or. order > 5) then + error stop "construct_batch_splines_1d_lines: Order must be between 3 and 5" + end if + if (order == 3) then + if (periodic .and. n_points < 3) then + error stop & + "construct_batch_splines_1d_lines: Need at least 3 points "// & + "for periodic order=3" end if - else - if (periodic) then - call spl_five_per_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4), & - work(:, iq, 5)) + else if (order == 4) then + if (n_points < 5) then + error stop "construct_batch_splines_1d_lines: Need at least 5 points for order=4" + end if + else + if (n_points < 6) then + error stop "construct_batch_splines_1d_lines: Need at least 6 points for order=5" + end if + end if + + spl%order = order + spl%num_points = n_points + spl%periodic = periodic + spl%x_min = x_min + spl%h_step = (x_max - x_min)/dble(n_points - 1) + spl%num_quantities = n_quantities + + allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) + if (istat /= 0) then + error stop "construct_batch_splines_1d_lines: Allocation failed for coeff" + end if + + allocate (work(n_points, n_quantities, 0:order), stat=istat) + if (istat /= 0) then + error stop "construct_batch_splines_1d_lines: Allocation failed for work" + end if + + do iq = 1, n_quantities + work(:, iq, 0) = y_batch(:, iq) + end do + +#ifdef _OPENMP +!$omp parallel do default(none) shared(work, n_points, n_quantities, order, periodic, & +!$omp& spl) private(iq) +#endif + do iq = 1, n_quantities + if (order == 3) then + if (periodic) then + call spl_three_per_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3)) + else + call spl_three_reg_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3)) + end if + else if (order == 4) then + if (periodic) then + call spl_four_per_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4)) + else + call spl_four_reg_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4)) + end if else - call spl_five_reg_line(n_points, spl%h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4), & - work(:, iq, 5)) + if (periodic) then + call spl_five_per_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4), & + work(:, iq, 5)) + else + call spl_five_reg_line(n_points, spl%h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4), & + work(:, iq, 5)) + end if end if - end if - end do + end do #ifdef _OPENMP !$omp end parallel do #endif - do iq = 1, n_quantities - spl%coeff(iq, :, :) = transpose(work(:, iq, :)) - end do - - deallocate (work) - end subroutine construct_batch_splines_1d_lines - - subroutine construct_batch_splines_1d_resident(x_min, x_max, y_batch, order, & - periodic, spl) - real(dp), intent(in) :: x_min, x_max - real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl - - call construct_batch_splines_1d(x_min, x_max, y_batch, order, periodic, spl) - end subroutine construct_batch_splines_1d_resident - - subroutine construct_batch_splines_1d_resident_device(x_min, x_max, y_batch, & - order, periodic, spl, & - update_host, & - assume_y_present) - real(dp), intent(in) :: x_min, x_max - real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl - logical, intent(in), optional :: update_host - logical, intent(in), optional :: assume_y_present - - integer :: n_points, n_quantities - logical :: do_update, do_assume_present - - do_update = .true. - if (present(update_host)) do_update = update_host - do_assume_present = .false. - if (present(assume_y_present)) do_assume_present = assume_y_present - - n_points = size(y_batch, 1) - n_quantities = size(y_batch, 2) + do iq = 1, n_quantities + spl%coeff(iq, :, :) = transpose(work(:, iq, :)) + end do + + deallocate (work) + end subroutine construct_batch_splines_1d_lines + + subroutine construct_batch_splines_1d_resident(x_min, x_max, y_batch, order, & + periodic, spl) + real(dp), intent(in) :: x_min, x_max + real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + + call construct_batch_splines_1d(x_min, x_max, y_batch, order, periodic, spl) + end subroutine construct_batch_splines_1d_resident + + subroutine construct_batch_splines_1d_resident_device(x_min, x_max, y_batch, & + order, periodic, spl, & + update_host, & + assume_y_present) + real(dp), intent(in) :: x_min, x_max + real(dp), intent(in) :: y_batch(:, :) ! (n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + logical, intent(in), optional :: update_host + logical, intent(in), optional :: assume_y_present + + integer :: n_points, n_quantities + logical :: do_update, do_assume_present + + do_update = .true. + if (present(update_host)) do_update = update_host + do_assume_present = .false. + if (present(assume_y_present)) do_assume_present = assume_y_present + + n_points = size(y_batch, 1) + n_quantities = size(y_batch, 2) #ifdef _OPENACC - call construct_batch_splines_1d_resident_device_impl(x_min, x_max, n_points, & - n_quantities, y_batch, & - order, periodic, spl, & - do_update, do_assume_present) + call construct_batch_splines_1d_resident_device_impl(x_min, x_max, n_points, & + n_quantities, y_batch, & + order, periodic, spl, & + do_update, & + do_assume_present) #else - call construct_batch_splines_1d_resident(x_min, x_max, y_batch, order, & - periodic, spl) + call construct_batch_splines_1d_resident(x_min, x_max, y_batch, order, & + periodic, spl) #endif - end subroutine construct_batch_splines_1d_resident_device - - subroutine construct_batch_splines_1d_resident_device_impl(x_min, x_max, n_points, & - n_quantities, y_batch, & - order, periodic, spl, & - do_update, & - do_assume_present) - real(dp), intent(in) :: x_min, x_max - integer, intent(in) :: n_points, n_quantities - real(dp), intent(in) :: y_batch(n_points, n_quantities) - integer, intent(in) :: order - logical, intent(in) :: periodic - type(BatchSplineData1D), intent(out) :: spl - logical, intent(in) :: do_update - logical, intent(in) :: do_assume_present - - integer :: istat - integer :: ip, iq, k - real(dp), allocatable :: work(:, :, :) - real(dp) :: h_step - - if (n_points < 2) then - error stop "construct_batch_splines_1d_resident_device:"// & - " Need at least 2 points" - end if - if (n_quantities < 1) then - error stop "construct_batch_splines_1d_resident_device:"// & - " Need at least 1 quantity" - end if - if (order < 3 .or. order > 5) then - error stop "construct_batch_splines_1d_resident_device:"// & - " Order must be between 3 and 5" - end if - if (order == 3) then - if (periodic .and. n_points < 3) then + end subroutine construct_batch_splines_1d_resident_device + + subroutine construct_batch_splines_1d_resident_device_impl(x_min, x_max, n_points, & + n_quantities, y_batch, & + order, periodic, spl, & + do_update, & + do_assume_present) + real(dp), intent(in) :: x_min, x_max + integer, intent(in) :: n_points, n_quantities + real(dp), intent(in) :: y_batch(n_points, n_quantities) + integer, intent(in) :: order + logical, intent(in) :: periodic + type(BatchSplineData1D), intent(out) :: spl + logical, intent(in) :: do_update + logical, intent(in) :: do_assume_present + + integer :: istat + integer :: ip, iq, k + real(dp), allocatable :: work(:, :, :) + real(dp) :: h_step + + if (n_points < 2) then error stop "construct_batch_splines_1d_resident_device:"// & - " Need at least 3 points for periodic order=3" - end if - else if (order == 4) then - if (n_points < 5) then + " Need at least 2 points" + end if + if (n_quantities < 1) then error stop "construct_batch_splines_1d_resident_device:"// & - " Need at least 5 points for order=4" - end if - else - if (n_points < 6) then + " Need at least 1 quantity" + end if + if (order < 3 .or. order > 5) then error stop "construct_batch_splines_1d_resident_device:"// & - " Need at least 6 points for order=5" - end if - end if - - spl%order = order - spl%num_points = n_points - spl%periodic = periodic - spl%x_min = x_min - spl%h_step = (x_max - x_min)/dble(n_points - 1) - spl%inv_h_step = 1.0_dp/spl%h_step - spl%period = spl%h_step*real(n_points - 1, dp) - spl%num_quantities = n_quantities - - allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d_resident_device:"// & - " Allocation failed for coeff" - end if - - !$acc enter data create(spl%coeff) - - allocate (work(n_points, n_quantities, 0:order), stat=istat) - if (istat /= 0) then - error stop "construct_batch_splines_1d_resident_device:"// & - " Allocation failed for work" - end if - - h_step = spl%h_step + " Order must be between 3 and 5" + end if + if (order == 3) then + if (periodic .and. n_points < 3) then + error stop "construct_batch_splines_1d_resident_device:"// & + " Need at least 3 points for periodic order=3" + end if + else if (order == 4) then + if (n_points < 5) then + error stop "construct_batch_splines_1d_resident_device:"// & + " Need at least 5 points for order=4" + end if + else + if (n_points < 6) then + error stop "construct_batch_splines_1d_resident_device:"// & + " Need at least 6 points for order=5" + end if + end if -#ifdef _OPENACC - if (do_assume_present) then - if (.not. acc_is_present(y_batch)) then + spl%order = order + spl%num_points = n_points + spl%periodic = periodic + spl%x_min = x_min + spl%h_step = (x_max - x_min)/dble(n_points - 1) + spl%num_quantities = n_quantities + + allocate (spl%coeff(n_quantities, 0:order, n_points), stat=istat) + if (istat /= 0) then error stop "construct_batch_splines_1d_resident_device:"// & - " assume_y_present=T but y_batch is not present" - end if - end if -#endif - !$acc data present_or_copyin(y_batch(1:n_points, 1:n_quantities)) create(work) - !$acc parallel loop collapse(2) gang - do iq = 1, n_quantities - do ip = 1, n_points - work(ip, iq, 0) = y_batch(ip, iq) - end do - end do - - !$acc parallel loop gang - do iq = 1, n_quantities - if (order == 3) then - if (periodic) then - call spl_three_per_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3)) - else - call spl_three_reg_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3)) - end if - else if (order == 4) then - if (periodic) then - call spl_four_per_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4)) - else - call spl_four_reg_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4)) + " Allocation failed for coeff" + end if + + !$acc enter data create(spl%coeff) + + allocate (work(n_points, n_quantities, 0:order), stat=istat) + if (istat /= 0) then + error stop "construct_batch_splines_1d_resident_device:"// & + " Allocation failed for work" + end if + + h_step = spl%h_step + +#ifdef _OPENACC + if (do_assume_present) then + if (.not. acc_is_present(y_batch)) then + error stop "construct_batch_splines_1d_resident_device:"// & + " assume_y_present=T but y_batch is not present" end if - else - if (periodic) then - call spl_five_per_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4), & - work(:, iq, 5)) + end if +#endif + !$acc data present_or_copyin(y_batch(1:n_points, 1:n_quantities)) create(work) + !$acc parallel loop collapse(2) gang + do iq = 1, n_quantities + do ip = 1, n_points + work(ip, iq, 0) = y_batch(ip, iq) + end do + end do + + !$acc parallel loop gang + do iq = 1, n_quantities + if (order == 3) then + if (periodic) then + call spl_three_per_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3)) + else + call spl_three_reg_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3)) + end if + else if (order == 4) then + if (periodic) then + call spl_four_per_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4)) + else + call spl_four_reg_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4)) + end if else - call spl_five_reg_line(n_points, h_step, work(:, iq, 0), & - work(:, iq, 1), work(:, iq, 2), & - work(:, iq, 3), work(:, iq, 4), & - work(:, iq, 5)) + if (periodic) then + call spl_five_per_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4), & + work(:, iq, 5)) + else + call spl_five_reg_line(n_points, h_step, work(:, iq, 0), & + work(:, iq, 1), work(:, iq, 2), & + work(:, iq, 3), work(:, iq, 4), & + work(:, iq, 5)) + end if end if - end if - end do + end do - call store_coeff_1d(n_points, n_quantities, order, work, spl%coeff) - !$acc end data + call store_coeff_1d(n_points, n_quantities, order, work, spl%coeff) + !$acc end data - deallocate (work) - if (do_update) then - !$acc update self(spl%coeff(1:n_quantities, 0:order, 1:n_points)) - end if - end subroutine construct_batch_splines_1d_resident_device_impl + deallocate (work) + if (do_update) then + !$acc update self(spl%coeff(1:n_quantities, 0:order, 1:n_points)) + end if + end subroutine construct_batch_splines_1d_resident_device_impl - subroutine store_coeff_1d(n_points, n_quantities, order, work, coeff) - integer, intent(in) :: n_points, n_quantities, order - real(dp), intent(in) :: work(n_points, n_quantities, 0:order) - real(dp), intent(out) :: coeff(n_quantities, 0:order, n_points) + subroutine store_coeff_1d(n_points, n_quantities, order, work, coeff) + integer, intent(in) :: n_points, n_quantities, order + real(dp), intent(in) :: work(n_points, n_quantities, 0:order) + real(dp), intent(out) :: coeff(n_quantities, 0:order, n_points) - integer :: ip, iq, k + integer :: ip, iq, k #ifdef _OPENACC - !$acc parallel loop collapse(2) gang present(work, coeff) + !$acc parallel loop collapse(2) gang present(work, coeff) #endif - do ip = 1, n_points - do k = 0, order + do ip = 1, n_points + do k = 0, order #ifdef _OPENACC - !$acc loop vector + !$acc loop vector #endif - do iq = 1, n_quantities - coeff(iq, k, ip) = work(ip, iq, k) + do iq = 1, n_quantities + coeff(iq, k, ip) = work(ip, iq, k) + end do end do - end do - end do - end subroutine store_coeff_1d + end do + end subroutine store_coeff_1d - subroutine destroy_batch_splines_1d(spl) - type(BatchSplineData1D), intent(inout) :: spl + subroutine destroy_batch_splines_1d(spl) + type(BatchSplineData1D), intent(inout) :: spl #ifdef _OPENACC - if (allocated(spl%coeff)) then - if (acc_is_present(spl%coeff)) then - !$acc exit data delete(spl%coeff) - end if - end if + if (allocated(spl%coeff)) then + if (acc_is_present(spl%coeff)) then + !$acc exit data delete(spl%coeff) + end if + end if #endif - if (allocated(spl%coeff)) deallocate (spl%coeff) - end subroutine destroy_batch_splines_1d - - subroutine evaluate_batch_splines_1d(spl, x, y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x - real(dp), intent(out) :: y_batch(:) ! (n_quantities) - - real(dp) :: x_arr(1) - real(dp) :: y_arr(spl%num_quantities, 1) - - x_arr(1) = x - call evaluate_batch_splines_1d_many(spl, x_arr, y_arr) - y_batch(1:spl%num_quantities) = y_arr(:, 1) - end subroutine evaluate_batch_splines_1d - - subroutine evaluate_batch_splines_1d_single(spl, x, iq, y) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x - integer, intent(in) :: iq ! quantity index - real(dp), intent(out) :: y - - real(dp) :: y_all(spl%num_quantities) - - if (iq < 1 .or. iq > spl%num_quantities) then - error stop "evaluate_batch_splines_1d_single: Invalid quantity index" - end if - - call evaluate_batch_splines_1d(spl, x, y_all) - y = y_all(iq) - end subroutine evaluate_batch_splines_1d_single - - subroutine evaluate_batch_splines_1d_many(spl, x, y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - real(dp), intent(out) :: y_batch(:, :) ! (n_quantities, n_points) - - integer :: ipt, iq, k_power, idx, k_wrap - integer :: num_points, nq, order - logical :: periodic - real(dp) :: xj, x_norm, x_local, x_min, h_step, period, t, w - - if (size(y_batch, 1) < spl%num_quantities) then - error stop "evaluate_batch_splines_1d_many: First dimension too small" - end if - if (size(y_batch, 2) /= size(x)) then - error stop "evaluate_batch_splines_1d_many: y_batch second dim mismatch" - end if + if (allocated(spl%coeff)) deallocate (spl%coeff) + end subroutine destroy_batch_splines_1d + + subroutine evaluate_batch_splines_1d(spl, x, y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y_batch(:) ! (n_quantities) + + real(dp) :: x_arr(1) + real(dp) :: y_arr(spl%num_quantities, 1) + + x_arr(1) = x + call evaluate_batch_splines_1d_many(spl, x_arr, y_arr) + y_batch(1:spl%num_quantities) = y_arr(:, 1) + end subroutine evaluate_batch_splines_1d + + subroutine evaluate_batch_splines_1d_single(spl, x, iq, y) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + integer, intent(in) :: iq ! quantity index + real(dp), intent(out) :: y + + real(dp) :: y_all(spl%num_quantities) + + if (iq < 1 .or. iq > spl%num_quantities) then + error stop "evaluate_batch_splines_1d_single: Invalid quantity index" + end if + + call evaluate_batch_splines_1d(spl, x, y_all) + y = y_all(iq) + end subroutine evaluate_batch_splines_1d_single + + subroutine evaluate_batch_splines_1d_many(spl, x, y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(out) :: y_batch(:, :) ! (n_quantities, n_points) + + integer :: ipt, iq, k_power, idx, k_wrap + integer :: num_points, nq, order + logical :: periodic + real(dp) :: xj, x_norm, x_local, x_min, h_step, period, t, w + + if (size(y_batch, 1) < spl%num_quantities) then + error stop "evaluate_batch_splines_1d_many: First dimension too small" + end if + if (size(y_batch, 2) /= size(x)) then + error stop "evaluate_batch_splines_1d_many: y_batch second dim mismatch" + end if #ifdef _OPENACC - if (acc_is_present(spl%coeff)) then - !$acc data copyin(x) copy(y_batch) - call evaluate_batch_splines_1d_many_resident(spl, x, y_batch) - !$acc end data - return - end if + if (acc_is_present(spl%coeff)) then + !$acc data copyin(x) copy(y_batch) + call evaluate_batch_splines_1d_many_resident(spl, x, y_batch) + !$acc end data + return + end if #endif - order = spl%order - num_points = spl%num_points - nq = spl%num_quantities - periodic = spl%periodic - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(num_points - 1, dp) - - do ipt = 1, size(x) - include "spline1d_many_point_body.inc" - end do - end subroutine evaluate_batch_splines_1d_many - - subroutine evaluate_batch_splines_1d_many_resident(spl, x, y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - real(dp), intent(inout) :: y_batch(:, :) - - integer :: ipt, iq, k_power, idx, k_wrap - integer :: num_points, nq, order - logical :: periodic - real(dp) :: xj, x_norm, x_local, x_min, h_step, period, t, w - - if (size(y_batch, 1) < spl%num_quantities) then - error stop "evaluate_batch_splines_1d_many_resident: y_batch dim1 too small" - end if - if (size(y_batch, 2) /= size(x)) then - error stop "evaluate_batch_splines_1d_many_resident: y_batch dim2 mismatch" - end if - - order = spl%order - num_points = spl%num_points - nq = spl%num_quantities - periodic = spl%periodic - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(num_points - 1, dp) - - !$acc parallel loop present(spl%coeff, x, y_batch) & - !$acc& private(ipt, iq, k_power, idx, k_wrap, xj, x_norm, x_local, t, w) - do ipt = 1, size(x) - include "spline1d_many_point_body.inc" - end do - !$acc end parallel loop - end subroutine evaluate_batch_splines_1d_many_resident - - subroutine evaluate_batch_splines_1d_many_der(spl, x, y_batch, dy_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - real(dp), intent(out) :: y_batch(:, :) ! (nq, npts) - real(dp), intent(out) :: dy_batch(:, :) ! (nq, npts) - - integer :: ipt, iq, k, idx, npts, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - npts = size(x) - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - do ipt = 1, npts - if (spl%periodic) then - xj = modulo(x(ipt) - x_min, period) + x_min - else - xj = x(ipt) - end if - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, N, idx) - dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq, ipt) - end do - end do + order = spl%order + num_points = spl%num_points + nq = spl%num_quantities + periodic = spl%periodic + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(num_points - 1, dp) + + do ipt = 1, size(x) + include "spline1d_many_point_body.inc" + end do + end subroutine evaluate_batch_splines_1d_many + + subroutine evaluate_batch_splines_1d_many_resident(spl, x, y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(inout) :: y_batch(:, :) + + integer :: ipt, iq, k_power, idx, k_wrap + integer :: num_points, nq, order + logical :: periodic + real(dp) :: xj, x_norm, x_local, x_min, h_step, period, t, w + + if (size(y_batch, 1) < spl%num_quantities) then + error stop "evaluate_batch_splines_1d_many_resident: y_batch dim1 too small" + end if + if (size(y_batch, 2) /= size(x)) then + error stop "evaluate_batch_splines_1d_many_resident: y_batch dim2 mismatch" + end if + + order = spl%order + num_points = spl%num_points + nq = spl%num_quantities + periodic = spl%periodic + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(num_points - 1, dp) + + !$acc parallel loop present(spl%coeff, x, y_batch) & + !$acc& private(ipt, iq, k_power, idx, k_wrap, xj, x_norm, x_local, t, w) + do ipt = 1, size(x) + include "spline1d_many_point_body.inc" + end do + !$acc end parallel loop + end subroutine evaluate_batch_splines_1d_many_resident + + subroutine evaluate_batch_splines_1d_many_der(spl, x, y_batch, dy_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(out) :: y_batch(:, :) ! (nq, npts) + real(dp), intent(out) :: dy_batch(:, :) ! (nq, npts) + + integer :: ipt, iq, k, idx, npts, nq, N + real(dp) :: xj, x_norm, x_local, x_min, h_step, period + + npts = size(x) + nq = spl%num_quantities + N = spl%order + x_min = spl%x_min + h_step = spl%h_step + period = h_step*real(spl%num_points - 1, dp) + + do ipt = 1, npts + if (spl%periodic) then + xj = modulo(x(ipt) - x_min, period) + x_min + else + xj = x(ipt) + end if + x_norm = (xj - x_min)/h_step + idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 + x_local = (x_norm - real(idx - 1, dp))*h_step - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq, ipt) - end do - end do - end do - end subroutine evaluate_batch_splines_1d_many_der - - subroutine evaluate_batch_splines_1d_many_der2(spl, x, y_batch, dy_batch, d2y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - real(dp), intent(out) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) - - integer :: ipt, iq, k, idx, npts, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - npts = size(x) - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - do ipt = 1, npts - if (spl%periodic) then - xj = modulo(x(ipt) - x_min, period) + x_min - else - xj = x(ipt) - end if - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, N, idx) - dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) - d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 2, -1 do iq = 1, nq - d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq, ipt) + y_batch(iq, ipt) = spl%coeff(iq, N, idx) + dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) end do - end do - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq, ipt) + do k = N - 1, 1, -1 + do iq = 1, nq + dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & + x_local*dy_batch(iq, ipt) + end do end do - end do - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq, ipt) + do k = N - 1, 0, -1 + do iq = 1, nq + y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & + x_local*y_batch(iq, ipt) + end do end do - end do - end do - end subroutine evaluate_batch_splines_1d_many_der2 - - subroutine evaluate_batch_splines_1d_many_der2_mask(spl, x, mask, y_batch, dy_batch, d2y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - logical, intent(in) :: mask(:) - real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) - - integer :: ipt, iq, k, idx, npts, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - npts = size(x) - if (size(mask) /= npts) then - error stop "evaluate_batch_splines_1d_many_der2_mask: mask size mismatch" - end if - - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - do ipt = 1, npts - if (.not. mask(ipt)) cycle - - if (spl%periodic) then - xj = modulo(x(ipt) - x_min, period) + x_min - else - xj = x(ipt) - end if - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, N, idx) - dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) - d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 2, -1 - do iq = 1, nq - d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq, ipt) - end do - end do + end do + end subroutine evaluate_batch_splines_1d_many_der - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq, ipt) - end do - end do + subroutine evaluate_batch_splines_1d_many_der2(spl, x, y_batch, dy_batch, d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(out) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq, ipt) - end do - end do - end do - end subroutine evaluate_batch_splines_1d_many_der2_mask - - subroutine evaluate_batch_splines_1d_many_der3(spl, x, y_batch, dy_batch, d2y_batch, & - d3y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - real(dp), intent(out) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), d3y_batch(:, :) - - integer :: ipt, iq, k, idx, npts, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - npts = size(x) - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - do ipt = 1, npts - if (spl%periodic) then - xj = modulo(x(ipt) - x_min, period) + x_min - else - xj = x(ipt) - end if - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, N, idx) - dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) - d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) - d3y_batch(iq, ipt) = N*(N - 1)*(N - 2)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 3, -1 - do iq = 1, nq - d3y_batch(iq, ipt) = k*(k - 1)*(k - 2)*spl%coeff(iq, k, idx) + & - x_local*d3y_batch(iq, ipt) - end do - end do - - do k = N - 1, 2, -1 - do iq = 1, nq - d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq, ipt) - end do - end do + integer :: ipt, npts - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq, ipt) - end do - end do + npts = size(x) +#ifdef _OPENACC + if (acc_is_present(spl%coeff)) then + !$acc data copyin(x) copyout(y_batch, dy_batch, d2y_batch) + call evaluate_batch_splines_1d_many_der2_resident(spl, x, & + y_batch, & + dy_batch, d2y_batch) + !$acc end data + return + end if +#endif - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq, ipt) - end do - end do - end do - end subroutine evaluate_batch_splines_1d_many_der3 - - subroutine evaluate_batch_splines_1d_many_der3_mask(spl, x, mask, y_batch, dy_batch, & - d2y_batch, d3y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x(:) - logical, intent(in) :: mask(:) - real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), d3y_batch(:, :) - - integer :: ipt, iq, k, idx, npts, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - npts = size(x) - if (size(mask) /= npts) then - error stop "evaluate_batch_splines_1d_many_der3_mask: mask size mismatch" - end if - - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - do ipt = 1, npts - if (.not. mask(ipt)) cycle - - if (spl%periodic) then - xj = modulo(x(ipt) - x_min, period) + x_min - else - xj = x(ipt) - end if - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, N, idx) - dy_batch(iq, ipt) = N*spl%coeff(iq, N, idx) - d2y_batch(iq, ipt) = N*(N - 1)*spl%coeff(iq, N, idx) - d3y_batch(iq, ipt) = N*(N - 1)*(N - 2)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 3, -1 - do iq = 1, nq - d3y_batch(iq, ipt) = k*(k - 1)*(k - 2)*spl%coeff(iq, k, idx) + & - x_local*d3y_batch(iq, ipt) - end do - end do + do ipt = 1, npts + call evaluate_batch_splines_1d_der2_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt)) + end do + end subroutine evaluate_batch_splines_1d_many_der2 + + subroutine evaluate_batch_splines_1d_many_der2_mask(spl, x, mask, y_batch, & + dy_batch, d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + if (size(mask) /= npts) then + error stop "evaluate_batch_splines_1d_many_der2_mask: mask size mismatch" + end if +#ifdef _OPENACC + if (acc_is_present(spl%coeff)) then + !$acc data copyin(x, mask) copy(y_batch, dy_batch, d2y_batch) + call evaluate_batch_splines_1d_many_der2_mask_resident(spl, x, & + mask, y_batch, & + dy_batch, & + d2y_batch) + !$acc end data + return + end if +#endif - do k = N - 1, 2, -1 - do iq = 1, nq - d2y_batch(iq, ipt) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq, ipt) - end do - end do + do ipt = 1, npts + if (.not. mask(ipt)) cycle + call evaluate_batch_splines_1d_der2_core(spl, x(ipt), & + y_batch(:, ipt), dy_batch(:, & + ipt), d2y_batch(:, ipt)) + end do + end subroutine evaluate_batch_splines_1d_many_der2_mask + + subroutine evaluate_batch_splines_1d_many_der3(spl, x, y_batch, dy_batch, & + d2y_batch, & + d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + real(dp), intent(out) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), & + d3y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) +#ifdef _OPENACC + if (acc_is_present(spl%coeff)) then + !$acc data copyin(x) copyout(y_batch, dy_batch, d2y_batch, d3y_batch) + call evaluate_batch_splines_1d_many_der3_resident(spl, x, y_batch, & + dy_batch, & + d2y_batch, & + d3y_batch) + !$acc end data + return + end if +#endif - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq, ipt) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq, ipt) - end do - end do + do ipt = 1, npts + call evaluate_batch_splines_1d_der3_core(spl, x(ipt), y_batch(:, ipt), & + dy_batch(:, ipt), d2y_batch(:, & + ipt), d3y_batch(:, ipt)) + end do + end subroutine evaluate_batch_splines_1d_many_der3 + + subroutine evaluate_batch_splines_1d_many_der3_mask(spl, x, mask, y_batch, & + dy_batch, & + d2y_batch, d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x(:) + logical, intent(in) :: mask(:) + real(dp), intent(inout) :: y_batch(:, :), dy_batch(:, :), d2y_batch(:, :), & + d3y_batch(:, :) + + integer :: ipt, npts + + npts = size(x) + if (size(mask) /= npts) then + error stop "evaluate_batch_splines_1d_many_der3_mask: mask size mismatch" + end if +#ifdef _OPENACC + if (acc_is_present(spl%coeff)) then + !$acc data copyin(x, mask) copy(y_batch, dy_batch, d2y_batch, d3y_batch) + call evaluate_batch_splines_1d_many_der3_mask_resident(spl, x, mask, & + y_batch, dy_batch, & + d2y_batch, & + d3y_batch) + !$acc end data + return + end if +#endif - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq, ipt) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq, ipt) - end do - end do - end do - end subroutine evaluate_batch_splines_1d_many_der3_mask - - subroutine evaluate_batch_splines_1d_der(spl, x, y_batch, dy_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x - real(dp), intent(out) :: y_batch(:) ! (n_quantities) - real(dp), intent(out) :: dy_batch(:) ! (n_quantities) - - real(dp) :: x_arr(1) - real(dp) :: y_arr(spl%num_quantities, 1), dy_arr(spl%num_quantities, 1) - - x_arr(1) = x - call evaluate_batch_splines_1d_many_der(spl, x_arr, y_arr, dy_arr) - y_batch(1:spl%num_quantities) = y_arr(:, 1) - dy_batch(1:spl%num_quantities) = dy_arr(:, 1) - end subroutine evaluate_batch_splines_1d_der - - subroutine evaluate_batch_splines_1d_der2(spl, x, y_batch, dy_batch, d2y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x - real(dp), intent(out) :: y_batch(:), dy_batch(:), d2y_batch(:) - - integer :: iq, k, idx, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - if (spl%periodic) then - xj = x - if (xj < x_min .or. xj >= x_min + period) then - xj = modulo(xj - x_min, period) + x_min - end if - else - xj = x - end if - - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq) = spl%coeff(iq, N, idx) - dy_batch(iq) = N*spl%coeff(iq, N, idx) - d2y_batch(iq) = N*(N - 1)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 2, -1 - do iq = 1, nq - d2y_batch(iq) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq) - end do - end do - - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq) - end do - end do - - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq) - end do - end do - end subroutine evaluate_batch_splines_1d_der2 - - subroutine evaluate_batch_splines_1d_der3(spl, x, y_batch, dy_batch, d2y_batch, & - d3y_batch) - type(BatchSplineData1D), intent(in) :: spl - real(dp), intent(in) :: x - real(dp), intent(out) :: y_batch(:), dy_batch(:), d2y_batch(:), d3y_batch(:) - - integer :: iq, k, idx, nq, N - real(dp) :: xj, x_norm, x_local, x_min, h_step, period - - nq = spl%num_quantities - N = spl%order - x_min = spl%x_min - h_step = spl%h_step - period = h_step*real(spl%num_points - 1, dp) - - if (spl%periodic) then - xj = x - if (xj < x_min .or. xj >= x_min + period) then - xj = modulo(xj - x_min, period) + x_min - end if - else - xj = x - end if - - x_norm = (xj - x_min)/h_step - idx = max(0, min(spl%num_points - 2, int(x_norm))) + 1 - x_local = (x_norm - real(idx - 1, dp))*h_step - - do iq = 1, nq - y_batch(iq) = spl%coeff(iq, N, idx) - dy_batch(iq) = N*spl%coeff(iq, N, idx) - d2y_batch(iq) = N*(N - 1)*spl%coeff(iq, N, idx) - d3y_batch(iq) = N*(N - 1)*(N - 2)*spl%coeff(iq, N, idx) - end do - - do k = N - 1, 3, -1 - do iq = 1, nq - d3y_batch(iq) = k*(k - 1)*(k - 2)*spl%coeff(iq, k, idx) + & - x_local*d3y_batch(iq) - end do - end do - - do k = N - 1, 2, -1 - do iq = 1, nq - d2y_batch(iq) = k*(k - 1)*spl%coeff(iq, k, idx) + & - x_local*d2y_batch(iq) - end do - end do - - do k = N - 1, 1, -1 - do iq = 1, nq - dy_batch(iq) = k*spl%coeff(iq, k, idx) + & - x_local*dy_batch(iq) - end do - end do - - do k = N - 1, 0, -1 - do iq = 1, nq - y_batch(iq) = spl%coeff(iq, k, idx) + & - x_local*y_batch(iq) - end do - end do - end subroutine evaluate_batch_splines_1d_der3 + do ipt = 1, npts + if (.not. mask(ipt)) cycle + call evaluate_batch_splines_1d_der3_core(spl, x(ipt), y_batch(:, ipt), & + dy_batch(:, ipt), d2y_batch(:, & + ipt), d3y_batch(:, ipt)) + end do + end subroutine evaluate_batch_splines_1d_many_der3_mask + + subroutine evaluate_batch_splines_1d_der(spl, x, y_batch, dy_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y_batch(:) ! (n_quantities) + real(dp), intent(out) :: dy_batch(:) ! (n_quantities) + + real(dp) :: x_arr(1) + real(dp) :: y_arr(spl%num_quantities, 1), dy_arr(spl%num_quantities, 1) + + x_arr(1) = x + call evaluate_batch_splines_1d_many_der(spl, x_arr, y_arr, dy_arr) + y_batch(1:spl%num_quantities) = y_arr(:, 1) + dy_batch(1:spl%num_quantities) = dy_arr(:, 1) + end subroutine evaluate_batch_splines_1d_der + + subroutine evaluate_batch_splines_1d_der2(spl, x, y_batch, dy_batch, d2y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y_batch(:), dy_batch(:), d2y_batch(:) + + real(dp) :: x_arr(1) + real(dp) :: y_arr(spl%num_quantities, 1) + real(dp) :: dy_arr(spl%num_quantities, 1) + real(dp) :: d2y_arr(spl%num_quantities, 1) + + x_arr(1) = x + call evaluate_batch_splines_1d_many_der2(spl, x_arr, y_arr, dy_arr, d2y_arr) + y_batch(1:spl%num_quantities) = y_arr(:, 1) + dy_batch(1:spl%num_quantities) = dy_arr(:, 1) + d2y_batch(1:spl%num_quantities) = d2y_arr(:, 1) + end subroutine evaluate_batch_splines_1d_der2 + + subroutine evaluate_batch_splines_1d_der3(spl, x, y_batch, dy_batch, d2y_batch, & + d3y_batch) + type(BatchSplineData1D), intent(in) :: spl + real(dp), intent(in) :: x + real(dp), intent(out) :: y_batch(:), dy_batch(:), d2y_batch(:), d3y_batch(:) + + real(dp) :: x_arr(1) + real(dp) :: y_arr(spl%num_quantities, 1) + real(dp) :: dy_arr(spl%num_quantities, 1) + real(dp) :: d2y_arr(spl%num_quantities, 1) + real(dp) :: d3y_arr(spl%num_quantities, 1) + + x_arr(1) = x + call evaluate_batch_splines_1d_many_der3(spl, x_arr, y_arr, dy_arr, & + d2y_arr, d3y_arr) + y_batch(1:spl%num_quantities) = y_arr(:, 1) + dy_batch(1:spl%num_quantities) = dy_arr(:, 1) + d2y_batch(1:spl%num_quantities) = d2y_arr(:, 1) + d3y_batch(1:spl%num_quantities) = d3y_arr(:, 1) + end subroutine evaluate_batch_splines_1d_der3 end module batch_interpolate_1d diff --git a/src/interpolate/batch_interpolate_types.f90 b/src/interpolate/batch_interpolate_types.f90 index f0ddd0ef..4b98d900 100644 --- a/src/interpolate/batch_interpolate_types.f90 +++ b/src/interpolate/batch_interpolate_types.f90 @@ -1,60 +1,63 @@ module batch_interpolate_types - use, intrinsic :: iso_fortran_env, only: dp => real64 - implicit none - private - - ! Export batch spline types - public :: BatchSplineData1D, BatchSplineData2D, BatchSplineData3D - ! dp is for internal use only, not exported - - ! Batch spline types for multiple quantities on shared grid - type :: BatchSplineData1D - ! Shared grid data - integer :: order - integer :: num_points - logical :: periodic - real(dp) :: x_min - real(dp) :: h_step - real(dp) :: inv_h_step - real(dp) :: period - - ! Batch data - integer :: num_quantities - ! Memory layout: (n_quantities, 0:order, n_points) for cache efficiency - real(dp), dimension(:, :, :), allocatable :: coeff - end type BatchSplineData1D - - type :: BatchSplineData2D - ! Shared grid data - integer :: order(2) - integer :: num_points(2) - logical :: periodic(2) - real(dp) :: h_step(2) - real(dp) :: x_min(2) - real(dp) :: inv_h_step(2) - real(dp) :: period(2) - - ! Batch data - integer :: num_quantities - ! Memory layout: (n_quantities, 0:order1, 0:order2, n1, n2) for cache efficiency - real(dp), dimension(:, :, :, :, :), allocatable :: coeff - end type BatchSplineData2D - - type :: BatchSplineData3D - ! Shared grid data - integer :: order(3) - integer :: num_points(3) - logical :: periodic(3) - real(dp) :: h_step(3) - real(dp) :: x_min(3) - real(dp) :: inv_h_step(3) - real(dp) :: period(3) - - ! Batch data - integer :: num_quantities - ! Memory layout: (n_quantities, 0:order1, 0:order2, 0:order3, n1, n2, n3) for - ! cache efficiency - real(dp), dimension(:, :, :, :, :, :, :), allocatable :: coeff - end type BatchSplineData3D + use, intrinsic :: iso_fortran_env, only: dp => real64 + implicit none + private + + ! Export batch spline types + public :: BatchSplineData1D, BatchSplineData2D, BatchSplineData3D + public :: MAX_BATCH_QUANTITIES, MAX_BATCH_ORDER + + integer, parameter :: MAX_BATCH_QUANTITIES = 8 + integer, parameter :: MAX_BATCH_ORDER = 8 + + ! Batch spline types for multiple quantities on shared grid + type :: BatchSplineData1D + ! Shared grid data + integer :: order + integer :: num_points + logical :: periodic + real(dp) :: x_min + real(dp) :: h_step + real(dp) :: inv_h_step + real(dp) :: period + + ! Batch data + integer :: num_quantities + ! Memory layout: (n_quantities, 0:order, n_points) for cache efficiency + real(dp), dimension(:, :, :), allocatable :: coeff + end type BatchSplineData1D + + type :: BatchSplineData2D + ! Shared grid data + integer :: order(2) + integer :: num_points(2) + logical :: periodic(2) + real(dp) :: h_step(2) + real(dp) :: x_min(2) + real(dp) :: inv_h_step(2) + real(dp) :: period(2) + + ! Batch data + integer :: num_quantities + ! Memory layout: (n_quantities, 0:order1, 0:order2, n1, n2) for cache efficiency + real(dp), dimension(:, :, :, :, :), allocatable :: coeff + end type BatchSplineData2D + + type :: BatchSplineData3D + ! Shared grid data + integer :: order(3) + integer :: num_points(3) + logical :: periodic(3) + real(dp) :: h_step(3) + real(dp) :: x_min(3) + real(dp) :: inv_h_step(3) + real(dp) :: period(3) + + ! Batch data + integer :: num_quantities + ! Memory layout: (n_quantities, 0:order1, 0:order2, 0:order3, n1, n2, n3) for + ! cache efficiency + real(dp), dimension(:, :, :, :, :, :, :), allocatable :: coeff + end type BatchSplineData3D end module batch_interpolate_types diff --git a/src/interpolate/interpolate.f90 b/src/interpolate/interpolate.f90 index 1df385b0..7cce06a7 100644 --- a/src/interpolate/interpolate.f90 +++ b/src/interpolate/interpolate.f90 @@ -28,11 +28,13 @@ module interpolate use batch_interpolate, only: evaluate_batch_splines_2d, & evaluate_batch_splines_2d_der, & evaluate_batch_splines_2d_many, & - evaluate_batch_splines_2d_many_resident + evaluate_batch_splines_2d_many_mask, & + evaluate_batch_splines_2d_many_resident, & + evaluate_batch_splines_2d_many_der, & + evaluate_batch_splines_2d_many_der_mask use batch_interpolate, only: evaluate_batch_splines_3d, & evaluate_batch_splines_3d_der, & evaluate_batch_splines_3d_der2, & - evaluate_batch_splines_3d_der2_rmix, & evaluate_batch_splines_3d_many, & evaluate_batch_splines_3d_many_resident, & evaluate_batch_splines_3d_many_der, & @@ -61,9 +63,12 @@ module interpolate public :: evaluate_batch_splines_1d_der, evaluate_batch_splines_1d_der2 public :: evaluate_batch_splines_1d_der3 public :: evaluate_batch_splines_2d, evaluate_batch_splines_2d_der - public :: evaluate_batch_splines_2d_many, evaluate_batch_splines_2d_many_resident + public :: evaluate_batch_splines_2d_many, evaluate_batch_splines_2d_many_mask + public :: evaluate_batch_splines_2d_many_resident + public :: evaluate_batch_splines_2d_many_der, & + evaluate_batch_splines_2d_many_der_mask public :: evaluate_batch_splines_3d, evaluate_batch_splines_3d_der, & - evaluate_batch_splines_3d_der2, evaluate_batch_splines_3d_der2_rmix + evaluate_batch_splines_3d_der2 public :: evaluate_batch_splines_3d_many, evaluate_batch_splines_3d_many_resident public :: evaluate_batch_splines_3d_many_der, evaluate_batch_splines_3d_many_der2 public :: evaluate_batch_splines_3d_many_der2_mask @@ -481,56 +486,56 @@ subroutine construct_splines_3d(x_min, x_max, y, order, periodic, spl) ! Spline over x3 allocate (splcoe(0:spl%order(3), spl%num_points(3))) do i2 = 1, spl%num_points(2) - do i1 = 1, spl%num_points(1) - splcoe(0, :) = y(i1, i2, :) - if (spl%periodic(3)) then - call spl_per(spl%order(3), spl%num_points(3), spl%h_step(3), splcoe) - else - call spl_reg(spl%order(3), spl%num_points(3), spl%h_step(3), splcoe) - end if - spl%coeff(order(1), 0, :, i1, i2, :) = splcoe - end do + do i1 = 1, spl%num_points(1) + splcoe(0, :) = y(i1, i2, :) + if (spl%periodic(3)) then + call spl_per(spl%order(3), spl%num_points(3), spl%h_step(3), splcoe) + else + call spl_reg(spl%order(3), spl%num_points(3), spl%h_step(3), splcoe) + end if + spl%coeff(order(1), 0, :, i1, i2, :) = splcoe + end do end do deallocate (splcoe) ! Spline over x2 allocate (splcoe(0:spl%order(2), spl%num_points(2))) do i3 = 1, spl%num_points(3) - do i1 = 1, spl%num_points(1) - do k3 = 0, spl%order(3) - splcoe(0, :) = spl%coeff(order(1), 0, k3, i1, :, i3) - if (spl%periodic(2)) then - call spl_per( & - spl%order(2), spl%num_points(2), spl%h_step(2), splcoe) - else - call spl_reg( & - spl%order(2), spl%num_points(2), spl%h_step(2), splcoe) - end if - spl%coeff(order(1), :, k3, i1, :, i3) = splcoe - end do + do i1 = 1, spl%num_points(1) + do k3 = 0, spl%order(3) + splcoe(0, :) = spl%coeff(order(1), 0, k3, i1, :, i3) + if (spl%periodic(2)) then + call spl_per( & + spl%order(2), spl%num_points(2), spl%h_step(2), splcoe) + else + call spl_reg( & + spl%order(2), spl%num_points(2), spl%h_step(2), splcoe) + end if + spl%coeff(order(1), :, k3, i1, :, i3) = splcoe end do end do + end do deallocate (splcoe) ! Spline over x1 allocate (splcoe(0:spl%order(1), spl%num_points(1))) do i3 = 1, spl%num_points(3) - do i2 = 1, spl%num_points(2) - do k3 = 0, spl%order(3) - do k2 = 0, spl%order(2) - splcoe(0, :) = spl%coeff(order(1), k2, k3, :, i2, i3) - if (spl%periodic(1)) then - call spl_per( & - spl%order(1), spl%num_points(1), spl%h_step(1), splcoe) - else - call spl_reg( & - spl%order(1), spl%num_points(1), spl%h_step(1), splcoe) - end if - spl%coeff(order(1):0:-1, k2, k3, :, i2, i3) = splcoe - end do - end do + do i2 = 1, spl%num_points(2) + do k3 = 0, spl%order(3) + do k2 = 0, spl%order(2) + splcoe(0, :) = spl%coeff(order(1), k2, k3, :, i2, i3) + if (spl%periodic(1)) then + call spl_per( & + spl%order(1), spl%num_points(1), spl%h_step(1), splcoe) + else + call spl_reg( & + spl%order(1), spl%num_points(1), spl%h_step(1), splcoe) + end if + spl%coeff(order(1):0:-1, k2, k3, :, i2, i3) = splcoe + end do end do end do + end do deallocate (splcoe) end subroutine construct_splines_3d @@ -650,13 +655,14 @@ subroutine evaluate_splines_3d_many_der(spl, x, y, dy) do k2 = 0, N2 coeff_23(k2, k3) = spl%coeff(0, k2, k3, interval_index(1) + 1, & interval_index(2) + 1, & - interval_index(3) + 1) + interval_index(3) + 1) do k1 = 1, N1 coeff_23(k2, k3) = spl%coeff(k1, k2, k3, & interval_index(1) + 1, & - interval_index(2) + 1, & + interval_index(2) + 1, & interval_index(3) + 1) + & - x_local(1)*coeff_23(k2, k3) + x_local(1)*coeff_23(k2, & + k3) end do end do end do @@ -666,16 +672,13 @@ subroutine evaluate_splines_3d_many_der(spl, x, y, dy) coeff_23_dx1(k2, k3) = spl%coeff(0, k2, k3, & interval_index(1) + 1, & interval_index(2) + 1, & - interval_index(3) + 1)*N1 + interval_index(3) + 1)*N1 do k1 = 1, N1 - 1 coeff_23_dx1(k2, k3) = spl%coeff(k1, k2, k3, & - interval_index(1) + 1, & - interval_index(2) + 1, & + interval_index(1) + 1, interval_index(2) + 1, & interval_index(3) + & - 1)*(N1 - k1) & - + & - x_local(1)*coeff_23_dx1(k2, & - k3) + 1)*(N1 - k1) + & + x_local(1)*coeff_23_dx1(k2, k3) end do end do end do @@ -743,13 +746,14 @@ subroutine evaluate_splines_3d_many_der2(spl, x, y, dy, d2y) do k2 = 0, N2 coeff_23(k2, k3) = spl%coeff(0, k2, k3, interval_index(1) + 1, & interval_index(2) + 1, & - interval_index(3) + 1) + interval_index(3) + 1) do k1 = 1, N1 coeff_23(k2, k3) = spl%coeff(k1, k2, k3, & interval_index(1) + 1, & - interval_index(2) + 1, & + interval_index(2) + 1, & interval_index(3) + 1) + & - x_local(1)*coeff_23(k2, k3) + x_local(1)*coeff_23(k2, & + k3) end do end do end do @@ -759,16 +763,13 @@ subroutine evaluate_splines_3d_many_der2(spl, x, y, dy, d2y) coeff_23_dx1(k2, k3) = spl%coeff(0, k2, k3, & interval_index(1) + 1, & interval_index(2) + 1, & - interval_index(3) + 1)*N1 + interval_index(3) + 1)*N1 do k1 = 1, N1 - 1 coeff_23_dx1(k2, k3) = spl%coeff(k1, k2, k3, & - interval_index(1) + 1, & - interval_index(2) + 1, & + interval_index(1) + 1, interval_index(2) + 1, & interval_index(3) + & - 1)*(N1 - k1) & - + & - x_local(1)*coeff_23_dx1(k2, & - k3) + 1)*(N1 - k1) + & + x_local(1)*coeff_23_dx1(k2, k3) end do end do end do @@ -778,15 +779,14 @@ subroutine evaluate_splines_3d_many_der2(spl, x, y, dy, d2y) coeff_23_dx1x1(k2, k3) = spl%coeff(0, k2, k3, & interval_index(1) + 1, & interval_index(2) + 1, & - interval_index(3) + & - 1)*N1*(N1 - 1) + interval_index(3) + & + 1)*N1*(N1 - 1) do k1 = 1, N1 - 2 coeff_23_dx1x1(k2, k3) = spl%coeff(k1, k2, k3, & - interval_index(1) + 1, & - interval_index(2) + 1, & - interval_index(3) & - + 1)*(N1 - & - k1)*(N1 - k1 - 1) & + interval_index(1) + 1, interval_index(2) + 1, & + interval_index(3) + & + 1)*(N1 - k1)*(N1 & + - k1 - 1) & + x_local(1)*coeff_23_dx1x1(k2, k3) end do end do diff --git a/test/source/test_batch_interpolate_mask.f90 b/test/source/test_batch_interpolate_mask.f90 index 24256da3..0d1261ec 100644 --- a/test/source/test_batch_interpolate_mask.f90 +++ b/test/source/test_batch_interpolate_mask.f90 @@ -1,17 +1,24 @@ program test_batch_interpolate_mask use, intrinsic :: iso_fortran_env, only: dp => real64 use batch_interpolate, only: BatchSplineData1D, BatchSplineData3D + use batch_interpolate, only: BatchSplineData2D use batch_interpolate, only: construct_batch_splines_1d, destroy_batch_splines_1d + use batch_interpolate, only: construct_batch_splines_2d, destroy_batch_splines_2d use batch_interpolate, only: construct_batch_splines_3d, destroy_batch_splines_3d use batch_interpolate, only: evaluate_batch_splines_1d_many_der3 use batch_interpolate, only: evaluate_batch_splines_1d_many_der2 + use batch_interpolate, only: evaluate_batch_splines_2d_many + use batch_interpolate, only: evaluate_batch_splines_2d_many_der use batch_interpolate, only: evaluate_batch_splines_3d_many_der2 use batch_interpolate, only: evaluate_batch_splines_1d_many_der2_mask use batch_interpolate, only: evaluate_batch_splines_1d_many_der3_mask + use batch_interpolate, only: evaluate_batch_splines_2d_many_mask + use batch_interpolate, only: evaluate_batch_splines_2d_many_der_mask use batch_interpolate, only: evaluate_batch_splines_3d_many_der2_mask implicit none call test_1d_der3_mask() + call test_2d_mask() call test_3d_der2_mask() contains @@ -30,7 +37,6 @@ subroutine assert_close(name, a, b, tol) end if end subroutine assert_close - subroutine assert_unchanged(name, a, sentinel) character(len=*), intent(in) :: name real(dp), intent(in) :: a(:) @@ -42,7 +48,6 @@ subroutine assert_unchanged(name, a, sentinel) end if end subroutine assert_unchanged - subroutine test_1d_der3_mask() integer, parameter :: order = 5 integer, parameter :: ngrid = 32 @@ -60,42 +65,48 @@ subroutine test_1d_der3_mask() real(dp) :: x_eval(npts) logical :: mask(npts) - real(dp) :: y_full(nq, npts), dy_full(nq, npts), d2_full(nq, npts), d3_full(nq, npts) - real(dp) :: y_mask(nq, npts), dy_mask(nq, npts), d2_mask(nq, npts), d3_mask(nq, npts) + real(dp) :: y_full(nq, npts), dy_full(nq, npts), d2_full(nq, npts), & + d3_full(nq, npts) + real(dp) :: y_mask(nq, npts), dy_mask(nq, npts), d2_mask(nq, npts), & + d3_mask(nq, npts) real(dp) :: y2_full(nq, npts), dy2_full(nq, npts), d22_full(nq, npts) real(dp) :: y2_mask(nq, npts), dy2_mask(nq, npts), d22_mask(nq, npts) integer :: i, iq do i = 1, ngrid - x_grid(i) = x_min + (x_max - x_min) * real(i - 1, dp) / real(ngrid - 1, dp) + x_grid(i) = x_min + (x_max - x_min)*real(i - 1, dp)/real(ngrid - 1, dp) end do do iq = 1, nq do i = 1, ngrid - y_grid(i, iq) = cos(x_grid(i) + 0.1d0 * real(iq - 1, dp)) + y_grid(i, iq) = cos(x_grid(i) + 0.1d0*real(iq - 1, dp)) end do end do call construct_batch_splines_1d(x_min, x_max, y_grid, order, periodic, spl) do i = 1, npts - x_eval(i) = x_min + 0.73d0 * real(i - 1, dp) + x_eval(i) = x_min + 0.73d0*real(i - 1, dp) mask(i) = mod(i, 2) == 1 end do - call evaluate_batch_splines_1d_many_der3(spl, x_eval, y_full, dy_full, d2_full, d3_full) + call evaluate_batch_splines_1d_many_der3(spl, x_eval, y_full, dy_full, & + d2_full, d3_full) - call evaluate_batch_splines_1d_many_der2(spl, x_eval, y2_full, dy2_full, d22_full) + call evaluate_batch_splines_1d_many_der2(spl, x_eval, y2_full, & + dy2_full, d22_full) y2_mask = sentinel dy2_mask = sentinel d22_mask = sentinel - call evaluate_batch_splines_1d_many_der2_mask(spl, x_eval, mask, y2_mask, dy2_mask, & + call evaluate_batch_splines_1d_many_der2_mask(spl, x_eval, mask, y2_mask, & + dy2_mask, & d22_mask) y_mask = sentinel dy_mask = sentinel d2_mask = sentinel d3_mask = sentinel - call evaluate_batch_splines_1d_many_der3_mask(spl, x_eval, mask, y_mask, dy_mask, & + call evaluate_batch_splines_1d_many_der3_mask(spl, x_eval, mask, & + y_mask, dy_mask, & d2_mask, d3_mask) do i = 1, npts @@ -121,6 +132,73 @@ subroutine test_1d_der3_mask() call destroy_batch_splines_1d(spl) end subroutine test_1d_der3_mask + subroutine test_2d_mask() + integer, parameter :: order(2) = [5, 5] + integer, parameter :: ngrid(2) = [16, 12] + integer, parameter :: nq = 2 + integer, parameter :: npts = 19 + logical, parameter :: periodic(2) = [.false., .true.] + real(dp), parameter :: x_min(2) = [0.0d0, 0.0d0] + real(dp), parameter :: x_max(2) = [1.0d0, 2.0d0] + real(dp), parameter :: tol = 1.0d-12 + real(dp), parameter :: sentinel = -999.0d0 + + type(BatchSplineData2D) :: spl + real(dp) :: y_grid(ngrid(1), ngrid(2), nq) + real(dp) :: x_eval(2, npts) + logical :: mask(npts) + + real(dp) :: y_full(nq, npts), y_mask(nq, npts) + real(dp) :: dy_full(2, nq, npts), dy_mask(2, nq, npts) + integer :: i1, i2, iq, ip + real(dp) :: x1, x2 + + do iq = 1, nq + do i2 = 1, ngrid(2) + x2 = x_min(2) + real(i2 - 1, dp)*(x_max(2) - x_min(2))/ & + real(ngrid(2) - 1, dp) + do i1 = 1, ngrid(1) + x1 = x_min(1) + real(i1 - 1, dp)*(x_max(1) - x_min(1))/ & + real(ngrid(1) - 1, dp) + y_grid(i1, i2, iq) = cos(x1)*cos(x2)*(1.0d0 + & + 0.3d0*real(iq - 1, dp)) + end do + end do + end do + + call construct_batch_splines_2d(x_min, x_max, y_grid, order, periodic, spl) + + do ip = 1, npts + x_eval(1, ip) = x_min(1) + (x_max(1) - x_min(1))*real(ip - 1, dp)/ & + real(npts, dp) + x_eval(2, ip) = x_min(2) + 0.91d0*real(ip - 1, dp) + mask(ip) = mod(ip, 4) /= 0 + end do + + call evaluate_batch_splines_2d_many(spl, x_eval, y_full) + call evaluate_batch_splines_2d_many_der(spl, x_eval, y_full, dy_full) + + y_mask = sentinel + call evaluate_batch_splines_2d_many_mask(spl, x_eval, mask, y_mask) + + dy_mask = sentinel + call evaluate_batch_splines_2d_many_der_mask(spl, x_eval, mask, y_mask, dy_mask) + + do ip = 1, npts + if (mask(ip)) then + call assert_close("2d_y", y_full(:, ip), y_mask(:, ip), tol) + call assert_close("2d_dy", reshape(dy_full(:, :, ip), [2*nq]), & + reshape(dy_mask(:, :, ip), [2*nq]), tol) + else + call assert_unchanged("2d_y_sentinel", y_mask(:, ip), sentinel) + call assert_unchanged("2d_dy_sentinel", reshape(dy_mask(:, :, & + ip), [2*nq]), & + sentinel) + end if + end do + + call destroy_batch_splines_2d(spl) + end subroutine test_2d_mask subroutine test_3d_der2_mask() integer, parameter :: order(3) = [5, 5, 5] @@ -148,13 +226,13 @@ subroutine test_3d_der2_mask() do i2 = 1, ngrid(2) do i1 = 1, ngrid(1) y_grid(i1, i2, i3, iq) = & - cos(x_min(1) + real(i1 - 1, dp) * (x_max(1) - x_min(1)) / & - real(ngrid(1) - 1, dp)) * & - cos(x_min(2) + real(i2 - 1, dp) * (x_max(2) - x_min(2)) / & - real(ngrid(2) - 1, dp)) * & - cos(x_min(3) + real(i3 - 1, dp) * (x_max(3) - x_min(3)) / & - real(ngrid(3) - 1, dp)) * & - (1.0d0 + 0.07d0 * real(iq - 1, dp)) + cos(x_min(1) + real(i1 - 1, dp)*(x_max(1) - x_min(1))/ & + real(ngrid(1) - 1, dp))* & + cos(x_min(2) + real(i2 - 1, dp)*(x_max(2) - x_min(2))/ & + real(ngrid(2) - 1, dp))* & + cos(x_min(3) + real(i3 - 1, dp)*(x_max(3) - x_min(3))/ & + real(ngrid(3) - 1, dp))* & + (1.0d0 + 0.07d0*real(iq - 1, dp)) end do end do end do @@ -163,11 +241,12 @@ subroutine test_3d_der2_mask() call construct_batch_splines_3d(x_min, x_max, y_grid, order, periodic, spl) do ip = 1, npts - x_eval(1, ip) = x_min(1) + (x_max(1) - x_min(1)) * real(ip - 1, dp) / real(npts, dp) - x_eval(2, ip) = x_min(2) + (x_max(2) - x_min(2)) * real(2 * ip - 1, dp) / & - real(2 * npts, dp) - x_eval(3, ip) = x_min(3) + (x_max(3) - x_min(3)) * real(3 * ip - 1, dp) / & - real(3 * npts, dp) + x_eval(1, ip) = x_min(1) + (x_max(1) - x_min(1))*real(ip - 1, & + dp)/real(npts, dp) + x_eval(2, ip) = x_min(2) + (x_max(2) - x_min(2))*real(2*ip - 1, dp)/ & + real(2*npts, dp) + x_eval(3, ip) = x_min(3) + (x_max(3) - x_min(3))*real(3*ip - 1, dp)/ & + real(3*npts, dp) mask(ip) = mod(ip, 3) /= 0 end do @@ -176,20 +255,23 @@ subroutine test_3d_der2_mask() y_mask = sentinel dy_mask = sentinel d2_mask = sentinel - call evaluate_batch_splines_3d_many_der2_mask(spl, x_eval, mask, y_mask, dy_mask, d2_mask) + call evaluate_batch_splines_3d_many_der2_mask(spl, x_eval, mask, y_mask, & + dy_mask, d2_mask) do ip = 1, npts if (mask(ip)) then call assert_close("3d_y", y_full(:, ip), y_mask(:, ip), tol) - call assert_close("3d_dy", reshape(dy_full(:, :, ip), [3 * nq]), & - reshape(dy_mask(:, :, ip), [3 * nq]), tol) - call assert_close("3d_d2", reshape(d2_full(:, :, ip), [6 * nq]), & - reshape(d2_mask(:, :, ip), [6 * nq]), tol) + call assert_close("3d_dy", reshape(dy_full(:, :, ip), [3*nq]), & + reshape(dy_mask(:, :, ip), [3*nq]), tol) + call assert_close("3d_d2", reshape(d2_full(:, :, ip), [6*nq]), & + reshape(d2_mask(:, :, ip), [6*nq]), tol) else call assert_unchanged("3d_y_sentinel", y_mask(:, ip), sentinel) - call assert_unchanged("3d_dy_sentinel", reshape(dy_mask(:, :, ip), [3 * nq]), & + call assert_unchanged("3d_dy_sentinel", reshape(dy_mask(:, :, & + ip), [3*nq]), & sentinel) - call assert_unchanged("3d_d2_sentinel", reshape(d2_mask(:, :, ip), [6 * nq]), & + call assert_unchanged("3d_d2_sentinel", reshape(d2_mask(:, :, & + ip), [6*nq]), & sentinel) end if end do