diff --git a/.gitignore b/.gitignore index b9749d00..88fd62a5 100644 --- a/.gitignore +++ b/.gitignore @@ -66,5 +66,6 @@ lib/ .venv dist/ test/**/*.png +bench_neo_bspline_*.png version.f90 Testing/Temporary/CTestCostData.txt diff --git a/src/interpolate/CMakeLists.txt b/src/interpolate/CMakeLists.txt index d6eb1f2c..9c8eefc1 100644 --- a/src/interpolate/CMakeLists.txt +++ b/src/interpolate/CMakeLists.txt @@ -1,6 +1,10 @@ add_library(interpolate STATIC ../spl_three_to_five.f90 acc_spline_build_order5.f90 + neo_bspline_base.f90 + neo_bspline_interp.f90 + neo_bspline_lsq.f90 + neo_bspline.f90 batch_interpolate_types.f90 batch_interpolate_1d.f90 batch_interpolate_2d.f90 diff --git a/src/interpolate/neo_bspline.f90 b/src/interpolate/neo_bspline.f90 new file mode 100644 index 00000000..5263f5a3 --- /dev/null +++ b/src/interpolate/neo_bspline.f90 @@ -0,0 +1,11 @@ +module neo_bspline + !! Umbrella module for B-spline functionality. + !! Re-exports types and procedures from submodules: + !! - neo_bspline_base: Core types, init, eval, basis functions + !! - neo_bspline_interp: Direct interpolation (collocation solve) + !! - neo_bspline_lsq: Least-squares fitting (CGLS solver) + use neo_bspline_base + use neo_bspline_interp + use neo_bspline_lsq + implicit none +end module neo_bspline diff --git a/src/interpolate/neo_bspline_base.f90 b/src/interpolate/neo_bspline_base.f90 new file mode 100644 index 00000000..02057c29 --- /dev/null +++ b/src/interpolate/neo_bspline_base.f90 @@ -0,0 +1,213 @@ +module neo_bspline_base + !! Core B-spline types and basis function evaluation. + !! No external dependencies beyond intrinsics. + use, intrinsic :: iso_fortran_env, only: dp => real64 + implicit none + private + + type, public :: bspline_1d + integer :: degree + integer :: n_ctrl + real(dp), allocatable :: knots(:) + real(dp) :: x_min, x_max + end type bspline_1d + + type, public :: bspline_2d + type(bspline_1d) :: sx, sy + end type bspline_2d + + type, public :: bspline_3d + type(bspline_1d) :: sx, sy, sz + end type bspline_3d + + public :: bspline_1d_init_uniform, bspline_2d_init_uniform, bspline_3d_init_uniform + public :: bspline_1d_eval, bspline_2d_eval, bspline_3d_eval + public :: find_span, basis_funs + +contains + + subroutine bspline_1d_init_uniform(spl, degree, n_ctrl, x_min, x_max) + type(bspline_1d), intent(out) :: spl + integer, intent(in) :: degree, n_ctrl + real(dp), intent(in) :: x_min, x_max + integer :: p, n, m, i + real(dp) :: h + + p = degree + n = n_ctrl + m = n + p + 1 + spl%degree = p + spl%n_ctrl = n + spl%x_min = x_min + spl%x_max = x_max + allocate(spl%knots(m)) + h = (x_max - x_min)/real(n - p, dp) + do i = 1, m + if (i <= p + 1) then + spl%knots(i) = x_min + else if (i >= m - p) then + spl%knots(i) = x_max + else + spl%knots(i) = x_min + real(i - (p + 1), dp)*h + end if + end do + end subroutine bspline_1d_init_uniform + + + subroutine bspline_2d_init_uniform(spl, degree, n_ctrl, x_min, x_max) + type(bspline_2d), intent(out) :: spl + integer, intent(in) :: degree(2), n_ctrl(2) + real(dp), intent(in) :: x_min(2), x_max(2) + + call bspline_1d_init_uniform(spl%sx, degree(1), n_ctrl(1), x_min(1), & + x_max(1)) + call bspline_1d_init_uniform(spl%sy, degree(2), n_ctrl(2), x_min(2), & + x_max(2)) + end subroutine bspline_2d_init_uniform + + + subroutine bspline_3d_init_uniform(spl, degree, n_ctrl, x_min, x_max) + type(bspline_3d), intent(out) :: spl + integer, intent(in) :: degree(3), n_ctrl(3) + real(dp), intent(in) :: x_min(3), x_max(3) + + call bspline_1d_init_uniform(spl%sx, degree(1), n_ctrl(1), x_min(1), & + x_max(1)) + call bspline_1d_init_uniform(spl%sy, degree(2), n_ctrl(2), x_min(2), & + x_max(2)) + call bspline_1d_init_uniform(spl%sz, degree(3), n_ctrl(3), x_min(3), & + x_max(3)) + end subroutine bspline_3d_init_uniform + + + subroutine find_span(spl, x, span) + type(bspline_1d), intent(in) :: spl + real(dp), intent(in) :: x + integer, intent(out) :: span + integer :: low, high, mid, p, n, m + real(dp) :: xx + + p = spl%degree + n = spl%n_ctrl + m = n + p + 1 + xx = min(max(x, spl%x_min), spl%x_max) + if (xx >= spl%knots(m - p)) then + span = n + return + end if + low = p + 1 + high = n + 1 + do + mid = (low + high)/2 + if (xx < spl%knots(mid)) then + high = mid + else if (xx >= spl%knots(mid + 1)) then + low = mid + else + span = mid + exit + end if + end do + end subroutine find_span + + + subroutine basis_funs(spl, span, x, N) + type(bspline_1d), intent(in) :: spl + integer, intent(in) :: span + real(dp), intent(in) :: x + real(dp), intent(out) :: N(0:) + integer :: p, j, r + real(dp) :: left(0:spl%degree), right(0:spl%degree), saved, temp + + p = spl%degree + N(0) = 1.0_dp + do j = 1, p + left(j) = x - spl%knots(span + 1 - j) + right(j) = spl%knots(span + j) - x + saved = 0.0_dp + do r = 0, j - 1 + temp = N(r)/(right(r + 1) + left(j - r)) + N(r) = saved + right(r + 1)*temp + saved = left(j - r)*temp + end do + N(j) = saved + end do + end subroutine basis_funs + + + subroutine bspline_1d_eval(spl, coeff, x, y) + type(bspline_1d), intent(in) :: spl + real(dp), intent(in) :: coeff(:) + real(dp), intent(in) :: x + real(dp), intent(out) :: y + integer :: span, j, p + real(dp) :: N(0:spl%degree) + + p = spl%degree + call find_span(spl, x, span) + call basis_funs(spl, span, x, N) + y = 0.0_dp + !$omp simd reduction(+:y) + do j = 0, p + y = y + N(j)*coeff(span - p + j) + end do + end subroutine bspline_1d_eval + + + subroutine bspline_2d_eval(spl, coeff, x, y) + type(bspline_2d), intent(in) :: spl + real(dp), intent(in) :: coeff(:,:) + real(dp), intent(in) :: x(2) + real(dp), intent(out) :: y + integer :: spanx, spany, px, py, a, b, ix, iy + real(dp) :: Nx(0:spl%sx%degree), Ny(0:spl%sy%degree) + + px = spl%sx%degree + py = spl%sy%degree + call find_span(spl%sx, x(1), spanx) + call basis_funs(spl%sx, spanx, x(1), Nx) + call find_span(spl%sy, x(2), spany) + call basis_funs(spl%sy, spany, x(2), Ny) + y = 0.0_dp + do a = 0, px + ix = spanx - px + a + do b = 0, py + iy = spany - py + b + y = y + Nx(a)*Ny(b)*coeff(ix, iy) + end do + end do + end subroutine bspline_2d_eval + + + subroutine bspline_3d_eval(spl, coeff, x, y) + type(bspline_3d), intent(in) :: spl + real(dp), intent(in) :: coeff(:,:,:) + real(dp), intent(in) :: x(3) + real(dp), intent(out) :: y + integer :: spanx, spany, spanz, px, py, pz, a, b, c, ix, iy, iz + real(dp) :: Nx(0:spl%sx%degree), Ny(0:spl%sy%degree) + real(dp) :: Nz(0:spl%sz%degree) + + px = spl%sx%degree + py = spl%sy%degree + pz = spl%sz%degree + call find_span(spl%sx, x(1), spanx) + call basis_funs(spl%sx, spanx, x(1), Nx) + call find_span(spl%sy, x(2), spany) + call basis_funs(spl%sy, spany, x(2), Ny) + call find_span(spl%sz, x(3), spanz) + call basis_funs(spl%sz, spanz, x(3), Nz) + y = 0.0_dp + do a = 0, px + ix = spanx - px + a + do b = 0, py + iy = spany - py + b + do c = 0, pz + iz = spanz - pz + c + y = y + Nx(a)*Ny(b)*Nz(c)*coeff(ix, iy, iz) + end do + end do + end do + end subroutine bspline_3d_eval + +end module neo_bspline_base diff --git a/src/interpolate/neo_bspline_interp.f90 b/src/interpolate/neo_bspline_interp.f90 new file mode 100644 index 00000000..9be28a60 --- /dev/null +++ b/src/interpolate/neo_bspline_interp.f90 @@ -0,0 +1,218 @@ +module neo_bspline_interp + !! Direct B-spline interpolation via collocation. + !! Solves the linear system A*c = f where A_ij = B_j(x_i). + !! Uses separable tensor-product structure for efficiency in 2D/3D. + use, intrinsic :: iso_fortran_env, only: dp => real64 + use neo_bspline_base, only: bspline_1d, bspline_2d, bspline_3d, & + find_span, basis_funs + implicit none + private + + interface + subroutine dgbsv(n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info) + import :: dp + integer :: n, kl, ku, nrhs, ldab, ldb, info + integer :: ipiv(*) + real(dp) :: ab(ldab, *), b(ldb, *) + end subroutine dgbsv + end interface + + public :: bspline_1d_interp, bspline_2d_interp, bspline_3d_interp + +contains + + subroutine bspline_1d_interp(spl, x_data, f_data, coeff) + !! Direct 1D B-spline interpolation: solve collocation system A*c = f. + type(bspline_1d), intent(in) :: spl + real(dp), intent(in) :: x_data(:) + real(dp), intent(in) :: f_data(:) + real(dp), intent(out) :: coeff(:) + + integer :: n, p, i, j, span, info, kl, ku, ldab + real(dp), allocatable :: ab(:,:), work_f(:), Nvals(:) + integer, allocatable :: ipiv(:) + + n = size(x_data) + p = spl%degree + kl = p + ku = p + ldab = 2*kl + ku + 1 + allocate(ab(ldab, n), work_f(n), ipiv(n), Nvals(0:p)) + ab = 0.0_dp + + do i = 1, n + call find_span(spl, x_data(i), span) + call basis_funs(spl, span, x_data(i), Nvals) + do j = 0, p + ab(kl + ku + 1 + j, span - p + j) = Nvals(j) + end do + end do + + work_f = f_data + call dgbsv(n, kl, ku, 1, ab, ldab, ipiv, work_f, n, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + coeff = work_f + end subroutine bspline_1d_interp + + + subroutine bspline_2d_interp(spl, x1, x2, f_grid, coeff) + !! Direct 2D B-spline interpolation using separable tensor product. + !! Solves dimension-by-dimension: first along x1, then along x2. + type(bspline_2d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:) + real(dp), intent(in) :: f_grid(:,:) + real(dp), intent(out) :: coeff(:,:) + + integer :: n1, n2, i, j, span, p1, p2, info, pmax + real(dp), allocatable :: A1(:,:), A2(:,:), temp(:,:), work(:), Nvals(:) + integer, allocatable :: ipiv(:) + + n1 = size(x1) + n2 = size(x2) + p1 = spl%sx%degree + p2 = spl%sy%degree + pmax = max(p1, p2) + allocate(A1(n1, n1), A2(n2, n2), temp(n1, n2), ipiv(max(n1, n2))) + allocate(work(max(n1, n2)), Nvals(0:pmax)) + + A1 = 0.0_dp + do i = 1, n1 + call find_span(spl%sx, x1(i), span) + call basis_funs(spl%sx, span, x1(i), Nvals(0:p1)) + do j = 0, p1 + A1(i, span - p1 + j) = Nvals(j) + end do + end do + + A2 = 0.0_dp + do i = 1, n2 + call find_span(spl%sy, x2(i), span) + call basis_funs(spl%sy, span, x2(i), Nvals(0:p2)) + do j = 0, p2 + A2(i, span - p2 + j) = Nvals(j) + end do + end do + + temp = f_grid + call dgetrf(n1, n1, A1, n1, ipiv, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + do j = 1, n2 + work(1:n1) = temp(:, j) + call dgetrs('N', n1, 1, A1, n1, ipiv, work, n1, info) + temp(:, j) = work(1:n1) + end do + + call dgetrf(n2, n2, A2, n2, ipiv, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + do i = 1, n1 + work(1:n2) = temp(i, :) + call dgetrs('N', n2, 1, A2, n2, ipiv, work, n2, info) + coeff(i, :) = work(1:n2) + end do + end subroutine bspline_2d_interp + + + subroutine bspline_3d_interp(spl, x1, x2, x3, f_grid, coeff) + !! Direct 3D B-spline interpolation using separable tensor product. + !! Solves dimension-by-dimension: x1 -> x2 -> x3. + type(bspline_3d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:), x3(:) + real(dp), intent(in) :: f_grid(:,:,:) + real(dp), intent(out) :: coeff(:,:,:) + + integer :: n1, n2, n3, i, j, k, span, p1, p2, p3, info, pmax, nmax + real(dp), allocatable :: A1(:,:), A2(:,:), A3(:,:), temp(:,:,:) + real(dp), allocatable :: Nvals(:) + integer, allocatable :: ipiv1(:), ipiv2(:), ipiv3(:) + + n1 = size(x1) + n2 = size(x2) + n3 = size(x3) + p1 = spl%sx%degree + p2 = spl%sy%degree + p3 = spl%sz%degree + pmax = max(p1, p2, p3) + nmax = max(n1, n2, n3) + allocate(A1(n1, n1), A2(n2, n2), A3(n3, n3)) + allocate(temp(n1, n2, n3), ipiv1(n1), ipiv2(n2), ipiv3(n3)) + allocate(Nvals(0:pmax)) + + A1 = 0.0_dp + do i = 1, n1 + call find_span(spl%sx, x1(i), span) + call basis_funs(spl%sx, span, x1(i), Nvals(0:p1)) + do j = 0, p1 + A1(i, span - p1 + j) = Nvals(j) + end do + end do + + A2 = 0.0_dp + do i = 1, n2 + call find_span(spl%sy, x2(i), span) + call basis_funs(spl%sy, span, x2(i), Nvals(0:p2)) + do j = 0, p2 + A2(i, span - p2 + j) = Nvals(j) + end do + end do + + A3 = 0.0_dp + do i = 1, n3 + call find_span(spl%sz, x3(i), span) + call basis_funs(spl%sz, span, x3(i), Nvals(0:p3)) + do j = 0, p3 + A3(i, span - p3 + j) = Nvals(j) + end do + end do + + temp = f_grid + + call dgetrf(n1, n1, A1, n1, ipiv1, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + call dgetrf(n2, n2, A2, n2, ipiv2, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + call dgetrf(n3, n3, A3, n3, ipiv3, info) + if (info /= 0) then + coeff = 0.0_dp + return + end if + + !$omp parallel do collapse(2) private(j, k) schedule(static) + do k = 1, n3 + do j = 1, n2 + call dgetrs('N', n1, 1, A1, n1, ipiv1, temp(:, j, k), n1, info) + end do + end do + + !$omp parallel do collapse(2) private(i, k) schedule(static) + do k = 1, n3 + do i = 1, n1 + call dgetrs('N', n2, 1, A2, n2, ipiv2, temp(i, :, k), n2, info) + end do + end do + + !$omp parallel do collapse(2) private(i, j) schedule(static) + do j = 1, n2 + do i = 1, n1 + call dgetrs('N', n3, 1, A3, n3, ipiv3, temp(i, j, :), n3, info) + end do + end do + + coeff = temp + end subroutine bspline_3d_interp + +end module neo_bspline_interp diff --git a/src/interpolate/neo_bspline_lsq.f90 b/src/interpolate/neo_bspline_lsq.f90 new file mode 100644 index 00000000..611a506d --- /dev/null +++ b/src/interpolate/neo_bspline_lsq.f90 @@ -0,0 +1,516 @@ +module neo_bspline_lsq + !! Least-squares B-spline fitting via matrix-free CGLS. + !! Exploits regular grid structure with separable basis caching. + use, intrinsic :: iso_fortran_env, only: dp => real64 + use neo_bspline_base, only: bspline_1d, bspline_2d, bspline_3d, & + find_span, basis_funs + implicit none + private + + type :: grid_cache_1d + integer :: ng1, px + integer, allocatable :: span1(:) + real(dp), allocatable :: B1(:,:) + end type grid_cache_1d + + type :: grid_cache_2d + integer :: ng1, ng2, px, py + integer, allocatable :: span1(:), span2(:) + real(dp), allocatable :: B1(:,:), B2(:,:) + end type grid_cache_2d + + type :: grid_cache_3d + integer :: ng1, ng2, ng3, px, py, pz + integer, allocatable :: span1(:), span2(:), span3(:) + real(dp), allocatable :: B1(:,:), B2(:,:), B3(:,:) + end type grid_cache_3d + + public :: bspline_1d_lsq_cgls, bspline_2d_lsq_cgls, bspline_3d_lsq_cgls + +contains + + subroutine precompute_grid_1d(spl, x1, cache) + type(bspline_1d), intent(in) :: spl + real(dp), intent(in) :: x1(:) + type(grid_cache_1d), intent(out) :: cache + integer :: i, n1, px + + n1 = size(x1) + px = spl%degree + cache%ng1 = n1 + cache%px = px + allocate(cache%span1(n1), cache%B1(0:px, n1)) + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n1 + call find_span(spl, x1(i), cache%span1(i)) + end do + + !$omp parallel do private(i) schedule(static) + do i = 1, n1 + call basis_funs(spl, cache%span1(i), x1(i), cache%B1(:, i)) + end do + end subroutine precompute_grid_1d + + + subroutine precompute_grid_2d(spl, x1, x2, cache) + type(bspline_2d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:) + type(grid_cache_2d), intent(out) :: cache + integer :: i, n1, n2, px, py + + n1 = size(x1) + n2 = size(x2) + px = spl%sx%degree + py = spl%sy%degree + cache%ng1 = n1 + cache%ng2 = n2 + cache%px = px + cache%py = py + allocate(cache%span1(n1), cache%span2(n2)) + allocate(cache%B1(0:px, n1), cache%B2(0:py, n2)) + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n1 + call find_span(spl%sx, x1(i), cache%span1(i)) + end do + !$omp parallel do private(i) schedule(static) + do i = 1, n1 + call basis_funs(spl%sx, cache%span1(i), x1(i), cache%B1(:, i)) + end do + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n2 + call find_span(spl%sy, x2(i), cache%span2(i)) + end do + !$omp parallel do private(i) schedule(static) + do i = 1, n2 + call basis_funs(spl%sy, cache%span2(i), x2(i), cache%B2(:, i)) + end do + end subroutine precompute_grid_2d + + + subroutine precompute_grid_3d(spl, x1, x2, x3, cache) + type(bspline_3d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:), x3(:) + type(grid_cache_3d), intent(out) :: cache + integer :: i, n1, n2, n3, px, py, pz + + n1 = size(x1) + n2 = size(x2) + n3 = size(x3) + px = spl%sx%degree + py = spl%sy%degree + pz = spl%sz%degree + cache%ng1 = n1 + cache%ng2 = n2 + cache%ng3 = n3 + cache%px = px + cache%py = py + cache%pz = pz + allocate(cache%span1(n1), cache%span2(n2), cache%span3(n3)) + allocate(cache%B1(0:px, n1), cache%B2(0:py, n2), cache%B3(0:pz, n3)) + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n1 + call find_span(spl%sx, x1(i), cache%span1(i)) + end do + !$omp parallel do private(i) schedule(static) + do i = 1, n1 + call basis_funs(spl%sx, cache%span1(i), x1(i), cache%B1(:, i)) + end do + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n2 + call find_span(spl%sy, x2(i), cache%span2(i)) + end do + !$omp parallel do private(i) schedule(static) + do i = 1, n2 + call basis_funs(spl%sy, cache%span2(i), x2(i), cache%B2(:, i)) + end do + + !$omp parallel do simd private(i) schedule(simd:static) + do i = 1, n3 + call find_span(spl%sz, x3(i), cache%span3(i)) + end do + !$omp parallel do private(i) schedule(static) + do i = 1, n3 + call basis_funs(spl%sz, cache%span3(i), x3(i), cache%B3(:, i)) + end do + end subroutine precompute_grid_3d + + + subroutine apply_A_1d(cache, coeff, f) + type(grid_cache_1d), intent(in) :: cache + real(dp), intent(in) :: coeff(:) + real(dp), intent(out) :: f(:) + integer :: i, a, ix, n1, px, s1 + + n1 = cache%ng1 + px = cache%px + !$omp parallel do simd private(i, a, ix, s1) schedule(simd:static) + do i = 1, n1 + s1 = cache%span1(i) + f(i) = 0.0_dp + do a = 0, px + ix = s1 - px + a + f(i) = f(i) + cache%B1(a, i)*coeff(ix) + end do + end do + end subroutine apply_A_1d + + + subroutine apply_AT_1d(cache, r, g, n_ctrl) + type(grid_cache_1d), intent(in) :: cache + real(dp), intent(in) :: r(:) + real(dp), intent(out) :: g(:) + integer, intent(in) :: n_ctrl + integer :: i, a, ix, n1, px, s1 + real(dp), allocatable :: g_local(:) + + n1 = cache%ng1 + px = cache%px + g = 0.0_dp + + !$omp parallel private(g_local, i, a, ix, s1) default(shared) + allocate(g_local(n_ctrl)) + g_local = 0.0_dp + !$omp do schedule(static) + do i = 1, n1 + s1 = cache%span1(i) + do a = 0, px + ix = s1 - px + a + g_local(ix) = g_local(ix) + cache%B1(a, i)*r(i) + end do + end do + !$omp end do + !$omp critical + g = g + g_local + !$omp end critical + deallocate(g_local) + !$omp end parallel + end subroutine apply_AT_1d + + + subroutine apply_A_2d(cache, coeff, f) + type(grid_cache_2d), intent(in) :: cache + real(dp), intent(in) :: coeff(:,:) + real(dp), intent(out) :: f(:,:) + integer :: i1, i2, a, b, ix, iy, n1, n2, px, py, s1, s2 + real(dp) :: val + + n1 = cache%ng1 + n2 = cache%ng2 + px = cache%px + py = cache%py + + !$omp parallel do collapse(2) private(i1, i2, a, b, ix, iy, s1, s2, val) & + !$omp& schedule(static) + do i2 = 1, n2 + do i1 = 1, n1 + s1 = cache%span1(i1) + s2 = cache%span2(i2) + val = 0.0_dp + do a = 0, px + ix = s1 - px + a + do b = 0, py + iy = s2 - py + b + val = val + cache%B1(a, i1)*cache%B2(b, i2)*coeff(ix, iy) + end do + end do + f(i1, i2) = val + end do + end do + end subroutine apply_A_2d + + + subroutine apply_AT_2d(cache, r, g, nx, ny) + type(grid_cache_2d), intent(in) :: cache + real(dp), intent(in) :: r(:,:) + real(dp), intent(out) :: g(:,:) + integer, intent(in) :: nx, ny + integer :: i1, i2, a, b, ix, iy, n1, n2, px, py, s1, s2 + real(dp), allocatable :: g_local(:,:) + + n1 = cache%ng1 + n2 = cache%ng2 + px = cache%px + py = cache%py + g = 0.0_dp + + !$omp parallel private(g_local, i1, i2, a, b, ix, iy, s1, s2) default(shared) + allocate(g_local(nx, ny)) + g_local = 0.0_dp + !$omp do collapse(2) schedule(static) + do i2 = 1, n2 + do i1 = 1, n1 + s1 = cache%span1(i1) + s2 = cache%span2(i2) + do a = 0, px + ix = s1 - px + a + do b = 0, py + iy = s2 - py + b + g_local(ix, iy) = g_local(ix, iy) + & + cache%B1(a, i1)*cache%B2(b, i2)*r(i1, i2) + end do + end do + end do + end do + !$omp end do + !$omp critical + g = g + g_local + !$omp end critical + deallocate(g_local) + !$omp end parallel + end subroutine apply_AT_2d + + + subroutine apply_A_3d(cache, coeff, f) + type(grid_cache_3d), intent(in) :: cache + real(dp), intent(in) :: coeff(:,:,:) + real(dp), intent(out) :: f(:,:,:) + integer :: i1, i2, i3, a, b, c, ix, iy, iz + integer :: n1, n2, n3, px, py, pz, s1, s2, s3 + real(dp) :: val, Nab + + n1 = cache%ng1 + n2 = cache%ng2 + n3 = cache%ng3 + px = cache%px + py = cache%py + pz = cache%pz + + !$omp parallel do collapse(3) & + !$omp& private(i1, i2, i3, a, b, c, ix, iy, iz, s1, s2, s3, val, Nab) & + !$omp& schedule(static) + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + s1 = cache%span1(i1) + s2 = cache%span2(i2) + s3 = cache%span3(i3) + val = 0.0_dp + do a = 0, px + ix = s1 - px + a + do b = 0, py + iy = s2 - py + b + Nab = cache%B1(a, i1)*cache%B2(b, i2) + !$omp simd reduction(+:val) + do c = 0, pz + iz = s3 - pz + c + val = val + Nab*cache%B3(c, i3)*coeff(ix, iy, iz) + end do + end do + end do + f(i1, i2, i3) = val + end do + end do + end do + end subroutine apply_A_3d + + + subroutine apply_AT_3d(cache, r, g, nx, ny, nz) + type(grid_cache_3d), intent(in) :: cache + real(dp), intent(in) :: r(:,:,:) + real(dp), intent(out) :: g(:,:,:) + integer, intent(in) :: nx, ny, nz + integer :: i1, i2, i3, a, b, c, ix, iy, iz + integer :: n1, n2, n3, px, py, pz, s1, s2, s3 + real(dp) :: ri, Nab + real(dp), allocatable :: g_local(:,:,:) + + n1 = cache%ng1 + n2 = cache%ng2 + n3 = cache%ng3 + px = cache%px + py = cache%py + pz = cache%pz + g = 0.0_dp + + !$omp parallel & + !$omp& private(g_local, i1, i2, i3, a, b, c, ix, iy, iz, s1, s2, s3, ri, Nab) & + !$omp& default(shared) + allocate(g_local(nx, ny, nz)) + g_local = 0.0_dp + !$omp do collapse(3) schedule(static) + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + s1 = cache%span1(i1) + s2 = cache%span2(i2) + s3 = cache%span3(i3) + ri = r(i1, i2, i3) + do a = 0, px + ix = s1 - px + a + do b = 0, py + iy = s2 - py + b + Nab = cache%B1(a, i1)*cache%B2(b, i2)*ri + do c = 0, pz + iz = s3 - pz + c + g_local(ix, iy, iz) = g_local(ix, iy, iz) + & + Nab*cache%B3(c, i3) + end do + end do + end do + end do + end do + end do + !$omp end do + !$omp critical + g = g + g_local + !$omp end critical + deallocate(g_local) + !$omp end parallel + end subroutine apply_AT_3d + + + subroutine bspline_1d_lsq_cgls(spl, x_data, f_data, coeff, max_iter, tol) + type(bspline_1d), intent(in) :: spl + real(dp), intent(in) :: x_data(:) + real(dp), intent(in) :: f_data(:) + real(dp), intent(inout) :: coeff(:) + integer, intent(in), optional :: max_iter + real(dp), intent(in), optional :: tol + + type(grid_cache_1d) :: cache + integer :: n_data, n_ctrl, k, kmax + real(dp) :: atol, gamma, gamma_new, alpha, beta, denom, rhs_norm + real(dp), allocatable :: r(:), q(:), s(:), p(:) + + n_data = size(x_data) + n_ctrl = spl%n_ctrl + kmax = 200 + atol = 1.0d-10 + if (present(max_iter)) kmax = max_iter + if (present(tol)) atol = tol + + call precompute_grid_1d(spl, x_data, cache) + + allocate(r(n_data), q(n_data), s(n_ctrl), p(n_ctrl)) + coeff = 0.0_dp + r = f_data + call apply_AT_1d(cache, r, s, n_ctrl) + p = s + gamma = dot_product(s, s) + rhs_norm = sqrt(dot_product(f_data, f_data)) + if (rhs_norm == 0.0_dp) return + + do k = 1, kmax + call apply_A_1d(cache, p, q) + denom = dot_product(q, q) + if (denom <= 0.0_dp) exit + alpha = gamma/denom + coeff = coeff + alpha*p + r = r - alpha*q + call apply_AT_1d(cache, r, s, n_ctrl) + gamma_new = dot_product(s, s) + if (gamma_new <= (atol*rhs_norm)**2) exit + beta = gamma_new/gamma + gamma = gamma_new + p = s + beta*p + end do + end subroutine bspline_1d_lsq_cgls + + + subroutine bspline_2d_lsq_cgls(spl, x1, x2, f_grid, coeff, max_iter, tol) + type(bspline_2d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:) + real(dp), intent(in) :: f_grid(:,:) + real(dp), intent(inout) :: coeff(:,:) + integer, intent(in), optional :: max_iter + real(dp), intent(in), optional :: tol + + type(grid_cache_2d) :: cache + integer :: n1, n2, nx, ny, k, kmax + real(dp) :: atol, gamma, gamma_new, alpha, beta, denom, rhs_norm + real(dp), allocatable :: r(:,:), q(:,:), s(:,:), p(:,:) + + n1 = size(x1) + n2 = size(x2) + nx = spl%sx%n_ctrl + ny = spl%sy%n_ctrl + kmax = 400 + atol = 1.0d-10 + if (present(max_iter)) kmax = max_iter + if (present(tol)) atol = tol + + call precompute_grid_2d(spl, x1, x2, cache) + + allocate(r(n1, n2), q(n1, n2), s(nx, ny), p(nx, ny)) + coeff = 0.0_dp + r = f_grid + call apply_AT_2d(cache, r, s, nx, ny) + p = s + gamma = sum(s*s) + rhs_norm = sqrt(sum(f_grid*f_grid)) + if (rhs_norm == 0.0_dp) return + + do k = 1, kmax + call apply_A_2d(cache, p, q) + denom = sum(q*q) + if (denom <= 0.0_dp) exit + alpha = gamma/denom + coeff = coeff + alpha*p + r = r - alpha*q + call apply_AT_2d(cache, r, s, nx, ny) + gamma_new = sum(s*s) + if (gamma_new <= (atol*rhs_norm)**2) exit + beta = gamma_new/gamma + gamma = gamma_new + p = s + beta*p + end do + end subroutine bspline_2d_lsq_cgls + + + subroutine bspline_3d_lsq_cgls(spl, x1, x2, x3, f_grid, coeff, max_iter, tol) + type(bspline_3d), intent(in) :: spl + real(dp), intent(in) :: x1(:), x2(:), x3(:) + real(dp), intent(in) :: f_grid(:,:,:) + real(dp), intent(inout) :: coeff(:,:,:) + integer, intent(in), optional :: max_iter + real(dp), intent(in), optional :: tol + + type(grid_cache_3d) :: cache + integer :: n1, n2, n3, nx, ny, nz, k, kmax + real(dp) :: atol, gamma, gamma_new, alpha, beta, denom, rhs_norm + real(dp), allocatable :: r(:,:,:), q(:,:,:), s(:,:,:), p(:,:,:) + + n1 = size(x1) + n2 = size(x2) + n3 = size(x3) + nx = spl%sx%n_ctrl + ny = spl%sy%n_ctrl + nz = spl%sz%n_ctrl + kmax = 400 + atol = 1.0d-10 + if (present(max_iter)) kmax = max_iter + if (present(tol)) atol = tol + + call precompute_grid_3d(spl, x1, x2, x3, cache) + + allocate(r(n1, n2, n3), q(n1, n2, n3), s(nx, ny, nz), p(nx, ny, nz)) + coeff = 0.0_dp + r = f_grid + call apply_AT_3d(cache, r, s, nx, ny, nz) + p = s + gamma = sum(s*s) + rhs_norm = sqrt(sum(f_grid*f_grid)) + if (rhs_norm == 0.0_dp) return + + do k = 1, kmax + call apply_A_3d(cache, p, q) + denom = sum(q*q) + if (denom <= 0.0_dp) exit + alpha = gamma/denom + coeff = coeff + alpha*p + r = r - alpha*q + call apply_AT_3d(cache, r, s, nx, ny, nz) + gamma_new = sum(s*s) + if (gamma_new <= (atol*rhs_norm)**2) exit + beta = gamma_new/gamma + gamma = gamma_new + p = s + beta*p + end do + end subroutine bspline_3d_lsq_cgls + +end module neo_bspline_lsq diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 32d82149..1eebd904 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -144,6 +144,12 @@ target_link_libraries(test_rng.x ${COMMON_LIBS}) add_executable(test_polylag_3.x source/test_polylag_3.f90) target_link_libraries(test_polylag_3.x ${COMMON_LIBS} ${MAGFIE_LIB}) +add_executable(test_neo_bspline_1d.x source/test_neo_bspline_1d.f90) +target_link_libraries(test_neo_bspline_1d.x ${COMMON_LIBS}) +add_executable(test_neo_bspline_2d.x source/test_neo_bspline_2d.f90) +target_link_libraries(test_neo_bspline_2d.x ${COMMON_LIBS}) +add_executable(test_neo_bspline_3d.x source/test_neo_bspline_3d.f90) +target_link_libraries(test_neo_bspline_3d.x ${COMMON_LIBS}) add_executable(test_interpolate.x source/test_interpolate.f90) target_link_libraries(test_interpolate.x ${COMMON_LIBS}) @@ -197,6 +203,9 @@ target_link_libraries(test_nctools_nc_get3.x ${COMMON_LIBS} ) +add_executable(bench_neo_bspline_vs_interpolate.x + source/bench_neo_bspline_vs_interpolate.f90) +target_link_libraries(bench_neo_bspline_vs_interpolate.x ${COMMON_LIBS}) ## Standard tests according to standard libneo Fortran test convention. @@ -213,6 +222,9 @@ add_test(NAME test_spl_three_to_five COMMAND test_spl_three_to_five.x) add_test(NAME test_species COMMAND test_species.x) add_test(NAME test_rng COMMAND test_rng.x) add_test(NAME test_polylag_3 COMMAND test_polylag_3.x) +add_test(NAME test_neo_bspline_1d COMMAND test_neo_bspline_1d.x) +add_test(NAME test_neo_bspline_2d COMMAND test_neo_bspline_2d.x) +add_test(NAME test_neo_bspline_3d COMMAND test_neo_bspline_3d.x) add_test(NAME test_interpolate COMMAND test_interpolate.x) add_test(NAME test_batch_interpolate COMMAND test_batch_interpolate.x) add_test(NAME test_batch_interpolate_many COMMAND test_batch_interpolate_many.x) diff --git a/test/scripts/plot_neo_bspline_1d.py b/test/scripts/plot_neo_bspline_1d.py new file mode 100644 index 00000000..4c8a9582 --- /dev/null +++ b/test/scripts/plot_neo_bspline_1d.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +import argparse +import pathlib + +import matplotlib.pyplot as plt +import numpy as np + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot 1D neo_bspline LSQ fit vs analytic reference." + ) + parser.add_argument( + "--data", + type=pathlib.Path, + default=pathlib.Path("bspline_1d_lsq.dat"), + help="Path to data file written by test_neo_bspline_1d.x", + ) + parser.add_argument( + "--output", + type=pathlib.Path, + default=pathlib.Path("bspline_1d_lsq.png"), + help="Output PNG file for the plot.", + ) + args = parser.parse_args() + + data = np.loadtxt(args.data) + x = data[:, 0] + f_true = data[:, 1] + f_fit = data[:, 2] + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.plot(x, f_true, label="analytic", color="k", linewidth=1.5) + ax.plot(x, f_fit, label="neo_bspline fit", color="C0", linestyle="--") + ax.set_xlabel("x") + ax.set_ylabel("f(x)") + ax.set_title("neo_bspline 1D LSQ fit") + ax.legend() + fig.tight_layout() + fig.savefig(args.output, dpi=150) + + +if __name__ == "__main__": + main() + diff --git a/test/scripts/plot_neo_bspline_2d_circle.py b/test/scripts/plot_neo_bspline_2d_circle.py new file mode 100644 index 00000000..0c1d109b --- /dev/null +++ b/test/scripts/plot_neo_bspline_2d_circle.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +import argparse +import pathlib + +import matplotlib.pyplot as plt +import numpy as np + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot 2D neo_bspline LSQ fit vs analytic along a circle." + ) + parser.add_argument( + "--data", + type=pathlib.Path, + default=pathlib.Path("bspline_2d_lsq_circle.dat"), + help="Path to circle data file written by test_neo_bspline_2d.x", + ) + parser.add_argument( + "--output", + type=pathlib.Path, + default=pathlib.Path("bspline_2d_lsq_circle.png"), + help="Output PNG file.", + ) + args = parser.parse_args() + + data = np.loadtxt(args.data) + theta = data[:, 0] + f_true = data[:, 3] + f_fit = data[:, 4] + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.plot(theta, f_true, label="analytic", color="k", linewidth=1.5) + ax.plot(theta, f_fit, label="neo_bspline fit", color="C1", linestyle="--") + ax.set_xlabel("theta") + ax.set_ylabel("f(theta)") + ax.set_title("neo_bspline 2D LSQ fit along circle") + ax.legend() + fig.tight_layout() + fig.savefig(args.output, dpi=150) + + +if __name__ == "__main__": + main() + diff --git a/test/scripts/plot_neo_bspline_2d_grid.py b/test/scripts/plot_neo_bspline_2d_grid.py new file mode 100644 index 00000000..bcaf4895 --- /dev/null +++ b/test/scripts/plot_neo_bspline_2d_grid.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +import argparse +import pathlib + +import matplotlib.pyplot as plt +import numpy as np + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot 2D neo_bspline LSQ fit error on full grid." + ) + parser.add_argument( + "--data", + type=pathlib.Path, + default=pathlib.Path("bspline_2d_lsq_grid.dat"), + help="Path to grid data file written by test_neo_bspline_2d.x", + ) + parser.add_argument( + "--output", + type=pathlib.Path, + default=pathlib.Path("bspline_2d_lsq_grid.png"), + help="Output PNG file.", + ) + args = parser.parse_args() + + data = np.loadtxt(args.data) + x = data[:, 0] + y = data[:, 1] + f_true = data[:, 2] + f_fit = data[:, 3] + err = f_fit - f_true + + fig, ax = plt.subplots(figsize=(6, 4)) + sc = ax.scatter(x, y, c=err, cmap="coolwarm", s=8, edgecolor="none") + ax.set_xlabel("x") + ax.set_ylabel("y") + ax.set_title("neo_bspline 2D LSQ full-grid error (fit - analytic)") + cbar = fig.colorbar(sc, ax=ax) + cbar.set_label("error") + fig.tight_layout() + fig.savefig(args.output, dpi=150) + + +if __name__ == "__main__": + main() + diff --git a/test/scripts/plot_neo_bspline_3d_slice.py b/test/scripts/plot_neo_bspline_3d_slice.py new file mode 100644 index 00000000..b7437d62 --- /dev/null +++ b/test/scripts/plot_neo_bspline_3d_slice.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import argparse +import pathlib + +import matplotlib.pyplot as plt +import numpy as np + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot 3D neo_bspline LSQ fit error on a mid-plane slice.", + ) + parser.add_argument( + "--data", + type=pathlib.Path, + default=pathlib.Path("bspline_3d_lsq_grid.dat"), + help="Path to 3D grid data file written by test_neo_bspline_3d.x", + ) + parser.add_argument( + "--output", + type=pathlib.Path, + default=pathlib.Path("bspline_3d_lsq_slice.png"), + help="Output PNG file.", + ) + args = parser.parse_args() + + data = np.loadtxt(args.data) + x = data[:, 0] + y = data[:, 1] + z = data[:, 2] + f_true = data[:, 3] + f_fit = data[:, 4] + err = f_fit - f_true + + z_levels = np.unique(z) + z_mid = z_levels[len(z_levels) // 2] + mask = np.isclose(z, z_mid) + + fig, ax = plt.subplots(figsize=(6, 4)) + sc = ax.scatter(x[mask], y[mask], c=err[mask], cmap="coolwarm", s=8, edgecolor="none") + ax.set_xlabel("x") + ax.set_ylabel("y") + ax.set_title(f"neo_bspline 3D LSQ mid-plane error (z = {z_mid:.3f})") + cbar = fig.colorbar(sc, ax=ax) + cbar.set_label("error") + fig.tight_layout() + fig.savefig(args.output, dpi=150) + + +if __name__ == "__main__": + main() + diff --git a/test/scripts/plot_neo_bspline_benchmarks.py b/test/scripts/plot_neo_bspline_benchmarks.py new file mode 100644 index 00000000..b1e9b568 --- /dev/null +++ b/test/scripts/plot_neo_bspline_benchmarks.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +import argparse +import pathlib + +import matplotlib.pyplot as plt +import numpy as np + + +def load_bench( + path: pathlib.Path, +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + data = np.loadtxt(path) + n = data[:, 0] + t_create_interp = data[:, 1] + t_eval_interp = data[:, 2] + t_create_bs = data[:, 3] + t_eval_bs = data[:, 4] + t_create_dir = data[:, 5] + t_eval_dir = data[:, 6] + return n, t_create_interp, t_eval_interp, t_create_bs, t_eval_bs, t_create_dir, t_eval_dir + + +def plot_dim_create( + path: pathlib.Path, + title: str, + output: pathlib.Path, +) -> None: + n, t_ci, _, t_cb, _, t_cd, _ = load_bench(path) + + # ignore skipped cases (-1) for direct interpolation + mask_dir = t_cd > 0 + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.loglog(n, t_ci, "o-", label="interp create") + ax.loglog(n, t_cb, "s-", label="neo_bspline create") + if np.any(mask_dir): + ax.loglog(n[mask_dir], t_cd[mask_dir], "^-", label="neo_bspline direct create") + + ax.set_xlabel("number of data points") + ax.set_ylabel("time [s]") + ax.set_title(title) + ax.legend() + ax.grid(True, which="both", ls=":") + + fig.tight_layout() + fig.savefig(output, dpi=150) + + +def plot_dim_eval( + path: pathlib.Path, + title: str, + output: pathlib.Path, +) -> None: + n, _, t_ei, _, t_eb, _, t_ed = load_bench(path) + + mask_dir = t_ed > 0 + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.loglog(n, t_ei, "o--", label="interp eval") + ax.loglog(n, t_eb, "s--", label="neo_bspline eval") + if np.any(mask_dir): + ax.loglog(n[mask_dir], t_ed[mask_dir], "^--", label="neo_bspline direct eval") + + ax.set_xlabel("number of data points") + ax.set_ylabel("time [s]") + ax.set_title(title) + ax.legend() + ax.grid(True, which="both", ls=":") + + fig.tight_layout() + fig.savefig(output, dpi=150) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot runtime benchmarks: interpolate vs neo_bspline.", + ) + parser.add_argument( + "--data-dir", + type=pathlib.Path, + default=pathlib.Path("."), + help="Directory with bench_neo_bspline_*.dat files.", + ) + parser.add_argument( + "--output-dir", + type=pathlib.Path, + default=pathlib.Path("."), + help="Directory for output PNG files.", + ) + args = parser.parse_args() + + args.output_dir.mkdir(parents=True, exist_ok=True) + + plot_dim_create( + args.data_dir / "bench_neo_bspline_1d.dat", + "1D create: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_1d_create.png", + ) + plot_dim_eval( + args.data_dir / "bench_neo_bspline_1d.dat", + "1D eval: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_1d_eval.png", + ) + plot_dim_create( + args.data_dir / "bench_neo_bspline_2d.dat", + "2D create: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_2d_create.png", + ) + plot_dim_eval( + args.data_dir / "bench_neo_bspline_2d.dat", + "2D eval: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_2d_eval.png", + ) + plot_dim_create( + args.data_dir / "bench_neo_bspline_3d.dat", + "3D create: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_3d_create.png", + ) + plot_dim_eval( + args.data_dir / "bench_neo_bspline_3d.dat", + "3D eval: interpolate vs neo_bspline", + args.output_dir / "bench_neo_bspline_3d_eval.png", + ) + + +if __name__ == "__main__": + main() diff --git a/test/source/bench_neo_bspline_vs_interpolate.f90 b/test/source/bench_neo_bspline_vs_interpolate.f90 new file mode 100644 index 00000000..9db7407f --- /dev/null +++ b/test/source/bench_neo_bspline_vs_interpolate.f90 @@ -0,0 +1,421 @@ +program bench_neo_bspline_vs_interpolate + use, intrinsic :: iso_fortran_env, only: dp_bench => real64 + use, intrinsic :: iso_c_binding, only: c_int, c_long + use interpolate + use neo_bspline + + implicit none + + integer, parameter :: N_CASE = 9 + integer, parameter :: N_TOTAL_LIST(N_CASE) = & + [10, 30, 100, 300, 1000, 3000, 10000, 30000, 100000] + real(dp_bench), parameter :: PI_BENCH = acos(-1.0_dp_bench) + real(dp_bench), parameter :: TWOPI_BENCH = 2.0_dp_bench*PI_BENCH + + type, bind(C) :: timespec + integer(c_long) :: tv_sec + integer(c_long) :: tv_nsec + end type timespec + +!> clock id for clock_gettime; macOS uses 6 for CLOCK_MONOTONIC, Linux uses 1. +#ifdef __APPLE__ + integer(c_int), parameter :: CLOCK_MONOTONIC = 6_c_int +#else + integer(c_int), parameter :: CLOCK_MONOTONIC = 1_c_int +#endif + + interface + function c_clock_gettime(clock_id, tp) bind(C, name="clock_gettime") + import :: c_int, timespec + integer(c_int), value :: clock_id + type(timespec) :: tp + integer(c_int) :: c_clock_gettime + end function c_clock_gettime + end interface + + call run_bench_1d() + call run_bench_2d() + call run_bench_3d() + +contains + + subroutine bench_time_now(t) + real(dp_bench), intent(out) :: t + type(timespec) :: ts + integer(c_int) :: ierr + + ierr = c_clock_gettime(CLOCK_MONOTONIC, ts) + if (ierr == 0_c_int) then + t = real(ts%tv_sec, dp_bench) & + + real(ts%tv_nsec, dp_bench)*1.0e-9_dp_bench + else + ! Fallback for platforms where CLOCK_MONOTONIC is unavailable. + call cpu_time(t) + end if + end subroutine bench_time_now + + subroutine run_bench_1d() + type(SplineData1D) :: spl_interp + type(bspline_1d) :: spl_bs, spl_bs_dir + integer :: i, k, n, degree_bs + real(dp_bench) :: t0, t1 + real(dp_bench) :: t_create_interp, t_eval_interp + real(dp_bench) :: t_create_lsq, t_eval_lsq + real(dp_bench) :: t_create_dir, t_eval_dir + real(dp_bench), allocatable :: x(:), f(:), coeff_lsq(:), coeff_dir(:) + real(dp_bench) :: x_min, x_max, y + integer :: unit, n_ctrl_lsq + + x_min = 1.23_dp_bench + x_max = x_min + TWOPI_BENCH + + open(newunit=unit, file="bench_neo_bspline_1d.dat", & + status="replace", action="write", form="formatted") + + do k = 1, N_CASE + n = N_TOTAL_LIST(k) + if (n < 8) cycle + + allocate(x(n), f(n)) + do i = 1, n + x(i) = x_min + (x_max - x_min)*real(i - 1, dp_bench) & + /real(n - 1, dp_bench) + f(i) = cos(2.0_dp_bench*x(i)) + 0.5_dp_bench*sin(3.0_dp_bench*x(i)) + end do + + ! 1) interpolate module: construct + evaluate + call bench_time_now(t0) + call construct_splines_1d(x_min, x_max, f, 5, .false., spl_interp) + call bench_time_now(t1) + t_create_interp = t1 - t0 + + call bench_time_now(t0) + do i = 1, n + call evaluate_splines_1d(spl_interp, x(i), y) + end do + call bench_time_now(t1) + t_eval_interp = t1 - t0 + + ! 2) neo_bspline LSQ: construct + evaluate + degree_bs = 5 + n_ctrl_lsq = min(40, n/2) + n_ctrl_lsq = max(degree_bs + 1, n_ctrl_lsq) + + allocate(coeff_lsq(n_ctrl_lsq)) + coeff_lsq = 0.0_dp_bench + + call bench_time_now(t0) + call bspline_1d_init_uniform(spl_bs, degree_bs, n_ctrl_lsq, x_min, & + x_max) + call bspline_1d_lsq_cgls(spl_bs, x, f, coeff_lsq, max_iter=400, & + tol=1.0d-10) + call bench_time_now(t1) + t_create_lsq = t1 - t0 + + call bench_time_now(t0) + do i = 1, n + call bspline_1d_eval(spl_bs, coeff_lsq, x(i), y) + end do + call bench_time_now(t1) + t_eval_lsq = t1 - t0 + + ! 3) neo_bspline direct interp: construct + evaluate (skip large N) + allocate(coeff_dir(n)) + + call bench_time_now(t0) + call bspline_1d_init_uniform(spl_bs_dir, degree_bs, n, x_min, x_max) + call bspline_1d_interp(spl_bs_dir, x, f, coeff_dir) + call bench_time_now(t1) + t_create_dir = t1 - t0 + + call bench_time_now(t0) + do i = 1, n + call bspline_1d_eval(spl_bs_dir, coeff_dir, x(i), y) + end do + call bench_time_now(t1) + t_eval_dir = t1 - t0 + + deallocate(coeff_dir) + + write(unit,'(i10,6es16.8)') n, t_create_interp, t_eval_interp, & + t_create_lsq, t_eval_lsq, t_create_dir, t_eval_dir + + deallocate(x, f, coeff_lsq) + call destroy_splines_1d(spl_interp) + end do + + close(unit) + end subroutine run_bench_1d + + + subroutine run_bench_2d() + type(SplineData2D) :: spl_interp + type(bspline_2d) :: spl_bs, spl_bs_dir + integer :: i1, i2, k + integer :: n_total, n_side, n1, n2 + integer :: degree(2), n_ctrl_lsq(2) + real(dp_bench) :: t0, t1 + real(dp_bench) :: t_create_interp, t_eval_interp + real(dp_bench) :: t_create_lsq, t_eval_lsq + real(dp_bench) :: t_create_dir, t_eval_dir + real(dp_bench), allocatable :: x1(:), x2(:) + real(dp_bench), allocatable :: f_grid(:,:) + real(dp_bench), allocatable :: coeff_lsq(:,:), coeff_dir(:,:) + real(dp_bench) :: x_min(2), x_max(2) + real(dp_bench) :: x(2), y + integer :: unit, n_data + + x_min = [1.23_dp_bench, 1.23_dp_bench] + x_max = x_min + [TWOPI_BENCH, TWOPI_BENCH] + + open(newunit=unit, file="bench_neo_bspline_2d.dat", & + status="replace", action="write", form="formatted") + + degree = [3, 3] + + do k = 1, N_CASE + n_total = N_TOTAL_LIST(k) + n_side = int(sqrt(real(n_total, dp_bench))) + if (n_side < degree(1) + 2) n_side = degree(1) + 2 + + n1 = n_side + n2 = n_side + n_data = n1*n2 + + allocate(x1(n1), x2(n2), f_grid(n1, n2)) + + do i1 = 1, n1 + x1(i1) = x_min(1) + (x_max(1) - x_min(1))* & + real(i1 - 1, dp_bench)/real(n1 - 1, dp_bench) + end do + do i2 = 1, n2 + x2(i2) = x_min(2) + (x_max(2) - x_min(2))* & + real(i2 - 1, dp_bench)/real(n2 - 1, dp_bench) + end do + + do i2 = 1, n2 + do i1 = 1, n1 + f_grid(i1, i2) = cos(x1(i1))*cos(2.0_dp_bench*x2(i2)) + end do + end do + + ! 1) interpolate module: construct + evaluate + call bench_time_now(t0) + call construct_splines_2d(x_min, x_max, f_grid, [5, 5], & + [.false., .false.], spl_interp) + call bench_time_now(t1) + t_create_interp = t1 - t0 + + call bench_time_now(t0) + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2)] + call evaluate_splines_2d(spl_interp, x, y) + end do + end do + call bench_time_now(t1) + t_eval_interp = t1 - t0 + + ! 2) neo_bspline LSQ: construct + evaluate + n_ctrl_lsq(1) = min(16, n1/2) + n_ctrl_lsq(2) = min(16, n2/2) + n_ctrl_lsq(1) = max(degree(1) + 1, n_ctrl_lsq(1)) + n_ctrl_lsq(2) = max(degree(2) + 1, n_ctrl_lsq(2)) + + allocate(coeff_lsq(n_ctrl_lsq(1), n_ctrl_lsq(2))) + coeff_lsq = 0.0_dp_bench + + call bench_time_now(t0) + call bspline_2d_init_uniform(spl_bs, degree, n_ctrl_lsq, x_min, x_max) + call bspline_2d_lsq_cgls(spl_bs, x1, x2, f_grid, coeff_lsq, & + max_iter=800, tol=1.0d-10) + call bench_time_now(t1) + t_create_lsq = t1 - t0 + + call bench_time_now(t0) + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2)] + call bspline_2d_eval(spl_bs, coeff_lsq, x, y) + end do + end do + call bench_time_now(t1) + t_eval_lsq = t1 - t0 + + ! 3) neo_bspline direct interp: construct + evaluate (skip large N) + allocate(coeff_dir(n1, n2)) + + call bench_time_now(t0) + call bspline_2d_init_uniform(spl_bs_dir, degree, [n1, n2], x_min, & + x_max) + call bspline_2d_interp(spl_bs_dir, x1, x2, f_grid, coeff_dir) + call bench_time_now(t1) + t_create_dir = t1 - t0 + + call bench_time_now(t0) + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2)] + call bspline_2d_eval(spl_bs_dir, coeff_dir, x, y) + end do + end do + call bench_time_now(t1) + t_eval_dir = t1 - t0 + + deallocate(coeff_dir) + + write(unit,'(i10,6es16.8)') n_data, t_create_interp, t_eval_interp, & + t_create_lsq, t_eval_lsq, t_create_dir, t_eval_dir + + deallocate(x1, x2, f_grid, coeff_lsq) + call destroy_splines_2d(spl_interp) + end do + + close(unit) + end subroutine run_bench_2d + + + subroutine run_bench_3d() + type(SplineData3D) :: spl_interp + type(bspline_3d) :: spl_bs, spl_bs_dir + integer :: i1, i2, i3, k + integer :: n_total, n_side, n1, n2, n3 + integer :: degree(3), n_ctrl_lsq(3) + real(dp_bench) :: t0, t1 + real(dp_bench) :: t_create_interp, t_eval_interp + real(dp_bench) :: t_create_lsq, t_eval_lsq + real(dp_bench) :: t_create_dir, t_eval_dir + real(dp_bench), allocatable :: x1(:), x2(:), x3(:) + real(dp_bench), allocatable :: f3d(:,:,:) + real(dp_bench), allocatable :: coeff_lsq(:,:,:), coeff_dir(:,:,:) + real(dp_bench) :: x_min(3), x_max(3) + real(dp_bench) :: x(3), y + integer :: unit, n_data + + x_min = [1.23_dp_bench, 1.23_dp_bench, 1.23_dp_bench] + x_max = x_min + [TWOPI_BENCH, TWOPI_BENCH, TWOPI_BENCH] + + open(newunit=unit, file="bench_neo_bspline_3d.dat", & + status="replace", action="write", form="formatted") + + degree = [3, 3, 3] + + do k = 1, N_CASE + n_total = N_TOTAL_LIST(k) + n_side = int(real(n_total, dp_bench)**(1.0_dp_bench/3.0_dp_bench)) + if (n_side < degree(1) + 2) n_side = degree(1) + 2 + + n1 = n_side + n2 = n_side + n3 = n_side + n_data = n1*n2*n3 + + allocate(x1(n1), x2(n2), x3(n3)) + allocate(f3d(n1, n2, n3)) + + do i1 = 1, n1 + x1(i1) = x_min(1) + (x_max(1) - x_min(1))* & + real(i1 - 1, dp_bench)/real(n1 - 1, dp_bench) + end do + do i2 = 1, n2 + x2(i2) = x_min(2) + (x_max(2) - x_min(2))* & + real(i2 - 1, dp_bench)/real(n2 - 1, dp_bench) + end do + do i3 = 1, n3 + x3(i3) = x_min(3) + (x_max(3) - x_min(3))* & + real(i3 - 1, dp_bench)/real(n3 - 1, dp_bench) + end do + + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + f3d(i1, i2, i3) = cos(x1(i1))*cos(2.0_dp_bench*x2(i2)) & + *cos(3.0_dp_bench*x3(i3)) + end do + end do + end do + + ! 1) interpolate module: construct + evaluate + call bench_time_now(t0) + call construct_splines_3d(x_min, x_max, f3d, [5, 5, 5], & + [.false., .false., .false.], spl_interp) + call bench_time_now(t1) + t_create_interp = t1 - t0 + + call bench_time_now(t0) + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2), x3(i3)] + call evaluate_splines_3d(spl_interp, x, y) + end do + end do + end do + call bench_time_now(t1) + t_eval_interp = t1 - t0 + + ! 2) neo_bspline LSQ: construct + evaluate + n_ctrl_lsq(1) = min(16, n1/2) + n_ctrl_lsq(2) = min(16, n2/2) + n_ctrl_lsq(3) = min(16, n3/2) + n_ctrl_lsq(1) = max(degree(1) + 1, n_ctrl_lsq(1)) + n_ctrl_lsq(2) = max(degree(2) + 1, n_ctrl_lsq(2)) + n_ctrl_lsq(3) = max(degree(3) + 1, n_ctrl_lsq(3)) + + allocate(coeff_lsq(n_ctrl_lsq(1), n_ctrl_lsq(2), n_ctrl_lsq(3))) + coeff_lsq = 0.0_dp_bench + + call bench_time_now(t0) + call bspline_3d_init_uniform(spl_bs, degree, n_ctrl_lsq, x_min, x_max) + call bspline_3d_lsq_cgls(spl_bs, x1, x2, x3, f3d, coeff_lsq, & + max_iter=800, tol=1.0d-10) + call bench_time_now(t1) + t_create_lsq = t1 - t0 + + call bench_time_now(t0) + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2), x3(i3)] + call bspline_3d_eval(spl_bs, coeff_lsq, x, y) + end do + end do + end do + call bench_time_now(t1) + t_eval_lsq = t1 - t0 + + ! 3) neo_bspline direct interp: construct + evaluate (skip large N) + allocate(coeff_dir(n1, n2, n3)) + + call bench_time_now(t0) + call bspline_3d_init_uniform(spl_bs_dir, degree, [n1, n2, n3], & + x_min, x_max) + call bspline_3d_interp(spl_bs_dir, x1, x2, x3, f3d, coeff_dir) + call bench_time_now(t1) + t_create_dir = t1 - t0 + + call bench_time_now(t0) + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + x = [x1(i1), x2(i2), x3(i3)] + call bspline_3d_eval(spl_bs_dir, coeff_dir, x, y) + end do + end do + end do + call bench_time_now(t1) + t_eval_dir = t1 - t0 + + deallocate(coeff_dir) + + write(unit,'(i10,6es16.8)') n_data, t_create_interp, t_eval_interp, & + t_create_lsq, t_eval_lsq, t_create_dir, t_eval_dir + + deallocate(x1, x2, x3, f3d, coeff_lsq) + if (n_data <= 10000) call destroy_splines_3d(spl_interp) + end do + + close(unit) + end subroutine run_bench_3d + +end program bench_neo_bspline_vs_interpolate diff --git a/test/source/test_neo_bspline_1d.f90 b/test/source/test_neo_bspline_1d.f90 new file mode 100644 index 00000000..dab6895a --- /dev/null +++ b/test/source/test_neo_bspline_1d.f90 @@ -0,0 +1,180 @@ +program test_neo_bspline_1d + use libneo_kinds, only : dp + use math_constants + use libneo_util, only : linspace + use neo_bspline + + implicit none + + real(dp), parameter :: TOL_L2 = 5.0d-7 + real(dp), parameter :: X_MIN = 1.23d0 + real(dp), parameter :: X_MAX = TWOPI + 1.23d0 + + call test_bspline_1d_partition_unity() + call test_bspline_1d_lsq_cos() + call test_bspline_1d_lsq_batch() + +contains + + subroutine test_bspline_1d_partition_unity() + type(bspline_1d) :: spl + integer, parameter :: DEGREE = 3 + integer, parameter :: N_CTRL = 20 + integer, parameter :: N_SAMPLE = 200 + + real(dp) :: x_eval(N_SAMPLE) + real(dp) :: sum_basis, x, temp + real(dp) :: max_err + real(dp) :: c(N_CTRL) + integer :: i, j + + print *, "Testing neo_bspline 1D partition of unity" + + call bspline_1d_init_uniform(spl, DEGREE, N_CTRL, X_MIN, X_MAX) + call linspace(X_MIN, X_MAX, N_SAMPLE, x_eval) + + max_err = 0.0d0 + do i = 1, N_SAMPLE + x = x_eval(i) + sum_basis = 0.0d0 + do j = 1, N_CTRL + c = 0.0d0 + c(j) = 1.0d0 + call bspline_1d_eval(spl, c, x, temp) + sum_basis = sum_basis + temp + end do + max_err = max(max_err, abs(sum_basis - 1.0d0)) + end do + + print *, " max |sum_j N_j(x) - 1| =", max_err + + if (max_err > 1.0d-10) then + error stop "neo_bspline 1D partition of unity violated" + end if + + end subroutine test_bspline_1d_partition_unity + + + subroutine test_bspline_1d_lsq_cos() + type(bspline_1d) :: spl + integer, parameter :: DEGREE = 5 + integer, parameter :: N_CTRL = 60 + integer, parameter :: N_DATA = 400 + integer, parameter :: N_PLOT = 201 + + real(dp) :: x_data(N_DATA), f_data(N_DATA) + real(dp) :: coeff(N_CTRL) + real(dp) :: x_plot(N_PLOT), f_true(N_PLOT), f_fit(N_PLOT) + real(dp) :: err2 + integer :: i + + print *, "Testing neo_bspline 1D LSQ CGLS (cos)" + + call bspline_1d_init_uniform(spl, DEGREE, N_CTRL, X_MIN, X_MAX) + + ! Scattered data in [X_MIN, X_MAX] + do i = 1, N_DATA + x_data(i) = X_MIN + (X_MAX - X_MIN) * real(i, dp) / real(N_DATA + 1, dp) + x_data(i) = x_data(i) + 0.1d0 * sin(3.0d0*real(i, dp)) + if (x_data(i) < X_MIN) x_data(i) = X_MIN + if (x_data(i) > X_MAX) x_data(i) = X_MAX + f_data(i) = cos(2.0d0*x_data(i)) + 0.5d0*sin(3.0d0*x_data(i)) + end do + + coeff = 0.0_dp + call bspline_1d_lsq_cgls(spl, x_data, f_data, coeff, max_iter=400, tol=1.0d-10) + + call linspace(X_MIN, X_MAX, N_PLOT, x_plot) + do i = 1, N_PLOT + f_true(i) = cos(2.0d0*x_plot(i)) + 0.5d0*sin(3.0d0*x_plot(i)) + call bspline_1d_eval(spl, coeff, x_plot(i), f_fit(i)) + end do + + err2 = sum((f_fit - f_true)**2)/real(N_PLOT, dp) + print *, " LSQ L2 error =", sqrt(err2) + + if (sqrt(err2) > TOL_L2) then + error stop "neo_bspline 1D LSQ error too large" + end if + + call write_plot_data("bspline_1d_lsq.dat", N_PLOT, x_plot, f_true, f_fit) + + end subroutine test_bspline_1d_lsq_cos + + + subroutine test_bspline_1d_lsq_batch() + type(bspline_1d) :: spl + integer, parameter :: DEGREE = 5 + integer, parameter :: N_CTRL = 60 + integer, parameter :: N_DATA = 400 + integer, parameter :: N_PLOT = 201 + integer, parameter :: N_RHS = 2 + + real(dp) :: x_data(N_DATA) + real(dp) :: f_data(N_DATA) + real(dp) :: coeff(N_CTRL, N_RHS) + real(dp) :: x_plot(N_PLOT) + real(dp) :: f_true(N_PLOT, N_RHS) + real(dp) :: f_fit(N_PLOT, N_RHS) + real(dp) :: err2(N_RHS) + integer :: i, k + + print *, "Testing neo_bspline 1D LSQ CGLS (batched)" + + call bspline_1d_init_uniform(spl, DEGREE, N_CTRL, X_MIN, X_MAX) + + do i = 1, N_DATA + x_data(i) = X_MIN + (X_MAX - X_MIN)*real(i, dp)/real(N_DATA + 1, dp) + x_data(i) = x_data(i) + 0.1d0*sin(3.0d0*real(i, dp)) + if (x_data(i) < X_MIN) x_data(i) = X_MIN + if (x_data(i) > X_MAX) x_data(i) = X_MAX + end do + + do k = 1, N_RHS + do i = 1, N_DATA + if (k == 1) then + f_data(i) = cos(2.0d0*x_data(i)) + 0.5d0*sin(3.0d0*x_data(i)) + else + f_data(i) = sin(1.5d0*x_data(i)) + 0.3d0*cos(4.0d0*x_data(i)) + end if + end do + coeff(:, k) = 0.0_dp + call bspline_1d_lsq_cgls(spl, x_data, f_data, coeff(:, k), & + max_iter=400, tol=1.0d-10) + end do + + call linspace(X_MIN, X_MAX, N_PLOT, x_plot) + do i = 1, N_PLOT + f_true(i, 1) = cos(2.0d0*x_plot(i)) + 0.5d0*sin(3.0d0*x_plot(i)) + f_true(i, 2) = sin(1.5d0*x_plot(i)) + 0.3d0*cos(4.0d0*x_plot(i)) + do k = 1, N_RHS + call bspline_1d_eval(spl, coeff(:, k), x_plot(i), f_fit(i, k)) + end do + end do + + do k = 1, N_RHS + err2(k) = sum((f_fit(:, k) - f_true(:, k))**2)/real(N_PLOT, dp) + print *, " LSQ L2 error (rhs =", k, ") =", sqrt(err2(k)) + if (sqrt(err2(k)) > TOL_L2) then + error stop "neo_bspline 1D batched LSQ error too large" + end if + end do + + end subroutine test_bspline_1d_lsq_batch + + + subroutine write_plot_data(fname, n, x, f_true, f_fit) + character(*), intent(in) :: fname + integer, intent(in) :: n + real(dp), intent(in) :: x(n), f_true(n), f_fit(n) + integer :: iunit, i + + open(newunit=iunit, file=fname, status="replace", action="write", & + form="formatted") + do i = 1, n + write(iunit,'(3es24.16)') x(i), f_true(i), f_fit(i) + end do + close(iunit) + end subroutine write_plot_data + +end program test_neo_bspline_1d diff --git a/test/source/test_neo_bspline_2d.f90 b/test/source/test_neo_bspline_2d.f90 new file mode 100644 index 00000000..d7d1188b --- /dev/null +++ b/test/source/test_neo_bspline_2d.f90 @@ -0,0 +1,186 @@ +program test_neo_bspline_2d + use libneo_kinds, only: dp + use math_constants + use libneo_util, only: linspace + use neo_bspline + + implicit none + + real(dp), parameter :: X_MIN = 1.23d0 + real(dp), parameter :: X_MAX = TWOPI + 1.23d0 + real(dp), parameter :: TOL_L2_FULL = 5.0d-4 + + call test_bspline_2d_lsq_full_grid() + call test_bspline_2d_interp_exact() + call test_bspline_2d_lsq_full_grid_batch() + +contains + + subroutine test_bspline_2d_lsq_full_grid() + type(bspline_2d) :: spl + integer, parameter :: DEGREE(2) = [3, 3] + integer, parameter :: N_CTRL(2) = [32, 28] + integer, parameter :: N_GRID1 = 40 + integer, parameter :: N_GRID2 = 36 + + real(dp) :: x1(N_GRID1), x2(N_GRID2) + real(dp) :: f_grid(N_GRID1, N_GRID2) + real(dp) :: coeff(N_CTRL(1), N_CTRL(2)) + + integer :: i1, i2 + real(dp) :: x(2), f_true, f_fit + real(dp) :: err2 + + print *, "Testing neo_bspline 2D LSQ CGLS (full grid)" + + call bspline_2d_init_uniform(spl, DEGREE, N_CTRL, [X_MIN, X_MIN], & + [X_MAX, X_MAX]) + + call linspace(X_MIN, X_MAX, N_GRID1, x1) + call linspace(X_MIN, X_MAX, N_GRID2, x2) + + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + f_grid(i1, i2) = cos(x1(i1))*cos(2.0d0*x2(i2)) + end do + end do + + coeff = 0.0_dp + call bspline_2d_lsq_cgls(spl, x1, x2, f_grid, coeff, & + max_iter=800, tol=1.0d-10) + + err2 = 0.0d0 + open(unit=20, file="bspline_2d_lsq_grid.dat", status="replace", & + action="write") + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + x(1) = x1(i1) + x(2) = x2(i2) + f_true = cos(x(1))*cos(2.0d0*x(2)) + call bspline_2d_eval(spl, coeff, x, f_fit) + err2 = err2 + (f_fit - f_true)**2 + write(20,'(4es24.16)') x(1), x(2), f_true, f_fit + end do + end do + close(20) + + err2 = err2/real(N_GRID1*N_GRID2, dp) + print *, " 2D LSQ full-grid L2 error =", sqrt(err2) + + if (sqrt(err2) > TOL_L2_FULL) then + error stop "neo_bspline 2D LSQ full-grid error too large" + end if + + end subroutine test_bspline_2d_lsq_full_grid + + + subroutine test_bspline_2d_interp_exact() + type(bspline_2d) :: spl + integer, parameter :: DEGREE(2) = [3, 3] + integer, parameter :: N1 = 18 + integer, parameter :: N2 = 14 + + real(dp) :: x1(N1), x2(N2) + real(dp) :: f_grid(N1, N2) + real(dp) :: coeff(N1, N2) + real(dp) :: f_fit, f_true, x(2) + real(dp) :: err_max + integer :: i1, i2 + + print *, "Testing neo_bspline 2D direct interpolation (collocation)" + + call bspline_2d_init_uniform(spl, DEGREE, [N1, N2], [X_MIN, X_MIN], & + [X_MAX, X_MAX]) + + call linspace(X_MIN, X_MAX, N1, x1) + call linspace(X_MIN, X_MAX, N2, x2) + + do i2 = 1, N2 + do i1 = 1, N1 + f_grid(i1, i2) = cos(x1(i1))*cos(2.0d0*x2(i2)) + end do + end do + + call bspline_2d_interp(spl, x1, x2, f_grid, coeff) + + err_max = 0.0d0 + do i2 = 1, N2 + do i1 = 1, N1 + x = [x1(i1), x2(i2)] + f_true = cos(x(1))*cos(2.0d0*x(2)) + call bspline_2d_eval(spl, coeff, x, f_fit) + err_max = max(err_max, abs(f_fit - f_true)) + end do + end do + + print *, " 2D direct interp max error =", err_max + if (err_max > 1.0d-10) then + error stop "neo_bspline 2D direct interpolation error too large" + end if + end subroutine test_bspline_2d_interp_exact + + + subroutine test_bspline_2d_lsq_full_grid_batch() + type(bspline_2d) :: spl + integer, parameter :: DEGREE(2) = [3, 3] + integer, parameter :: N_CTRL(2) = [32, 28] + integer, parameter :: N_GRID1 = 40 + integer, parameter :: N_GRID2 = 36 + integer, parameter :: N_RHS = 2 + + real(dp) :: x1(N_GRID1), x2(N_GRID2) + real(dp) :: f_grid(N_GRID1, N_GRID2) + real(dp) :: coeff(N_CTRL(1), N_CTRL(2), N_RHS) + + real(dp) :: x(2), f_true(N_RHS), f_fit + real(dp) :: err2(N_RHS) + integer :: i1, i2, k + + print *, "Testing neo_bspline 2D LSQ CGLS (full grid, batched)" + + call bspline_2d_init_uniform(spl, DEGREE, N_CTRL, [X_MIN, X_MIN], & + [X_MAX, X_MAX]) + + call linspace(X_MIN, X_MAX, N_GRID1, x1) + call linspace(X_MIN, X_MAX, N_GRID2, x2) + + do k = 1, N_RHS + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + if (k == 1) then + f_grid(i1, i2) = cos(x1(i1))*cos(2.0d0*x2(i2)) + else + f_grid(i1, i2) = sin(x1(i1))*cos(3.0d0*x2(i2)) + end if + end do + end do + coeff(:, :, k) = 0.0_dp + call bspline_2d_lsq_cgls(spl, x1, x2, f_grid, coeff(:, :, k), & + max_iter=800, tol=1.0d-10) + end do + + err2 = 0.0d0 + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + x(1) = x1(i1) + x(2) = x2(i2) + f_true(1) = cos(x(1))*cos(2.0d0*x(2)) + f_true(2) = sin(x(1))*cos(3.0d0*x(2)) + do k = 1, N_RHS + call bspline_2d_eval(spl, coeff(:, :, k), x, f_fit) + err2(k) = err2(k) + (f_fit - f_true(k))**2 + end do + end do + end do + + err2 = err2/real(N_GRID1*N_GRID2, dp) + do k = 1, N_RHS + print *, " 2D LSQ full-grid L2 error (rhs =", k, ") =", sqrt(err2(k)) + if (sqrt(err2(k)) > TOL_L2_FULL) then + error stop "neo_bspline 2D batched LSQ full-grid error too large" + end if + end do + + end subroutine test_bspline_2d_lsq_full_grid_batch + +end program test_neo_bspline_2d diff --git a/test/source/test_neo_bspline_3d.f90 b/test/source/test_neo_bspline_3d.f90 new file mode 100644 index 00000000..f38b4d25 --- /dev/null +++ b/test/source/test_neo_bspline_3d.f90 @@ -0,0 +1,155 @@ +program test_neo_bspline_3d + use libneo_kinds, only: dp + use math_constants + use libneo_util, only: linspace + use neo_bspline + + implicit none + + real(dp), parameter :: X_MIN = 1.23d0 + real(dp), parameter :: X_MAX = TWOPI + 1.23d0 + real(dp), parameter :: TOL_L2_3D = 2.0d-3 + + call test_bspline_3d_lsq_full_grid() + call test_bspline_3d_lsq_batch() + +contains + + subroutine test_bspline_3d_lsq_full_grid() + type(bspline_3d) :: spl + integer, parameter :: DEGREE(3) = [3, 3, 3] + integer, parameter :: N_CTRL(3) = [24, 20, 16] + integer, parameter :: N_GRID1 = 18 + integer, parameter :: N_GRID2 = 16 + integer, parameter :: N_GRID3 = 14 + + real(dp) :: x1(N_GRID1), x2(N_GRID2), x3(N_GRID3) + real(dp) :: f_grid(N_GRID1, N_GRID2, N_GRID3) + real(dp) :: coeff(N_CTRL(1), N_CTRL(2), N_CTRL(3)) + + real(dp) :: x(3), f_true, f_fit + real(dp) :: err2 + integer :: i1, i2, i3 + + print *, "Testing neo_bspline 3D LSQ CGLS (full grid)" + + call bspline_3d_init_uniform(spl, DEGREE, N_CTRL, & + [X_MIN, X_MIN, X_MIN], [X_MAX, X_MAX, X_MAX]) + + call linspace(X_MIN, X_MAX, N_GRID1, x1) + call linspace(X_MIN, X_MAX, N_GRID2, x2) + call linspace(X_MIN, X_MAX, N_GRID3, x3) + + do i3 = 1, N_GRID3 + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + f_grid(i1, i2, i3) = cos(x1(i1))*cos(2.0d0*x2(i2)) & + *cos(3.0d0*x3(i3)) + end do + end do + end do + + coeff = 0.0_dp + call bspline_3d_lsq_cgls(spl, x1, x2, x3, f_grid, coeff, & + max_iter=800, tol=1.0d-10) + + err2 = 0.0d0 + open(unit=30, file="bspline_3d_lsq_grid.dat", status="replace", & + action="write") + do i3 = 1, N_GRID3 + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + x(1) = x1(i1) + x(2) = x2(i2) + x(3) = x3(i3) + f_true = cos(x(1))*cos(2.0d0*x(2))*cos(3.0d0*x(3)) + call bspline_3d_eval(spl, coeff, x, f_fit) + err2 = err2 + (f_fit - f_true)**2 + write(30,'(5es24.16)') x(1), x(2), x(3), f_true, f_fit + end do + end do + end do + close(30) + + err2 = err2/real(N_GRID1*N_GRID2*N_GRID3, dp) + print *, " 3D LSQ full-grid L2 error =", sqrt(err2) + + if (sqrt(err2) > TOL_L2_3D) then + error stop "neo_bspline 3D LSQ full-grid error too large" + end if + + end subroutine test_bspline_3d_lsq_full_grid + + subroutine test_bspline_3d_lsq_batch() + type(bspline_3d) :: spl + integer, parameter :: DEGREE(3) = [3, 3, 3] + integer, parameter :: N_CTRL(3) = [24, 20, 16] + integer, parameter :: N_GRID1 = 18 + integer, parameter :: N_GRID2 = 16 + integer, parameter :: N_GRID3 = 14 + integer, parameter :: N_RHS = 2 + + real(dp) :: x1(N_GRID1), x2(N_GRID2), x3(N_GRID3) + real(dp) :: f_grid(N_GRID1, N_GRID2, N_GRID3) + real(dp) :: coeff(N_CTRL(1), N_CTRL(2), N_CTRL(3), N_RHS) + + real(dp) :: x(3), f_true(N_RHS), f_fit + real(dp) :: err2(N_RHS) + integer :: i1, i2, i3, k + + print *, "Testing neo_bspline 3D LSQ CGLS (batched)" + + call bspline_3d_init_uniform(spl, DEGREE, N_CTRL, & + [X_MIN, X_MIN, X_MIN], [X_MAX, X_MAX, X_MAX]) + + call linspace(X_MIN, X_MAX, N_GRID1, x1) + call linspace(X_MIN, X_MAX, N_GRID2, x2) + call linspace(X_MIN, X_MAX, N_GRID3, x3) + + do k = 1, N_RHS + do i3 = 1, N_GRID3 + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + if (k == 1) then + f_grid(i1, i2, i3) = cos(x1(i1))*cos(2.0d0*x2(i2)) & + *cos(3.0d0*x3(i3)) + else + f_grid(i1, i2, i3) = sin(x1(i1))*cos(2.0d0*x2(i2)) & + *cos(1.5d0*x3(i3)) + end if + end do + end do + end do + coeff(:, :, :, k) = 0.0_dp + call bspline_3d_lsq_cgls(spl, x1, x2, x3, f_grid, & + coeff(:, :, :, k), max_iter=800, tol=1.0d-10) + end do + + err2 = 0.0d0 + do i3 = 1, N_GRID3 + do i2 = 1, N_GRID2 + do i1 = 1, N_GRID1 + x(1) = x1(i1) + x(2) = x2(i2) + x(3) = x3(i3) + f_true(1) = cos(x(1))*cos(2.0d0*x(2))*cos(3.0d0*x(3)) + f_true(2) = sin(x(1))*cos(2.0d0*x(2))*cos(1.5d0*x(3)) + do k = 1, N_RHS + call bspline_3d_eval(spl, coeff(:, :, :, k), x, f_fit) + err2(k) = err2(k) + (f_fit - f_true(k))**2 + end do + end do + end do + end do + + err2 = err2/real(N_GRID1*N_GRID2*N_GRID3, dp) + do k = 1, N_RHS + print *, " 3D LSQ full-grid L2 error (rhs =", k, ") =", sqrt(err2(k)) + if (sqrt(err2(k)) > TOL_L2_3D) then + error stop "neo_bspline 3D batched LSQ full-grid error too large" + end if + end do + + end subroutine test_bspline_3d_lsq_batch + +end program test_neo_bspline_3d