diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index bd655ca..7cc1477 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -6,16 +6,55 @@ module rpoisson use types, only: dp use utils, only: stop_error use constants, only: pi -use ode1d, only: adams_extrapolation_outward, adams_interp_outward -use ode1d, only: integrate, get_midpoints, rk4_integrate3 +use ode1d, only: integrate, get_midpoints, rk4_integrate3, & + adams_extrapolation_outward, adams_interp_outward implicit none private -public rpoisson_outward_pc +public rpoisson_outward_pc, rpoisson_kernel1, rpoisson_kernel2 contains +subroutine rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) +real(dp), intent(in) :: R(:), Rp(:), rho(:) +real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:) +integer, parameter :: max_it=2 +integer :: i, it +do i = 4, size(R)-1 + u1(i+1) = u1(i) + adams_extrapolation_outward(u1p, i) + u2(i+1) = u2(i) + adams_extrapolation_outward(u2p, i) + do it = 1, max_it + u1p(i+1) = +Rp(i+1) * u2(i+1) + u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) + u1(i+1) = u1(i) + adams_interp_outward(u1p, i) + u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + end do +end do +end subroutine + +subroutine rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p) +real(dp), intent(in) :: R(:), Rp(:), rho(:) +real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:) +real(dp) :: RR +integer :: i +do i = 4, size(R)-1 + RR = Rp(i+1)/R(i+1) + u2p(i+1) = & + - u2(i) * (2 - 3._dp /2 *RR)*RR & + - u2p(i) * (19._dp/12 - 55._dp/16*RR)*RR & + + u2p(i-1) * (5._dp /12 - 59._dp/16*RR)*RR & + - u2p(i-2) * (1._dp /12 - 37._dp/16*RR)*RR & + - u2p(i-3) * 9._dp /16*RR *RR & + - rho(i+1) * (4 - 3 *RR)*Rp(i+1)*pi + + u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 + + u1p(i+1) = +Rp(i+1) * u2(i+1) + u1(i+1) = u1(i) + (9*u1p(i+1) + 19*u1p(i) - 5*u1p(i-1) + u1p(i-2)) / 24 +end do +end subroutine + function rpoisson_outward_pc(R, Rp, rho) result(V) ! Solves the equation V''(r) + 2/r*V'(r) = -4*pi*rho ! @@ -34,30 +73,16 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) real(dp) :: V(size(R)) real(dp), dimension(size(R)) :: u1, u2, u1p, u2p -integer :: N, i, it -integer, parameter :: max_it = 2 real(dp) :: rho_mid(3) -N = size(R) rho_mid = get_midpoints(R(:4), rho(:4)) call rpoisson_outward_rk4(rho(:4), rho_mid, R(:4), & 4*pi*integrate(Rp, rho*R), & 0.0_dp, & u1(:4), u2(:4)) - u1p(:4) = u2(:4) * Rp(:4) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) - -do i = 4, N-1 - u1(i+1) = u1(i) + adams_extrapolation_outward(u1p, i) - u2(i+1) = u2(i) + adams_extrapolation_outward(u2p, i) - do it = 1, max_it - u1p(i+1) = +Rp(i+1) * u2(i+1) - u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) - u1(i+1) = u1(i) + adams_interp_outward(u1p, i) - u2(i+1) = u2(i) + adams_interp_outward(u2p, i) - end do -end do +call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) V = u1 end function diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ff11625..e892d96 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(atom_U) add_subdirectory(oscillator) add_subdirectory(well) add_subdirectory(pseudopotential) +add_subdirectory(bench_poisson) if (WITH_LAPACK) add_subdirectory(double_min) endif () diff --git a/tests/bench_poisson/CMakeLists.txt b/tests/bench_poisson/CMakeLists.txt new file mode 100644 index 0000000..a8bde77 --- /dev/null +++ b/tests/bench_poisson/CMakeLists.txt @@ -0,0 +1,11 @@ +include_directories(${PROJECT_BINARY_DIR}/src) + +project(bench_poisson) + +add_executable(bench_poisson bench_poisson.f90) +target_link_libraries(bench_poisson dftatom) +add_test(bench_poisson ${PROJECT_BINARY_DIR}/bench_poisson) + +add_executable(test_poisson_kernels test_poisson_kernels.f90) +target_link_libraries(test_poisson_kernels dftatom) +add_test(test_poisson_kernels ${PROJECT_BINARY_DIR}/test_poisson_kernels) diff --git a/tests/bench_poisson/Plots.ipynb b/tests/bench_poisson/Plots.ipynb new file mode 100644 index 0000000..40b198f --- /dev/null +++ b/tests/bench_poisson/Plots.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%pylab inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "D = loadtxt(\"a.txt\", skiprows=1)\n", + "N = D[:, 0]\n", + "t1 = D[:, 2]\n", + "t2 = D[:, 3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot(N, t1, label=\"kernel 1\")\n", + "plot(N, t2, label=\"kernel 2\")\n", + "legend(loc=\"upper left\")\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"time [s]\")\n", + "title(\"Radial Poisson kernels comparison\")\n", + "show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot(N, t1/t2)\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"Speedup\")\n", + "title(\"Speedup of kernel 2 over kernel 1\")\n", + "show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "freq = 2.6e9*1.2\n", + "plot(N, t1*freq/N, label=\"kernel 1\")\n", + "plot(N, t2*freq/N, label=\"kernel 2\")\n", + "legend()\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"Cycles per element\")\n", + "title(\"Comparison in clock cycles per loop body (array element)\")\n", + "ylim([30, None])\n", + "show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tests/bench_poisson/bench_poisson.f90 b/tests/bench_poisson/bench_poisson.f90 new file mode 100644 index 0000000..f730abb --- /dev/null +++ b/tests/bench_poisson/bench_poisson.f90 @@ -0,0 +1,39 @@ +program bench_poisson +use types, only: dp +use mesh, only: mesh_exp, mesh_exp_deriv +use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2 +implicit none + +real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp +real(dp), allocatable, dimension(:) :: R, Rp, u1, u2, u1p, u2p, rho +real(dp) :: t1, t2, dt1, dt2 +integer :: N, i, iter + +print *, "N iter dt1[s] dt2[s] total_time[s] total_mem[MB]" +do N = 5000, 50000, 5000 + allocate(R(N), Rp(N), u1(N), u2(N), u1p(N), u2p(N), rho(N)) + R = mesh_exp(r_min, r_max, a, N-1) + Rp = mesh_exp_deriv(r_min, r_max, a, N-1) + rho = exp(-R) + u1 = 1 + u2 = 1 + u1p = 0 + u2p = 0 + iter = 40 * 5000/N + call cpu_time(t1) + do i = 1, iter + call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) + end do + call cpu_time(t2) + dt1 = (t2-t1)/iter + call cpu_time(t1) + do i = 1, iter + call rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p) + end do + call cpu_time(t2) + dt2 = (t2-t1)/iter + print "(i6, i6, es12.4, es12.4, f6.2, f6.2)", N, iter, dt1, dt2, dt1*iter, & + N*8*7/1024._dp**2 + deallocate(R, Rp, u1, u2, u1p, u2p, rho) +end do +end program diff --git a/tests/bench_poisson/test_poisson_kernels.f90 b/tests/bench_poisson/test_poisson_kernels.f90 new file mode 100644 index 0000000..94d62a6 --- /dev/null +++ b/tests/bench_poisson/test_poisson_kernels.f90 @@ -0,0 +1,34 @@ +program test_poisson_kernels +use utils, only: assert +use types, only: dp +use mesh, only: mesh_exp, mesh_exp_deriv +use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2 +implicit none + +! Mesh parameters: +real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp +integer, parameter :: NN = 5000 + +real(dp) :: R(NN+1) +real(dp), dimension(size(R)) :: Rp, rho +real(dp), dimension(size(R), 2) :: u1, u2, u1p, u2p + +R = mesh_exp(r_min, r_max, a, NN) +Rp = mesh_exp_deriv(r_min, r_max, a, NN) +rho = exp(-R) +u1 = 1 +u2 = 1 +u1p = 0 +u2p = 0 +call rpoisson_kernel1(R, Rp, rho, u1(:, 1), u2(:, 1), u1p(:, 1), u2p(:, 1)) +call rpoisson_kernel2(R, Rp, rho, u1(:, 2), u2(:, 2), u1p(:, 2), u2p(:, 2)) +print *, "Errors:" +print *, maxval(abs(u1(:,1)-u1(:,2))) +print *, maxval(abs(u2(:,1)-u2(:,2))) +print *, maxval(abs(u1p(:,1)-u1p(:,2))) +print *, maxval(abs(u2p(:,1)-u2p(:,2))) +call assert(maxval(abs(u1(:,1)-u1(:,2))) < 1e-10_dp) +call assert(maxval(abs(u2(:,1)-u2(:,2))) < 1e-15_dp) +call assert(maxval(abs(u1p(:,1)-u1p(:,2))) < 1e-9_dp) +call assert(maxval(abs(u2p(:,1)-u2p(:,2))) < 1e-16_dp) +end program