diff --git a/src/rpoisson.f90 b/src/rpoisson.f90
index bd655ca..7cc1477 100644
--- a/src/rpoisson.f90
+++ b/src/rpoisson.f90
@@ -6,16 +6,55 @@ module rpoisson
 use types, only: dp
 use utils, only: stop_error
 use constants, only: pi
-use ode1d, only: adams_extrapolation_outward, adams_interp_outward
-use ode1d, only: integrate, get_midpoints, rk4_integrate3
+use ode1d, only: integrate, get_midpoints, rk4_integrate3, &
+    adams_extrapolation_outward, adams_interp_outward
 
 implicit none
 
 private
-public rpoisson_outward_pc
+public rpoisson_outward_pc, rpoisson_kernel1, rpoisson_kernel2
 
 contains
 
+subroutine rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p)
+real(dp), intent(in) :: R(:), Rp(:), rho(:)
+real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:)
+integer, parameter :: max_it=2
+integer :: i, it
+do i = 4, size(R)-1
+    u1(i+1)  = u1(i)  + adams_extrapolation_outward(u1p, i)
+    u2(i+1)  = u2(i)  + adams_extrapolation_outward(u2p, i)
+    do it = 1, max_it
+        u1p(i+1) = +Rp(i+1) * u2(i+1)
+        u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1))
+        u1(i+1)  = u1(i)  + adams_interp_outward(u1p, i)
+        u2(i+1)  = u2(i)  + adams_interp_outward(u2p, i)
+    end do
+end do
+end subroutine
+
+subroutine rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p)
+real(dp), intent(in) :: R(:), Rp(:), rho(:)
+real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:)
+real(dp) :: RR
+integer :: i
+do i = 4, size(R)-1
+    RR = Rp(i+1)/R(i+1)
+    u2p(i+1) = &
+         - u2(i)    * (2         - 3._dp /2 *RR)*RR &
+         - u2p(i)   * (19._dp/12 - 55._dp/16*RR)*RR &
+         + u2p(i-1) * (5._dp /12 - 59._dp/16*RR)*RR &
+         - u2p(i-2) * (1._dp /12 - 37._dp/16*RR)*RR &
+         - u2p(i-3) *              9._dp /16*RR *RR &
+         - rho(i+1) * (4         - 3        *RR)*Rp(i+1)*pi
+
+    u2(i+1)  = u2(i)  + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24
+
+    u1p(i+1) = +Rp(i+1) * u2(i+1)
+    u1(i+1)  = u1(i)  + (9*u1p(i+1) + 19*u1p(i) - 5*u1p(i-1) + u1p(i-2)) / 24
+end do
+end subroutine
+
 function rpoisson_outward_pc(R, Rp, rho) result(V)
 ! Solves the equation V''(r) + 2/r*V'(r) = -4*pi*rho
 !
@@ -34,30 +73,16 @@ function rpoisson_outward_pc(R, Rp, rho) result(V)
 real(dp) :: V(size(R))
 
 real(dp), dimension(size(R)) :: u1, u2, u1p, u2p
-integer :: N, i, it
-integer, parameter :: max_it = 2
 real(dp) :: rho_mid(3)
 
-N = size(R)
 rho_mid = get_midpoints(R(:4), rho(:4))
 call rpoisson_outward_rk4(rho(:4), rho_mid, R(:4), &
     4*pi*integrate(Rp, rho*R), &
     0.0_dp, &
     u1(:4), u2(:4))
-
 u1p(:4) = u2(:4) * Rp(:4)
 u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4)
-
-do i = 4, N-1
-    u1(i+1)  = u1(i)  + adams_extrapolation_outward(u1p, i)
-    u2(i+1)  = u2(i)  + adams_extrapolation_outward(u2p, i)
-    do it = 1, max_it
-        u1p(i+1) = +Rp(i+1) * u2(i+1)
-        u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1))
-        u1(i+1)  = u1(i)  + adams_interp_outward(u1p, i)
-        u2(i+1)  = u2(i)  + adams_interp_outward(u2p, i)
-    end do
-end do
+call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p)
 V = u1
 end function
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ff11625..e892d96 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(atom_U)
 add_subdirectory(oscillator)
 add_subdirectory(well)
 add_subdirectory(pseudopotential)
+add_subdirectory(bench_poisson)
 if (WITH_LAPACK)
     add_subdirectory(double_min)
 endif ()
diff --git a/tests/bench_poisson/CMakeLists.txt b/tests/bench_poisson/CMakeLists.txt
new file mode 100644
index 0000000..a8bde77
--- /dev/null
+++ b/tests/bench_poisson/CMakeLists.txt
@@ -0,0 +1,11 @@
+include_directories(${PROJECT_BINARY_DIR}/src)
+
+project(bench_poisson)
+
+add_executable(bench_poisson bench_poisson.f90)
+target_link_libraries(bench_poisson dftatom)
+add_test(bench_poisson ${PROJECT_BINARY_DIR}/bench_poisson)
+
+add_executable(test_poisson_kernels test_poisson_kernels.f90)
+target_link_libraries(test_poisson_kernels dftatom)
+add_test(test_poisson_kernels ${PROJECT_BINARY_DIR}/test_poisson_kernels)
diff --git a/tests/bench_poisson/Plots.ipynb b/tests/bench_poisson/Plots.ipynb
new file mode 100644
index 0000000..40b198f
--- /dev/null
+++ b/tests/bench_poisson/Plots.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "D = loadtxt(\"a.txt\", skiprows=1)\n",
+    "N = D[:, 0]\n",
+    "t1 = D[:, 2]\n",
+    "t2 = D[:, 3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "plot(N, t1, label=\"kernel 1\")\n",
+    "plot(N, t2, label=\"kernel 2\")\n",
+    "legend(loc=\"upper left\")\n",
+    "grid()\n",
+    "xlabel(\"N\")\n",
+    "ylabel(\"time [s]\")\n",
+    "title(\"Radial Poisson kernels comparison\")\n",
+    "show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "plot(N, t1/t2)\n",
+    "grid()\n",
+    "xlabel(\"N\")\n",
+    "ylabel(\"Speedup\")\n",
+    "title(\"Speedup of kernel 2 over kernel 1\")\n",
+    "show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "freq = 2.6e9*1.2\n",
+    "plot(N, t1*freq/N, label=\"kernel 1\")\n",
+    "plot(N, t2*freq/N, label=\"kernel 2\")\n",
+    "legend()\n",
+    "grid()\n",
+    "xlabel(\"N\")\n",
+    "ylabel(\"Cycles per element\")\n",
+    "title(\"Comparison in clock cycles per loop body (array element)\")\n",
+    "ylim([30, None])\n",
+    "show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/tests/bench_poisson/bench_poisson.f90 b/tests/bench_poisson/bench_poisson.f90
new file mode 100644
index 0000000..f730abb
--- /dev/null
+++ b/tests/bench_poisson/bench_poisson.f90
@@ -0,0 +1,39 @@
+program bench_poisson
+use types, only: dp
+use mesh, only: mesh_exp, mesh_exp_deriv
+use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2
+implicit none
+
+real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp
+real(dp), allocatable, dimension(:) :: R, Rp, u1, u2, u1p, u2p, rho
+real(dp) :: t1, t2, dt1, dt2
+integer :: N, i, iter
+
+print *, "N iter dt1[s] dt2[s] total_time[s] total_mem[MB]"
+do N = 5000, 50000, 5000
+    allocate(R(N), Rp(N), u1(N), u2(N), u1p(N), u2p(N), rho(N))
+    R = mesh_exp(r_min, r_max, a, N-1)
+    Rp = mesh_exp_deriv(r_min, r_max, a, N-1)
+    rho = exp(-R)
+    u1 = 1
+    u2 = 1
+    u1p = 0
+    u2p = 0
+    iter = 40 * 5000/N
+    call cpu_time(t1)
+    do i = 1, iter
+        call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p)
+    end do
+    call cpu_time(t2)
+    dt1 = (t2-t1)/iter
+    call cpu_time(t1)
+    do i = 1, iter
+        call rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p)
+    end do
+    call cpu_time(t2)
+    dt2 = (t2-t1)/iter
+    print "(i6, i6, es12.4, es12.4, f6.2, f6.2)", N, iter, dt1, dt2, dt1*iter, &
+        N*8*7/1024._dp**2
+    deallocate(R, Rp, u1, u2, u1p, u2p, rho)
+end do
+end program
diff --git a/tests/bench_poisson/test_poisson_kernels.f90 b/tests/bench_poisson/test_poisson_kernels.f90
new file mode 100644
index 0000000..94d62a6
--- /dev/null
+++ b/tests/bench_poisson/test_poisson_kernels.f90
@@ -0,0 +1,34 @@
+program test_poisson_kernels
+use utils, only: assert
+use types, only: dp
+use mesh, only: mesh_exp, mesh_exp_deriv
+use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2
+implicit none
+
+! Mesh parameters:
+real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp
+integer, parameter :: NN = 5000
+
+real(dp) :: R(NN+1)
+real(dp), dimension(size(R)) :: Rp, rho
+real(dp), dimension(size(R), 2) :: u1, u2, u1p, u2p
+
+R = mesh_exp(r_min, r_max, a, NN)
+Rp = mesh_exp_deriv(r_min, r_max, a, NN)
+rho = exp(-R)
+u1 = 1
+u2 = 1
+u1p = 0
+u2p = 0
+call rpoisson_kernel1(R, Rp, rho, u1(:, 1), u2(:, 1), u1p(:, 1), u2p(:, 1))
+call rpoisson_kernel2(R, Rp, rho, u1(:, 2), u2(:, 2), u1p(:, 2), u2p(:, 2))
+print *, "Errors:"
+print *, maxval(abs(u1(:,1)-u1(:,2)))
+print *, maxval(abs(u2(:,1)-u2(:,2)))
+print *, maxval(abs(u1p(:,1)-u1p(:,2)))
+print *, maxval(abs(u2p(:,1)-u2p(:,2)))
+call assert(maxval(abs(u1(:,1)-u1(:,2))) < 1e-10_dp)
+call assert(maxval(abs(u2(:,1)-u2(:,2))) < 1e-15_dp)
+call assert(maxval(abs(u1p(:,1)-u1p(:,2))) < 1e-9_dp)
+call assert(maxval(abs(u2p(:,1)-u2p(:,2))) < 1e-16_dp)
+end program