From 1b5a8c4917616723c24c59fbffc1e455163f1ee0 Mon Sep 17 00:00:00 2001 From: Angus Gibson Date: Tue, 1 Apr 2025 15:22:18 +1100 Subject: [PATCH] Drastically improve efficiency of tripole generation In the tripole, we use the make_topo_gen routine. This was allocating two arrays with 20,000,000 elements to hold topography for the patch. The target grid is divided into blocks of 100x25 points, and for each point in these blocks, for ever block, both of the large arrays were being completely zeroed. This meant that performance was almost completely dominated by memset. Isolating to just 10 of these blocks (but including NetCDF input and output), I saw 98.2% of the CPU time spent in memset, taking over 2 minutes to process. With this patch, it takes about 5 seconds to process the blocks, where most of that time is spent in KD tree generation and NetCDF input/output. An entire invocation of gen_topo from GEBCO to a 1/10 full-globe grid went down from around 2 hours to 6 minutes. --- src/gen_topo.f90 | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/gen_topo.f90 b/src/gen_topo.f90 index 9ca1e27..cae8e36 100644 --- a/src/gen_topo.f90 +++ b/src/gen_topo.f90 @@ -581,7 +581,6 @@ subroutine make_topo_gen(topo_in, x_in, y_in, topo_out, x_out, y_out, topo_all_o tree => kdtree2_create(possie, sort =.false., rearrange =.true.) num_max = 20000000 - allocate(t_s(num_max), t_s_all(num_max)) allocate(results(num_max)) do j = 1, jm @@ -610,14 +609,16 @@ subroutine make_topo_gen(topo_in, x_in, y_in, topo_out, x_out, y_out, topo_all_o cycle end if + num_found = min(num_found, num_max) + + allocate(t_s(num_found), t_s_all(num_found), source=0.0) + wet_in_poly = 0 in_poly = 0 - t_s = 0 - t_s_all = 0 bndsx = [x_out(i, j), x_out(i+1, j), x_out(i+1, j+1), x_out(i, j+1)] bndsy = [y_out(i, j), y_out(i+1, j), y_out(i+1, j+1), y_out(i, j+1)] - do n = 1, min(num_found, num_max) + do n = 1, num_found ib = idx(results(n)%idx) jb = jdx(results(n)%idx) !if (topo_in(ib, jb) > 0.0) cycle @@ -645,6 +646,8 @@ subroutine make_topo_gen(topo_in, x_in, y_in, topo_out, x_out, y_out, topo_all_o call quicksort(t_s, frst, lst) topo_med_out(i, j) = t_s(max(wet_in_poly/2, 1)) end if + + deallocate(t_s, t_s_all) end do end do