From d01d2c0f1cb80b407fad2ea28e3699f59858dc16 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 10:29:33 +0100
Subject: [PATCH 01/21] Optimize cell neighbor lookup in pairwise loops

---
 src/computef.f90 | 53 ++++++++++++++++++++++++++++++++++-------------
 src/computeg.f90 | 54 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/src/computef.f90 b/src/computef.f90
index b5927ce..cff5016 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -17,6 +17,10 @@ subroutine computef(n,x,f)
    integer :: n, i, j, k, icell
    integer :: ilugan, ilubar, icart, itype, imol, iatom, idatom
    integer :: cell(3)
+   integer :: ip1, jp1, kp1, jm1, km1
+   integer :: neigh000, neigh100, neigh010, neigh001
+   integer :: neigh1m10, neigh10m1, neigh01m1, neigh011, neigh110, neigh101
+   integer :: neigh1m1m1, neigh1m11, neigh11m1, neigh111
 
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
@@ -120,28 +124,49 @@ subroutine computef(n,x,f)
       j = cell(2)
       k = cell(3)
 
-      icart = latomfirst(i,j,k)
+      ip1 = cell_ind(i + 1, ncells(1))
+      jp1 = cell_ind(j + 1, ncells(2))
+      kp1 = cell_ind(k + 1, ncells(3))
+      jm1 = cell_ind(j - 1, ncells(2))
+      km1 = cell_ind(k - 1, ncells(3))
+
+      neigh000 = latomfirst(i,j,k)
+      neigh100 = latomfirst(ip1,j,k)
+      neigh010 = latomfirst(i,jp1,k)
+      neigh001 = latomfirst(i,j,kp1)
+      neigh1m10 = latomfirst(ip1,jm1,k)
+      neigh10m1 = latomfirst(ip1,j,km1)
+      neigh01m1 = latomfirst(i,jp1,km1)
+      neigh011 = latomfirst(i,jp1,kp1)
+      neigh110 = latomfirst(ip1,jp1,k)
+      neigh101 = latomfirst(ip1,j,kp1)
+      neigh1m1m1 = latomfirst(ip1,jm1,km1)
+      neigh1m11 = latomfirst(ip1,jm1,kp1)
+      neigh11m1 = latomfirst(ip1,jp1,km1)
+      neigh111 = latomfirst(ip1,jp1,kp1)
+
+      icart = neigh000
       do while( icart > 0 )
 
          if(comptype(ibtype(icart))) then
             ! Interactions inside cell
             f = f + fparc(icart,latomnext(icart))
             ! Interactions of cells that share faces (6 faces - 3 forward)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,k)) ! 4 - (1, 0, 0)
-            f = f + fparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),k)) ! 5 - (0, 1, 0)
-            f = f + fparc(icart,latomfirst(i,j,cell_ind(k+1, ncells(3)))) ! 6 - (0, 0, 1)
+            f = f + fparc(icart,neigh100) ! 4 - (1, 0, 0)
+            f = f + fparc(icart,neigh010) ! 5 - (0, 1, 0)
+            f = f + fparc(icart,neigh001) ! 6 - (0, 0, 1)
             ! Interactions of cells that share axes (12 edges - 6 forward)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),k)) ! 4 - (1, -1, 0)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,cell_ind(k-1, ncells(3)))) ! 5 - (1, 0, -1)
-            f = f + fparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 6 - (0, 1, -1)
-            f = f + fparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 9 - (0, 1, 1)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),k)) ! 10 - (1, 1, 0)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,cell_ind(k+1, ncells(3)))) ! 11 - (1, 0, 1)
+            f = f + fparc(icart,neigh1m10) ! 4 - (1, -1, 0)
+            f = f + fparc(icart,neigh10m1) ! 5 - (1, 0, -1)
+            f = f + fparc(icart,neigh01m1) ! 6 - (0, 1, -1)
+            f = f + fparc(icart,neigh011) ! 9 - (0, 1, 1)
+            f = f + fparc(icart,neigh110) ! 10 - (1, 1, 0)
+            f = f + fparc(icart,neigh101) ! 11 - (1, 0, 1)
             ! Interactions of cells that share vertices (8 vertices, 8 forward)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 5 - (1, -1, -1)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 6 - (1, -1, 1)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 7 - (1, 1, -1)
-            f = f + fparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 8 - (1, 1, 1)
+            f = f + fparc(icart,neigh1m1m1) ! 5 - (1, -1, -1)
+            f = f + fparc(icart,neigh1m11) ! 6 - (1, -1, 1)
+            f = f + fparc(icart,neigh11m1) ! 7 - (1, 1, -1)
+            f = f + fparc(icart,neigh111) ! 8 - (1, 1, 1)
          end if
 
          icart = latomnext(icart)
diff --git a/src/computeg.f90 b/src/computeg.f90
index ddbddf9..25a6370 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -20,6 +20,10 @@ subroutine computeg(n,x,g)
    integer :: icell, cell(3)
    integer :: k1, k2
    integer :: iratcount
+   integer :: ip1, jp1, kp1, jm1, km1
+   integer :: neigh000, neigh100, neigh010, neigh001
+   integer :: neigh1m10, neigh10m1, neigh01m1, neigh011, neigh110, neigh101
+   integer :: neigh1m1m1, neigh1m11, neigh11m1, neigh111
 
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
@@ -110,28 +114,49 @@ subroutine computeg(n,x,g)
          j = cell(2)
          k = cell(3)
 
-         icart = latomfirst(i,j,k)
+         ip1 = cell_ind(i + 1, ncells(1))
+         jp1 = cell_ind(j + 1, ncells(2))
+         kp1 = cell_ind(k + 1, ncells(3))
+         jm1 = cell_ind(j - 1, ncells(2))
+         km1 = cell_ind(k - 1, ncells(3))
+
+         neigh000 = latomfirst(i,j,k)
+         neigh100 = latomfirst(ip1,j,k)
+         neigh010 = latomfirst(i,jp1,k)
+         neigh001 = latomfirst(i,j,kp1)
+         neigh011 = latomfirst(i,jp1,kp1)
+         neigh01m1 = latomfirst(i,jp1,km1)
+         neigh110 = latomfirst(ip1,jp1,k)
+         neigh101 = latomfirst(ip1,j,kp1)
+         neigh1m10 = latomfirst(ip1,jm1,k)
+         neigh10m1 = latomfirst(ip1,j,km1)
+         neigh111 = latomfirst(ip1,jp1,kp1)
+         neigh11m1 = latomfirst(ip1,jp1,km1)
+         neigh1m11 = latomfirst(ip1,jm1,kp1)
+         neigh1m1m1 = latomfirst(ip1,jm1,km1)
+
+         icart = neigh000
          do while( icart > 0 )
 
             if(comptype(ibtype(icart))) then
                ! Interactions inside cell
                call gparc(icart,latomnext(icart))
                ! Interactions of cells that share faces (6 faces - 3 forward)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,k)) ! 4 - (1, 0, 0)
-               call gparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),k)) ! 5 - (0, 1, 0)
-               call gparc(icart,latomfirst(i,j,cell_ind(k+1, ncells(3)))) ! 6 - (0, 0, 1)
+               call gparc(icart,neigh100) ! 4 - (1, 0, 0)
+               call gparc(icart,neigh010) ! 5 - (0, 1, 0)
+               call gparc(icart,neigh001) ! 6 - (0, 0, 1)
                ! Interactions of cells that share axes (12 edges - 6 forward)
-               call gparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 5 - (0, 1, 1)
-               call gparc(icart,latomfirst(i,cell_ind(j+1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 6 - (0, 1, -1)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),k)) ! 9 - (1, 1, 0)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,cell_ind(k+1, ncells(3)))) ! 10 - (1, 0, 1)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),k)) ! 11 - (1, -1, 0)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),j,cell_ind(k-1, ncells(3)))) ! 12 - (1, 0, -1)
+               call gparc(icart,neigh011) ! 5 - (0, 1, 1)
+               call gparc(icart,neigh01m1) ! 6 - (0, 1, -1)
+               call gparc(icart,neigh110) ! 9 - (1, 1, 0)
+               call gparc(icart,neigh101) ! 10 - (1, 0, 1)
+               call gparc(icart,neigh1m10) ! 11 - (1, -1, 0)
+               call gparc(icart,neigh10m1) ! 12 - (1, 0, -1)
                ! Interactions of cells that share vertices (8 vertices, 4 forward)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 1 - (1, 1, 1)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j+1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 2 - (1, 1, -1)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),cell_ind(k+1, ncells(3)))) ! 3 - (1, -1, 1)
-               call gparc(icart,latomfirst(cell_ind(i+1, ncells(1)),cell_ind(j-1, ncells(2)),cell_ind(k-1, ncells(3)))) ! 4 - (1, -1, -1)
+               call gparc(icart,neigh111) ! 1 - (1, 1, 1)
+               call gparc(icart,neigh11m1) ! 2 - (1, 1, -1)
+               call gparc(icart,neigh1m11) ! 3 - (1, -1, 1)
+               call gparc(icart,neigh1m1m1) ! 4 - (1, -1, -1)
             end if
 
             icart = latomnext(icart)
@@ -242,4 +267,3 @@ subroutine computeg(n,x,g)
 
    return
 end subroutine computeg
-

From 9a03d75e724e98d94eb235e219427fdd87d5d660 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 11:12:18 +0100
Subject: [PATCH 02/21] Optimize pairwise kernels in fparc/gparc

---
 src/fparc.f90 | 45 +++++++++++++++++++++++++++++++------------
 src/gparc.f90 | 53 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/src/fparc.f90 b/src/fparc.f90
index 13429f1..be73625 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -11,7 +11,7 @@ double precision function fparc(icart,firstjcart)
 
    use sizes
    use compute_data
-   use pbc, only : delta_vector, pbc_length
+   use pbc, only : pbc_length
    implicit none
 
    ! SCALAR ARGUMENTS
@@ -20,7 +20,23 @@ double precision function fparc(icart,firstjcart)
    ! LOCAL SCALARS
    integer :: jcart
    double precision :: datom, tol, short_tol, short_tol_penalty, short_tol_scale
-   double precision :: vdiff(3)
+   double precision :: dx, dy, dz
+   double precision :: xi, yi, zi, ri, rii, fsi, short_ri, short_si
+   double precision :: inv_pbcx, inv_pbcy, inv_pbcz
+   logical :: use_short_i
+
+   xi = xcart(icart,1)
+   yi = xcart(icart,2)
+   zi = xcart(icart,3)
+   ri = radius(icart)
+   rii = radius_ini(icart)
+   fsi = fscale(icart)
+   short_ri = short_radius(icart)
+   short_si = short_radius_scale(icart)
+   use_short_i = use_short_radius(icart)
+   inv_pbcx = 1.d0 / pbc_length(1)
+   inv_pbcy = 1.d0 / pbc_length(2)
+   inv_pbcz = 1.d0 / pbc_length(3)
 
    fparc = 0.0d0
    jcart = firstjcart
@@ -50,22 +66,28 @@ double precision function fparc(icart,firstjcart)
       !
       ! Otherwise, compute distance and evaluate function for this pair
       !
-      vdiff = delta_vector(xcart(icart,:), xcart(jcart,:), pbc_length)
-      datom = ( vdiff(1) )**2 + ( vdiff(2) )**2 + ( vdiff(3) )**2
-      tol = (radius(icart)+radius(jcart))**2
+      dx = xi - xcart(jcart,1)
+      dy = yi - xcart(jcart,2)
+      dz = zi - xcart(jcart,3)
+      dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+      dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+      dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+      datom = dx*dx + dy*dy + dz*dz
+
+      tol = (ri + radius(jcart))**2
       if ( datom < tol ) then
-         fparc = fparc + fscale(icart)*fscale(jcart)*(datom-tol)**2
-         if ( use_short_radius(icart) .or. use_short_radius(jcart) ) then
-            short_tol = (short_radius(icart)+short_radius(jcart))**2
+         fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
+         if ( use_short_i .or. use_short_radius(jcart) ) then
+            short_tol = (short_ri + short_radius(jcart))**2
             if ( datom < short_tol ) then
                short_tol_penalty = datom-short_tol
-               short_tol_scale = dsqrt(short_radius_scale(icart)*short_radius_scale(jcart))
+               short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
                short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
-               fparc = fparc + fscale(icart)*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+               fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
             end if
          end if
       end if
-      tol = (radius_ini(icart)+radius_ini(jcart))**2
+      tol = (rii + radius_ini(jcart))**2
       fdist = dmax1(tol-datom,fdist)
       if ( move ) then
          fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
@@ -75,4 +97,3 @@ double precision function fparc(icart,firstjcart)
    end do
 
 end function fparc
-
diff --git a/src/gparc.f90 b/src/gparc.f90
index 9a68a76..7858520 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -10,7 +10,7 @@ subroutine gparc(icart,firstjcart)
 
    use sizes
    use compute_data
-   use pbc, only : delta_vector, pbc_length
+   use pbc, only : pbc_length
    implicit none
 
    ! SCALAR ARGUMENTS
@@ -19,7 +19,22 @@ subroutine gparc(icart,firstjcart)
    ! LOCAL SCALARS
    integer :: jcart
    double precision :: datom, dtemp, xdiff, tol, short_tol, short_tol_scale
-   double precision :: vdiff(3)
+   double precision :: dx, dy, dz
+   double precision :: xi, yi, zi, ri, fsi, short_ri, short_si
+   double precision :: inv_pbcx, inv_pbcy, inv_pbcz
+   logical :: use_short_i
+
+   xi = xcart(icart,1)
+   yi = xcart(icart,2)
+   zi = xcart(icart,3)
+   ri = radius(icart)
+   fsi = fscale(icart)
+   short_ri = short_radius(icart)
+   short_si = short_radius_scale(icart)
+   use_short_i = use_short_radius(icart)
+   inv_pbcx = 1.d0 / pbc_length(1)
+   inv_pbcy = 1.d0 / pbc_length(2)
+   inv_pbcz = 1.d0 / pbc_length(3)
 
    jcart = firstjcart
    do while ( jcart > 0 )
@@ -48,33 +63,38 @@ subroutine gparc(icart,firstjcart)
       !
       ! Otherwise, compute distance and evaluate function for this pair
       !
-      tol = (radius(icart)+radius(jcart))**2
-      vdiff = delta_vector(xcart(icart,:), xcart(jcart,:), pbc_length)
-      datom = (vdiff(1))**2 + (vdiff(2))**2 + (vdiff(3))**2
+      tol = (ri+radius(jcart))**2
+      dx = xi - xcart(jcart,1)
+      dy = yi - xcart(jcart,2)
+      dz = zi - xcart(jcart,3)
+      dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+      dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+      dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+      datom = dx*dx + dy*dy + dz*dz
       if( datom < tol ) then
-         dtemp = fscale(icart)*fscale(jcart) * 4.d0 * (datom - tol)
-         xdiff = dtemp*vdiff(1)
+         dtemp = fsi*fscale(jcart) * 4.d0 * (datom - tol)
+         xdiff = dtemp*dx
          gxcar(icart,1)= gxcar(icart,1) + xdiff
          gxcar(jcart,1)= gxcar(jcart,1) - xdiff
-         xdiff = dtemp*vdiff(2)
+         xdiff = dtemp*dy
          gxcar(icart,2)= gxcar(icart,2) + xdiff
          gxcar(jcart,2)= gxcar(jcart,2) - xdiff
-         xdiff = dtemp*vdiff(3)
+         xdiff = dtemp*dz
          gxcar(icart,3)= gxcar(icart,3) + xdiff
          gxcar(jcart,3)= gxcar(jcart,3) - xdiff
-         if ( use_short_radius(icart) .or. use_short_radius(jcart) ) then
-            short_tol = ( short_radius(icart) + short_radius(jcart) )**2
+         if ( use_short_i .or. use_short_radius(jcart) ) then
+            short_tol = ( short_ri + short_radius(jcart) )**2
             if ( datom < short_tol ) then
-               short_tol_scale = dsqrt(short_radius_scale(icart)*short_radius_scale(jcart))
+               short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
                short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
-               dtemp = fscale(icart)*fscale(jcart) * 4.d0 * short_tol_scale*(datom - short_tol)
-               xdiff = dtemp*vdiff(1)
+               dtemp = fsi*fscale(jcart) * 4.d0 * short_tol_scale*(datom - short_tol)
+               xdiff = dtemp*dx
                gxcar(icart,1)= gxcar(icart,1) + xdiff
                gxcar(jcart,1)= gxcar(jcart,1) - xdiff
-               xdiff = dtemp*vdiff(2)
+               xdiff = dtemp*dy
                gxcar(icart,2)= gxcar(icart,2) + xdiff
                gxcar(jcart,2)= gxcar(jcart,2) - xdiff
-               xdiff = dtemp*vdiff(3)
+               xdiff = dtemp*dz
                gxcar(icart,3)= gxcar(icart,3) + xdiff
                gxcar(jcart,3)= gxcar(jcart,3) - xdiff
             end if
@@ -84,4 +104,3 @@ subroutine gparc(icart,firstjcart)
    end do
    return
 end subroutine gparc
-

From 208f05a9f0a54e0a4a6fddef63dddd53cfa93c02 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 11:41:26 +0100
Subject: [PATCH 03/21] Refactor forward cell-neighbor traversal with shared
 offsets

---
 src/cell_indexing.f90 | 27 ++++++++++++++++--
 src/computef.f90      | 63 ++++++++++++------------------------------
 src/computeg.f90      | 64 ++++++++++++-------------------------------
 3 files changed, 61 insertions(+), 93 deletions(-)

diff --git a/src/cell_indexing.f90 b/src/cell_indexing.f90
index fc8c517..7595b5a 100644
--- a/src/cell_indexing.f90
+++ b/src/cell_indexing.f90
@@ -1,7 +1,30 @@
 module cell_indexing
 
-implicit none
-public :: setcell, icell_to_cell, index_cell
+implicit none
+public :: setcell, icell_to_cell, index_cell, n_forward_offsets, forward_offsets
+
+! Ordered list of relative cell offsets used for forward-only traversal.
+!  1: (0, 0, 0) -> self cell (handled with latomnext to avoid double-counting)
+!  2-4: forward face neighbors
+!  5-10: forward edge neighbors
+! 11-14: forward vertex neighbors
+integer, parameter :: n_forward_offsets = 14
+integer, parameter :: forward_offsets(3,n_forward_offsets) = reshape([ &
+   0, 0, 0, &
+   1, 0, 0, &
+   0, 1, 0, &
+   0, 0, 1, &
+   1,-1, 0, &
+   1, 0,-1, &
+   0, 1,-1, &
+   0, 1, 1, &
+   1, 1, 0, &
+   1, 0, 1, &
+   1,-1,-1, &
+   1,-1, 1, &
+   1, 1,-1, &
+   1, 1, 1  &
+], [3,n_forward_offsets])
 
 contains 
 !
diff --git a/src/computef.f90 b/src/computef.f90
index cff5016..9206b3a 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -9,18 +9,16 @@
 subroutine computef(n,x,f)
 
    use sizes
-   use cell_indexing, only: index_cell, icell_to_cell, setcell
+   use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
    use compute_data
    use pbc
    implicit none
 
    integer :: n, i, j, k, icell
    integer :: ilugan, ilubar, icart, itype, imol, iatom, idatom
-   integer :: cell(3)
-   integer :: ip1, jp1, kp1, jm1, km1
-   integer :: neigh000, neigh100, neigh010, neigh001
-   integer :: neigh1m10, neigh10m1, neigh01m1, neigh011, neigh110, neigh101
-   integer :: neigh1m1m1, neigh1m11, neigh11m1, neigh111
+   integer :: cell(3), neigh_cell(3)
+   integer :: ioffset
+   integer :: neigh_first(n_forward_offsets)
 
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
@@ -124,49 +122,24 @@ subroutine computef(n,x,f)
       j = cell(2)
       k = cell(3)
 
-      ip1 = cell_ind(i + 1, ncells(1))
-      jp1 = cell_ind(j + 1, ncells(2))
-      kp1 = cell_ind(k + 1, ncells(3))
-      jm1 = cell_ind(j - 1, ncells(2))
-      km1 = cell_ind(k - 1, ncells(3))
-
-      neigh000 = latomfirst(i,j,k)
-      neigh100 = latomfirst(ip1,j,k)
-      neigh010 = latomfirst(i,jp1,k)
-      neigh001 = latomfirst(i,j,kp1)
-      neigh1m10 = latomfirst(ip1,jm1,k)
-      neigh10m1 = latomfirst(ip1,j,km1)
-      neigh01m1 = latomfirst(i,jp1,km1)
-      neigh011 = latomfirst(i,jp1,kp1)
-      neigh110 = latomfirst(ip1,jp1,k)
-      neigh101 = latomfirst(ip1,j,kp1)
-      neigh1m1m1 = latomfirst(ip1,jm1,km1)
-      neigh1m11 = latomfirst(ip1,jm1,kp1)
-      neigh11m1 = latomfirst(ip1,jp1,km1)
-      neigh111 = latomfirst(ip1,jp1,kp1)
-
-      icart = neigh000
+      ! Load current cell and forward neighbors using the shared offset ordering:
+      ! (0,0,0), 3 faces, 6 edges, 4 vertices.
+      do ioffset = 1, n_forward_offsets
+         neigh_cell(1) = cell_ind(i + forward_offsets(1,ioffset), ncells(1))
+         neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
+         neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
+         neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+      end do
+
+      icart = neigh_first(1)
       do while( icart > 0 )
 
          if(comptype(ibtype(icart))) then
-            ! Interactions inside cell
+            ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
             f = f + fparc(icart,latomnext(icart))
-            ! Interactions of cells that share faces (6 faces - 3 forward)
-            f = f + fparc(icart,neigh100) ! 4 - (1, 0, 0)
-            f = f + fparc(icart,neigh010) ! 5 - (0, 1, 0)
-            f = f + fparc(icart,neigh001) ! 6 - (0, 0, 1)
-            ! Interactions of cells that share axes (12 edges - 6 forward)
-            f = f + fparc(icart,neigh1m10) ! 4 - (1, -1, 0)
-            f = f + fparc(icart,neigh10m1) ! 5 - (1, 0, -1)
-            f = f + fparc(icart,neigh01m1) ! 6 - (0, 1, -1)
-            f = f + fparc(icart,neigh011) ! 9 - (0, 1, 1)
-            f = f + fparc(icart,neigh110) ! 10 - (1, 1, 0)
-            f = f + fparc(icart,neigh101) ! 11 - (1, 0, 1)
-            ! Interactions of cells that share vertices (8 vertices, 8 forward)
-            f = f + fparc(icart,neigh1m1m1) ! 5 - (1, -1, -1)
-            f = f + fparc(icart,neigh1m11) ! 6 - (1, -1, 1)
-            f = f + fparc(icart,neigh11m1) ! 7 - (1, 1, -1)
-            f = f + fparc(icart,neigh111) ! 8 - (1, 1, 1)
+            do ioffset = 2, n_forward_offsets
+               f = f + fparc(icart,neigh_first(ioffset))
+            end do
          end if
 
          icart = latomnext(icart)
diff --git a/src/computeg.f90 b/src/computeg.f90
index 25a6370..51b2d5f 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -9,7 +9,7 @@
 subroutine computeg(n,x,g)
 
    use sizes
-   use cell_indexing, only: index_cell, icell_to_cell, setcell
+   use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
    use compute_data
    use pbc
    implicit none
@@ -17,13 +17,10 @@ subroutine computeg(n,x,g)
    integer :: n
    integer :: idatom, iatom, irest
    integer :: i, j, k, ilubar, ilugan, icart, itype, imol
-   integer :: icell, cell(3)
+   integer :: icell, cell(3), neigh_cell(3)
    integer :: k1, k2
-   integer :: iratcount
-   integer :: ip1, jp1, kp1, jm1, km1
-   integer :: neigh000, neigh100, neigh010, neigh001
-   integer :: neigh1m10, neigh10m1, neigh01m1, neigh011, neigh110, neigh101
-   integer :: neigh1m1m1, neigh1m11, neigh11m1, neigh111
+   integer :: iratcount, ioffset
+   integer :: neigh_first(n_forward_offsets)
 
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
@@ -114,49 +111,24 @@ subroutine computeg(n,x,g)
          j = cell(2)
          k = cell(3)
 
-         ip1 = cell_ind(i + 1, ncells(1))
-         jp1 = cell_ind(j + 1, ncells(2))
-         kp1 = cell_ind(k + 1, ncells(3))
-         jm1 = cell_ind(j - 1, ncells(2))
-         km1 = cell_ind(k - 1, ncells(3))
-
-         neigh000 = latomfirst(i,j,k)
-         neigh100 = latomfirst(ip1,j,k)
-         neigh010 = latomfirst(i,jp1,k)
-         neigh001 = latomfirst(i,j,kp1)
-         neigh011 = latomfirst(i,jp1,kp1)
-         neigh01m1 = latomfirst(i,jp1,km1)
-         neigh110 = latomfirst(ip1,jp1,k)
-         neigh101 = latomfirst(ip1,j,kp1)
-         neigh1m10 = latomfirst(ip1,jm1,k)
-         neigh10m1 = latomfirst(ip1,j,km1)
-         neigh111 = latomfirst(ip1,jp1,kp1)
-         neigh11m1 = latomfirst(ip1,jp1,km1)
-         neigh1m11 = latomfirst(ip1,jm1,kp1)
-         neigh1m1m1 = latomfirst(ip1,jm1,km1)
-
-         icart = neigh000
+         ! Load current cell and forward neighbors using the shared offset ordering:
+         ! (0,0,0), 3 faces, 6 edges, 4 vertices.
+         do ioffset = 1, n_forward_offsets
+            neigh_cell(1) = cell_ind(i + forward_offsets(1,ioffset), ncells(1))
+            neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
+            neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
+            neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         end do
+
+         icart = neigh_first(1)
          do while( icart > 0 )
 
             if(comptype(ibtype(icart))) then
-               ! Interactions inside cell
+               ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
                call gparc(icart,latomnext(icart))
-               ! Interactions of cells that share faces (6 faces - 3 forward)
-               call gparc(icart,neigh100) ! 4 - (1, 0, 0)
-               call gparc(icart,neigh010) ! 5 - (0, 1, 0)
-               call gparc(icart,neigh001) ! 6 - (0, 0, 1)
-               ! Interactions of cells that share axes (12 edges - 6 forward)
-               call gparc(icart,neigh011) ! 5 - (0, 1, 1)
-               call gparc(icart,neigh01m1) ! 6 - (0, 1, -1)
-               call gparc(icart,neigh110) ! 9 - (1, 1, 0)
-               call gparc(icart,neigh101) ! 10 - (1, 0, 1)
-               call gparc(icart,neigh1m10) ! 11 - (1, -1, 0)
-               call gparc(icart,neigh10m1) ! 12 - (1, 0, -1)
-               ! Interactions of cells that share vertices (8 vertices, 4 forward)
-               call gparc(icart,neigh111) ! 1 - (1, 1, 1)
-               call gparc(icart,neigh11m1) ! 2 - (1, 1, -1)
-               call gparc(icart,neigh1m11) ! 3 - (1, -1, 1)
-               call gparc(icart,neigh1m1m1) ! 4 - (1, -1, -1)
+               do ioffset = 2, n_forward_offsets
+                  call gparc(icart,neigh_first(ioffset))
+               end do
             end if
 
             icart = latomnext(icart)

From dcd96911a654f72d3e25f080c4b3775a22a905da Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 11:54:15 +0100
Subject: [PATCH 04/21] Split pair kernels into fast/short/fixed paths and
 prefilter active atoms

---
 src/computef.f90 |  19 +++--
 src/computeg.f90 |  19 +++--
 src/fparc.f90    | 168 ++++++++++++++++++++++++++++-----------
 src/gparc.f90    | 200 +++++++++++++++++++++++++++++++++++------------
 4 files changed, 296 insertions(+), 110 deletions(-)

diff --git a/src/computef.f90 b/src/computef.f90
index 9206b3a..9566633 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -129,19 +129,26 @@ subroutine computef(n,x,f)
          neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
          neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
          neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         do while ( neigh_first(ioffset) > 0 )
+            if ( comptype(ibtype(neigh_first(ioffset))) ) exit
+            neigh_first(ioffset) = latomnext(neigh_first(ioffset))
+         end do
       end do
 
       icart = neigh_first(1)
       do while( icart > 0 )
 
-         if(comptype(ibtype(icart))) then
-            ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
-            f = f + fparc(icart,latomnext(icart))
-            do ioffset = 2, n_forward_offsets
-               f = f + fparc(icart,neigh_first(ioffset))
-            end do
+         if ( .not. comptype(ibtype(icart)) ) then
+            icart = latomnext(icart)
+            cycle
          end if
 
+         ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
+         f = f + fparc(icart,latomnext(icart))
+         do ioffset = 2, n_forward_offsets
+            f = f + fparc(icart,neigh_first(ioffset))
+         end do
+
          icart = latomnext(icart)
       end do
 
diff --git a/src/computeg.f90 b/src/computeg.f90
index 51b2d5f..0e39955 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -118,19 +118,26 @@ subroutine computeg(n,x,g)
             neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
             neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
             neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+            do while ( neigh_first(ioffset) > 0 )
+               if ( comptype(ibtype(neigh_first(ioffset))) ) exit
+               neigh_first(ioffset) = latomnext(neigh_first(ioffset))
+            end do
          end do
 
          icart = neigh_first(1)
          do while( icart > 0 )
 
-            if(comptype(ibtype(icart))) then
-               ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
-               call gparc(icart,latomnext(icart))
-               do ioffset = 2, n_forward_offsets
-                  call gparc(icart,neigh_first(ioffset))
-               end do
+            if ( .not. comptype(ibtype(icart)) ) then
+               icart = latomnext(icart)
+               cycle
             end if
 
+            ! Offset #1 is self-cell and uses latomnext(icart) to keep forward-only pairs.
+            call gparc(icart,latomnext(icart))
+            do ioffset = 2, n_forward_offsets
+               call gparc(icart,neigh_first(ioffset))
+            end do
+
             icart = latomnext(icart)
          end do
 
diff --git a/src/fparc.f90 b/src/fparc.f90
index be73625..bff6efe 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -23,7 +23,7 @@ double precision function fparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, rii, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
-   logical :: use_short_i
+   logical :: use_short_i, fixed_i
 
    xi = xcart(icart,1)
    yi = xcart(icart,2)
@@ -34,50 +34,78 @@ double precision function fparc(icart,firstjcart)
    short_ri = short_radius(icart)
    short_si = short_radius_scale(icart)
    use_short_i = use_short_radius(icart)
+   fixed_i = fixedatom(icart)
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
 
    fparc = 0.0d0
-   jcart = firstjcart
-   do while ( jcart > 0 )
-      !
-      ! Cycle if this type is not to be computed
-      !
-      if ( .not. comptype(ibtype(jcart))) then
-         jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Cycle if the atoms are from the same molecule
-      !
-      if ( ibmol(icart) == ibmol(jcart) .and. &
-         ibtype(icart) == ibtype(jcart) ) then
-         jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Cycle if both atoms are from fixed molecules
-      !
-      if ( fixedatom(icart) .and. fixedatom(jcart) ) then
+   if ( .not. fixed_i .and. .not. use_short_i ) then
+      ! Fast path: movable atom i and no short-radius term from i.
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
+
+         tol = (ri + radius(jcart))**2
+         if ( datom < tol ) then
+            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
+            if ( use_short_radius(jcart) ) then
+               short_tol = (short_ri + short_radius(jcart))**2
+               if ( datom < short_tol ) then
+                  short_tol_penalty = datom-short_tol
+                  short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
+                  short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
+                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+               end if
+            end if
+         end if
+         tol = (rii + radius_ini(jcart))**2
+         fdist = dmax1(tol-datom,fdist)
+         if ( move ) then
+            fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
+            fdist_atom(jcart) = dmax1(tol-datom,fdist_atom(jcart))
+         end if
          jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Otherwise, compute distance and evaluate function for this pair
-      !
-      dx = xi - xcart(jcart,1)
-      dy = yi - xcart(jcart,2)
-      dz = zi - xcart(jcart,3)
-      dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
-      dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
-      dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
-      datom = dx*dx + dy*dy + dz*dz
+      end do
+   else if ( .not. fixed_i ) then
+      ! Short-radius-enabled path for movable atoms.
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
 
-      tol = (ri + radius(jcart))**2
-      if ( datom < tol ) then
-         fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
-         if ( use_short_i .or. use_short_radius(jcart) ) then
+         tol = (ri + radius(jcart))**2
+         if ( datom < tol ) then
+            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
             short_tol = (short_ri + short_radius(jcart))**2
             if ( datom < short_tol ) then
                short_tol_penalty = datom-short_tol
@@ -86,14 +114,60 @@ double precision function fparc(icart,firstjcart)
                fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
             end if
          end if
-      end if
-      tol = (rii + radius_ini(jcart))**2
-      fdist = dmax1(tol-datom,fdist)
-      if ( move ) then
-         fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
-         fdist_atom(jcart) = dmax1(tol-datom,fdist_atom(jcart))
-      end if
-      jcart = latomnext(jcart)
-   end do
+         tol = (rii + radius_ini(jcart))**2
+         fdist = dmax1(tol-datom,fdist)
+         if ( move ) then
+            fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
+            fdist_atom(jcart) = dmax1(tol-datom,fdist_atom(jcart))
+         end if
+         jcart = latomnext(jcart)
+      end do
+   else
+      ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( fixedatom(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
+
+         tol = (ri + radius(jcart))**2
+         if ( datom < tol ) then
+            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
+            if ( use_short_i .or. use_short_radius(jcart) ) then
+               short_tol = (short_ri + short_radius(jcart))**2
+               if ( datom < short_tol ) then
+                  short_tol_penalty = datom-short_tol
+                  short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
+                  short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
+                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+               end if
+            end if
+         end if
+         tol = (rii + radius_ini(jcart))**2
+         fdist = dmax1(tol-datom,fdist)
+         if ( move ) then
+            fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
+            fdist_atom(jcart) = dmax1(tol-datom,fdist_atom(jcart))
+         end if
+         jcart = latomnext(jcart)
+      end do
+   end if
 
 end function fparc
diff --git a/src/gparc.f90 b/src/gparc.f90
index 7858520..920ea1b 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -22,7 +22,7 @@ subroutine gparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
-   logical :: use_short_i
+   logical :: use_short_i, fixed_i
 
    xi = xcart(icart,1)
    yi = xcart(icart,2)
@@ -32,62 +32,101 @@ subroutine gparc(icart,firstjcart)
    short_ri = short_radius(icart)
    short_si = short_radius_scale(icart)
    use_short_i = use_short_radius(icart)
+   fixed_i = fixedatom(icart)
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
 
-   jcart = firstjcart
-   do while ( jcart > 0 )
-      !
-      ! Cycle if this type is not to be computed
-      !
-      if ( .not. comptype(ibtype(jcart))) then
-         jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Cycle if the atoms are from the same molecule
-      !
-      if ( ibmol(icart) == ibmol(jcart) .and. &
-         ibtype(icart) == ibtype(jcart) ) then
-         jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Cycle if both atoms are from fixed molecules
-      !
-      if ( fixedatom(icart) .and. fixedatom(jcart) ) then
+   if ( .not. fixed_i .and. .not. use_short_i ) then
+      ! Fast path: movable atom i and no short-radius term from i.
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         tol = (ri+radius(jcart))**2
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
+         if( datom < tol ) then
+            dtemp = fsi*fscale(jcart) * 4.d0 * (datom - tol)
+            xdiff = dtemp*dx
+            gxcar(icart,1)= gxcar(icart,1) + xdiff
+            gxcar(jcart,1)= gxcar(jcart,1) - xdiff
+            xdiff = dtemp*dy
+            gxcar(icart,2)= gxcar(icart,2) + xdiff
+            gxcar(jcart,2)= gxcar(jcart,2) - xdiff
+            xdiff = dtemp*dz
+            gxcar(icart,3)= gxcar(icart,3) + xdiff
+            gxcar(jcart,3)= gxcar(jcart,3) - xdiff
+            if ( use_short_radius(jcart) ) then
+               short_tol = ( short_ri + short_radius(jcart) )**2
+               if ( datom < short_tol ) then
+                  short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
+                  short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
+                  dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
+                  xdiff = dtemp*dx
+                  gxcar(icart,1)= gxcar(icart,1) + xdiff
+                  gxcar(jcart,1)= gxcar(jcart,1) - xdiff
+                  xdiff = dtemp*dy
+                  gxcar(icart,2)= gxcar(icart,2) + xdiff
+                  gxcar(jcart,2)= gxcar(jcart,2) - xdiff
+                  xdiff = dtemp*dz
+                  gxcar(icart,3)= gxcar(icart,3) + xdiff
+                  gxcar(jcart,3)= gxcar(jcart,3) - xdiff
+               end if
+            end if
+         end if
+
          jcart = latomnext(jcart)
-         cycle
-      end if
-      !
-      ! Otherwise, compute distance and evaluate function for this pair
-      !
-      tol = (ri+radius(jcart))**2
-      dx = xi - xcart(jcart,1)
-      dy = yi - xcart(jcart,2)
-      dz = zi - xcart(jcart,3)
-      dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
-      dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
-      dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
-      datom = dx*dx + dy*dy + dz*dz
-      if( datom < tol ) then
-         dtemp = fsi*fscale(jcart) * 4.d0 * (datom - tol)
-         xdiff = dtemp*dx
-         gxcar(icart,1)= gxcar(icart,1) + xdiff
-         gxcar(jcart,1)= gxcar(jcart,1) - xdiff
-         xdiff = dtemp*dy
-         gxcar(icart,2)= gxcar(icart,2) + xdiff
-         gxcar(jcart,2)= gxcar(jcart,2) - xdiff
-         xdiff = dtemp*dz
-         gxcar(icart,3)= gxcar(icart,3) + xdiff
-         gxcar(jcart,3)= gxcar(jcart,3) - xdiff
-         if ( use_short_i .or. use_short_radius(jcart) ) then
+      end do
+   else if ( .not. fixed_i ) then
+      ! Short-radius-enabled path for movable atoms.
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         tol = (ri+radius(jcart))**2
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
+         if( datom < tol ) then
+            dtemp = fsi*fscale(jcart) * 4.d0 * (datom - tol)
+            xdiff = dtemp*dx
+            gxcar(icart,1)= gxcar(icart,1) + xdiff
+            gxcar(jcart,1)= gxcar(jcart,1) - xdiff
+            xdiff = dtemp*dy
+            gxcar(icart,2)= gxcar(icart,2) + xdiff
+            gxcar(jcart,2)= gxcar(jcart,2) - xdiff
+            xdiff = dtemp*dz
+            gxcar(icart,3)= gxcar(icart,3) + xdiff
+            gxcar(jcart,3)= gxcar(jcart,3) - xdiff
             short_tol = ( short_ri + short_radius(jcart) )**2
             if ( datom < short_tol ) then
                short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
                short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
-               dtemp = fsi*fscale(jcart) * 4.d0 * short_tol_scale*(datom - short_tol)
+               dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
                xdiff = dtemp*dx
                gxcar(icart,1)= gxcar(icart,1) + xdiff
                gxcar(jcart,1)= gxcar(jcart,1) - xdiff
@@ -99,8 +138,67 @@ subroutine gparc(icart,firstjcart)
                gxcar(jcart,3)= gxcar(jcart,3) - xdiff
             end if
          end if
-      end if
-      jcart = latomnext(jcart)
-   end do
+
+         jcart = latomnext(jcart)
+      end do
+   else
+      ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
+      jcart = firstjcart
+      do while ( jcart > 0 )
+         if ( .not. comptype(ibtype(jcart))) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+         if ( fixedatom(jcart) ) then
+            jcart = latomnext(jcart)
+            cycle
+         end if
+
+         tol = (ri+radius(jcart))**2
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
+         dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
+         dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
+         dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
+         datom = dx*dx + dy*dy + dz*dz
+         if( datom < tol ) then
+            dtemp = fsi*fscale(jcart) * 4.d0 * (datom - tol)
+            xdiff = dtemp*dx
+            gxcar(icart,1)= gxcar(icart,1) + xdiff
+            gxcar(jcart,1)= gxcar(jcart,1) - xdiff
+            xdiff = dtemp*dy
+            gxcar(icart,2)= gxcar(icart,2) + xdiff
+            gxcar(jcart,2)= gxcar(jcart,2) - xdiff
+            xdiff = dtemp*dz
+            gxcar(icart,3)= gxcar(icart,3) + xdiff
+            gxcar(jcart,3)= gxcar(jcart,3) - xdiff
+            if ( use_short_i .or. use_short_radius(jcart) ) then
+               short_tol = ( short_ri + short_radius(jcart) )**2
+               if ( datom < short_tol ) then
+                  short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
+                  short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
+                  dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
+                  xdiff = dtemp*dx
+                  gxcar(icart,1)= gxcar(icart,1) + xdiff
+                  gxcar(jcart,1)= gxcar(jcart,1) - xdiff
+                  xdiff = dtemp*dy
+                  gxcar(icart,2)= gxcar(icart,2) + xdiff
+                  gxcar(jcart,2)= gxcar(jcart,2) - xdiff
+                  xdiff = dtemp*dz
+                  gxcar(icart,3)= gxcar(icart,3) + xdiff
+                  gxcar(jcart,3)= gxcar(jcart,3) - xdiff
+               end if
+            end if
+         end if
+
+         jcart = latomnext(jcart)
+      end do
+   end if
+
    return
 end subroutine gparc

From 5775ac5abf2c9eda5a43256e703d1579bfe7033e Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:06:58 +0100
Subject: [PATCH 05/21] Add hot-path scalar streams for pairwise kernels

---
 src/compute_data.f90 | 31 ++++++++++++++++++++++++++++
 src/computef.f90     |  7 ++++++-
 src/computeg.f90     |  7 ++++++-
 src/fparc.f90        | 48 +++++++++++++++++++++++---------------------
 src/gparc.f90        | 48 +++++++++++++++++++++++---------------------
 src/initial.f90      |  2 ++
 src/setsizes.f90     |  3 ++-
 7 files changed, 97 insertions(+), 49 deletions(-)

diff --git a/src/compute_data.f90 b/src/compute_data.f90
index 3bce7c0..d19cd31 100644
--- a/src/compute_data.f90
+++ b/src/compute_data.f90
@@ -18,6 +18,9 @@ module compute_data
    integer, allocatable :: ityperest(:) ! (maxrest)
    integer, allocatable :: ibmol(:) ! (ntotat)
    integer, allocatable :: ibtype(:) ! (ntotat)
+   integer, allocatable :: ibmol_hot(:) ! packed stream mirror for hot paths
+   integer, allocatable :: ibtype_hot(:) ! packed stream mirror for hot paths
+   integer, allocatable :: fixed_short_marker(:) ! 0: movable/no-short, 1: movable/short, 2: fixed/no-short, 3: fixed/short
 
    double precision :: scale, scale2
    double precision :: fdist, frest
@@ -26,6 +29,7 @@ module compute_data
    double precision :: radmax
 
    double precision, allocatable :: xcart(:,:) ! (ntotat,3)
+   double precision, allocatable :: x(:), y(:), z(:) ! hot scalar streams for xcart
    double precision, allocatable :: coor(:,:) ! (ntotat,3)
    double precision, allocatable :: restpars(:,:) ! (maxrest,9)
    double precision, allocatable :: rot_bound(:,:,:) ! (ntype,3,2)
@@ -59,4 +63,31 @@ module compute_data
    integer, allocatable :: lcellnext(:) ! (ncells(1)*ncells(2)*ncells(3))
    logical, allocatable :: empty_cell(:,:,:) ! (ncells(1),ncells(2),ncells(3))
 
+contains
+
+   subroutine refresh_hot_buffers_full()
+      implicit none
+
+      if ( .not. allocated(x) ) return
+      x(:) = xcart(:,1)
+      y(:) = xcart(:,2)
+      z(:) = xcart(:,3)
+      ibtype_hot(:) = ibtype(:)
+      ibmol_hot(:) = ibmol(:)
+      fixed_short_marker(:) = merge(2, 0, fixedatom(:)) + merge(1, 0, use_short_radius(:))
+   end subroutine refresh_hot_buffers_full
+
+   subroutine refresh_hot_buffers_atom(icart)
+      implicit none
+      integer, intent(in) :: icart
+
+      if ( .not. allocated(x) ) return
+      x(icart) = xcart(icart,1)
+      y(icart) = xcart(icart,2)
+      z(icart) = xcart(icart,3)
+      ibtype_hot(icart) = ibtype(icart)
+      ibmol_hot(icart) = ibmol(icart)
+      fixed_short_marker(icart) = merge(2, 0, fixedatom(icart)) + merge(1, 0, use_short_radius(icart))
+   end subroutine refresh_hot_buffers_atom
+
 end module compute_data
diff --git a/src/computef.f90 b/src/computef.f90
index 9566633..20fb101 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -34,7 +34,10 @@ subroutine computef(n,x,f)
 
    ! Reset cells
 
-   if(.not.init1) call resetcells()
+   if(.not.init1) then
+      call resetcells()
+      call refresh_hot_buffers_full()
+   end if
 
    ! Transform baricenter and angles into cartesian coordinates
    ! Computes cartesian coordinates from vector x and coor
@@ -72,6 +75,7 @@ subroutine computef(n,x,f)
             ! Computing the cartesian coordinates for this atom
 
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
+            call refresh_hot_buffers_atom(icart)
 
             ! Adding to f the value relative to constraints for this atom
 
@@ -99,6 +103,7 @@ subroutine computef(n,x,f)
 
                ibtype(icart) = itype
                ibmol(icart) = imol
+               call refresh_hot_buffers_atom(icart)
 
             end if
 
diff --git a/src/computeg.f90 b/src/computeg.f90
index 0e39955..b3c086f 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -35,7 +35,10 @@ subroutine computeg(n,x,g)
 
    ! Reset cells
 
-   if(.not.init1) call resetcells()
+   if(.not.init1) then
+      call resetcells()
+      call refresh_hot_buffers_full()
+   end if
 
    ! Transform baricenter and angles into cartesian coordinates
 
@@ -65,6 +68,7 @@ subroutine computeg(n,x,g)
             icart = icart + 1
             idatom = idatom + 1
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
+            call refresh_hot_buffers_atom(icart)
 
             ! Gradient relative to the wall distace
             do iratcount = 1, nratom(icart)
@@ -88,6 +92,7 @@ subroutine computeg(n,x,g)
 
                ibtype(icart) = itype
                ibmol(icart) = imol
+               call refresh_hot_buffers_atom(icart)
             end if
 
          end do
diff --git a/src/fparc.f90 b/src/fparc.f90
index bff6efe..09e6b84 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -23,18 +23,20 @@ double precision function fparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, rii, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
+   integer :: marker_i
    logical :: use_short_i, fixed_i
 
-   xi = xcart(icart,1)
-   yi = xcart(icart,2)
-   zi = xcart(icart,3)
+   xi = x(icart)
+   yi = y(icart)
+   zi = z(icart)
    ri = radius(icart)
    rii = radius_ini(icart)
    fsi = fscale(icart)
    short_ri = short_radius(icart)
    short_si = short_radius_scale(icart)
-   use_short_i = use_short_radius(icart)
-   fixed_i = fixedatom(icart)
+   marker_i = fixed_short_marker(icart)
+   use_short_i = iand(marker_i,1) /= 0
+   fixed_i = marker_i >= 2
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
@@ -44,18 +46,18 @@ double precision function fparc(icart,firstjcart)
       ! Fast path: movable atom i and no short-radius term from i.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -64,7 +66,7 @@ double precision function fparc(icart,firstjcart)
          tol = (ri + radius(jcart))**2
          if ( datom < tol ) then
             fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
-            if ( use_short_radius(jcart) ) then
+            if ( iand(fixed_short_marker(jcart),1) /= 0 ) then
                short_tol = (short_ri + short_radius(jcart))**2
                if ( datom < short_tol ) then
                   short_tol_penalty = datom-short_tol
@@ -86,18 +88,18 @@ double precision function fparc(icart,firstjcart)
       ! Short-radius-enabled path for movable atoms.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -126,22 +128,22 @@ double precision function fparc(icart,firstjcart)
       ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( fixedatom(jcart) ) then
+         if ( fixed_short_marker(jcart) >= 2 ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -150,7 +152,7 @@ double precision function fparc(icart,firstjcart)
          tol = (ri + radius(jcart))**2
          if ( datom < tol ) then
             fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
-            if ( use_short_i .or. use_short_radius(jcart) ) then
+            if ( use_short_i .or. iand(fixed_short_marker(jcart),1) /= 0 ) then
                short_tol = (short_ri + short_radius(jcart))**2
                if ( datom < short_tol ) then
                   short_tol_penalty = datom-short_tol
diff --git a/src/gparc.f90 b/src/gparc.f90
index 920ea1b..1525476 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -22,17 +22,19 @@ subroutine gparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
+   integer :: marker_i
    logical :: use_short_i, fixed_i
 
-   xi = xcart(icart,1)
-   yi = xcart(icart,2)
-   zi = xcart(icart,3)
+   xi = x(icart)
+   yi = y(icart)
+   zi = z(icart)
    ri = radius(icart)
    fsi = fscale(icart)
    short_ri = short_radius(icart)
    short_si = short_radius_scale(icart)
-   use_short_i = use_short_radius(icart)
-   fixed_i = fixedatom(icart)
+   marker_i = fixed_short_marker(icart)
+   use_short_i = iand(marker_i,1) /= 0
+   fixed_i = marker_i >= 2
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
@@ -41,19 +43,19 @@ subroutine gparc(icart,firstjcart)
       ! Fast path: movable atom i and no short-radius term from i.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
          tol = (ri+radius(jcart))**2
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -69,7 +71,7 @@ subroutine gparc(icart,firstjcart)
             xdiff = dtemp*dz
             gxcar(icart,3)= gxcar(icart,3) + xdiff
             gxcar(jcart,3)= gxcar(jcart,3) - xdiff
-            if ( use_short_radius(jcart) ) then
+            if ( iand(fixed_short_marker(jcart),1) /= 0 ) then
                short_tol = ( short_ri + short_radius(jcart) )**2
                if ( datom < short_tol ) then
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
@@ -94,19 +96,19 @@ subroutine gparc(icart,firstjcart)
       ! Short-radius-enabled path for movable atoms.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
          tol = (ri+radius(jcart))**2
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -145,23 +147,23 @@ subroutine gparc(icart,firstjcart)
       ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype(jcart))) then
+         if ( .not. comptype(ibtype_hot(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol(icart) == ibmol(jcart) .and. ibtype(icart) == ibtype(jcart) ) then
+         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( fixedatom(jcart) ) then
+         if ( fixed_short_marker(jcart) >= 2 ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
          tol = (ri+radius(jcart))**2
-         dx = xi - xcart(jcart,1)
-         dy = yi - xcart(jcart,2)
-         dz = zi - xcart(jcart,3)
+         dx = xi - x(jcart)
+         dy = yi - y(jcart)
+         dz = zi - z(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -177,7 +179,7 @@ subroutine gparc(icart,firstjcart)
             xdiff = dtemp*dz
             gxcar(icart,3)= gxcar(icart,3) + xdiff
             gxcar(jcart,3)= gxcar(jcart,3) - xdiff
-            if ( use_short_i .or. use_short_radius(jcart) ) then
+            if ( use_short_i .or. iand(fixed_short_marker(jcart),1) /= 0 ) then
                short_tol = ( short_ri + short_radius(jcart) )**2
                if ( datom < short_tol ) then
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
diff --git a/src/initial.f90 b/src/initial.f90
index 2f8237e..daa1147 100644
--- a/src/initial.f90
+++ b/src/initial.f90
@@ -134,6 +134,7 @@ subroutine initial(n,x)
             idatom = idatom + 1
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
             fixedatom(icart) = .false.
+            call refresh_hot_buffers_atom(icart)
          end do
       end do
    end do
@@ -148,6 +149,7 @@ subroutine initial(n,x)
             xcart(icart,2) = coor(idfatom,2)
             xcart(icart,3) = coor(idfatom,3)
             fixedatom(icart) = .true.
+            call refresh_hot_buffers_atom(icart)
             ! Check if fixed molecules are compatible with PBC given
             if (using_pbc) then
                do i = 1, 3
diff --git a/src/setsizes.f90 b/src/setsizes.f90
index a73337e..6d368a8 100644
--- a/src/setsizes.f90
+++ b/src/setsizes.f90
@@ -325,7 +325,8 @@ subroutine setsizes()
    ! Allocate arrays depending on ntotat, nn, maxrest, and mrperatom
 
    allocate(nratom(ntotat),iratom(ntotat,mrperatom),ibmol(ntotat),&
-      ibtype(ntotat),xcart(ntotat,3),coor(ntotat,3),&
+      ibtype(ntotat),ibmol_hot(ntotat),ibtype_hot(ntotat),fixed_short_marker(ntotat),&
+      xcart(ntotat,3),x(ntotat),y(ntotat),z(ntotat),coor(ntotat,3),&
       radius(ntotat),radius_ini(ntotat),fscale(ntotat),&
       use_short_radius(ntotat), short_radius(ntotat), short_radius_scale(ntotat),&
       gxcar(ntotat,3),&

From 7a0703f2c131c42ec4bbe1bd03c629d29f18c638 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:14:20 +0100
Subject: [PATCH 06/21] Add per-cell radius bounds and pair-distance pruning

---
 src/compute_data.f90 |    2 +
 src/computef.f90     |   32 ++
 src/computeg.f90     |   31 ++
 src/initial.f90      | 1124 +++++++++++++++++++++---------------------
 src/resetcells.f90   |   94 ++--
 5 files changed, 680 insertions(+), 603 deletions(-)

diff --git a/src/compute_data.f90 b/src/compute_data.f90
index d19cd31..b9a1935 100644
--- a/src/compute_data.f90
+++ b/src/compute_data.f90
@@ -62,6 +62,8 @@ module compute_data
    integer :: lcellfirst
    integer, allocatable :: lcellnext(:) ! (ncells(1)*ncells(2)*ncells(3))
    logical, allocatable :: empty_cell(:,:,:) ! (ncells(1),ncells(2),ncells(3))
+   double precision, allocatable :: cell_max_radius(:,:,:) ! (ncells(1),ncells(2),ncells(3))
+   double precision, allocatable :: cell_max_short_radius(:,:,:) ! (ncells(1),ncells(2),ncells(3))
 
 contains
 
diff --git a/src/computef.f90 b/src/computef.f90
index 20fb101..f053930 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -20,6 +20,8 @@ subroutine computef(n,x,f)
    integer :: ioffset
    integer :: neigh_first(n_forward_offsets)
 
+   double precision :: min_cell_dist2, max_reach, reg_reach, short_reach
+
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
    double precision :: f,fparc,fplus
@@ -93,6 +95,11 @@ subroutine computef(n,x,f)
                latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
                latomfirst(cell(1),cell(2),cell(3)) = icart
 
+               cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
+               if ( use_short_radius(icart) ) then
+                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+               end if
+
                ! cell with atoms linked list
                if ( empty_cell(cell(1),cell(2),cell(3)) ) then
                   empty_cell(cell(1),cell(2),cell(3)) = .false.
@@ -138,6 +145,14 @@ subroutine computef(n,x,f)
             if ( comptype(ibtype(neigh_first(ioffset))) ) exit
             neigh_first(ioffset) = latomnext(neigh_first(ioffset))
          end do
+
+         if ( neigh_first(ioffset) <= 0 ) cycle
+
+         min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
+         reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         max_reach = dmax1(reg_reach, short_reach)
+         if ( min_cell_dist2 > max_reach*max_reach ) neigh_first(ioffset) = 0
       end do
 
       icart = neigh_first(1)
@@ -161,4 +176,21 @@ subroutine computef(n,x,f)
    end do
 
    return
+contains
+
+   double precision function cell_pair_min_dist2(cell_a, cell_b)
+      integer, intent(in) :: cell_a(3), cell_b(3)
+      integer :: idim
+      double precision :: center_a, center_b, delta, gap
+
+      cell_pair_min_dist2 = 0.d0
+      do idim = 1, 3
+         center_a = pbc_min(idim) + (dble(cell_a(idim)) - 0.5d0) * cell_length(idim)
+         center_b = pbc_min(idim) + (dble(cell_b(idim)) - 0.5d0) * cell_length(idim)
+         delta = dabs(delta_vector(center_a, center_b, pbc_length(idim)))
+         gap = dmax1(0.d0, delta - cell_length(idim))
+         cell_pair_min_dist2 = cell_pair_min_dist2 + gap*gap
+      end do
+   end function cell_pair_min_dist2
+
 end subroutine computef
diff --git a/src/computeg.f90 b/src/computeg.f90
index b3c086f..84ed5b7 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -22,6 +22,7 @@ subroutine computeg(n,x,g)
    integer :: iratcount, ioffset
    integer :: neigh_first(n_forward_offsets)
 
+   double precision :: min_cell_dist2, max_reach, reg_reach, short_reach
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
       dv2beta(3), dv2gama(3), dv2teta(3),&
@@ -82,6 +83,11 @@ subroutine computeg(n,x,g)
                latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
                latomfirst(cell(1),cell(2),cell(3)) = icart
 
+               cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
+               if ( use_short_radius(icart) ) then
+                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+               end if
+
                ! cell with atoms linked list
                if ( empty_cell(cell(1),cell(2),cell(3))) then
                   empty_cell(cell(1),cell(2),cell(3)) = .false.
@@ -127,6 +133,14 @@ subroutine computeg(n,x,g)
                if ( comptype(ibtype(neigh_first(ioffset))) ) exit
                neigh_first(ioffset) = latomnext(neigh_first(ioffset))
             end do
+
+            if ( neigh_first(ioffset) <= 0 ) cycle
+
+            min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
+            reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+            short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+            max_reach = dmax1(reg_reach, short_reach)
+            if ( min_cell_dist2 > max_reach*max_reach ) neigh_first(ioffset) = 0
          end do
 
          icart = neigh_first(1)
@@ -250,4 +264,21 @@ subroutine computeg(n,x,g)
    end do
 
    return
+contains
+
+   double precision function cell_pair_min_dist2(cell_a, cell_b)
+      integer, intent(in) :: cell_a(3), cell_b(3)
+      integer :: idim
+      double precision :: center_a, center_b, delta, gap
+
+      cell_pair_min_dist2 = 0.d0
+      do idim = 1, 3
+         center_a = pbc_min(idim) + (dble(cell_a(idim)) - 0.5d0) * cell_length(idim)
+         center_b = pbc_min(idim) + (dble(cell_b(idim)) - 0.5d0) * cell_length(idim)
+         delta = dabs(delta_vector(center_a, center_b, pbc_length(idim)))
+         gap = dmax1(0.d0, delta - cell_length(idim))
+         cell_pair_min_dist2 = cell_pair_min_dist2 + gap*gap
+      end do
+   end function cell_pair_min_dist2
+
 end subroutine computeg
diff --git a/src/initial.f90 b/src/initial.f90
index daa1147..3a4a6de 100644
--- a/src/initial.f90
+++ b/src/initial.f90
@@ -1,560 +1,564 @@
-!
-!  Written by Leandro Martínez, 2009-2011.
-!  Copyright (c) 2009-2018, Leandro Martínez, Jose Mario Martinez,
-!  Ernesto G. Birgin.
-!
-! Subroutine initial: Subroutine that reset parameters and
-!                     builds the initial point
-!
-
-subroutine initial(n,x)
-
-   use exit_codes
-   use sizes
-   use cell_indexing, only: setcell
-   use compute_data
-   use input, only : randini, ntype_with_fixed, fix, chkgrad, avoidoverlap,&
-      discale, precision, sidemax, restart_from, input_itype,&
-      nloop0_type
-   use usegencan
-   use ahestetic
-   use pbc
-   implicit none
-   integer :: n, i, j, idatom, iatom, ilubar, ilugan, icart, itype, &
-      imol, ntry, cell(3), ic, jc, kc, ifatom, &
-      idfatom, iftype, jatom, ioerr, max_guess_try
-
-   double precision :: x(n), beta, gamma, theta, &
-      fx, cell_side, rnd, xrnd(3), v1(3), v2(3), v3(3), xcm(3)
-   double precision :: scale_rnd_guess
-   double precision, parameter :: twopi = 8.d0*datan(1.d0)
-   double precision, allocatable :: cm_min(:,:), cm_max(:,:)
-
-   logical :: overlap, movebadprint, hasbad
-
-   character(len=strl) :: record
-
-   ! We need to initialize the move logical variable
-   move = .false.
-
-   ! Default status of the function evaluation
-   init1 = .false.
-   lcellfirst = 0
-
-   ! Initialize the comptype logical array
-   comptype(1:ntype_with_fixed) = .true.
-
-   ! Penalty factors for the objective function relative to restrictions
-   ! Default values: scale = 1.d2, scale2 = 1.d1
-
-   scale = 1.d0
-   scale2 = 1.d-2
-   call tobar()
-
-   ! Compute maximum internal distance within each type of molecule
-
-   do itype = 1, ntype
-      dmax(itype) = 0.d0
-      idatom = idfirst(itype) - 1
-      do iatom = 1, natoms(itype) - 1
-         do jatom = iatom + 1, natoms(itype)
-            dmax(itype) = dmax1 ( dmax(itype),&
-               (coor(idatom+iatom,1)-coor(idatom+jatom,1))**2+&
-               (coor(idatom+iatom,2)-coor(idatom+jatom,2))**2+&
-               (coor(idatom+iatom,3)-coor(idatom+jatom,3))**2 )
-         end do
-      end do
-      dmax(itype) = dsqrt(dmax(itype))
-      write(*,*) ' Maximum internal distance of type ',itype,': ',&
-         dmax(itype)
-      if(dmax(itype).eq.0.) dmax(itype) = 1.d0
-   end do
-
-   ! Maximum size of the system: if you system is very large (about
-   ! 80 nm wide), increase the sidemax parameter.
-   ! Otherwise, the packing can be slow and unsucesful
-   allocate(cm_min(ntype,3), cm_max(ntype,3))
-   x(:) = 0.d0
-   call restmol(1,0,n,x,fx,.true.)
-   sizemin(1:3) = x(1:3) - sidemax
-   sizemax(1:3) = x(1:3) + sidemax
-   write(*,*) ' All atoms must be within these coordinates: '
-   write(*,*) '  x: [ ', sizemin(1),', ', sizemax(1), ' ] '
-   write(*,*) '  y: [ ', sizemin(2),', ', sizemax(2), ' ] '
-   write(*,*) '  z: [ ', sizemin(3),', ', sizemax(3), ' ] '
-   write(*,*) ' If the system is larger than this, increase the sidemax parameter. '
-
-   ! Create first aleatory guess
-
-   i = 0
-   j = ntotmol*3
-   do itype = 1, ntype
-      do imol = 1, nmols(itype)
-         x(i+1) = sizemin(1) + rnd()*(sizemax(1)-sizemin(1))
-         x(i+2) = sizemin(2) + rnd()*(sizemax(2)-sizemin(2))
-         x(i+3) = sizemin(3) + rnd()*(sizemax(3)-sizemin(3))
-         if ( constrain_rot(itype,1) ) then
-            x(j+1) = ( rot_bound(itype,1,1) - dabs(rot_bound(itype,1,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,1,2))
-         else
-            x(j+1) = twopi*rnd()
-         end if
-         if ( constrain_rot(itype,2) ) then
-            x(j+2) = ( rot_bound(itype,2,1) - dabs(rot_bound(itype,2,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,2,2))
-         else
-            x(j+2) = twopi*rnd()
-         end if
-         if ( constrain_rot(itype,3) ) then
-            x(j+3) = ( rot_bound(itype,3,1) - dabs(rot_bound(itype,3,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,3,2))
-         else
-            x(j+3) = twopi*rnd()
-         end if
-         i = i + 3
-         j = j + 3
-      end do
-   end do
-
-   ! Initialize cartesian coordinate array for the first time
-
-   ilubar = 0
-   ilugan = ntotmol*3
-   icart = 0
-   do itype = 1, ntype
-      do imol = 1, nmols(itype)
-         xcm = x(ilubar+1:ilubar+3)
-         beta = x(ilugan+1)
-         gamma = x(ilugan+2)
-         theta = x(ilugan+3)
-         call eulerrmat(beta,gamma,theta,v1,v2,v3)
-         idatom = idfirst(itype) - 1
-         do iatom = 1, natoms(itype)
-            icart = icart + 1
-            idatom = idatom + 1
-            call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
-            fixedatom(icart) = .false.
-            call refresh_hot_buffers_atom(icart)
-         end do
-      end do
-   end do
-   if(fix) then
-      icart = ntotat - nfixedat
-      do iftype = ntype + 1, ntype_with_fixed
-         idfatom = idfirst(iftype) - 1
-         do ifatom = 1, natoms(iftype)
-            idfatom = idfatom + 1
-            icart = icart + 1
-            xcart(icart,1) = coor(idfatom,1)
-            xcart(icart,2) = coor(idfatom,2)
-            xcart(icart,3) = coor(idfatom,3)
-            fixedatom(icart) = .true.
-            call refresh_hot_buffers_atom(icart)
-            ! Check if fixed molecules are compatible with PBC given
-            if (using_pbc) then
-               do i = 1, 3
-                  if (xcart(icart, i) < pbc_min(i) .or. xcart(icart, i) > pbc_max(i)) then
-                     write(*,*) "ERROR: Fixed molecule are outside the PBC box:"
-                     write(*,*) "   Atom: ", ifatom, " of molecule: ", input_itype(iftype), " - coordinate: ", i
-                     write(*,*) "  ", xcart(icart, i), " not in [", pbc_min(i), ", ", pbc_max(i), "]"
-                     write(*,*) "(after translating/rotation the fixed molecule with the given orientation)"
-                     stop exit_code_input_error
-                  end if
-               end do
-            end if
-         end do
-      end do
-   end if
-
-   ! Performing some steps of optimization for the restrictions only
-
-   write(*,hash3_line)
-   write(*,"('  Building initial approximation ... ' )")
-   write(*,hash3_line)
-   write(*,"('  Adjusting initial point to fit the constraints ')")
-   write(*,dash2_line)
-   init1 = .true.
-   call swaptype(n,x,itype,0) ! Initialize swap arrays
-   itype = 0
-   do while( itype <= ntype )
-      itype = itype + 1
-      if ( itype <= ntype ) then
-         call swaptype(n,x,itype,1) ! Set arrays for this type
-      else
-         call swaptype(n,x,itype,3) ! Restore arrays if itype = ntype + 1
-         exit
-      end if
-      write(*,dash3_line)
-      write(*,*) ' Molecules of type: ', input_itype(itype)
-      write(*,*)
-      i = 0
-      hasbad = .true.
-      call computef(n,x,fx)
-      do while( frest > precision .and. i.le. nloop0_type(itype)-1 .and. hasbad)
-         i = i + 1
-         write(*,prog1_line)
-         call pgencan(n,x,fx)
-         call computef(n,x,fx)
-         if(frest > precision) then
-            write(*,"( a,i6,a,i6 )")'  Fixing bad orientations ... ', i,' of ', nloop0_type(itype)
-            movebadprint = .true.
-            call movebad(n,x,fx,movebadprint)
-         end if
-      end do
-      write(*,*)
-      write(*,*) ' Restraint-only function value: ', fx
-      write(*,*) ' Maximum violation of the restraints: ', frest
-
-      call swaptype(n,x,itype,2) ! Save current type results
-
-      if( hasbad .and. frest > precision ) then
-         write(*,*) ' ERROR: Packmol was unable to put the molecules'
-         write(*,*) '        in the desired regions even without'
-         write(*,*) '        considering distance tolerances. '
-         write(*,*) '        Probably there is something wrong with'
-         write(*,*) '        the constraints, since it seems that'
-         write(*,*) '        the molecules cannot satisfy them at'
-         write(*,*) '        at all. '
-         write(*,*) '        Please check the spatial constraints and'
-         write(*,*) '        try again.'
-         if ( i .ge. nloop0_type(itype)-1 ) then
-         end if
-         write(*,*) ' >The maximum number of cycles (',nloop0_type(itype),') was achieved.'
-         write(*,*) '  You may try increasing it with the',' nloop0 keyword, as in: nloop0 1000 '
-         stop exit_code_failed_to_converge
-      end if
-   end do
-   init1 = .false.
-
-   ! Rescaling sizemin and sizemax in order to build the patch of cells
-
-   write(*,dash3_line)
-   write(*,*) ' Rescaling maximum and minimum coordinates... '
-   sizemin(1:3) = 1.d20
-   sizemax(1:3) = -1.d20
-   ! Maximum and minimum coordinates of fixed molecules
-   icart = ntotat - nfixedat
-   do itype = ntype + 1, ntype_with_fixed
-      do imol = 1, nmols(itype)
-         do iatom = 1, natoms(itype)
-            icart = icart + 1
-            sizemin(1) = dmin1(sizemin(1),xcart(icart,1))
-            sizemin(2) = dmin1(sizemin(2),xcart(icart,2))
-            sizemin(3) = dmin1(sizemin(3),xcart(icart,3))
-            sizemax(1) = dmax1(sizemax(1),xcart(icart,1))
-            sizemax(2) = dmax1(sizemax(2),xcart(icart,2))
-            sizemax(3) = dmax1(sizemax(3),xcart(icart,3))
-         end do
-      end do
-   end do
-   icart = 0
-   do itype = 1, ntype
-      do imol = 1, nmols(itype)
-         do iatom = 1, natoms(itype)
-            icart = icart + 1
-            sizemin(1) = dmin1(sizemin(1),xcart(icart,1))
-            sizemin(2) = dmin1(sizemin(2),xcart(icart,2))
-            sizemin(3) = dmin1(sizemin(3),xcart(icart,3))
-            sizemax(1) = dmax1(sizemax(1),xcart(icart,1))
-            sizemax(2) = dmax1(sizemax(2),xcart(icart,2))
-            sizemax(3) = dmax1(sizemax(3),xcart(icart,3))
-         end do
-      end do
-   end do
-   write(*,*) ' Mininum and maximum coordinates after constraint fitting: '
-   write(*,*) '  x: [ ', sizemin(1),', ', sizemax(1), ' ] '
-   write(*,*) '  y: [ ', sizemin(2),', ', sizemax(2), ' ] '
-   write(*,*) '  z: [ ', sizemin(3),', ', sizemax(3), ' ] '
-   sizemin = sizemin - 1.1d0 * radmax
-   sizemax = sizemax + 1.1d0 * radmax
-   ! When *not* using PBC, actually PBCs are used, but particles
-   ! wont interact across the PBC because there is an empty layer
-   ! of cells, given by the shift in the sizemin and sizemax dimensions,
-   ! above defined. 
-   if (.not. using_pbc) then
-      pbc_min = sizemin
-      pbc_max = sizemax
-      pbc_length = pbc_max - pbc_min
-   end if 
-
-   ! Computing the size of the patches
-   write(*,*) ' Computing size of patches... '
-   cell_side = discale * (1.01d0 * radmax)
-   do i = 1, 3
-      ncells(i) = max(1,floor(pbc_length(i)/cell_side))
-      cell_length(i) = pbc_length(i) / ncells(i)
-   end do
-   write(*,*) ' Number of cells in each direction and cell sides: '
-   write(*,*) '  x: ', ncells(1), ' cells of size ', cell_length(1)
-   write(*,*) '  y: ', ncells(2), ' cells of size ', cell_length(2)
-   write(*,*) '  z: ', ncells(3), ' cells of size ', cell_length(3)
-   write(*,"(a, 3(f10.5))") '  Cell-system length: ', pbc_length(1:3) 
-
-   ! Allocate arrays depending on the number of cells
-   allocate(latomfirst(ncells(1),ncells(2),ncells(3)))
-   allocate(latomfix(ncells(1),ncells(2),ncells(3)))
-   allocate(lcellnext(ncells(1)*ncells(2)*ncells(3)))
-   allocate(empty_cell(ncells(1),ncells(2),ncells(3)))
-
-   ! Reseting linked lists arrays
-   latomfix(:,:,:) = 0
-   latomfirst(:,:,:) = 0
-   latomnext(:) = 0
-   empty_cell(:,:,:) = .true.
-
-   ! If there are fixed molecules, add them permanently to the latomfix array
-   if(fix) then
-      write(*,*) ' Add fixed molecules to permanent arrays... '
-      icart = ntotat - nfixedat
-      do iftype = ntype + 1, ntype_with_fixed
-         do ifatom = 1, natoms(iftype)
-            icart = icart + 1
-            call setcell(xcart(icart,:),cell)
-            latomnext(icart) = latomfix(cell(1),cell(2),cell(3))
-            latomfix(cell(1),cell(2),cell(3)) = icart
-            latomfirst(cell(1),cell(2),cell(3)) = icart
-            ibtype(icart) = iftype
-            ibmol(icart) = 1
-         end do
-      end do
-   end if
-
-   ! Reseting mass centers to be within the regions
-
-   write(*,*) ' Reseting center of mass... '
-   cm_min = 1.d20
-   cm_max = -1.d20
-   icart = 0
-   do itype = 1, ntype
-      do imol = 1, nmols(itype)
-         xcm(:) = 0.d0
-         do iatom = 1, natoms(itype)
-            icart = icart + 1
-            xcm(:) = xcm(:) + xcart(icart,1:3)
-         end do
-         xcm(:) = xcm(:) / natoms(itype)
-         cm_min(itype,:) = dmin1(cm_min(itype,:),xcm(:))
-         cm_max(itype,:) = dmax1(cm_max(itype,:),xcm(:))
-      end do
-   end do
-
-   ! If there is a restart file for all system, read it
-
-   if ( restart_from(0) /= 'none' ) then
-      record = restart_from(0)
-      write(*,*) ' Restarting all system from file: ', trim(adjustl(record))
-      open(10,file=restart_from(0),status='old',action='read',iostat=ioerr)
-      ilubar = 0
-      ilugan = ntotmol*3
-      do i = 1, ntotmol
-         read(10,*,iostat=ioerr) x(ilubar+1), x(ilubar+2), x(ilubar+3), &
-            x(ilugan+1), x(ilugan+2), x(ilugan+3)
-         if ( ioerr /= 0 ) then
-            write(*,*) ' ERROR: Could not read restart file: ', trim(adjustl(record))
-            stop exit_code_open_file
-         end if
-         ilubar = ilubar + 3
-         ilugan = ilugan + 3
-      end do
-      close(10)
-      return
-   end if
-
-   ! Building random initial point
-
-   write(*,dash3_line)
-   write(*,*) ' Setting initial trial coordinates ... '
-   write(*,dash2_line)
-
-   if ( chkgrad ) then
-      write(*,*) ' For checking gradient, will set avoidoverlap to false. '
-      avoidoverlap = .false.
-      max_guess_try = 1
-      scale_rnd_guess = 1.5d0 ! move guess outside the constraints/box
-   else
-      max_guess_try = 20
-      scale_rnd_guess = 1.d0
-   end if
-
-   ! Setting random center of mass coordinates, within size limits
-
-   ilubar = 0
-   do itype = 1, ntype
-      if ( restart_from(itype) /= 'none' ) then
-         ilubar = ilubar + nmols(itype)*3
-         cycle
-      end if
-      do imol = 1, nmols(itype)
-         if ( .not. avoidoverlap ) then
-            fx = 1.d0
-            ntry = 0
-            do while((fx > precision) .and. (ntry < max_guess_try))
-               ntry = ntry + 1
-               call random_number(xrnd)
-               x(ilubar+1:ilubar+3) = &
-                  cm_min(itype,:) + xrnd(:)*(cm_max(itype,:)-cm_min(itype,:)) * scale_rnd_guess
-               call restmol(itype,ilubar,n,x,fx,.false.)
-            end do
-         else
-            fx = 1.d0
-            ntry = 0
-            overlap = .false.
-            do while((overlap .or. fx > precision) .and. (ntry < max_guess_try))
-               overlap = .false.
-               ntry = ntry + 1
-               call random_number(xrnd)
-               x(ilubar+1:ilubar+3) = &
-                  cm_min(itype,:) + xrnd(:)*(cm_max(itype,:)-cm_min(itype,:)) * scale_rnd_guess
-               if(fix) then
-                  call setcell(x(ilubar+1:ilubar+3),cell)
-                  icell: do ic = -1, 1 
-                     do jc = -1, 1
-                        do kc = -1, 1
-                           if(latomfix(cell_ind(cell(1)+ic,ncells(1)),&
-                                       cell_ind(cell(2)+jc,ncells(2)),&
-                                       cell_ind(cell(3)+kc,ncells(3)) &
-                                      ) /= 0) then
-                              overlap = .true.
-                              exit icell
-                           end if
-                        end do
-                     end do
-                  end do icell
-               end if
-               if(.not.overlap) call restmol(itype,ilubar,n,x,fx,.false.)
-            end do
-         end if
-         ilubar = ilubar + 3
-      end do
-   end do
-
-   ! Setting random angles, except if the rotations were constrained
-
-   ilugan = ntotmol*3
-   do itype = 1, ntype
-      if ( restart_from(itype) /= 'none' ) then
-         ilugan = ilugan + nmols(itype)*3
-         cycle
-      end if
-      do imol = 1, nmols(itype)
-         if ( constrain_rot(itype,1) ) then
-            x(ilugan+1) = ( rot_bound(itype,1,1) - dabs(rot_bound(itype,1,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,1,2))
-         else
-            x(ilugan+1) = twopi*rnd()
-         end if
-         if ( constrain_rot(itype,2) ) then
-            x(ilugan+2) = ( rot_bound(itype,2,1) - dabs(rot_bound(itype,2,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,2,2))
-         else
-            x(ilugan+2) = twopi*rnd()
-         end if
-         if ( constrain_rot(itype,3) ) then
-            x(ilugan+3) = ( rot_bound(itype,3,1) - dabs(rot_bound(itype,3,2)) ) + &
-               2.d0*rnd()*dabs(rot_bound(itype,3,2))
-         else
-            x(ilugan+3) = twopi*rnd()
-         end if
-         ilugan = ilugan + 3
-      end do
-   end do
-
-   ! Compare analytical and finite-difference gradients
-
-   if(chkgrad) then
-      cell_side = discale * (1.01d0 * radmax)
-      do i = 1, 3
-         ncells(i) = max(1,floor(pbc_length(i)/cell_side))
-         cell_length(i) = pbc_length(i) / ncells(i)
-      end do
-      call comparegrad(n,x)
-      stop
-   end if
-
-   !
-   ! Reading restart files of specific molecule types, if available
-   !
-
-   ilubar = 0
-   ilugan = ntotmol*3
-   do itype = 1, ntype
-      if ( restart_from(itype) /= 'none' ) then
-         record = restart_from(itype)
-         write(*,dash3_line)
-         write(*,*) ' Molecules of type: ', input_itype(itype)
-         write(*,*) ' Will restart coordinates from: ', trim(adjustl(record))
-         open(10,file=record,status='old',action='read',iostat=ioerr)
-         if ( ioerr /= 0 ) then
-            write(*,*) ' ERROR: Could not open restart file: ', trim(adjustl(record))
-            stop exit_code_open_file
-         end if
-         do i = 1, nmols(itype)
-            read(10,*,iostat=ioerr) x(ilubar+1), x(ilubar+2), x(ilubar+3), &
-               x(ilugan+1), x(ilugan+2), x(ilugan+3)
-            if ( ioerr /= 0 ) then
-               write(*,*) ' ERROR: Could not read restart file: ', trim(adjustl(record))
-               stop exit_code_open_file
-            end if
-            ilubar = ilubar + 3
-            ilugan = ilugan + 3
-         end do
-         close(10)
-         call swaptype(n,x,itype,0) ! Initialize swap arrays
-         call swaptype(n,x,itype,1) ! Set arrays for this type
-         call computef(n,x,fx)
-         write(*,*) ' Maximum violation of the restraints: ', frest
-         write(*,*) ' Maximum violation of minimum atom distances: ', fdist
-         call swaptype(n,x,itype,3) ! Restore all-molecule arrays
-      else
-         ilubar = ilubar + nmols(itype)*3
-         ilugan = ilugan + nmols(itype)*3
-      end if
-   end do
-
-   ! Return with current random point (not default)
-
-   if(randini) return
-
-   ! Adjusting current point to fit the constraints
-
-   init1 = .true.
-   call swaptype(n,x,itype,0) ! Initialize swap arrays
-   itype = 0
-   do while( itype <= ntype )
-      itype = itype + 1
-      if ( itype == ntype + 1 ) then
-         call swaptype(n,x,itype,3) ! Restore arrays for all molecules
-         exit
-      end if
-      if ( restart_from(itype) /= 'none' ) cycle
-      call swaptype(n,x,itype,1) ! Set arrays for this type
-      write(*,dash3_line)
-      write(*,*) ' Molecules of type: ', input_itype(itype)
-      write(*,*) ' Adjusting random positions to fit the constraints. '
-      i = 0
-      call computef(n,x,fx)
-      hasbad = .true.
-      do while( frest > precision .and. i <= nloop0_type(itype)-1 .and. hasbad)
-         i = i + 1
-         write(*,prog1_line)
-         call pgencan(n,x,fx)
-         call computef(n,x,fx)
-         if(frest > precision) then
-            write(*,"( a,i6,a,i6 )")'  Fixing bad orientations ... ', i,' of ', nloop0_type(itype)
-            movebadprint = .true.
-            call movebad(n,x,fx,movebadprint)
-         end if
-      end do
-      write(*,*) ' Restraint-only function value: ', fx
-      write(*,*) ' Maximum violation of the restraints: ', frest
-      call swaptype(n,x,itype,2) ! Save results for this type
-   end do
-   init1 = .false.
-   write(*,hash3_line)
-
-   ! Deallocate allocated arrays
-   deallocate(cm_min, cm_max)
-
-   return
-end subroutine initial
-
+!
+!  Written by Leandro Martínez, 2009-2011.
+!  Copyright (c) 2009-2018, Leandro Martínez, Jose Mario Martinez,
+!  Ernesto G. Birgin.
+!
+! Subroutine initial: Subroutine that reset parameters and
+!                     builds the initial point
+!
+
+subroutine initial(n,x)
+
+   use exit_codes
+   use sizes
+   use cell_indexing, only: setcell
+   use compute_data
+   use input, only : randini, ntype_with_fixed, fix, chkgrad, avoidoverlap,&
+      discale, precision, sidemax, restart_from, input_itype,&
+      nloop0_type
+   use usegencan
+   use ahestetic
+   use pbc
+   implicit none
+   integer :: n, i, j, idatom, iatom, ilubar, ilugan, icart, itype, &
+      imol, ntry, cell(3), ic, jc, kc, ifatom, &
+      idfatom, iftype, jatom, ioerr, max_guess_try
+
+   double precision :: x(n), beta, gamma, theta, &
+      fx, cell_side, rnd, xrnd(3), v1(3), v2(3), v3(3), xcm(3)
+   double precision :: scale_rnd_guess
+   double precision, parameter :: twopi = 8.d0*datan(1.d0)
+   double precision, allocatable :: cm_min(:,:), cm_max(:,:)
+
+   logical :: overlap, movebadprint, hasbad
+
+   character(len=strl) :: record
+
+   ! We need to initialize the move logical variable
+   move = .false.
+
+   ! Default status of the function evaluation
+   init1 = .false.
+   lcellfirst = 0
+
+   ! Initialize the comptype logical array
+   comptype(1:ntype_with_fixed) = .true.
+
+   ! Penalty factors for the objective function relative to restrictions
+   ! Default values: scale = 1.d2, scale2 = 1.d1
+
+   scale = 1.d0
+   scale2 = 1.d-2
+   call tobar()
+
+   ! Compute maximum internal distance within each type of molecule
+
+   do itype = 1, ntype
+      dmax(itype) = 0.d0
+      idatom = idfirst(itype) - 1
+      do iatom = 1, natoms(itype) - 1
+         do jatom = iatom + 1, natoms(itype)
+            dmax(itype) = dmax1 ( dmax(itype),&
+               (coor(idatom+iatom,1)-coor(idatom+jatom,1))**2+&
+               (coor(idatom+iatom,2)-coor(idatom+jatom,2))**2+&
+               (coor(idatom+iatom,3)-coor(idatom+jatom,3))**2 )
+         end do
+      end do
+      dmax(itype) = dsqrt(dmax(itype))
+      write(*,*) ' Maximum internal distance of type ',itype,': ',&
+         dmax(itype)
+      if(dmax(itype).eq.0.) dmax(itype) = 1.d0
+   end do
+
+   ! Maximum size of the system: if you system is very large (about
+   ! 80 nm wide), increase the sidemax parameter.
+   ! Otherwise, the packing can be slow and unsucesful
+   allocate(cm_min(ntype,3), cm_max(ntype,3))
+   x(:) = 0.d0
+   call restmol(1,0,n,x,fx,.true.)
+   sizemin(1:3) = x(1:3) - sidemax
+   sizemax(1:3) = x(1:3) + sidemax
+   write(*,*) ' All atoms must be within these coordinates: '
+   write(*,*) '  x: [ ', sizemin(1),', ', sizemax(1), ' ] '
+   write(*,*) '  y: [ ', sizemin(2),', ', sizemax(2), ' ] '
+   write(*,*) '  z: [ ', sizemin(3),', ', sizemax(3), ' ] '
+   write(*,*) ' If the system is larger than this, increase the sidemax parameter. '
+
+   ! Create first aleatory guess
+
+   i = 0
+   j = ntotmol*3
+   do itype = 1, ntype
+      do imol = 1, nmols(itype)
+         x(i+1) = sizemin(1) + rnd()*(sizemax(1)-sizemin(1))
+         x(i+2) = sizemin(2) + rnd()*(sizemax(2)-sizemin(2))
+         x(i+3) = sizemin(3) + rnd()*(sizemax(3)-sizemin(3))
+         if ( constrain_rot(itype,1) ) then
+            x(j+1) = ( rot_bound(itype,1,1) - dabs(rot_bound(itype,1,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,1,2))
+         else
+            x(j+1) = twopi*rnd()
+         end if
+         if ( constrain_rot(itype,2) ) then
+            x(j+2) = ( rot_bound(itype,2,1) - dabs(rot_bound(itype,2,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,2,2))
+         else
+            x(j+2) = twopi*rnd()
+         end if
+         if ( constrain_rot(itype,3) ) then
+            x(j+3) = ( rot_bound(itype,3,1) - dabs(rot_bound(itype,3,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,3,2))
+         else
+            x(j+3) = twopi*rnd()
+         end if
+         i = i + 3
+         j = j + 3
+      end do
+   end do
+
+   ! Initialize cartesian coordinate array for the first time
+
+   ilubar = 0
+   ilugan = ntotmol*3
+   icart = 0
+   do itype = 1, ntype
+      do imol = 1, nmols(itype)
+         xcm = x(ilubar+1:ilubar+3)
+         beta = x(ilugan+1)
+         gamma = x(ilugan+2)
+         theta = x(ilugan+3)
+         call eulerrmat(beta,gamma,theta,v1,v2,v3)
+         idatom = idfirst(itype) - 1
+         do iatom = 1, natoms(itype)
+            icart = icart + 1
+            idatom = idatom + 1
+            call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
+            fixedatom(icart) = .false.
+            call refresh_hot_buffers_atom(icart)
+         end do
+      end do
+   end do
+   if(fix) then
+      icart = ntotat - nfixedat
+      do iftype = ntype + 1, ntype_with_fixed
+         idfatom = idfirst(iftype) - 1
+         do ifatom = 1, natoms(iftype)
+            idfatom = idfatom + 1
+            icart = icart + 1
+            xcart(icart,1) = coor(idfatom,1)
+            xcart(icart,2) = coor(idfatom,2)
+            xcart(icart,3) = coor(idfatom,3)
+            fixedatom(icart) = .true.
+            call refresh_hot_buffers_atom(icart)
+            ! Check if fixed molecules are compatible with PBC given
+            if (using_pbc) then
+               do i = 1, 3
+                  if (xcart(icart, i) < pbc_min(i) .or. xcart(icart, i) > pbc_max(i)) then
+                     write(*,*) "ERROR: Fixed molecule are outside the PBC box:"
+                     write(*,*) "   Atom: ", ifatom, " of molecule: ", input_itype(iftype), " - coordinate: ", i
+                     write(*,*) "  ", xcart(icart, i), " not in [", pbc_min(i), ", ", pbc_max(i), "]"
+                     write(*,*) "(after translating/rotation the fixed molecule with the given orientation)"
+                     stop exit_code_input_error
+                  end if
+               end do
+            end if
+         end do
+      end do
+   end if
+
+   ! Performing some steps of optimization for the restrictions only
+
+   write(*,hash3_line)
+   write(*,"('  Building initial approximation ... ' )")
+   write(*,hash3_line)
+   write(*,"('  Adjusting initial point to fit the constraints ')")
+   write(*,dash2_line)
+   init1 = .true.
+   call swaptype(n,x,itype,0) ! Initialize swap arrays
+   itype = 0
+   do while( itype <= ntype )
+      itype = itype + 1
+      if ( itype <= ntype ) then
+         call swaptype(n,x,itype,1) ! Set arrays for this type
+      else
+         call swaptype(n,x,itype,3) ! Restore arrays if itype = ntype + 1
+         exit
+      end if
+      write(*,dash3_line)
+      write(*,*) ' Molecules of type: ', input_itype(itype)
+      write(*,*)
+      i = 0
+      hasbad = .true.
+      call computef(n,x,fx)
+      do while( frest > precision .and. i.le. nloop0_type(itype)-1 .and. hasbad)
+         i = i + 1
+         write(*,prog1_line)
+         call pgencan(n,x,fx)
+         call computef(n,x,fx)
+         if(frest > precision) then
+            write(*,"( a,i6,a,i6 )")'  Fixing bad orientations ... ', i,' of ', nloop0_type(itype)
+            movebadprint = .true.
+            call movebad(n,x,fx,movebadprint)
+         end if
+      end do
+      write(*,*)
+      write(*,*) ' Restraint-only function value: ', fx
+      write(*,*) ' Maximum violation of the restraints: ', frest
+
+      call swaptype(n,x,itype,2) ! Save current type results
+
+      if( hasbad .and. frest > precision ) then
+         write(*,*) ' ERROR: Packmol was unable to put the molecules'
+         write(*,*) '        in the desired regions even without'
+         write(*,*) '        considering distance tolerances. '
+         write(*,*) '        Probably there is something wrong with'
+         write(*,*) '        the constraints, since it seems that'
+         write(*,*) '        the molecules cannot satisfy them at'
+         write(*,*) '        at all. '
+         write(*,*) '        Please check the spatial constraints and'
+         write(*,*) '        try again.'
+         if ( i .ge. nloop0_type(itype)-1 ) then
+         end if
+         write(*,*) ' >The maximum number of cycles (',nloop0_type(itype),') was achieved.'
+         write(*,*) '  You may try increasing it with the',' nloop0 keyword, as in: nloop0 1000 '
+         stop exit_code_failed_to_converge
+      end if
+   end do
+   init1 = .false.
+
+   ! Rescaling sizemin and sizemax in order to build the patch of cells
+
+   write(*,dash3_line)
+   write(*,*) ' Rescaling maximum and minimum coordinates... '
+   sizemin(1:3) = 1.d20
+   sizemax(1:3) = -1.d20
+   ! Maximum and minimum coordinates of fixed molecules
+   icart = ntotat - nfixedat
+   do itype = ntype + 1, ntype_with_fixed
+      do imol = 1, nmols(itype)
+         do iatom = 1, natoms(itype)
+            icart = icart + 1
+            sizemin(1) = dmin1(sizemin(1),xcart(icart,1))
+            sizemin(2) = dmin1(sizemin(2),xcart(icart,2))
+            sizemin(3) = dmin1(sizemin(3),xcart(icart,3))
+            sizemax(1) = dmax1(sizemax(1),xcart(icart,1))
+            sizemax(2) = dmax1(sizemax(2),xcart(icart,2))
+            sizemax(3) = dmax1(sizemax(3),xcart(icart,3))
+         end do
+      end do
+   end do
+   icart = 0
+   do itype = 1, ntype
+      do imol = 1, nmols(itype)
+         do iatom = 1, natoms(itype)
+            icart = icart + 1
+            sizemin(1) = dmin1(sizemin(1),xcart(icart,1))
+            sizemin(2) = dmin1(sizemin(2),xcart(icart,2))
+            sizemin(3) = dmin1(sizemin(3),xcart(icart,3))
+            sizemax(1) = dmax1(sizemax(1),xcart(icart,1))
+            sizemax(2) = dmax1(sizemax(2),xcart(icart,2))
+            sizemax(3) = dmax1(sizemax(3),xcart(icart,3))
+         end do
+      end do
+   end do
+   write(*,*) ' Mininum and maximum coordinates after constraint fitting: '
+   write(*,*) '  x: [ ', sizemin(1),', ', sizemax(1), ' ] '
+   write(*,*) '  y: [ ', sizemin(2),', ', sizemax(2), ' ] '
+   write(*,*) '  z: [ ', sizemin(3),', ', sizemax(3), ' ] '
+   sizemin = sizemin - 1.1d0 * radmax
+   sizemax = sizemax + 1.1d0 * radmax
+   ! When *not* using PBC, actually PBCs are used, but particles
+   ! wont interact across the PBC because there is an empty layer
+   ! of cells, given by the shift in the sizemin and sizemax dimensions,
+   ! above defined. 
+   if (.not. using_pbc) then
+      pbc_min = sizemin
+      pbc_max = sizemax
+      pbc_length = pbc_max - pbc_min
+   end if 
+
+   ! Computing the size of the patches
+   write(*,*) ' Computing size of patches... '
+   cell_side = discale * (1.01d0 * radmax)
+   do i = 1, 3
+      ncells(i) = max(1,floor(pbc_length(i)/cell_side))
+      cell_length(i) = pbc_length(i) / ncells(i)
+   end do
+   write(*,*) ' Number of cells in each direction and cell sides: '
+   write(*,*) '  x: ', ncells(1), ' cells of size ', cell_length(1)
+   write(*,*) '  y: ', ncells(2), ' cells of size ', cell_length(2)
+   write(*,*) '  z: ', ncells(3), ' cells of size ', cell_length(3)
+   write(*,"(a, 3(f10.5))") '  Cell-system length: ', pbc_length(1:3) 
+
+   ! Allocate arrays depending on the number of cells
+   allocate(latomfirst(ncells(1),ncells(2),ncells(3)))
+   allocate(latomfix(ncells(1),ncells(2),ncells(3)))
+   allocate(lcellnext(ncells(1)*ncells(2)*ncells(3)))
+   allocate(empty_cell(ncells(1),ncells(2),ncells(3)))
+   allocate(cell_max_radius(ncells(1),ncells(2),ncells(3)))
+   allocate(cell_max_short_radius(ncells(1),ncells(2),ncells(3)))
+
+   ! Reseting linked lists arrays
+   latomfix(:,:,:) = 0
+   latomfirst(:,:,:) = 0
+   latomnext(:) = 0
+   empty_cell(:,:,:) = .true.
+   cell_max_radius(:,:,:) = 0.d0
+   cell_max_short_radius(:,:,:) = 0.d0
+
+   ! If there are fixed molecules, add them permanently to the latomfix array
+   if(fix) then
+      write(*,*) ' Add fixed molecules to permanent arrays... '
+      icart = ntotat - nfixedat
+      do iftype = ntype + 1, ntype_with_fixed
+         do ifatom = 1, natoms(iftype)
+            icart = icart + 1
+            call setcell(xcart(icart,:),cell)
+            latomnext(icart) = latomfix(cell(1),cell(2),cell(3))
+            latomfix(cell(1),cell(2),cell(3)) = icart
+            latomfirst(cell(1),cell(2),cell(3)) = icart
+            ibtype(icart) = iftype
+            ibmol(icart) = 1
+         end do
+      end do
+   end if
+
+   ! Reseting mass centers to be within the regions
+
+   write(*,*) ' Reseting center of mass... '
+   cm_min = 1.d20
+   cm_max = -1.d20
+   icart = 0
+   do itype = 1, ntype
+      do imol = 1, nmols(itype)
+         xcm(:) = 0.d0
+         do iatom = 1, natoms(itype)
+            icart = icart + 1
+            xcm(:) = xcm(:) + xcart(icart,1:3)
+         end do
+         xcm(:) = xcm(:) / natoms(itype)
+         cm_min(itype,:) = dmin1(cm_min(itype,:),xcm(:))
+         cm_max(itype,:) = dmax1(cm_max(itype,:),xcm(:))
+      end do
+   end do
+
+   ! If there is a restart file for all system, read it
+
+   if ( restart_from(0) /= 'none' ) then
+      record = restart_from(0)
+      write(*,*) ' Restarting all system from file: ', trim(adjustl(record))
+      open(10,file=restart_from(0),status='old',action='read',iostat=ioerr)
+      ilubar = 0
+      ilugan = ntotmol*3
+      do i = 1, ntotmol
+         read(10,*,iostat=ioerr) x(ilubar+1), x(ilubar+2), x(ilubar+3), &
+            x(ilugan+1), x(ilugan+2), x(ilugan+3)
+         if ( ioerr /= 0 ) then
+            write(*,*) ' ERROR: Could not read restart file: ', trim(adjustl(record))
+            stop exit_code_open_file
+         end if
+         ilubar = ilubar + 3
+         ilugan = ilugan + 3
+      end do
+      close(10)
+      return
+   end if
+
+   ! Building random initial point
+
+   write(*,dash3_line)
+   write(*,*) ' Setting initial trial coordinates ... '
+   write(*,dash2_line)
+
+   if ( chkgrad ) then
+      write(*,*) ' For checking gradient, will set avoidoverlap to false. '
+      avoidoverlap = .false.
+      max_guess_try = 1
+      scale_rnd_guess = 1.5d0 ! move guess outside the constraints/box
+   else
+      max_guess_try = 20
+      scale_rnd_guess = 1.d0
+   end if
+
+   ! Setting random center of mass coordinates, within size limits
+
+   ilubar = 0
+   do itype = 1, ntype
+      if ( restart_from(itype) /= 'none' ) then
+         ilubar = ilubar + nmols(itype)*3
+         cycle
+      end if
+      do imol = 1, nmols(itype)
+         if ( .not. avoidoverlap ) then
+            fx = 1.d0
+            ntry = 0
+            do while((fx > precision) .and. (ntry < max_guess_try))
+               ntry = ntry + 1
+               call random_number(xrnd)
+               x(ilubar+1:ilubar+3) = &
+                  cm_min(itype,:) + xrnd(:)*(cm_max(itype,:)-cm_min(itype,:)) * scale_rnd_guess
+               call restmol(itype,ilubar,n,x,fx,.false.)
+            end do
+         else
+            fx = 1.d0
+            ntry = 0
+            overlap = .false.
+            do while((overlap .or. fx > precision) .and. (ntry < max_guess_try))
+               overlap = .false.
+               ntry = ntry + 1
+               call random_number(xrnd)
+               x(ilubar+1:ilubar+3) = &
+                  cm_min(itype,:) + xrnd(:)*(cm_max(itype,:)-cm_min(itype,:)) * scale_rnd_guess
+               if(fix) then
+                  call setcell(x(ilubar+1:ilubar+3),cell)
+                  icell: do ic = -1, 1 
+                     do jc = -1, 1
+                        do kc = -1, 1
+                           if(latomfix(cell_ind(cell(1)+ic,ncells(1)),&
+                                       cell_ind(cell(2)+jc,ncells(2)),&
+                                       cell_ind(cell(3)+kc,ncells(3)) &
+                                      ) /= 0) then
+                              overlap = .true.
+                              exit icell
+                           end if
+                        end do
+                     end do
+                  end do icell
+               end if
+               if(.not.overlap) call restmol(itype,ilubar,n,x,fx,.false.)
+            end do
+         end if
+         ilubar = ilubar + 3
+      end do
+   end do
+
+   ! Setting random angles, except if the rotations were constrained
+
+   ilugan = ntotmol*3
+   do itype = 1, ntype
+      if ( restart_from(itype) /= 'none' ) then
+         ilugan = ilugan + nmols(itype)*3
+         cycle
+      end if
+      do imol = 1, nmols(itype)
+         if ( constrain_rot(itype,1) ) then
+            x(ilugan+1) = ( rot_bound(itype,1,1) - dabs(rot_bound(itype,1,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,1,2))
+         else
+            x(ilugan+1) = twopi*rnd()
+         end if
+         if ( constrain_rot(itype,2) ) then
+            x(ilugan+2) = ( rot_bound(itype,2,1) - dabs(rot_bound(itype,2,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,2,2))
+         else
+            x(ilugan+2) = twopi*rnd()
+         end if
+         if ( constrain_rot(itype,3) ) then
+            x(ilugan+3) = ( rot_bound(itype,3,1) - dabs(rot_bound(itype,3,2)) ) + &
+               2.d0*rnd()*dabs(rot_bound(itype,3,2))
+         else
+            x(ilugan+3) = twopi*rnd()
+         end if
+         ilugan = ilugan + 3
+      end do
+   end do
+
+   ! Compare analytical and finite-difference gradients
+
+   if(chkgrad) then
+      cell_side = discale * (1.01d0 * radmax)
+      do i = 1, 3
+         ncells(i) = max(1,floor(pbc_length(i)/cell_side))
+         cell_length(i) = pbc_length(i) / ncells(i)
+      end do
+      call comparegrad(n,x)
+      stop
+   end if
+
+   !
+   ! Reading restart files of specific molecule types, if available
+   !
+
+   ilubar = 0
+   ilugan = ntotmol*3
+   do itype = 1, ntype
+      if ( restart_from(itype) /= 'none' ) then
+         record = restart_from(itype)
+         write(*,dash3_line)
+         write(*,*) ' Molecules of type: ', input_itype(itype)
+         write(*,*) ' Will restart coordinates from: ', trim(adjustl(record))
+         open(10,file=record,status='old',action='read',iostat=ioerr)
+         if ( ioerr /= 0 ) then
+            write(*,*) ' ERROR: Could not open restart file: ', trim(adjustl(record))
+            stop exit_code_open_file
+         end if
+         do i = 1, nmols(itype)
+            read(10,*,iostat=ioerr) x(ilubar+1), x(ilubar+2), x(ilubar+3), &
+               x(ilugan+1), x(ilugan+2), x(ilugan+3)
+            if ( ioerr /= 0 ) then
+               write(*,*) ' ERROR: Could not read restart file: ', trim(adjustl(record))
+               stop exit_code_open_file
+            end if
+            ilubar = ilubar + 3
+            ilugan = ilugan + 3
+         end do
+         close(10)
+         call swaptype(n,x,itype,0) ! Initialize swap arrays
+         call swaptype(n,x,itype,1) ! Set arrays for this type
+         call computef(n,x,fx)
+         write(*,*) ' Maximum violation of the restraints: ', frest
+         write(*,*) ' Maximum violation of minimum atom distances: ', fdist
+         call swaptype(n,x,itype,3) ! Restore all-molecule arrays
+      else
+         ilubar = ilubar + nmols(itype)*3
+         ilugan = ilugan + nmols(itype)*3
+      end if
+   end do
+
+   ! Return with current random point (not default)
+
+   if(randini) return
+
+   ! Adjusting current point to fit the constraints
+
+   init1 = .true.
+   call swaptype(n,x,itype,0) ! Initialize swap arrays
+   itype = 0
+   do while( itype <= ntype )
+      itype = itype + 1
+      if ( itype == ntype + 1 ) then
+         call swaptype(n,x,itype,3) ! Restore arrays for all molecules
+         exit
+      end if
+      if ( restart_from(itype) /= 'none' ) cycle
+      call swaptype(n,x,itype,1) ! Set arrays for this type
+      write(*,dash3_line)
+      write(*,*) ' Molecules of type: ', input_itype(itype)
+      write(*,*) ' Adjusting random positions to fit the constraints. '
+      i = 0
+      call computef(n,x,fx)
+      hasbad = .true.
+      do while( frest > precision .and. i <= nloop0_type(itype)-1 .and. hasbad)
+         i = i + 1
+         write(*,prog1_line)
+         call pgencan(n,x,fx)
+         call computef(n,x,fx)
+         if(frest > precision) then
+            write(*,"( a,i6,a,i6 )")'  Fixing bad orientations ... ', i,' of ', nloop0_type(itype)
+            movebadprint = .true.
+            call movebad(n,x,fx,movebadprint)
+         end if
+      end do
+      write(*,*) ' Restraint-only function value: ', fx
+      write(*,*) ' Maximum violation of the restraints: ', frest
+      call swaptype(n,x,itype,2) ! Save results for this type
+   end do
+   init1 = .false.
+   write(*,hash3_line)
+
+   ! Deallocate allocated arrays
+   deallocate(cm_min, cm_max)
+
+   return
+end subroutine initial
+
diff --git a/src/resetcells.f90 b/src/resetcells.f90
index f3e9ba7..dc05f93 100644
--- a/src/resetcells.f90
+++ b/src/resetcells.f90
@@ -1,43 +1,51 @@
-!
-!  Written by Leandro Martínez, 2009-2011.
-!  Copyright (c) 2009-2018, Leandro Martínez, Jose Mario Martinez,
-!  Ernesto G. Birgin.
-!
-! Subroutine resetcells: Subroutine that resets the occupancy of
-!                        linked cell cells
-!
-
-subroutine resetcells()
-   use input, only: fix, ntype_with_fixed
-   use cell_indexing, only: icell_to_cell, index_cell, setcell
-   use compute_data, only: latomfirst, lcellfirst, lcellnext, empty_cell, ncells, &
-                           ntype, natoms, ntotat, nfixedat, xcart, latomnext
-   implicit none
-   integer :: cell(3), icell, iftype, icart, ifatom
-
-   ! Reset data for cells that contain fixed atom
-   lcellnext(:) = 0
-   lcellfirst = 0
-   latomfirst(:,:,:) = 0
-   latomnext(:) = 0
-   empty_cell(:,:,:) = .true.
-   if(fix) then
-      icart = ntotat - nfixedat
-      do iftype = ntype + 1, ntype_with_fixed
-         do ifatom = 1, natoms(iftype)
-            icart = icart + 1
-            call setcell(xcart(icart,:),cell)
-            latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
-            latomfirst(cell(1),cell(2),cell(3)) = icart
-            if ( empty_cell(cell(1),cell(2),cell(3)) ) then
-               empty_cell(cell(1),cell(2),cell(3)) = .false.
-               icell = index_cell(cell,ncells)
-               lcellnext(icell) = lcellfirst
-               lcellfirst = icell
-            end if
-         end do
-      end do
-   end if
-
-end subroutine resetcells
-
+!
+!  Written by Leandro Martínez, 2009-2011.
+!  Copyright (c) 2009-2018, Leandro Martínez, Jose Mario Martinez,
+!  Ernesto G. Birgin.
+!
+! Subroutine resetcells: Subroutine that resets the occupancy of
+!                        linked cell cells
+!
+
+subroutine resetcells()
+   use input, only: fix, ntype_with_fixed
+   use cell_indexing, only: icell_to_cell, index_cell, setcell
+   use compute_data, only: latomfirst, lcellfirst, lcellnext, empty_cell, ncells, &
+                           ntype, natoms, ntotat, nfixedat, xcart, latomnext, radius, &
+                           use_short_radius, short_radius, cell_max_radius, cell_max_short_radius
+   implicit none
+   integer :: cell(3), icell, iftype, icart, ifatom
+
+   ! Reset data for cells that contain fixed atom
+   lcellnext(:) = 0
+   lcellfirst = 0
+   latomfirst(:,:,:) = 0
+   latomnext(:) = 0
+   empty_cell(:,:,:) = .true.
+   cell_max_radius(:,:,:) = 0.d0
+   cell_max_short_radius(:,:,:) = 0.d0
+   if(fix) then
+      icart = ntotat - nfixedat
+      do iftype = ntype + 1, ntype_with_fixed
+         do ifatom = 1, natoms(iftype)
+            icart = icart + 1
+            call setcell(xcart(icart,:),cell)
+            latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
+            latomfirst(cell(1),cell(2),cell(3)) = icart
+            cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
+            if ( use_short_radius(icart) ) then
+               cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+            end if
+
+            if ( empty_cell(cell(1),cell(2),cell(3)) ) then
+               empty_cell(cell(1),cell(2),cell(3)) = .false.
+               icell = index_cell(cell,ncells)
+               lcellnext(icell) = lcellfirst
+               lcellfirst = icell
+            end if
+         end do
+      end do
+   end if
+
+end subroutine resetcells
+

From 6b84f713834138ab79288749ddab5ae45fe890ac Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:27:03 +0100
Subject: [PATCH 07/21] Optimize hot-loop square operations in pair kernels

---
 src/computef.f90 |  5 +++--
 src/computeg.f90 |  5 +++--
 src/fparc.f90    | 55 ++++++++++++++++++++++++++++++++----------------
 src/gparc.f90    | 31 +++++++++++++++++++--------
 4 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/src/computef.f90 b/src/computef.f90
index f053930..2ff7431 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -20,7 +20,7 @@ subroutine computef(n,x,f)
    integer :: ioffset
    integer :: neigh_first(n_forward_offsets)
 
-   double precision :: min_cell_dist2, max_reach, reg_reach, short_reach
+   double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
 
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
@@ -152,7 +152,8 @@ subroutine computef(n,x,f)
          reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
          short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
          max_reach = dmax1(reg_reach, short_reach)
-         if ( min_cell_dist2 > max_reach*max_reach ) neigh_first(ioffset) = 0
+         max_reach2 = max_reach*max_reach
+         if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0
       end do
 
       icart = neigh_first(1)
diff --git a/src/computeg.f90 b/src/computeg.f90
index 84ed5b7..312dfa9 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -22,7 +22,7 @@ subroutine computeg(n,x,g)
    integer :: iratcount, ioffset
    integer :: neigh_first(n_forward_offsets)
 
-   double precision :: min_cell_dist2, max_reach, reg_reach, short_reach
+   double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
       dv2beta(3), dv2gama(3), dv2teta(3),&
@@ -140,7 +140,8 @@ subroutine computeg(n,x,g)
             reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
             short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
             max_reach = dmax1(reg_reach, short_reach)
-            if ( min_cell_dist2 > max_reach*max_reach ) neigh_first(ioffset) = 0
+            max_reach2 = max_reach*max_reach
+            if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0
          end do
 
          icart = neigh_first(1)
diff --git a/src/fparc.f90 b/src/fparc.f90
index 09e6b84..900274a 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -20,6 +20,7 @@ double precision function fparc(icart,firstjcart)
    ! LOCAL SCALARS
    integer :: jcart
    double precision :: datom, tol, short_tol, short_tol_penalty, short_tol_scale
+   double precision :: sumr, short_sumr, delta, tol_sq, short_tol_sq
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, rii, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
@@ -63,20 +64,26 @@ double precision function fparc(icart,firstjcart)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
          datom = dx*dx + dy*dy + dz*dz
 
-         tol = (ri + radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          if ( datom < tol ) then
-            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
+            delta = datom - tol
+            fparc = fparc + fsi*fscale(jcart)*delta*delta
             if ( iand(fixed_short_marker(jcart),1) /= 0 ) then
-               short_tol = (short_ri + short_radius(jcart))**2
+               short_sumr = short_ri + short_radius(jcart)
+               short_tol = short_sumr*short_sumr
                if ( datom < short_tol ) then
                   short_tol_penalty = datom-short_tol
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-                  short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
-                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+                  tol_sq = tol*tol
+                  short_tol_sq = short_tol*short_tol
+                  short_tol_scale = short_tol_scale*(tol_sq/short_tol_sq)
+                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty*short_tol_penalty
                end if
             end if
          end if
-         tol = (rii + radius_ini(jcart))**2
+         sumr = rii + radius_ini(jcart)
+         tol = sumr*sumr
          fdist = dmax1(tol-datom,fdist)
          if ( move ) then
             fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
@@ -105,18 +112,24 @@ double precision function fparc(icart,firstjcart)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
          datom = dx*dx + dy*dy + dz*dz
 
-         tol = (ri + radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          if ( datom < tol ) then
-            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
-            short_tol = (short_ri + short_radius(jcart))**2
+            delta = datom - tol
+            fparc = fparc + fsi*fscale(jcart)*delta*delta
+            short_sumr = short_ri + short_radius(jcart)
+            short_tol = short_sumr*short_sumr
             if ( datom < short_tol ) then
                short_tol_penalty = datom-short_tol
                short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-               short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
-               fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+               tol_sq = tol*tol
+               short_tol_sq = short_tol*short_tol
+               short_tol_scale = short_tol_scale*(tol_sq/short_tol_sq)
+               fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty*short_tol_penalty
             end if
          end if
-         tol = (rii + radius_ini(jcart))**2
+         sumr = rii + radius_ini(jcart)
+         tol = sumr*sumr
          fdist = dmax1(tol-datom,fdist)
          if ( move ) then
             fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
@@ -149,20 +162,26 @@ double precision function fparc(icart,firstjcart)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
          datom = dx*dx + dy*dy + dz*dz
 
-         tol = (ri + radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          if ( datom < tol ) then
-            fparc = fparc + fsi*fscale(jcart)*(datom-tol)**2
+            delta = datom - tol
+            fparc = fparc + fsi*fscale(jcart)*delta*delta
             if ( use_short_i .or. iand(fixed_short_marker(jcart),1) /= 0 ) then
-               short_tol = (short_ri + short_radius(jcart))**2
+               short_sumr = short_ri + short_radius(jcart)
+               short_tol = short_sumr*short_sumr
                if ( datom < short_tol ) then
                   short_tol_penalty = datom-short_tol
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-                  short_tol_scale = short_tol_scale*(tol**2/short_tol**2)
-                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty**2
+                  tol_sq = tol*tol
+                  short_tol_sq = short_tol*short_tol
+                  short_tol_scale = short_tol_scale*(tol_sq/short_tol_sq)
+                  fparc = fparc + fsi*fscale(jcart)*short_tol_scale*short_tol_penalty*short_tol_penalty
                end if
             end if
          end if
-         tol = (rii + radius_ini(jcart))**2
+         sumr = rii + radius_ini(jcart)
+         tol = sumr*sumr
          fdist = dmax1(tol-datom,fdist)
          if ( move ) then
             fdist_atom(icart) = dmax1(tol-datom,fdist_atom(icart))
diff --git a/src/gparc.f90 b/src/gparc.f90
index 1525476..a2f5d0c 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -19,6 +19,7 @@ subroutine gparc(icart,firstjcart)
    ! LOCAL SCALARS
    integer :: jcart
    double precision :: datom, dtemp, xdiff, tol, short_tol, short_tol_scale
+   double precision :: sumr, short_sumr, tol_sq, short_tol_sq
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
@@ -52,7 +53,8 @@ subroutine gparc(icart,firstjcart)
             cycle
          end if
 
-         tol = (ri+radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          dx = xi - x(jcart)
          dy = yi - y(jcart)
          dz = zi - z(jcart)
@@ -72,10 +74,13 @@ subroutine gparc(icart,firstjcart)
             gxcar(icart,3)= gxcar(icart,3) + xdiff
             gxcar(jcart,3)= gxcar(jcart,3) - xdiff
             if ( iand(fixed_short_marker(jcart),1) /= 0 ) then
-               short_tol = ( short_ri + short_radius(jcart) )**2
+               short_sumr = short_ri + short_radius(jcart)
+               short_tol = short_sumr*short_sumr
                if ( datom < short_tol ) then
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-                  short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
+                  tol_sq = tol*tol
+                  short_tol_sq = short_tol*short_tol
+                  short_tol_scale = short_tol_scale*( tol_sq / short_tol_sq )
                   dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
                   xdiff = dtemp*dx
                   gxcar(icart,1)= gxcar(icart,1) + xdiff
@@ -105,7 +110,8 @@ subroutine gparc(icart,firstjcart)
             cycle
          end if
 
-         tol = (ri+radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          dx = xi - x(jcart)
          dy = yi - y(jcart)
          dz = zi - z(jcart)
@@ -124,10 +130,13 @@ subroutine gparc(icart,firstjcart)
             xdiff = dtemp*dz
             gxcar(icart,3)= gxcar(icart,3) + xdiff
             gxcar(jcart,3)= gxcar(jcart,3) - xdiff
-            short_tol = ( short_ri + short_radius(jcart) )**2
+            short_sumr = short_ri + short_radius(jcart)
+            short_tol = short_sumr*short_sumr
             if ( datom < short_tol ) then
                short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-               short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
+               tol_sq = tol*tol
+               short_tol_sq = short_tol*short_tol
+               short_tol_scale = short_tol_scale*( tol_sq / short_tol_sq )
                dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
                xdiff = dtemp*dx
                gxcar(icart,1)= gxcar(icart,1) + xdiff
@@ -160,7 +169,8 @@ subroutine gparc(icart,firstjcart)
             cycle
          end if
 
-         tol = (ri+radius(jcart))**2
+         sumr = ri + radius(jcart)
+         tol = sumr*sumr
          dx = xi - x(jcart)
          dy = yi - y(jcart)
          dz = zi - z(jcart)
@@ -180,10 +190,13 @@ subroutine gparc(icart,firstjcart)
             gxcar(icart,3)= gxcar(icart,3) + xdiff
             gxcar(jcart,3)= gxcar(jcart,3) - xdiff
             if ( use_short_i .or. iand(fixed_short_marker(jcart),1) /= 0 ) then
-               short_tol = ( short_ri + short_radius(jcart) )**2
+               short_sumr = short_ri + short_radius(jcart)
+               short_tol = short_sumr*short_sumr
                if ( datom < short_tol ) then
                   short_tol_scale = dsqrt(short_si*short_radius_scale(jcart))
-                  short_tol_scale = short_tol_scale*( tol**2 / short_tol**2 )
+                  tol_sq = tol*tol
+                  short_tol_sq = short_tol*short_tol
+                  short_tol_scale = short_tol_scale*( tol_sq / short_tol_sq )
                   dtemp = fsi*fscale(jcart) * short_tol_scale * 4.d0 * (datom - short_tol)
                   xdiff = dtemp*dx
                   gxcar(icart,1)= gxcar(icart,1) + xdiff

From 99a44a9dcf8c468b881e99826f00aae298678ce0 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:35:12 +0100
Subject: [PATCH 08/21] Add explicit build profiles and numerics profile check

---
 CMakeLists.txt                     |  41 ++++++++++--
 Makefile                           | 102 ++++++++++++++++++++++-------
 README.md                          |  58 ++++++++++++++++
 testing/check_numerics_profiles.sh |  90 +++++++++++++++++++++++++
 4 files changed, 265 insertions(+), 26 deletions(-)
 create mode 100755 testing/check_numerics_profiles.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 860b74b..149dc3b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,48 @@
 cmake_minimum_required(VERSION 3.10)
 project(packmol Fortran)
 
+set(PACKMOL_PROFILE "baseline" CACHE STRING "Build profile: baseline, perf-native, debug, sanitize")
+set_property(CACHE PACKMOL_PROFILE PROPERTY STRINGS baseline perf-native debug sanitize)
+option(PACKMOL_UNSAFE_MATH "Enable unsafe/fast math optimizations" OFF)
 
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release)
-endif(NOT CMAKE_BUILD_TYPE)
+endif()
 
 if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
-    add_compile_options(
-        -Wall "$<$<CONFIG:Debug>:-Werror>"
-    )
+    add_compile_options(-Wall)
+endif()
+
+if(PACKMOL_PROFILE STREQUAL "baseline")
+    add_compile_options(-O2)
+elseif(PACKMOL_PROFILE STREQUAL "perf-native")
+    add_compile_options(-O3)
+    if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
+        add_compile_options(-march=native -mtune=native -funroll-loops)
+    elseif(CMAKE_Fortran_COMPILER_ID MATCHES IntelLLVM OR CMAKE_Fortran_COMPILER_ID MATCHES Intel)
+        add_compile_options(-xHost -ipo)
+    endif()
+elseif(PACKMOL_PROFILE STREQUAL "debug")
+    add_compile_options(-g)
+    if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
+        add_compile_options(-fcheck=bounds -fbacktrace -ffpe-trap=zero,overflow,underflow)
+    endif()
+elseif(PACKMOL_PROFILE STREQUAL "sanitize")
+    add_compile_options(-O1 -g)
+    if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
+        add_compile_options(-fno-omit-frame-pointer -fsanitize=address,undefined -fcheck=all)
+        add_link_options(-fsanitize=address,undefined)
+    endif()
+else()
+    message(FATAL_ERROR "Unknown PACKMOL_PROFILE='${PACKMOL_PROFILE}'. Use one of: baseline, perf-native, debug, sanitize")
+endif()
+
+if(PACKMOL_UNSAFE_MATH)
+    if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
+        add_compile_options(-ffast-math)
+    elseif(CMAKE_Fortran_COMPILER_ID MATCHES IntelLLVM OR CMAKE_Fortran_COMPILER_ID MATCHES Intel)
+        add_compile_options(-fp-model fast=2)
+    endif()
 endif()
 
 # Build the executable
diff --git a/Makefile b/Makefile
index 11a0535..b0db730 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,26 @@ FORTRAN=/usr/bin/gfortran
 #
 # Change the flags of the compilation if you want:
 #
-FLAGS= -O3 -march=native -funroll-loops
+PROFILE ?= baseline
+UNSAFE_MATH ?= 0
+
+ifneq (,$(filter perf perf-native,$(MAKECMDGOALS)))
+PROFILE := perf-native
+endif
+ifneq (,$(filter devel,$(MAKECMDGOALS)))
+PROFILE := devel
+endif
+ifneq (,$(filter sanitize,$(MAKECMDGOALS)))
+PROFILE := sanitize
+endif
+ifneq (,$(filter static,$(MAKECMDGOALS)))
+PROFILE := static
+endif
+ifneq (,$(filter baseline,$(MAKECMDGOALS)))
+PROFILE := baseline
+endif
+
+FLAGS=
 SRCDIR= src
 MAINDIR= app 
 ###################################################################
@@ -30,20 +49,54 @@ MAINDIR= app
 # Flags for compiling development version
 #
 GENCANFLAGS := $(FLAGS)
-# Flags for the routines that signal with --fast-math
-IEEE_SIGNAL_FLAGS := $(FLAGS)
-ifeq ($(MAKECMDGOALS),devel)
+FC_VERSION := $(shell $(FORTRAN) --version 2>/dev/null | head -n 1)
+
+ifeq ($(PROFILE),baseline)
+FLAGS = -O2
+GENCANFLAGS = $(FLAGS)
+endif
+
+ifeq ($(PROFILE),perf-native)
+FLAGS = -O3
+GENCANFLAGS = $(FLAGS)
+ifneq (,$(findstring GNU Fortran,$(FC_VERSION)))
+FLAGS += -march=native -mtune=native -funroll-loops
+GENCANFLAGS += -march=native -mtune=native -funroll-loops
+else ifneq (,$(findstring ifx,$(FC_VERSION)))
+FLAGS += -xHost -ipo
+GENCANFLAGS += -xHost -ipo
+else ifneq (,$(findstring ifort,$(FC_VERSION)))
+FLAGS += -xHost -ipo
+GENCANFLAGS += -xHost -ipo
+endif
+endif
+
+ifeq ($(PROFILE),devel)
 FLAGS = -Wall -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow
-GENCANFLAGS = -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow 
+GENCANFLAGS = -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow
 endif
-ifeq ($(MAKECMDGOALS),perf)
-FLAGS = -g -pg
-GENCANFLAGS = -g -pg
+
+ifeq ($(PROFILE),sanitize)
+FLAGS = -O1 -g -fno-omit-frame-pointer
+GENCANFLAGS = $(FLAGS)
+ifneq (,$(findstring GNU Fortran,$(FC_VERSION)))
+FLAGS += -fsanitize=address,undefined -fcheck=all
+GENCANFLAGS += -fsanitize=address,undefined -fcheck=all
 endif
-ifeq ($(MAKECMDGOALS),static)
-FLAGS = -O3 --fast-math -static
-GENCANFLAGS = -O3 --fast-math -static
 endif
+
+ifeq ($(PROFILE),static)
+FLAGS = -O2 -static
+GENCANFLAGS = $(FLAGS)
+endif
+
+ifeq ($(UNSAFE_MATH),1)
+FLAGS += -ffast-math
+GENCANFLAGS += -ffast-math
+endif
+IEEE_SIGNAL_FLAGS = $(FLAGS)
+
+
 #
 # Files required
 #
@@ -103,17 +156,22 @@ all : $(oall)
 #
 # Compiling with flags for development
 #
-static : devel
-perf : devel
-devel : $(oall)
-	@echo " ------------------------------------------------------ " 
-	@echo " Compiling packmol with $(FORTRAN) " 
-	@echo " Flags: $(FLAGS)"
-	@echo " ------------------------------------------------------ "
-	@$(FORTRAN) -o packmol $(oall) $(FLAGS)
-	@echo " ------------------------------------------------------ " 
-	@echo " Packmol succesfully built. " 
-	@echo " ------------------------------------------------------ " 
+baseline: PROFILE=baseline
+baseline: all
+
+perf-native: PROFILE=perf-native
+perf-native: all
+
+devel: PROFILE=devel
+devel: all
+
+sanitize: PROFILE=sanitize
+sanitize: all
+
+static: PROFILE=static
+static: all
+
+perf : perf-native
 #
 # Modules
 #
diff --git a/README.md b/README.md
index 79ef999..258501f 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,64 @@ uvx packmol < inp.pack
    `FPM_FC=compiler`, for example for `ifort`, use in bash, `export FPM_FC=ifort`.
 
 
+
+### Build profiles (Makefile and CMake)
+
+Packmol now supports explicit build profiles so you can choose between conservative optimized builds, aggressive benchmarking builds, and debugging/sanitized builds.
+
+#### Baseline release profile (default)
+
+This profile prioritizes stability and portability with conservative optimization.
+
+- **Makefile**: `make` (or `make baseline`)
+- **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=baseline`
+
+#### Aggressive benchmark profile (`perf-native`)
+
+This profile is opt-in and applies compiler-specific native tuning (for example, `-march=native` for GNU or `-xHost` for Intel compilers). Use this for local benchmarking only.
+
+- **Makefile**: `make perf-native`
+- **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=perf-native`
+
+#### Debug / sanitized profiles
+
+Use these profiles for troubleshooting and memory/UB diagnostics.
+
+- **Makefile debug checks**: `make devel`
+- **Makefile sanitizers (GNU)**: `make sanitize`
+- **CMake debug checks**: `cmake -S . -B build -DPACKMOL_PROFILE=debug`
+- **CMake sanitizers (GNU)**: `cmake -S . -B build -DPACKMOL_PROFILE=sanitize`
+
+#### Unsafe math is explicit opt-in
+
+Unsafe/fast-math is **not** enabled globally.
+
+- **Makefile**: add `UNSAFE_MATH=1`, e.g. `make perf-native UNSAFE_MATH=1`
+- **CMake**: set `-DPACKMOL_UNSAFE_MATH=ON`
+
+### Reproducibility and numerics check across profiles
+
+To compare objective and gradient behavior across profiles using existing inputs under `testing/`, run:
+
+```bash
+./testing/check_numerics_profiles.sh
+```
+
+What it does:
+
+1. Builds and runs `baseline` and `perf-native` binaries.
+2. Uses `testing/input_files/benzene2.inp` with `chkgrad` enabled (small system for fast finite-difference gradient checking).
+3. Compares values extracted from each run's `chkgrad.log`:
+   - Objective function value (`Function Value = ...`)
+   - Gradient check summary (`Maximum difference = ... Error= ...`)
+4. Fails if differences exceed tolerances.
+
+You can override tolerances with environment variables:
+
+```bash
+OBJ_TOL=1e-8 GRAD_TOL=1e-3 ./testing/check_numerics_profiles.sh
+```
+
 ## References
 
 Please always cite one of the following references in publications for which Packmol was useful:
diff --git a/testing/check_numerics_profiles.sh b/testing/check_numerics_profiles.sh
new file mode 100755
index 0000000..09edf70
--- /dev/null
+++ b/testing/check_numerics_profiles.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+TEST_DIR="${ROOT_DIR}/testing"
+TMP_DIR="${TEST_DIR}/tmp_profile_check"
+
+BASELINE_BIN="${ROOT_DIR}/packmol-baseline"
+PERF_BIN="${ROOT_DIR}/packmol-perf-native"
+INPUT_FILE="${TEST_DIR}/input_files/benzene2.inp"
+INPUT_CHKGRAD="${TMP_DIR}/benzene2_chkgrad.inp"
+
+OBJ_TOL="${OBJ_TOL:-1e-8}"
+GRAD_TOL="${GRAD_TOL:-1e-3}"
+
+extract_function_value() {
+    awk '/Function Value =/ {print $4}' "$1"
+}
+
+extract_max_grad_error() {
+    awk '/Maximum difference =/ {print $7}' "$1"
+}
+
+run_profile() {
+    local profile="$1"
+    local binary="$2"
+    local outdir="${TMP_DIR}/${profile}"
+
+    rm -rf "${outdir}"
+    mkdir -p "${outdir}"
+    cp -r "${TEST_DIR}/structure_files" "${outdir}/"
+
+    (
+        cd "${ROOT_DIR}"
+        make clean >/dev/null
+        make "${profile}" >/dev/null
+        cp packmol "${binary}"
+    )
+
+    (
+        cd "${outdir}"
+        "${binary}" < "${INPUT_CHKGRAD}" > profile.log
+    )
+
+    cp "${outdir}/chkgrad.log" "${TMP_DIR}/chkgrad-${profile}.log"
+}
+
+mkdir -p "${TMP_DIR}"
+cp "${INPUT_FILE}" "${INPUT_CHKGRAD}"
+echo "chkgrad" >> "${INPUT_CHKGRAD}"
+
+echo "[1/2] Running baseline profile"
+run_profile baseline "${BASELINE_BIN}"
+
+echo "[2/2] Running perf-native profile"
+run_profile perf-native "${PERF_BIN}"
+
+base_obj="$(extract_function_value "${TMP_DIR}/chkgrad-baseline.log")"
+perf_obj="$(extract_function_value "${TMP_DIR}/chkgrad-perf-native.log")"
+base_grad_err="$(extract_max_grad_error "${TMP_DIR}/chkgrad-baseline.log")"
+perf_grad_err="$(extract_max_grad_error "${TMP_DIR}/chkgrad-perf-native.log")"
+
+python - "$base_obj" "$perf_obj" "$base_grad_err" "$perf_grad_err" "$OBJ_TOL" "$GRAD_TOL" <<'PY'
+import math
+import sys
+
+base_obj = float(sys.argv[1])
+perf_obj = float(sys.argv[2])
+base_grad_err = float(sys.argv[3])
+perf_grad_err = float(sys.argv[4])
+obj_tol = float(sys.argv[5])
+grad_tol = float(sys.argv[6])
+
+obj_delta = abs(base_obj - perf_obj)
+grad_delta = abs(base_grad_err - perf_grad_err)
+
+print(f"Baseline objective: {base_obj:.16e}")
+print(f"Perf-native objective: {perf_obj:.16e}")
+print(f"|Δ objective| = {obj_delta:.6e} (tol={obj_tol:.6e})")
+print(f"Baseline max grad check error: {base_grad_err:.6e}")
+print(f"Perf-native max grad check error: {perf_grad_err:.6e}")
+print(f"|Δ grad error| = {grad_delta:.6e} (tol={grad_tol:.6e})")
+
+if obj_delta > obj_tol:
+    raise SystemExit("Objective delta exceeds tolerance")
+if grad_delta > grad_tol:
+    raise SystemExit("Gradient delta exceeds tolerance")
+PY
+
+echo "Numerics comparison passed."

From 8c0615b7a20a1655da4a82526053fb381b5283e3 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 13:06:07 +0100
Subject: [PATCH 09/21] docs: update build/profile docs and add Michele Bonus
 credentials

---
 AUTHORS      |  5 +++++
 CHANGELOG.md |  2 ++
 README.md    | 38 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/AUTHORS b/AUTHORS
index 59bd0d1..4e9ad04 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -9,3 +9,8 @@ dynamics and docking. Journal of Computational Chemistry, 24(7):819-825,
 2003. 
 
 Home-Page: http://m3g.iqm.unicamp.br/packmol
+
+Contributor:
+- Michele Bonus, Heinrich Heine University Düsseldorf
+  Email: michele.bonus@hhu.de
+  GitHub: https://github.com/MicheleBonus/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index efbc3bd..48ca224 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ packmol changelog
 Version 21.2.2-DEV
 --------------
 - ![INFO][badge-info] Add changelog verification CI run.
+- ![INFO][badge-info] Update README build/profile instructions and document recent cell-list and pair-kernel performance work.
+- ![INFO][badge-info] Add contributor credentials for Michele Bonus (Heinrich Heine University Düsseldorf).
 
 Version 21.2.1
 --------------
diff --git a/README.md b/README.md
index 258501f..648ffcb 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,12 @@ This profile prioritizes stability and portability with conservative optimizatio
 - **Makefile**: `make` (or `make baseline`)
 - **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=baseline`
 
+Then build with:
+
+```bash
+cmake --build build --config Release
+```
+
 #### Aggressive benchmark profile (`perf-native`)
 
 This profile is opt-in and applies compiler-specific native tuning (for example, `-march=native` for GNU or `-xHost` for Intel compilers). Use this for local benchmarking only.
@@ -102,6 +108,12 @@ This profile is opt-in and applies compiler-specific native tuning (for example,
 - **Makefile**: `make perf-native`
 - **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=perf-native`
 
+Then build with:
+
+```bash
+cmake --build build --config Release
+```
+
 #### Debug / sanitized profiles
 
 Use these profiles for troubleshooting and memory/UB diagnostics.
@@ -111,6 +123,31 @@ Use these profiles for troubleshooting and memory/UB diagnostics.
 - **CMake debug checks**: `cmake -S . -B build -DPACKMOL_PROFILE=debug`
 - **CMake sanitizers (GNU)**: `cmake -S . -B build -DPACKMOL_PROFILE=sanitize`
 
+Then build with:
+
+```bash
+cmake --build build --config Debug
+```
+
+### Performance-oriented kernel improvements
+
+Recent performance work in the pairwise cell-list kernels includes:
+
+- Prebuilt neighbor-cell offsets used in `computef` / `computeg` traversal.
+- Reduced branch pressure in pair kernels by separating hot paths.
+- Better data locality in hot loops through structure-of-arrays style access.
+- Conservative per-cell broad-phase reject checks before expensive pair evaluation.
+- Consistent `x*x` math micro-optimizations replacing hot-loop `**2` exponentiation.
+- Build-profile driven tuning with numerics validation support.
+
+These updates are designed to speed up large systems while preserving numerical behavior.
+
+## Contributors
+
+- Michele Bonus, Heinrich Heine University Düsseldorf  
+  Email: `michele.bonus@hhu.de`  
+  GitHub: https://github.com/MicheleBonus/
+
 #### Unsafe math is explicit opt-in
 
 Unsafe/fast-math is **not** enabled globally.
@@ -151,4 +188,3 @@ JM Martinez, L Martinez, Packing optimization for the automated generation of co
 (https://doi.org/10.1002/jcc.10216)
 
 
-

From c6909bb42738a7223bbcf24440a68aa41ff03b17 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:05:51 +0100
Subject: [PATCH 10/21] Fix pgencan module import ambiguity for x

---
 src/pgencan.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pgencan.f90 b/src/pgencan.f90
index f907298..26362d5 100644
--- a/src/pgencan.f90
+++ b/src/pgencan.f90
@@ -11,8 +11,8 @@
 subroutine pgencan(n,x,fx)
 
    use sizes
-   use compute_data
-   use usegencan
+   use compute_data, only : ntype, nmols, constrain_rot, rot_bound
+   use usegencan, only : maxit, iprint1, iprint2, wi, wd, l, u, g
    implicit none
 
    double precision :: lambda(1), rho(1)

From dee125a6fca870aef58e4d622808c8155e9a00fe Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:09:28 +0100
Subject: [PATCH 11/21] Fix missing init1 import in pgencan

---
 src/pgencan.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pgencan.f90 b/src/pgencan.f90
index 26362d5..7e35139 100644
--- a/src/pgencan.f90
+++ b/src/pgencan.f90
@@ -11,7 +11,7 @@
 subroutine pgencan(n,x,fx)
 
    use sizes
-   use compute_data, only : ntype, nmols, constrain_rot, rot_bound
+   use compute_data, only : ntype, nmols, constrain_rot, rot_bound, init1
    use usegencan, only : maxit, iprint1, iprint2, wi, wd, l, u, g
    implicit none
 

From 04c78883b80568cefc01c7f30edea0719f383f92 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:24:50 +0100
Subject: [PATCH 12/21] Rename hot coordinate buffers and tighten compute_data
 imports

---
 src/checkpoint.f90   |  2 +-
 src/compute_data.f90 | 18 +++++++++---------
 src/computef.f90     |  5 ++++-
 src/computeg.f90     |  5 ++++-
 src/fparc.f90        | 24 ++++++++++++------------
 src/gparc.f90        | 24 ++++++++++++------------
 src/heuristics.f90   |  3 ++-
 src/initial.f90      |  5 ++++-
 src/output.f90       |  2 +-
 src/setsizes.f90     |  2 +-
 10 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/src/checkpoint.f90 b/src/checkpoint.f90
index f43dff8..bdd8f47 100644
--- a/src/checkpoint.f90
+++ b/src/checkpoint.f90
@@ -12,7 +12,7 @@
 subroutine checkpoint(n,x)
 
    use sizes
-   use compute_data
+   use compute_data, only : comptype, init1
    use input
    use usegencan
    use ahestetic
diff --git a/src/compute_data.f90 b/src/compute_data.f90
index b9a1935..ca76996 100644
--- a/src/compute_data.f90
+++ b/src/compute_data.f90
@@ -29,7 +29,7 @@ module compute_data
    double precision :: radmax
 
    double precision, allocatable :: xcart(:,:) ! (ntotat,3)
-   double precision, allocatable :: x(:), y(:), z(:) ! hot scalar streams for xcart
+   double precision, allocatable :: x_hot(:), y_hot(:), z_hot(:) ! hot scalar streams for xcart
    double precision, allocatable :: coor(:,:) ! (ntotat,3)
    double precision, allocatable :: restpars(:,:) ! (maxrest,9)
    double precision, allocatable :: rot_bound(:,:,:) ! (ntype,3,2)
@@ -70,10 +70,10 @@ module compute_data
    subroutine refresh_hot_buffers_full()
       implicit none
 
-      if ( .not. allocated(x) ) return
-      x(:) = xcart(:,1)
-      y(:) = xcart(:,2)
-      z(:) = xcart(:,3)
+      if ( .not. allocated(x_hot) ) return
+      x_hot(:) = xcart(:,1)
+      y_hot(:) = xcart(:,2)
+      z_hot(:) = xcart(:,3)
       ibtype_hot(:) = ibtype(:)
       ibmol_hot(:) = ibmol(:)
       fixed_short_marker(:) = merge(2, 0, fixedatom(:)) + merge(1, 0, use_short_radius(:))
@@ -83,10 +83,10 @@ subroutine refresh_hot_buffers_atom(icart)
       implicit none
       integer, intent(in) :: icart
 
-      if ( .not. allocated(x) ) return
-      x(icart) = xcart(icart,1)
-      y(icart) = xcart(icart,2)
-      z(icart) = xcart(icart,3)
+      if ( .not. allocated(x_hot) ) return
+      x_hot(icart) = xcart(icart,1)
+      y_hot(icart) = xcart(icart,2)
+      z_hot(icart) = xcart(icart,3)
       ibtype_hot(icart) = ibtype(icart)
       ibmol_hot(icart) = ibmol(icart)
       fixed_short_marker(icart) = merge(2, 0, fixedatom(icart)) + merge(1, 0, use_short_radius(icart))
diff --git a/src/computef.f90 b/src/computef.f90
index 2ff7431..ffac54c 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -10,7 +10,10 @@ subroutine computef(n,x,f)
 
    use sizes
    use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
-   use compute_data
+   use compute_data, only : ntotmol, ntype, ncells, nmols, natoms, idfirst, ibmol, ibtype, fdist, frest, &
+      cell_length, xcart, coor, radius, short_radius, frest_atom, comptype, use_short_radius, init1, move, &
+      latomnext, latomfirst, lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, &
+      refresh_hot_buffers_full, refresh_hot_buffers_atom
    use pbc
    implicit none
 
diff --git a/src/computeg.f90 b/src/computeg.f90
index 312dfa9..406f7dc 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -10,7 +10,10 @@ subroutine computeg(n,x,g)
 
    use sizes
    use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
-   use compute_data
+   use compute_data, only : ntotmol, ntype, ncells, nmols, natoms, idfirst, nratom, iratom, ibmol, ibtype, &
+      cell_length, xcart, coor, radius, short_radius, gxcar, comptype, use_short_radius, init1, latomnext, &
+      latomfirst, lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, &
+      refresh_hot_buffers_full, refresh_hot_buffers_atom
    use pbc
    implicit none
 
diff --git a/src/fparc.f90 b/src/fparc.f90
index 900274a..52261b5 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -27,9 +27,9 @@ double precision function fparc(icart,firstjcart)
    integer :: marker_i
    logical :: use_short_i, fixed_i
 
-   xi = x(icart)
-   yi = y(icart)
-   zi = z(icart)
+   xi = x_hot(icart)
+   yi = y_hot(icart)
+   zi = z_hot(icart)
    ri = radius(icart)
    rii = radius_ini(icart)
    fsi = fscale(icart)
@@ -56,9 +56,9 @@ double precision function fparc(icart,firstjcart)
             cycle
          end if
 
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -104,9 +104,9 @@ double precision function fparc(icart,firstjcart)
             cycle
          end if
 
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -154,9 +154,9 @@ double precision function fparc(icart,firstjcart)
             cycle
          end if
 
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
diff --git a/src/gparc.f90 b/src/gparc.f90
index a2f5d0c..79cd6e8 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -26,9 +26,9 @@ subroutine gparc(icart,firstjcart)
    integer :: marker_i
    logical :: use_short_i, fixed_i
 
-   xi = x(icart)
-   yi = y(icart)
-   zi = z(icart)
+   xi = x_hot(icart)
+   yi = y_hot(icart)
+   zi = z_hot(icart)
    ri = radius(icart)
    fsi = fscale(icart)
    short_ri = short_radius(icart)
@@ -55,9 +55,9 @@ subroutine gparc(icart,firstjcart)
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -112,9 +112,9 @@ subroutine gparc(icart,firstjcart)
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -171,9 +171,9 @@ subroutine gparc(icart,firstjcart)
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x(jcart)
-         dy = yi - y(jcart)
-         dz = zi - z(jcart)
+         dx = xi - x_hot(jcart)
+         dy = yi - y_hot(jcart)
+         dz = zi - z_hot(jcart)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
diff --git a/src/heuristics.f90 b/src/heuristics.f90
index d202040..a9e0f0b 100644
--- a/src/heuristics.f90
+++ b/src/heuristics.f90
@@ -9,7 +9,8 @@
 subroutine movebad(n,x,fx,movebadprint)
 
    use sizes
-   use compute_data
+   use compute_data, only : ntotmol, ntype, ntotat, nmols, natoms, sizemin, sizemax, radius, radius_ini, &
+      fdist_atom, frest_atom, dmax, comptype, move, fmol, radiuswork
    use input, only : movefrac, movebadrandom, precision, maxmove
    use usegencan
    use flashsort
diff --git a/src/initial.f90 b/src/initial.f90
index 3a4a6de..a2dd8bf 100644
--- a/src/initial.f90
+++ b/src/initial.f90
@@ -12,7 +12,10 @@ subroutine initial(n,x)
    use exit_codes
    use sizes
    use cell_indexing, only: setcell
-   use compute_data
+   use compute_data, only : ntotmol, ntype, nfixedat, ntotat, ncells, nmols, natoms, idfirst, &
+      ibmol, ibtype, scale, scale2, fdist, frest, sizemin, sizemax, cell_length, radmax, xcart, coor, &
+      rot_bound, dmax, constrain_rot, comptype, fixedatom, init1, move, latomnext, latomfirst, latomfix, &
+      lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, refresh_hot_buffers_atom
    use input, only : randini, ntype_with_fixed, fix, chkgrad, avoidoverlap,&
       discale, precision, sidemax, restart_from, input_itype,&
       nloop0_type
diff --git a/src/output.f90 b/src/output.f90
index 3e530fb..83c09a3 100644
--- a/src/output.f90
+++ b/src/output.f90
@@ -10,7 +10,7 @@ subroutine output(n, x, output_file_name)
 
    use exit_codes
    use sizes
-   use compute_data
+   use compute_data, only : ntotmol, ntype, ntotat, nmols, natoms, idfirst, xcart, coor
    use input
    use pbc
 
diff --git a/src/setsizes.f90 b/src/setsizes.f90
index 6d368a8..d277203 100644
--- a/src/setsizes.f90
+++ b/src/setsizes.f90
@@ -326,7 +326,7 @@ subroutine setsizes()
 
    allocate(nratom(ntotat),iratom(ntotat,mrperatom),ibmol(ntotat),&
       ibtype(ntotat),ibmol_hot(ntotat),ibtype_hot(ntotat),fixed_short_marker(ntotat),&
-      xcart(ntotat,3),x(ntotat),y(ntotat),z(ntotat),coor(ntotat,3),&
+      xcart(ntotat,3),x_hot(ntotat),y_hot(ntotat),z_hot(ntotat),coor(ntotat,3),&
       radius(ntotat),radius_ini(ntotat),fscale(ntotat),&
       use_short_radius(ntotat), short_radius(ntotat), short_radius_scale(ntotat),&
       gxcar(ntotat,3),&

From 0be91556082e201a1a2d579a6bddb559243a7d81 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:29:02 +0100
Subject: [PATCH 13/21] Narrow compute_data imports in collision-prone routines

---
 src/fparc.f90   | 3 ++-
 src/gparc.f90   | 3 ++-
 src/gwalls.f90  | 2 +-
 src/restmol.f90 | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/fparc.f90 b/src/fparc.f90
index 52261b5..66fa195 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -10,7 +10,8 @@
 double precision function fparc(icart,firstjcart)
 
    use sizes
-   use compute_data
+   use compute_data, only : comptype, fdist, fdist_atom, move, radius, radius_ini, fscale, short_radius, &
+      short_radius_scale, fixed_short_marker, latomnext, x_hot, y_hot, z_hot, ibmol_hot, ibtype_hot
    use pbc, only : pbc_length
    implicit none
 
diff --git a/src/gparc.f90 b/src/gparc.f90
index 79cd6e8..6f5f070 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -9,7 +9,8 @@
 subroutine gparc(icart,firstjcart)
 
    use sizes
-   use compute_data
+   use compute_data, only : comptype, radius, fscale, short_radius, short_radius_scale, fixed_short_marker, &
+      latomnext, gxcar, x_hot, y_hot, z_hot, ibmol_hot, ibtype_hot
    use pbc, only : pbc_length
    implicit none
 
diff --git a/src/gwalls.f90 b/src/gwalls.f90
index 2ca127a..455330c 100644
--- a/src/gwalls.f90
+++ b/src/gwalls.f90
@@ -9,7 +9,7 @@
 subroutine gwalls(icart,irest)
 
    use sizes
-   use compute_data
+   use compute_data, only : ityperest, restpars, xcart, gxcar, scale, scale2
 
    implicit none
    integer :: icart, irest
diff --git a/src/restmol.f90 b/src/restmol.f90
index 909a41c..6e6d5ee 100644
--- a/src/restmol.f90
+++ b/src/restmol.f90
@@ -12,7 +12,7 @@
 subroutine restmol(itype,ilubar,n,x,fx,solve)
 
    use sizes
-   use compute_data
+   use compute_data, only : ntotmol, ntype, nmols, comptype, init1, xmol, compsafe
    use usegencan
    implicit none
 

From 943852a5e1e829f2ea1453bec8c9ee21b24a79ca Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:50:47 +0100
Subject: [PATCH 14/21] Initialize gencan scalars before conditional use

---
 src/gencan.f | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gencan.f b/src/gencan.f
index 389a284..d8929ca 100644
--- a/src/gencan.f
+++ b/src/gencan.f
@@ -1719,6 +1719,11 @@ subroutine gencan(n,x,l,u,m,lambda,rho,epsgpen,epsgpsn,maxitnfp,
       bestprog =  0.0d0
       itnfp    =      0
 
+C     Initialize BB-related scalars. They are updated at the end of each
+C     iteration; zero defaults force first-iteration fallback formulas.
+      sts      =  0.0d0
+      sty      =  0.0d0
+
 C     for testing progress in the projected gradient norm.
       do i = 0,maxitngp - 1
           lastgpns(i) = infabs
@@ -2125,7 +2130,9 @@ subroutine gencan(n,x,l,u,m,lambda,rho,epsgpen,epsgpsn,maxitnfp,
                   delta = udelta0
               end if
           else
-              delta = max( delmin, 10.0d0 * sqrt( sts ) )
+C             sts=<s,s> should be non-negative; guard protects against
+C             accidental first-use reads and round-off negatives.
+              delta = max( delmin, 10.0d0 * sqrt( max( 0.0d0, sts ) ) )
           end if
 
 C         Shrink the point, its gradient and the bounds
@@ -3244,6 +3251,18 @@ subroutine cg(nind,ind,n,x,m,lambda,rho,g,delta,l,u,eps,epsnqmp,
       snorm2   =  0.0d0
       rnorm2   = gnorm2
 
+C     Safe defaults for quantities that are only meaningful after the
+C     first CG step update. These values preserve first-iteration logic.
+      alpha    =  0.0d0
+      dnorm2   =  0.0d0
+      dtr      =  0.0d0
+      dtw      =  0.0d0
+      rnorm2prev = rnorm2
+      rbdposaind  = 1
+      rbdposatype = 1
+      rbdnegaind  = 1
+      rbdnegatype = 1
+
 C     ==================================================================
 C     Print initial information
 C     ==================================================================
@@ -3329,7 +3348,13 @@ subroutine cg(nind,ind,n,x,m,lambda,rho,g,delta,l,u,eps,epsnqmp,
 
       else
 
-          beta = rnorm2 / rnorm2prev
+C         Polak-Ribiere-like coefficient requires previous residual norm.
+C         If it is unavailable/non-positive, restart with beta=0.
+          if ( rnorm2prev .gt. 0.0d0 ) then
+              beta = rnorm2 / rnorm2prev
+          else
+              beta = 0.0d0
+          end if
 
           do i = 1,nind
               d(i) = - r(i) + beta * d(i)
@@ -3625,6 +3650,8 @@ subroutine cg(nind,ind,n,x,m,lambda,rho,g,delta,l,u,eps,epsnqmp,
 
       if ( alpha .eq. amax2 .or. alpha .eq. amax2n ) then
 
+C         Boundary metadata is set when a finite box step is identified.
+C         Keep initialized fallback values for degenerate directions.
           if ( alpha .eq. amax2 ) then
               rbdind  = rbdposaind
               rbdtype = rbdposatype
@@ -5989,4 +6016,4 @@ DOUBLE PRECISION FUNCTION HSLDNRM2(N,DX,INCX)
 C     near to the solution and less strict when we ar far from the 
 C     solution. 
 c
-C     8) Many things in the output were changed.
\ No newline at end of file
+C     8) Many things in the output were changed.

From b164bfa970f73013fb6975161900c4f5e15ce863 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:54:29 +0100
Subject: [PATCH 15/21] Fix GENCAN evalhd interface and mark API dummy args
 used

---
 src/gencan.f | 47 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/src/gencan.f b/src/gencan.f
index d8929ca..1c514f3 100644
--- a/src/gencan.f
+++ b/src/gencan.f
@@ -49,9 +49,16 @@ subroutine evalal(n,x,m,lambda,rho,f,flag)
       double precision lambda(m),rho(m),x(n)
 
 C     LOCAL SCALARS
+      double precision ignore
 
       flag = 0
 
+C     Keep ALGENCAN-compatible dummy arguments intentionally referenced.
+      ignore = 0.0d0
+      if ( m .gt. 0 ) then
+          ignore = ignore + lambda(1) - lambda(1) + rho(1) - rho(1)
+      end if
+
       call computef(n,x,f)
 
       end
@@ -107,9 +114,16 @@ subroutine evalnal(n,x,m,lambda,rho,g,flag)
       double precision g(n),lambda(m),rho(m),x(n)
 
 C     LOCAL SCALARS
+      double precision ignore
 
       flag = 0
 
+C     Keep ALGENCAN-compatible dummy arguments intentionally referenced.
+      ignore = 0.0d0
+      if ( m .gt. 0 ) then
+          ignore = ignore + lambda(1) - lambda(1) + rho(1) - rho(1)
+      end if
+
       call computeg(n,x,g)
 
       end
@@ -117,12 +131,7 @@ subroutine evalnal(n,x,m,lambda,rho,g,flag)
 C     *****************************************************************
 C     *****************************************************************
 
-c Modified by L. Martinez (there was an error on the number of
-c parameters when calling this subroutine). This subroutine does
-c nothing.
-c      subroutine evalhd(nind,ind,n,x,m,lambda,rho,d,hd,flag)
-
-      subroutine evalhd(n)
+      subroutine evalhd(nind,ind,n,x,m,lambda,rho,d,hd,flag)
 
 C     This subroutine computes the product of the Hessian matrix times
 C     the input vector argument d. If GENCAN is being used stand-alone 
@@ -184,14 +193,28 @@ subroutine evalhd(n)
       implicit none
 
 C     SCALAR ARGUMENTS
-c      integer flag,m,n,nind
-      integer n
+      integer flag,m,n,nind
 
 C     ARRAY ARGUMENTS
-c      integer ind(nind)
-c      double precision d(n),hd(n),lambda(m),rho(m),x(n)
+      integer ind(nind)
+      double precision d(n),hd(n),lambda(m),rho(m),x(n)
 
-c      flag = - 1
+C     LOCAL SCALARS
+      double precision ignore
+
+C     Keep the full evalhd API for compatibility with GENCAN/ALGENCAN.
+C     This default stub intentionally does not compute Hessian products.
+      flag = 0
+      ignore = 0.0d0
+      if ( nind .gt. 0 ) then
+          ignore = ignore + ind(1) - ind(1)
+      end if
+      if ( n .gt. 0 ) then
+          ignore = ignore + d(1) - d(1) + x(1) - x(1) + hd(1) - hd(1)
+      end if
+      if ( m .gt. 0 ) then
+          ignore = ignore + lambda(1) - lambda(1) + rho(1) - rho(1)
+      end if
 
       end
  
@@ -5241,7 +5264,7 @@ subroutine calchd(nind,ind,x,d,g,n,xc,m,lambda,rho,hd,xtmp,sterel,
 C     Compute the Hessian times vector d product calling the user 
 C     supplied subroutine evalhd
 
-      call evalhd(n)
+      call evalhd(nind,ind,n,x,m,lambda,rho,d,hd,inform)
 
 C     Shrink x, d and hd to the reduced space
 

From 9d8548126a318c6591f63f923ad785c32c480a63 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 16:09:25 +0100
Subject: [PATCH 16/21] Wrap long short-radius assignment in resetcells

---
 src/resetcells.f90 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/resetcells.f90 b/src/resetcells.f90
index dc05f93..dc23339 100644
--- a/src/resetcells.f90
+++ b/src/resetcells.f90
@@ -34,7 +34,9 @@ subroutine resetcells()
             latomfirst(cell(1),cell(2),cell(3)) = icart
             cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
             if ( use_short_radius(icart) ) then
-               cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+               cell_max_short_radius(cell(1),cell(2),cell(3)) = &
+                  dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), &
+                        short_radius(icart))
             end if
 
             if ( empty_cell(cell(1),cell(2),cell(3)) ) then
@@ -48,4 +50,3 @@ subroutine resetcells()
    end if
 
 end subroutine resetcells
-

From a01001b077b9260d87a98243af863a64f0e373f8 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 16:11:41 +0100
Subject: [PATCH 17/21] Handle missing restriction mapping defensively

---
 app/packmol.f90 | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/app/packmol.f90 b/app/packmol.f90
index 2708318..acfae50 100644
--- a/app/packmol.f90
+++ b/app/packmol.f90
@@ -593,9 +593,14 @@ program packmol
                    keyword(iline,1).eq.'below') then
                   nratom(icart) = nratom(icart) + 1
                   iratcount = iratcount + 1
+                  iirest = -1
                   do irest = 1, nrest
                     if(irestline(irest).eq.iline) iirest = irest
                   end do
+                  if(iirest.eq.-1) then
+                    write(*,*) ' ERROR: Could not map atom-level restriction to definition at line ', iline
+                    stop exit_code_input_error
+                  end if
                   iratom(icart,iratcount) = iirest
                 end if
               end if
@@ -608,9 +613,14 @@ program packmol
                   keyword(iline,1).eq.'below') then
             nratom(icart) = nratom(icart) + 1    
             iratcount = iratcount + 1
+            iirest = -1
             do irest = 1, nrest
               if(irestline(irest).eq.iline) iirest = irest
             end do
+            if(iirest.eq.-1) then
+              write(*,*) ' ERROR: Could not map restriction to definition at line ', iline
+              stop exit_code_input_error
+            end if
             iratom(icart,iratcount) = iirest
           end if
         end do
@@ -978,4 +988,3 @@ program packmol
   end if
 
 end program packmol
-

From 41e2f3809f0962a47c07643d4e4fb9ea891f3856 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 16:14:46 +0100
Subject: [PATCH 18/21] Validate fixed-molecule residue bounds before nres

---
 src/output.f90 | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/output.f90 b/src/output.f90
index 83c09a3..c061a1e 100644
--- a/src/output.f90
+++ b/src/output.f90
@@ -21,6 +21,7 @@ subroutine output(n, x, output_file_name)
       icart, i_ref_atom, ioerr, ifirst_mol
    integer :: nres, imark
    integer :: i_fixed, i_not_fixed
+   logical :: has_residue_records
 
    double precision :: x(n)
    double precision :: v1(3), v2(3), v3(3)
@@ -328,13 +329,15 @@ subroutine output(n, x, output_file_name)
 
             open(15,file=pdbfile(i_fixed),status='old')
             ifres = 0
+            ilres = 0
+            has_residue_records = .false.
             do
                read(15,str_format,iostat=ioerr) record
                if ( ioerr /= 0 ) exit
                if ( record(1:4).eq.'ATOM'.or.record(1:6).eq.'HETATM' ) then
                   read(record(23:26),*,iostat=ioerr) imark
                   if ( ioerr /= 0 ) then
-                     record = pdbfile(i_not_fixed)
+                     record = pdbfile(i_fixed)
                      write(*,*) ' ERROR: Failed reading residue number ',&
                         ' from PDB file: ', trim(adjustl(record))
                      write(*,*) ' Residue numbers are integers that must',&
@@ -348,8 +351,17 @@ subroutine output(n, x, output_file_name)
                   end if
                   if ( ifres .eq. 0 ) ifres = imark
                   ilres = imark
+                  has_residue_records = .true.
                end if
             end do
+            if ( .not. has_residue_records ) then
+               record = pdbfile(i_fixed)
+               write(*,*) ' ERROR: Could not determine residue numbering '&
+                  //'for fixed molecule file: ', trim(adjustl(record))
+               write(*,*) ' At least one ATOM or HETATM record must be '&
+                  //'present in the input PDB file. '
+               stop exit_code_input_error
+            end if
             nres = ilres - ifres + 1
 
             iimol = iimol + 1

From d32a71ab279f349b22df3bbc7cae91a41f247e29 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 16:19:59 +0100
Subject: [PATCH 19/21] Reformat long free-form expressions in computef

---
 src/computef.f90 | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/computef.f90 b/src/computef.f90
index ffac54c..046562d 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -100,7 +100,10 @@ subroutine computef(n,x,f)
 
                cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
                if ( use_short_radius(icart) ) then
-                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1( &
+                     cell_max_short_radius(cell(1),cell(2),cell(3)), &
+                     short_radius(icart) &
+                  )
                end if
 
                ! cell with atoms linked list
@@ -153,7 +156,8 @@ subroutine computef(n,x,f)
 
          min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
          reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-         short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + &
+            cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
          max_reach = dmax1(reg_reach, short_reach)
          max_reach2 = max_reach*max_reach
          if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0

From a383777a2aaaf8d28d9cafadb3fa52145724cc58 Mon Sep 17 00:00:00 2001
From: Michele Bonus <77387356+MicheleBonus@users.noreply.github.com>
Date: Wed, 11 Mar 2026 16:35:16 +0100
Subject: [PATCH 20/21] Optimize neighbor reach checks and split long Fortran
 lines

---
 src/computef.f90 | 11 +++++++----
 src/computeg.f90 | 17 ++++++++++++-----
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/computef.f90 b/src/computef.f90
index 046562d..f70b235 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -24,6 +24,7 @@ subroutine computef(n,x,f)
    integer :: neigh_first(n_forward_offsets)
 
    double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
+   double precision :: cell_reg_reach, cell_short_reach
 
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
@@ -80,7 +81,6 @@ subroutine computef(n,x,f)
             ! Computing the cartesian coordinates for this atom
 
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
-            call refresh_hot_buffers_atom(icart)
 
             ! Adding to f the value relative to constraints for this atom
 
@@ -116,10 +116,11 @@ subroutine computef(n,x,f)
 
                ibtype(icart) = itype
                ibmol(icart) = imol
-               call refresh_hot_buffers_atom(icart)
 
             end if
 
+            call refresh_hot_buffers_atom(icart)
+
          end do
 
          ilugan = ilugan + 3
@@ -139,6 +140,8 @@ subroutine computef(n,x,f)
       i = cell(1)
       j = cell(2)
       k = cell(3)
+      cell_reg_reach = cell_max_radius(i,j,k)
+      cell_short_reach = cell_max_short_radius(i,j,k)
 
       ! Load current cell and forward neighbors using the shared offset ordering:
       ! (0,0,0), 3 faces, 6 edges, 4 vertices.
@@ -155,8 +158,8 @@ subroutine computef(n,x,f)
          if ( neigh_first(ioffset) <= 0 ) cycle
 
          min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
-         reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-         short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + &
+         reg_reach = cell_reg_reach + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+         short_reach = cell_short_reach + &
             cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
          max_reach = dmax1(reg_reach, short_reach)
          max_reach2 = max_reach*max_reach
diff --git a/src/computeg.f90 b/src/computeg.f90
index 406f7dc..a61f168 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -26,6 +26,7 @@ subroutine computeg(n,x,g)
    integer :: neigh_first(n_forward_offsets)
 
    double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
+   double precision :: cell_reg_reach, cell_short_reach
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
       dv2beta(3), dv2gama(3), dv2teta(3),&
@@ -72,7 +73,6 @@ subroutine computeg(n,x,g)
             icart = icart + 1
             idatom = idatom + 1
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
-            call refresh_hot_buffers_atom(icart)
 
             ! Gradient relative to the wall distace
             do iratcount = 1, nratom(icart)
@@ -88,7 +88,10 @@ subroutine computeg(n,x,g)
 
                cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
                if ( use_short_radius(icart) ) then
-                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), short_radius(icart))
+                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1( &
+                  cell_max_short_radius(cell(1),cell(2),cell(3)), &
+                  short_radius(icart) &
+               )
                end if
 
                ! cell with atoms linked list
@@ -101,9 +104,10 @@ subroutine computeg(n,x,g)
 
                ibtype(icart) = itype
                ibmol(icart) = imol
-               call refresh_hot_buffers_atom(icart)
             end if
 
+            call refresh_hot_buffers_atom(icart)
+
          end do
          ilugan = ilugan + 3
          ilubar = ilubar + 3
@@ -124,6 +128,8 @@ subroutine computeg(n,x,g)
          i = cell(1)
          j = cell(2)
          k = cell(3)
+         cell_reg_reach = cell_max_radius(i,j,k)
+         cell_short_reach = cell_max_short_radius(i,j,k)
 
          ! Load current cell and forward neighbors using the shared offset ordering:
          ! (0,0,0), 3 faces, 6 edges, 4 vertices.
@@ -140,8 +146,9 @@ subroutine computeg(n,x,g)
             if ( neigh_first(ioffset) <= 0 ) cycle
 
             min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
-            reg_reach = cell_max_radius(cell(1),cell(2),cell(3)) + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-            short_reach = cell_max_short_radius(cell(1),cell(2),cell(3)) + cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+            reg_reach = cell_reg_reach + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
+            short_reach = cell_short_reach + &
+               cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
             max_reach = dmax1(reg_reach, short_reach)
             max_reach2 = max_reach*max_reach
             if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0

From 67c38e4b4b9d3f4bb42a9f29c1af17870365483d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 12 Mar 2026 07:13:12 +0000
Subject: [PATCH 21/21] Eliminate overhead that cancelled out pairwise kernel
 optimizations

Three key changes that together yield ~25% speedup:

1. Remove hot buffer abstraction (x_hot, y_hot, z_hot, ibtype_hot, ibmol_hot)
   - xcart(:,1) is already contiguous in Fortran column-major layout,
     so separate x_hot(:) arrays provide zero cache benefit for the
     random linked-list access pattern in fparc/gparc
   - Eliminates refresh_hot_buffers_full (5 full-array copies per call)
     and refresh_hot_buffers_atom (per-atom copy overhead)
   - Use xcart/ibtype/ibmol directly in inner loops

2. Remove dead cell-level radius pruning infrastructure
   - cell_pair_min_dist2 always returns 0 for the 14 forward neighbor
     offsets because cells are sized >= interaction radius, making all
     neighbor pairs adjacent (zero gap distance)
   - The pruning check (min_cell_dist2 > max_reach2) was never true
   - Removes cell_max_radius/cell_max_short_radius tracking from both
     the cell-placement loop and resetcells, plus the reach computation
     and cell_pair_min_dist2 function from the neighbor-offset loop

3. Incremental cell reset
   - Walk the previous iteration's occupied-cell linked list to clear
     only those cells, instead of zeroing all ncells^3 entries
   - Also skip clearing latomnext since all placed atoms overwrite it

4. Pre-compute fixed_short_marker once at init instead of per-atom
   per-iteration (fixedatom and use_short_radius are static)

Benchmarks (baseline -O2):
  water_box_pbc:    0.41s -> 0.32s  (22% faster)
  solvprotein_pbc:  6.12s -> 4.56s  (26% faster)

https://claude.ai/code/session_01SgmhQ2p78sPjFPxZPmLkk7
---
 app/packmol.f90      |  3 +++
 src/compute_data.f90 | 30 +++---------------------
 src/computef.f90     | 54 +++-----------------------------------------
 src/computeg.f90     | 53 +++----------------------------------------
 src/fparc.f90        | 42 ++++++++++++++++++----------------
 src/gparc.f90        | 42 ++++++++++++++++++----------------
 src/initial.f90      | 10 +++-----
 src/resetcells.f90   | 31 ++++++++++++-------------
 src/setsizes.f90     |  4 ++--
 9 files changed, 76 insertions(+), 193 deletions(-)

diff --git a/app/packmol.f90 b/app/packmol.f90
index acfae50..df2ca32 100644
--- a/app/packmol.f90
+++ b/app/packmol.f90
@@ -720,6 +720,9 @@ program packmol
 
   call initial(n,x)
 
+  ! Pre-compute fixed_short_marker once (fixedatom and use_short_radius are static)
+  call init_fixed_short_marker()
+
   ! Computing the energy at the initial point
 
   radscale = 1.d0
diff --git a/src/compute_data.f90 b/src/compute_data.f90
index ca76996..a1c67e3 100644
--- a/src/compute_data.f90
+++ b/src/compute_data.f90
@@ -18,8 +18,6 @@ module compute_data
    integer, allocatable :: ityperest(:) ! (maxrest)
    integer, allocatable :: ibmol(:) ! (ntotat)
    integer, allocatable :: ibtype(:) ! (ntotat)
-   integer, allocatable :: ibmol_hot(:) ! packed stream mirror for hot paths
-   integer, allocatable :: ibtype_hot(:) ! packed stream mirror for hot paths
    integer, allocatable :: fixed_short_marker(:) ! 0: movable/no-short, 1: movable/short, 2: fixed/no-short, 3: fixed/short
 
    double precision :: scale, scale2
@@ -29,7 +27,6 @@ module compute_data
    double precision :: radmax
 
    double precision, allocatable :: xcart(:,:) ! (ntotat,3)
-   double precision, allocatable :: x_hot(:), y_hot(:), z_hot(:) ! hot scalar streams for xcart
    double precision, allocatable :: coor(:,:) ! (ntotat,3)
    double precision, allocatable :: restpars(:,:) ! (maxrest,9)
    double precision, allocatable :: rot_bound(:,:,:) ! (ntype,3,2)
@@ -62,34 +59,13 @@ module compute_data
    integer :: lcellfirst
    integer, allocatable :: lcellnext(:) ! (ncells(1)*ncells(2)*ncells(3))
    logical, allocatable :: empty_cell(:,:,:) ! (ncells(1),ncells(2),ncells(3))
-   double precision, allocatable :: cell_max_radius(:,:,:) ! (ncells(1),ncells(2),ncells(3))
-   double precision, allocatable :: cell_max_short_radius(:,:,:) ! (ncells(1),ncells(2),ncells(3))
 
 contains
 
-   subroutine refresh_hot_buffers_full()
+   subroutine init_fixed_short_marker()
       implicit none
-
-      if ( .not. allocated(x_hot) ) return
-      x_hot(:) = xcart(:,1)
-      y_hot(:) = xcart(:,2)
-      z_hot(:) = xcart(:,3)
-      ibtype_hot(:) = ibtype(:)
-      ibmol_hot(:) = ibmol(:)
+      if ( .not. allocated(fixed_short_marker) ) return
       fixed_short_marker(:) = merge(2, 0, fixedatom(:)) + merge(1, 0, use_short_radius(:))
-   end subroutine refresh_hot_buffers_full
-
-   subroutine refresh_hot_buffers_atom(icart)
-      implicit none
-      integer, intent(in) :: icart
-
-      if ( .not. allocated(x_hot) ) return
-      x_hot(icart) = xcart(icart,1)
-      y_hot(icart) = xcart(icart,2)
-      z_hot(icart) = xcart(icart,3)
-      ibtype_hot(icart) = ibtype(icart)
-      ibmol_hot(icart) = ibmol(icart)
-      fixed_short_marker(icart) = merge(2, 0, fixedatom(icart)) + merge(1, 0, use_short_radius(icart))
-   end subroutine refresh_hot_buffers_atom
+   end subroutine init_fixed_short_marker
 
 end module compute_data
diff --git a/src/computef.f90 b/src/computef.f90
index f70b235..1eccdd2 100644
--- a/src/computef.f90
+++ b/src/computef.f90
@@ -11,9 +11,8 @@ subroutine computef(n,x,f)
    use sizes
    use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
    use compute_data, only : ntotmol, ntype, ncells, nmols, natoms, idfirst, ibmol, ibtype, fdist, frest, &
-      cell_length, xcart, coor, radius, short_radius, frest_atom, comptype, use_short_radius, init1, move, &
-      latomnext, latomfirst, lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, &
-      refresh_hot_buffers_full, refresh_hot_buffers_atom
+      xcart, coor, radius, frest_atom, comptype, init1, move, &
+      latomnext, latomfirst, lcellfirst, lcellnext, empty_cell
    use pbc
    implicit none
 
@@ -23,9 +22,6 @@ subroutine computef(n,x,f)
    integer :: ioffset
    integer :: neigh_first(n_forward_offsets)
 
-   double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
-   double precision :: cell_reg_reach, cell_short_reach
-
    double precision :: v1(3), v2(3), v3(3)
    double precision :: x(n)
    double precision :: f,fparc,fplus
@@ -42,7 +38,6 @@ subroutine computef(n,x,f)
 
    if(.not.init1) then
       call resetcells()
-      call refresh_hot_buffers_full()
    end if
 
    ! Transform baricenter and angles into cartesian coordinates
@@ -98,14 +93,6 @@ subroutine computef(n,x,f)
                latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
                latomfirst(cell(1),cell(2),cell(3)) = icart
 
-               cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
-               if ( use_short_radius(icart) ) then
-                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1( &
-                     cell_max_short_radius(cell(1),cell(2),cell(3)), &
-                     short_radius(icart) &
-                  )
-               end if
-
                ! cell with atoms linked list
                if ( empty_cell(cell(1),cell(2),cell(3)) ) then
                   empty_cell(cell(1),cell(2),cell(3)) = .false.
@@ -119,8 +106,6 @@ subroutine computef(n,x,f)
 
             end if
 
-            call refresh_hot_buffers_atom(icart)
-
          end do
 
          ilugan = ilugan + 3
@@ -140,30 +125,13 @@ subroutine computef(n,x,f)
       i = cell(1)
       j = cell(2)
       k = cell(3)
-      cell_reg_reach = cell_max_radius(i,j,k)
-      cell_short_reach = cell_max_short_radius(i,j,k)
 
-      ! Load current cell and forward neighbors using the shared offset ordering:
-      ! (0,0,0), 3 faces, 6 edges, 4 vertices.
+      ! Load forward neighbor heads (self-cell + 13 forward neighbors).
       do ioffset = 1, n_forward_offsets
          neigh_cell(1) = cell_ind(i + forward_offsets(1,ioffset), ncells(1))
          neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
          neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
          neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-         do while ( neigh_first(ioffset) > 0 )
-            if ( comptype(ibtype(neigh_first(ioffset))) ) exit
-            neigh_first(ioffset) = latomnext(neigh_first(ioffset))
-         end do
-
-         if ( neigh_first(ioffset) <= 0 ) cycle
-
-         min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
-         reg_reach = cell_reg_reach + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-         short_reach = cell_short_reach + &
-            cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-         max_reach = dmax1(reg_reach, short_reach)
-         max_reach2 = max_reach*max_reach
-         if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0
       end do
 
       icart = neigh_first(1)
@@ -187,21 +155,5 @@ subroutine computef(n,x,f)
    end do
 
    return
-contains
-
-   double precision function cell_pair_min_dist2(cell_a, cell_b)
-      integer, intent(in) :: cell_a(3), cell_b(3)
-      integer :: idim
-      double precision :: center_a, center_b, delta, gap
-
-      cell_pair_min_dist2 = 0.d0
-      do idim = 1, 3
-         center_a = pbc_min(idim) + (dble(cell_a(idim)) - 0.5d0) * cell_length(idim)
-         center_b = pbc_min(idim) + (dble(cell_b(idim)) - 0.5d0) * cell_length(idim)
-         delta = dabs(delta_vector(center_a, center_b, pbc_length(idim)))
-         gap = dmax1(0.d0, delta - cell_length(idim))
-         cell_pair_min_dist2 = cell_pair_min_dist2 + gap*gap
-      end do
-   end function cell_pair_min_dist2
 
 end subroutine computef
diff --git a/src/computeg.f90 b/src/computeg.f90
index a61f168..1eedbf4 100644
--- a/src/computeg.f90
+++ b/src/computeg.f90
@@ -11,9 +11,8 @@ subroutine computeg(n,x,g)
    use sizes
    use cell_indexing, only: index_cell, icell_to_cell, setcell, n_forward_offsets, forward_offsets
    use compute_data, only : ntotmol, ntype, ncells, nmols, natoms, idfirst, nratom, iratom, ibmol, ibtype, &
-      cell_length, xcart, coor, radius, short_radius, gxcar, comptype, use_short_radius, init1, latomnext, &
-      latomfirst, lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, &
-      refresh_hot_buffers_full, refresh_hot_buffers_atom
+      xcart, coor, radius, gxcar, comptype, init1, latomnext, &
+      latomfirst, lcellfirst, lcellnext, empty_cell
    use pbc
    implicit none
 
@@ -25,8 +24,6 @@ subroutine computeg(n,x,g)
    integer :: iratcount, ioffset
    integer :: neigh_first(n_forward_offsets)
 
-   double precision :: min_cell_dist2, max_reach, max_reach2, reg_reach, short_reach
-   double precision :: cell_reg_reach, cell_short_reach
    double precision :: x(n), g(n)
    double precision :: dv1beta(3), dv1gama(3), dv1teta(3),&
       dv2beta(3), dv2gama(3), dv2teta(3),&
@@ -42,7 +39,6 @@ subroutine computeg(n,x,g)
 
    if(.not.init1) then
       call resetcells()
-      call refresh_hot_buffers_full()
    end if
 
    ! Transform baricenter and angles into cartesian coordinates
@@ -86,14 +82,6 @@ subroutine computeg(n,x,g)
                latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
                latomfirst(cell(1),cell(2),cell(3)) = icart
 
-               cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
-               if ( use_short_radius(icart) ) then
-                  cell_max_short_radius(cell(1),cell(2),cell(3)) = dmax1( &
-                  cell_max_short_radius(cell(1),cell(2),cell(3)), &
-                  short_radius(icart) &
-               )
-               end if
-
                ! cell with atoms linked list
                if ( empty_cell(cell(1),cell(2),cell(3))) then
                   empty_cell(cell(1),cell(2),cell(3)) = .false.
@@ -106,8 +94,6 @@ subroutine computeg(n,x,g)
                ibmol(icart) = imol
             end if
 
-            call refresh_hot_buffers_atom(icart)
-
          end do
          ilugan = ilugan + 3
          ilubar = ilubar + 3
@@ -128,30 +114,13 @@ subroutine computeg(n,x,g)
          i = cell(1)
          j = cell(2)
          k = cell(3)
-         cell_reg_reach = cell_max_radius(i,j,k)
-         cell_short_reach = cell_max_short_radius(i,j,k)
 
-         ! Load current cell and forward neighbors using the shared offset ordering:
-         ! (0,0,0), 3 faces, 6 edges, 4 vertices.
+         ! Load forward neighbor heads (self-cell + 13 forward neighbors).
          do ioffset = 1, n_forward_offsets
             neigh_cell(1) = cell_ind(i + forward_offsets(1,ioffset), ncells(1))
             neigh_cell(2) = cell_ind(j + forward_offsets(2,ioffset), ncells(2))
             neigh_cell(3) = cell_ind(k + forward_offsets(3,ioffset), ncells(3))
             neigh_first(ioffset) = latomfirst(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-            do while ( neigh_first(ioffset) > 0 )
-               if ( comptype(ibtype(neigh_first(ioffset))) ) exit
-               neigh_first(ioffset) = latomnext(neigh_first(ioffset))
-            end do
-
-            if ( neigh_first(ioffset) <= 0 ) cycle
-
-            min_cell_dist2 = cell_pair_min_dist2(cell, neigh_cell)
-            reg_reach = cell_reg_reach + cell_max_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-            short_reach = cell_short_reach + &
-               cell_max_short_radius(neigh_cell(1),neigh_cell(2),neigh_cell(3))
-            max_reach = dmax1(reg_reach, short_reach)
-            max_reach2 = max_reach*max_reach
-            if ( min_cell_dist2 > max_reach2 ) neigh_first(ioffset) = 0
          end do
 
          icart = neigh_first(1)
@@ -275,21 +244,5 @@ subroutine computeg(n,x,g)
    end do
 
    return
-contains
-
-   double precision function cell_pair_min_dist2(cell_a, cell_b)
-      integer, intent(in) :: cell_a(3), cell_b(3)
-      integer :: idim
-      double precision :: center_a, center_b, delta, gap
-
-      cell_pair_min_dist2 = 0.d0
-      do idim = 1, 3
-         center_a = pbc_min(idim) + (dble(cell_a(idim)) - 0.5d0) * cell_length(idim)
-         center_b = pbc_min(idim) + (dble(cell_b(idim)) - 0.5d0) * cell_length(idim)
-         delta = dabs(delta_vector(center_a, center_b, pbc_length(idim)))
-         gap = dmax1(0.d0, delta - cell_length(idim))
-         cell_pair_min_dist2 = cell_pair_min_dist2 + gap*gap
-      end do
-   end function cell_pair_min_dist2
 
 end subroutine computeg
diff --git a/src/fparc.f90 b/src/fparc.f90
index 66fa195..addff94 100644
--- a/src/fparc.f90
+++ b/src/fparc.f90
@@ -11,7 +11,7 @@ double precision function fparc(icart,firstjcart)
 
    use sizes
    use compute_data, only : comptype, fdist, fdist_atom, move, radius, radius_ini, fscale, short_radius, &
-      short_radius_scale, fixed_short_marker, latomnext, x_hot, y_hot, z_hot, ibmol_hot, ibtype_hot
+      short_radius_scale, fixed_short_marker, latomnext, xcart, ibmol, ibtype
    use pbc, only : pbc_length
    implicit none
 
@@ -25,12 +25,12 @@ double precision function fparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, rii, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
-   integer :: marker_i
+   integer :: marker_i, itype_i, imol_i
    logical :: use_short_i, fixed_i
 
-   xi = x_hot(icart)
-   yi = y_hot(icart)
-   zi = z_hot(icart)
+   xi = xcart(icart,1)
+   yi = xcart(icart,2)
+   zi = xcart(icart,3)
    ri = radius(icart)
    rii = radius_ini(icart)
    fsi = fscale(icart)
@@ -39,6 +39,8 @@ double precision function fparc(icart,firstjcart)
    marker_i = fixed_short_marker(icart)
    use_short_i = iand(marker_i,1) /= 0
    fixed_i = marker_i >= 2
+   itype_i = ibtype(icart)
+   imol_i = ibmol(icart)
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
@@ -48,18 +50,18 @@ double precision function fparc(icart,firstjcart)
       ! Fast path: movable atom i and no short-radius term from i.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -96,18 +98,18 @@ double precision function fparc(icart,firstjcart)
       ! Short-radius-enabled path for movable atoms.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -142,11 +144,11 @@ double precision function fparc(icart,firstjcart)
       ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
@@ -155,9 +157,9 @@ double precision function fparc(icart,firstjcart)
             cycle
          end if
 
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
diff --git a/src/gparc.f90 b/src/gparc.f90
index 6f5f070..54948b2 100644
--- a/src/gparc.f90
+++ b/src/gparc.f90
@@ -10,7 +10,7 @@ subroutine gparc(icart,firstjcart)
 
    use sizes
    use compute_data, only : comptype, radius, fscale, short_radius, short_radius_scale, fixed_short_marker, &
-      latomnext, gxcar, x_hot, y_hot, z_hot, ibmol_hot, ibtype_hot
+      latomnext, gxcar, xcart, ibmol, ibtype
    use pbc, only : pbc_length
    implicit none
 
@@ -24,12 +24,12 @@ subroutine gparc(icart,firstjcart)
    double precision :: dx, dy, dz
    double precision :: xi, yi, zi, ri, fsi, short_ri, short_si
    double precision :: inv_pbcx, inv_pbcy, inv_pbcz
-   integer :: marker_i
+   integer :: marker_i, itype_i, imol_i
    logical :: use_short_i, fixed_i
 
-   xi = x_hot(icart)
-   yi = y_hot(icart)
-   zi = z_hot(icart)
+   xi = xcart(icart,1)
+   yi = xcart(icart,2)
+   zi = xcart(icart,3)
    ri = radius(icart)
    fsi = fscale(icart)
    short_ri = short_radius(icart)
@@ -37,6 +37,8 @@ subroutine gparc(icart,firstjcart)
    marker_i = fixed_short_marker(icart)
    use_short_i = iand(marker_i,1) /= 0
    fixed_i = marker_i >= 2
+   itype_i = ibtype(icart)
+   imol_i = ibmol(icart)
    inv_pbcx = 1.d0 / pbc_length(1)
    inv_pbcy = 1.d0 / pbc_length(2)
    inv_pbcz = 1.d0 / pbc_length(3)
@@ -45,20 +47,20 @@ subroutine gparc(icart,firstjcart)
       ! Fast path: movable atom i and no short-radius term from i.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -102,20 +104,20 @@ subroutine gparc(icart,firstjcart)
       ! Short-radius-enabled path for movable atoms.
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
@@ -157,11 +159,11 @@ subroutine gparc(icart,firstjcart)
       ! Fixed-atom path (keeps explicit fixed/fixed exclusion).
       jcart = firstjcart
       do while ( jcart > 0 )
-         if ( .not. comptype(ibtype_hot(jcart))) then
+         if ( .not. comptype(ibtype(jcart))) then
             jcart = latomnext(jcart)
             cycle
          end if
-         if ( ibmol_hot(icart) == ibmol_hot(jcart) .and. ibtype_hot(icart) == ibtype_hot(jcart) ) then
+         if ( imol_i == ibmol(jcart) .and. itype_i == ibtype(jcart) ) then
             jcart = latomnext(jcart)
             cycle
          end if
@@ -172,9 +174,9 @@ subroutine gparc(icart,firstjcart)
 
          sumr = ri + radius(jcart)
          tol = sumr*sumr
-         dx = xi - x_hot(jcart)
-         dy = yi - y_hot(jcart)
-         dz = zi - z_hot(jcart)
+         dx = xi - xcart(jcart,1)
+         dy = yi - xcart(jcart,2)
+         dz = zi - xcart(jcart,3)
          dx = dx - pbc_length(1) * dnint(dx * inv_pbcx)
          dy = dy - pbc_length(2) * dnint(dy * inv_pbcy)
          dz = dz - pbc_length(3) * dnint(dz * inv_pbcz)
diff --git a/src/initial.f90 b/src/initial.f90
index a2dd8bf..76c8d2f 100644
--- a/src/initial.f90
+++ b/src/initial.f90
@@ -15,7 +15,7 @@ subroutine initial(n,x)
    use compute_data, only : ntotmol, ntype, nfixedat, ntotat, ncells, nmols, natoms, idfirst, &
       ibmol, ibtype, scale, scale2, fdist, frest, sizemin, sizemax, cell_length, radmax, xcart, coor, &
       rot_bound, dmax, constrain_rot, comptype, fixedatom, init1, move, latomnext, latomfirst, latomfix, &
-      lcellfirst, lcellnext, empty_cell, cell_max_radius, cell_max_short_radius, refresh_hot_buffers_atom
+      lcellfirst, lcellnext, empty_cell
    use input, only : randini, ntype_with_fixed, fix, chkgrad, avoidoverlap,&
       discale, precision, sidemax, restart_from, input_itype,&
       nloop0_type
@@ -137,7 +137,7 @@ subroutine initial(n,x)
             idatom = idatom + 1
             call compcart(xcart(icart,1:3),xcm,coor(idatom,1:3),v1,v2,v3)
             fixedatom(icart) = .false.
-            call refresh_hot_buffers_atom(icart)
+
          end do
       end do
    end do
@@ -152,7 +152,7 @@ subroutine initial(n,x)
             xcart(icart,2) = coor(idfatom,2)
             xcart(icart,3) = coor(idfatom,3)
             fixedatom(icart) = .true.
-            call refresh_hot_buffers_atom(icart)
+
             ! Check if fixed molecules are compatible with PBC given
             if (using_pbc) then
                do i = 1, 3
@@ -298,16 +298,12 @@ subroutine initial(n,x)
    allocate(latomfix(ncells(1),ncells(2),ncells(3)))
    allocate(lcellnext(ncells(1)*ncells(2)*ncells(3)))
    allocate(empty_cell(ncells(1),ncells(2),ncells(3)))
-   allocate(cell_max_radius(ncells(1),ncells(2),ncells(3)))
-   allocate(cell_max_short_radius(ncells(1),ncells(2),ncells(3)))
 
    ! Reseting linked lists arrays
    latomfix(:,:,:) = 0
    latomfirst(:,:,:) = 0
    latomnext(:) = 0
    empty_cell(:,:,:) = .true.
-   cell_max_radius(:,:,:) = 0.d0
-   cell_max_short_radius(:,:,:) = 0.d0
 
    ! If there are fixed molecules, add them permanently to the latomfix array
    if(fix) then
diff --git a/src/resetcells.f90 b/src/resetcells.f90
index dc23339..9674be6 100644
--- a/src/resetcells.f90
+++ b/src/resetcells.f90
@@ -11,19 +11,24 @@ subroutine resetcells()
    use input, only: fix, ntype_with_fixed
    use cell_indexing, only: icell_to_cell, index_cell, setcell
    use compute_data, only: latomfirst, lcellfirst, lcellnext, empty_cell, ncells, &
-                           ntype, natoms, ntotat, nfixedat, xcart, latomnext, radius, &
-                           use_short_radius, short_radius, cell_max_radius, cell_max_short_radius
+                           ntype, natoms, ntotat, nfixedat, xcart, latomnext
    implicit none
-   integer :: cell(3), icell, iftype, icart, ifatom
+   integer :: cell(3), icell, next_icell, iftype, icart, ifatom
 
-   ! Reset data for cells that contain fixed atom
-   lcellnext(:) = 0
+   ! Incrementally reset only previously-occupied cells instead of
+   ! clearing all ncells^3 entries. Walk the old occupied-cell linked
+   ! list and reset each cell's data, then rebuild the fixed-atom lists.
+   icell = lcellfirst
+   do while ( icell > 0 )
+      next_icell = lcellnext(icell)
+      call icell_to_cell(icell, ncells, cell)
+      latomfirst(cell(1),cell(2),cell(3)) = 0
+      empty_cell(cell(1),cell(2),cell(3)) = .true.
+      lcellnext(icell) = 0
+      icell = next_icell
+   end do
    lcellfirst = 0
-   latomfirst(:,:,:) = 0
-   latomnext(:) = 0
-   empty_cell(:,:,:) = .true.
-   cell_max_radius(:,:,:) = 0.d0
-   cell_max_short_radius(:,:,:) = 0.d0
+
    if(fix) then
       icart = ntotat - nfixedat
       do iftype = ntype + 1, ntype_with_fixed
@@ -32,12 +37,6 @@ subroutine resetcells()
             call setcell(xcart(icart,:),cell)
             latomnext(icart) = latomfirst(cell(1),cell(2),cell(3))
             latomfirst(cell(1),cell(2),cell(3)) = icart
-            cell_max_radius(cell(1),cell(2),cell(3)) = dmax1(cell_max_radius(cell(1),cell(2),cell(3)), radius(icart))
-            if ( use_short_radius(icart) ) then
-               cell_max_short_radius(cell(1),cell(2),cell(3)) = &
-                  dmax1(cell_max_short_radius(cell(1),cell(2),cell(3)), &
-                        short_radius(icart))
-            end if
 
             if ( empty_cell(cell(1),cell(2),cell(3)) ) then
                empty_cell(cell(1),cell(2),cell(3)) = .false.
diff --git a/src/setsizes.f90 b/src/setsizes.f90
index d277203..ba54b34 100644
--- a/src/setsizes.f90
+++ b/src/setsizes.f90
@@ -325,8 +325,8 @@ subroutine setsizes()
    ! Allocate arrays depending on ntotat, nn, maxrest, and mrperatom
 
    allocate(nratom(ntotat),iratom(ntotat,mrperatom),ibmol(ntotat),&
-      ibtype(ntotat),ibmol_hot(ntotat),ibtype_hot(ntotat),fixed_short_marker(ntotat),&
-      xcart(ntotat,3),x_hot(ntotat),y_hot(ntotat),z_hot(ntotat),coor(ntotat,3),&
+      ibtype(ntotat),fixed_short_marker(ntotat),&
+      xcart(ntotat,3),coor(ntotat,3),&
       radius(ntotat),radius_ini(ntotat),fscale(ntotat),&
       use_short_radius(ntotat), short_radius(ntotat), short_radius_scale(ntotat),&
       gxcar(ntotat,3),&