Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d01d2c0
Optimize cell neighbor lookup in pairwise loops
MicheleBonus Mar 11, 2026
ec58ec0
Merge pull request #1 from MicheleBonus/codex/optimize-performance-of…
MicheleBonus Mar 11, 2026
9a03d75
Optimize pairwise kernels in fparc/gparc
MicheleBonus Mar 11, 2026
988029a
Merge pull request #2 from MicheleBonus/codex/optimize-packmol-for-speed
MicheleBonus Mar 11, 2026
208f05a
Refactor forward cell-neighbor traversal with shared offsets
MicheleBonus Mar 11, 2026
b10f2c8
Merge pull request #3 from MicheleBonus/codex/refactor-neighbor-acces…
MicheleBonus Mar 11, 2026
dcd9691
Split pair kernels into fast/short/fixed paths and prefilter active a…
MicheleBonus Mar 11, 2026
eba4d12
Merge pull request #4 from MicheleBonus/codex/refactor-hot-logic-in-f…
MicheleBonus Mar 11, 2026
5775ac5
Add hot-path scalar streams for pairwise kernels
MicheleBonus Mar 11, 2026
a579b0e
Merge pull request #5 from MicheleBonus/codex/define-explicit-hot-pat…
MicheleBonus Mar 11, 2026
7a0703f
Add per-cell radius bounds and pair-distance pruning
MicheleBonus Mar 11, 2026
0753de9
Merge pull request #6 from MicheleBonus/codex/extend-cell-bookkeeping…
MicheleBonus Mar 11, 2026
6b84f71
Optimize hot-loop square operations in pair kernels
MicheleBonus Mar 11, 2026
672f6bf
Merge pull request #7 from MicheleBonus/codex/refactor-hot-loop-squar…
MicheleBonus Mar 11, 2026
99a44a9
Add explicit build profiles and numerics profile check
MicheleBonus Mar 11, 2026
f187d25
Merge pull request #8 from MicheleBonus/codex/update-build-configurat…
MicheleBonus Mar 11, 2026
8c0615b
docs: update build/profile docs and add Michele Bonus credentials
MicheleBonus Mar 11, 2026
408bfb3
Merge pull request #9 from MicheleBonus/codex/plan-adjustments-and-im…
MicheleBonus Mar 11, 2026
c6909bb
Fix pgencan module import ambiguity for x
MicheleBonus Mar 11, 2026
c77f328
Merge pull request #10 from MicheleBonus/codex/fix-unused-arguments-a…
MicheleBonus Mar 11, 2026
dee125a
Fix missing init1 import in pgencan
MicheleBonus Mar 11, 2026
7e43663
Merge pull request #11 from MicheleBonus/codex/fix-unused-dummy-argum…
MicheleBonus Mar 11, 2026
04c7888
Rename hot coordinate buffers and tighten compute_data imports
MicheleBonus Mar 11, 2026
eb5b78e
Merge pull request #12 from MicheleBonus/codex/rename-hot-buffer-arra…
MicheleBonus Mar 11, 2026
0be9155
Narrow compute_data imports in collision-prone routines
MicheleBonus Mar 11, 2026
8848245
Merge pull request #13 from MicheleBonus/codex/refactor-imports-in-hi…
MicheleBonus Mar 11, 2026
943852a
Initialize gencan scalars before conditional use
MicheleBonus Mar 11, 2026
d884d8d
Merge pull request #14 from MicheleBonus/codex/fix-variable-initializ…
MicheleBonus Mar 11, 2026
b164bfa
Fix GENCAN evalhd interface and mark API dummy args used
MicheleBonus Mar 11, 2026
0264b8f
Merge pull request #15 from MicheleBonus/codex/verify-and-clean-dummy…
MicheleBonus Mar 11, 2026
9d85481
Wrap long short-radius assignment in resetcells
MicheleBonus Mar 11, 2026
2689c37
Merge pull request #16 from MicheleBonus/codex/refactor-assignment-in…
MicheleBonus Mar 11, 2026
a01001b
Handle missing restriction mapping defensively
MicheleBonus Mar 11, 2026
fbe2fe3
Merge pull request #17 from MicheleBonus/codex/add-defensive-initiali…
MicheleBonus Mar 11, 2026
41e2f38
Validate fixed-molecule residue bounds before nres
MicheleBonus Mar 11, 2026
ebbca56
Merge pull request #18 from MicheleBonus/codex/initialize-ilres-and-v…
MicheleBonus Mar 11, 2026
d32a71a
Reformat long free-form expressions in computef
MicheleBonus Mar 11, 2026
da192e6
Merge pull request #19 from MicheleBonus/codex/reformat-assignments-i…
MicheleBonus Mar 11, 2026
a383777
Optimize neighbor reach checks and split long Fortran lines
MicheleBonus Mar 11, 2026
ac0a56a
Merge pull request #20 from MicheleBonus/codex/improve-program-perfor…
MicheleBonus Mar 11, 2026
67c38e4
Eliminate overhead that cancelled out pairwise kernel optimizations
claude Mar 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,8 @@ dynamics and docking. Journal of Computational Chemistry, 24(7):819-825,
2003.

Home-Page: http://m3g.iqm.unicamp.br/packmol

Contributor:
- Michele Bonus, Heinrich Heine University Düsseldorf
Email: michele.bonus@hhu.de
GitHub: https://github.com/MicheleBonus/
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ packmol changelog
Version 21.2.2-DEV
--------------
- ![INFO][badge-info] Add changelog verification CI run.
- ![INFO][badge-info] Update README build/profile instructions and document recent cell-list and pair-kernel performance work.
- ![INFO][badge-info] Add contributor credentials for Michele Bonus (Heinrich Heine University Düsseldorf).

Version 21.2.1
--------------
Expand Down
41 changes: 37 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,48 @@
cmake_minimum_required(VERSION 3.10)
project(packmol Fortran)

set(PACKMOL_PROFILE "baseline" CACHE STRING "Build profile: baseline, perf-native, debug, sanitize")
set_property(CACHE PACKMOL_PROFILE PROPERTY STRINGS baseline perf-native debug sanitize)
option(PACKMOL_UNSAFE_MATH "Enable unsafe/fast math optimizations" OFF)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif(NOT CMAKE_BUILD_TYPE)
endif()

if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
add_compile_options(
-Wall "$<$<CONFIG:Debug>:-Werror>"
)
add_compile_options(-Wall)
endif()

# Translate PACKMOL_PROFILE into compiler (and, for sanitize, linker) options.
# Exactly one branch runs; an unrecognized profile name aborts configuration.
# NOTE(review): CMAKE_BUILD_TYPE defaults to Release earlier in this file, so the
# build type's own flags are applied together with the options added here --
# confirm that combination is intended for the debug and sanitize profiles.
if(PACKMOL_PROFILE STREQUAL "baseline")
# Conservative optimization level, no CPU-specific flags.
add_compile_options(-O2)
elseif(PACKMOL_PROFILE STREQUAL "perf-native")
add_compile_options(-O3)
# Host-CPU tuning flags differ per compiler family; other compilers get -O3 only.
if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
add_compile_options(-march=native -mtune=native -funroll-loops)
elseif(CMAKE_Fortran_COMPILER_ID MATCHES IntelLLVM OR CMAKE_Fortran_COMPILER_ID MATCHES Intel)
add_compile_options(-xHost -ipo)
endif()
elseif(PACKMOL_PROFILE STREQUAL "debug")
add_compile_options(-g)
# Bounds checking, backtraces and FP traps are added for GNU only;
# other compilers get -g alone.
if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
add_compile_options(-fcheck=bounds -fbacktrace -ffpe-trap=zero,overflow,underflow)
endif()
elseif(PACKMOL_PROFILE STREQUAL "sanitize")
add_compile_options(-O1 -g)
# Sanitizer flags are passed to both the compile and the link step (GNU only).
# NOTE(review): add_link_options() was introduced in CMake 3.13, while this file
# declares cmake_minimum_required(VERSION 3.10) -- confirm the minimum version.
if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
add_compile_options(-fno-omit-frame-pointer -fsanitize=address,undefined -fcheck=all)
add_link_options(-fsanitize=address,undefined)
endif()
else()
message(FATAL_ERROR "Unknown PACKMOL_PROFILE='${PACKMOL_PROFILE}'. Use one of: baseline, perf-native, debug, sanitize")
endif()

# Fast-math is opt-in through the PACKMOL_UNSAFE_MATH option and is added
# independently of the profile chosen via PACKMOL_PROFILE.
# Compilers other than GNU and Intel receive no extra flag here.
if(PACKMOL_UNSAFE_MATH)
if(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
add_compile_options(-ffast-math)
elseif(CMAKE_Fortran_COMPILER_ID MATCHES IntelLLVM OR CMAKE_Fortran_COMPILER_ID MATCHES Intel)
add_compile_options(-fp-model fast=2)
endif()
endif()

# Build the executable
Expand Down
102 changes: 80 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,26 @@ FORTRAN=/usr/bin/gfortran
#
# Change the flags of the compilation if you want:
#
FLAGS= -O3 -march=native -funroll-loops
PROFILE ?= baseline
UNSAFE_MATH ?= 0

ifneq (,$(filter perf perf-native,$(MAKECMDGOALS)))
PROFILE := perf-native
endif
ifneq (,$(filter devel,$(MAKECMDGOALS)))
PROFILE := devel
endif
ifneq (,$(filter sanitize,$(MAKECMDGOALS)))
PROFILE := sanitize
endif
ifneq (,$(filter static,$(MAKECMDGOALS)))
PROFILE := static
endif
ifneq (,$(filter baseline,$(MAKECMDGOALS)))
PROFILE := baseline
endif

FLAGS=
SRCDIR= src
MAINDIR= app
###################################################################
Expand All @@ -30,20 +49,54 @@ MAINDIR= app
# Flags for compiling development version
#
GENCANFLAGS := $(FLAGS)
# Flags for the routines that signal with --fast-math
IEEE_SIGNAL_FLAGS := $(FLAGS)
ifeq ($(MAKECMDGOALS),devel)
FC_VERSION := $(shell $(FORTRAN) --version 2>/dev/null | head -n 1)

ifeq ($(PROFILE),baseline)
FLAGS = -O2
GENCANFLAGS = $(FLAGS)
endif

ifeq ($(PROFILE),perf-native)
FLAGS = -O3
GENCANFLAGS = $(FLAGS)
ifneq (,$(findstring GNU Fortran,$(FC_VERSION)))
FLAGS += -march=native -mtune=native -funroll-loops
GENCANFLAGS += -march=native -mtune=native -funroll-loops
else ifneq (,$(findstring ifx,$(FC_VERSION)))
FLAGS += -xHost -ipo
GENCANFLAGS += -xHost -ipo
else ifneq (,$(findstring ifort,$(FC_VERSION)))
FLAGS += -xHost -ipo
GENCANFLAGS += -xHost -ipo
endif
endif

ifeq ($(PROFILE),devel)
FLAGS = -Wall -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow
GENCANFLAGS = -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow
GENCANFLAGS = -fcheck=bounds -g -fbacktrace -ffpe-trap=zero,overflow,underflow
endif
ifeq ($(MAKECMDGOALS),perf)
FLAGS = -g -pg
GENCANFLAGS = -g -pg

ifeq ($(PROFILE),sanitize)
FLAGS = -O1 -g -fno-omit-frame-pointer
GENCANFLAGS = $(FLAGS)
ifneq (,$(findstring GNU Fortran,$(FC_VERSION)))
FLAGS += -fsanitize=address,undefined -fcheck=all
GENCANFLAGS += -fsanitize=address,undefined -fcheck=all
endif
ifeq ($(MAKECMDGOALS),static)
FLAGS = -O3 --fast-math -static
GENCANFLAGS = -O3 --fast-math -static
endif

ifeq ($(PROFILE),static)
FLAGS = -O2 -static
GENCANFLAGS = $(FLAGS)
endif

ifeq ($(UNSAFE_MATH),1)
FLAGS += -ffast-math
GENCANFLAGS += -ffast-math
endif
IEEE_SIGNAL_FLAGS = $(FLAGS)


#
# Files required
#
Expand Down Expand Up @@ -103,17 +156,22 @@ all : $(oall)
#
# Compiling with flags for development
#
static : devel
perf : devel
devel : $(oall)
@echo " ------------------------------------------------------ "
@echo " Compiling packmol with $(FORTRAN) "
@echo " Flags: $(FLAGS)"
@echo " ------------------------------------------------------ "
@$(FORTRAN) -o packmol $(oall) $(FLAGS)
@echo " ------------------------------------------------------ "
@echo " Packmol succesfully built. "
@echo " ------------------------------------------------------ "
baseline: PROFILE=baseline
baseline: all

perf-native: PROFILE=perf-native
perf-native: all

devel: PROFILE=devel
devel: all

sanitize: PROFILE=sanitize
sanitize: all

static: PROFILE=static
static: all

perf : perf-native
#
# Modules
#
Expand Down
96 changes: 95 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,101 @@ uvx packmol < inp.pack
`FPM_FC=compiler`, for example for `ifort`, use in bash, `export FPM_FC=ifort`.



### Build profiles (Makefile and CMake)

Packmol now supports explicit build profiles so you can choose between conservative optimized builds, aggressive benchmarking builds, and debugging/sanitized builds.

#### Baseline release profile (default)

This profile prioritizes stability and portability with conservative optimization (`-O2`, no CPU-specific flags).

- **Makefile**: `make` (or `make baseline`)
- **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=baseline`

Then build with:

```bash
cmake --build build --config Release
```

#### Aggressive benchmark profile (`perf-native`)

This profile is opt-in and applies compiler-specific native tuning (for example, `-march=native` for GNU or `-xHost` for Intel compilers). Use this for local benchmarking only: a binary tuned to the build machine's CPU may not run on other hardware.

- **Makefile**: `make perf-native`
- **CMake**: `cmake -S . -B build -DPACKMOL_PROFILE=perf-native`

Then build with:

```bash
cmake --build build --config Release
```

#### Debug / sanitized profiles

Use these profiles for troubleshooting and memory/UB diagnostics.

- **Makefile debug checks**: `make devel`
- **Makefile sanitizers (GNU)**: `make sanitize`
- **CMake debug checks**: `cmake -S . -B build -DPACKMOL_PROFILE=debug`
- **CMake sanitizers (GNU)**: `cmake -S . -B build -DPACKMOL_PROFILE=sanitize`

Then build with:

```bash
cmake --build build --config Debug
```

### Performance-oriented kernel improvements

Recent performance work in the pairwise cell-list kernels includes:

- Prebuilt neighbor-cell offsets used in `computef` / `computeg` traversal.
- Reduced branch pressure in pair kernels by separating hot paths.
- Better data locality in hot loops through structure-of-arrays style access.
- Conservative per-cell broad-phase reject checks before expensive pair evaluation.
- Hot-loop `**2` exponentiation replaced consistently with explicit `x*x` multiplication.
- Build-profile driven tuning with numerics validation support.

These updates are designed to speed up large systems while preserving numerical behavior.

## Contributors

- Michele Bonus, Heinrich Heine University Düsseldorf
Email: `michele.bonus@hhu.de`
GitHub: https://github.com/MicheleBonus/

#### Unsafe math is explicit opt-in

Unsafe/fast-math is **not** enabled by default in any profile; it must be requested explicitly.

- **Makefile**: add `UNSAFE_MATH=1`, e.g. `make perf-native UNSAFE_MATH=1`
- **CMake**: set `-DPACKMOL_UNSAFE_MATH=ON`

### Reproducibility and numerics check across profiles

To compare objective and gradient behavior across profiles using existing inputs under `testing/`, run:

```bash
./testing/check_numerics_profiles.sh
```

What it does:

1. Builds and runs `baseline` and `perf-native` binaries.
2. Uses `testing/input_files/benzene2.inp` with `chkgrad` enabled (small system for fast finite-difference gradient checking).
3. Compares values extracted from each run's `chkgrad.log`:
- Objective function value (`Function Value = ...`)
- Gradient check summary (`Maximum difference = ... Error= ...`)
4. Fails if differences exceed tolerances.

You can override tolerances with environment variables:

```bash
OBJ_TOL=1e-8 GRAD_TOL=1e-3 ./testing/check_numerics_profiles.sh
```

## References

Please always cite one of the following references in publications for which Packmol was useful:
Expand All @@ -93,4 +188,3 @@ JM Martinez, L Martinez, Packing optimization for the automated generation of co
(https://doi.org/10.1002/jcc.10216)



14 changes: 13 additions & 1 deletion app/packmol.f90
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,14 @@ program packmol
keyword(iline,1).eq.'below') then
nratom(icart) = nratom(icart) + 1
iratcount = iratcount + 1
iirest = -1
do irest = 1, nrest
if(irestline(irest).eq.iline) iirest = irest
end do
if(iirest.eq.-1) then
write(*,*) ' ERROR: Could not map atom-level restriction to definition at line ', iline
stop exit_code_input_error
end if
iratom(icart,iratcount) = iirest
end if
end if
Expand All @@ -608,9 +613,14 @@ program packmol
keyword(iline,1).eq.'below') then
nratom(icart) = nratom(icart) + 1
iratcount = iratcount + 1
iirest = -1
do irest = 1, nrest
if(irestline(irest).eq.iline) iirest = irest
end do
if(iirest.eq.-1) then
write(*,*) ' ERROR: Could not map restriction to definition at line ', iline
stop exit_code_input_error
end if
iratom(icart,iratcount) = iirest
end if
end do
Expand Down Expand Up @@ -710,6 +720,9 @@ program packmol

call initial(n,x)

! Pre-compute fixed_short_marker once (fixedatom and use_short_radius are static)
call init_fixed_short_marker()

! Computing the energy at the initial point

radscale = 1.d0
Expand Down Expand Up @@ -978,4 +991,3 @@ program packmol
end if

end program packmol

27 changes: 25 additions & 2 deletions src/cell_indexing.f90
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
module cell_indexing

implicit none
public :: setcell, icell_to_cell, index_cell
implicit none
public :: setcell, icell_to_cell, index_cell, n_forward_offsets, forward_offsets

! Ordered list of relative cell offsets (dx, dy, dz) used for forward-only traversal.
! forward_offsets(:,k) is the k-th offset: reshape fills the 3 x n_forward_offsets
! array column by column, so each row of three integers below is one offset.
! Apart from the self cell, the first nonzero component of every offset is +1,
! so no listed offset is the negation of another one.
! 1: (0, 0, 0) -> self cell (handled with latomnext to avoid double-counting)
! 2-4: forward face neighbors
! 5-10: forward edge neighbors
! 11-14: forward vertex neighbors
integer, parameter :: n_forward_offsets = 14
integer, parameter :: forward_offsets(3,n_forward_offsets) = reshape([ &
0, 0, 0, &
1, 0, 0, &
0, 1, 0, &
0, 0, 1, &
1,-1, 0, &
1, 0,-1, &
0, 1,-1, &
0, 1, 1, &
1, 1, 0, &
1, 0, 1, &
1,-1,-1, &
1,-1, 1, &
1, 1,-1, &
1, 1, 1 &
], [3,n_forward_offsets])

contains
!
Expand Down
2 changes: 1 addition & 1 deletion src/checkpoint.f90
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
subroutine checkpoint(n,x)

use sizes
use compute_data
use compute_data, only : comptype, init1
use input
use usegencan
use ahestetic
Expand Down
Loading