diff --git a/.gitignore b/.gitignore
index b068d332..306ef4fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 .DS_Store
 .DS_Store?
 *.[oa]
+*.optrpt
+*.003t.original
 
 # Prerequisites
 *.d
diff --git a/Scripts/Trajectory_Conversion/lammpstrjconvert.py b/Scripts/Trajectory_Conversion/lammpstrjconvert.py
index 7b0e72a5..4046f0b9 100755
--- a/Scripts/Trajectory_Conversion/lammpstrjconvert.py
+++ b/Scripts/Trajectory_Conversion/lammpstrjconvert.py
@@ -57,7 +57,6 @@ def lammpstrjconvert(
 
     ltpath = Path(lammpstrjpath)
     full_fstr = "X " + fstr + " " + fstr + " " + fstr
-    colnames = ("id", "xu", "yu", "zu")
 
     if Hpath is None:
         Hpath = Path(ltpath.stem + ".H")
@@ -141,6 +140,10 @@ def convert_frame():
                 colname: i
                 for i, colname in enumerate(ltfile.readline().strip().split())
             }  # gives indices of columns
+            if ("xu" in coldict) and ("yu" in coldict) and ("zu" in coldict):
+                colnames = ("id", "xu", "yu", "zu")
+            else:
+                colnames = ("id", "x", "y", "z")
             xyz_buffer = io.StringIO()
             for i in range(n_atoms):
                 xyz_buffer.write(ltfile.readline())
diff --git a/Src/Makefile b/Src/Makefile
index 4cc65141..486ee461 100755
--- a/Src/Makefile
+++ b/Src/Makefile
@@ -80,7 +80,7 @@ LINKFLAGS = -Wl,-rpath,${locallib}
 #F90FLAGS = -c -openmp
 #F90FLAGS = -p -c -O3
 #F90FLAGS = -c -O3 -openmp
-OPTFLAGS = -O3
+OPTFLAGS = -O3 -align array32byte -march=core-avx2
 #LINKFLAGS = -openmp
 #LINKFLAGS = -p
 
@@ -139,14 +139,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.conda b/Src/Makefile.conda
index 48883125..335078fa 100644
--- a/Src/Makefile.conda
+++ b/Src/Makefile.conda
@@ -116,14 +116,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.gfortran b/Src/Makefile.gfortran
index 5db90ff2..1ce3981f 100755
--- a/Src/Makefile.gfortran
+++ b/Src/Makefile.gfortran
@@ -69,6 +69,8 @@ libgmxfortpath = ${locallib}/libgmxfort.so
 LIBS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --libs libgmxfort`
 INCS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --libs libgmxfort`
 F90FLAGS = -c -ffree-line-length-none -fcheck=all
+# specifying -mavx2 is redundant with -march but Cassandra looks for it to recognize vector length
+OPTFLAGS = -O3 -ffast-math -mavx2 -mfma -march=haswell
 LINKFLAGS = -Wl,-rpath,${locallib}
 
 ####################################################################
@@ -126,14 +128,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.gfortran.noXTC b/Src/Makefile.gfortran.noXTC
index 31524be6..c8c33d6e 100755
--- a/Src/Makefile.gfortran.noXTC
+++ b/Src/Makefile.gfortran.noXTC
@@ -60,6 +60,8 @@ FC = gfortran
 LIBS = 
 INCS = 
 F90FLAGS = -c -ffree-line-length-none -fcheck=all
+# specifying -mavx2 is redundant with -march but Cassandra looks for it to recognize vector length
+OPTFLAGS = -O3 -ffast-math -mavx2 -mfma -march=haswell
 LINKFLAGS = 
 
 ####################################################################
@@ -117,14 +119,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.gfortran.openMP b/Src/Makefile.gfortran.openMP
index 895ceb2f..325e6342 100755
--- a/Src/Makefile.gfortran.openMP
+++ b/Src/Makefile.gfortran.openMP
@@ -69,7 +69,9 @@ libgmxfortpath = ${locallib}/libgmxfort.so
 LIBS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --libs libgmxfort`
 INCS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --libs libgmxfort`
 #
-F90FLAGS = -c -ffree-line-length-none -fopenmp -O3
+F90FLAGS = -c -ffree-line-length-none -fno-range-check -fopenmp
+# specifying -mavx2 is redundant with -march but Cassandra looks for it to recognize vector length
+OPTFLAGS = -O3 -ffast-math -mavx2 -mfma -march=haswell
 LINKFLAGS = -fopenmp -Wl,-rpath,${locallib}
 
 
@@ -128,14 +130,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.gfortran.openMP.noXTC b/Src/Makefile.gfortran.openMP.noXTC
index d61b293e..fe3f377a 100755
--- a/Src/Makefile.gfortran.openMP.noXTC
+++ b/Src/Makefile.gfortran.openMP.noXTC
@@ -57,7 +57,9 @@ FC = gfortran
 LIBS =
 INCS =
 #
-F90FLAGS = -c -ffree-line-length-none -fopenmp -O3
+F90FLAGS = -c -ffree-line-length-none -fopenmp
+# specifying -mavx2 is redundant with -march but Cassandra looks for it to recognize vector length
+OPTFLAGS = -O3 -ffast-math -mavx2 -mfma -march=haswell
 LINKFLAGS =  -fopenmp
 
 
@@ -116,14 +118,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.intel.openMP b/Src/Makefile.intel.openMP
index a76077ba..34e8316b 100755
--- a/Src/Makefile.intel.openMP
+++ b/Src/Makefile.intel.openMP
@@ -77,8 +77,8 @@ LIBS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --li
 INCS = -I${localdir}/include -L${locallib} -lgmxfort # `pkg-config --cflags --libs libgmxfort`
 #
 #F90FLAGS = -c  -ffree-line-length-none
-F90FLAGS = -c -g -no-wrap-margin -qopenmp #-check all -traceback #-warn unused #-openmp
-OPTFLAGS = -O3
+F90FLAGS = -c -g -no-wrap-margin -qopenmp -traceback #-debug -check all,noarg_temp_created #-warn unused #-openmp
+OPTFLAGS = -O3 -march=core-avx2 -align array32byte -qopt-report=5 -qopt-report-phase=vec,loop,openmp # -ipo -fp-model=fast=2
 LINKFLAGS = -qopenmp -Wl,-rpath,${locallib} 
 
 
@@ -114,7 +114,7 @@ OBJS =  main.o \
   mcf_control.o \
   nptmc_driver.o \
   create_nonbond_table.o \
- 	internal_coordinate_routines.o \
+  internal_coordinate_routines.o \
   clean_abort.o \
   move_translate.o \
   random_generators.o \
@@ -144,14 +144,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.intel.openMP.noXTC b/Src/Makefile.intel.openMP.noXTC
index 157be231..7a7be5fd 100755
--- a/Src/Makefile.intel.openMP.noXTC
+++ b/Src/Makefile.intel.openMP.noXTC
@@ -117,14 +117,13 @@ OBJS =	main.o \
 	move_regrow.o \
 	ring_fragment_driver.o \
 	move_atom.o \
-	chempot.o \
 	pair_nrg_variables.o \
         move_ring_flip.o \
         move_identity_switch.o \
 	widom_subdriver.o \
         widom_insert.o \
         write_widom_properties.o \
-	load_next_frame.o \
+	trajectory_reader_routines.o \
 	pregen_control.o \
 	pregen_driver.o \
 	sector_routines.o \
diff --git a/Src/Makefile.noXTC b/Src/Makefile.noXTC
index 53887942..be20adf0 100755
--- a/Src/Makefile.noXTC
+++ b/Src/Makefile.noXTC
@@ -125,14 +125,13 @@ OBJS =  main.o \
   move_regrow.o \
   ring_fragment_driver.o \
   move_atom.o \
-  chempot.o \
   pair_nrg_variables.o \
   move_ring_flip.o \
   move_identity_switch.o \
   widom_subdriver.o \
   widom_insert.o \
   write_widom_properties.o \
-  load_next_frame.o \
+  trajectory_reader_routines.o \
   pregen_control.o \
   pregen_driver.o \
   sector_routines.o \
diff --git a/Src/Makefile.pgfortran b/Src/Makefile.pgfortran
index 6a379b7a..8c79c8ad 100755
--- a/Src/Makefile.pgfortran
+++ b/Src/Makefile.pgfortran
@@ -128,14 +128,13 @@ OBJS = main.o \
        move_regrow.o \
        ring_fragment_driver.o \
        move_atom.o \
-       chempot.o \
        pair_nrg_variables.o \
        move_ring_flip.o \
        move_identity_switch.o \
        widom_subdriver.o \
        widom_insert.o \
        write_widom_properties.o \
-       load_next_frame.o \
+       trajectory_reader_routines.o \
        pregen_control.o \
        pregen_driver.o \
        sector_routines.o \
diff --git a/Src/Makefile.pgfortran.noXTC b/Src/Makefile.pgfortran.noXTC
index 333fb6c8..348db3ee 100755
--- a/Src/Makefile.pgfortran.noXTC
+++ b/Src/Makefile.pgfortran.noXTC
@@ -116,14 +116,13 @@ OBJS = main.o \
        move_regrow.o \
        ring_fragment_driver.o \
        move_atom.o \
-       chempot.o \
        pair_nrg_variables.o \
        move_ring_flip.o \
        move_identity_switch.o \
        widom_subdriver.o \
        widom_insert.o \
        write_widom_properties.o \
-       load_next_frame.o \
+       trajectory_reader_routines.o \
        pregen_control.o \
        pregen_driver.o \
        sector_routines.o \
diff --git a/Src/Makefile.pgfortran.openMP b/Src/Makefile.pgfortran.openMP
index a819d894..53a2be9b 100755
--- a/Src/Makefile.pgfortran.openMP
+++ b/Src/Makefile.pgfortran.openMP
@@ -128,14 +128,13 @@ OBJS = main.o \
        move_regrow.o \
        ring_fragment_driver.o \
        move_atom.o \
-       chempot.o \
        pair_nrg_variables.o \
        move_ring_flip.o \
        move_identity_switch.o \
        widom_subdriver.o \
        widom_insert.o \
        write_widom_properties.o \
-       load_next_frame.o \
+       trajectory_reader_routines.o \
        pregen_control.o \
        pregen_driver.o \
        sector_routines.o \
diff --git a/Src/Makefile.pgfortran.openMP.noXTC b/Src/Makefile.pgfortran.openMP.noXTC
index b71d0477..543c78c2 100755
--- a/Src/Makefile.pgfortran.openMP.noXTC
+++ b/Src/Makefile.pgfortran.openMP.noXTC
@@ -116,14 +116,13 @@ OBJS = main.o \
        move_regrow.o \
        ring_fragment_driver.o \
        move_atom.o \
-       chempot.o \
        pair_nrg_variables.o \
        move_ring_flip.o \
        move_identity_switch.o \
        widom_subdriver.o \
        widom_insert.o \
        write_widom_properties.o \
-       load_next_frame.o \
+       trajectory_reader_routines.o \
        pregen_control.o \
        pregen_driver.o \
        sector_routines.o \
diff --git a/Src/atompair_nrg_table_routines.f90 b/Src/atompair_nrg_table_routines.f90
index f0e92968..26bbe50f 100755
--- a/Src/atompair_nrg_table_routines.f90
+++ b/Src/atompair_nrg_table_routines.f90
@@ -28,7 +28,8 @@ MODULE Atompair_nrg_table_routines
   !********************************************************************************
   USE Global_Variables
   USE Type_Definitions
-  USE Energy_Routines , ONLY: AtomPair_VdW_Energy_Vector
+  USE Io_Utilities
+  USE Energy_Routines , ONLY: AtomPair_VdW_Energy_Vector, Recip_Sqrt
   USE Input_Routines , ONLY: Get_Solvent_Info
   USE Pair_Emax_Estimation, ONLY: Read_Pair_rminsq
   !$ USE OMP_LIB
@@ -57,7 +58,7 @@ SUBROUTINE Allocate_Atompair_tables
                 IF (ALLOCATED(atompair_rminsq_table)) DEALLOCATE(atompair_rminsq_table)
                 IF (ALLOCATED(typepair_solute_indices)) DEALLOCATE(typepair_solute_indices)
                 IF (ALLOCATED(typepair_wsolute_indices)) DEALLOCATE(typepair_wsolute_indices)
-                IF (ALLOCATED(typepair_solvent_indices)) DEALLOCATE(typepair_solute_indices)
+                IF (ALLOCATED(typepair_solvent_indices)) DEALLOCATE(typepair_solvent_indices)
                 ALLOCATE(typepair_solute_indices(0:nbr_atomtypes))
                 ALLOCATE(typepair_wsolute_indices(0:nbr_atomtypes))
                 ALLOCATE(typepair_solvent_indices(0:nbr_atomtypes))
@@ -127,7 +128,8 @@ SUBROUTINE Allocate_Atompair_tables
                 wsolute_maxind = wsolute_nextbase
                 IF (precalc_atompair_nrg) THEN
                         ALLOCATE(typepair_nrg_table(atompair_nrg_res,0:solvent_ntypes,0:solute_ntypes,nbr_boxes))
-                        ALLOCATE(atompair_nrg_table(atompair_nrg_res,solvent_nextbase,solute_nextbase,nbr_boxes))
+                        ALLOCATE(atompair_nrg_table(atompair_nrg_res+1,solvent_nextbase,solute_nextbase,nbr_boxes))
+                        ALLOCATE(atompair_nrg_table_reduced(0:(atompair_nrg_res+1)*solvent_nextbase-1,solute_nextbase,nbr_boxes))
                         typepair_nrg_table = 0.0_DP
                 END IF
                 IF (est_atompair_rminsq) THEN
@@ -142,6 +144,7 @@ SUBROUTINE Allocate_Atompair_tables
                 END IF
                 IF (read_atompair_rminsq) THEN 
                         ALLOCATE(atompair_rminsq_table(solvent_nextbase,wsolute_nextbase,nbr_boxes))
+                        ALLOCATE(sp_atompair_rminsq_table(solvent_nextbase,wsolute_nextbase,nbr_boxes))
                 END IF
         END SUBROUTINE Allocate_Atompair_tables
 
@@ -157,11 +160,14 @@ SUBROUTINE Create_Atompair_Nrg_table
                 REAL(DP), DIMENSION(atompair_nrg_res, nbr_boxes) :: f2
                 REAL(DP) :: solvent_charges(solvent_maxind), solute_charges(solute_maxind)
                 INTEGER :: solvent_typeindvec(solvent_maxind), solute_typeindvec(solute_maxind)
-                REAL(DP), DIMENSION(atompair_nrg_res) :: rsq_mp_vector, rsq_lb_vector, rij, alpha_rij
+                REAL(DP), DIMENSION(atompair_nrg_res) :: rsq_mp_vector, rsq_lb_vector, rij, inv_rij, alpha_rij
+                REAL(DP), DIMENSION(:,:,:), ALLOCATABLE :: atompair_nrg_table_reduced_dp
                 IF (.NOT. precalc_atompair_nrg) RETURN
                 nsolutes = 0
                 nsolvents = 0
                 rsq_step = (MAXVAL(rcut_cbmcsq)-rcut_lowsq)/atompair_nrg_res
+                inv_rsq_step = 1.0_DP/rsq_step
+                inv_rsq_step_sp = REAL(inv_rsq_step,SP)
                 rsq_shifter = rcut_lowsq - rsq_step
                 DO i = 1, atompair_nrg_res
                         rsq_lb_vector(i) = rsq_shifter + rsq_step*i
@@ -197,20 +203,23 @@ SUBROUTINE Create_Atompair_Nrg_table
                                 = typepair_solute_indices(nonbond_list(1:natoms(is),is)%atom_type_number)
                 END DO
 
-                rij = SQRT(rsq_mp_vector)
+                inv_rij = 1.0_DP/SQRT(rsq_mp_vector)
+                rij = inv_rij*rsq_mp_vector
 
                 DO ibox = 1, nbr_boxes
-                        IF (int_charge_sum_style(ibox) == charge_ewald) THEN
+                        IF (cbmc_charge_sf_flag) THEN
+                                f2(:,ibox) = inv_rij - 2.0_DP/rcut_cbmc(ibox) + rij/rcut_cbmcsq(ibox)
+                        ELSEIF (int_charge_sum_style(ibox) == charge_ewald) THEN
                                 alpha_rij = alpha_ewald(ibox) * rij
-                                f2(:,ibox) = ERFC(alpha_rij)/rij
+                                f2(:,ibox) = ERFC(alpha_rij)*inv_rij
                         ELSEIF (int_charge_sum_style(ibox) == charge_dsf) THEN
                                 alpha_rij = alpha_dsf(ibox)*rij
                                 f2(:,ibox) = &
                                         dsf_factor2(ibox)*(rij-rcut_coul(ibox)) - &
                                         dsf_factor1(ibox) + &
-                                        ERFC(alpha_rij)/rij
+                                        ERFC(alpha_rij)*inv_rij
                         ELSEIF (int_charge_sum_style(ibox) == charge_cut) THEN
-                                f2(:,ibox) = 1.0_DP/rij
+                                f2(:,ibox) = inv_rij
                         ELSE
                                 f2(:,ibox) = 0.0_DP
                         END IF
@@ -240,7 +249,7 @@ SUBROUTINE Create_Atompair_Nrg_table
                 !$OMP END PARALLEL
 
                 !$OMP WORKSHARE
-                atompair_nrg_table = typepair_nrg_table(:,solvent_typeindvec,solute_typeindvec,:)
+                atompair_nrg_table(1:atompair_nrg_res,:,:,:) = typepair_nrg_table(:,solvent_typeindvec,solute_typeindvec,:)
                 !$OMP END WORKSHARE
 
                 !$OMP PARALLEL DEFAULT(SHARED)
@@ -248,22 +257,145 @@ SUBROUTINE Create_Atompair_Nrg_table
                 DO ibox = 1, nbr_boxes
                         DO ti_solute = 1, solute_maxind
                                 DO ti_solvent = 1, solvent_maxind
-                                        atompair_nrg_table(:,ti_solvent,ti_solute,ibox) = &
-                                                atompair_nrg_table(:,ti_solvent,ti_solute,ibox) + &
+                                        atompair_nrg_table(1:atompair_nrg_res,ti_solvent,ti_solute,ibox) = &
+                                                atompair_nrg_table(1:atompair_nrg_res,ti_solvent,ti_solute,ibox) + &
                                                 f2(:,ibox)*cfqq(ti_solvent,ti_solute)
                                 END DO
                         END DO
                 END DO
                 !$OMP END DO
+                ALLOCATE(atompair_nrg_table_reduced_dp(SIZE(atompair_nrg_table_reduced,1),&
+                        SIZE(atompair_nrg_table_reduced,2), &
+                        SIZE(atompair_nrg_table_reduced,3)))
+                !$OMP WORKSHARE
+                atompair_nrg_table(atompair_nrg_res+1,:,:,:) = 0.0
+                atompair_nrg_table_reduced_dp = RESHAPE(atompair_nrg_table, SHAPE(atompair_nrg_table_reduced))
+                atompair_nrg_table_reduced = REAL(atompair_nrg_table_reduced_dp,SP)
+                !$OMP END WORKSHARE
                 !$OMP END PARALLEL
 
 
+
         END SUBROUTINE Create_Atompair_Nrg_table
 
         SUBROUTINE Setup_Atompair_tables
+                LOGICAL, DIMENSION(0:nbr_atomtypes) :: solvent_lvec, wsolute_lvec
+                INTEGER :: ia, is, solvent_tcount, wsolute_tcount, itype, ibox
+                INTEGER, DIMENSION(0:nbr_atomtypes) :: ti_which_big_atom
+                INTEGER, DIMENSION(nbr_atomtypes+1) :: big_atom_ti_list
+                INTEGER :: ifrag, ia_frag, ia_frag_ti, biggest_atom, biggest_atom_ti, i_big_atom
+                REAL(DP) :: ia_frag_rminsq_sum, biggest_atom_rminsq_sum
                 solvent_maxind = 1
                 rsqmin_res_d = 1
                 solvent_maxind_d = 1
+
+
+                n_big_atoms = 0
+                IF (calc_rmin_flag) THEN
+                        IF (need_solvents) CALL Get_Solvent_Info
+                        solvent_lvec = .FALSE.
+                        wsolute_lvec = .FALSE.
+                        DO is = 1, nspecies
+                              IF (species_list(is)%l_solvent) THEN
+                                      DO ia = 1, natoms(is)
+                                              solvent_lvec(nonbond_list(ia,is)%atom_type_number) = .TRUE.
+                                      END DO
+                              END IF
+                              IF (species_list(is)%l_wsolute) THEN
+                                      DO ia = 1, natoms(is)
+                                              wsolute_lvec(nonbond_list(ia,is)%atom_type_number) = .TRUE.
+                                      END DO
+                              END IF
+                        END DO
+                        n_solvent_atomtypes = COUNT(solvent_lvec)
+                        n_wsolute_atomtypes = COUNT(wsolute_lvec)
+                        ALLOCATE(which_solvent_atomtypes(n_solvent_atomtypes))
+                        ALLOCATE(which_solvent_atomtypes_inv(0:nbr_atomtypes))
+                        ALLOCATE(which_wsolute_atomtypes(n_wsolute_atomtypes))
+                        ALLOCATE(which_wsolute_atomtypes_inv(0:nbr_atomtypes)) ! inverse function of above
+                        which_solvent_atomtypes_inv = -2000000000 ! meant to cause invalid index if used where it isn't properly set
+                        which_wsolute_atomtypes_inv = -2000000000 ! meant to cause invalid index if used where it isn't properly set
+                        solvent_tcount = 0
+                        wsolute_tcount = 0
+                        DO itype = 0, nbr_atomtypes
+                              IF (solvent_lvec(itype)) THEN
+                                      solvent_tcount = solvent_tcount + 1
+                                      which_solvent_atomtypes(solvent_tcount) = itype
+                                      which_solvent_atomtypes_inv(itype) = solvent_tcount
+                              END IF
+                              IF (wsolute_lvec(itype)) THEN
+                                      wsolute_tcount = wsolute_tcount + 1
+                                      which_wsolute_atomtypes(wsolute_tcount) = itype
+                                      which_wsolute_atomtypes_inv(itype) = wsolute_tcount
+                              END IF
+                        END DO
+                        max_rmin = DSQRT(MAXVAL(rminsq_table))
+                        sp_rminsq_table = REAL(rminsq_table,SP)
+                        IF (cavity_biasing_flag) THEN
+                                ti_which_big_atom = -999999
+                                DO is = 1, nspecies
+                                        IF (.NOT. species_list(is)%l_wsolute) CYCLE
+                                        DO ifrag = 1, nfragments(is)
+                                                biggest_atom = 1
+                                                biggest_atom_ti = nonbond_list(frag_list(ifrag,is)%atoms(1),is)%atom_type_number
+                                                biggest_atom_rminsq_sum = SUM(rminsq_table(:,biggest_atom_ti))
+                                                DO ia_frag = 2, frag_list(ifrag,is)%natoms
+                                                        ia_frag_ti = &
+                                                                nonbond_list(frag_list(ifrag,is)%atoms(ia_frag),is)%atom_type_number
+                                                        ia_frag_rminsq_sum = SUM(rminsq_table(:,ia_frag_ti))
+                                                        IF (ia_frag_rminsq_sum>biggest_atom_rminsq_sum) THEN
+                                                                biggest_atom = ia_frag
+                                                                biggest_atom_ti = ia_frag_ti
+                                                                biggest_atom_rminsq_sum = ia_frag_rminsq_sum
+                                                        END IF
+                                                END DO
+                                                i_big_atom = ti_which_big_atom(biggest_atom_ti)
+                                                IF (i_big_atom < 0) THEN
+                                                        n_big_atoms = n_big_atoms+1
+                                                        i_big_atom = n_big_atoms
+                                                        ti_which_big_atom(biggest_atom_ti) = n_big_atoms
+                                                        big_atom_ti_list(n_big_atoms) = biggest_atom_ti
+                                                END IF
+                                                frag_list(ifrag,is)%i_big_atom = i_big_atom
+                                                frag_list(ifrag,is)%ia_frag_big_atom = biggest_atom
+                                        END DO
+                                END DO
+                        END IF
+                        ALLOCATE(atomtype_max_rminsq(0:nbr_atomtypes))
+                        ALLOCATE(atomtype_min_rminsq(n_solvent_atomtypes,0:n_big_atoms))
+                        ALLOCATE(atomtype_max_rminsq_sp(0:nbr_atomtypes))
+                        atomtype_max_rminsq = MAXVAL(rminsq_table(:, &
+                                which_wsolute_atomtypes),2)
+                        atomtype_min_rminsq(:,0) = MINVAL(rminsq_table(which_solvent_atomtypes, &
+                                which_wsolute_atomtypes),2)
+                        IF (cavity_biasing_flag) THEN
+                                atomtype_min_rminsq(:,1:n_big_atoms) = rminsq_table(which_solvent_atomtypes,big_atom_ti_list(1:n_big_atoms))
+                        END IF
+                        atomtype_max_rminsq_sp = REAL(atomtype_max_rminsq,SP)
+                        box_list%rcut_low_max = SQRT(MAXVAL(atomtype_min_rminsq))
+                        box_list%ideal_bitcell_length = box_list%rcut_low_max / SQRT(902.0_DP) ! RHS scalar LHS vector with one element per box
+                        solvents_or_types_maxind = n_solvent_atomtypes
+                ELSE
+                        box_list%rcut_low_max = rcut_low
+                        box_list%ideal_bitcell_length = rcut_low / SQRT(902.0_DP)
+                        solvents_or_types_maxind = 1
+                END IF
+                IF (bitcell_flag .AND. .NOT. read_atompair_rminsq) THEN
+                      DO ibox = 1, nbr_boxes
+                              WRITE(logunit, '(A,F5.3,A)') "For box " // TRIM(Int_To_String(ibox)) // ", computed ideal bitcell length = ", &
+                                      box_list(ibox)%ideal_bitcell_length, " Angstroms"
+                      END DO
+                END IF
+                box_list%ideal_bitcell_length = MAX(min_ideal_bitcell_length,box_list%ideal_bitcell_length)
+                IF (bitcell_flag .AND. .NOT. read_atompair_rminsq) THEN
+                      DO ibox = 1, nbr_boxes
+                              WRITE(logunit, '(A,F5.3,A)') "Setting box " // TRIM(Int_To_String(ibox)) // " ideal bitcell length = ", &
+                                      box_list(ibox)%ideal_bitcell_length, " Angstroms"
+                      END DO
+                END IF
+
+
+
                 IF (.NOT. (precalc_atompair_nrg .OR. read_atompair_rminsq .OR. est_atompair_rminsq)) RETURN
                 CALL Allocate_Atompair_Tables
                 IF (precalc_atompair_nrg) CALL Create_Atompair_Nrg_table
diff --git a/Src/chempot.f90 b/Src/chempot.f90
deleted file mode 100755
index e6fb4c94..00000000
--- a/Src/chempot.f90
+++ /dev/null
@@ -1,224 +0,0 @@
-!*******************************************************************************
-!   Cassandra - An open source atomistic Monte Carlo software package
-!   developed at the University of Notre Dame.
-!   http://cassandra.nd.edu
-!   Prof. Edward Maginn <ed@nd.edu>
-!   Copyright (2013) University of Notre Dame du Lac
-!
-!   This program is free software: you can redistribute it and/or modify
-!   it under the terms of the GNU General Public License as published by
-!   the Free Software Foundation, either version 3 of the License, or
-!   (at your option) any later version.
-!
-!   This program is distributed in the hope that it will be useful,
-!   but WITHOUT ANY WARRANTY; without even the implied warranty of
-!   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-!   GNU General Public License for more details.
-!
-!   You should have received a copy of the GNU General Public License
-!   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-!*******************************************************************************
-
-SUBROUTINE Chempot(this_box,is)
-
-  !*****************************************************************************
-  !
-  ! The subroutine calculates the chemical potential of the species in the system.  
-  !
-  ! Called by
-  !
-  !   nvtmc_driver.f90
-  !   nptmc_driver.f90
-  !   gemc_particle_transfer.f90
-  !
-  ! Revision history
-  !
-  !   12/10/13  : Beta Release
-  ! 
-  !*****************************************************************************
-
-  USE Global_Variables
-  USE Energy_Routines
-  USE IO_Utilities
-  USE Random_Generators
-  USE Rotation_Routines
-  USE Type_Definitions
-  USE Simulation_Properties
-  USE Fragment_Growth
-
-  IMPLICIT NONE
-
-  INTEGER :: is, alive, this_box, i_type, i, anchor_dummy
-
-  INTEGER, ALLOCATABLE :: frag_order(:)
-
-  REAL(DP) :: dx, dy, dz 
-  REAL(DP) :: E_bond, E_angle, E_dihedral, E_intra_vdw, E_intra_qq
-  REAL(DP) :: E_inter_vdw, E_inter_qq, E_improper, E_periodic_qq
-  REAL(DP) :: delta_e, E_reciprocal_move, E_self_move, E_lrc
-  REAL(DP) :: prefact, CP_energy, nrg_ring_frag_tot
-
-  REAL(DP) :: ln_pseq, ln_pbias, this_lambda, nrg_ring_frag_out
-
-  LOGICAL :: inter_overlap ,cbmc_overlap, intra_overlap
-
-  delta_e = 0.0_DP
-  prefact = 1.0_DP
-  nrg_ring_frag_tot = 0.0_DP
-  ln_pseq = 0.0_DP
-  ln_pbias = 0.0_DP
-
-  ntrials(is,this_box)%cpcalc = ntrials(is,this_box)%cpcalc + 1
- 
-  
-  IF ( locate(max_molecules(is)+1,is,this_box) == 0 ) THEN
-     locate(max_molecules(is)+1,is,this_box) = max_molecules(is)+1
-  END IF
-
-  alive = locate(max_molecules(is)+1,is,this_box)
-  molecule_list(alive,is)%which_box = this_box
-  molecule_list(alive,is)%frac = 1.0_DP
-  molecule_list(alive,is)%molecule_type = int_normal
-  molecule_list(alive,is)%live = .TRUE.
-
-  cbmc_overlap = .FALSE.
-
-  IF(species_list(is)%fragment) THEN
-
-     del_flag = .FALSE.
-     get_fragorder = .TRUE.
-     this_lambda = molecule_list(alive,is)%frac
-     anchor_dummy = 0
-     ALLOCATE(frag_order(nfragments(is)))
-     CALL Build_Molecule(alive,is,this_box,frag_order,this_lambda, &
-             ln_pseq,ln_pbias,nrg_ring_frag_out,cbmc_overlap)
-     DEALLOCATE(frag_order)
-
-  ELSE
-
-     molecule_list(alive,is)%xcom = species_list(is)%xcom
-     molecule_list(alive,is)%ycom = species_list(is)%ycom
-     molecule_list(alive,is)%zcom = species_list(is)%zcom
-
-     atom_list(:,alive,is)%rxp = init_list(:,1,is)%rxp
-     atom_list(:,alive,is)%ryp = init_list(:,1,is)%ryp
-     atom_list(:,alive,is)%rzp = init_list(:,1,is)%rzp
-
-     atom_list(:,alive,is)%exist = .TRUE.
-
-     CALL Rotate_Molecule_Eulerian(alive,is)
-
-     IF ( box_list(this_box)%int_box_shape == int_cubic ) THEN
- 
-        molecule_list(alive,is)%xcom = (rranf() - 0.5_DP) * box_list(this_box)%length(1,1)
-        molecule_list(alive,is)%ycom = (rranf() - 0.5_DP) * box_list(this_box)%length(2,2)
-        molecule_list(alive,is)%zcom = (rranf() - 0.5_DP) * box_list(this_box)%length(3,3)
-
-     END IF
-
-     dx = molecule_list(alive,is)%xcom - species_list(is)%xcom
-     dy = molecule_list(alive,is)%ycom - species_list(is)%ycom
-     dz = molecule_list(alive,is)%zcom - species_list(is)%zcom
-
-     atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp + dx
-     atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp + dy
-     atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp + dz
-
-  END IF
-
-  IF (cbmc_overlap) THEN
-        ! There is nothing to be done.
-        
-     molecule_list(alive,is)%live = .FALSE.
-     atom_list(:,alive,is)%exist = .FALSE.
-     molecule_list(alive,is)%molecule_type = int_none
-
-     RETURN
-
-  END IF
-     
-  CALL Get_COM(alive,is)
-
-  ! compute the distance of atom farthest from COM
-
-  CALL Compute_Max_COM_Distance(alive,is)
-
-  ! Intra molecule energy
-
-!  CALL Compute_Molecule_Bond_Energy(alive,is,E_bond)
-!  CALL Compute_Molecule_Angle_Energy(alive,is,E_angle)
-!  CALL Compute_Molecule_Dihedral_Energy(alive,is,E_dihedral)
-!  CALL Compute_Molecule_Improper_Energy(alive,is,E_improper)
-
-!  delta_e = E_bond + E_angle + E_dihedral + E_improper + delta_e
-
-
-  ! Nonbonded energy
-  CALL Compute_Molecule_Nonbond_Intra_Energy(alive,is,E_intra_vdw,E_intra_qq, &
-       E_periodic_qq,intra_overlap)
-  CALL Compute_Molecule_Nonbond_Inter_Energy(alive,is,E_inter_vdw,E_inter_qq, &
-       inter_overlap)
-  E_inter_qq = E_inter_qq + E_periodic_qq
-
-!    delta_e = delta_e + E_inter_vdw + E_inter_qq
-  delta_e = delta_e + E_intra_vdw + E_intra_qq + E_inter_vdw + E_inter_qq
-
-  IF (int_charge_style(this_box) == charge_coul) THEN
-       IF ( int_charge_sum_style(this_box) == charge_ewald .AND. has_charge(is)) THEN
-          CALL Update_System_Ewald_Reciprocal_Energy(alive,is,this_box, &
-               int_insertion,E_reciprocal_move)
-
-          delta_e = delta_e + (E_reciprocal_move-energy(this_box)%reciprocal)
-
-       END IF
-       CALL Compute_Molecule_Self_Energy(alive,is,this_box,E_self_move)
-       delta_e = delta_e + E_self_move 
-  END IF
-
-  IF (int_vdw_sum_style(this_box) == vdw_cut_tail) THEN
-     nbeads_in(:) = nint_beads(:,this_box)
-
-     DO i = 1, natoms(is)
-        i_type = nonbond_list(i,is)%atom_type_number
-        nint_beads(i_type,this_box) = nint_beads(i_type,this_box) + 1
-     END DO
-     CALL Compute_LR_correction(this_box,e_lrc)
-     delta_e = delta_e + e_lrc - energy(this_box)%lrc
-     nint_beads(:,this_box) = nbeads_in(:)
-  END IF
-
-  CP_energy = delta_e 
-
-  IF(int_sim_type == sim_npt) THEN
-    prefact = box_list(this_box)%volume / REAL(nmols(is,this_box) + 1, DP)
-  END IF
-
-!  IF(species_list(is)%fragment) THEN
-!     prefact = prefact / (P_forward * kappa ** nfragments(is))
-     ! subtract off the angle energy as this was used in biasing
-     ! the branch points. Also reduce the total energy by 
-     ! ring biasing energy if any
-!     CP_energy = CP_energy - E_angle - nrg_ring_frag_tot
-!     IF(rx_Flag) CP_energy = CP_energy - E_dihedral
-!  END IF
-
-  chpot(is,this_box) = chpot(is,this_box) + prefact *  DEXP(-beta(this_box) * CP_energy)
-
-  molecule_list(alive,is)%live = .FALSE.
-  atom_list(:,alive,is)%exist = .FALSE.
-  molecule_list(alive,is)%molecule_type = int_none
-
-  IF ( int_charge_sum_style(this_box) == charge_ewald .AND. has_charge(is)) THEN
-        ! restore cos_sum and sin_sum. Note that these were changed when difference in
-        ! reciprocal energies was computed
-     !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-     cos_sum(:,this_box) = cos_sum_old(:,this_box)
-     sin_sum(:,this_box) = sin_sum_old(:,this_box)
-     !$OMP END PARALLEL WORKSHARE
-  END IF
-
-
-  RETURN
-
-END SUBROUTINE Chempot    
-
diff --git a/Src/compute_cell_dimensions.f90 b/Src/compute_cell_dimensions.f90
index 21d7f38f..c4bb7acb 100755
--- a/Src/compute_cell_dimensions.f90
+++ b/Src/compute_cell_dimensions.f90
@@ -51,6 +51,58 @@ SUBROUTINE Compute_Cell_Dimensions(box_nbr)
   REAL(DP) :: bxc1, bxc2, bxc3
   REAL(DP) :: cxa1, cxa2, cxa3
   REAL(DP) :: det, inv_det
+  REAL(DP) :: inv_H_T(3,3), unit_cross(3)
+  LOGICAL :: change_basis, left_basis
+  REAL(DP), DIMENSION(3,3) :: H, orig_H_inv
+  REAL(DP), DIMENSION(3) :: orig_abc, orig_abcsq
+  REAL(DP) :: ax,bx,by,cx,cy,cz
+
+  ! Cassandra now automatically converts the cell matrix to the form
+  !     required by LAMMPS since it allows better efficiency enhancement
+  !     due to being an upper triangular matrix (with 3 elements guaranteed 
+  !     to be zero). The cell matrix inverse is also upper triangular.
+  !     This should especially help with vectorized conversions between real and
+  !     fractional coordinates, since only 6 matrix elements must be stored in 6
+  !     vector registers, leaving more vector registers available to store intermediates
+  !     and coordinates or displacement components and possibly perform other operations
+  !     without needing to load constant elements to vector registers every vector iteration 
+  !     (there are 16 ymm vector registers in AVX2).
+  !     The diagonal elements are also all positive in the new basis.
+  ! The cell matrix is only converted if it does not already meet the new basis criteria.
+  ! orig_length_inv and basis_converter in the box object are used elsewhere
+  !     to transform the coordinates read from trajectory, checkpoint, or configuration files
+  !     to the new basis if the cell matrix provided does not already meet the requirements.
+  H = box_list(box_nbr)%length
+  box_list(box_nbr)%orig_length = H
+  left_basis = DOT_PRODUCT(Cross_Product(H(:,1),H(:,2)),H(:,3)) < 0.0_DP
+  change_basis = &
+          ANY(ABS((/ H(2,1),H(3,1),H(3,2) /))>tiny_number .OR. &
+          (/ H(1,1), H(2,2), H(3,3) /) <= 0.0_DP) .OR. &
+          left_basis
+  IF (change_basis) THEN
+          orig_H_inv(1,:) = Cross_Product(H(:,2),H(:,3))
+          orig_H_inv(2,:) = Cross_Product(H(:,3),H(:,1))
+          orig_H_inv(3,:) = Cross_Product(H(:,1),H(:,2))
+          IF (left_basis) H(:,3) = -H(:,3)
+          orig_abcsq(1) = DOT_PRODUCT(H(:,1),H(:,1))
+          orig_abcsq(2) = DOT_PRODUCT(H(:,2),H(:,2))
+          orig_abcsq(3) = DOT_PRODUCT(H(:,3),H(:,3))
+          orig_abc = SQRT(orig_abcsq)
+          ax = orig_abc(1)
+          bx = DOT_PRODUCT(H(:,2),H(:,1))/orig_abc(1)
+          by = SQRT(orig_abcsq(2)-bx*bx)
+          cx = DOT_PRODUCT(H(:,3),H(:,1))/orig_abc(1)
+          cy = (DOT_PRODUCT(H(:,2),H(:,3))-bx*cx)/by
+          cz = SQRT(orig_abcsq(3) - cx*cx - cy*cy)
+          H = 0.0_DP
+          H(1,1) = ax
+          H(1,2) = bx
+          H(2,2) = by
+          H(1,3) = cx
+          H(2,3) = cy
+          H(3,3) = cz
+          box_list(box_nbr)%length = H
+  END IF
 
   !Cell basis vector lengths
   box_list(box_nbr)%basis_length(1) = &
@@ -180,6 +232,14 @@ SUBROUTINE Compute_Cell_Dimensions(box_nbr)
 
   ! The adjoint divided by the determinant is the inverse of the cell basis matrix
   box_list(box_nbr)%length_inv = box_list(box_nbr)%length_inv * inv_det
+  IF (change_basis) THEN
+          orig_H_inv = orig_H_inv * inv_det
+          box_list(box_nbr)%orig_length_inv = orig_H_inv
+          box_list(box_nbr)%basis_converter = MATMUL(H,orig_H_inv)
+  ELSE
+          box_list(box_nbr)%orig_length_inv = box_list(box_nbr)%length_inv
+  END IF
+  box_list(box_nbr)%basis_changed = change_basis
 
   ! Compute half of the box length to be used in Fold_Molecule
 
@@ -187,5 +247,19 @@ SUBROUTINE Compute_Cell_Dimensions(box_nbr)
   box_list(box_nbr)%hlength(2,2) =0.5_DP * box_list(box_nbr)%basis_length(2)
   box_list(box_nbr)%hlength(3,3) =0.5_DP * box_list(box_nbr)%basis_length(3)
 
+  ! Compute face distance of transpose of inverse of cell matrix
+  ! This is used for reciprocal lattice vector setup
+  inv_H_T = TRANSPOSE(box_list(box_nbr)%length_inv)
+  unit_cross = Cross_Product(inv_H_T(:,2),inv_H_T(:,3))
+  unit_cross = unit_cross/SQRT(DOT_PRODUCT(unit_cross,unit_cross))
+  box_list(box_nbr)%invT_face_distance(1) = ABS(DOT_PRODUCT(inv_H_T(:,1),unit_cross))
+  unit_cross = Cross_Product(inv_H_T(:,3),inv_H_T(:,1))
+  unit_cross = unit_cross/SQRT(DOT_PRODUCT(unit_cross,unit_cross))
+  box_list(box_nbr)%invT_face_distance(2) = ABS(DOT_PRODUCT(inv_H_T(:,2),unit_cross))
+  unit_cross = Cross_Product(inv_H_T(:,1),inv_H_T(:,2))
+  unit_cross = unit_cross/SQRT(DOT_PRODUCT(unit_cross,unit_cross))
+  box_list(box_nbr)%invT_face_distance(3) = ABS(DOT_PRODUCT(inv_H_T(:,3),unit_cross))
+
       
 END SUBROUTINE Compute_Cell_Dimensions
+
diff --git a/Src/create_nonbond_table.f90 b/Src/create_nonbond_table.f90
index cd04cefd..9ef2917b 100755
--- a/Src/create_nonbond_table.f90
+++ b/Src/create_nonbond_table.f90
@@ -66,13 +66,17 @@ SUBROUTINE Create_Nonbond_Table
   ! Steele potential
   REAL(DP) :: sigma_ss, eps_ss, rho_s, delta_s, eps_sf
   !custom mixing rules
-  INTEGER :: ierr,line_nbr,nbr_entries, is_1, is_2, ia_1, ia_2, itype_custom, jtype_custom
+  INTEGER :: ierr,line_nbr,nbr_entries, is_1, is_2, ia_1, ia_2, itype_custom, jtype_custom, ibox
   INTEGER ::  i_type1, i_type2
   CHARACTER(STRING_LEN) :: line_string, line_array(60)
 
   REAL(DP) :: min_qprod, min_U_q, U_max, lambda
+  INTEGER, DIMENSION(4), PARAMETER :: order2 = (/ 2, 3, 1, 4 /)
+  INTEGER, DIMENSION(4) :: shape1, shape2
+  REAL(DP) :: sixbycut, eps, sigma, negsigsq, negsigbyr2, rterm, rterm2
 
 !******************************************************************************
+  l_zerotype_present = .FALSE.
   IF (verbose_log) THEN
      WRITE(logunit,*)
      WRITE(logunit,'(A)') 'Nonbond tables'
@@ -99,7 +103,9 @@ SUBROUTINE Create_Nonbond_Table
         
         repeat_type = .FALSE.
 
-        IF (nonbond_list(ia,is)%vdw_type /= 'NONE') THEN
+        IF (nonbond_list(ia,is)%vdw_type /= 'NONE' .AND. &
+                (ANY(ABS(nonbond_list(ia,is)%vdw_param(1:nbr_vdw_params(is)))>tiny_number) &
+                .OR. mix_rule == 'custom')) THEN
 
            !----------------------------------------------------------------
            ! Determine whether the atomtype has already been accounted for
@@ -140,6 +146,7 @@ SUBROUTINE Create_Nonbond_Table
         ELSE
            ! atom has no atom_type
            nonbond_list(ia,is)%atom_type_number = 0
+           l_zerotype_present = .TRUE.
         ENDIF
         ! Get maximum and minimum charge for atom type
         IF (repeat_type) THEN
@@ -204,11 +211,15 @@ SUBROUTINE Create_Nonbond_Table
   IF (ALLOCATED(temp_atomtypes)) DEALLOCATE(temp_atomtypes)
 
   ! allocate arrays containing vdw parameters for all interaction pairs.
-  ALLOCATE(vdw_param1_table(nbr_atomtypes,nbr_atomtypes), Stat=AllocateStatus)
-  ALLOCATE(vdw_param2_table(nbr_atomtypes,nbr_atomtypes), Stat=AllocateStatus)
-  ALLOCATE(vdw_param3_table(nbr_atomtypes,nbr_atomtypes), Stat=AllocateStatus)
-  ALLOCATE(vdw_param4_table(nbr_atomtypes,nbr_atomtypes), Stat=AllocateStatus)
-  ALLOCATE(vdw_param5_table(nbr_atomtypes,nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(vdw_param1_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(vdw_param2_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(vdw_param3_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(vdw_param4_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(vdw_param5_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(ppvdwp_table(0:nbr_atomtypes,0:nbr_atomtypes,5,nbr_boxes))
+  ALLOCATE(ppvdwp_table2(5,0:nbr_atomtypes,0:nbr_atomtypes,nbr_boxes))
+  ALLOCATE(ppvdwp_table_sp(0:nbr_atomtypes,0:nbr_atomtypes,5,nbr_boxes))
+  ALLOCATE(ppvdwp_table2_sp(5,0:nbr_atomtypes,0:nbr_atomtypes,nbr_boxes))
 
   IF (AllocateStatus .NE. 0) THEN
      err_msg = ''
@@ -216,8 +227,17 @@ SUBROUTINE Create_Nonbond_Table
      CALL Clean_Abort(err_msg,'create_nonbond_table')
   END IF
 
+  vdw_param1_table = 0.0_DP
+  vdw_param2_table = 0.0_DP
+  vdw_param3_table = 0.0_DP
+  vdw_param4_table = 0.0_DP
+  vdw_param5_table = 0.0_DP
+  ppvdwp_table = 0.0_DP
+  ppvdwp_table2 = 0.0_DP
+
   ! Allocate memory for rminsq_table
   ALLOCATE(rminsq_table(0:nbr_atomtypes, 0:nbr_atomtypes), Stat=AllocateStatus)
+  ALLOCATE(sp_rminsq_table(0:nbr_atomtypes, 0:nbr_atomtypes), Stat=AllocateStatus)
   rminsq_table = rcut_lowsq
 
   IF (AllocateStatus .NE. 0) THEN
@@ -638,6 +658,63 @@ SUBROUTINE Create_Nonbond_Table
      END IF ! mix_rule
   END IF ! nbr_atomtypes > 1
 
-  max_rmin = DSQRT(MAXVAL(rminsq_table))
+
+  DO ibox = 1, nbr_boxes
+        IF (int_vdw_sum_style(ibox) .NE. vdw_charmm .AND. int_vdw_style(ibox) == vdw_lj) THEN
+                sixbycut = 6.0_DP / rcut_vdw(ibox)
+                DO jtype = 1, nbr_atomtypes
+                        DO itype = 1, nbr_atomtypes
+                                eps = vdw_param1_table(itype,jtype)*4.0_DP
+                                sigma = vdw_param2_table(itype,jtype)
+                                negsigsq = -sigma*sigma
+                                ppvdwp_table(itype,jtype,1,ibox) = eps
+                                ppvdwp_table(itype,jtype,2,ibox) = negsigsq
+                                IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
+                                        negsigbyr2 = negsigsq/rcut_vdwsq(ibox)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        ppvdwp_table(itype,jtype,3,ibox) = eps*rterm
+                                ELSE IF (int_vdw_sum_style(ibox) == vdw_cut_shift_force) THEN
+                                        negsigbyr2 = negsigsq/rcut_vdwsq(ibox)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        rterm2 = rterm * rterm
+                                        rterm = rterm + rterm*rterm
+                                        ppvdwp_table(itype,jtype,3,ibox) = eps*rterm
+                                        ppvdwp_table(itype,jtype,4,ibox) = sixbycut * eps * rterm2
+                                END IF
+                        END DO
+                END DO
+        ELSE IF (int_vdw_style(ibox) == vdw_mie) THEN
+                ppvdwp_table(:,:,1,ibox) = vdw_param1_table * &
+                        vdw_param3_table/(vdw_param3_table-vdw_param4_table) * &
+                        (vdw_param3_table/vdw_param4_table)** &
+                        (vdw_param4_table/(vdw_param3_table-vdw_param4_table))
+                ppvdwp_table(:,:,2,ibox) = ppvdwp_table(:,:,1,ibox) * &
+                        vdw_param2_table ** vdw_param4_table
+                ppvdwp_table(:,:,1,ibox) = ppvdwp_table(:,:,1,ibox) * &
+                        vdw_param2_table ** vdw_param3_table
+                l_nonuniform_exponents = ANY(vdw_param3_table(1:,1:) .NE. vdw_param3_table(1,1)) &
+                        .OR. ANY (vdw_param4_table(1:,1:) .NE. vdw_param4_table(1,1))
+                ppvdwp_table(:,:,3,ibox) = vdw_param3_table * -0.5_DP
+                ppvdwp_table(:,:,4,ibox) = vdw_param4_table * -0.5_DP
+                IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
+                        ! this shift constant is positive and meant to be subtracted, not added
+                        ppvdwp_table(:,:,5,ibox) = &
+                                ppvdwp_table(:,:,1,ibox) * rcut_vdwsq(ibox)**ppvdwp_table(:,:,3,ibox) - &
+                                ppvdwp_table(:,:,2,ibox) * rcut_vdwsq(ibox)**ppvdwp_table(:,:,4,ibox)
+                END IF
+        ELSE
+                ppvdwp_table(:,:,1,ibox) = vdw_param1_table
+                ppvdwp_table(:,:,2,ibox) = vdw_param2_table * vdw_param2_table
+        END IF
+  END DO
+
+  !order2 = (/ 2, 3, 1, 4 /)
+
+  ppvdwp_table2 = RESHAPE(ppvdwp_table, SHAPE(ppvdwp_table2), ORDER=order2)
+  ppvdwp_table2_sp = REAL(ppvdwp_table2,SP)
+  ppvdwp_table_sp = REAL(ppvdwp_table,SP)
+
 
 END SUBROUTINE Create_Nonbond_Table
diff --git a/Src/energy_routines.f90 b/Src/energy_routines.f90
index 861443ad..b6c7ef11 100755
--- a/Src/energy_routines.f90
+++ b/Src/energy_routines.f90
@@ -190,6 +190,8 @@ MODULE Energy_Routines
 
   IMPLICIT NONE
 
+  INTEGER, PARAMETER :: kxyz_max_allowed = 1023
+
 CONTAINS
 
   !-----------------------------------------------------------------------------
@@ -347,7 +349,7 @@ SUBROUTINE Compute_Molecule_Angle_Energy(im,is,energy)
   END SUBROUTINE Compute_Molecule_Angle_Energy
   !-----------------------------------------------------------------------------
 
-  SUBROUTINE Compute_Molecule_Dihedral_Energy(molecule,species,energy_dihed)
+  SUBROUTINE Compute_Molecule_Dihedral_Energy(molecule,species,energy_dihed,l_skip_dihed_vec)
     !**************************************************************************
     ! This routine is passed a molecule and species index. It then computes
     !the total dihedral angle energy of this molecule.
@@ -371,6 +373,7 @@ SUBROUTINE Compute_Molecule_Dihedral_Energy(molecule,species,energy_dihed)
   USE Global_Variables
     INTEGER :: molecule,species
     REAL(DP) :: energy_dihed
+    LOGICAL, DIMENSION(species_list(species)%ndihedrals_energetic), INTENT(IN), OPTIONAL :: l_skip_dihed_vec
     INTEGER :: idihed, idihed_rb, atom1, atom2, atom3, atom4
     REAL(DP) :: a0,a1,a2,a3,a4,a5,a6,a7,a8,edihed,phi,cosphi,r12dn,twophi,threephi
     REAL(DP) :: cosphi_vec(0:5)
@@ -387,6 +390,9 @@ SUBROUTINE Compute_Molecule_Dihedral_Energy(molecule,species,energy_dihed)
     cosphi_vec = 0.0_DP
     cosphi_vec(0) = 1.0_DP
     DO idihed_rb = 1, species_list(species)%ndihedrals_rb
+        IF (PRESENT(l_skip_dihed_vec)) THEN
+                IF (l_skip_dihed_vec(idihed_rb)) CYCLE
+        END IF
         IF (.NOT. ALL(these_atoms(dihedral_list(idihed_rb,species)%atom)%exist)) CYCLE
         CALL Get_Dihedral_Angle_COS(idihed_rb,molecule,species,cosphi_vec(1))
         cosphi_vec(2) = cosphi_vec(1)*cosphi_vec(1)
@@ -396,6 +402,9 @@ SUBROUTINE Compute_Molecule_Dihedral_Energy(molecule,species,energy_dihed)
         energy_dihed = energy_dihed + DOT_PRODUCT(cosphi_vec,dihedral_list(idihed_rb,species)%rb_c)
     END DO
     DO idihed=idihed_rb, species_list(species)%ndihedrals_energetic
+       IF (PRESENT(l_skip_dihed_vec)) THEN
+               IF (l_skip_dihed_vec(idihed)) CYCLE
+       END IF
        ! Verify that the atoms of this dihedral exist. This is required
        ! for CBMC moves in which only a part of the molecule is present in
        ! the simulation
@@ -621,12 +630,12 @@ SUBROUTINE Compute_Atom_Nonbond_Energy(ia,im,is, &
                    IF ( .NOT. these_atoms_j(ja)%exist) CYCLE AtomLoop
 
                    ! Interatomic distance
-                   rxij = these_atoms_j(ja)%rxp &
-                        - these_atoms_i(ia)%rxp
-                   ryij = these_atoms_j(ja)%ryp &
-                        - these_atoms_i(ia)%ryp
-                   rzij = these_atoms_j(ja)%rzp &
-                        - these_atoms_i(ia)%rzp
+                   rxij = these_atoms_j(ja)%rp(1) &
+                        - these_atoms_i(ia)%rp(1)
+                   ryij = these_atoms_j(ja)%rp(2) &
+                        - these_atoms_i(ia)%rp(2)
+                   rzij = these_atoms_j(ja)%rp(3) &
+                        - these_atoms_i(ia)%rp(3)
 
                    rijsq = rxij*rxij + ryij*ryij + rzij*rzij
 
@@ -642,12 +651,12 @@ SUBROUTINE Compute_Atom_Nonbond_Energy(ia,im,is, &
                 ! Intermolecular energy so apply pbc.
 
                 ! First compute the parent separation
-                rxijp = these_atoms_j(ja)%rxp &
-                      - these_atoms_i(ia)%rxp
-                ryijp = these_atoms_j(ja)%ryp &
-                      - these_atoms_i(ia)%ryp
-                rzijp = these_atoms_j(ja)%rzp &
-                      - these_atoms_i(ia)%rzp
+                rxijp = these_atoms_j(ja)%rp(1) &
+                      - these_atoms_i(ia)%rp(1)
+                ryijp = these_atoms_j(ja)%rp(2) &
+                      - these_atoms_i(ia)%rp(2)
+                rzijp = these_atoms_j(ja)%rp(3) &
+                      - these_atoms_i(ia)%rp(3)
 
                 ! Now get the minimum image separation
                 CALL Minimum_Image_Separation(this_box,rxijp,ryijp,rzijp, &
@@ -713,9 +722,9 @@ SUBROUTINE Compute_Atom_Nonbond_Intra_Energy(ia,im,is, &
                 nonzero_vdw = vdw_intra_scale(ia,ja,is) > 0.0_DP 
                 nonzero_qq = charge_intra_scale(ia,ja,is) > 0.0_DP 
                 IF (.NOT. (nonzero_vdw .OR. nonzero_qq)) CYCLE
-                rxij = these_atoms(ja)%rxp - these_atoms(ia)%rxp
-                ryij = these_atoms(ja)%ryp - these_atoms(ia)%ryp
-                rzij = these_atoms(ja)%rzp - these_atoms(ia)%rzp
+                rxij = these_atoms(ja)%rp(1) - these_atoms(ia)%rp(1)
+                ryij = these_atoms(ja)%rp(2) - these_atoms(ia)%rp(2)
+                rzij = these_atoms(ja)%rp(3) - these_atoms(ia)%rp(3)
                 !CALL Minimum_Image_Separation(this_box,rxijp,ryijp,rzijp,rxij,ryij,rzij)
                 rijsq = rxij*rxij+ryij*ryij+rzij*rzij
                 IF (rijsq < rcut_lowsq .AND. nonzero_vdw) RETURN
@@ -738,134 +747,6 @@ END SUBROUTINE Compute_Atom_Nonbond_Intra_Energy
 
   !-----------------------------------------------------------------------------
 
-!  SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells(ia,im,is, &
-!       E_inter_vdw,E_inter_qq)
-!        INTEGER, INTENT(IN) :: ia,im,is
-!        REAL(DP), INTENT(OUT):: E_inter_vdw, E_inter_qq
-!        REAL(DP) :: Eij_intra_vdw, Eij_intra_qq, Eij_inter_vdw, Eij_inter_qq
-!        INTEGER :: grid_length(3), this_box, i
-!        INTEGER, DIMENSION(:), POINTER :: xi, yi, zi, thisrange_cells, sector_atom_ID, these_cells
-!        INTEGER :: dummy_ind, dummy, n_cells_occupied, icell, ia_cell, cell_coords(3), secind
-!        !LOGICAL, DIMENSION(:,:,:), POINTER :: filtered_mask, this_mask
-!        LOGICAL :: get_vdw, get_qq
-!        REAL(DP) :: rijsq, rxijp, ryijp, rzijp, rxij, ryij, rzij, cp(3)
-!        TYPE(Atom_Class), POINTER :: atom_ptr
-!        INTEGER, DIMENSION(:), POINTER :: this_yb
-!        INTEGER, DIMENSION(:,:), POINTER :: this_zb
-!        INTEGER :: ix, iy, iz, iy_c, iz_c
-!        E_inter_vdw = 0.0_DP
-!        E_inter_qq = 0.0_DP
-!        IF (widom_active) THEN
-!                cp(1) = widom_atoms(ia)%rxp
-!                cp(2) = widom_atoms(ia)%ryp
-!                cp(3) = widom_atoms(ia)%rzp
-!                this_box = widom_molecule%which_box
-!        ELSE
-!                cp(1) = atom_list(ia,im,is)%rxp
-!                cp(2) = atom_list(ia,im,is)%ryp
-!                cp(3) = atom_list(ia,im,is)%rzp
-!                this_box = molecule_list(im,is)%which_box
-!        END IF
-!        IF (cbmc_flag) THEN
-!                thisrange_cells => cbmcrange_cells(:,this_box)
-!!                this_mask => cbmc_mask(-thisrange_cells(1):thisrange_cells(1), &
-!!                        -thisrange_cells(2):thisrange_cells(2), &
-!!                        -thisrange_cells(3):thisrange_cells(3), &
-!!                        this_box)
-!                this_yb => cbmc_yb(this_box, -thisrange_cells(1):thisrange_cells(1))
-!                this_zb => cbmc_zb(this_box, -thisrange_cells(1):thisrange_cells(1), &
-!                        -thisrange_cells(2):thisrange_cells(2))
-!        ELSE
-!                thisrange_cells => cutrange_cells(:,this_box)
-!!                this_mask => cut_mask(-thisrange_cells(1):thisrange_cells(1), &
-!!                        -thisrange_cells(2):thisrange_cells(2), &
-!!                        -thisrange_cells(3):thisrange_cells(3), &
-!!                        this_box)
-!                this_yb => cut_yb(this_box, -thisrange_cells(1):thisrange_cells(1))
-!                this_zb => cut_zb(this_box, -thisrange_cells(1):thisrange_cells(1), &
-!                        -thisrange_cells(2):thisrange_cells(2))
-!        END IF
-!        grid_length = thisrange_cells*2+1
-!        !filtered_mask => filtered_mask_super(1:grid_length(1),1:grid_length(2),1:grid_length(3))
-!        cell_coords = IDNINT(cp*cell_length_inv(:,this_box))
-!        xi => ci_grid(1,1:grid_length(1))
-!        yi => ci_grid(2,1:grid_length(2))
-!        zi => ci_grid(3,1:grid_length(3))
-!        dummy = cell_coords(1) - thisrange_cells(1)
-!        DO i = 1, grid_length(1)
-!                xi(i) = dummy
-!                dummy = dummy + 1
-!        END DO
-!        dummy = cell_coords(2) - thisrange_cells(2)
-!        DO i = 1, grid_length(2)
-!                yi(i) = dummy
-!                dummy = dummy + 1
-!        END DO
-!        dummy = cell_coords(3) - thisrange_cells(3)
-!        DO i = 1, grid_length(3)
-!                zi(i) = dummy
-!                dummy = dummy + 1
-!        END DO
-!        IF (cell_coords(1)+thisrange_cells(1)>sectorbound(1,this_box)) THEN
-!                dummy_ind = grid_length(1) + 1 - (cell_coords(1)+thisrange_cells(1)-sectorbound(1,this_box))
-!                xi(dummy_ind:grid_length(1)) = xi(dummy_ind:grid_length(1)) - length_cells(1,this_box)
-!        ELSE IF (cell_coords(1)-thisrange_cells(1)<-sectorbound(1,this_box)) THEN
-!                dummy_ind = thisrange_cells(1)-sectorbound(1,this_box) - cell_coords(1)
-!                xi(1:dummy_ind) = xi(1:dummy_ind) + length_cells(1,this_box)
-!        END IF
-!        IF (cell_coords(2)+thisrange_cells(2)>sectorbound(2,this_box)) THEN
-!                dummy_ind = grid_length(2) + 1 - (cell_coords(2)+thisrange_cells(2)-sectorbound(2,this_box))
-!                yi(dummy_ind:grid_length(2)) = yi(dummy_ind:grid_length(2)) - length_cells(2,this_box)
-!        ELSE IF (cell_coords(2)-thisrange_cells(2)<-sectorbound(2,this_box)) THEN
-!                dummy_ind = thisrange_cells(2)-sectorbound(2,this_box) - cell_coords(2)
-!                yi(1:dummy_ind) = yi(1:dummy_ind) + length_cells(2,this_box)
-!        END IF
-!        IF (cell_coords(3)+thisrange_cells(3)>sectorbound(3,this_box)) THEN
-!                dummy_ind = grid_length(3) + 1 - (cell_coords(3)+thisrange_cells(3)-sectorbound(3,this_box))
-!                zi(dummy_ind:grid_length(3)) = zi(dummy_ind:grid_length(3)) - length_cells(3,this_box)
-!        ELSE IF (cell_coords(3)-thisrange_cells(3)<-sectorbound(3,this_box)) THEN
-!                dummy_ind = thisrange_cells(3)-sectorbound(3,this_box) - cell_coords(3)
-!                zi(1:dummy_ind) = zi(1:dummy_ind) + length_cells(3,this_box)
-!        END IF
-!!        filtered_mask = sector_has_atoms(xi,yi,zi,this_box) .AND. this_mask
-!!        n_cells_occupied = COUNT(filtered_mask)
-!!        these_cells => cell_index_vector(1:n_cells_occupied)
-!!        these_cells = PACK(sector_index_map(xi,yi,zi,this_box),filtered_mask)
-!!        DO icell = 1, n_cells_occupied
-!!                secind = these_cells(icell)
-!        iy_c = 1+thisrange_cells(2)
-!        iz_c = 1+thisrange_cells(3)
-!        DO ix = 1, grid_length(1)
-!                DO iy = iy_c-this_yb(ix), iy_c+this_yb(ix)
-!                        DO iz = iz_c - this_zb(ix,iy), iz_c+this_zb(ix,iy)
-!                                IF (.NOT. sector_has_atoms(xi(ix),yi(iy),zi(iz),this_box)) CYCLE
-!                                secind = sector_index_map(xi(ix),yi(iy),zi(iz),this_box)
-!                                DO ia_cell = 1, sector_n_atoms(secind)
-!                                        sector_atom_ID => sector_atoms(ia_cell,secind,:)
-!                                        IF (sector_atom_ID(2) == im .AND. sector_atom_ID(3) == is) CYCLE
-!                                        atom_ptr => atom_list(sector_atom_ID(1),sector_atom_ID(2),sector_atom_ID(3))
-!                                        rxijp = atom_ptr%rxp - cp(1)
-!                                        ryijp = atom_ptr%ryp - cp(2)
-!                                        rzijp = atom_ptr%rzp - cp(3)
-!                                        CALL Minimum_Image_Separation(this_box,rxijp,ryijp,rzijp,rxij,ryij,rzij)
-!                                        rijsq = rxij*rxij+ryij*ryij+rzij*rzij
-!                                        CALL Check_AtomPair_Cutoff(rijsq,get_vdw,get_qq,this_box)
-!                                        ! Compute vdw and q-q energy using if required
-!                                        IF (get_vdw .OR. get_qq) THEN
-!                                           CALL Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq, &
-!                                                sector_atom_ID(3),sector_atom_ID(2),sector_atom_ID(1),is,im,ia,&
-!                                                get_vdw,get_qq, &
-!                                                Eij_intra_vdw,Eij_intra_qq,Eij_inter_vdw,Eij_inter_qq)
-!                                           E_inter_vdw = E_inter_vdw + Eij_inter_vdw
-!                                           E_inter_qq  = E_inter_qq  + Eij_inter_qq
-!                                        END IF
-!                                END DO
-!                        END DO
-!                END DO
-!        END DO
-!!        END DO
-!  END SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells
-
   !-----------------------------------------------------------------------------
   SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells(ia,im,is, &
                   E_inter_vdw, E_inter_qq, overlap, Eij_qq)
@@ -884,30 +765,57 @@ SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells(ia,im,is, &
           INTEGER, DIMENSION(3) :: ci_min, ci_max
           REAL(DP) :: cp(3), dx, dy, dz, dxp, dyp, dzp, rijsq
           REAL(DP) :: Eij_intra_vdw,Eij_intra_qq,Eij_inter_vdw,Eij_inter_qq
-          REAL(DP), DIMENSION(:), POINTER :: cell_length_inv_ptr
+          REAL(DP), DIMENSION(:,:), POINTER :: cell_length_inv_ptr
           LOGICAL :: get_vdw, get_qq, overlap
+          INTEGER :: coord_limit(4)
+
+          REAL(SP) :: cp_sp(3)
           !
           overlap = .TRUE.
           E_inter_vdw = 0.0_DP
           E_inter_qq = 0.0_DP
           Eij_qq_temp = 0.0_DP
           IF (widom_active) THEN
-                  cp(1) = widom_atoms(ia)%rxp
-                  cp(2) = widom_atoms(ia)%ryp
-                  cp(3) = widom_atoms(ia)%rzp
+                  cp(1) = widom_atoms(ia)%rp(1)
+                  cp(2) = widom_atoms(ia)%rp(2)
+                  cp(3) = widom_atoms(ia)%rp(3)
                   this_box = widom_molecule%which_box
           ELSE
-                  cp(1) = atom_list(ia,im,is)%rxp
-                  cp(2) = atom_list(ia,im,is)%ryp
-                  cp(3) = atom_list(ia,im,is)%rzp
+                  cp(1) = atom_list(ia,im,is)%rp(1)
+                  cp(2) = atom_list(ia,im,is)%rp(2)
+                  cp(3) = atom_list(ia,im,is)%rp(3)
                   this_box = molecule_list(im,is)%which_box
           END IF
           IF (cbmc_flag) THEN
-                  cell_length_inv_ptr => cell_length_inv_cbmc(:,this_box)
+                  cp_sp = REAL(cp,SP)
+                  cell_coords = IDNINT(MATMUL(REAL(box_list(this_box)%cell_length_inv,SP),cp))
+                  coord_limit = UBOUND(cbmc_cell_n_interact)
+                  IF (ANY(ABS(cell_coords) > coord_limit(1:3))) THEN
+                          !WRITE(*,*) cell_coords, coord_limit(1:3), cp_sp, MATMUL(REAL(box_list(this_box)%cell_length_inv,SP),cp)
+                          !WRITE(*,*) cp
+                          DO i = 1, 3
+                                IF (cell_coords(i) < LBOUND(cbmc_cell_n_interact,i)) THEN
+                                        cell_coords(i) = cell_coords(i) + SIZE(cbmc_cell_n_interact,i)
+                                        cp_sp(i) = cp_sp(i) + box_list(this_box)%length(i,i)*0.5_DP
+                                ELSE IF (cell_coords(i) > UBOUND(cbmc_cell_n_interact,i)) THEN
+                                        cell_coords(i) = cell_coords(i) - SIZE(cbmc_cell_n_interact,i)
+                                        cp_sp(i) = cp_sp(i) - box_list(this_box)%length(i,i)*0.5_DP
+                                ELSE
+                                        CYCLE
+                                END IF
+                                !WRITE(*,*) cp_sp(i), box_list(this_box)%length(i,i)*0.5_DP
+                          END DO
+                  END IF
+                  ! next line includes qq energy along with vdw
+                  E_inter_vdw = REAL(Compute_Cell_List_CBMC_nrg(cp_sp(1), cp_sp(2), cp_sp(3), &
+                          cell_coords(1),cell_coords(2),cell_coords(3),ia,is,this_box),DP)
+                  overlap = .FALSE.
+                  RETURN
+                  !cell_length_inv_ptr => cell_length_inv_cbmc(:,:,this_box)
           ELSE
-                  cell_length_inv_ptr => cell_length_inv_full(:,this_box)
+                  cell_length_inv_ptr => cell_length_inv_full(:,:,this_box)
           END IF
-          cell_coords = IDNINT(cp*cell_length_inv_ptr)
+          cell_coords = IDNINT(MATMUL(cell_length_inv_ptr,cp))
           ci_min = cell_coords - 1
           ci_max = cell_coords + 1
           IF (cbmc_flag) THEN
@@ -918,9 +826,9 @@ SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells(ia,im,is, &
                                         DO ia_cell = 1, sector_n_atoms_cbmc(secind)
                                                 sector_atom_ID => sector_atoms_cbmc(ia_cell,secind,:)
                                                 atom_ptr => atom_list(sector_atom_ID(1),sector_atom_ID(2),sector_atom_ID(3))
-                                                dxp = atom_ptr%rxp - cp(1)
-                                                dyp = atom_ptr%ryp - cp(2)
-                                                dzp = atom_ptr%rzp - cp(3)
+                                                dxp = atom_ptr%rp(1) - cp(1)
+                                                dyp = atom_ptr%rp(2) - cp(2)
+                                                dzp = atom_ptr%rp(3) - cp(3)
                                                 CALL Minimum_Image_Separation(this_box,dxp,dyp,dzp,dx,dy,dz)
                                                 rijsq = dx*dx+dy*dy+dz*dz
                                                 CALL Check_AtomPair_Cutoff(rijsq,get_vdw,get_qq,this_box)
@@ -945,9 +853,9 @@ SUBROUTINE Compute_Atom_Nonbond_Inter_Energy_Cells(ia,im,is, &
                                         DO ia_cell = 1, sector_n_atoms_full(secind)
                                                 sector_atom_ID => sector_atoms_full(ia_cell,secind,:)
                                                 atom_ptr => atom_list(sector_atom_ID(1),sector_atom_ID(2),sector_atom_ID(3))
-                                                dxp = atom_ptr%rxp - cp(1)
-                                                dyp = atom_ptr%ryp - cp(2)
-                                                dzp = atom_ptr%rzp - cp(3)
+                                                dxp = atom_ptr%rp(1) - cp(1)
+                                                dyp = atom_ptr%rp(2) - cp(2)
+                                                dzp = atom_ptr%rp(3) - cp(3)
                                                 CALL Minimum_Image_Separation(this_box,dxp,dyp,dzp,dx,dy,dz)
                                                 rijsq = dx*dx+dy*dy+dz*dz
                                                 CALL Check_AtomPair_Cutoff(rijsq,get_vdw,get_qq,this_box)
@@ -1044,9 +952,9 @@ SUBROUTINE Compute_Molecule_Nonbond_Intra_Energy(im,is, &
              IF ( .NOT. these_atoms(ja)%exist) CYCLE
 
              ! Find distance between this atom and all others in the system
-             rxij = these_atoms(ia)%rxp - these_atoms(ja)%rxp
-             ryij = these_atoms(ia)%ryp - these_atoms(ja)%ryp
-             rzij = these_atoms(ia)%rzp - these_atoms(ja)%rzp
+             rxij = these_atoms(ia)%rp(1) - these_atoms(ja)%rp(1)
+             ryij = these_atoms(ia)%rp(2) - these_atoms(ja)%rp(2)
+             rzij = these_atoms(ia)%rp(3) - these_atoms(ja)%rp(3)
 
              rijsq = rxij*rxij + ryij*ryij + rzij*rzij
 
@@ -1132,13 +1040,24 @@ SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy(im,is, &
     LOGICAL :: l_pair_store
     LOGICAL :: my_overlap, shared_overlap
 
+
+    this_box = molecule_list(im,is)%which_box
+    IF (l_vectorized) THEN
+            atompairdim = natoms(is)*MAXVAL(natoms)
+            mol_dim = MAXVAL(nmols(:,this_box))
+            atompairdim = IAND(atompairdim+padconst_4byte,padmask_4byte)
+            mol_dim = IAND(mol_dim+padconst_4byte,padmask_4byte)
+            CALL Compute_Molecule_Nonbond_Inter_Energy_Vectorized(im,is, &
+                    E_inter_vdw, E_inter_qq, overlap)
+            RETURN
+    END IF
+
     E_inter_vdw = 0.0_DP
     E_inter_qq = 0.0_DP
     overlap = .FALSE.
     my_overlap = .FALSE.
     shared_overlap = .FALSE.
 
-    this_box = molecule_list(im,is)%which_box
 
     l_pair_store = .FALSE.
 
@@ -1263,6 +1182,20 @@ SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Widom(im,is, &
             RETURN
     END IF
 
+    ! Start vectorized version
+    IF (l_vectorized .AND. .NOT. (cbmc_flag .AND. precalc_atompair_nrg)) THEN
+            CALL Compute_Molecule_Nonbond_Inter_Energy_Vectorized_Widom(im,is,this_box,&
+                    Ei_inter_vdw,Ei_inter_qq,overlap)
+            IF (.NOT. overlap) E_inter = Ei_inter_vdw + Ei_inter_qq
+            RETURN
+    END IF
+
+
+
+
+    ! End vectorized version
+
+
 !molecule_priority    IF (widom_active .AND. l_sectors) THEN
     speciesLoop0: DO ispecies = 1, nspecies
         moleculeLoop0: DO imolecule = 1, nmols(ispecies,this_box)
@@ -1291,78 +1224,2547 @@ SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Widom(im,is, &
 
         END DO moleculeLoop0
     END DO speciesLoop0
-!molecule_priority            RETURN
-!molecule_priority    END IF
-
-!molecule_priority    hardcore_max_r = widom_molecule%max_dcom + rcut_low
-!molecule_priority    molecule_hardcore_r = rcut_low - widom_molecule%min_dcom
-!molecule_priority
-!molecule_priority
-!molecule_priority    speciesLoop: DO ispecies = 1, nspecies
-!molecule_priority       moleculeLoop: DO imolecule = 1, nmols(ispecies,this_box)
-!molecule_priority          this_locate = locate(imolecule,ispecies,this_box)
-!molecule_priority          IF (ispecies == is .AND. this_locate == im) THEN
-!molecule_priority                  shortrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  midrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  CYCLE moleculeLoop
-!molecule_priority          ELSE IF (.NOT. molecule_list(this_locate,ispecies)%live) THEN
-!molecule_priority                  shortrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  midrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  CYCLE moleculeLoop
-!molecule_priority          END IF
-!molecule_priority
-!molecule_priority          ! Determine whether any atoms of these two molecules will interact
-!molecule_priority          CALL Check_MoleculePair_Cutoff(im,is,this_locate,ispecies,get_interaction, &
-!molecule_priority               rcom,rx,ry,rz)
-!molecule_priority
-!molecule_priority          IF (.NOT. get_interaction) THEN
-!molecule_priority                  shortrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  midrange(imolecule, ispecies) = .FALSE.
-!molecule_priority          ELSE IF (rcom + molecule_list(this_locate,ispecies)%min_dcom < molecule_hardcore_r) THEN
-!molecule_priority                  overlap = .TRUE.
-!molecule_priority                  RETURN
-!molecule_priority          ELSE IF (rcom - molecule_list(this_locate,ispecies)%max_dcom > hardcore_max_r) THEN
-!molecule_priority                  shortrange(imolecule, ispecies) = .FALSE.
-!molecule_priority                  midrange(imolecule, ispecies) = .TRUE.
-!molecule_priority          ELSE
-!molecule_priority                  shortrange(imolecule, ispecies) = .TRUE.
-!molecule_priority                  midrange(imolecule, ispecies) = .FALSE.
-!molecule_priority          END IF
-!molecule_priority       END DO moleculeLoop
-!molecule_priority    END DO speciesLoop
-!molecule_priority
-!molecule_priority    speciesLoop2: DO ispecies = 1, nspecies
-!molecule_priority       moleculeLoop2: DO imolecule = 1, nmols(ispecies,this_box)
-!molecule_priority          IF (.NOT. shortrange(imolecule,ispecies)) CYCLE moleculeLoop2
-!molecule_priority          this_locate = locate(imolecule,ispecies,this_box)
-!molecule_priority
-!molecule_priority          CALL Compute_MoleculePair_Energy(im,is,this_locate,ispecies, &
-!molecule_priority               this_box,Eij_vdw,Eij_qq,overlap)
-!molecule_priority
-!molecule_priority          IF (overlap) RETURN
-!molecule_priority
-!molecule_priority          E_inter_vdw = E_inter_vdw + Eij_vdw
-!molecule_priority          E_inter_qq  = E_inter_qq + Eij_qq
-!molecule_priority
-!molecule_priority       END DO moleculeLoop2
-!molecule_priority    END DO speciesLoop2
-!molecule_priority
-!molecule_priority    speciesLoop3: DO ispecies = 1, nspecies
-!molecule_priority       moleculeLoop3: DO imolecule = 1, nmols(ispecies,this_box)
-!molecule_priority          IF (.NOT. midrange(imolecule,ispecies)) CYCLE moleculeLoop3
-!molecule_priority          this_locate = locate(imolecule,ispecies,this_box)
-!molecule_priority          CALL Compute_MoleculePair_Energy(im,is,this_locate,ispecies, &
-!molecule_priority               this_box,Eij_vdw,Eij_qq,overlap)
-!molecule_priority
-!molecule_priority          IF (overlap) RETURN ! there shouldn't be overlap for midrange molecules
-!molecule_priority
-!molecule_priority          E_inter_vdw = E_inter_vdw + Eij_vdw
-!molecule_priority          E_inter_qq  = E_inter_qq + Eij_qq
-!molecule_priority
-!molecule_priority       END DO moleculeLoop3
-!molecule_priority    END DO speciesLoop3
   END SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Widom
   !-----------------------------------------------------------------------------
+  SUBROUTINE Field_Allocation
+          INTEGER :: ibox, i
+          INTEGER :: navec(nbr_boxes), na
+          DO ibox = 1, nbr_boxes
+                navec(ibox) = DOT_PRODUCT(nmols(:,ibox),natoms)
+                maxboxnatoms = MAX(maxboxnatoms,DOT_PRODUCT(nmols(:,ibox),natoms))
+          END DO
+          na = MAXVAL(navec)
+          maxnmols = IAND(MAXVAL(nmols(:,1:))+padconst_4byte,padmask_4byte)
+          maxboxnatoms = IAND(maxboxnatoms+padconst_4byte,padmask_4byte)
+          IF (ALLOCATED(zero_field)) THEN
+                  IF (SIZE(zero_field) .LE. na) RETURN
+                  DEALLOCATE(zero_field)
+                  DEALLOCATE(rijsq_field)
+                  DEALLOCATE(vec123)
+          END IF
+          ALLOCATE(zero_field(na))
+          ALLOCATE(rijsq_field(na))
+          ALLOCATE(vec123(na))
+          DO i = 1, na
+                vec123(i) = i
+          END DO
+          zero_field = 0.0_DP
+          rijsq_field = 9.0e98_DP
+  END SUBROUTINE Field_Allocation
+
+
+  SUBROUTINE Livelist_Packing(know_all_live,know_unit_stride)
+          LOGICAL, INTENT(IN), OPTIONAL :: know_all_live, know_unit_stride
+          LOGICAL :: l_all_live, l_unit_stride, l_ortho
+          INTEGER :: js, jnlive, jnmols, ibox, istart, iend, jnatoms, maxnlive
+          INTEGER :: ja, jm, maxvlen, i, vlen, cjnmols, js_present
+          REAL(DP) :: lx_recip, ly_recip, lz_recip
+          LOGICAL, DIMENSION(maxnmols) :: spec_live
+          INTEGER, DIMENSION(maxnmols,nspecies,nbr_boxes) :: live_locates
+          INTEGER, DIMENSION(maxnmols) :: spec_locates
+          TYPE(Atom_Class), DIMENSION(:,:), ALLOCATABLE :: j_atom_list
+          TYPE(Molecule_Class), DIMENSION(:), ALLOCATABLE :: j_molecule_list
+          REAL(DP) :: h11,h21,h31,h12,h22,h32,h13,h23,h33
+          REAL(DP) :: rxp,ryp,rzp,sxp,syp,szp,jrp
+          REAL(DP) :: rcom,xcom,ycom,zcom
+          l_not_all_exist = .FALSE.
+          IF (PRESENT(know_all_live)) THEN
+                  l_all_live = know_all_live
+          ELSE
+                  l_all_live = .FALSE.
+          END IF
+          IF (PRESENT(know_unit_stride)) THEN
+                  l_unit_stride = know_unit_stride
+          ELSE
+                  l_unit_stride = .FALSE.
+          END IF
+          nlive = 0
+          nspecies_present = 0
+          IF (.NOT. ALLOCATED(which_species_present)) ALLOCATE(which_species_present(nspecies))
+          DO js = 1, nspecies
+                IF (SUM(nmols(js,1:))>0) THEN
+                        nspecies_present = nspecies_present+1
+                        which_species_present(nspecies_present) = js
+                END IF
+          END DO
+          IF (l_unit_stride) THEN
+                  IF (l_all_live) THEN
+                          nlive = nmols(:,1:)
+                  ELSE
+                          DO js = 1, nspecies
+                                istart = 1
+                                jnatoms = natoms(js)
+                                DO ibox = 1, nbr_boxes
+                                        jnmols = nmols(js,ibox)
+                                        IF (jnmols == 0) CYCLE
+                                        iend = istart + jnmols - 1
+                                        nlive(js,ibox) = COUNT(molecule_list(istart:iend,js)%live)
+                                        istart = istart + jnmols
+                                END DO
+                          END DO
+                  END IF
+          ELSE
+                  !$OMP PARALLEL PRIVATE(jnmols,spec_locates,spec_live,js)
+                  !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC)
+                  DO ibox = 1, nbr_boxes
+                        DO js_present = 1, nspecies_present
+                                js = which_species_present(js_present)
+                                IF (nmols(js,ibox) == 0) CYCLE
+                                jnmols = nmols(js,ibox)
+                                IF (l_all_live) THEN
+                                        live_locates(1:jnmols,js,ibox) = locate(1:jnmols,js,ibox)
+                                        nlive(js,ibox) = jnmols
+                                        CYCLE
+                                END IF
+                                spec_locates(1:jnmols) = locate(1:jnmols,js,ibox)
+                                spec_live(1:jnmols) = molecule_list(spec_locates(1:jnmols),js)%live
+                                IF (ALL(spec_live(1:jnmols))) THEN
+                                        nlive(js,ibox) = jnmols
+                                        live_locates(1:jnmols,js,ibox) = spec_locates(1:jnmols)
+                                ELSE
+                                        jnlive = COUNT(spec_live(1:jnmols))
+                                        nlive(js,ibox) = jnlive
+                                        live_locates(1:jnlive,js,ibox) = PACK(spec_locates(1:jnmols),spec_live(1:jnmols))
+                                END IF
+                        END DO
+                  END DO
+                  !$OMP END DO
+                  !$OMP END PARALLEL
+          END IF
+          IF (ALLOCATED(live_xcom)) DEALLOCATE(live_xcom)
+          IF (ALLOCATED(live_ycom)) DEALLOCATE(live_ycom)
+          IF (ALLOCATED(live_zcom)) DEALLOCATE(live_zcom)
+          IF (ALLOCATED(live_max_dcom)) DEALLOCATE(live_max_dcom)
+          !IF (ALLOCATED(live_atom_list)) DEALLOCATE(live_atom_list)
+          IF (ALLOCATED(live_atom_rsp)) DEALLOCATE(live_atom_rsp)
+          IF (ALLOCATED(live_atom_exist)) DEALLOCATE(live_atom_exist)
+          maxnlive = MAXVAL(nlive)
+          !ALLOCATE(live_atom_list(MAXVAL(natoms),maxnlive,nspecies,nbr_boxes))
+          ALLOCATE(live_atom_exist(MAXVAL(natoms),maxnlive,nspecies_present,nbr_boxes))
+          ALLOCATE(live_atom_rsp(MAXVAL(natoms),maxnlive,3,nspecies_present,nbr_boxes))
+          IF (.NOT. (l_all_live .AND. l_unit_stride)) THEN
+                  ALLOCATE(j_atom_list(MAXVAL(natoms),maxnlive))
+                  ALLOCATE(j_molecule_list(maxnlive))
+          END IF
+          maxnlive = IAND(maxnlive+padconst_4byte,padmask_4byte)
+          ALLOCATE(live_xcom(maxnlive,nspecies_present,nbr_boxes))
+          ALLOCATE(live_ycom(maxnlive,nspecies_present,nbr_boxes))
+          ALLOCATE(live_zcom(maxnlive,nspecies_present,nbr_boxes))
+          ALLOCATE(live_max_dcom(maxnlive,nspecies_present,nbr_boxes))
+          DO js_present = 1, nspecies_present
+                js = which_species_present(js_present)
+                istart = 1
+                jnatoms = natoms(js)
+                cjnmols = 0
+                DO ibox = 1, nbr_boxes
+                        jnlive = nlive(js,ibox)
+                        jnmols = nmols(js,ibox)
+                        vlen = jnlive*jnatoms
+                        IF (jnlive == 0) CYCLE
+                        iend = istart + jnmols - 1
+                        IF (l_all_live .AND. l_unit_stride) THEN
+                                !$OMP PARALLEL
+                                !$OMP DO SIMD COLLAPSE(2)
+                                DO jm = 1, jnlive
+                                        DO ja = 1, jnatoms
+                                                live_atom_rsp(ja,jm,1,js_present,ibox) = atom_list(ja,jm+cjnmols,js)%rp(1)
+                                                live_atom_rsp(ja,jm,2,js_present,ibox) = atom_list(ja,jm+cjnmols,js)%rp(2)
+                                                live_atom_rsp(ja,jm,3,js_present,ibox) = atom_list(ja,jm+cjnmols,js)%rp(3)
+                                                live_atom_exist(ja,jm,js_present,ibox) = atom_list(ja,jm+cjnmols,js)%exist
+                                        END DO
+                                END DO
+                                !$OMP END DO SIMD
+                                !$OMP DO SIMD
+                                DO jm = 1, jnlive
+                                        live_xcom(jm,js_present,ibox) = molecule_list(jm+cjnmols,js)%rcom(1)
+                                        live_ycom(jm,js_present,ibox) = molecule_list(jm+cjnmols,js)%rcom(2)
+                                        live_zcom(jm,js_present,ibox) = molecule_list(jm+cjnmols,js)%rcom(3)
+                                        live_max_dcom(jm,js_present,ibox) = molecule_list(jm+cjnmols,js)%rcom(4)
+                                END DO
+                                !$OMP END DO SIMD
+                                !$OMP END PARALLEL
+                        ELSE
+                                IF (l_unit_stride) THEN
+                                        !$OMP PARALLEL WORKSHARE
+                                        spec_locates(1:jnlive) = PACK(locate(1:jnmols,js,ibox), &
+                                                molecule_list(istart:iend,js)%live)
+                                        j_molecule_list(1:jnlive) = molecule_list(spec_locates(1:jnlive),js)
+                                        j_atom_list(1:jnatoms,1:jnlive) = atom_list(1:jnatoms,spec_locates(1:jnlive),js)
+                                        !$OMP END PARALLEL WORKSHARE
+                                ELSE
+                                        !$OMP PARALLEL WORKSHARE
+                                        j_molecule_list(1:jnlive) = molecule_list(live_locates(1:jnlive,js,ibox),js)
+                                        j_atom_list(1:jnatoms,1:jnlive) = atom_list(1:jnatoms,live_locates(1:jnlive,js,ibox),js)
+                                        !$OMP END PARALLEL WORKSHARE
+                                END IF
+                                !$OMP PARALLEL
+                                !$OMP DO SIMD COLLAPSE(2)
+                                DO jm = 1, jnlive
+                                        DO ja = 1, jnatoms
+                                                live_atom_rsp(ja,jm,1,js_present,ibox) = j_atom_list(ja,jm)%rp(1)
+                                                live_atom_rsp(ja,jm,2,js_present,ibox) = j_atom_list(ja,jm)%rp(2)
+                                                live_atom_rsp(ja,jm,3,js_present,ibox) = j_atom_list(ja,jm)%rp(3)
+                                                live_atom_exist(ja,jm,js_present,ibox) = j_atom_list(ja,jm)%exist
+                                        END DO
+                                END DO
+                                !$OMP END DO SIMD
+                                !$OMP DO SIMD
+                                DO jm = 1, jnlive
+                                        live_xcom(jm,js_present,ibox) = j_molecule_list(jm)%rcom(1)
+                                        live_ycom(jm,js_present,ibox) = j_molecule_list(jm)%rcom(2)
+                                        live_zcom(jm,js_present,ibox) = j_molecule_list(jm)%rcom(3)
+                                        live_max_dcom(jm,js_present,ibox) = j_molecule_list(jm)%rcom(4)
+                                END DO
+                                !$OMP END DO SIMD
+                                !$OMP END PARALLEL
+                        END IF
+                        IF (.NOT. ALL(live_atom_exist(1:jnatoms,1:jnlive,js_present,ibox))) l_not_all_exist = .TRUE.
+                        istart = istart + jnmols
+                        cjnmols = cjnmols + jnmols
+                END DO
+          END DO
+          DO ibox = 1, nbr_boxes
+                l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                IF (l_ortho) CYCLE
+                h11 = box_list(ibox)%length_inv(1,1)
+                h21 = box_list(ibox)%length_inv(2,1)
+                h31 = box_list(ibox)%length_inv(3,1)
+                h12 = box_list(ibox)%length_inv(1,2)
+                h22 = box_list(ibox)%length_inv(2,2)
+                h32 = box_list(ibox)%length_inv(3,2)
+                h13 = box_list(ibox)%length_inv(1,3)
+                h23 = box_list(ibox)%length_inv(2,3)
+                h33 = box_list(ibox)%length_inv(3,3)
+                DO js_present = 1, nspecies_present
+                        js = which_species_present(js_present)
+                        jnlive = nlive(js,ibox)
+                        jnatoms = natoms(js)
+                        !$OMP PARALLEL
+                        !$OMP DO SIMD PRIVATE(xcom,ycom,zcom,rcom) &
+                        !$OMP ALIGNED(live_xcom,live_ycom,live_zcom)
+                        DO jm = 1, jnlive
+                                rcom = live_xcom(jm,js_present,ibox)
+                                xcom = h11*rcom
+                                rcom = live_ycom(jm,js_present,ibox)
+                                xcom = xcom + h12*rcom
+                                ycom = h22*rcom
+                                rcom = live_zcom(jm,js_present,ibox)
+                                xcom = xcom + h13*rcom
+                                ycom = ycom + h23*rcom
+                                zcom = h33*rcom
+                                live_xcom(jm,js_present,ibox) = xcom
+                                live_ycom(jm,js_present,ibox) = ycom
+                                live_zcom(jm,js_present,ibox) = zcom
+                        END DO
+                        !$OMP END DO SIMD
+                        !$OMP DO PRIVATE(jrp,sxp,syp,szp,ja) &
+                        !$OMP SCHEDULE(STATIC)
+                        DO jm = 1, jnlive
+                                DO ja = 1, jnatoms
+                                        jrp = live_atom_rsp(ja,jm,1,js_present,ibox)
+                                        sxp = h11*jrp
+                                        jrp = live_atom_rsp(ja,jm,2,js_present,ibox)
+                                        sxp = sxp + h12*jrp
+                                        syp = h22*jrp
+                                        jrp = live_atom_rsp(ja,jm,3,js_present,ibox)
+                                        sxp = sxp + h13*jrp
+                                        syp = syp + h23*jrp
+                                        szp = h33*jrp
+                                        live_atom_rsp(ja,jm,1,js_present,ibox) = sxp
+                                        live_atom_rsp(ja,jm,2,js_present,ibox) = syp
+                                        live_atom_rsp(ja,jm,3,js_present,ibox) = szp
+                                END DO
+                        END DO
+                        !$OMP END DO
+                        !$OMP END PARALLEL
+                END DO
+          END DO
+
+  END SUBROUTINE Livelist_Packing
+
+  SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Vectorized_Widom(im,is, &
+    this_box,vdw_energy,qq_energy,overlap)
+
+    ! Arguments
+    INTEGER, INTENT(IN) :: im, is, this_box
+    REAL(DP), INTENT(OUT) :: vdw_energy, qq_energy
+    LOGICAL, INTENT(OUT) :: overlap
+    ! End Arguments
+    INTEGER, DIMENSION(nspecies) :: jspec_n_interact, jspec_istart, jspec_iend
+    INTEGER, DIMENSION(COUNT(widom_atoms%exist)) :: which_i_exist, ispec_atomtypes
+    REAL(DP), DIMENSION(COUNT(widom_atoms%exist)) :: icharge
+    REAL(DP), DIMENSION(3,COUNT(widom_atoms%exist)) :: irp, isp
+    REAL(DP), DIMENSION(3) :: irp_com, isp_com
+    LOGICAL(4), DIMENSION(COUNT(widom_atoms%exist)) :: i_has_vdw
+    !LOGICAL, DIMENSION(MAXVAL(nmols(:,this_box))), TARGET :: spec_live_tgt
+    REAL(DP), DIMENSION(maxboxnatoms) :: rijsq_4min
+    LOGICAL(4), DIMENSION(MAXVAL(nlive(:,this_box))) :: interact_vec
+    !INTEGER, DIMENSION(MAXVAL(nlive(:,this_box))) :: which_interact
+    LOGICAL(1), DIMENSION(maxboxnatoms) :: vdw_mask, coul_mask, j_hascharge, j_hasvdw, j_exist
+    INTEGER, DIMENSION(maxboxnatoms) :: jatomtype, packed_types
+    REAL(DP), DIMENSION(maxboxnatoms) :: jcharge, up_nrg_vec, rijsq_packed, rijsq, jcharge_coul, jxp, jyp, jzp
+    REAL(DP), DIMENSION(maxboxnatoms,3) :: jrsp
+    TYPE(VdW256), DIMENSION(maxboxnatoms) :: ij_vdw_p
+    REAL(DP), DIMENSION(4,maxboxnatoms) :: mie_vdw_p_table
+    REAL(DP), DIMENSION(2,maxboxnatoms) :: ij_vdw_p_table
+    REAL(DP), DIMENSION(maxboxnatoms,4) :: ij_vdw_p_table_T
+    LOGICAL :: this_est_emax, l_get_rij_min, l_ortho
+    INTEGER :: n_vdw_p, ia_counter, i, j, n_i_exist, istart, iend, jnlive, n_interact, vlen, orig_vlen, n_j_exist
+    INTEGER :: bsolvent, istart_base, natoms_js, ti_solvent, ja, js, js_present, n_coul, n_vdw, ia, live_vlen, jnmols, jnatoms, itype
+    REAL(DP) :: mol_rcut, max_dcom_i_const, this_vdw_rcutsq, this_coul_rcutsq
+    REAL(DP) :: sigbyr2, sigbyr6, sigbyr12, rij, roffsq_rijsq, nrg_vdw, nrg_coul, i_qq_energy, mie_m, mie_n, icharge_factor
+
+    INTEGER :: this_int_vdw_style, ibox, jtype
+
+    LOGICAL :: l_check_coul, l_notallvdw, l_notallcoul, l_pack_separately, l_notsamecut, i_get_vdw, i_get_coul
+    LOGICAL(8) :: l_interact
+    REAL(DP) :: rterm, rterm2, eps, sigsq, negsigsq, negsigbyr2, this_rijsq, epsig_n, epsig_m
+    REAL(DP) :: ixp, iyp, izp, dxcom, dycom, dzcom, dscom, ixcom, iycom, izcom, dxp, dyp, dzp, dsp
+    REAL(DP) :: xl, yl, zl, hxl, hyl, hzl
+    REAL(DP) :: h11,h21,h31,h12,h22,h32,h13,h23,h33
+    REAL(DP) :: invcutx2_cbmc, invcutsq_cbmc, inv_rij
+    INTEGER :: this_int_charge_sum_style
+    !dir$ assume_aligned jxp:array_align_bytes, jyp:array_align_bytes, jzp:array_align_bytes, jrsp:array_align_bytes, ij_vdw_p_table:array_align_bytes, ij_vdw_p_table_T:array_align_bytes
+    !DIR$ ASSUME_ALIGNED rijsq:array_align_bytes, rijsq_packed:array_align_bytes, jcharge:array_align_bytes, jcharge_coul:array_align_bytes
+    !DIR$ ASSUME_ALIGNED vdw_mask:array_align_bytes, coul_mask:array_align_bytes, j_hascharge:array_align_bytes, j_hasvdw:array_align_bytes, j_exist:array_align_bytes
+    !DIR$ ASSUME_ALIGNED jatomtype:array_align_bytes, packed_types:array_align_bytes, up_nrg_vec:array_align_bytes
+    !dir$ assume (MOD(maxboxnatoms,dimpad_4byte) .EQ. 0)
+
+    vdw_energy = 0.0_DP
+    qq_energy = 0.0_DP
+    overlap = .FALSE.
+    ibox = widom_molecule%which_box
+    this_int_charge_sum_style = MERGE(charge_sf,int_charge_sum_style(ibox),cbmc_flag .AND. cbmc_charge_sf_flag)
+
+
+    l_get_rij_min = est_atompair_rminsq .AND. .NOT. cbmc_flag
+    this_est_emax = est_emax .AND. widom_active .AND. .NOT. cbmc_flag
+    jspec_n_interact = 0
+    n_vdw_p = n_vdw_p_list(this_box)
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+    IF (l_ortho) THEN
+            xl = box_list(this_box)%length(1,1)
+            yl = box_list(this_box)%length(2,2)
+            zl = box_list(this_box)%length(3,3)
+            hxl = 0.5 * xl
+            hyl = 0.5 * yl
+            hzl = 0.5 * zl
+    ELSE
+            h11 = box_list(this_box)%length(1,1)
+            h21 = box_list(this_box)%length(2,1)
+            h31 = box_list(this_box)%length(3,1)
+            h12 = box_list(this_box)%length(1,2)
+            h22 = box_list(this_box)%length(2,2)
+            h32 = box_list(this_box)%length(3,2)
+            h13 = box_list(this_box)%length(1,3)
+            h23 = box_list(this_box)%length(2,3)
+            h33 = box_list(this_box)%length(3,3)
+    END IF
+    IF (cbmc_flag) THEN
+            mol_rcut = rcut_cbmc(this_box)
+            this_vdw_rcutsq = rcut_cbmcsq(this_box)
+            this_coul_rcutsq = rcut_cbmcsq(this_box)
+            invcutx2_cbmc = 2.0_DP/rcut_cbmc(this_box)
+            invcutsq_cbmc = 1.0_DP/rcut_cbmcsq(this_box)
+    ELSE
+            mol_rcut = rcut_max(this_box)
+            IF (int_vdw_sum_style(this_box) == vdw_cut_switch) THEN
+                    this_vdw_rcutsq = roff_switch_sq(this_box)
+            ELSE
+                    this_vdw_rcutsq = rcut_vdwsq(this_box)
+            END IF
+            this_coul_rcutsq = rcut_coulsq(this_box)
+    END IF
+    max_dcom_i_const = widom_molecule%rcom(4) + mol_rcut
+    n_i_exist = COUNT(widom_atoms%exist)
+    which_i_exist = PACK(vec123(1:natoms(is)),widom_atoms%exist)
+    irp(1,:) = widom_atoms(which_i_exist)%rp(1)
+    irp(2,:) = widom_atoms(which_i_exist)%rp(2)
+    irp(3,:) = widom_atoms(which_i_exist)%rp(3)
+    ispec_atomtypes = nonbond_list(which_i_exist,is)%atom_type_number
+    icharge = nonbond_list(which_i_exist,is)%charge * charge_factor
+    IF (l_ortho) THEN
+            ixcom = widom_molecule%rcom(1)
+            iycom = widom_molecule%rcom(2)
+            izcom = widom_molecule%rcom(3)
+    ELSE
+            irp_com(1) = widom_molecule%rcom(1)
+            irp_com(2) = widom_molecule%rcom(2)
+            irp_com(3) = widom_molecule%rcom(3)
+            isp_com = MATMUL(box_list(this_box)%length_inv,irp_com)
+            isp = MATMUL(box_list(this_box)%length_inv,irp)
+            ixcom = isp_com(1)
+            iycom = isp_com(2)
+            izcom = isp_com(3)
+    END IF
+    istart = 1
+    DO js_present = 1, nspecies_present
+        js = which_species_present(js_present)
+        jnlive = nlive(js,this_box)
+        IF (jnlive == 0) CYCLE
+        jnatoms = natoms(js)
+        n_interact = 0
+        IF (l_ortho) THEN
+                DO i = 1, jnlive
+                        dxcom = ABS(live_xcom(i,js_present,ibox) - ixcom)
+                        dycom = ABS(live_ycom(i,js_present,ibox) - iycom)
+                        dzcom = ABS(live_zcom(i,js_present,ibox) - izcom)
+                        IF (dxcom > hxl) dxcom = dxcom - xl
+                        IF (dycom > hyl) dycom = dycom - yl
+                        IF (dzcom > hzl) dzcom = dzcom - zl
+                        ! Repurposing dxcom as rijsq accumulator to enforce FMA3 instructions if supported
+                        dxcom = dxcom * dxcom
+                        dxcom = dxcom + dycom * dycom
+                        dxcom = dxcom + dzcom * dzcom
+                        l_interact = max_dcom_i_const > &
+                                SQRT(dxcom) - live_max_dcom(i,js_present,this_box)
+                        interact_vec(i) = l_interact
+                        IF (l_interact) n_interact = n_interact + 1
+                END DO
+                !DO i = 1,3
+                !        drspw(:,i) = drspw(:,i) - &
+                !                ANINT(drspw(:,i)/box_list(this_box)%length(i,i))*box_list(this_box)%length(i,i)
+                !END DO
+        ELSE
+                DO i = 1, jnlive
+                        ! COM coordinates are fractional if box is triclinic
+                        dscom = live_xcom(i,js_present,this_box)
+                        dscom = dscom - ixcom
+                        IF (dscom > 0.5_DP) THEN
+                                dscom = dscom - 1.0_DP
+                        ELSE IF (dscom < -0.5_DP) THEN
+                                dscom = dscom + 1.0_DP
+                        END IF
+                        dxcom = h11*dscom
+                        dscom = live_ycom(i,js_present,this_box)
+                        dscom = dscom - iycom
+                        IF (dscom > 0.5_DP) THEN
+                                dscom = dscom - 1.0_DP
+                        ELSE IF (dscom < -0.5_DP) THEN
+                                dscom = dscom + 1.0_DP
+                        END IF
+                        dxcom = dxcom + h12*dscom
+                        dycom = h22*dscom
+                        dscom = live_zcom(i,js_present,this_box)
+                        dscom = dscom - izcom
+                        IF (dscom > 0.5_DP) THEN
+                                dscom = dscom - 1.0_DP
+                        ELSE IF (dscom < -0.5_DP) THEN
+                                dscom = dscom + 1.0_DP
+                        END IF
+                        dxcom = dxcom + h13*dscom
+                        dycom = dycom + h23*dscom
+                        dzcom = h33*dscom
+                        ! Repurposing dxcom as rijsq accumulator to enforce FMA3 instructions if supported
+                        dxcom = dxcom * dxcom
+                        dxcom = dxcom + dycom * dycom
+                        dxcom = dxcom + dzcom * dzcom
+                        l_interact = max_dcom_i_const > &
+                                SQRT(dxcom) - live_max_dcom(i,js_present,this_box)
+                        interact_vec(i) = l_interact
+                        IF (l_interact) n_interact = n_interact + 1
+                END DO
+                !drspw = MATMUL(drspw,length_inv_T)
+                !drspw = drspw - ANINT(drspw)
+                !drspw = MATMUL(drspw,length_T)
+        END IF
+        !which_interact(1:n_interact) = PACK(vec123(1:jnlive),interact_vec(1:jnlive))
+        vlen = n_interact*natoms(js)
+        iend = istart + vlen - 1
+        jrsp(istart:iend,:) = RESHAPE(live_atom_rsp(1:jnatoms,PACK(vec123(1:jnlive),interact_vec(1:jnlive)),1:3,js_present,this_box), &
+                (/ vlen, 3 /))
+        IF (l_not_all_exist) THEN
+                j_exist(istart:iend) = RESHAPE(&
+                        live_atom_exist(1:jnatoms,PACK(vec123(1:jnlive),interact_vec(1:jnlive)),js_present,this_box), (/ vlen /))
+        END IF
+        jatomtype(istart:iend) = RESHAPE(SPREAD(nonbond_list(1:natoms(js),js)%atom_type_number,2,n_interact), (/ vlen /))
+        jcharge(istart:iend) = RESHAPE(SPREAD(nonbond_list(1:natoms(js),js)%charge,2,n_interact), (/ vlen /))
+        IF (l_get_rij_min) THEN
+                jspec_n_interact(js) = n_interact
+                jspec_istart(js) = istart
+                jspec_iend(js) = iend
+        END IF
+        istart = istart+vlen
+    END DO
+    vlen = istart - 1
+    orig_vlen = vlen
+    ! The following IF statement condition is unlikely to be true.
+    ! That is why this process is saved until now; it's unlikely to be necessary.
+    IF (l_not_all_exist) THEN
+            IF (.NOT. ALL(j_exist(1:vlen))) THEN
+                    n_j_exist = COUNT(j_exist(1:vlen))
+                    DO i = 1,3
+                        jrsp(1:n_j_exist,i) = PACK(jrsp(1:vlen,i),j_exist(1:vlen))
+                    END DO
+                    jcharge(1:n_j_exist) = PACK(jcharge(1:vlen),j_exist(1:vlen))
+                    jatomtype(1:n_j_exist) = PACK(jatomtype(1:vlen),j_exist(1:vlen))
+                    vlen = n_j_exist
+            END IF
+    END IF
+    IF (this_est_emax) THEN
+            up_nrg_vec = 0.0_DP
+    END IF
+    j_hascharge(1:vlen) = jcharge(1:vlen) .NE. 0.0_DP
+    j_hasvdw(1:vlen) = jatomtype(1:vlen) .NE. 0
+    l_notallvdw = .NOT. ALL(j_hasvdw(1:vlen))
+    l_notallcoul = .NOT. ALL(j_hascharge(1:vlen))
+    l_notsamecut = this_vdw_rcutsq .NE. this_coul_rcutsq
+    l_pack_separately = l_notsamecut .OR. l_notallcoul .OR. l_notallvdw
+
+    DO ia = 1, n_i_exist
+        itype = ispec_atomtypes(ia)
+        i_get_vdw = int_vdw_style(this_box) .NE. vdw_none .AND. itype .NE. 0
+        i_get_coul = icharge(ia) .NE. 0.0_DP .AND. int_charge_style(this_box) == charge_coul .AND. &
+                (species_list(is)%l_coul_cbmc .OR. .NOT. cbmc_flag)
+        IF (.NOT. (i_get_vdw .OR. i_get_coul)) CYCLE
+        l_check_coul = (i_get_coul .AND. l_notsamecut) .OR. .NOT. i_get_vdw
+        !l_pack_coul = (i_get_coul .AND. l_pack_separately) .OR. l_check_coul
+        IF (l_ortho) THEN
+                ixp = irp(1,ia)
+                iyp = irp(2,ia)
+                izp = irp(3,ia)
+                DO i = 1, vlen
+                        dxp = jrsp(i,1)
+                        dyp = jrsp(i,2)
+                        dzp = jrsp(i,3)
+                        dxp = ABS(dxp - ixp)
+                        dyp = ABS(dyp - iyp)
+                        dzp = ABS(dzp - izp)
+                        IF (dxp > hxl) dxp = dxp - xl
+                        IF (dyp > hyl) dyp = dyp - yl
+                        IF (dzp > hzl) dzp = dzp - zl
+                        ! Repurposing dxp as rijsq accumulator to enforce FMA3 instructions if supported
+                        dxp = dxp * dxp
+                        dxp = dxp + dyp * dyp
+                        dxp = dxp + dzp * dzp
+                        rijsq(i) = dxp
+                        IF (i_get_vdw) THEN
+                                l_interact = dxp < this_vdw_rcutsq
+                                vdw_mask(i) = l_interact
+                        END IF
+                        IF (l_check_coul) THEN
+                                l_interact = dxp < this_coul_rcutsq
+                                coul_mask(i) = l_interact
+                        END IF
+                END DO
+        ELSE
+                ixp = isp(1,ia)
+                iyp = isp(2,ia)
+                izp = isp(3,ia)
+                DO i = 1, vlen
+                        ! atom coordinates are fractional if box is triclinic
+                        dsp = jrsp(i,1)
+                        dsp = dsp - ixp
+                        IF (dsp > 0.5_DP) THEN
+                                dsp = dsp - 1.0_DP
+                        ELSE IF (dsp < -0.5_DP) THEN
+                                dsp = dsp + 1.0_DP
+                        END IF
+                        dxp = h11*dsp
+                        dsp = jrsp(i,2)
+                        dsp = dsp - iyp
+                        IF (dsp > 0.5_DP) THEN
+                                dsp = dsp - 1.0_DP
+                        ELSE IF (dsp < -0.5_DP) THEN
+                                dsp = dsp + 1.0_DP
+                        END IF
+                        dxp = dxp + h12*dsp
+                        dyp = h22*dsp
+                        dsp = jrsp(i,3)
+                        dsp = dsp - izp
+                        IF (dsp > 0.5_DP) THEN
+                                dsp = dsp - 1.0_DP
+                        ELSE IF (dsp < -0.5_DP) THEN
+                                dsp = dsp + 1.0_DP
+                        END IF
+                        dxp = dxp + h13*dsp
+                        dyp = dyp + h23*dsp
+                        dzp = h33*dsp
+                        ! Repurposing dxp as rijsq accumulator to enforce FMA3 instructions if supported
+                        dxp = dxp * dxp
+                        dxp = dxp + dyp * dyp
+                        dxp = dxp + dzp * dzp
+                        rijsq(i) = dxp
+                        IF (i_get_vdw) THEN
+                                l_interact = dxp < this_vdw_rcutsq
+                                vdw_mask(i) = l_interact
+                        END IF
+                        IF (l_check_coul) THEN
+                                l_interact = dxp < this_coul_rcutsq
+                                coul_mask(i) = l_interact
+                        END IF
+                END DO
+        END IF
+        IF (.NOT. l_sectors) THEN
+                IF (ANY(rijsq(1:vlen) < rcut_lowsq)) THEN
+                        overlap = .TRUE.
+                        !!$OMP CRITICAL
+                        !WRITE(*,*) "CRITICAL OVERLAP!", MINVAL(rijsq(1:vlen)), MAXVAL(rijsq(1:vlen)), ia, is, cbmc_flag
+                        !WRITE(*,*) "check_overlap: ", check_overlap(ia,im,is)
+                        !!$OMP END CRITICAL
+                        RETURN
+                END IF
+        END IF
+        IF (l_get_rij_min) THEN
+                DO js = 1, nspecies
+                        IF (jspec_n_interact(js) == 0) CYCLE
+                        bsolvent = species_list(js)%solvent_base
+                        iend = jspec_iend(js)
+                        istart_base = jspec_istart(js) - 1
+                        natoms_js = natoms(js)
+                        DO ja = 1, natoms(js)
+                                ti_solvent = bsolvent + ja
+                                istart = istart_base + ja
+                                IF (vlen .NE. orig_vlen) THEN
+                                        rijsq_4min(1:orig_vlen) = UNPACK(rijsq(1:vlen),j_exist(1:orig_vlen),rijsq_field(1:orig_vlen))
+                                        swi_atompair_rsqmin(ti_solvent,which_i_exist(ia)) = &
+                                                MINVAL(rijsq_4min(istart:iend:natoms_js))
+                                ELSE
+                                        swi_atompair_rsqmin(ti_solvent,which_i_exist(ia)) = &
+                                                MINVAL(rijsq(istart:iend:natoms_js))
+                                END IF
+                        END DO
+                END DO
+        END IF
+        IF (i_get_vdw) THEN
+                IF (i_get_coul .AND. .NOT. l_notsamecut) THEN
+                        IF (l_notallcoul) THEN
+                                coul_mask(1:vlen) = vdw_mask(1:vlen) .AND. j_hascharge(1:vlen)
+                        ELSE IF (l_notallvdw) THEN
+                                coul_mask(1:vlen) = vdw_mask(1:vlen)
+                        ELSE
+                                n_coul = COUNT(vdw_mask(1:vlen))
+                                jcharge_coul(1:n_coul) = PACK(jcharge(1:vlen),vdw_mask(1:vlen))
+                        END IF
+                END IF
+                IF (l_notallvdw) THEN
+                        vdw_mask(1:vlen) = vdw_mask(1:vlen) .AND. j_hasvdw(1:vlen)
+                END IF
+                n_vdw = COUNT(vdw_mask(1:vlen))
+                !packed_types(1:n_vdw) = PACK(jatomtype(1:vlen),vdw_mask(1:vlen))
+                !ij_vdw_p(1:n_vdw) = ppvdwp_list(packed_types(1:n_vdw),itype,this_box)
+                !ij_vdw_p(1:n_vdw) = ppvdwp_list(PACK(jatomtype(1:vlen),vdw_mask(1:vlen)),itype,this_box)
+                !DO i = 1, n_vdw
+                !        ij_vdw_p_table(i,1:4) = ppvdwp_table(1:4,packed_types(i),itype,this_box)
+                !END DO
+                !DO i = 1, n_vdw
+                !        ij_vdw_p(i) = ppvdwp_list(packed_types(i),itype,this_box)
+                !END DO
+                !ij_vdw_p_table(1:n_vdw,1:4) = TRANSPOSE(ppvdwp_table(1:4,packed_types(1:n_vdw),itype,this_box))
+                !ij_vdw_p_table(1:4,1:n_vdw) = ppvdwp_table(1:4,packed_types(1:n_vdw),itype,this_box)
+                !ij_vdw_p_table(1:4,1:n_vdw) = ppvdwp_table(1:4,PACK(jatomtype(1:vlen),vdw_mask(1:vlen)),itype,this_box)
+                rijsq_packed(1:n_vdw) = PACK(rijsq(1:vlen),vdw_mask(1:vlen))
+                this_int_vdw_style = int_vdw_style(this_box)
+                IF (this_int_vdw_style == vdw_lj) THEN
+                        ij_vdw_p_table(1:2,1:n_vdw) = ppvdwp_table2(1:2,PACK(jatomtype(1:vlen),vdw_mask(1:vlen)),itype,ibox)
+                        IF (int_vdw_sum_style(this_box) == vdw_charmm) THEN
+                                DO i = 1, n_vdw
+                                        eps = ij_vdw_p_table(1,i) ! epsilon
+                                        sigsq = ij_vdw_p_table(2,i) ! sigma**2
+                                        sigbyr2 = sigsq/rijsq_packed(i) ! sigma was already squared
+                                        sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                        sigbyr12 = sigbyr6*sigbyr6
+                                        nrg_vdw = eps * (sigbyr12 - 2.0_DP*sigbyr6) ! eps was not multiplied by 4
+                                        vdw_energy = vdw_energy + nrg_vdw
+                                        IF (this_est_emax) up_nrg_vec(i) = nrg_vdw
+                                END DO
+                        ELSE IF (int_vdw_sum_style(this_box) == vdw_cut_shift) THEN
+                                DO i = 1, n_vdw
+                                        eps = ij_vdw_p_table(1,i) ! 4*epsilon
+                                        negsigsq = ij_vdw_p_table(2,i) ! -(sigma**2)
+                                        negsigbyr2 = negsigsq/rijsq_packed(i)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_vdw = eps * rterm
+                                        negsigbyr2 = negsigsq/rcut_vdwsq(this_box)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_vdw = nrg_vdw - eps * rterm
+                                        vdw_energy = vdw_energy + nrg_vdw
+                                        IF (this_est_emax) up_nrg_vec(i) = nrg_vdw
+                                END DO
+                        ELSE IF (int_vdw_sum_style(this_box) == vdw_cut_switch) THEN
+                                DO i = 1, n_vdw
+                                        eps = ij_vdw_p_table(1,i) ! 4*epsilon
+                                        negsigsq = ij_vdw_p_table(2,i) ! -(sigma**2)
+                                        this_rijsq = rijsq_packed(i)
+                                        negsigbyr2 = negsigsq/this_rijsq
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_vdw = eps * rterm
+                                        IF (this_rijsq .GE. ron_switch_sq(this_box)) THEN
+                                                roffsq_rijsq = roff_switch_sq(this_box) - this_rijsq
+                                                nrg_vdw = &
+                                                        roffsq_rijsq*roffsq_rijsq * &
+                                                        (switch_factor2(this_box)+2.0_DP*this_rijsq)*switch_factor1(this_box) * &
+                                                        nrg_vdw
+                                        END IF
+                                        vdw_energy = vdw_energy + nrg_vdw
+                                        IF (this_est_emax) up_nrg_vec(i) = nrg_vdw
+                                END DO
+                        ELSE IF (int_vdw_sum_style(this_box) == vdw_cut_shift_force) THEN
+                                DO i = 1, n_vdw
+                                        eps = ij_vdw_p_table(1,i) ! 4*epsilon
+                                        negsigsq = ij_vdw_p_table(2,i) ! -(sigma**2)
+                                        this_rijsq = rijsq_packed(i)
+                                        negsigbyr2 = negsigsq/this_rijsq
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_vdw = eps * rterm
+                                        negsigbyr2 = negsigsq/rcut_vdwsq(this_box)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm2 = rterm * rterm
+                                        rterm2 = rterm + 2.0_DP * rterm2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_vdw = nrg_vdw - eps * rterm
+                                        nrg_vdw = nrg_vdw - &
+                                                (SQRT(this_rijsq) - rcut_vdw(this_box)) * &
+                                                -6.0_DP * eps * rterm2 / rcut_vdw(this_box)
+                                        vdw_energy = vdw_energy + nrg_vdw
+                                        IF (this_est_emax) up_nrg_vec(i) = nrg_vdw
+                                END DO
+                        ELSE
+                                DO i = 1, n_vdw
+                                        !eps = ppvdwp_table(1,packed_types(i),itype,ibox) ! 4*epsilon
+                                        !negsigsq = ppvdwp_table(2,packed_types(i),itype,ibox) ! -(sigma**2)
+                                        eps = ij_vdw_p_table(1,i) ! 4*epsilon
+                                        negsigsq = ij_vdw_p_table(2,i) ! -(sigma**2)
+                                        negsigbyr2 = negsigsq/rijsq_packed(i)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        ! nrg_vdw = eps * rterm
+                                        vdw_energy = vdw_energy + eps * rterm
+                                        IF (this_est_emax) up_nrg_vec(i) = eps * rterm
+                                END DO
+                        END IF
+                ELSE IF (int_vdw_style(this_box) == vdw_mie) THEN
+                        packed_types(1:n_vdw) = PACK(jatomtype(1:vlen),vdw_mask(1:vlen))
+                        mie_vdw_p_table(1:4,1:n_vdw) = ppvdwp_table2(1:4,packed_types(1:n_vdw),itype,this_box)
+                        DO i = 1, n_vdw
+                                this_rijsq = LOG(rijsq_packed(i)) ! actually ln(rijsq)
+                                epsig_n = mie_vdw_p_table(1,i)  ! epsilon * mie_coeff * sigma ** n
+                                epsig_m = mie_vdw_p_table(2,i) ! epsilon * mie_coeff * sigma ** m
+                                mie_n = mie_vdw_p_table(3,i) ! already halved
+                                mie_m = mie_vdw_p_table(4,i) ! already halved
+                                nrg_vdw = epsig_n * EXP(this_rijsq*mie_n)
+                                nrg_vdw = nrg_vdw - epsig_m * EXP(this_rijsq*mie_m)
+                                IF (int_vdw_sum_style(this_box) == vdw_cut_shift) THEN
+                                        nrg_vdw = nrg_vdw - ppvdwp_table(packed_types(i),itype,5,this_box)
+                                        !nrg_vdw = nrg_vdw + epsig_n * rcut_vdwsq(this_box)**mie_n
+                                        !nrg_vdw = nrg_vdw - epsig_m * rcut_vdwsq(this_box)**mie_m
+                                END IF
+                                vdw_energy = vdw_energy + nrg_vdw
+                                IF (this_est_emax) up_nrg_vec(i) = nrg_vdw
+                        END DO
+                END IF
+                !vdw_energy = SUM(nrg_vdw)
+                IF (this_est_emax) up_nrg_vec(1:vlen) = UNPACK(up_nrg_vec(1:n_vdw), vdw_mask(1:vlen), zero_field(1:vlen))
+        ELSE IF (this_est_emax) THEN
+                up_nrg_vec(1:vlen) = 0.0_DP
+        END IF
+        ! Electrostatics
+        IF (i_get_coul) THEN
+                icharge_factor = icharge(ia) ! already multiplied by charge_factor
+                IF (l_pack_separately .OR. .NOT. i_get_vdw) THEN
+                        IF (l_notallcoul .AND. l_check_coul) THEN
+                                coul_mask(1:vlen) = coul_mask(1:vlen) .AND. j_hascharge(1:vlen)
+                        END IF
+                        n_coul = COUNT(coul_mask(1:vlen))
+                        jcharge_coul(1:n_coul) = PACK(jcharge(1:vlen), coul_mask(1:vlen))
+                        rijsq_packed(1:n_coul) = PACK(rijsq(1:vlen), coul_mask(1:vlen))
+                ELSE
+                        n_coul = n_vdw
+                END IF
+                i_qq_energy = 0.0_DP
+                SELECT CASE(this_int_charge_sum_style)
+                CASE(charge_sf)
+                        DO i = 1, n_coul
+                                this_rijsq = rijsq_packed(i)
+                                inv_rij = Recip_Sqrt(this_rijsq)
+                                rij = inv_rij * this_rijsq
+                                i_qq_energy = i_qq_energy + jcharge_coul(i) * (inv_rij - invcutx2_cbmc + rij*invcutsq_cbmc)
+                        END DO
+                CASE(charge_ewald)
+                        DO i = 1, n_coul
+                                this_rijsq = rijsq_packed(i)
+                                inv_rij = Recip_Sqrt(this_rijsq)
+                                rij = inv_rij * this_rijsq
+                                i_qq_energy = i_qq_energy + ERFC(alpha_ewald(this_box)*rij) * inv_rij * jcharge_coul(i)
+                        END DO
+                CASE(charge_dsf)
+                        DO i = 1, n_coul
+                                this_rijsq = rijsq_packed(i)
+                                inv_rij = Recip_Sqrt(this_rijsq)
+                                rij = inv_rij * this_rijsq
+                                i_qq_energy = i_qq_energy + (dsf_factor2(this_box) * (rij - rcut_coul(this_box)) - &
+                                        dsf_factor1(this_box) + &
+                                        ERFC(alpha_dsf(this_box)*rij) * inv_rij) * jcharge_coul(i)
+                        END DO
+                CASE DEFAULT
+                        DO i = 1, n_coul
+                                this_rijsq = rijsq_packed(i)
+                                inv_rij = Recip_Sqrt(this_rijsq)
+                                i_qq_energy = i_qq_energy + inv_rij*jcharge_coul(i)
+                        END DO
+                END SELECT
+                qq_energy = qq_energy + i_qq_energy * icharge_factor
+                IF (this_est_emax) THEN
+                        up_nrg_vec(1:vlen) = up_nrg_vec(1:vlen) + UNPACK(jcharge_coul(1:n_coul)*icharge_factor/SQRT(rijsq_packed(1:n_coul)), coul_mask(1:vlen), zero_field(1:vlen))
+                END IF
+        END IF
+        IF (this_est_emax) THEN
+                Eij_max  = MAX(Eij_max, MAXVAL(up_nrg_vec(1:vlen)))
+        END IF
+    END DO
+  END SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Vectorized_Widom
+
+  ELEMENTAL FUNCTION Recip_Sqrt(x) RESULT(rsqrt)
+          !DIR$ ATTRIBUTES FORCEINLINE :: Recip_Sqrt
+          ! Double precision fast inverse square root algorithm
+          ! Not beneficial for single precision these days because reciprocal square root is usually
+          !     implemented as a hardware intrinsic in single precision for modern CPUs.
+          ! However, it's only a hardware intrinsic in double precision for CPUs with AVX-512 instruction set, 
+          !     which is still relatively uncommon and is currently missing from all non-Intel CPUs.
+          ! Using this method is faster than using both a sqare root operation and a division or reciprocal operation, 
+          !     according to tests I've run on both Intel and AMD CPUs with the AVX2 instruction set.
+          ! If you need both the square root and the reciprocal sqare root of a number (x), use this function to get the 
+          !     reciprocal sqare root, then multiply the reciprocal square root with the original number (x) 
+          !     to get the square root of x.
+          ! I'm not sure whether it will behave properly if x is infinity.  I haven't tested that yet.
+          REAL(DP), INTENT(IN) :: x
+          INTEGER(INT64), PARAMETER :: magic_number = INT(Z'5FE6EB50C7B537A9',INT64)
+          REAL(DP), PARAMETER :: threehalves = 1.5_DP
+          REAL(DP) :: x2, rsqrt
+          x2 = x * 0.5_DP
+          rsqrt = TRANSFER(magic_number - ISHFT(TRANSFER(x,magic_number),-1),rsqrt)
+          rsqrt = rsqrt * (threehalves - x2*rsqrt*rsqrt)
+          rsqrt = rsqrt * (threehalves - x2*rsqrt*rsqrt)
+          rsqrt = rsqrt * (threehalves - x2*rsqrt*rsqrt)
+          rsqrt = rsqrt * (threehalves - x2*rsqrt*rsqrt)
+  END FUNCTION Recip_Sqrt
+
+
+  SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Vectorized(im,is, &
+    i_vdw_energy,i_qq_energy,overlap)
+    ! Arguments
+    INTEGER, INTENT(IN) :: im, is
+    REAL(DP), INTENT(OUT) :: i_vdw_energy, i_qq_energy
+    LOGICAL, INTENT(OUT) :: overlap
+    ! End Arguments
+    INTEGER, DIMENSION(COUNT(atom_list(1:natoms(is),im,is)%exist)) :: which_i_exist, iatomtypes
+    REAL(DP), DIMENSION(COUNT(atom_list(1:natoms(is),im,is)%exist)) :: irsxp_vec, irsyp_vec, irszp_vec
+    REAL(DP), DIMENSION(3) :: irp_com, isp_com
+    !LOGICAL, DIMENSION(MAXVAL(nmols(:,this_box))), TARGET :: spec_live_tgt
+    !INTEGER, DIMENSION(MAXVAL(nlive(:,this_box))) :: which_interact
+    LOGICAL :: this_est_emax, l_get_rij_min, l_ortho
+    INTEGER :: ia_counter, i, j, js, n_i_exist, istart, iend, jnlive, orig_vlen, n_j_exist
+    INTEGER :: bsolvent, istart_base, natoms_js, ti_solvent, ja, n_coul, n_vdw, ia, ia0, live_vlen, jnmols, jnatoms
+    REAL(DP) :: mol_rcut, max_dcom_i_const, this_vdw_rcutsq, this_coul_rcutsq, this_coul_rcut
+
+    INTEGER :: this_int_vdw_style, ibox, this_box
+
+    LOGICAL :: get_vdw, get_qq, i_get_qq
+    LOGICAL(1) :: l_interact
+    REAL(DP) :: dxcom, dycom, dzcom, dscom, ixcom, iycom, izcom
+    REAL(DP) :: dsxcom, dsycom, dszcom, jrp
+    REAL(DP) :: xl, yl, zl, hxl, hyl, hzl
+    REAL(DP) :: h11,h21,h31,h12,h22,h32,h13,h23,h33
+    REAL(DP) :: inv_h11,inv_h21,inv_h31,inv_h12,inv_h22,inv_h32,inv_h13,inv_h23,inv_h33
+
+    INTEGER :: j_limit, j_limit_2, ibase,ibase2,jloc,k
+    REAL(DP) :: max_dcom
+
+    LOGICAL(1), DIMENSION(MAXVAL(max_molecules)) :: interact_vec, all_interact_vec
+    INTEGER, DIMENSION(MAXVAL(max_molecules)) :: live_locates, which_may_interact_p, &
+            which_all_interact_p, which_interact
+    INTEGER :: n_all_interact, n_may_interact, n_interact, n_all_interact_p, n_may_interact_p
+    REAL(DP), DIMENSION(4,MAXVAL(max_molecules)) :: live_rcom
+
+    INTEGER, DIMENSION(MAXVAL(natoms)) :: jatomtypes
+
+    LOGICAL :: lj_charmm, lj_cut_shift, lj_cut_switch, lj_cut_shift_force, lj_cut, mie_cut_shift, mie_cut, l_charge_ewald, l_charge_dsf
+    LOGICAL :: l_charge_cut, need_sqrt, l_pair_store, ij_overlap, i_overlap, l_order_ij
+
+    INTEGER :: jm, jsl, isl, jsl_base, j_interact
+    INTEGER(2) :: natompairs, ji, n_vdw_p
+    REAL(DP) :: dxp, dyp, dzp, rijsq, rij, vdw_energy, qq_energy
+    REAL(DP) :: rxp, ryp, rzp, sxp, syp, szp
+
+    REAL(DP), DIMENSION(atompairdim) :: irxp, iryp, irzp, jrxp, jryp, jrzp, cfqq_vec, rij_vec, rijsq_packed
+    REAL(DP), DIMENSION(atompairdim) :: vdw_p1,vdw_p2,vdw_p3,vdw_p4,vdw_p5
+    REAL(DP), DIMENSION(MAX(atompairdim,MAXVAL(max_molecules))) :: rijsq_vec, rijsq_svec
+    REAL(DP), DIMENSION(atompairdim,5) :: vdw_p, vdw_p_packed, vdw_p_nonzero
+    REAL(DP) :: dsxp,dsyp,dszp,eps,sigsq,sigbyr2,sigbyr6,sigbyr12,rterm,rterm2
+    REAL(DP) :: jsxp, jsyp, jszp, dsp
+    REAL(DP) :: negsigsq,negsigbyr2,roffsq_rijsq
+    REAL(DP) :: epsig_n,epsig_m,mie_n,mie_m
+    REAL(DP) :: shift_p1, shift_p2, cfqq
+
+    INTEGER :: chunksize, nthreads_used, im_thread, jbase, ithread, n_interact_p, cni, j_end_shared
+    INTEGER(2) :: ji_end,ji_start,vlen,ji_end_2,ji_base_2
+    LOGICAL(1), DIMENSION(MAX(atompairdim,mol_dim)) :: vdw_interact_vec, qq_interact_vec
+
+    REAL(DP), DIMENSION(mol_dim,MAX(0,MAXVAL(natoms,MASK=natoms .LE. nmols(:,molecule_list(im,is)%which_box))),3) :: jrp_perm
+    LOGICAL :: l_molvectorized,ij_get_vdw,ij_get_qq,l_getdrcom,l_vdw_packed,l_vdw_gather,l_vdw_strided,l_masked
+    LOGICAL :: l_shortvdw, l_shortcoul, l_notsamecut
+    REAL(DP) :: pair_max_rcutsq,pair_min_rcutsq,nrg,dsf_const,i_max_dcom
+    INTEGER :: itype,jtype,maxvlen,minvlen,vdw_vlen,qq_vlen, n_interact_qq_pairstore, n_interact_vdw_pairstore
+    INTEGER, DIMENSION(atompairdim) :: which_have_vdw
+    INTEGER :: atompair_vlen, ji_base, ji_stride
+    REAL(DP), DIMENSION(mol_dim) :: mol_vdw_energy_p, mol_qq_energy_p
+    REAL(DP), DIMENSION(:), ALLOCATABLE :: mol_vdw_energy, mol_qq_energy
+
+    REAL(DP) :: alpharsq, alpha_sq, ewald_const, numer, denom
+
+    INTEGER :: i_selector
+    LOGICAL :: l_read_svec, switchflag
+    INTEGER(1) :: ji_start_byte, ji_byte
+
+    !DIR$ ASSUME_ALIGNED jrxp:array_align_bytes, jryp:array_align_bytes, jrzp:array_align_bytes, irxp:array_align_bytes, iryp:array_align_bytes, irzp:array_align_bytes, cfqq_vec:array_align_bytes, rijsq_vec:array_align_bytes, rij_vec:array_align_bytes
+    !DIR$ ASSUME_ALIGNED vdw_p:array_align_bytes, jrp_perm:array_align_bytes, vdw_p_packed:array_align_bytes, vdw_p_nonzero:array_align_bytes, interact_vec:array_align_bytes, all_interact_vec:array_align_bytes
+    !DIR$ ASSUME_ALIGNED rijsq_packed:array_align_bytes
+    !DIR$ ASSUME_ALIGNED vdw_p1:array_align_bytes, vdw_p2:array_align_bytes, vdw_p3:array_align_bytes, vdw_p4:array_align_bytes, vdw_p5:array_align_bytes
+    !DIR$ ASSUME (MOD(atompairdim,dimpad_4byte) .EQ. 0)
+    !DIR$ ASSUME (MOD(mol_dim,dimpad_4byte) .EQ. 0)
+
+    i_vdw_energy = 0.0_DP
+    i_qq_energy = 0.0_DP
+    overlap = .FALSE.
+    i_overlap = .FALSE.
+    this_box = molecule_list(im,is)%which_box
+    ibox = this_box
+    n_vdw_p = 0
+    n_i_exist = 0
+    DO i = 1, natoms(is)
+        IF (atom_list(i,im,is)%exist) THEN
+                n_i_exist = n_i_exist + 1
+                which_i_exist(n_i_exist) = i
+        END IF
+    END DO
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+
+
+    ithread = 1
+
+    nthreads_used = 1
+
+
+
+    !$OMP PARALLEL PRIVATE(jm,jrxp,jryp,jrzp,dsp,dxp,dyp,dzp,i,j,k,ji,rijsq,rij) &
+    !$OMP PRIVATE(alpharsq,numer,denom) &
+    !$OMP PRIVATE(vdw_energy,qq_energy,jsl,ij_overlap) &
+    !$OMP PRIVATE(jsxp,jsyp,jszp,eps,sigsq,sigbyr2,sigbyr6,sigbyr12,rterm,rterm2) &
+    !$OMP PRIVATE(negsigsq,negsigbyr2,roffsq_rijsq) &
+    !$OMP PRIVATE(epsig_n,epsig_m,mie_n,mie_m) &
+    !$OMP PRIVATE(l_interact, shift_p1, shift_p2, cfqq) &
+    !$OMP PRIVATE(rijsq_vec,rijsq_svec, rij_vec) &
+    !$OMP PRIVATE(js,jnmols,get_qq,natompairs,jsl_base,jnlive,chunksize,nthreads_used) &
+    !$OMP PRIVATE(j_limit,j_limit_2,im_thread,jbase,ithread) &
+    !$OMP PRIVATE(dxcom,dycom,dzcom,max_dcom,dsxcom,dsycom,dszcom) &
+    !$OMP PRIVATE(n_may_interact_p,which_may_interact_p,cni,need_sqrt) &
+    !$OMP PRIVATE(n_all_interact_p,which_all_interact_p) &
+    !$OMP PRIVATE(ji_end,ji_start,vlen,ji_end_2,ji_base_2) &
+    !$OMP PRIVATE(rijsq_packed,vdw_p_packed) &
+    !$OMP PRIVATE(l_molvectorized,ij_get_vdw,ij_get_qq,l_getdrcom) &
+    !$OMP PRIVATE(l_vdw_packed,l_vdw_gather,l_vdw_strided,l_masked) &
+    !$OMP PRIVATE(rxp,ryp,rzp,pair_max_rcutsq,pair_min_rcutsq,nrg) &
+    !$OMP PRIVATE(itype,jtype,maxvlen,minvlen,vdw_vlen,qq_vlen,atompair_vlen) &
+    !$OMP PRIVATE(qq_interact_vec,vdw_interact_vec) &
+    !$OMP PRIVATE(mol_vdw_energy_p,mol_qq_energy_p) &
+    !$OMP PRIVATE(l_read_svec, i_selector, switchflag) &
+    !$OMP PRIVATE(ji_start_byte,ji_byte) &
+    !$OMP PRIVATE(ji_base, ji_stride) &
+    !$OMP PRIVATE(vdw_p1,vdw_p2,vdw_p3,vdw_p4,vdw_p5) &
+    !$OMP PRIVATE(ia,ia0,ja,ibase) &
+    !$OMP REDUCTION(+: i_vdw_energy,i_qq_energy) &
+    !$OMP REDUCTION(.OR.: i_overlap)
+
+    i_overlap = .FALSE.
+    i_vdw_energy = 0.0_DP
+    i_qq_energy = 0.0_DP
+
+
+    !$ ithread = OMP_GET_THREAD_NUM() + 1
+
+    !$ nthreads_used = OMP_GET_NUM_THREADS()
+
+    !DIR$ ASSUME (MOD(atompairdim,dimpad_4byte) .EQ. 0)
+    !DIR$ ASSUME (MOD(mol_dim,dimpad_4byte) .EQ. 0)
+
+    !$OMP SECTIONS
+    !$OMP SECTION
+    l_order_ij = n_i_exist .GE. 4 .AND. l_ortho
+    IF (l_ortho) THEN
+            xl = box_list(this_box)%length(1,1)
+            yl = box_list(this_box)%length(2,2)
+            zl = box_list(this_box)%length(3,3)
+            hxl = 0.5 * xl
+            hyl = 0.5 * yl
+            hzl = 0.5 * zl
+    ELSE
+            h11 = box_list(this_box)%length(1,1)
+            h21 = box_list(this_box)%length(2,1)
+            h31 = box_list(this_box)%length(3,1)
+            h12 = box_list(this_box)%length(1,2)
+            h22 = box_list(this_box)%length(2,2)
+            h32 = box_list(this_box)%length(3,2)
+            h13 = box_list(this_box)%length(1,3)
+            h23 = box_list(this_box)%length(2,3)
+            h33 = box_list(this_box)%length(3,3)
+            inv_h11 = box_list(this_box)%length_inv(1,1)
+            inv_h21 = box_list(this_box)%length_inv(2,1)
+            inv_h31 = box_list(this_box)%length_inv(3,1)
+            inv_h12 = box_list(this_box)%length_inv(1,2)
+            inv_h22 = box_list(this_box)%length_inv(2,2)
+            inv_h32 = box_list(this_box)%length_inv(3,2)
+            inv_h13 = box_list(this_box)%length_inv(1,3)
+            inv_h23 = box_list(this_box)%length_inv(2,3)
+            inv_h33 = box_list(this_box)%length_inv(3,3)
+    END IF
+    DO i = 1, n_i_exist
+        IF (n_i_exist == natoms(is)) THEN
+                rxp = atom_list(i,im,is)%rp(1)
+                ryp = atom_list(i,im,is)%rp(2)
+                rzp = atom_list(i,im,is)%rp(3)
+        ELSE
+                ia = which_i_exist(i)
+                rxp = atom_list(ia,im,is)%rp(1)
+                ryp = atom_list(ia,im,is)%rp(2)
+                rzp = atom_list(ia,im,is)%rp(3)
+        END IF
+        IF (l_ortho) THEN
+                irsxp_vec(i) = rxp
+                irsyp_vec(i) = ryp
+                irszp_vec(i) = rzp
+        ELSE
+                sxp = inv_h11*rxp
+                sxp = sxp + inv_h12*ryp
+                syp = inv_h22*ryp
+                sxp = sxp + inv_h13*rzp
+                syp = syp + inv_h23*rzp
+                szp = inv_h33*rzp
+                irsxp_vec(i) = rxp
+                irsyp_vec(i) = ryp
+                irszp_vec(i) = rzp
+        END IF
+    END DO
+    !$OMP SECTION
+    lj_charmm = .FALSE.
+    lj_cut_shift = .FALSE.
+    lj_cut_switch = .FALSE.
+    lj_cut_shift_force = .FALSE.
+    lj_cut = .FALSE.
+    mie_cut_shift = .FALSE.
+    mie_cut = .FALSE.
+    get_vdw = int_vdw_style(this_box) .NE. vdw_none
+    IF (get_vdw) THEN
+            this_int_vdw_style = int_vdw_style(this_box)
+            IF (this_int_vdw_style == vdw_lj) THEN
+                    SELECT CASE (int_vdw_sum_style(this_box))
+                        CASE (vdw_charmm)
+                                lj_charmm = .TRUE.
+                                n_vdw_p = 2
+                        CASE (vdw_cut_shift)
+                                lj_cut_shift = .TRUE.
+                                n_vdw_p = 3
+                        CASE (vdw_cut_switch)
+                                lj_cut_switch = .TRUE.
+                                n_vdw_p = 2
+                        CASE (vdw_cut_shift_force)
+                                lj_cut_shift_force = .TRUE.
+                                n_vdw_p = 4
+                        CASE DEFAULT
+                                lj_cut = .TRUE.
+                                n_vdw_p = 2
+                    END SELECT
+            ELSE IF (this_int_vdw_style == vdw_mie) THEN
+                    IF (int_vdw_sum_style(this_box) .EQ. vdw_cut_shift) THEN
+                            mie_cut_shift = .TRUE.
+                            n_vdw_p = 5
+                    ELSE
+                            mie_cut = .TRUE.
+                            n_vdw_p = 4
+                    END IF
+            END IF
+    END IF
+    l_charge_ewald = .FALSE.
+    l_charge_dsf = .FALSE.
+    l_charge_cut = .FALSE.
+    i_get_qq = int_charge_style(this_box) .NE. charge_none .AND. has_charge(is) .AND. &
+            (species_list(is)%l_coul_cbmc .OR. .NOT. cbmc_flag)
+    IF (i_get_qq) THEN
+            SELECT CASE (int_charge_sum_style(this_box))
+                CASE (charge_ewald)
+                        l_charge_ewald = .TRUE.
+                        !ewald_const = alpha_ewald(this_box)/rootPI
+                        !alpha_sq = alpha_ewald(this_box)*alpha_ewald(this_box)
+                CASE (charge_dsf)
+                        l_charge_dsf = .TRUE.
+                        dsf_const = -dsf_factor1(this_box) - rcut_coul(this_box)*dsf_factor2(this_box)
+                CASE DEFAULT
+                        l_charge_cut = .TRUE.
+            END SELECT
+    END IF
+    IF (cbmc_flag) THEN
+            mol_rcut = rcut_cbmc(this_box)
+            this_vdw_rcutsq = rcut_cbmcsq(this_box)
+            this_coul_rcutsq = rcut_cbmcsq(this_box)
+            this_coul_rcut = rcut_cbmc(this_box)
+            l_shortvdw = .FALSE.
+            l_shortcoul = .FALSE.
+            l_notsamecut = .FALSE.
+    ELSE
+            mol_rcut = rcut_max(this_box)
+            IF (int_vdw_sum_style(this_box) == vdw_cut_switch) THEN
+                    this_vdw_rcutsq = roff_switch_sq(this_box)
+            ELSE IF (get_vdw) THEN
+                    this_vdw_rcutsq = rcut_vdwsq(this_box)
+            ELSE
+                    this_vdw_rcutsq = 0.0_DP
+            END IF
+            IF (i_get_qq) THEN
+                    this_coul_rcutsq = rcut_coulsq(this_box)
+            ELSE
+                    this_coul_rcutsq = 0.0_DP
+            END IF
+            !this_coul_rcut = rcut_coul(this_box)
+            l_shortcoul = this_coul_rcutsq < this_vdw_rcutsq
+            l_shortvdw = this_vdw_rcutsq < this_coul_rcutsq
+            l_notsamecut = l_shortcoul .OR. l_shortvdw
+    END IF
+    i_max_dcom = molecule_list(im,is)%rcom(4)
+    max_dcom_i_const = i_max_dcom + mol_rcut
+    ixcom = molecule_list(im,is)%rcom(1)
+    iycom = molecule_list(im,is)%rcom(2)
+    izcom = molecule_list(im,is)%rcom(3)
+    l_pair_store = l_pair_nrg .AND. .NOT. cbmc_flag
+    IF (l_pair_store) THEN
+            isl = species_list(is)%superlocate_base + im
+    END IF
+    !$OMP END SECTIONS
+    DO js = 1, nspecies
+            jnmols = nmols(js,ibox)
+            IF (jnmols == 0) CYCLE
+            get_qq = i_get_qq .AND. has_charge(js)
+            jnatoms = natoms(js)
+            natompairs = jnatoms*n_i_exist
+            l_getdrcom = natompairs > 3 ! I don't exactly know what the threshold should be. Subject to change.
+            IF (open_mc_flag .OR. ibox > 1) THEN
+                    IF (l_pair_store) THEN
+                            jsl_base = species_list(js)%superlocate_base
+                            !$OMP WORKSHARE
+                            pair_nrg_vdw(locate(1:jnmols,js,ibox)+jsl_base,isl) = 0.0_DP
+                            pair_nrg_qq(locate(1:jnmols,js,ibox)+jsl_base,isl) = 0.0_DP
+                            pair_nrg_vdw(isl,locate(1:jnmols,js,ibox)+jsl_base) = 0.0_DP
+                            pair_nrg_qq(isl,locate(1:jnmols,js,ibox)+jsl_base) = 0.0_DP
+                            !$OMP END WORKSHARE
+                    END IF
+                    IF (l_not_all_live) THEN
+                            !$OMP SINGLE
+                            molecule_list(im,is)%live = .FALSE.
+                            jnlive = 0
+                            DO j = 1, jnmols
+                                jm = locate(j,js,ibox)
+                                IF (molecule_list(jm,js)%live) THEN
+                                        jnlive = jnlive + 1
+                                        live_locates(jnlive) = jm
+                                        IF (l_getdrcom) live_rcom(:,jnlive) = molecule_list(jm,js)%rcom
+                                END IF
+                            END DO
+                            molecule_list(im,is)%live = .TRUE.
+                            !$OMP END SINGLE
+                    ELSE IF (is .EQ. js) THEN
+                            jnlive = jnmols - 1
+                            !$OMP SINGLE
+                            j_end_shared = jnmols + 1
+                            !$OMP END SINGLE
+                            !$OMP DO SIMD SCHEDULE(STATIC) REDUCTION(MIN:j_end_shared)
+                            DO j = 1, jnmols
+                                IF (locate(j,js,ibox) .EQ. im) j_end_shared = MIN(j_end_shared,j)
+                            END DO
+                            !$OMP END DO SIMD
+                            !$OMP WORKSHARE
+                            live_locates(1:j_end_shared-1) = locate(1:j_end_shared-1,js,ibox)
+                            live_locates(j_end_shared:jnlive) = locate(j_end_shared+1:jnmols,js,ibox)
+                            !$OMP END WORKSHARE
+                            IF (l_getdrcom) THEN
+                                    !$OMP DO SCHEDULE(STATIC)
+                                    DO j = 1, jnlive
+                                        live_rcom(:,j) = molecule_list(live_locates(j),js)%rcom
+                                    END DO
+                                    !$OMP END DO
+                            END IF
+                    ELSE
+                            jnlive = jnmols
+                            !$OMP WORKSHARE
+                            live_locates(1:jnlive) = locate(1:jnlive,js,ibox)
+                            !$OMP END WORKSHARE NOWAIT
+                            IF (l_getdrcom) THEN
+                                    !$OMP DO SCHEDULE(STATIC)
+                                    DO j = 1, jnlive
+                                            live_rcom(:,j) = molecule_list(locate(j,js,ibox),js)%rcom
+                                    END DO
+                                    !$OMP END DO
+                            END IF
+                    END IF
+            ELSE
+                    jnlive = jnmols
+                    IF (l_pair_store) THEN
+                            jsl_base = species_list(js)%superlocate_base
+                            !$OMP WORKSHARE
+                            pair_nrg_vdw(jsl_base+1:jsl_base+jnmols,isl) = 0.0_DP
+                            pair_nrg_qq(jsl_base+1:jsl_base+jnmols,isl) = 0.0_DP
+                            pair_nrg_vdw(isl,jsl_base+1:jsl_base+jnmols) = 0.0_DP
+                            pair_nrg_qq(isl,jsl_base+1:jsl_base+jnmols) = 0.0_DP
+                            !$OMP END WORKSHARE
+                    END IF
+            END IF
+            IF (l_getdrcom) THEN
+                    IF (l_ortho) THEN
+                            !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(dxcom,dycom,dzcom,max_dcom)
+                            DO j = 1, jnlive
+                                    IF (open_mc_flag) THEN
+                                            dxcom = live_rcom(1,j)
+                                            dycom = live_rcom(2,j)
+                                            dzcom = live_rcom(3,j)
+                                            max_dcom = live_rcom(4,j)
+                                    ELSE
+                                            dxcom = molecule_list(j,js)%rcom(1)
+                                            dycom = molecule_list(j,js)%rcom(2)
+                                            dzcom = molecule_list(j,js)%rcom(3)
+                                            max_dcom = molecule_list(j,js)%rcom(4)
+                                    END IF
+                                    dxcom = ABS(dxcom - ixcom)
+                                    dycom = ABS(dycom - iycom)
+                                    dzcom = ABS(dzcom - izcom)
+                                    IF (dxcom > hxl) dxcom = dxcom - xl
+                                    IF (dycom > hyl) dycom = dycom - yl
+                                    IF (dzcom > hzl) dzcom = dzcom - zl
+                                    ! Repurposing dxcom as rijsq accumulator to enforce FMA3 instructions if supported
+                                    dxcom = dxcom * dxcom
+                                    dxcom = dxcom + dycom * dycom
+                                    dxcom = dxcom + dzcom * dzcom
+                                    dxcom = SQRT(dxcom) ! rij
+                                    max_dcom = max_dcom + i_max_dcom
+                                    interact_vec(j) = mol_rcut > dxcom - max_dcom
+                                    all_interact_vec(j) = mol_rcut > dxcom + max_dcom
+                                    !l_interact = max_dcom_i_const > &
+                                    !        SQRT(dxcom) - max_dcom
+                            END DO
+                            !$OMP END DO SIMD
+                    ELSE
+                            IF (open_mc_flag) THEN
+                                    !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(dxcom,dycom,dzcom,max_dcom,dsxcom,dsycom,dszcom)
+                                    DO j = 1, jnlive
+                                            dxcom = live_rcom(1,j)
+                                            dycom = live_rcom(2,j)
+                                            dzcom = live_rcom(3,j)
+                                            max_dcom = live_rcom(4,j)
+                                            dxcom = dxcom - ixcom
+                                            dsxcom = inv_h11*dxcom
+                                            dycom = dycom - iycom
+                                            dsxcom = dsxcom + inv_h12*dycom
+                                            dsycom = inv_h22*dycom
+                                            dzcom = dzcom - izcom
+                                            dsxcom = dsxcom + inv_h13*dzcom
+                                            dsycom = dsycom + inv_h23*dzcom
+                                            dszcom = inv_h33*dzcom
+                                            IF (dsxcom > 0.5_DP) THEN
+                                                    dsxcom = dsxcom - 1.0_DP
+                                            ELSE IF (dsxcom < -0.5_DP) THEN
+                                                    dsxcom = dsxcom + 1.0_DP
+                                            END IF
+                                            IF (dsycom > 0.5_DP) THEN
+                                                    dsycom = dsycom - 1.0_DP
+                                            ELSE IF (dsycom < -0.5_DP) THEN
+                                                    dsycom = dsycom + 1.0_DP
+                                            END IF
+                                            IF (dszcom > 0.5_DP) THEN
+                                                    dszcom = dszcom - 1.0_DP
+                                            ELSE IF (dszcom < -0.5_DP) THEN
+                                                    dszcom = dszcom + 1.0_DP
+                                            END IF
+                                            dxcom = h11*dsxcom
+                                            dxcom = dxcom + h12*dsycom
+                                            dycom = h22*dsycom
+                                            dxcom = dxcom + h13*dszcom
+                                            dycom = dycom + h23*dszcom
+                                            dzcom = h33*dszcom
+                                            ! Repurposing dxcom as rijsq accumulator to enforce FMA3 instructions if supported
+                                            dxcom = dxcom * dxcom
+                                            dxcom = dxcom + dycom * dycom
+                                            dxcom = dxcom + dzcom * dzcom
+                                            dxcom = SQRT(dxcom) ! rij
+                                            max_dcom = max_dcom + i_max_dcom
+                                            interact_vec(j) = mol_rcut > dxcom - max_dcom
+                                            all_interact_vec(j) = mol_rcut > dxcom + max_dcom
+                                    END DO
+                                    !$OMP END DO SIMD
+                            ELSE
+                                    !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(dxcom,dycom,dzcom,max_dcom,dsxcom,dsycom,dszcom)
+                                    DO j = 1, jnlive
+                                            dxcom = molecule_list(j,js)%rcom(1)
+                                            dycom = molecule_list(j,js)%rcom(2)
+                                            dzcom = molecule_list(j,js)%rcom(3)
+                                            max_dcom = molecule_list(j,js)%rcom(4)
+                                            dxcom = dxcom - ixcom
+                                            dsxcom = inv_h11*dxcom
+                                            dycom = dycom - iycom
+                                            dsxcom = dsxcom + inv_h12*dycom
+                                            dsycom = inv_h22*dycom
+                                            dzcom = dzcom - izcom
+                                            dsxcom = dsxcom + inv_h13*dzcom
+                                            dsycom = dsycom + inv_h23*dzcom
+                                            dszcom = inv_h33*dzcom
+                                            IF (dsxcom > 0.5_DP) THEN
+                                                    dsxcom = dsxcom - 1.0_DP
+                                            ELSE IF (dsxcom < -0.5_DP) THEN
+                                                    dsxcom = dsxcom + 1.0_DP
+                                            END IF
+                                            IF (dsycom > 0.5_DP) THEN
+                                                    dsycom = dsycom - 1.0_DP
+                                            ELSE IF (dsycom < -0.5_DP) THEN
+                                                    dsycom = dsycom + 1.0_DP
+                                            END IF
+                                            IF (dszcom > 0.5_DP) THEN
+                                                    dszcom = dszcom - 1.0_DP
+                                            ELSE IF (dszcom < -0.5_DP) THEN
+                                                    dszcom = dszcom + 1.0_DP
+                                            END IF
+                                            dxcom = h11*dsxcom
+                                            dxcom = dxcom + h12*dsycom
+                                            dycom = h22*dsycom
+                                            dxcom = dxcom + h13*dszcom
+                                            dycom = dycom + h23*dszcom
+                                            dzcom = h33*dszcom
+                                            ! Repurposing dxcom as rijsq accumulator to enforce FMA3 instructions if supported
+                                            dxcom = dxcom * dxcom
+                                            dxcom = dxcom + dycom * dycom
+                                            dxcom = dxcom + dzcom * dzcom
+                                            dxcom = SQRT(dxcom) ! rij
+                                            max_dcom = max_dcom + i_max_dcom
+                                            interact_vec(j) = mol_rcut > dxcom - max_dcom
+                                            all_interact_vec(j) = mol_rcut > dxcom + max_dcom
+                                    END DO
+                                    !$OMP END DO SIMD
+                            END IF
+                    END IF
+                    IF (is == js .AND. .NOT. open_mc_flag) THEN
+                            !$OMP SINGLE
+                            interact_vec(im) = .FALSE.
+                            all_interact_vec(im) = .FALSE.
+                            !$OMP END SINGLE
+                    END IF
+                    IF (nthreads_used > 1) THEN
+                            !$OMP WORKSHARE
+                            n_interact = COUNT(interact_vec(1:jnlive))
+                            n_all_interact = COUNT(all_interact_vec(1:jnlive))
+                            n_may_interact = n_interact - n_all_interact
+                            !$OMP END WORKSHARE
+                    END IF
+                    !$OMP SINGLE
+                    n_all_interact_p = 0
+                    n_may_interact_p = 0
+                    IF (open_mc_flag) THEN
+                            DO j = 1, jnlive
+                                IF (all_interact_vec(j)) THEN
+                                        n_all_interact_p = n_all_interact_p + 1
+                                        which_interact(n_all_interact_p) = live_locates(j)
+                                ELSE IF (interact_vec(j)) THEN
+                                        n_may_interact_p = n_may_interact_p + 1
+                                        which_may_interact_p(n_may_interact_p) = live_locates(j)
+                                END IF
+                            END DO
+                    ELSE
+                            DO j = 1, jnlive
+                                IF (all_interact_vec(j)) THEN
+                                        n_all_interact_p = n_all_interact_p + 1
+                                        which_interact(n_all_interact_p) = j
+                                ELSE IF (interact_vec(j)) THEN
+                                        n_may_interact_p = n_may_interact_p + 1
+                                        which_may_interact_p(n_may_interact_p) = j
+                                END IF
+                            END DO
+                    END IF
+                    ! The first n_all_interact molecules in which_interact are guaranteed to be fully in-range.
+                    ! The next n_may_interact molecules might have some atoms in range.
+                    which_interact(n_all_interact_p+1:n_all_interact_p+n_may_interact_p) = which_may_interact_p(1:n_may_interact_p)
+                    IF (nthreads_used < 2) THEN
+                            n_all_interact = n_all_interact_p
+                            n_may_interact = n_may_interact_p
+                            n_interact = n_all_interact_p + n_may_interact_p
+                    END IF
+                    !$OMP END SINGLE NOWAIT
+                    l_molvectorized = n_interact > natompairs
+                    IF (l_debug_print) WRITE(*,*) "n_interact = ", n_interact, n_all_interact, n_may_interact, l_molvectorized
+                    IF (l_debug_print) WRITE(*,*) n_all_interact_p, n_may_interact_p
+                    IF (l_debug_print) WRITE(*,*) "nthreads_used = ", nthreads_used
+            ELSE
+                    l_molvectorized = .TRUE.
+            END IF
+            IF (l_order_ij .AND. .NOT. l_molvectorized) THEN
+                    !$OMP WORKSHARE
+                    iatomtypes = nonbond_list(which_i_exist,is)%atom_type_number
+                    jatomtypes(1:jnatoms) = nonbond_list(1:jnatoms,js)%atom_type_number
+                    vdw_p(1:natompairs,1:n_vdw_p) = RESHAPE(ppvdwp_table(iatomtypes,jatomtypes(1:jnatoms),1:n_vdw_p,ibox), (/ natompairs, n_vdw_p /))
+                    irxp(1:natompairs) = RESHAPE(SPREAD(irsxp_vec(1:n_i_exist),2,jnatoms), (/ natompairs /))
+                    iryp(1:natompairs) = RESHAPE(SPREAD(irsyp_vec(1:n_i_exist),2,jnatoms), (/ natompairs /))
+                    irzp(1:natompairs) = RESHAPE(SPREAD(irszp_vec(1:n_i_exist),2,jnatoms), (/ natompairs /))
+                    !$OMP END WORKSHARE NOWAIT
+                    IF (get_qq) THEN
+                            !$OMP WORKSHARE
+                            cfqq_vec(1:natompairs) = RESHAPE(SPREAD(nonbond_list(1:jnatoms,js)%charge,1,n_i_exist) * &
+                                    SPREAD(nonbond_list(which_i_exist,is)%charge,2,jnatoms) * charge_factor, (/ natompairs /))
+                            !$OMP END WORKSHARE NOWAIT
+                    END IF
+            ELSE IF (.NOT. l_molvectorized) THEN
+                    !$OMP WORKSHARE
+                    iatomtypes = nonbond_list(which_i_exist,is)%atom_type_number
+                    jatomtypes(1:jnatoms) = nonbond_list(1:jnatoms,js)%atom_type_number
+                    vdw_p(1:natompairs,1:n_vdw_p) = RESHAPE(ppvdwp_table(jatomtypes(1:jnatoms),iatomtypes,1:n_vdw_p,ibox), (/ natompairs, n_vdw_p /))
+                    irxp(1:natompairs) = RESHAPE(SPREAD(irsxp_vec(1:n_i_exist),1,jnatoms), (/ natompairs /))
+                    iryp(1:natompairs) = RESHAPE(SPREAD(irsyp_vec(1:n_i_exist),1,jnatoms), (/ natompairs /))
+                    irzp(1:natompairs) = RESHAPE(SPREAD(irszp_vec(1:n_i_exist),1,jnatoms), (/ natompairs /))
+                    !$OMP END WORKSHARE NOWAIT
+                    IF (get_qq) THEN
+                            !$OMP WORKSHARE
+                            cfqq_vec(1:natompairs) = RESHAPE(SPREAD(nonbond_list(1:jnatoms,js)%charge,2,n_i_exist) * &
+                                    SPREAD(nonbond_list(which_i_exist,is)%charge,1,jnatoms) * charge_factor, (/ natompairs /))
+                            !$OMP END WORKSHARE NOWAIT
+                    END IF
+            END IF
+
+            need_sqrt = get_qq .OR. lj_cut_shift_force
+
+            !$OMP BARRIER
+            IF (l_molvectorized) THEN
+                    IF (l_debug_print) WRITE(*,*) "l_molvectorized"
+                    IF (l_getdrcom) THEN
+                            !WRITE(*,*) SHAPE(jrp_perm)
+                            !WRITE(*,*) n_interact, jnatoms
+                            !$OMP WORKSHARE
+                            jrp_perm(1:n_interact,1:jnatoms,1) = TRANSPOSE(atom_list(1:jnatoms,which_interact(1:n_interact),js)%rp(1))
+                            jrp_perm(1:n_interact,1:jnatoms,2) = TRANSPOSE(atom_list(1:jnatoms,which_interact(1:n_interact),js)%rp(2))
+                            jrp_perm(1:n_interact,1:jnatoms,3) = TRANSPOSE(atom_list(1:jnatoms,which_interact(1:n_interact),js)%rp(3))
+                            !$OMP END WORKSHARE
+                            IF (l_debug_print) THEN
+                                    WRITE(*,*) im
+                                    WRITE(*,*) irsxp_vec
+                                    WRITE(*,*) irsyp_vec
+                                    WRITE(*,*) irszp_vec
+                                    WRITE(*,*)
+                                    WRITE(*,*) n_interact, jnatoms
+                                    DO i = 1, n_interact
+                                        WRITE(*,*) "Molecule ", which_interact(i)
+                                        DO j = 1, jnatoms
+                                                WRITE(*,*) jrp_perm(i,j,1:3)
+                                        END DO
+                                    END DO
+                            END IF
+                    ELSE IF (open_mc_flag) THEN
+                            !$OMP WORKSHARE
+                            n_interact = jnlive
+                            n_all_interact = 0
+                            n_may_interact = n_interact
+                            jrp_perm(1:jnlive,1:jnatoms,1) = TRANSPOSE(atom_list(1:jnatoms,live_locates(1:jnlive),js)%rp(1))
+                            jrp_perm(1:jnlive,1:jnatoms,2) = TRANSPOSE(atom_list(1:jnatoms,live_locates(1:jnlive),js)%rp(2))
+                            jrp_perm(1:jnlive,1:jnatoms,3) = TRANSPOSE(atom_list(1:jnatoms,live_locates(1:jnlive),js)%rp(3))
+                            !$OMP END WORKSHARE
+                    ELSE
+                            IF (is .EQ. js) THEN
+                                    !$OMP SINGLE
+                                    n_interact = jnlive - 1
+                                    n_all_interact = 0
+                                    n_may_interact = n_interact
+                                    !$OMP END SINGLE NOWAIT
+                                    !$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
+                                    DO ja = 1, jnatoms
+                                        DO i = 1, 3
+                                                jrp_perm(1:im-1,ja,i) = atom_list(ja,1:im-1,js)%rp(i)
+                                                jrp_perm(im:jnlive-1,ja,i) = atom_list(ja,im+1:jnlive,js)%rp(i)
+                                        END DO
+                                    END DO
+                                    !$OMP END DO
+                            ELSE IF (l_ortho) THEN
+                                    !$OMP SINGLE
+                                    n_interact = jnlive
+                                    n_all_interact = 0
+                                    n_may_interact = n_interact
+                                    !$OMP END SINGLE NOWAIT
+                                    !$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
+                                    DO ja = 1, jnatoms
+                                        DO i = 1, 3
+                                                jrp_perm(1:jnlive,ja,i) = atom_list(ja,1:jnlive,js)%rp(i)
+                                        END DO
+                                    END DO
+                                    !$OMP END DO
+                            ELSE
+                                    !$OMP SINGLE
+                                    n_interact = jnlive
+                                    n_all_interact = 0
+                                    n_may_interact = n_interact
+                                    !$OMP END SINGLE
+                            END IF
+                    END IF
+                    IF (.NOT. l_ortho) THEN
+                            switchflag = l_getdrcom .OR. open_mc_flag .OR. is .EQ. js
+                            !$OMP DO SCHEDULE(STATIC)
+                            DO ja = 1, jnatoms
+                                DO j = 1, n_interact
+                                        IF (switchflag) THEN
+                                                rxp = jrp_perm(j,ja,1)
+                                                ryp = jrp_perm(j,ja,2)
+                                                rzp = jrp_perm(j,ja,3)
+                                        ELSE
+                                                rxp = atom_list(ja,j,js)%rp(1)
+                                                ryp = atom_list(ja,j,js)%rp(2)
+                                                rzp = atom_list(ja,j,js)%rp(3)
+                                        END IF
+                                        jsxp = inv_h11*rxp
+                                        jsxp = jsxp + inv_h12*ryp
+                                        jsyp = inv_h22*ryp
+                                        jsxp = jsxp + inv_h13*rzp
+                                        jsyp = jsyp + inv_h23*rzp
+                                        jszp = inv_h33*rzp
+                                        jrp_perm(j,ja,1) = jsxp
+                                        jrp_perm(j,ja,2) = jsyp
+                                        jrp_perm(j,ja,3) = jszp
+                                END DO
+                            END DO
+                            !$OMP END DO NOWAIT
+                    END IF
+                    !$OMP SINGLE
+                    IF (get_vdw .AND. l_pair_store) THEN
+                            n_interact_vdw_pairstore = n_interact
+                    ELSE
+                            n_interact_vdw_pairstore = 1 ! reduction item cannot have zero length
+                    END IF
+                    IF (get_qq .AND. l_pair_store) THEN
+                            n_interact_qq_pairstore = n_interact
+                    ELSE
+                            n_interact_qq_pairstore = 1 ! reduction item cannot have zero length
+                    END IF
+                    IF (.NOT. ALLOCATED(mol_vdw_energy)) THEN
+                            ALLOCATE(mol_vdw_energy(n_interact_vdw_pairstore))
+                    ELSE IF (SIZE(mol_vdw_energy,1) < n_interact_vdw_pairstore) THEN
+                            DEALLOCATE(mol_vdw_energy)
+                            ALLOCATE(mol_vdw_energy(n_interact_vdw_pairstore))
+                    END IF
+                    IF (.NOT. ALLOCATED(mol_qq_energy)) THEN
+                            ALLOCATE(mol_qq_energy(n_interact_qq_pairstore))
+                    ELSE IF (SIZE(mol_qq_energy,1) < n_interact_qq_pairstore) THEN
+                            DEALLOCATE(mol_qq_energy)
+                            ALLOCATE(mol_qq_energy(n_interact_qq_pairstore))
+                    END IF
+                    mol_vdw_energy = 0.0_DP
+                    mol_qq_energy = 0.0_DP
+                    !$OMP END SINGLE
+                    !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC) &
+                    !$OMP REDUCTION(+:mol_vdw_energy,mol_qq_energy)
+                    DO ja = 1, jnatoms
+                        DO ia = 1, n_i_exist
+                                !DIR$ ASSUME (MOD(atompairdim,dimpad_4byte) .EQ. 0)
+                                !DIR$ ASSUME (MOD(mol_dim,dimpad_4byte) .EQ. 0)
+                                IF (overlap .OR. i_overlap) CYCLE
+                                ia0 = which_i_exist(ia)
+                                itype = nonbond_list(ia0,is)%atom_type_number
+                                jtype = nonbond_list(ja,js)%atom_type_number
+                                ij_get_vdw = itype > 0 .AND. jtype > 0 .AND. get_vdw
+                                IF (i_get_qq) THEN
+                                        cfqq = nonbond_list(ia0,is)%charge * nonbond_list(ja,js)%charge * charge_factor
+                                        ij_get_qq = ABS(cfqq) > 0.0_DP
+                                ELSE
+                                        ij_get_qq = .FALSE.
+                                END IF
+                                IF (l_ortho) THEN
+                                        DO j = 1, n_interact
+                                                dxp = ABS(jrp_perm(j,ja,1) - irsxp_vec(ia))
+                                                dyp = ABS(jrp_perm(j,ja,2) - irsyp_vec(ia))
+                                                dzp = ABS(jrp_perm(j,ja,3) - irszp_vec(ia))
+                                                IF (dxp > hxl) dxp = dxp - xl
+                                                IF (dyp > hyl) dyp = dyp - yl
+                                                IF (dzp > hzl) dzp = dzp - zl
+                                                dxp = dxp*dxp
+                                                dxp = dxp + dyp*dyp
+                                                dxp = dxp + dzp*dzp
+                                                rijsq_vec(j) = dxp
+                                                IF (ij_get_vdw) vdw_interact_vec(j) = dxp < this_vdw_rcutsq
+                                                IF (ij_get_qq) qq_interact_vec(j) = dxp < this_coul_rcutsq
+                                                i_overlap = i_overlap .OR. dxp < rcut_lowsq
+                                        END DO
+                                ELSE
+                                        DO j = 1, n_interact
+                                                dsp = jrp_perm(j,ja,1) - irsxp_vec(ia)
+                                                IF (dsp > 0.5_DP) THEN
+                                                        dsp = dsp - 1.0_DP
+                                                ELSE IF (dsp < -0.5_DP) THEN
+                                                        dsp = dsp + 1.0_DP
+                                                END IF
+                                                dxp = h11*dsp
+                                                dsp = jrp_perm(j,ja,2) - irsyp_vec(ia)
+                                                IF (dsp > 0.5_DP) THEN
+                                                        dsp = dsp - 1.0_DP
+                                                ELSE IF (dsp < -0.5_DP) THEN
+                                                        dsp = dsp + 1.0_DP
+                                                END IF
+                                                dxp = dxp + h12*dsp
+                                                dyp = h22*dsp
+                                                dsp = jrp_perm(j,ja,3) - irszp_vec(ia)
+                                                IF (dsp > 0.5_DP) THEN
+                                                        dsp = dsp - 1.0_DP
+                                                ELSE IF (dsp < -0.5_DP) THEN
+                                                        dsp = dsp + 1.0_DP
+                                                END IF
+                                                dxp = dxp + h13*dsp
+                                                dyp = dyp + h23*dsp
+                                                dzp = h33*dsp
+                                                dxp = dxp*dxp
+                                                dxp = dxp + dyp*dyp
+                                                dxp = dxp + dzp*dzp
+                                                rijsq_vec(j) = dxp
+                                                IF (ij_get_vdw) vdw_interact_vec(j) = dxp < this_vdw_rcutsq
+                                                IF (ij_get_qq) qq_interact_vec(j) = dxp < this_coul_rcutsq
+                                                i_overlap = i_overlap .OR. dxp < rcut_lowsq
+                                        END DO
+                                END IF
+                                IF (l_debug_print) THEN
+                                        WRITE(*,*) "i_overlap = ", i_overlap
+                                        WRITE(*,*) "rijsq_vec = "
+                                        WRITE(*,*) rijsq_vec(1:n_interact)
+                                END IF
+                                IF (i_overlap) THEN
+                                        overlap = .TRUE.
+                                        CYCLE
+                                END IF
+                                IF (ij_get_vdw .AND. ij_get_qq) THEN
+                                        pair_max_rcutsq = MAX(this_vdw_rcutsq,this_coul_rcutsq)
+                                        pair_min_rcutsq = MIN(this_vdw_rcutsq,this_coul_rcutsq)
+                                        n_all_interact_p = n_all_interact
+                                ELSE IF (ij_get_vdw) THEN
+                                        pair_max_rcutsq = this_vdw_rcutsq
+                                        IF (l_shortvdw) THEN
+                                                n_all_interact_p = 0
+                                        ELSE
+                                                n_all_interact_p = n_all_interact
+                                        END IF
+                                ELSE IF (ij_get_qq) THEN
+                                        pair_max_rcutsq = this_coul_rcutsq
+                                        IF (l_shortcoul) THEN
+                                                n_all_interact_p = 0
+                                        ELSE
+                                                n_all_interact_p = n_all_interact
+                                        END IF
+                                END IF
+                                maxvlen = n_all_interact_p
+                                DO j = n_all_interact_p+1, n_interact
+                                        rijsq = rijsq_vec(j)
+                                        IF (rijsq < pair_max_rcutsq) THEN
+                                                maxvlen = maxvlen + 1
+                                                rijsq_vec(maxvlen) = rijsq
+                                        END IF
+                                END DO
+                                IF (ij_get_vdw .AND. ij_get_qq .AND. l_notsamecut) THEN
+                                        minvlen = 0
+                                        DO j = 1, maxvlen
+                                                rijsq = rijsq_vec(j)
+                                                IF (rijsq < pair_min_rcutsq) THEN
+                                                        minvlen = minvlen + 1
+                                                        rijsq_svec(minvlen) = rijsq
+                                                END IF
+                                        END DO
+                                        IF (l_shortcoul) THEN
+                                                vdw_vlen = maxvlen
+                                                qq_vlen = minvlen
+                                        ELSE
+                                                vdw_vlen = minvlen
+                                                qq_vlen = maxvlen
+                                        END IF
+                                ELSE
+                                        IF (ij_get_vdw) vdw_vlen = maxvlen
+                                        IF (ij_get_qq) qq_vlen = maxvlen
+                                END IF
+                                IF (ij_get_vdw) THEN
+                                        l_read_svec = ij_get_qq .AND. l_shortvdw
+                                        IF (l_debug_print) WRITE(*,*) "l_read_svec = ", l_read_svec
+                                        IF (lj_charmm) THEN
+                                                eps = ppvdwp_table2(1,itype,jtype,ibox) ! epsilon
+                                                sigsq = ppvdwp_table2(2,itype,jtype,ibox) ! sigma**2
+                                                vdw_energy = 0.0_DP
+                                                !$OMP SIMD PRIVATE(rijsq,sigbyr2,sigbyr6,sigbyr12) REDUCTION(+:vdw_energy)
+                                                DO j = 1, vdw_vlen
+                                                        IF (l_read_svec) THEN
+                                                                rijsq = rijsq_svec(j)
+                                                        ELSE
+                                                                rijsq = rijsq_vec(j)
+                                                        END IF
+                                                        sigbyr2 = sigsq/rijsq ! sigma was already squared
+                                                        sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                                        IF (l_pair_store) THEN
+                                                                sigbyr12 = sigbyr6*sigbyr6
+                                                                sigbyr12 = sigbyr12 - 2.0_DP*sigbyr6
+                                                                mol_vdw_energy_p(j) = eps*sigbyr12
+                                                                vdw_energy = vdw_energy + sigbyr12
+                                                        ELSE
+                                                                vdw_energy = vdw_energy + sigbyr6*sigbyr6 !- 2.0_DP *sigbyr6
+                                                                vdw_energy = vdw_energy - 2.0_DP*sigbyr6
+                                                        END IF
+                                                END DO
+                                                !$OMP END SIMD
+
+                                                i_vdw_energy = i_vdw_energy + vdw_energy * eps
+                                        ELSE IF (lj_cut_shift) THEN
+                                                eps = ppvdwp_table2(1,itype,jtype,ibox) ! 4*epsilon
+                                                negsigsq = ppvdwp_table2(2,itype,jtype,ibox) ! -(sigma**2)
+                                                shift_p1 = ppvdwp_table2(3,itype,jtype,ibox)
+                                                vdw_energy = 0.0_DP
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                        mol_vdw_energy_p(j) = eps*rterm - shift_p1
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                        mol_vdw_energy_p(j) = eps*rterm - shift_p1
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_vdw_energy = i_vdw_energy + vdw_energy*eps - vdw_vlen*shift_p1
+                                        ELSE IF (lj_cut_switch) THEN
+                                                eps = ppvdwp_table2(1,itype,jtype,ibox) ! 4*epsilon
+                                                negsigsq = ppvdwp_table2(2,itype,jtype,ibox) ! -(sigma**2)
+                                                vdw_energy = 0.0_DP
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                                        roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                                                (switch_factor2(this_box)+2.0_DP*rijsq)
+                                                                        roffsq_rijsq = MERGE(roffsq_rijsq, 1.0_DP, rijsq .GE. ron_switch_sq(this_box))
+                                                                        vdw_energy = vdw_energy + roffsq_rijsq*rterm
+                                                                        mol_vdw_energy_p(j) = eps*roffsq_rijsq*rterm
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                                        roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                                                (switch_factor2(this_box)+2.0_DP*rijsq)
+                                                                        roffsq_rijsq = MERGE(roffsq_rijsq, 1.0_DP, rijsq .GE. ron_switch_sq(this_box))
+                                                                        vdw_energy = vdw_energy + roffsq_rijsq*rterm
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                                        roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                                                (switch_factor2(this_box)+2.0_DP*rijsq)
+                                                                        roffsq_rijsq = MERGE(roffsq_rijsq, 1.0_DP, rijsq .GE. ron_switch_sq(this_box))
+                                                                        vdw_energy = vdw_energy + roffsq_rijsq*rterm
+                                                                        mol_vdw_energy_p(j) = eps*roffsq_rijsq*rterm
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                                        roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                                                (switch_factor2(this_box)+2.0_DP*rijsq)
+                                                                        roffsq_rijsq = MERGE(roffsq_rijsq, 1.0_DP, rijsq .GE. ron_switch_sq(this_box))
+                                                                        vdw_energy = vdw_energy + roffsq_rijsq*rterm
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_vdw_energy = i_vdw_energy + vdw_energy*eps
+                                        ELSE IF (lj_cut_shift_force) THEN
+                                                eps = ppvdwp_table2(1,itype,jtype,ibox) ! 4*epsilon
+                                                negsigsq = ppvdwp_table2(2,itype,jtype,ibox) ! -(sigma**2)
+                                                !shift_p1 = ppvdwp_table2(3,itype,jtype,ibox)
+                                                !vdw_energy = shift_p1
+                                                vdw_energy = ppvdwp_table2(3,itype,jtype,ibox)
+                                                shift_p2 = ppvdwp_table2(4,itype,jtype,ibox)
+                                                vdw_energy = vdw_energy + rcut_vdw(this_box)*shift_p2
+                                                IF (l_pair_store) shift_p1 = vdw_energy
+                                                vdw_energy = -vdw_vlen*vdw_energy
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        nrg = eps*rterm + SQRT(rijsq)*shift_p2
+                                                                        vdw_energy = vdw_energy + nrg
+                                                                        mol_vdw_energy_p(j) = nrg - shift_p1
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + eps*rterm + SQRT(rijsq)*shift_p2
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        nrg = eps*rterm + SQRT(rijsq)*shift_p2
+                                                                        vdw_energy = vdw_energy + nrg
+                                                                        mol_vdw_energy_p(j) = nrg - shift_p1
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + eps*rterm + SQRT(rijsq)*shift_p2
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_vdw_energy = i_vdw_energy + vdw_energy
+                                        ELSE IF (lj_cut) THEN
+                                                eps = ppvdwp_table2(1,itype,jtype,ibox) ! 4*epsilon
+                                                negsigsq = ppvdwp_table2(2,itype,jtype,ibox) ! -(sigma**2)
+                                                vdw_energy = 0.0_DP
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                        mol_vdw_energy_p(j) = eps*rterm 
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                        mol_vdw_energy_p(j) = eps*rterm 
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        negsigbyr2 = negsigsq/rijsq
+                                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                                        rterm = rterm + rterm*rterm
+                                                                        vdw_energy = vdw_energy + rterm
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_vdw_energy = i_vdw_energy + vdw_energy*eps
+                                        ELSE IF (mie_cut .OR. mie_cut_shift) THEN
+                                                epsig_n = ppvdwp_table2(1,itype,jtype,ibox) ! epsilon * mie_coeff * sigma ** n
+                                                epsig_m = ppvdwp_table2(2,itype,jtype,ibox) ! epsilon * mie_coeff * sigma ** m
+                                                mie_n = ppvdwp_table2(3,itype,jtype,ibox) ! already halved
+                                                mie_m = ppvdwp_table2(4,itype,jtype,ibox) ! already halved
+                                                IF (mie_cut_shift) THEN
+                                                        shift_p1 = ppvdwp_table2(5,itype,jtype,ibox)
+                                                        IF (l_pair_store) THEN
+                                                                vdw_energy = 0.0_DP
+                                                        ELSE
+                                                                vdw_energy = -shift_p1*vdw_vlen
+                                                        END IF
+                                                ELSE
+                                                        vdw_energy = 0.0_DP
+                                                        shift_p1 = 0.0_DP
+                                                END IF
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        nrg = shift_p1
+                                                                        nrg = epsig_n * rijsq**mie_n - nrg
+                                                                        nrg = nrg - epsig_m * rijsq**mie_m
+                                                                        mol_vdw_energy_p(j) = nrg
+                                                                        vdw_energy = vdw_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        vdw_energy = vdw_energy + epsig_n * rijsq**mie_n - &
+                                                                                epsig_m * rijsq**mie_m
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        nrg = shift_p1
+                                                                        nrg = epsig_n * rijsq**mie_n - nrg
+                                                                        nrg = nrg - epsig_m * rijsq**mie_m
+                                                                        mol_vdw_energy_p(j) = nrg
+                                                                        vdw_energy = vdw_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        vdw_energy = vdw_energy + epsig_n * rijsq**mie_n - &
+                                                                                epsig_m * rijsq**mie_m
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_vdw_energy = i_vdw_energy + vdw_energy
+                                        END IF
+                                        IF (l_pair_store) THEN
+                                                IF (l_getdrcom .AND. .NOT. l_shortvdw) THEN
+                                                        mol_vdw_energy(1:n_all_interact_p) = &
+                                                                mol_vdw_energy(1:n_all_interact_p) + &
+                                                                mol_vdw_energy_p(1:n_all_interact_p)
+                                                        mol_vdw_energy(n_all_interact_p+1:n_interact) = &
+                                                                mol_vdw_energy(n_all_interact_p+1:n_interact) + &
+                                                                UNPACK(mol_vdw_energy_p(n_all_interact_p+1:vdw_vlen), &
+                                                                vdw_interact_vec(n_all_interact_p+1:n_interact), &
+                                                                SPREAD(0.0_DP,1,n_may_interact))
+                                                ELSE
+                                                        mol_vdw_energy(1:n_interact) = &
+                                                                mol_vdw_energy(1:n_interact) + &
+                                                                UNPACK(mol_vdw_energy_p(1:vdw_vlen), &
+                                                                vdw_interact_vec(1:n_interact), &
+                                                                SPREAD(0.0_DP,1,n_interact))
+                                                END IF
+                                        END IF
+                                END IF
+                                IF (ij_get_qq) THEN
+                                        l_read_svec = ij_get_vdw .AND. l_shortcoul
+                                        IF (l_charge_ewald) THEN
+                                                qq_energy = 0.0_DP
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        nrg = ERFC(alpha_ewald(this_box)*rij) / rij
+                                                                        mol_qq_energy_p(j) = cfqq*nrg
+                                                                        qq_energy = qq_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        qq_energy = qq_energy + ERFC(alpha_ewald(this_box)*rij) / rij
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        nrg = ERFC(alpha_ewald(this_box)*rij) / rij
+                                                                        mol_qq_energy_p(j) = cfqq*nrg
+                                                                        qq_energy = qq_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        qq_energy = qq_energy + ERFC(alpha_ewald(this_box)*rij) / rij
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_qq_energy = i_qq_energy + cfqq*qq_energy
+                                        ELSE IF (l_charge_dsf) THEN
+                                                IF (l_pair_store) THEN
+                                                        qq_energy = 0.0_DP
+                                                ELSE
+                                                        qq_energy = dsf_const*qq_vlen
+                                                END IF
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        nrg = dsf_const
+                                                                        nrg = nrg + dsf_factor2(this_box)*rij + &
+                                                                                ERFC(alpha_dsf(this_box)*rij) / rij
+                                                                        qq_energy = qq_energy + nrg
+                                                                        nrg = nrg*cfqq
+                                                                        mol_qq_energy_p(j) = nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        qq_energy = qq_energy + dsf_factor2(this_box) * rij + &
+                                                                                ERFC(alpha_dsf(this_box)*rij) / rij
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        nrg = dsf_const
+                                                                        nrg = nrg + dsf_factor2(this_box)*rij + &
+                                                                                ERFC(alpha_dsf(this_box)*rij) / rij
+                                                                        qq_energy = qq_energy + nrg
+                                                                        nrg = nrg*cfqq
+                                                                        mol_qq_energy_p(j) = nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, qq_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        rij = SQRT(rijsq)
+                                                                        qq_energy = qq_energy + dsf_factor2(this_box) * rij + &
+                                                                                ERFC(alpha_dsf(this_box)*rij) / rij
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                i_qq_energy = i_qq_energy + cfqq*qq_energy
+                                        ELSE IF (l_charge_cut) THEN
+                                                qq_energy = 0.0_DP
+                                                IF (l_read_svec) THEN
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        nrg = cfqq / SQRT(rijsq)
+                                                                        mol_qq_energy_p(j) = nrg
+                                                                        qq_energy = qq_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_svec(j)
+                                                                        qq_energy = qq_energy + 1.0_DP / SQRT(rijsq)
+                                                                END DO
+                                                        END IF
+                                                ELSE
+                                                        IF (l_pair_store) THEN
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        nrg = cfqq / SQRT(rijsq)
+                                                                        mol_qq_energy_p(j) = nrg
+                                                                        qq_energy = qq_energy + nrg
+                                                                END DO
+                                                        ELSE
+                                                                DO j = 1, vdw_vlen
+                                                                        rijsq = rijsq_vec(j)
+                                                                        qq_energy = qq_energy + 1.0_DP / SQRT(rijsq)
+                                                                END DO
+                                                        END IF
+                                                END IF
+                                                IF (l_pair_store) THEN
+                                                        i_qq_energy = i_qq_energy + qq_energy
+                                                ELSE
+                                                        i_qq_energy = i_qq_energy + cfqq*qq_energy
+                                                END IF
+                                        END IF
+                                        IF (l_pair_store) THEN
+                                                IF (l_getdrcom .AND. .NOT. l_shortcoul) THEN
+                                                        mol_qq_energy(1:n_all_interact_p) = &
+                                                                mol_qq_energy(1:n_all_interact_p) + &
+                                                                mol_qq_energy_p(1:n_all_interact_p)
+                                                        mol_qq_energy(n_all_interact_p+1:n_interact) = &
+                                                                mol_qq_energy(n_all_interact_p+1:n_interact) + &
+                                                                UNPACK(mol_qq_energy_p(n_all_interact_p+1:qq_vlen), &
+                                                                qq_interact_vec(n_all_interact_p+1:n_interact), &
+                                                                SPREAD(0.0_DP,1,n_may_interact))
+                                                ELSE
+                                                        mol_qq_energy(1:n_interact) = &
+                                                                mol_qq_energy(1:n_interact) + &
+                                                                UNPACK(mol_qq_energy_p(1:qq_vlen), &
+                                                                qq_interact_vec(1:n_interact), &
+                                                                SPREAD(0.0_DP,1,n_interact))
+                                                END IF
+                                        END IF
+                                END IF
+                        END DO
+                    END DO
+                    !$OMP END DO
+                    IF (l_pair_store) THEN
+                            IF (l_getdrcom) THEN
+                                    !$OMP WORKSHARE
+                                    pair_nrg_vdw(which_interact(1:n_interact_vdw_pairstore)+jsl_base,isl) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_vdw(isl,which_interact(1:n_interact_vdw_pairstore)+jsl_base) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_qq(which_interact(1:n_interact_qq_pairstore)+jsl_base,isl) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    pair_nrg_qq(isl,which_interact(1:n_interact_qq_pairstore)+jsl_base) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    !$OMP END WORKSHARE
+                            ELSE IF (open_mc_flag) THEN
+                                    !$OMP WORKSHARE
+                                    pair_nrg_vdw(live_locates(1:n_interact_vdw_pairstore)+jsl_base,isl) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_vdw(isl,live_locates(1:n_interact_vdw_pairstore)+jsl_base) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_qq(live_locates(1:n_interact_qq_pairstore)+jsl_base,isl) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    pair_nrg_qq(isl,live_locates(1:n_interact_qq_pairstore)+jsl_base) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    !$OMP END WORKSHARE
+                            ELSE
+                                    !$OMP WORKSHARE
+                                    pair_nrg_vdw(jsl_base+1:jsl_base+n_interact_vdw_pairstore,isl) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_qq(jsl_base+1:jsl_base+n_interact_qq_pairstore,isl) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    pair_nrg_vdw(isl,jsl_base+1:jsl_base+n_interact_vdw_pairstore) = mol_vdw_energy(1:n_interact_vdw_pairstore)
+                                    pair_nrg_qq(isl,jsl_base+1:jsl_base+n_interact_qq_pairstore) = mol_qq_energy(1:n_interact_qq_pairstore)
+                                    !$OMP END WORKSHARE
+                            END IF
+                    END IF
+            ELSE
+                    vdw_p1(1:natompairs) = vdw_p(1:natompairs,1)
+                    vdw_p2(1:natompairs) = vdw_p(1:natompairs,2)
+                    IF (n_vdw_p > 2) vdw_p3(1:natompairs) = vdw_p(1:natompairs,3)
+                    IF (n_vdw_p > 3) vdw_p4(1:natompairs) = vdw_p(1:natompairs,4)
+                    IF (n_vdw_p > 4) vdw_p5(1:natompairs) = vdw_p(1:natompairs,5)
+                    IF (l_debug_print) WRITE(*,*) "n_interact = ", n_interact
+                    !$OMP DO SCHEDULE(DYNAMIC)
+                    DO j_interact = 1, n_interact !n_all_interact
+                        !DIR$ ASSUME (MOD(atompairdim,dimpad_4byte) .EQ. 0)
+                        !DIR$ ASSUME (MOD(mol_dim,dimpad_4byte) .EQ. 0)
+                        !DIR$ ASSUME (MOD(SIZE(vdw_p,1),dimpad_4byte) .EQ. 0)
+                        !DIR$ ASSUME (MOD(SIZE(jrp_perm,1),dimpad_4byte) .EQ. 0)
+                        IF (overlap .OR. i_overlap) CYCLE
+                        ij_overlap = .FALSE.
+                        vdw_energy = 0.0_DP
+                        qq_energy = 0.0_DP
+                        jm = which_interact(j_interact) !which_all_interact(j_interact)
+                        IF (l_ortho) THEN
+                                IF (l_order_ij) THEN
+                                        DO j = 1, jnatoms
+                                                jsxp = atom_list(j,jm,js)%rp(1)
+                                                jsyp = atom_list(j,jm,js)%rp(2)
+                                                jszp = atom_list(j,jm,js)%rp(3)
+                                                k = (j-1) * n_i_exist
+                                                !$OMP SIMD
+                                                DO i = 1, n_i_exist
+                                                        jrxp(i+k) = jsxp
+                                                        jryp(i+k) = jsyp
+                                                        jrzp(i+k) = jszp
+                                                END DO
+                                                !$OMP END SIMD
+                                        END DO
+                                ELSE
+                                        !$OMP SIMD PRIVATE(jsxp,jsyp,jszp)
+                                        DO j = 1, jnatoms
+                                                jsxp = atom_list(j,jm,js)%rp(1)
+                                                jsyp = atom_list(j,jm,js)%rp(2)
+                                                jszp = atom_list(j,jm,js)%rp(3)
+                                                jrxp(j) = jsxp
+                                                jryp(j) = jsyp
+                                                jrzp(j) = jszp
+                                                DO i = 1, n_i_exist - 1
+                                                        k = i*jnatoms
+                                                        jrxp(j+k) = jsxp
+                                                        jryp(j+k) = jsyp
+                                                        jrzp(j+k) = jszp
+                                                END DO
+                                        END DO
+                                        !$OMP END SIMD
+                                END IF
+                        ELSE
+                                !$OMP SIMD PRIVATE(jrp,jsxp,jsyp,jszp)
+                                DO j = 1, jnatoms
+                                        jrp = atom_list(j,jm,js)%rp(1)
+                                        jsxp = inv_h11*jrp
+                                        jrp = atom_list(j,jm,js)%rp(2)
+                                        jsxp = jsxp + inv_h12*jrp
+                                        jsyp = inv_h22*jrp
+                                        jrp = atom_list(j,jm,js)%rp(3)
+                                        jsxp = jsxp + inv_h13*jrp
+                                        jsyp = jsyp + inv_h23*jrp
+                                        jszp = inv_h33*jrp
+                                        jrxp(j) = jsxp
+                                        jryp(j) = jsyp
+                                        jrzp(j) = jszp
+                                        DO i = 1, n_i_exist-1
+                                                k = i*jnatoms
+                                                jrxp(i+k) = jsxp
+                                                jryp(i+k) = jsyp
+                                                jrzp(i+k) = jszp
+                                        END DO
+                                END DO
+                                !$OMP END SIMD
+                        END IF
+                        !jrxp = RESHAPE(SPREAD(jrxp_vec,2,n_i_exist), (/ natompairs /))
+                        !jryp = RESHAPE(SPREAD(jryp_vec,2,n_i_exist), (/ natompairs /))
+                        !jrzp = RESHAPE(SPREAD(jrzp_vec,2,n_i_exist), (/ natompairs /))
+                        IF (l_ortho) THEN
+                                DO ji = 1, natompairs
+                                        dxp = ABS(jrxp(ji) - irxp(ji))
+                                        dyp = ABS(jryp(ji) - iryp(ji))
+                                        dzp = ABS(jrzp(ji) - irzp(ji))
+                                        IF (dxp > hxl) dxp = dxp - xl
+                                        IF (dyp > hyl) dyp = dyp - yl
+                                        IF (dzp > hzl) dzp = dzp - zl
+                                        dxp = dxp*dxp
+                                        dxp = dxp + dyp*dyp
+                                        dxp = dxp + dzp*dzp
+                                        rijsq_vec(ji) = dxp
+                                        vdw_interact_vec(ji) = dxp < this_vdw_rcutsq
+                                        IF (need_sqrt) rij_vec(ji) = SQRT(dxp)
+                                        i_overlap = i_overlap .OR. dxp < rcut_lowsq
+                                END DO
+                        ELSE
+                                DO ji = 1, natompairs
+                                        dsp = jrxp(ji) - irxp(ji)
+                                        IF (dsp > 0.5_DP) THEN
+                                                dsp = dsp - 1.0_DP
+                                        ELSE IF (dsp < -0.5_DP) THEN
+                                                dsp = dsp + 1.0_DP
+                                        END IF
+                                        dxp = h11*dsp
+                                        dsp = jryp(ji) - iryp(ji)
+                                        IF (dsp > 0.5_DP) THEN
+                                                dsp = dsp - 1.0_DP
+                                        ELSE IF (dsp < -0.5_DP) THEN
+                                                dsp = dsp + 1.0_DP
+                                        END IF
+                                        dxp = dxp + h12*dsp
+                                        dyp = h22*dsp
+                                        dsp = jrzp(ji) - irzp(ji)
+                                        IF (dsp > 0.5_DP) THEN
+                                                dsp = dsp - 1.0_DP
+                                        ELSE IF (dsp < -0.5_DP) THEN
+                                                dsp = dsp + 1.0_DP
+                                        END IF
+                                        dxp = dxp + h13*dsp
+                                        dyp = dyp + h23*dsp
+                                        dzp = h33*dsp
+                                        dxp = dxp*dxp
+                                        dxp = dxp + dyp*dyp
+                                        dxp = dxp + dzp*dzp
+                                        rijsq_vec(ji) = dxp
+                                        !vdw_interact_vec(ji) = dxp < this_vdw_rcutsq
+                                        i_overlap = i_overlap .OR. dxp < rcut_lowsq
+                                END DO
+                        END IF
+                        IF (l_debug_print) THEN
+                                WRITE(*,*) "jm = ", jm
+                                WRITE(*,*) "i_overlap = ", i_overlap
+                                WRITE(*,*) "rijsq_vec = "
+                                WRITE(*,*) rijsq_vec(1:natompairs)
+                        END IF
+                        IF (i_overlap) THEN
+                                overlap = .TRUE.
+                                CYCLE
+                        END IF
+                        l_masked = j_interact > n_all_interact .OR. l_shortvdw
+                        atompair_vlen = natompairs ! temporary, replace when appropriate
+                        IF (lj_charmm) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        eps = vdw_p1(ji) ! epsilon
+                                                        sigsq = vdw_p2(ji) ! sigma**2
+                                                        sigbyr2 = sigsq/rijsq ! sigma was already squared
+                                                        sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                                        sigbyr12 = sigbyr6*sigbyr6
+                                                        sigbyr12 = sigbyr12 - 2.0_DP*sigbyr6
+                                                        vdw_energy = vdw_energy + eps*sigbyr12
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                eps = vdw_p1(ji) ! epsilon
+                                                sigsq = vdw_p2(ji) ! sigma**2
+                                                sigbyr2 = sigsq/rijsq ! sigma was already squared
+                                                sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                                sigbyr12 = sigbyr6*sigbyr6
+                                                sigbyr12 = sigbyr12 - 2.0_DP*sigbyr6
+                                                vdw_energy = vdw_energy + eps*sigbyr12
+                                        END DO
+                                END IF
+                        ELSE IF (lj_cut_shift) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        eps = vdw_p1(ji) ! 4*epsilon
+                                                        negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                        shift_p1 = vdw_p3(ji)
+                                                        negsigbyr2 = negsigsq/rijsq
+                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                        rterm = rterm + rterm*rterm
+                                                        vdw_energy = vdw_energy + eps * rterm - shift_p1
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                eps = vdw_p1(ji) ! 4*epsilon
+                                                negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                shift_p1 = vdw_p3(ji)
+                                                negsigbyr2 = negsigsq/rijsq
+                                                rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                rterm = rterm + rterm*rterm
+                                                vdw_energy = vdw_energy + eps * rterm - shift_p1
+                                        END DO
+                                END IF
+                        ELSE IF (lj_cut_switch) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        eps = vdw_p1(ji) ! 4*epsilon
+                                                        negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                        negsigbyr2 = negsigsq/rijsq
+                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                        rterm = rterm + rterm*rterm
+                                                        roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                        roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                                (switch_factor2(this_box)+2.0_DP*rijsq)*eps
+                                                        eps = MERGE(roffsq_rijsq, eps, rijsq .GE. ron_switch_sq(this_box))
+                                                        vdw_energy = vdw_energy + eps*rterm
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                eps = vdw_p1(ji) ! 4*epsilon
+                                                negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                negsigbyr2 = negsigsq/rijsq
+                                                rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                rterm = rterm + rterm*rterm
+                                                roffsq_rijsq = roff_switch_sq(this_box) - rijsq
+                                                roffsq_rijsq = roffsq_rijsq * roffsq_rijsq * switch_factor1(this_box) * &
+                                                        (switch_factor2(this_box)+2.0_DP*rijsq)*eps
+                                                eps = MERGE(roffsq_rijsq, eps, rijsq .GE. ron_switch_sq(this_box))
+                                                vdw_energy = vdw_energy + eps*rterm
+                                        END DO
+                                END IF
+                        ELSE IF (lj_cut_shift_force) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        eps = vdw_p1(ji) ! 4*epsilon
+                                                        negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                        shift_p1 = vdw_p3(ji)
+                                                        shift_p2 = vdw_p4(ji)
+                                                        negsigbyr2 = negsigsq/rijsq
+                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                        rterm = rterm + rterm*rterm
+                                                        vdw_energy = vdw_energy + eps * rterm - shift_p1 - &
+                                                                (rcut_vdw(this_box)-SQRT(rijsq))*shift_p2
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                eps = vdw_p1(ji) ! 4*epsilon
+                                                negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                shift_p1 = vdw_p3(ji)
+                                                shift_p2 = vdw_p4(ji)
+                                                negsigbyr2 = negsigsq/rijsq
+                                                rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                rterm = rterm + rterm*rterm
+                                                vdw_energy = vdw_energy + eps * rterm - shift_p1 - &
+                                                        (rcut_vdw(this_box)-SQRT(rijsq))*shift_p2
+                                        END DO
+                                END IF
+                        ELSE IF (lj_cut) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        eps = vdw_p1(ji) ! 4*epsilon
+                                                        negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                        l_interact = rijsq < this_vdw_rcutsq
+                                                        negsigbyr2 = negsigsq/rijsq
+                                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                        rterm = rterm + rterm*rterm
+                                                        vdw_energy = vdw_energy + eps * rterm
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                eps = vdw_p1(ji) ! 4*epsilon
+                                                negsigsq = vdw_p2(ji) ! -(sigma**2)
+                                                l_interact = rijsq < this_vdw_rcutsq
+                                                negsigbyr2 = negsigsq/rijsq
+                                                rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                                rterm = rterm + rterm*rterm
+                                                vdw_energy = vdw_energy + eps * rterm
+                                        END DO
+                                END IF
+                        ELSE IF (mie_cut_shift) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        epsig_n = vdw_p1(ji) ! epsilon * mie_coeff * sigma ** n
+                                                        epsig_m = vdw_p2(ji) ! epsilon * mie_coeff * sigma ** m
+                                                        mie_n = vdw_p3(ji) ! already halved
+                                                        mie_m = vdw_p4(ji) ! already halved
+                                                        shift_p1 = vdw_p5(ji)
+                                                        vdw_energy = vdw_energy + epsig_n * rijsq**mie_n
+                                                        vdw_energy = vdw_energy - epsig_m * rijsq**mie_m
+                                                        vdw_energy = vdw_energy - shift_p1
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                epsig_n = vdw_p1(ji) ! epsilon * mie_coeff * sigma ** n
+                                                epsig_m = vdw_p2(ji) ! epsilon * mie_coeff * sigma ** m
+                                                mie_n = vdw_p3(ji) ! already halved
+                                                mie_m = vdw_p4(ji) ! already halved
+                                                shift_p1 = vdw_p5(ji)
+                                                vdw_energy = vdw_energy + epsig_n * rijsq**mie_n
+                                                vdw_energy = vdw_energy - epsig_m * rijsq**mie_m
+                                                vdw_energy = vdw_energy - shift_p1
+                                        END DO
+                                END IF
+                        ELSE IF (mie_cut) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_vdw_rcutsq) THEN
+                                                        epsig_n = vdw_p1(ji) ! epsilon * mie_coeff * sigma ** n
+                                                        epsig_m = vdw_p2(ji) ! epsilon * mie_coeff * sigma ** m
+                                                        mie_n = vdw_p3(ji) ! already halved
+                                                        mie_m = vdw_p4(ji) ! already halved
+                                                        vdw_energy = vdw_energy + epsig_n * rijsq**mie_n
+                                                        vdw_energy = vdw_energy - epsig_m * rijsq**mie_m
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                epsig_n = vdw_p1(ji) ! epsilon * mie_coeff * sigma ** n
+                                                epsig_m = vdw_p2(ji) ! epsilon * mie_coeff * sigma ** m
+                                                mie_n = vdw_p3(ji) ! already halved
+                                                mie_m = vdw_p4(ji) ! already halved
+                                                vdw_energy = vdw_energy + epsig_n * rijsq**mie_n
+                                                vdw_energy = vdw_energy - epsig_m * rijsq**mie_m
+                                        END DO
+                                END IF
+                        END IF
+                        l_masked = j_interact > n_all_interact .OR. l_shortcoul
+                        IF (l_charge_ewald) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_coul_rcutsq) THEN
+                                                        cfqq = cfqq_vec(ji)
+                                                        rij = SQRT(rijsq)
+                                                        qq_energy = qq_energy + ERFC(alpha_ewald(this_box)*rij) / rij * cfqq
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                cfqq = cfqq_vec(ji)
+                                                rij = SQRT(rijsq)
+                                                qq_energy = qq_energy + ERFC(alpha_ewald(this_box)*rij) / rij * cfqq
+                                        END DO
+                                END IF
+                        ELSE IF (l_charge_dsf) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_coul_rcutsq) THEN
+                                                        cfqq = cfqq_vec(ji)
+                                                        rij = SQRT(rijsq)
+                                                        nrg = ERFC(alpha_dsf(this_box)*rij)/rij + dsf_const
+                                                        nrg = nrg + rij*dsf_factor2(this_box)
+                                                        qq_energy = qq_energy + nrg*cfqq
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                cfqq = cfqq_vec(ji)
+                                                rij = SQRT(rijsq)
+                                                nrg = ERFC(alpha_dsf(this_box)*rij)/rij + dsf_const
+                                                nrg = nrg + rij*dsf_factor2(this_box)
+                                                qq_energy = qq_energy + nrg*cfqq
+                                        END DO
+                                END IF
+                        ELSE IF (l_charge_cut) THEN
+                                IF (l_masked) THEN
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                IF (rijsq < this_coul_rcutsq) THEN
+                                                        cfqq = cfqq_vec(ji)
+                                                        qq_energy = qq_energy + cfqq / SQRT(rijsq)
+                                                END IF
+                                        END DO
+                                ELSE
+                                        DO ji = 1, atompair_vlen
+                                                rijsq = rijsq_vec(ji)
+                                                cfqq = cfqq_vec(ji)
+                                                qq_energy = qq_energy + cfqq / SQRT(rijsq)
+                                        END DO
+                                END IF
+                        END IF
+                        IF (l_pair_store) THEN
+                                jsl = jsl_base + jm
+                                pair_nrg_vdw(isl,jsl) = vdw_energy
+                                pair_nrg_vdw(jsl,isl) = vdw_energy
+                                pair_nrg_qq(isl,jsl) = qq_energy
+                                pair_nrg_qq(jsl,isl) = qq_energy
+                        END IF
+                        i_vdw_energy = i_vdw_energy + vdw_energy
+                        i_qq_energy = i_qq_energy + qq_energy
+                    END DO
+                    !$OMP END DO
+            END IF
+
+            IF (overlap) EXIT
+
+
+    END DO
+    !$OMP END PARALLEL
+    overlap = i_overlap
+  END SUBROUTINE Compute_Molecule_Nonbond_Inter_Energy_Vectorized
 
 
   SUBROUTINE Compute_MoleculeCollection_Nonbond_Inter_Energy(n_list,lm_list,is_list, &
@@ -1548,12 +3950,12 @@ SUBROUTINE Estimate_MoleculePair_Energy(im,is,jm,js,this_box,mpnrg,overlap)
           IF (im == widom_locate .AND. is == widom_species) THEN
                   these_atoms_i => widom_atoms
           ELSE
-                  these_atoms_i => atom_list(:,im,is)
+                  these_atoms_i => atom_list(1:natoms(is),im,is)
           END IF
           IF (jm == widom_locate .AND. js == widom_species) THEN
                   these_atoms_j => widom_atoms
           ELSE
-                  these_atoms_j => atom_list(:,jm,js)
+                  these_atoms_j => atom_list(1:natoms(js),jm,js)
           END IF
 
           bsolvent = species_list(js)%solvent_base
@@ -1565,15 +3967,15 @@ SUBROUTINE Estimate_MoleculePair_Energy(im,is,jm,js,this_box,mpnrg,overlap)
           rcutsq_shift = rcut_cbmcsq(this_box) - rsq_shifter
           DO ia = 1, natoms(is)
                 IF (.NOT. these_atoms_i(ia)%exist) CYCLE
-                ixp = these_atoms_i(ia)%rxp
-                iyp = these_atoms_i(ia)%ryp
-                izp = these_atoms_i(ia)%rzp
+                ixp = these_atoms_i(ia)%rp(1)
+                iyp = these_atoms_i(ia)%rp(2)
+                izp = these_atoms_i(ia)%rp(3)
                 DO ja = 1, natoms(js)
                         IF (.NOT. these_atoms_j(ja)%exist) CYCLE
                         CALL Minimum_Image_Separation(this_box, &
-                                ixp - these_atoms_j(ja)%rxp, &
-                                iyp - these_atoms_j(ja)%ryp, &
-                                izp - these_atoms_j(ja)%rzp, &
+                                ixp - these_atoms_j(ja)%rp(1), &
+                                iyp - these_atoms_j(ja)%rp(2), &
+                                izp - these_atoms_j(ja)%rp(3), &
                                 dx, dy, dz)
                         rijsq_shift = dx*dx + dy*dy + dz*dz - rsq_shifter
                         IF (rijsq_shift >= rcutsq_shift) CYCLE
@@ -1586,6 +3988,262 @@ SUBROUTINE Estimate_MoleculePair_Energy(im,is,jm,js,this_box,mpnrg,overlap)
           !
   END SUBROUTINE Estimate_MoleculePair_Energy
 
+  REAL(SP) ELEMENTAL FUNCTION Compute_Cell_List_CBMC_nrg(irxp,iryp,irzp,xi,yi,zi,ia,is,this_box)
+          REAL(SP), INTENT(IN) :: irxp,iryp,irzp
+          INTEGER, INTENT(IN) :: xi,yi,zi, ia, is, this_box
+          INTEGER :: vlen, i
+          REAL(SP) :: nrg
+
+          INTEGER :: dmult, isolute, rsqsol
+          REAL(SP) :: this_shifter, rsq_shift, drp
+          REAL(SP) :: dxp, dyp, dzp
+
+          REAL(SP) :: rsq, rcutsq
+          REAL(SP), DIMENSION(cbmc_max_interact) :: rsq_vec
+          !REAL(SP), DIMENSION(cbmc_max_interact,2) :: ppljp
+          INTEGER :: itype, jtype, this_int_vdw_style, this_int_vdw_sum_style
+          REAL(SP) :: eps, sigsq, sigbyr2, sigbyr6, sigbyr12, nrg_vdw, rterm, rterm2, negsigsq, negsigbyr2, roffsq_rijsq
+          REAL(SP) :: mie_m, mie_n, lnrsq, epsig_n, epsig_m
+          REAL(SP) :: icharge, jcharge, invr, rij, nrg_qq, dsf_const, alpha_ewald_sp, alpha_dsf_sp, dsf_factor2_sp
+          REAL(SP) :: const1, const2, const3, const4, sf_const1, sf_const2
+          INTEGER :: this_int_charge_sum_style
+          LOGICAL :: i_get_coul
+
+          vlen = cbmc_cell_n_interact(xi,yi,zi,this_box)
+          !DIR$ ASSUME (MOD(vlen,dimpad_4byte) .EQ. 0)
+          nrg = 0.0
+
+          IF (precalc_atompair_nrg) THEN
+                  dmult = SIZE(atompair_nrg_table,1)
+                  this_shifter = -sp_rcut_lowsq !-(rsq_shifter + rsq_step)
+                  isolute = species_list(is)%solute_base + ia
+                  !DIR$ VECTOR ALIGNED
+                  DO i = 1, vlen
+                        rsq_shift = this_shifter
+                        drp = cbmc_cell_rsp(i,1,xi,yi,zi,this_box) - irxp
+                        rsq_shift = rsq_shift + drp*drp
+                        drp = cbmc_cell_rsp(i,2,xi,yi,zi,this_box) - iryp
+                        rsq_shift = rsq_shift + drp*drp
+                        drp = cbmc_cell_rsp(i,3,xi,yi,zi,this_box) - irzp
+                        rsq_shift = rsq_shift + drp*drp
+                        rsqsol = INT(MIN(rsq_shift*inv_rsq_step_sp,atompair_nrg_res_sp)) + &
+                                (cbmc_cell_ti(i,xi,yi,zi,this_box)-1)*dmult
+                        nrg = nrg + atompair_nrg_table_reduced(rsqsol,isolute,this_box)
+                  END DO
+                  Compute_Cell_List_CBMC_nrg = nrg
+                  RETURN
+          END IF
+          rcutsq = REAL(rcut_cbmcsq(this_box),SP)
+          !DIR$ ASSUME_ALIGNED rsq_vec:array_align_bytes
+          !DIR$ VECTOR ALIGNED
+          DO i = 1, vlen
+                drp = cbmc_cell_rsp(i,1,xi,yi,zi,this_box) - irxp
+                rsq = drp*drp 
+                drp = cbmc_cell_rsp(i,2,xi,yi,zi,this_box) - iryp
+                rsq = rsq + drp*drp
+                drp = cbmc_cell_rsp(i,3,xi,yi,zi,this_box) - irzp
+                rsq = rsq + drp*drp
+                rsq_vec(i) = rsq
+          END DO
+          itype = nonbond_list(ia,is)%atom_type_number
+          this_int_vdw_style = MIN(itype,1) * int_vdw_style(this_box)
+          this_int_vdw_sum_style = MIN(itype,1) * int_vdw_sum_style(this_box)
+          icharge = nonbond_list(ia,is)%charge * charge_factor
+          i_get_coul = icharge .NE. 0.0_SP .AND. int_charge_style(this_box) .NE. charge_none .AND. species_list(is)%l_coul_cbmc
+          this_int_charge_sum_style = MERGE(MERGE(charge_sf,int_charge_sum_style(this_box),cbmc_charge_sf_flag),0,i_get_coul)
+          nrg_vdw = 0.0_SP
+          nrg_qq = 0.0_SP
+          SELECT CASE(this_int_vdw_style)
+          CASE(vdw_lj)
+                  SELECT CASE (this_int_vdw_sum_style)
+                  CASE(vdw_charmm)
+                          !DIR$ VECTOR ALIGNED
+                          !$OMP SIMD PRIVATE(rsq,jtype,eps,sigsq,sigbyr2,sigbyr6) &
+                          !$OMP REDUCTION(+:nrg_vdw)
+                          DO i = 1, vlen
+                                rsq = rsq_vec(i)
+                                jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                                eps = ppvdwp_table_sp(jtype,itype,1,this_box) ! epsilon
+                                sigsq = ppvdwp_table_sp(jtype,itype,2,this_box) ! sigma**2
+                                sigbyr2 = sigsq/rsq ! sigma was already squared
+                                IF (rsq >= rcutsq) sigbyr2 = 0.0
+                                sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                nrg_vdw = nrg_vdw + eps * (sigbyr6*sigbyr6 - sigbyr6 - sigbyr6) ! eps was not multiplied by 4
+                          END DO
+                          !!$OMP END SIMD
+                  CASE(vdw_cut_shift)
+                          DO i = 1, vlen
+                                rsq = rsq_vec(i)
+                                jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                                IF (rsq < rcutsq) THEN
+                                  eps = ppvdwp_table_sp(jtype,itype,1,this_box) ! 4*epsilon
+                                  negsigsq = ppvdwp_table_sp(jtype,itype,2,this_box) ! -(sigma**2)
+                                  negsigbyr2 = negsigsq/rsq
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm = rterm + rterm*rterm
+                                  nrg_vdw = nrg_vdw + eps*rterm
+                                  negsigbyr2 = negsigsq*inv_rcut_vdwsq_sp(this_box)
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm = rterm + rterm*rterm
+                                  nrg_vdw = nrg_vdw - eps*rterm
+                                END IF
+                          END DO
+                  CASE(vdw_cut_switch)
+                          const1 = switch_factor1(this_box)*switch_factor2(this_box)
+                          const2 = switch_factor1(this_box)*2
+                          const3 = ron_switch_sq(this_box)
+                          const4 = roff_switch_sq(this_box)
+                          DO i = 1, vlen
+                                rsq = rsq_vec(i)
+                                jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                                IF (rsq < rcutsq) THEN
+                                  eps = ppvdwp_table_sp(jtype,itype,1,this_box) ! 4*epsilon
+                                  negsigsq = ppvdwp_table_sp(jtype,itype,2,this_box) ! -(sigma**2)
+                                  negsigbyr2 = negsigsq/rsq
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm = rterm + rterm*rterm
+                                  IF (rsq .GE. const3) THEN
+                                          roffsq_rijsq = const4 - rsq
+                                          eps = &
+                                                  roffsq_rijsq*roffsq_rijsq * &
+                                                  (const1+const2*rsq) * eps
+                                  END IF
+                                  nrg_vdw = nrg_vdw + eps*rterm
+                                END IF
+                          END DO
+                  CASE(vdw_cut_shift_force)
+                          const1 = 36.0_SP*inv_rcut_vdwsq_sp(this_box)
+                          DO i = 1, vlen
+                                rsq = rsq_vec(i)
+                                jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                                IF (rsq < rcutsq) THEN
+                                  eps = ppvdwp_table_sp(jtype,itype,1,this_box) ! 4*epsilon
+                                  negsigsq = ppvdwp_table_sp(jtype,itype,2,this_box) ! -(sigma**2)
+                                  negsigbyr2 = negsigsq/rsq
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm = rterm + rterm*rterm
+                                  nrg_vdw = nrg_vdw + eps * rterm
+                                  negsigbyr2 = negsigsq*inv_rcut_vdwsq_sp(this_box)
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm2 = rterm * rterm
+                                  rterm2 = rterm + 2.0_SP * rterm2
+                                  rterm = rterm + rterm*rterm
+                                  nrg_vdw = nrg_vdw - eps * rterm
+                                  nrg_vdw = nrg_vdw + &
+                                          (SQRT(rsq*const1) - 6.0_SP) * &
+                                          eps * rterm2
+                                  !nrg_vdw = nrg_vdw - &
+                                  !        (SQRT(rsq) - rcut_vdw_sp(this_box)) * &
+                                  !        -6.0_SP * eps * rterm2 * inv_rcut_vdw_sp(this_box)
+                                END IF
+                          END DO
+                  CASE(vdw_cut,vdw_cut_tail)
+                          !DIR$ LOOP COUNT = 40, 48, 56, 64
+                          !DIR$ VECTOR ALIGNED
+                          !$OMP SIMD PRIVATE(eps,negsigsq,negsigbyr2,rterm,rsq,jtype) &
+                          !$OMP REDUCTION(+:nrg_vdw)
+                          DO i = 1, vlen
+                                rsq = rsq_vec(i)
+                                jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                                eps = ppvdwp_table_sp(jtype,itype,1,this_box) ! 4*epsilon
+                                negsigsq = ppvdwp_table_sp(jtype,itype,2,this_box) ! -(sigma**2)
+                                IF (rsq < rcutsq) THEN
+                                  negsigbyr2 = negsigsq/rsq
+                                  rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                  rterm = rterm + rterm*rterm
+                                  ! nrg = eps * rterm
+                                  nrg_vdw = nrg_vdw + eps * rterm
+                                END IF
+                          END DO
+                          !!$OMP END SIMD
+                  END SELECT
+          CASE(vdw_mie)
+                  IF (.NOT. l_nonuniform_exponents) THEN
+                          mie_n = ppvdwp_table2_sp(3,itype,itype,this_box)
+                          mie_m = ppvdwp_table2_sp(4,itype,itype,this_box)
+                  END IF
+                  DO i = 1, vlen
+                        rsq = rsq_vec(i)
+                        jtype = cbmc_cell_atomtypes(i,xi,yi,zi,this_box)
+                        IF (rsq < rcutsq) THEN
+                          lnrsq = LOG(rsq)
+                          epsig_n = ppvdwp_table2_sp(1,jtype,itype,this_box)  ! epsilon * mie_coeff * sigma ** n
+                          epsig_m = ppvdwp_table2_sp(2,jtype,itype,this_box) ! epsilon * mie_coeff * sigma ** m
+                          IF (l_nonuniform_exponents) THEN
+                                  mie_n = ppvdwp_table2_sp(3,jtype,itype,this_box) ! already halved and negative
+                                  mie_m = ppvdwp_table2_sp(4,jtype,itype,this_box) ! already halved and negative
+                          END IF
+                          nrg_vdw = nrg_vdw + epsig_n*EXP(lnrsq*mie_n) - epsig_m*EXP(lnrsq*mie_m)
+                          IF (int_vdw_sum_style(this_box) == vdw_cut_shift) THEN
+                                  nrg_vdw = nrg_vdw - ppvdwp_table_sp(jtype,itype,5,this_box)
+                                  !nrg = nrg + epsig_n * rcut_vdwsq(this_box)**mie_n
+                                  !nrg = nrg - epsig_m * rcut_vdwsq(this_box)**mie_m
+                          END IF
+                        END IF
+                  END DO
+          END SELECT
+          SELECT CASE (this_int_charge_sum_style)
+          CASE(charge_ewald)
+                  alpha_ewald_sp = REAL(alpha_ewald(this_box),SP)
+                  !DIR$ LOOP COUNT = 40, 48, 56, 64
+                  !DIR$ VECTOR ALIGNED
+                  DO i = 1, vlen
+                        rsq = rsq_vec(i)
+                        jcharge = cbmc_cell_rsp(i,4,xi,yi,zi,this_box)
+                        IF (rsq < rcutsq) THEN
+                                invr = 1.0 / SQRT(rsq)
+                                rij = rsq * invr
+                                nrg_qq = nrg_qq + ERFC(alpha_ewald_sp*rij)*invr*jcharge
+                        END IF
+                  END DO
+          CASE(charge_dsf)
+                  dsf_const = -dsf_factor2(this_box)*(rcut_coul(this_box)+dsf_factor1(this_box))
+                  alpha_dsf_sp = REAL(alpha_dsf(this_box),SP)
+                  dsf_factor2_sp = dsf_factor2(this_box)
+                  !DIR$ LOOP COUNT = 40, 48, 56, 64
+                  !DIR$ VECTOR ALIGNED
+                  DO i = 1, vlen
+                        rsq = rsq_vec(i)
+                        IF (rsq < rcutsq) THEN
+                                jcharge = cbmc_cell_rsp(i,4,xi,yi,zi,this_box)
+                                invr = 1.0 / SQRT(rsq)
+                                rij = rsq*invr
+                                nrg = ERFC(alpha_dsf_sp*rij)*invr + dsf_const + &
+                                        rij*dsf_factor2(this_box)
+                                nrg_qq = nrg_qq + jcharge * nrg
+                        END IF
+                  END DO
+          CASE(charge_cut)
+                  !DIR$ LOOP COUNT = 40, 48, 56, 64
+                  !DIR$ VECTOR ALIGNED
+                  DO i = 1, vlen
+                        rsq = rsq_vec(i)
+                        IF (rsq < rcutsq) THEN
+                                jcharge = cbmc_cell_rsp(i,4,xi,yi,zi,this_box)
+                                invr = 1.0 / SQRT(rsq)
+                                nrg_qq = nrg_qq + jcharge*invr
+                        END IF
+                  END DO
+          CASE(charge_sf)
+                  sf_const1 = REAL(2.0_DP/rcut_cbmc(this_box),SP)
+                  sf_const2 = 1.0/rcutsq
+                  !DIR$ LOOP COUNT = 40, 48, 56, 64
+                  !DIR$ VECTOR ALIGNED
+                  DO i = 1, vlen
+                        rsq = rsq_vec(i)
+                        jcharge = cbmc_cell_rsp(i,4,xi,yi,zi,this_box)
+                        IF (rsq < rcutsq) THEN
+                                invr = 1.0/SQRT(rsq)
+                                rij = rsq*invr
+                                nrg = invr - sf_const1
+                                nrg = nrg + rij*sf_const2
+                                nrg_qq = nrg_qq + jcharge*nrg
+                        END IF
+                  END DO
+          END SELECT
+          Compute_Cell_List_CBMC_nrg = nrg_vdw + icharge*nrg_qq
+  END FUNCTION Compute_Cell_List_CBMC_nrg
+
   SUBROUTINE Compute_MoleculePair_Energy(im,is,jm,js,this_box, &
     vlj_pair,vqq_pair,overlap,Eij_qq)
     !***************************************************************************
@@ -1664,9 +4322,9 @@ SUBROUTINE Compute_MoleculePair_Energy(im,is,jm,js,this_box, &
         IF ( .NOT. these_atoms_j(ja)%exist) CYCLE
 
         ! Obtain the minimum image separation
-        rxijp = these_atoms_i(ia)%rxp - these_atoms_j(ja)%rxp
-        ryijp = these_atoms_i(ia)%ryp - these_atoms_j(ja)%ryp
-        rzijp = these_atoms_i(ia)%rzp - these_atoms_j(ja)%rzp
+        rxijp = these_atoms_i(ia)%rp(1) - these_atoms_j(ja)%rp(1)
+        ryijp = these_atoms_i(ia)%rp(2) - these_atoms_j(ja)%rp(2)
+        rzijp = these_atoms_i(ia)%rp(3) - these_atoms_j(ja)%rp(3)
 
         ! Now get the minimum image separation
         CALL Minimum_Image_Separation(this_box,rxijp,ryijp,rzijp,rxij,ryij,rzij)
@@ -1828,8 +4486,8 @@ FUNCTION AtomPair_VdW_Energy_Vector(rijsq,itype,jtype,ibox)
 
   END FUNCTION AtomPair_VdW_Energy_Vector
 
-  SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
-    get_vdw,get_qq,E_intra_vdw,E_intra_qq,E_inter_vdw,E_inter_qq,Eij_qq_o)
+  ELEMENTAL SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
+    get_vdw,get_qq,E_intra_vdw,E_intra_qq,E_inter_vdw,E_inter_qq,Eij_qq_o,excess_flag_o,minimg_flag_o)
 
     ! Computes the vdw and q-q pair energy between atoms ia and ja of molecules
     ! im and jm and species is and js, given their separation rijsq. I have
@@ -1849,17 +4507,19 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
     !   Compute_AtomPair_Ewald_Real
   !----------------------------------------------------------------------------
     ! Passed to
-    REAL(DP) :: rxij,ryij,rzij,rijsq
-    INTEGER :: is,im,ia,js,jm,ja,ibox
-    LOGICAL :: get_vdw,get_qq
+    REAL(DP), INTENT(IN) :: rxij,ryij,rzij,rijsq
+    INTEGER, INTENT(IN) :: is,im,ia,js,jm,ja
+    LOGICAL, INTENT(IN) :: get_vdw,get_qq
+    LOGICAL, INTENT(IN), OPTIONAL :: excess_flag_o, minimg_flag_o
 
     ! Returned
-    REAL(DP) :: E_intra_vdw,E_intra_qq
-    REAL(DP) :: E_inter_vdw,E_inter_qq
+    REAL(DP), INTENT(OUT) :: E_intra_vdw,E_intra_qq
+    REAL(DP), INTENT(OUT) :: E_inter_vdw,E_inter_qq
+    REAL(DP), INTENT(OUT), OPTIONAL :: Eij_qq_o
 
     ! Local
     ! LJ potential
-    INTEGER :: itype, jtype
+    INTEGER :: itype, jtype, ibox
     REAL(DP) :: rij, rcut_vdw
     REAL(DP) :: eps, sig, Eij_vdw, dEij_dr
     REAL(DP) :: SigByR2, SigByR6, SigByR12
@@ -1871,9 +4531,19 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
     REAL(DP) :: SigByR_shift, SigByRn_shift, SigByRm_shift
     ! Coulomb potential
     REAL(DP) :: qi, qj, Eij_qq
-    REAL(DP), OPTIONAL :: Eij_qq_o
 
-    LOGICAL :: atom_i_exist, atom_j_exist
+    LOGICAL :: atom_i_exist, atom_j_exist, excess_flag, minimg_flag
+
+    IF (PRESENT(excess_flag_o)) THEN
+            excess_flag = excess_flag_o
+    ELSE
+            excess_flag = .FALSE.
+    END IF
+    IF (PRESENT(excess_flag_o)) THEN
+            minimg_flag = minimg_flag_o
+    ELSE
+            minimg_flag = .FALSE.
+    END IF
 
     E_intra_vdw = 0.0_DP
     E_intra_qq  = 0.0_DP
@@ -1922,9 +4592,15 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
                    SigByR12 = SigByR6 * SigByR6
 
                    ! use standard LJ potential
-                   Eij_vdw = 4.0_DP * eps * (SigByR12 - SigByR6)
+                   IF (excess_flag .AND. int_vdw_sum_style(ibox) .NE. vdw_cut_switch .AND. .NOT. minimg_flag) THEN
+                           Eij_vdw = 0.0_DP
+                   ELSE
+                           Eij_vdw = 4.0_DP * eps * (SigByR12 - SigByR6)
+                   END IF
 
-                   IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
+                   IF (minimg_flag) THEN
+                           CONTINUE
+                   ELSE IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
                          ! shift the LJ potential
                          SigByR2_shift = sig**2/rcut_vdwsq(ibox)
                          SigByR6_shift = SigByR2_shift * SigByR2_shift * SigByR2_shift
@@ -1942,14 +4618,18 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
                            roffsq_rijsq_sq = roffsq_rijsq * roffsq_rijsq
                            factor2 = switch_factor2(ibox) + 2.0_DP * rijsq
                            fscale = roffsq_rijsq_sq * factor2 * switch_factor1(ibox)
-                           Eij_vdw = fscale * Eij_vdw
                          ELSE
                            fscale = 0.0_DP
-                           Eij_vdw = 0.0_DP
                          END IF
+                         IF (excess_flag) fscale = fscale - 1.0_DP
+                         Eij_vdw = fscale * Eij_vdw
                    ELSE IF (int_vdw_sum_style(ibox) == vdw_charmm) THEN
                          ! use the form for modified LJ potential
-                         Eij_vdw = eps * (SigByR12 - 2.0_DP * SigByR6)
+                         IF (excess_flag) THEN
+                                 Eij_vdw = eps * (2.0_DP * SigByR6 - 3.0_DP * SigByR12)
+                         ELSE
+                                 Eij_vdw = eps * (SigByR12 - 2.0_DP * SigByR6)
+                         END IF
                    ELSE IF (int_vdw_sum_style(ibox) == vdw_cut_shift_force) THEN
                          ! apply the shifted-force LJ potential
                          ! u_sf(r) = u_lj(r) - u_lj(rc) - (r-rc)*du_lj/dr(rc)
@@ -1986,11 +4666,14 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
                    rcut_vdw = SQRT(rcut_vdwsq(ibox))
 
                    mie_coeff = mie_n/(mie_n-mie_m) * (mie_n/mie_m)**(mie_m/(mie_n-mie_m))
-
-                   SigByR = sig/rij
-                   SigByRn = SigByR ** mie_n
-                   SigByRm = SigByR ** mie_m
-                   Eij_vdw =  mie_coeff * eps * (SigByRn - SigByRm)
+                   IF (excess_flag) THEN
+                           Eij_vdw = 0.0_DP
+                   ELSE
+                           SigByR = sig/rij
+                           SigByRn = SigByR ** mie_n
+                           SigByRm = SigByR ** mie_m
+                           Eij_vdw =  mie_coeff * eps * (SigByRn - SigByRm)
+                   END IF
                    !use cut-shift potential
                    IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
                          SigByR_shift = sig/rcut_vdw
@@ -2017,21 +4700,30 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
               qj = nonbond_list(ja,js)%charge
 
 
-              IF (int_charge_sum_style(ibox) == charge_ewald .AND. &
-                      ( .NOT. igas_flag) ) THEN
+              IF (cbmc_flag .AND. cbmc_charge_sf_flag  .AND. .NOT. (igas_flag .OR. minimg_flag)) THEN
+                      IF (is == js .AND. im == jm) THEN
+                              E_intra_qq = E_intra_qq +&
+                                     charge_factor*qi*qj* &
+                                     (charge_intra_scale(ia,ja,is)/SQRT(rijsq) - 2.0_DP/rcut_cbmc(ibox) + SQRT(rijsq)/rcut_cbmcsq(ibox))
+                      ELSE
+                              E_intra_qq = E_intra_qq + &
+                                      charge_factor*qi*qj*(1.0_DP/SQRT(rijsq) - 2.0_DP/rcut_cbmc(ibox) + SQRT(rijsq)/rcut_cbmcsq(ibox))
+                      END IF
+              ELSEIF (int_charge_sum_style(ibox) == charge_ewald .AND. &
+                      .NOT. (igas_flag .OR. minimg_flag) ) THEN
                    ! Real space Ewald part
                    CALL Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj, &
-                        rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o)
+                        rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o,excess_flag_o)
 
                    ! self and reciprocal parts need to be computed as total energy
                    ! differences between original configuration and the perturbed
                    ! configuration.
 
-              ELSEIF (int_charge_sum_style(ibox) == charge_dsf) THEN
-                   CALL Compute_AtomPair_DSF_Energy(ia,im,is,qi,ja,jm,js,qj,rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o)
+              ELSEIF (int_charge_sum_style(ibox) == charge_dsf .AND. .NOT. (igas_flag .OR. minimg_flag)) THEN
+                   CALL Compute_AtomPair_DSF_Energy(ia,im,is,qi,ja,jm,js,qj,rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o,excess_flag_o)
 
-              ELSEIF (int_charge_sum_style(ibox) == charge_cut .OR. &
-                  int_charge_sum_style(ibox) == charge_minimum .OR. igas_flag) THEN
+              ELSEIF ((int_charge_sum_style(ibox) == charge_cut .OR. &
+                  int_charge_sum_style(ibox) == charge_minimum .OR. igas_flag .OR. minimg_flag) .AND. .NOT. excess_flag) THEN
 
                    Eij_qq = charge_factor*(qi*qj)/SQRT(rijsq)
                    ! Apply charge scaling for intramolecular energies
@@ -2052,18 +4744,100 @@ SUBROUTINE Compute_AtomPair_Energy(rxij,ryij,rzij,rijsq,is,im,ia,js,jm,ja, &
 
   END SUBROUTINE Compute_AtomPair_Energy
 
+REAL(DP) FUNCTION Excess_Molecule_Intrafragment_Energy(im,is,ibox)
+        ! This function returns the intrafragment, intramolecular nonbonded energy of a molecule
+        !     minus the intrafragment nonbonded energy that would be computed with simple minimum image
+        !     vdw/charge summation styles (the styles used by fragment library generation simulations).
+        INTEGER, INTENT(IN) :: im, is, ibox
+        INTEGER :: ifrag, ifrag_natoms, ia, ia_frag, ja, ja_frag, maxatom, minatom
+        REAL(DP) :: Eij_intra_vdw, Eij_intra_qq, Eij_inter_vdw, Eij_inter_qq, Etot, rijsq
+        REAL(DP), DIMENSION(3) :: irp, drp
+        LOGICAL :: get_vdw_base, get_vdw, get_qq, get_qq_base
+        TYPE(Atom_Class), DIMENSION(:), POINTER :: these_atoms
+        LOGICAL(1), DIMENSION(natoms(is),natoms(is)) :: already_computed
+        INTEGER(INT8), DIMENSION(natoms(is),natoms(is)) :: n_repeats
+        get_vdw_base = ALL(int_vdw_sum_style(ibox) .NE. (/ vdw_cut, vdw_cut_tail, vdw_none /))
+        get_qq_base = ALL(int_charge_sum_style(ibox) .NE. (/ charge_cut, charge_minimum, charge_none /)) .AND. has_charge(is)
+        Excess_Molecule_Intrafragment_Energy = 0.0_DP
+        IF (scale_1_2_vdw(is) < tiny_number .AND. scale_1_2_charge(is) < tiny_number .AND. .NOT. (get_vdw_base .OR. get_qq_base)) RETURN
+        get_qq = .FALSE.
+        already_computed = .FALSE.
+        n_repeats = 0_INT8
+        IF (widom_active) THEN
+                these_atoms => widom_atoms
+        ELSE
+                these_atoms => atom_list(1:natoms(is),im,is)
+        END IF
+        Etot = 0.0_DP
+        DO ifrag = 1, species_list(is)%nfragments
+                ifrag_natoms = frag_list(ifrag,is)%natoms
+                DO ia_frag = 1, ifrag_natoms
+                        ia = frag_list(ifrag,is)%atoms(ia_frag)
+                        irp = these_atoms(ia)%rp
+                        DO ja_frag = ia_frag+1, ifrag_natoms
+                                ja = frag_list(ifrag,is)%atoms(ja_frag)
+                                maxatom = MAX(ia,ja)
+                                minatom = MIN(ia,ja)
+                                IF (already_computed(maxatom,minatom)) THEN
+                                        n_repeats(maxatom,minatom) = n_repeats(maxatom,minatom) + 1_INT8
+                                        CYCLE
+                                END IF
+                                already_computed(maxatom,minatom) = .TRUE.
+                                get_vdw = get_vdw_base .AND. vdw_intra_scale(ja,ia,is) > 0.0_DP
+                                IF (get_qq_base) get_qq = ALL(nonbond_list((/ minatom,maxatom /),is)%charge .NE. 0.0_DP)
+                                IF (.NOT. (get_vdw .OR. get_qq)) CYCLE
+                                drp = these_atoms(ja)%rp - irp
+                                rijsq = DOT_PRODUCT(drp,drp)
+                                CALL Compute_AtomPair_Energy(drp(1),drp(2),drp(3),rijsq,is,im,ia,is,im,ja, &
+                                        get_vdw,get_qq, Eij_intra_vdw, Eij_intra_qq, Eij_inter_vdw, Eij_inter_qq, &
+                                        excess_flag_o=.TRUE., minimg_flag_o=.FALSE.)
+                                Etot = Etot + Eij_intra_vdw + Eij_intra_qq + Eij_inter_vdw + Eij_inter_qq
+                        END DO
+                END DO
+        END DO
+        IF (scale_1_2_vdw(is) > tiny_number .OR. (scale_1_2_charge(is) > tiny_number .AND. has_charge(is))) THEN
+                DO ia = 1, natoms(is)
+                        DO ja = ia+1, natoms(is)
+                                IF (n_repeats(ja,ia) > 0_INT8) THEN
+                                        get_vdw = vdw_intra_scale(ja,ia,is) > 0.0_DP
+                                        get_qq = charge_intra_scale(ja,ia,is) > 0.0_DP
+                                        IF (.NOT. (get_vdw .OR. get_qq)) CYCLE
+                                        drp = these_atoms(ja)%rp - these_atoms(ia)%rp
+                                        rijsq = DOT_PRODUCT(drp,drp)
+                                        CALL Compute_AtomPair_Energy(drp(1),drp(2),drp(3),rijsq,is,im,ia,is,im,ja, &
+                                                get_vdw,get_qq, Eij_intra_vdw, Eij_intra_qq, Eij_inter_vdw, Eij_inter_qq, &
+                                                excess_flag_o=.FALSE., minimg_flag_o=.TRUE.)
+                                        Etot = Etot - n_repeats(ja,ia)*(Eij_intra_vdw + Eij_intra_qq + Eij_inter_vdw + Eij_inter_qq)
+                                END IF
+                        END DO
+                END DO
+        END IF
+        Excess_Molecule_Intrafragment_Energy = Etot
+END FUNCTION Excess_Molecule_Intrafragment_Energy
 
-SUBROUTINE Compute_AtomPair_DSF_Energy(ia,im,is,qi,ja,jm,js,qj,rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o)
+ELEMENTAL SUBROUTINE Compute_AtomPair_DSF_Energy(ia,im,is,qi,ja,jm,js,qj,rijsq,E_intra_qq,E_inter_qq,ibox,Eij_qq_o,excess_flag_o)
 USE Global_Variables
 IMPLICIT NONE
-INTEGER :: ia,im,is,ja,jm,js,ibox
-REAL(DP), OPTIONAL :: Eij_qq_o
-REAL(DP) :: qi,qj,rijsq,rij, Eij, qsc, E_intra_qq,E_inter_qq, cfqq, Eij_qq
+INTEGER, INTENT(IN) :: ia,im,is,ja,jm,js,ibox
+REAL(DP), INTENT(OUT), OPTIONAL :: Eij_qq_o
+LOGICAL, INTENT(IN), OPTIONAL :: excess_flag_o
+REAL(DP), INTENT(IN) :: qi,qj,rijsq
+REAL(DP), INTENT(OUT) :: E_intra_qq, E_inter_qq
+REAL(DP) :: rij, Eij, qsc, cfqq, Eij_qq
 
 
       rij = SQRT(rijsq)
       cfqq = qi*qj*charge_factor
-      Eij = dsf_factor2(ibox)*(rij-rcut_coul(ibox)) - dsf_factor1(ibox) + erfc(alpha_dsf(ibox)*rij)/rij
+      Eij = dsf_factor2(ibox)*(rij-rcut_coul(ibox)) - dsf_factor1(ibox)
+      IF (PRESENT(excess_flag_o)) THEN
+              IF (excess_flag_o) THEN
+                      Eij = Eij - ERF(alpha_dsf(ibox)*rij)/rij
+                      E_inter_qq = 0.0_DP
+                      E_intra_qq = Eij*cfqq
+                      RETURN
+              END IF
+      END IF
+      Eij = Eij + erfc(alpha_dsf(ibox)*rij)/rij
       Eij = Eij*cfqq
 
       IF (is==js .AND. im==jm) THEN
@@ -2085,8 +4859,8 @@ END SUBROUTINE Compute_AtomPair_DSF_Energy
 
   !-----------------------------------------------------------------------------
 
-  SUBROUTINE Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj,rijsq, &
-    E_intra_qq,E_inter_qq,ibox,Eij_qq_o)
+  ELEMENTAL SUBROUTINE Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj,rijsq, &
+    E_intra_qq,E_inter_qq,ibox,Eij_qq_o,excess_flag_o)
   !-----------------------------------------------------------------------------
     ! Real space part of the Ewald sum between atoms ia and ja with
     ! charges qi and qj.
@@ -2102,14 +4876,16 @@ SUBROUTINE Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj,rijsq, &
     ! Compute_AtomPair_Energy
   !-----------------------------------------------------------------------------
     ! Arguments
-    INTEGER :: ia,im,is
-    REAL(DP) :: qi
-    INTEGER :: ja,jm,js
-    REAL(DP) :: qj
-    REAL(DP) :: rijsq
-    REAL(DP) :: E_intra_qq, E_inter_qq, Eij_qq
-    INTEGER :: ibox
-    REAL(DP), OPTIONAL :: Eij_qq_o
+    INTEGER, INTENT(IN) :: ia,im,is
+    REAL(DP), INTENT(IN) :: qi
+    INTEGER, INTENT(IN) :: ja,jm,js
+    REAL(DP), INTENT(IN) :: qj
+    REAL(DP), INTENT(IN) :: rijsq
+    REAL(DP), INTENT(OUT) :: E_intra_qq, E_inter_qq
+    REAL(DP) :: Eij_qq
+    INTEGER, INTENT(OUT) :: ibox
+    REAL(DP), INTENT(OUT), OPTIONAL :: Eij_qq_o
+    LOGICAL, INTENT(IN), OPTIONAL :: excess_flag_o
 
     ! Local variables
     REAL(DP) :: rij,erf_val
@@ -2117,9 +4893,16 @@ SUBROUTINE Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj,rijsq, &
     ibox = molecule_list(im,is)%which_box
 
     rij = SQRT(rijsq)
+    Eij_qq = qi * qj / rij * charge_factor
+    IF (PRESENT(excess_flag_o)) THEN
+            IF (excess_flag_o) THEN
+                    E_inter_qq = -Eij_qq*ERF(alpha_ewald(ibox)*rij)
+                    E_intra_qq = 0.0_DP
+                    RETURN
+            END IF
+    END IF
     ! May need to protect against very small rijsq
     erf_val = 1.0_DP - erfc(alpha_ewald(ibox) * rij)
-    Eij_qq = qi * qj / rij * charge_factor
 
     ! Minimum image real space energy
     IF (is == js .AND. im == jm) THEN
@@ -2127,198 +4910,367 @@ SUBROUTINE Compute_AtomPair_Ewald_Real(ia,im,is,qi,ja,jm,js,qj,rijsq, &
        E_intra_qq = charge_intra_scale(ia,ja,is) * Eij_qq
        E_inter_qq = 0.0_DP
     ELSE
-       E_intra_qq = 0.0_DP
-       E_inter_qq = Eij_qq
-    ENDIF
-
-    ! Periodic image real space energy
-    E_inter_qq = E_inter_qq - erf_val * Eij_qq
-
-    IF (PRESENT(Eij_qq_o)) Eij_qq_o = Eij_qq
-
-
-  !-----------------------------------------------------------------------------
-  CONTAINS
-
-    FUNCTION erfc(x)
-      !*************************************************************************
-      !
-      ! Calculate the complementary error function for a number
-      !
-      !*************************************************************************
-
-      REAL(DP) :: erfc
-      REAL(DP), PARAMETER :: A1 = 0.254829592_DP, A2 = -0.284496736_DP
-      REAL(DP), PARAMETER :: A3 = 1.421413741_DP, A4 = -1.453152027_DP
-      REAL(DP), PARAMETER :: A5 = 1.061405429_DP, P = 0.3275911_DP
-      REAL(DP) :: T, x, xsq, TP
-
-      T = 1.0_DP / (1.0_DP + P*x)
-      xsq = x*x
-
-      TP = T * (A1 + T * (A2 + T * (A3 + T * (A4 + T * A5))))
-
-      erfc = TP * EXP(-xsq)
-
-    END FUNCTION erfc
-  !-----------------------------------------------------------------------------
-
-  END SUBROUTINE Compute_AtomPair_Ewald_Real
-
-  !*****************************************************************************
-
-  SUBROUTINE Ewald_Reciprocal_Lattice_Vector_Setup(this_box)
-    !***************************************************************************
-    ! This subroutine sets up the reciprocal lattice vector constants required in the reciprocal
-    ! space summation. Note that these constants need to be recomputed every time a volume
-    ! change move is attempted.
-    ! Based on the APSS code, ewald_setup.f90
-    !
-    ! Added by Jindal Shah on 12/05/07
-    !
-    !***************************************************************************
-
-    USE Type_Definitions
-    USE Global_Variables
-
-    IMPLICIT NONE
-
-    INTEGER :: nx, ny, nz, this_box, kvecs, kx_max, ky_max, kz_max
-
-    REAL(DP) :: const_val, hcutsq, x, y, z
-    REAL(DP) :: hx_val, hy_val, hz_val, hsq_val
-
-    ! Total number of k vectors
-    kvecs = 1
-
-    const_val = 1.0_DP/(4.0_DP * alpha_ewald(this_box) * alpha_ewald(this_box))
-    hcutsq = h_ewald_cut(this_box) * h_ewald_cut(this_box)
-
-    IF (box_list(this_box)%int_box_shape == int_cell .OR. box_list(this_box)%int_box_shape == int_ortho) THEN
-
-       ! The most general definition for a wave-vector is h = 2*pi*TRANSPOSE(cell_matrix)^-1)*n
-       ! where h is the wave vector and n is a vector of integers
-
-       ! We will use symmetry about the x to calculate only half the wave vectors
-
-       DO nz = -20, 20
-          DO ny = -20, 20
-             DO nx = 0, 20
-
-                ! Exclude the possiblity for the central  simulation box where h = 0
-
-                IF ( (nx == 0) .AND. (ny == 0) .AND. (nz == 0) ) CYCLE
-
-                x = REAL(nx,DP)
-                y = REAL(ny,DP)
-                z = REAL(nz,DP)
-
-                hx_val = twoPI * (box_list(this_box)%length_inv(1,1)*x + &
-                     box_list(this_box)%length_inv(2,1)*y + box_list(this_box)%length_inv(3,1)*z)
-                hy_val = twoPI * (box_list(this_box)%length_inv(1,2)*x + &
-                     box_list(this_box)%length_inv(2,2)*y + box_list(this_box)%length_inv(3,2)*z)
-                hz_val = twoPI * (box_list(this_box)%length_inv(1,3)*x + &
-                     box_list(this_box)%length_inv(2,3)*y + box_list(this_box)%length_inv(3,3)*z)
-
-                hsq_val = hx_val * hx_val + hy_val * hy_val + hz_val * hz_val
-
-                IF (hsq_val < hcutsq) THEN
-
-                   hx(kvecs,this_box) = hx_val
-                   hy(kvecs,this_box) = hy_val
-                   hz(kvecs,this_box) = hz_val
-                   hsq(kvecs,this_box) = hsq_val
-
-                   ! if x /= 0, multipy the constant by 2 for symmetry
-
-                   IF ( nx == 0 ) THEN
-
-                      Cn(kvecs,this_box) = twoPI / box_list(this_box)%volume &
-                           * DEXP ( -hsq(kvecs,this_box) * const_val ) / hsq(kvecs,this_box)
-
-                   ELSE
-
-                      Cn(kvecs,this_box) = 2.0_DP * twoPI / box_list(this_box)%volume &
-                           * DEXP ( -hsq(kvecs,this_box) * const_val ) / hsq(kvecs,this_box)
-
-                   END IF
-
-                   kvecs = kvecs + 1
-
-                END IF
-
-             END DO
-          END DO
-       END DO
-
-    ELSE
-
-       ! if it is an orthogonal box, then h vectors are simply hx = twoPI * nx / Lx and so on
-
-       ! we will determine the number of reciprocal space vectors needed in each direction
-
-       kz_max = INT ( (h_ewald_cut(this_box) * box_list(this_box)%basis_length(3))/twoPI ) + 1
-       ky_max = INT ( (h_ewald_cut(this_box) * box_list(this_box)%basis_length(2))/twoPI ) + 1
-       kx_max = INT ( (h_ewald_cut(this_box) * box_list(this_box)%basis_length(1))/twoPI ) + 1
+       E_intra_qq = 0.0_DP
+       E_inter_qq = Eij_qq
+    ENDIF
 
+    ! Periodic image real space energy
+    E_inter_qq = E_inter_qq - erf_val * Eij_qq
 
-       DO nz = -kz_max, kz_max
-          DO ny = -ky_max, ky_max
-             DO nx = 0, kx_max
-                IF ( kvecs > maxk) THEN
-                   err_msg = ""
-                   err_msg(1) = 'Total number of k vectors exceeded'
-                   CALL Clean_Abort(err_msg,'Ewald_Reciprocal_Lattice_Vector_Setup')
-                END IF
+    IF (PRESENT(Eij_qq_o)) Eij_qq_o = Eij_qq
 
-                IF ( (nx == 0) .AND. (ny == 0) .AND. (nz == 0)) CYCLE
 
-                hx_val = twoPI * REAL(nx,DP)/box_list(this_box)%basis_length(1)
-                hy_val = twoPI * REAL(ny,DP)/box_list(this_box)%basis_length(2)
-                hz_val = twoPI * REAL(nz,DP)/box_list(this_box)%basis_length(3)
+  !-----------------------------------------------------------------------------
+  CONTAINS
 
-                hsq_val = hx_val * hx_val + hy_val * hy_val + hz_val * hz_val
+    ELEMENTAL FUNCTION erfc(x)
+      !*************************************************************************
+      !
+      ! Calculate the complementary error function for a number
+      !
+      !*************************************************************************
 
-                IF (hsq_val < hcutsq ) THEN
+      REAL(DP) :: erfc
+      REAL(DP), PARAMETER :: A1 = 0.254829592_DP, A2 = -0.284496736_DP
+      REAL(DP), PARAMETER :: A3 = 1.421413741_DP, A4 = -1.453152027_DP
+      REAL(DP), PARAMETER :: A5 = 1.061405429_DP, P = 0.3275911_DP
+      REAL(DP), INTENT(IN) :: x
+      REAL(DP) :: T, xsq, TP
 
-                   hx(kvecs,this_box) = hx_val
-                   hy(kvecs,this_box) = hy_val
-                   hz(kvecs,this_box) = hz_val
-                   hsq(kvecs,this_box) = hsq_val
+      T = 1.0_DP / (1.0_DP + P*x)
+      xsq = x*x
 
-                   !hsq(kvecs,this_box) = hx(kvecs,this_box)*hx(kvecs,this_box) + &
-                   ! hy(kvecs,this_box)*hy(kvecs,this_box) + hz(kvecs,this_box)*hz(kvecs,this_box)
+      TP = T * (A1 + T * (A2 + T * (A3 + T * (A4 + T * A5))))
 
-                   ! if x /= 0, multipy the constant by 2 for symmetry
-                   IF ( nx == 0 ) THEN
+      erfc = TP * EXP(-xsq)
 
-                      Cn(kvecs,this_box) = twoPI / box_list(this_box)%volume &
-                           * DEXP ( - hsq(kvecs,this_box) * const_val ) / hsq(kvecs,this_box)
+    END FUNCTION erfc
+  !-----------------------------------------------------------------------------
 
-                   ELSE
+  END SUBROUTINE Compute_AtomPair_Ewald_Real
 
-                      Cn(kvecs,this_box) = 2.0_DP * twoPI / box_list(this_box)%volume &
-                           * DEXP ( - hsq(kvecs,this_box) * const_val ) / hsq(kvecs,this_box)
+  !*****************************************************************************
 
-                   END IF
 
-                   kvecs = kvecs + 1
+  ELEMENTAL SUBROUTINE Extract_Kvector_Ints(kxyz,kx,ky,kz)
+          !DIR$ ATTRIBUTES FORCEINLINE :: Extract_Kvector_Ints
+          INTEGER, INTENT(IN) :: kxyz
+          INTEGER, INTENT(OUT) :: kx, ky, kz
+          INTEGER, PARAMETER :: tmask = MASKR(11)
+          ky = SHIFTR(kxyz,11)
+          kz = SHIFTR(kxyz,22)
+          kx = IAND(kxyz,tmask)
+          kx = kx - kxyz_max_allowed
+          ky = IAND(ky,tmask)
+          ky = ky - kxyz_max_allowed
+  END SUBROUTINE Extract_Kvector_Ints
 
-                END IF
+  SUBROUTINE Ewald_Reciprocal_Lattice_Vector_Setup(this_box)
+    !***************************************************************************
+    ! This subroutine sets up the reciprocal lattice vector constants required in the reciprocal
+    ! space summation. Note that these constants need to be recomputed every time a volume
+    ! change move is attempted.
+    ! Based on the APSS code, ewald_setup.f90
+    !
+    ! Added by Jindal Shah on 12/05/07
+    !
+    !***************************************************************************
 
-             END DO
-          END DO
-       END DO
+    USE Type_Definitions
+    USE Global_Variables
 
-    END IF
+    IMPLICIT NONE
 
+    INTEGER :: nx, ny, nz, this_box, kvecs, kvecs_p4
 
+    REAL(DP) :: const_val, const_val_2, hcutsq, x, y, z
+    REAL(DP) :: hx, hy, hz, hsq
+    INTEGER :: i
+    LOGICAL :: l_inrange
+    LOGICAL, DIMENSION(:), ALLOCATABLE, SAVE :: l_inrange_vec
+    INTEGER :: nchecks
+    LOGICAL :: l_ortho
+    REAL(DP) :: prefactor_constant, double_prefactor_constant
+    REAL(SP) :: hcutsq_sp
+    INTEGER :: kxyz_max(3)
+    INTEGER(INT64) :: kxyz_max_int64(3), nchecks_int64
+    REAL(DP), DIMENSION(3) :: inv_l, inv_lsq
+    REAL(DP), DIMENSION(3,3) :: inv_H
+    REAL(SP), DIMENSION(3) :: inv_lsq_sp
+    REAL(SP), DIMENSION(3,3) :: inv_H_sp
+    INTEGER :: tcount
+    REAL(DP) :: prefactor, factor1, factor2
+    INTEGER :: kx, ky, kz, kxyz, kx_shifted, ky_shifted1, ky_shifted2, kz_shifted
+    REAL(SP) :: xsq_sp,ysq_sp,zsq_sp,x_sp,y_sp,z_sp,hx_sp,hy_sp,hz_sp
+    INTEGER :: ky_dimfactor, kz_dimfactor, ky_dimshift, kx_dimshift 
+    INTEGER :: kx_shifted_start, ky_shifted_start, kx_shifted_end, ky_shifted_end
+    INTEGER :: shiftconst
+    INTEGER :: ithread, chunkstart, chunkend, chunksize, nthreads
+
+    ky_dimfactor = kxyz_max(1)*2+1
+    kz_dimfactor = ky_dimfactor*(kxyz_max(2)*2+1)
+    ky_dimshift = kxyz_max(2)-kxyz_max_allowed
+    kx_dimshift = kxyz_max(1)-kxyz_max_allowed + 1
+    kx_shifted_start = kxyz_max_allowed - kxyz_max(1)
+    ky_shifted_start = kxyz_max_allowed - kxyz_max(2)
+    kx_shifted_end = kxyz_max_allowed + kxyz_max(1)
+    ky_shifted_end = kxyz_max_allowed + kxyz_max(2)
+
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+    prefactor_constant = twoPI/box_list(this_box)%volume
+    double_prefactor_constant = 2.0_DP*prefactor_constant
 
-    ! nvecs points to where the next wave_vector should be written, i. e. it is too high by 1
-    nvecs(this_box) = kvecs - 1
-    ! Note that at this point we do not allocate the memory for cos_sum and sin_sum arrays.
-    ! It will have to be decided by the maximum number of k vectors encountered in all the boxes
+    ! Total number of k vectors
+    !kvecs = 1
 
+    !const_val = 1.0_DP/(4.0_DP * alpha_ewald(this_box) * alpha_ewald(this_box))
+    const_val_2 = -0.5_DP/(alpha_ewald(this_box) * alpha_ewald(this_box))
+    const_val = const_val_2*0.5_DP
+    hcutsq = h_ewald_cut(this_box) * h_ewald_cut(this_box)
+    hcutsq_sp = REAL(hcutsq,SP)
+
+    kxyz_max = INT( h_ewald_cut(this_box) / (twoPI * box_list(this_box)%invT_face_distance)) + 1
+    IF (ANY(kxyz_max>kxyz_max_allowed)) THEN
+            WRITE(logunit,*) kxyz_max
+            WRITE(*,*) kxyz_max
+            err_msg = ""
+            err_msg(1) = "At least one of the above kspace vector integer component maxima"
+            err_msg(2) = "exceeds the maximum allowed value, " // TRIM(Int_To_String(kxyz_max_allowed))
+            CALL Clean_Abort(err_msg,'Ewald_Reciprocal_Lattice_Vector_Setup')
+    END IF
+    kxyz_max_int64 = INT(kxyz_max,INT64)
+    box_list(this_box)%kxyz_maxmax = MAXVAL(kxyz_max)
+    box_list(this_box)%kxyz_max = kxyz_max
+    nchecks_int64 = PRODUCT((/2_INT64,2_INT64,1_INT64/)*kxyz_max_int64+1_INT64)
+    IF (nchecks_int64 >= HUGE(nchecks)) THEN
+            err_msg = ""
+            err_msg(1) = 'Too many kspace vectors to check'
+            CALL Clean_Abort(err_msg,'Ewald_Reciprocal_Lattice_Vector_Setup')
+    END IF
+    nchecks = INT(nchecks_int64,INT32) ! PRODUCT((/2,2,1/)*kxyz_max+1)
+    IF (l_ortho) THEN
+            inv_l(1) = twoPI/box_list(this_box)%length(1,1)
+            inv_l(2) = twoPI/box_list(this_box)%length(2,2)
+            inv_l(3) = twoPI/box_list(this_box)%length(3,3)
+            inv_lsq = inv_l*inv_l
+            inv_lsq_sp = REAL(inv_lsq,SP)
+    ELSE
+            inv_H = twoPI*box_list(this_box)%length_inv
+            inv_H_sp = REAL(inv_H,SP)
+    END IF
+    kvecs = -1 ! exclude central box
+    IF (ALLOCATED(box_list(this_box)%kspace_vector_ints)) THEN
+            IF (UBOUND(box_list(this_box)%kspace_vector_ints,1) < nchecks) DEALLOCATE(box_list(this_box)%kspace_vector_ints)
+    END IF
+    IF (.NOT. ALLOCATED(box_list(this_box)%kspace_vector_ints)) THEN
+            ALLOCATE(box_list(this_box)%kspace_vector_ints(nchecks))
+    END IF
+    IF (ALLOCATED(l_inrange_vec)) THEN
+            IF (UBOUND(l_inrange_vec,1) < nchecks) DEALLOCATE(l_inrange_vec)
+    END IF
+    IF (.NOT. ALLOCATED(l_inrange_vec)) THEN
+            ALLOCATE(l_inrange_vec(nchecks))
+    END IF
+    ky_dimfactor = kxyz_max(1)*2+1
+    kz_dimfactor = ky_dimfactor*(kxyz_max(2)*2+1)
+    ky_dimshift = kxyz_max(2)-kxyz_max_allowed
+    kx_dimshift = kxyz_max(1)-kxyz_max_allowed + 1
+    kx_shifted_start = kxyz_max_allowed - kxyz_max(1)
+    ky_shifted_start = kxyz_max_allowed - kxyz_max(2)
+    kx_shifted_end = kxyz_max_allowed + kxyz_max(1)
+    ky_shifted_end = kxyz_max_allowed + kxyz_max(2)
+    ! The most general definition for a wave-vector is h = 2*pi*TRANSPOSE(cell_matrix)^-1)*n
+    ! where h is the wave vector and n is a vector of integers
+
+    ! We will use symmetry to calculate only half the wave vectors
+
+    !$OMP PARALLEL PRIVATE(kz,ky,kx,kxyz,x,y,z,hx,hy,hz,hsq,l_inrange,i) &
+    !$OMP PRIVATE(prefactor,factor1,factor2) &
+    !$OMP PRIVATE(xsq_sp,ysq_sp,zsq_sp,x_sp,y_sp,z_sp,hx_sp,hy_sp,hz_sp) &
+    !$OMP PRIVATE(kx_shifted,ky_shifted1,kz_shifted,ky_shifted2,shiftconst) &
+    !$OMP PRIVATE(ithread,chunkstart,chunkend)
+    !$OMP DO SCHEDULE(STATIC)
+    DO ky_shifted1 = ky_shifted_start, ky_shifted_end
+        ky_shifted2 = SHIFTL(ky_shifted1,11)
+        shiftconst = (ky_shifted1 + ky_dimshift)*ky_dimfactor + kx_dimshift
+        !$OMP SIMD
+        DO kx_shifted = kx_shifted_start, kx_shifted_end
+                box_list(this_box)%kspace_vector_ints(shiftconst+kx_shifted) = IOR(kx_shifted,ky_shifted2)
+        END DO
+        !$OMP END SIMD
+    END DO
+    !$OMP END DO
+    !$OMP DO SCHEDULE(STATIC)
+    DO kz = 1, kxyz_max(3)
+        kz_shifted = SHIFTL(kz,22)
+        shiftconst = kz*kz_dimfactor
+        !$OMP SIMD
+        DO i = 1, kz_dimfactor
+                box_list(this_box)%kspace_vector_ints(shiftconst+i) = &
+                        IOR(box_list(this_box)%kspace_vector_ints(i),kz_shifted)
+        END DO
+        !$OMP END SIMD
+        !box_list(this_box)%kspace_vector_ints(kz*kz_dimfactor+1:(kz+1)*kz_dimfactor) = &
+        !        IOR(box_list(this_box)%kspace_vector_ints(:kz_dimfactor),kz_shifted)
+    END DO
+    !$OMP END DO
+    IF (l_ortho) THEN
+            ! if it is an orthogonal box, then h vectors are simply hx = twoPI * kx / Lx and so on
+            !DIR$ VECTOR ALIGNED
+            !$OMP DO SIMD SCHEDULE(SIMD:STATIC) &
+            !$OMP PRIVATE(kxyz,kx,ky,kz,xsq_sp,ysq_sp,zsq_sp,l_inrange) &
+            !$OMP REDUCTION(+:kvecs)
+            DO i = 1, nchecks
+                kxyz = box_list(this_box)%kspace_vector_ints(i)
+                CALL Extract_Kvector_Ints(kxyz,kx,ky,kz) ! Should be inlined by compiler
+                kx = kx*kx
+                ky = ky*ky
+                kz = kz*kz
+                xsq_sp = REAL(kx,SP)
+                ysq_sp = REAL(ky,SP)
+                zsq_sp = REAL(kz,SP)
+                l_inrange = inv_lsq_sp(1)*xsq_sp + inv_lsq_sp(2)*ysq_sp + inv_lsq_sp(3)*zsq_sp < hcutsq_sp
+                l_inrange_vec(i) = l_inrange
+                IF (l_inrange) kvecs = kvecs + 1
+            END DO
+            !$OMP END DO SIMD
+    ELSE
+            !DIR$ VECTOR ALIGNED
+            !$OMP DO SIMD SCHEDULE(SIMD:STATIC) &
+            !$OMP PRIVATE(kxyz,kx,ky,kz,x_sp,y_sp,z_sp,hx_sp,hy_sp,hz_sp,l_inrange) &
+            !$OMP REDUCTION(+:kvecs)
+            DO i = 1, nchecks
+                kxyz = box_list(this_box)%kspace_vector_ints(i)
+                CALL Extract_Kvector_Ints(kxyz,kx,ky,kz) ! Should be inlined by compiler
+                x_sp = REAL(kx,SP)
+                y_sp = REAL(ky,SP)
+                z_sp = REAL(kz,SP)
+                hx_sp = inv_H_sp(1,1)*x_sp + inv_H_sp(2,1)*y_sp + inv_H_sp(3,1)*z_sp
+                hy_sp = inv_H_sp(1,2)*x_sp + inv_H_sp(2,2)*y_sp + inv_H_sp(3,2)*z_sp
+                hz_sp = inv_H_sp(1,3)*x_sp + inv_H_sp(2,3)*y_sp + inv_H_sp(3,3)*z_sp
+                l_inrange = hx_sp*hx_sp + hy_sp*hy_sp + hz_sp*hz_sp < hcutsq_sp
+                l_inrange_vec(i) = l_inrange
+                IF (l_inrange) kvecs = kvecs + 1
+            END DO
+            !$OMP END DO SIMD
+    END IF
+    !$OMP SECTIONS
+    !$OMP SECTION
+    l_inrange_vec(1+kxyz_max(2)*(kxyz_max(1)*2+1)+kxyz_max(1)) = .FALSE. ! Exclude central box (0,0,0)
+    tcount = 0
+    i = 0
+    DO WHILE (tcount < kvecs)
+        i = i + 1
+        IF (l_inrange_vec(i)) THEN
+                tcount = tcount + 1
+                box_list(this_box)%kspace_vector_ints(tcount) = box_list(this_box)%kspace_vector_ints(i)
+        END IF
+    END DO
+    !$OMP SECTION
+    nvecs(this_box) = kvecs
+    kvecs_p4 = IAND(kvecs+padconst_8byte,padmask_8byte)
+    IF (ALLOCATED(box_list(this_box)%kspace_vectors)) THEN
+            IF (UBOUND(box_list(this_box)%kspace_vectors,1) < kvecs) DEALLOCATE(box_list(this_box)%kspace_vectors)
+    END IF
+    IF (.NOT. ALLOCATED(box_list(this_box)%kspace_vectors)) THEN
+            ALLOCATE(box_list(this_box)%kspace_vectors(kvecs_p4,5))
+    END IF
+    IF (ALLOCATED(box_list(this_box)%sincos_sum)) THEN
+            IF (UBOUND(box_list(this_box)%sincos_sum,1) < kvecs) DEALLOCATE(box_list(this_box)%sincos_sum)
+    END IF
+    IF (.NOT. ALLOCATED(box_list(this_box)%sincos_sum)) THEN
+            ALLOCATE(box_list(this_box)%sincos_sum(kvecs_p4,2))
+    END IF
+    !$ nthreads = OMP_GET_NUM_THREADS()
+    !$ chunksize = IAND((kvecs+nthreads-1)/nthreads+padconst_4byte,padmask_4byte)
+    !$OMP END SECTIONS
+    chunkstart = 1
+    chunkend = kvecs
+    !$ ithread = OMP_GET_THREAD_NUM()
+    !$ chunkstart = ithread*chunksize+1
+    !$ chunkend = MIN((ithread+1)*chunksize,kvecs)
+    IF (l_ortho) THEN
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(kxyz,kx,ky,kz,x,y,z,prefactor)
+            DO i = chunkstart, chunkend
+                kxyz = box_list(this_box)%kspace_vector_ints(i)
+                CALL Extract_Kvector_Ints(kxyz,kx,ky,kz) ! Should be inlined by compiler
+                z = REAL(kz,DP)
+                x = REAL(kx,DP)
+                y = REAL(ky,DP)
+                prefactor = MERGE(prefactor_constant,double_prefactor_constant,kz==0)
+                box_list(this_box)%kspace_vectors(i,1) = x
+                box_list(this_box)%kspace_vectors(i,2) = y
+                box_list(this_box)%kspace_vectors(i,3) = z
+                box_list(this_box)%kspace_vectors(i,5) = prefactor
+            END DO
+            !$OMP END SIMD
+            ! if it is an orthogonal box, then h vectors are simply hx = twoPI * kx / Lx and so on
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(x,y,z,hx,hy,hz,hsq)
+            DO i = chunkstart, chunkend
+                x = box_list(this_box)%kspace_vectors(i,1)
+                y = box_list(this_box)%kspace_vectors(i,2)
+                z = box_list(this_box)%kspace_vectors(i,3)
+                hx = inv_l(1)*x
+                hy = inv_l(2)*y
+                hz = inv_l(3)*z
+                hsq = hx*hx + hy*hy + hz*hz
+                box_list(this_box)%kspace_vectors(i,1) = hx
+                box_list(this_box)%kspace_vectors(i,2) = hy
+                box_list(this_box)%kspace_vectors(i,3) = hz
+                box_list(this_box)%kspace_vectors(i,4) = hsq
+            END DO
+            !$OMP END SIMD
+    ELSE
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(kxyz,kx,ky,kz,x,y,z,prefactor)
+            DO i = chunkstart, chunkend
+                kxyz = box_list(this_box)%kspace_vector_ints(i)
+                CALL Extract_Kvector_Ints(kxyz,kx,ky,kz) ! Should be inlined by compiler
+                z = REAL(kz,DP)
+                x = REAL(kx,DP)
+                y = REAL(ky,DP)
+                prefactor = MERGE(prefactor_constant,double_prefactor_constant,kz==0)
+                box_list(this_box)%kspace_vectors(i,1) = x
+                box_list(this_box)%kspace_vectors(i,2) = y
+                box_list(this_box)%kspace_vectors(i,3) = z
+                box_list(this_box)%kspace_vectors(i,5) = prefactor
+            END DO
+            !$OMP END SIMD
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(x,y,z,hx,hy,hz,hsq)
+            DO i = chunkstart, chunkend
+                x = box_list(this_box)%kspace_vectors(i,1)
+                y = box_list(this_box)%kspace_vectors(i,2)
+                z = box_list(this_box)%kspace_vectors(i,3)
+                hx = inv_H(1,1)*x + inv_H(2,1)*y + inv_H(3,1)*z
+                hy = inv_H(1,2)*x + inv_H(2,2)*y + inv_H(3,2)*z
+                hz = inv_H(1,3)*x + inv_H(2,3)*y + inv_H(3,3)*z
+                hsq = hx*hx + hy*hy + hz*hz
+                box_list(this_box)%kspace_vectors(i,1) = hx
+                box_list(this_box)%kspace_vectors(i,2) = hy
+                box_list(this_box)%kspace_vectors(i,3) = hz
+                box_list(this_box)%kspace_vectors(i,4) = hsq
+            END DO
+            !$OMP END SIMD
+    END IF
+    !DIR$ VECTOR ALIGNED
+    !$OMP SIMD PRIVATE(hsq,prefactor,factor1,factor2)
+    DO i = chunkstart, chunkend
+             hsq = box_list(this_box)%kspace_vectors(i,4)
+             prefactor = box_list(this_box)%kspace_vectors(i,5)
+             factor2 = 1.0_DP/hsq
+             prefactor = prefactor*factor2
+             factor1 = EXP(hsq*const_val)
+             factor1 = prefactor*factor1
+             factor2 = const_val_2 - 2.0_DP*factor2
+             factor2 = factor2*factor1
+             box_list(this_box)%kspace_vectors(i,4) = factor1
+             box_list(this_box)%kspace_vectors(i,5) = factor2
+    END DO
+    !$OMP END SIMD
+    !$OMP END PARALLEL
 
   END SUBROUTINE Ewald_Reciprocal_Lattice_Vector_Setup
   !*****************************************************************************
@@ -2356,8 +5308,11 @@ SUBROUTINE Update_System_Ewald_Reciprocal_Energy(im,is,ibox, &
     REAL(DP), INTENT(OUT) :: E_reciprocal
 
     ! Local variables
-    REAL(DP) :: q
-    INTEGER :: i, ia, jm, js
+    REAL(DP) :: q, hx, hy, hz, sin_sum_i, cos_sum_i, factor
+    INTEGER :: i, ia, jm, js, kvecs, kvecs_p4
+    INTEGER :: n_charged_atoms
+    REAL(DP), DIMENSION(4,natoms(is)) :: rpq
+    REAL(DP) :: charge
 
     REAL(DP) :: hdotr
 
@@ -2369,10 +5324,19 @@ SUBROUTINE Update_System_Ewald_Reciprocal_Energy(im,is,ibox, &
     ! Initialize variables
     E_reciprocal = 0.0_DP
 
-    !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-    cos_sum_old(1:nvecs(ibox),ibox) = cos_sum(1:nvecs(ibox),ibox)
-    sin_sum_old(1:nvecs(ibox),ibox) = sin_sum(1:nvecs(ibox),ibox)
-    !$OMP END PARALLEL WORKSHARE
+    kvecs = nvecs(ibox)
+    kvecs_p4 = IAND(kvecs+padconst_8byte,padmask_8byte)
+
+    IF (ALLOCATED(box_list(ibox)%sincos_sum_old)) THEN
+            IF (UBOUND(box_list(ibox)%sincos_sum_old,1) .NE. &
+                    UBOUND(box_list(ibox)%sincos_sum,1)) THEN
+                    DEALLOCATE(box_list(ibox)%sincos_sum_old)
+            END IF
+    END IF
+    IF (.NOT. ALLOCATED(box_list(ibox)%sincos_sum_old)) THEN
+        ALLOCATE(box_list(ibox)%sincos_sum_old(UBOUND(box_list(ibox)%sincos_sum,1),2))
+    END IF
+
 
     ! get the location of im for cos_mol, sin_mol arrays
     IF (is==1) THEN
@@ -2380,118 +5344,149 @@ SUBROUTINE Update_System_Ewald_Reciprocal_Energy(im,is,ibox, &
     ELSE
       im_locate = SUM(max_molecules(1:is-1)) + im
     END IF
+    n_charged_atoms = 0
+    DO ia = 1, natoms(is)
+        charge = nonbond_list(ia,is)%charge
+        IF (charge == 0.0_DP) CYCLE
+        n_charged_atoms = n_charged_atoms+1
+        rpq(1:3,n_charged_atoms) = atom_list(ia,im,is)%rp
+        rpq(4,n_charged_atoms) = charge
+    END DO
 
-    IF ( move_flag == int_translation .OR. move_flag == int_rotation .OR. &
-         move_flag == int_intra ) THEN
-
-      ! only the particle coordinates change. Therefore, the contribution of
-      ! cos(hdotr) and sin(hdotr) of the old coordinates will be subtracted
-      ! off for each of reciprocal vectors and corresponding terms for the new
-      ! coordinates are added.
-
-      ! Note that the flag INTRA will refer to any of the moves that
-      ! correspond to the intramolecular DOF change.
-
-      !$OMP PARALLEL DO DEFAULT(SHARED) &
-      !$OMP PRIVATE(i,ia,cos_mol_im,sin_mol_im) &
-      !$OMP PRIVATE(cos_mol_im_o, sin_mol_im_o) &
-      !$OMP PRIVATE(hdotr, q) &
-      !$OMP SCHEDULE(STATIC) &
-      !$OMP REDUCTION(+:E_reciprocal)
-      DO i = 1, nvecs(ibox)
-
-        cos_mol_im = 0.0_DP
-        sin_mol_im = 0.0_DP
-
-        DO ia = 1,natoms(is)
-          ! compute the new hdotr
-          hdotr = hx(i,ibox) * atom_list(ia,im,is)%rxp + &
-                  hy(i,ibox) * atom_list(ia,im,is)%ryp + &
-                  hz(i,ibox) * atom_list(ia,im,is)%rzp
-
-          q = nonbond_list(ia,is)%charge
-          cos_mol_im = cos_mol_im + q * DCOS(hdotr)
-          sin_mol_im = sin_mol_im + q * DSIN(hdotr)
-        END DO
-
-        cos_mol_im_o = cos_mol(i,im_locate)
-        sin_mol_im_o = sin_mol(i,im_locate)
-
-        cos_sum(i,ibox) = cos_sum(i,ibox) + (cos_mol_im - cos_mol_im_o)
-        sin_sum(i,ibox) = sin_sum(i,ibox) + (sin_mol_im - sin_mol_im_o)
-
-        E_reciprocal = E_reciprocal + cn(i,ibox) &
-                     * (cos_sum(i,ibox) * cos_sum(i,ibox) &
-                     + sin_sum(i,ibox) * sin_sum(i,ibox))
-
-        ! set the molecules cos and sin terms to the one calculated here
-        cos_mol(i,im_locate) = cos_mol_im
-        sin_mol(i,im_locate) = sin_mol_im
-
-      END DO
-      !$OMP END PARALLEL DO
-
-    ELSE IF ( move_flag == int_deletion) THEN
-
-      ! We need to subtract off the cos(hdotr) and sin(hdotr) for each of the
-      ! k vectors.
-
-      !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-      cos_sum(1:nvecs(ibox),ibox) = cos_sum(1:nvecs(ibox),ibox) &
-                            - cos_mol(1:nvecs(ibox),im_locate)
-      sin_sum(1:nvecs(ibox),ibox) = sin_sum(1:nvecs(ibox),ibox) &
-                            - sin_mol(1:nvecs(ibox),im_locate)
-      !$OMP END PARALLEL WORKSHARE
-
-      !$OMP PARALLEL DO DEFAULT(SHARED) &
-      !$OMP PRIVATE(i) &
-      !$OMP SCHEDULE(STATIC) &
-      !$OMP REDUCTION(+:E_reciprocal)
-      DO i = 1, nvecs(ibox)
-
-        E_reciprocal = E_reciprocal + cn(i,ibox) &
-                     * ( cos_sum(i,ibox) * cos_sum(i,ibox) &
-                       + sin_sum(i,ibox) * sin_sum(i,ibox) )
-
-      END DO
-      !$OMP END PARALLEL DO
-
-    ELSE IF ( move_flag == int_insertion ) THEN
-
-      !$OMP PARALLEL DO DEFAULT(SHARED) &
-      !$OMP PRIVATE(i, ia, hdotr, q) &
-      !$OMP SCHEDULE(STATIC) &
-      !$OMP REDUCTION(+:E_reciprocal)
-
-      DO i = 1, nvecs(ibox)
-
-        cos_mol(i,im_locate) = 0.0_DP
-        sin_mol(i,im_locate) = 0.0_DP
-
-        DO ia = 1, natoms(is)
-          ! Compute the new hdotr vector
-          hdotr = hx(i,ibox) * atom_list(ia,im,is)%rxp + &
-                  hy(i,ibox) * atom_list(ia,im,is)%ryp + &
-                  hz(i,ibox) * atom_list(ia,im,is)%rzp
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ia) &
+    !$OMP PRIVATE(cos_mol_im,sin_mol_im) &
+    !$OMP PRIVATE(cos_mol_im_o, sin_mol_im_o) &
+    !$OMP PRIVATE(hdotr, q, hx, hy, hz, sin_sum_i, cos_sum_i, factor)
+    SELECT CASE(move_flag)
+    CASE(int_translation,int_rotation,int_intra)
+            !DIR$ VECTOR ALIGNED
+            !$OMP DO SIMD SCHEDULE(SIMD:STATIC) &
+            !$OMP PRIVATE(cos_mol_im,sin_mol_im) &
+            !$OMP PRIVATE(cos_mol_im_o, sin_mol_im_o) &
+            !$OMP PRIVATE(hdotr, q, hx, hy, hz, sin_sum_i, cos_sum_i, factor) &
+            !$OMP REDUCTION(+:E_reciprocal)
+            DO i = 1, nvecs(ibox)
+
+              cos_mol_im = 0.0_DP
+              sin_mol_im = 0.0_DP
+              hx = box_list(ibox)%kspace_vectors(i,1)
+              hy = box_list(ibox)%kspace_vectors(i,2)
+              hz = box_list(ibox)%kspace_vectors(i,3)
+
+              DO ia = 1, n_charged_atoms
+                ! compute the new hdotr
+                hdotr = hx * rpq(1,ia) + &
+                        hy * rpq(2,ia) + &
+                        hz * rpq(3,ia)
+
+                q = rpq(4,ia)
+                cos_mol_im = cos_mol_im + q*COS(hdotr)
+                sin_mol_im = sin_mol_im + q*SIN(hdotr)
+              END DO
+
+              sin_sum_i = box_list(ibox)%sincos_sum(i,1)
+              cos_sum_i = box_list(ibox)%sincos_sum(i,2)
+              factor = box_list(ibox)%kspace_vectors(i,4)
+              box_list(ibox)%sincos_sum_old(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum_old(i,2) = cos_sum_i
+              cos_mol_im_o = cos_mol(i,im_locate)
+              sin_mol_im_o = sin_mol(i,im_locate)
+              cos_mol(i,0) = cos_mol_im_o
+              sin_mol(i,0) = sin_mol_im_o
+              cos_sum_i = cos_sum_i + (cos_mol_im - cos_mol_im_o)
+              sin_sum_i = sin_sum_i + (sin_mol_im - sin_mol_im_o)
+              ! set the molecule's cos and sin terms to the one calculated here
+              cos_mol(i,im_locate) = cos_mol_im
+              sin_mol(i,im_locate) = sin_mol_im
+
+              box_list(ibox)%sincos_sum(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum(i,2) = cos_sum_i
+
+              E_reciprocal = E_reciprocal + factor &
+                           * (cos_sum_i * cos_sum_i &
+                           + sin_sum_i * sin_sum_i)
 
-          q = nonbond_list(ia,is)%charge
-          cos_mol(i,im_locate) = cos_mol(i,im_locate) + q * DCOS(hdotr)
-          sin_mol(i,im_locate) = sin_mol(i,im_locate) + q * DSIN(hdotr)
-        END DO
 
-        cos_sum(i,ibox) = cos_sum(i,ibox) &
-                        + cos_mol(i,im_locate)
-        sin_sum(i,ibox) = sin_sum(i,ibox) &
-                        + sin_mol(i,im_locate)
+            END DO
+            !$OMP END DO SIMD
+    CASE(int_insertion)
+            !DIR$ VECTOR ALIGNED
+            !$OMP DO SIMD SCHEDULE(SIMD:STATIC) &
+            !$OMP PRIVATE(cos_mol_im,sin_mol_im) &
+            !$OMP PRIVATE(cos_mol_im_o, sin_mol_im_o) &
+            !$OMP PRIVATE(hdotr, q, hx, hy, hz, sin_sum_i, cos_sum_i, factor) &
+            !$OMP REDUCTION(+:E_reciprocal)
+            DO i = 1, nvecs(ibox)
+
+              cos_mol_im = 0.0_DP
+              sin_mol_im = 0.0_DP
+              hx = box_list(ibox)%kspace_vectors(i,1)
+              hy = box_list(ibox)%kspace_vectors(i,2)
+              hz = box_list(ibox)%kspace_vectors(i,3)
+
+              DO ia = 1, n_charged_atoms
+                ! compute the new hdotr
+                hdotr = hx * rpq(1,ia) + &
+                        hy * rpq(2,ia) + &
+                        hz * rpq(3,ia)
+
+                q = rpq(4,ia)
+                cos_mol_im = cos_mol_im + q*COS(hdotr)
+                sin_mol_im = sin_mol_im + q*SIN(hdotr)
+              END DO
+
+              sin_sum_i = box_list(ibox)%sincos_sum(i,1)
+              cos_sum_i = box_list(ibox)%sincos_sum(i,2)
+              factor = box_list(ibox)%kspace_vectors(i,4)
+              box_list(ibox)%sincos_sum_old(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum_old(i,2) = cos_sum_i
+              cos_sum_i = cos_sum_i + cos_mol_im
+              sin_sum_i = sin_sum_i + sin_mol_im
+              ! set the molecule's cos and sin terms to the one calculated here
+              cos_mol(i,im_locate) = cos_mol_im
+              sin_mol(i,im_locate) = sin_mol_im
+
+              box_list(ibox)%sincos_sum(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum(i,2) = cos_sum_i
+
+              E_reciprocal = E_reciprocal + factor &
+                           * (cos_sum_i * cos_sum_i &
+                           + sin_sum_i * sin_sum_i)
 
-        E_reciprocal = E_reciprocal + cn(i,ibox) &
-                     * ( cos_sum(i,ibox) * cos_sum(i,ibox) &
-                       + sin_sum(i,ibox) * sin_sum(i,ibox) )
 
-      END DO
-      !$OMP END PARALLEL DO
-    END IF
+            END DO
+            !$OMP END DO SIMD
+    CASE(int_deletion)
+            !DIR$ VECTOR ALIGNED
+            !$OMP DO SIMD &
+            !$OMP PRIVATE(factor, cos_sum_i, sin_sum_i) &
+            !$OMP PRIVATE(cos_mol_im_o,sin_mol_im_o) &
+            !$OMP SCHEDULE(SIMD:STATIC) &
+            !$OMP REDUCTION(+:E_reciprocal)
+            DO i = 1, nvecs(ibox)
+              sin_sum_i = box_list(ibox)%sincos_sum(i,1)
+              cos_sum_i = box_list(ibox)%sincos_sum(i,2)
+              box_list(ibox)%sincos_sum_old(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum_old(i,2) = cos_sum_i
+              sin_mol_im_o = sin_mol(i,im_locate)
+              cos_mol_im_o = cos_mol(i,im_locate)
+              sin_sum_i = sin_sum_i - sin_mol_im_o
+              cos_sum_i = cos_sum_i - cos_mol_im_o
+              sin_mol(i,0) = sin_mol_im_o
+              cos_mol(i,0) = cos_mol_im_o
+              box_list(ibox)%sincos_sum(i,1) = sin_sum_i
+              box_list(ibox)%sincos_sum(i,2) = cos_sum_i
+              factor = box_list(ibox)%kspace_vectors(i,4)
+
+
+              E_reciprocal = E_reciprocal + factor &
+                           * ( cos_sum_i * cos_sum_i &
+                             + sin_sum_i * sin_sum_i )
 
+            END DO
+            !$OMP END DO SIMD
+    END SELECT
+    !$OMP END PARALLEL
     E_reciprocal = E_reciprocal * charge_factor
 
   END SUBROUTINE Update_System_Ewald_Reciprocal_Energy
@@ -2521,32 +5516,90 @@ SUBROUTINE Update_System_Ewald_Reciprocal_Energy_Widom(im,is,ibox, &
     REAL(DP), INTENT(OUT) :: E_reciprocal
 
     ! Local variables
-    REAL(DP), DIMENSION(natoms(is)) :: q, hdotr
-    REAL(DP) :: cos_sum_i, sin_sum_i
-    INTEGER :: i
-
-    q = nonbond_list(1:natoms(is),is)%charge
-
-    ! Initialize variables
-    E_reciprocal = 0.0_DP
+    REAL(DP) :: charge
+    REAL(DP), DIMENSION(4,natoms(is)) :: rpq
+    INTEGER :: i, ia, n_charged_atoms
 
-    DO i = 1, nvecs(ibox)
+    n_charged_atoms = 0
+    DO ia = 1, natoms(is)
+        charge = nonbond_list(ia,is)%charge
+        IF (charge == 0.0_DP) CYCLE
+        n_charged_atoms = n_charged_atoms+1
+        rpq(1:3,n_charged_atoms) = widom_atoms(ia)%rp
+        rpq(4,n_charged_atoms) = charge
+    END DO
 
-      hdotr = hx(i,ibox) * widom_atoms%rxp + &
-              hy(i,ibox) * widom_atoms%ryp + &
-              hz(i,ibox) * widom_atoms%rzp
 
-      cos_sum_i = cos_sum(i,ibox) + DOT_PRODUCT(q, DCOS(hdotr))
-      sin_sum_i = sin_sum(i,ibox) + DOT_PRODUCT(q, DSIN(hdotr))
+    ! Initialize variables
 
-      E_reciprocal = E_reciprocal + cn(i,ibox) &
-                   * ( cos_sum_i * cos_sum_i &
-                     + sin_sum_i * sin_sum_i )
+    ! Multiversion inner loop nested in vectorized loop
+    SELECT CASE(n_charged_atoms)
+    CASE(1)
+            E_reciprocal = Compute_E_reciprocal(1)
+    CASE(2)
+            E_reciprocal = Compute_E_reciprocal(2)
+    CASE(3)
+            E_reciprocal = Compute_E_reciprocal(3)
+    CASE(4)
+            E_reciprocal = Compute_E_reciprocal(4)
+    CASE(5)
+            E_reciprocal = Compute_E_reciprocal(5)
+    CASE(6)
+            E_reciprocal = Compute_E_reciprocal(6)
+    CASE(7)
+            E_reciprocal = Compute_E_reciprocal(7)
+    CASE(8)
+            E_reciprocal = Compute_E_reciprocal(8)
+    CASE DEFAULT
+            E_reciprocal = Compute_E_reciprocal(n_charged_atoms)
+    END SELECT
 
-    END DO
 
     E_reciprocal = E_reciprocal * charge_factor
 
+    CONTAINS
+            FUNCTION Compute_E_reciprocal(na) RESULT(E_reciprocal_redux)
+                    !DIR$ ATTRIBUTES FORCEINLINE :: Compute_E_reciprocal
+                    INTEGER, INTENT(IN) :: na
+                    REAL(DP) :: E_reciprocal_redux
+                    REAL(DP) :: hdotr, q, hx, hy, hz, sin_sum_i, cos_sum_i, factor
+                    INTEGER :: i, ia
+                    E_reciprocal_redux = 0.0_DP
+                    !DIR$ VECTOR ALIGNED
+                    !$OMP SIMD &
+                    !$OMP PRIVATE(hdotr, q, hx, hy, hz, sin_sum_i, cos_sum_i, factor) &
+                    !$OMP REDUCTION(+:E_reciprocal_redux)
+                    DO i = 1, nvecs(ibox)
+
+                      hx = box_list(ibox)%kspace_vectors(i,1)
+                      hy = box_list(ibox)%kspace_vectors(i,2)
+                      hz = box_list(ibox)%kspace_vectors(i,3)
+                      sin_sum_i = box_list(ibox)%sincos_sum(i,1)
+                      cos_sum_i = box_list(ibox)%sincos_sum(i,2)
+                      factor = box_list(ibox)%kspace_vectors(i,4)
+
+                      DO ia = 1, na
+                        ! compute the new hdotr
+                        hdotr = hx * rpq(1,ia) + &
+                                hy * rpq(2,ia) + &
+                                hz * rpq(3,ia)
+
+                        q = rpq(4,ia)
+                        sin_sum_i = sin_sum_i + q*SIN(hdotr)
+                        cos_sum_i = cos_sum_i + q*COS(hdotr)
+                      END DO
+
+                      sin_sum_i = sin_sum_i * sin_sum_i
+                      sin_sum_i = sin_sum_i + cos_sum_i*cos_sum_i
+
+
+                      E_reciprocal_redux = E_reciprocal_redux + factor*sin_sum_i
+
+
+                    END DO
+                    !$OMP END SIMD
+            END FUNCTION Compute_E_reciprocal
+
   END SUBROUTINE Update_System_Ewald_Reciprocal_Energy_Widom
   !*****************************************************************************
 
@@ -2610,7 +5663,6 @@ SUBROUTINE Compute_Molecule_Self_Energy(im,is,this_box,E_self)
   !
   ! CALLED BY:
   !
-  ! Chempot
   ! GEMC_Particle_Transfer
   ! Insertion
   ! Deletion
@@ -2688,9 +5740,11 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
     energy(this_box)%total = 0.0_DP
     energy(this_box)%inter = 0.0_DP
     energy(this_box)%inter_vdw = 0.0_DP
-    energy(this_box)%lrc = 0.0_DP
     energy(this_box)%inter_q = 0.0_DP
-    energy(this_box)%reciprocal = 0.0_DP
+    IF (int_sim_type .NE. sim_pregen) THEN
+        energy(this_box)%reciprocal = 0.0_DP
+        energy(this_box)%lrc = 0.0_DP
+    END IF
     ! Compute the intramolecular energy of the system if the flag is set.
 
     IF (intra_flag) THEN
@@ -2783,7 +5837,11 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
           this_im_1 = locate(im_1,is,this_box)
           IF (.NOT. molecule_list(this_im_1,is)%live) CYCLE imLOOP1
 
-          IF (l_pair_store) CALL Get_Position_Alive(this_im_1, is, locate_1)
+          IF (l_pair_store) THEN
+                  CALL Get_Position_Alive(this_im_1, is, locate_1)
+                  pair_nrg_vdw(locate_1,locate_1) = 0.0_DP
+                  pair_nrg_qq(locate_1,locate_1) = 0.0_DP
+          END IF
 
           E_inter_vdw = 0.0_DP
           E_inter_qq  = 0.0_DP
@@ -2834,6 +5892,9 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
           END DO imLOOP2
           !$OMP END PARALLEL DO
           IF (SHARED_OVERLAP) THEN
+             l_debug_print = .TRUE.
+             CALL Compute_Molecule_Nonbond_Inter_Energy(this_im_1,is,vlj_pair,vqq_pair,my_overlap)
+             l_debug_print = .FALSE.
              overlap = .true.
              RETURN
           ENDIF
@@ -2904,6 +5965,8 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
              END DO imLOOP4
              !$OMP END PARALLEL DO
              IF (SHARED_OVERLAP) THEN
+                l_debug_print = .TRUE.
+                CALL Compute_Molecule_Nonbond_Inter_Energy(this_im_1,is_1,vlj_pair,vqq_pair,my_overlap)
                 overlap = .true.
                 RETURN
              ENDIF
@@ -2921,8 +5984,9 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
     ! Compute the reciprocal and self energy terms of the electrostatic energies if flag for Ewald is set.
     IF (int_charge_style(this_box) == charge_coul) THEN
        IF (int_charge_sum_style(this_box) == charge_ewald) THEN
-
-            CALL Compute_System_Ewald_Reciprocal_Energy(this_box)
+            IF (int_sim_type .NE. sim_pregen) THEN
+                 CALL Compute_System_Ewald_Reciprocal_Energy(this_box)
+            END IF
 
             energy(this_box)%inter = energy(this_box)%inter &
                                    + energy(this_box)%reciprocal
@@ -2938,9 +6002,11 @@ SUBROUTINE Compute_System_Total_Energy(this_box,intra_flag,overlap)
 
     ! Long range correction if it is required
     IF (int_vdw_sum_style(this_box) == vdw_cut_tail) THEN
-       CALL Compute_LR_Correction(this_box,e_lrc)
-       ! add to the correction to the inter energy of the system
-       energy(this_box)%lrc = e_lrc
+       IF (int_sim_type .NE. sim_pregen) THEN
+               CALL Compute_LR_Correction(this_box,e_lrc)
+               ! add to the correction to the inter energy of the system
+               energy(this_box)%lrc = e_lrc
+       END IF
        energy(this_box)%inter = energy(this_box)%inter + energy(this_box)%lrc
     END IF
 
@@ -3059,9 +6125,9 @@ SUBROUTINE Check_MoleculePair_Cutoff(im_1,is_1,im_2,is_2,get_interaction, &
 
     ! Parent separation
 
-    rxijp = molecule_1%xcom - molecule_2%xcom
-    ryijp = molecule_1%ycom - molecule_2%ycom
-    rzijp = molecule_1%zcom - molecule_2%zcom
+    rxijp = molecule_1%rcom(1) - molecule_2%rcom(1)
+    ryijp = molecule_1%rcom(2) - molecule_2%rcom(2)
+    rzijp = molecule_1%rcom(3) - molecule_2%rcom(3)
 
     ! Compute the minimum image distance
 
@@ -3071,15 +6137,15 @@ SUBROUTINE Check_MoleculePair_Cutoff(im_1,is_1,im_2,is_2,get_interaction, &
 
     IF (CBMC_flag) THEN
 
-       rinteraction = rcut_cbmc(this_box) + molecule_1%max_dcom &
-            + molecule_2%max_dcom
+       rinteraction = rcut_cbmc(this_box) + molecule_1%rcom(4) &
+            + molecule_2%rcom(4)
 
        IF (rcom > rinteraction) get_interaction = .FALSE.
 
     ELSE
 
-       rinteraction = rcut_max(this_box) + molecule_1%max_dcom &
-            + molecule_2%max_dcom
+       rinteraction = rcut_max(this_box) + molecule_1%rcom(4) &
+            + molecule_2%rcom(4)
 
        IF (rcom > rinteraction) get_interaction = .FALSE.
 
@@ -3205,9 +6271,7 @@ SUBROUTINE Compute_System_Total_Force(this_box)
    !
    ! CALLED BY
    !
-   ! Volume_Change
-   ! Main
-   ! Write_Properties_Buffer
+   ! Compute_Pressure
    !
    !****************************************************************************
 
@@ -3360,9 +6424,9 @@ SUBROUTINE Compute_MoleculePair_Force(im,is,jm,js,this_box,tens_vdw,tens_charge,
        DO ja = 1, natoms(js)
 
           ! Obtain the minimum image separation
-          rxijp = atom_list(ia,im,is)%rxp - atom_list(ja,jm,js)%rxp
-          ryijp = atom_list(ia,im,is)%ryp - atom_list(ja,jm,js)%ryp
-          rzijp = atom_list(ia,im,is)%rzp - atom_list(ja,jm,js)%rzp
+          rxijp = atom_list(ia,im,is)%rp(1) - atom_list(ja,jm,js)%rp(1)
+          ryijp = atom_list(ia,im,is)%rp(2) - atom_list(ja,jm,js)%rp(2)
+          rzijp = atom_list(ia,im,is)%rp(3) - atom_list(ja,jm,js)%rp(3)
 
           ! Now get the minimum image separation
           CALL Minimum_Image_Separation(this_box,rxijp,ryijp,rzijp, &
@@ -3673,6 +6737,7 @@ SUBROUTINE Compute_System_Ewald_Reciprocal_Force(this_box)
     !
     ! Added by Tom Rosch on 06/11/09
     ! (See Wheeler, Mol. Phys. 1997 Vol. 92 pg. 55)
+    ! Drastically refactored by Ryan Smith in 2024
     !
     !***************************************************************************
 
@@ -3685,121 +6750,704 @@ SUBROUTINE Compute_System_Ewald_Reciprocal_Force(this_box)
 
     INTEGER :: i, is, im, ia, this_locate, this_box
 
-    REAL(DP) :: charge
-    REAL(DP) :: qw(9), qwxy, qwxz, qwyz, un, const_val
-    REAL(DP) :: xcmi, ycmi, zcmi, piix, piiy, piiz, arg, factor
+    REAL(DP) :: un, const_val
+    REAL(DP) :: piix, piiy, piiz, arg, factor
     REAL(DP) :: recip_11, recip_21, recip_31, recip_22, recip_23, recip_33
+    REAL(DP) :: qw1,qw2,qw3,qw5,qw6,qw9
+    REAL(DP) :: hxhy,hxhz,hyhz,rxp,ryp,rzp
+    REAL(DP) :: hxsq, hysq, hzsq, hx, hy, hz
+
+    INTEGER :: nlive_count, n_charged_atoms, n_charged_live, n_charged_live_p4
+    INTEGER, DIMENSION(MAXVAL(nmols(:,this_box))) :: live_locates
+    REAL(DP), PARAMETER :: inv_charge_factor = 1.0_DP/charge_factor
+    REAL(DP) :: diag_initializer
+    REAL(DP), DIMENSION(IAND(SUM(nmols(:,this_box)*natoms)+padconst_8byte,padmask_8byte),3) :: qpii, rp
+    REAL(DP), DIMENSION(IAND(SUM(nmols(:,this_box)*natoms)+padconst_8byte,padmask_8byte)) :: charges
+    REAL(DP), DIMENSION(3,3) :: H_inv
+    REAL(DP), DIMENSION(MAXVAL(natoms)) :: species_charges
+    INTEGER, DIMENSION(MAXVAL(natoms)) :: which_charged_atoms
+    REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE :: sincos
+    REAL(DP), DIMENSION(IAND(nvecs(this_box)+padconst_8byte,padmask_8byte),3,2) :: qpii_sincos_sum
+    REAL(DP) :: qpiix, qpiiy, qpiiz
+    REAL(DP) :: qpiix_sin_sum, qpiix_cos_sum, qpiiy_sin_sum, qpiiy_cos_sum, qpiiz_sin_sum, qpiiz_cos_sum
+    INTEGER :: kxyz_max(3), kxyz_maxmax, kxyz, kx, ky, kz, this_kxyz_max, hxp, hyp, hzp
+    REAL(DP) :: charge, xcom, ycom, zcom
+    REAL(DP) :: ihp, sin1, cos1, nsin1, ncos1, nsin2, ncos2, sin2, cos2, sin12, cos12, sin3, cos3
+    REAL(DP) :: sin_sum_i, cos_sum_i
+    INTEGER :: istart, iend, i_dim, ni
+    LOGICAL :: l_ortho
+    INTEGER :: chunkstart,chunkend,chunksize,ithread,nthreads
+    REAL(DP), DIMENSION(6) :: qw_vec
+
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+    diag_initializer = energy(this_box)%reciprocal * inv_charge_factor
+    !qw1 = diag_initializer
+    !qw2 = 0.0_DP
+    !qw3 = 0.0_DP
+    !qw5 = diag_initializer
+    !qw6 = 0.0_DP
+    !qw9 = diag_initializer
+    !recip_11 = 0.0_DP
+    !recip_21 = 0.0_DP
+    !recip_31 = 0.0_DP
+    !recip_22 = 0.0_DP
+    !recip_23 = 0.0_DP
+    !recip_33 = 0.0_DP
+    qw_vec = 0.0_DP
+
+    H_inv = twoPI*box_list(this_box)%length_inv
+    kxyz_maxmax = box_list(this_box)%kxyz_maxmax
+    kxyz_max = box_list(this_box)%kxyz_max
+
+    istart = 1
+    n_charged_live = 0
+
+
+    !$OMP PARALLEL DEFAULT(SHARED) &
+    !$OMP PRIVATE(i,cos_sum_i,sin_sum_i,hxsq,hysq,hzsq) &
+    !$OMP PRIVATE(hxhy,hxhz,hyhz,factor) &
+    !$OMP PRIVATE(is,im,ia,this_locate,ni,i_dim) &
+    !$OMP PRIVATE(xcom,ycom,zcom,rxp,ryp,rzp,charge) &
+    !$OMP PRIVATE(ihp, sin1, cos1, nsin1, ncos1, nsin2, ncos2, sin2, cos2, sin12, cos12, sin3, cos3) &
+    !$OMP PRIVATE(qpiix_sin_sum, qpiix_cos_sum, qpiiy_sin_sum, qpiiy_cos_sum, qpiiz_sin_sum, qpiiz_cos_sum) &
+    !$OMP PRIVATE(piix,piiy,piiz,qpiix,qpiiy,qpiiz) &
+    !$OMP PRIVATE(hx,hy,hz,hxp,hyp,hzp,this_kxyz_max) &
+    !$OMP PRIVATE(chunkstart,chunkend,chunksize,ithread,nthreads) &
+    !$OMP PRIVATE(kxyz,kx,ky,kz) &
+    !$OMP PRIVATE(qw1,qw2,qw3,qw5,qw6,recip_11,recip_21,recip_31,recip_22,recip_23,recip_33) &
+    !$OMP REDUCTION(+:qw_vec)
+
+    !$ nthreads = OMP_GET_NUM_THREADS()
+    !$ ithread = OMP_GET_THREAD_NUM()
 
-    const_val = 1.0_DP/(2.0_DP * alpha_ewald(this_box) * alpha_ewald(this_box))
-    qw(:) = 0.0_DP
-    W_tensor_recip(:,:,this_box) = 0.0_DP
-
-    !$OMP PARALLEL DO DEFAULT(SHARED) &
-    !$OMP SCHEDULE(STATIC) &
-    !$OMP PRIVATE(i, un, qwxy, qwxz, qwyz) &
-    !$OMP REDUCTION(+:qw)
+    DO is = 1, nspecies
+       IF (nmols(is,this_box)==0 .OR. .NOT. has_charge(is)) CYCLE
+       !$OMP SINGLE
+       istart = istart + n_charged_live
+       nlive_count = 0
+       DO im = 1, nmols(is,this_box)
+                this_locate = locate(im,is,this_box)
+                IF (.NOT. molecule_list(this_locate,is)%live) CYCLE
+                nlive_count = nlive_count + 1
+                live_locates(nlive_count) = this_locate
+       END DO
+       n_charged_atoms = 0
+       DO ia = 1, natoms(is)
+                charge = nonbond_list(ia,is)%charge
+                IF (charge == 0.0_DP) CYCLE
+                n_charged_atoms = n_charged_atoms + 1
+                which_charged_atoms(n_charged_atoms) = ia
+                species_charges(n_charged_atoms) = charge
+       END DO
+       n_charged_live = nlive_count*n_charged_atoms
+       iend = istart + n_charged_live - 1
+       !$OMP END SINGLE
+       IF (n_charged_live == 0) CYCLE
+       !$OMP WORKSHARE
+       charges(istart:iend) = RESHAPE(SPREAD(species_charges(1:n_charged_atoms),1,nlive_count), (/ n_charged_live /))
+       qpii(istart:iend,1) = RESHAPE(SPREAD(molecule_list(live_locates(1:nlive_count),is)%rcom(1),2,n_charged_atoms), &
+               (/ n_charged_live /))
+       qpii(istart:iend,2) = RESHAPE(SPREAD(molecule_list(live_locates(1:nlive_count),is)%rcom(2),2,n_charged_atoms), &
+               (/ n_charged_live /))
+       qpii(istart:iend,3) = RESHAPE(SPREAD(molecule_list(live_locates(1:nlive_count),is)%rcom(3),2,n_charged_atoms), &
+               (/ n_charged_live /))
+       rp(istart:iend,1) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(1)), &
+               (/ n_charged_live /))
+       rp(istart:iend,2) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(2)), &
+               (/ n_charged_live /))
+       rp(istart:iend,3) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(3)), &
+               (/ n_charged_live /))
+       !$OMP END WORKSHARE
+    END DO
+    !$OMP SINGLE
+    n_charged_live = iend
+    n_charged_live_p4 = IAND(n_charged_live+padconst_8byte,padmask_8byte)
+    ALLOCATE(sincos(n_charged_live_p4,2,-kxyz_maxmax:kxyz_maxmax,3))
+    !$OMP END SINGLE
+    chunkstart = 1
+    chunkend = n_charged_live
+    !$ chunksize = IAND((n_charged_live+nthreads-1)/nthreads+padconst_8byte,padmask_8byte)
+    !$ chunkstart = ithread*chunksize+1
+    !$ chunkend = MIN((ithread+1)*chunksize,n_charged_live)
+    IF (l_ortho) THEN
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(xcom,ycom,zcom,rxp,ryp,rzp,charge)
+            DO i = chunkstart, chunkend
+                xcom = qpii(i,1)
+                rxp = rp(i,1)
+                ycom = qpii(i,2)
+                ryp = rp(i,2)
+                zcom = qpii(i,3)
+                rzp = rp(i,3)
+                charge = charges(i)
+                xcom = rxp-xcom
+                ycom = ryp-ycom
+                zcom = rzp-zcom
+                rxp = H_inv(1,1)*rxp
+                ryp = H_inv(2,2)*ryp
+                rzp = H_inv(3,3)*rzp
+                xcom = xcom*charge
+                ycom = ycom*charge
+                zcom = zcom*charge
+                rp(i,1) = rxp
+                rp(i,2) = ryp
+                rp(i,3) = rzp
+                qpii(i,1) = xcom
+                qpii(i,2) = ycom
+                qpii(i,3) = zcom
+            END DO
+            !$OMP END SIMD
+    ELSE
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(xcom,ycom,zcom,rxp,ryp,rzp,charge)
+            DO i = chunkstart, chunkend
+                xcom = qpii(i,1)
+                rxp = rp(i,1)
+                ycom = qpii(i,2)
+                ryp = rp(i,2)
+                zcom = qpii(i,3)
+                rzp = rp(i,3)
+                charge = charges(i)
+                rxp = rxp-xcom
+                ryp = ryp-ycom
+                rzp = rzp-zcom
+                rxp = rxp*charge
+                ryp = ryp*charge
+                rzp = rzp*charge
+                qpii(i,1) = rxp
+                qpii(i,2) = ryp
+                qpii(i,3) = rzp
+            END DO
+            !$OMP END SIMD
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(hxp,hyp,hzp,rxp,ryp,rzp)
+            DO i = chunkstart, chunkend
+                rxp = rp(i,1)
+                ryp = rp(i,2)
+                rzp = rp(i,3)
+                hxp = H_inv(1,1)*rxp + H_inv(2,1)*ryp + H_inv(3,1)*rzp
+                hyp = H_inv(1,2)*rxp + H_inv(2,2)*ryp + H_inv(3,2)*rzp
+                hzp = H_inv(1,3)*rxp + H_inv(2,3)*ryp + H_inv(3,3)*rzp
+                rp(i,1) = hxp
+                rp(i,2) = hyp
+                rp(i,3) = hzp
+            END DO
+            !$OMP END SIMD
+    END IF
+    DO i_dim = 1, 3
+        this_kxyz_max = kxyz_max(i_dim)
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(ihp,sin1,cos1)
+        DO i = chunkstart, chunkend
+                 ihp = rp(i,i_dim)
+                 sincos(i,1,0,i_dim) = 0.0_DP
+                 sincos(i,2,0,i_dim) = 1.0_DP
+                 sin1 = SIN(ihp)
+                 cos1 = COS(ihp)
+                 sincos(i,1, 1,i_dim) = sin1
+                 sincos(i,2, 1,i_dim) = cos1
+        END DO
+        !$OMP END SIMD
+        IF (i_dim == 3) THEN
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(sin1,cos1,nsin1,ncos1,nsin2,ncos2)
+                DO i = chunkstart, chunkend
+                        sin1 = sincos(i,1,1,3)
+                        cos1 = sincos(i,2,1,3)
+                        nsin1 = sin1
+                        ncos1 = cos1
+                        ! Ptolemy's identities
+                        DO ni = 2, this_kxyz_max
+                                nsin2 = nsin1*cos1 + ncos1*sin1
+                                ncos2 = ncos1*cos1 - nsin1*sin1
+                                sincos(i,1, ni,3) = nsin2
+                                sincos(i,2, ni,3) = ncos2
+                                nsin1 = nsin2
+                                ncos1 = ncos2
+                        END DO
+                END DO
+                !$OMP END SIMD
+        ELSE
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(sin1,cos1,nsin1,ncos1,nsin2,ncos2)
+                DO i = chunkstart, chunkend
+                        sin1 = sincos(i,1,1,i_dim)
+                        cos1 = sincos(i,2,1,i_dim)
+                        sincos(i,1,-1,i_dim) = -sin1
+                        sincos(i,2,-1,i_dim) = cos1
+                        nsin1 = sin1
+                        ncos1 = cos1
+                        ! Ptolemy's identities
+                        DO ni = 2, this_kxyz_max
+                                nsin2 = nsin1*cos1 + ncos1*sin1
+                                ncos2 = ncos1*cos1 - nsin1*sin1
+                                sincos(i,1, ni,i_dim) = nsin2
+                                sincos(i,1,-ni,i_dim) = -nsin2
+                                sincos(i,2, ni,i_dim) = ncos2
+                                sincos(i,2,-ni,i_dim) = ncos2
+                                nsin1 = nsin2
+                                ncos1 = ncos2
+                        END DO
+                END DO
+                !$OMP END SIMD
+        END IF
+    END DO
+    IF (n_charged_live_p4 .NE. n_charged_live) THEN
+            !$OMP SINGLE
+            sincos(n_charged_live+1:n_charged_live_p4,:,:,:) = 0.0_DP
+            qpii(n_charged_live+1:n_charged_live_p4,:) = 0.0_DP
+            !$OMP END SINGLE NOWAIT
+    END IF
+    !$OMP BARRIER
+    !$OMP DO SCHEDULE(STATIC)
     DO i = 1, nvecs(this_box)
-
-       un = Cn(i,this_box) * (cos_sum(i,this_box) * cos_sum(i,this_box) + sin_sum(i,this_box) * sin_sum(i,this_box))
-
-       qwxy =  un * ( -2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-              *hx(i,this_box)*hy(i,this_box) )
-       qwxz =  un * ( -2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-              *hx(i,this_box)*hz(i,this_box) )
-       qwyz =  un * ( -2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-              *hy(i,this_box)*hz(i,this_box) )
-
-       qw(1) = qw(1) + &
-               ( un * ( 1.0_DP - 2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-               *hx(i,this_box)*hx(i,this_box)))
-       qw(2) = qw(2) + qwxy
-       qw(3) = qw(3) + qwxz
-       qw(5) = qw(5) + &
-               ( un * ( 1.0_DP - 2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-               *hy(i,this_box)*hy(i,this_box)))
-       qw(6) = qw(6) + qwyz
-       qw(9) = qw(9) + &
-               ( un * ( 1.0_DP - 2.0_DP*(1.0_DP/hsq(i,this_box) + 0.5_DP*const_val) &
-               *hz(i,this_box)*hz(i,this_box)))
-
+        kxyz = box_list(this_box)%kspace_vector_ints(i)
+        CALL Extract_Kvector_Ints(kxyz,kx,ky,kz)
+        qpiix_cos_sum = 0.0_DP
+        qpiix_sin_sum = 0.0_DP
+        qpiiy_cos_sum = 0.0_DP
+        qpiiy_sin_sum = 0.0_DP
+        qpiiz_cos_sum = 0.0_DP
+        qpiiz_sin_sum = 0.0_DP
+        !DIR$ ASSUME (MOD(n_charged_live_p4,dimpad_8byte) .EQ. 0)
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,qpiix,qpiiy,qpiiz) &
+        !$OMP REDUCTION(+:qpiix_sin_sum,qpiix_cos_sum,qpiiy_sin_sum,qpiiy_cos_sum,qpiiz_sin_sum,qpiiz_cos_sum)
+        DO ia = 1, n_charged_live_p4
+                sin1 = sincos(ia,1,kx,1)
+                cos1 = sincos(ia,2,kx,1)
+                sin2 = sincos(ia,1,ky,2)
+                cos2 = sincos(ia,2,ky,2)
+                sin3 = sincos(ia,1,kz,3)
+                cos3 = sincos(ia,2,kz,3)
+                sin12 = sin1*cos2 + cos1*sin2
+                cos12 = cos1*cos2 - sin1*sin2
+                sin1 = sin12*cos3 + cos12*sin3
+                cos1 = cos12*cos3 - sin12*sin3
+                qpiix = qpii(ia,1)
+                qpiiy = qpii(ia,2)
+                qpiiz = qpii(ia,3)
+                qpiix_sin_sum = qpiix_sin_sum + qpiix*sin1
+                qpiix_cos_sum = qpiix_cos_sum + qpiix*cos1
+                qpiiy_sin_sum = qpiiy_sin_sum + qpiiy*sin1
+                qpiiy_cos_sum = qpiiy_cos_sum + qpiiy*cos1
+                qpiiz_sin_sum = qpiiz_sin_sum + qpiiz*sin1
+                qpiiz_cos_sum = qpiiz_cos_sum + qpiiz*cos1
+        END DO
+        !$OMP END SIMD
+        qpii_sincos_sum(i,1,1) = qpiix_sin_sum
+        qpii_sincos_sum(i,1,2) = qpiix_cos_sum
+        qpii_sincos_sum(i,2,1) = qpiiy_sin_sum
+        qpii_sincos_sum(i,2,2) = qpiiy_cos_sum
+        qpii_sincos_sum(i,3,1) = qpiiz_sin_sum
+        qpii_sincos_sum(i,3,2) = qpiiz_cos_sum
     END DO
-    !$OMP END PARALLEL DO
+    !$OMP END DO
+    chunkstart = 1
+    chunkend = nvecs(this_box)
+    !$ chunksize = IAND((nvecs(this_box)+nthreads-1)/nthreads+padconst_8byte,padmask_8byte)
+    !$ chunkstart = ithread*chunksize+1
+    !$ chunkend = MIN((ithread+1)*chunksize,nvecs(this_box))
+    !DIR$ VECTOR ALIGNED
+    !$OMP SIMD &
+    !$OMP PRIVATE(cos_sum_i,sin_sum_i,qpiix_sin_sum,qpiix_cos_sum) &
+    !$OMP PRIVATE(qpiiy_sin_sum,qpiiy_cos_sum,qpiiz_sin_sum,qpiiz_cos_sum)
+    DO i = chunkstart, chunkend
+        sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+        cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+        qpiix_sin_sum = qpii_sincos_sum(i,1,1)
+        qpiix_cos_sum = qpii_sincos_sum(i,1,2)
+        qpiiy_sin_sum = qpii_sincos_sum(i,2,1)
+        qpiiy_cos_sum = qpii_sincos_sum(i,2,2)
+        qpiiz_sin_sum = qpii_sincos_sum(i,3,1)
+        qpiiz_cos_sum = qpii_sincos_sum(i,3,2)
+        qpiix_cos_sum = qpiix_cos_sum*sin_sum_i
+        qpiix_cos_sum = qpiix_cos_sum - qpiix_sin_sum*cos_sum_i
+        qpiiy_cos_sum = qpiiy_cos_sum*sin_sum_i
+        qpiiy_cos_sum = qpiiy_cos_sum - qpiiy_sin_sum*cos_sum_i
+        qpiiz_cos_sum = qpiiz_cos_sum*sin_sum_i
+        qpiiz_cos_sum = qpiiz_cos_sum - qpiiz_sin_sum*cos_sum_i
+        qpii_sincos_sum(i,1,1) = qpiix_cos_sum
+        qpii_sincos_sum(i,2,1) = qpiiy_cos_sum
+        qpii_sincos_sum(i,3,1) = qpiiz_cos_sum
+    END DO
+    !$OMP END SIMD
+    recip_11 = 0.0_DP
+    recip_21 = 0.0_DP
+    recip_31 = 0.0_DP
+    recip_22 = 0.0_DP
+    recip_23 = 0.0_DP
+    recip_33 = 0.0_DP
+    !DIR$ VECTOR ALIGNED
+    !$OMP SIMD &
+    !$OMP PRIVATE(factor,hx,hy,hz,piix,piiy,piiz) &
+    !$OMP REDUCTION(+:recip_11,recip_21,recip_31,recip_22,recip_23,recip_33)
+    DO i = chunkstart, chunkend
+        factor = box_list(this_box)%kspace_vectors(i,4)
+        hx = box_list(this_box)%kspace_vectors(i,1)
+        hy = box_list(this_box)%kspace_vectors(i,2)
+        hz = box_list(this_box)%kspace_vectors(i,3)
+        piix = qpii_sincos_sum(i,1,1)
+        piiy = qpii_sincos_sum(i,2,1)
+        piiz = qpii_sincos_sum(i,3,1)
+        hx = hx*factor
+        hy = hy*factor
+        hz = hz*factor
+        recip_11 = recip_11 + hx*piix
+        recip_21 = recip_21 + hy*piix
+        recip_31 = recip_31 + hz*piix
+        recip_21 = recip_21 + hx*piiy
+        recip_22 = recip_22 + hy*piiy
+        recip_23 = recip_23 + hz*piiy
+        recip_31 = recip_31 + hx*piiz
+        recip_23 = recip_23 + hy*piiz
+        recip_33 = recip_33 + hz*piiz
+    END DO
+    !$OMP END SIMD
+    qw1 = 0.0_DP
+    qw2 = 0.0_DP
+    qw3 = 0.0_DP
+    qw5 = 0.0_DP
+    qw6 = 0.0_DP
+    qw9 = 0.0_DP
+    !DIR$ VECTOR ALIGNED
+    !$OMP SIMD &
+    !$OMP PRIVATE(cos_sum_i,sin_sum_i,hxsq,hysq,hzsq) &
+    !$OMP PRIVATE(hxhy,hxhz,hyhz,factor) &
+    !$OMP REDUCTION(+:qw1,qw2,qw3,qw5,qw6,qw9)
+    DO i = chunkstart, chunkend
+       sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+       cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+       factor = cos_sum_i*cos_sum_i+sin_sum_i*sin_sum_i
+       factor = factor*box_list(this_box)%kspace_vectors(i,5)
+       hxsq = box_list(this_box)%kspace_vectors(i,1) ! not squared yet
+       hysq = box_list(this_box)%kspace_vectors(i,2) ! not squared yet
+       hzsq = box_list(this_box)%kspace_vectors(i,3) ! not squared yet
+       hxhy = hxsq*hysq
+       hxhz = hxsq*hzsq
+       hyhz = hysq*hzsq
+       hxsq = hxsq*hxsq
+       hysq = hysq*hysq
+       hzsq = hzsq*hzsq
+       qw1 = qw1 + factor*hxsq
+       qw2 = qw2 + factor*hxhy
+       qw3 = qw3 + factor*hxhz
+       qw5 = qw5 + factor*hysq
+       qw6 = qw6 + factor*hyhz
+       qw9 = qw9 + factor*hzsq
 
-    W_tensor_recip(1,1,this_box) = qw(1)
-    W_tensor_recip(2,1,this_box) = qw(2)
-    W_tensor_recip(3,1,this_box) = qw(3)
-    W_tensor_recip(2,2,this_box) = qw(5)
-    W_tensor_recip(3,2,this_box) = qw(6)
-    W_tensor_recip(3,3,this_box) = qw(9)
 
-    DO is = 1, nspecies
+    END DO
+    !$OMP END SIMD
+    qw1 = qw1 + 2.0_DP*recip_11
+    qw2 = qw2 + recip_21
+    qw3 = qw3 + recip_31
+    qw5 = qw5 + 2.0_DP*recip_22
+    qw6 = qw6 + recip_23
+    qw9 = qw9 + 2.0_DP*recip_33
+    qw_vec(1) = qw1
+    qw_vec(2) = qw2
+    qw_vec(3) = qw3
+    qw_vec(4) = qw5
+    qw_vec(5) = qw6
+    qw_vec(6) = qw9
+    !$OMP END PARALLEL
+    qw1 = qw_vec(1) + diag_initializer
+    qw2 = qw_vec(2)
+    qw3 = qw_vec(3)
+    qw5 = qw_vec(4) + diag_initializer
+    qw6 = qw_vec(5)
+    qw9 = qw_vec(6) + diag_initializer
+    !qw1 = qw1 + 2.0_DP*recip_11
+    !qw2 = qw2 + recip_21
+    !qw3 = qw3 + recip_31
+    !qw5 = qw5 + 2.0_DP*recip_22
+    !qw6 = qw6 + recip_23
+    !qw9 = qw9 + 2.0_DP*recip_33
+    W_tensor_recip(1,1,this_box) = qw1
+    W_tensor_recip(2,1,this_box) = qw2
+    W_tensor_recip(1,2,this_box) = qw2
+    W_tensor_recip(1,3,this_box) = qw3
+    W_tensor_recip(3,1,this_box) = qw3
+    W_tensor_recip(2,2,this_box) = qw5
+    W_tensor_recip(3,2,this_box) = qw6
+    W_tensor_recip(2,3,this_box) = qw6
+    W_tensor_recip(3,3,this_box) = qw9
 
-       DO im = 1, nmols(is,this_box)
 
-          this_locate = locate(im,is,this_box)
-          IF( .NOT. molecule_list(this_locate,is)%live) CYCLE
 
-          xcmi = molecule_list(this_locate,is)%xcom
-          ycmi = molecule_list(this_locate,is)%ycom
-          zcmi = molecule_list(this_locate,is)%zcom
 
-          DO ia = 1, natoms(is)
+  END SUBROUTINE Compute_System_Ewald_Reciprocal_Force
 
-             piix = atom_list(ia,this_locate,is)%rxp - xcmi
-             piiy = atom_list(ia,this_locate,is)%ryp - ycmi
-             piiz = atom_list(ia,this_locate,is)%rzp - zcmi
-             charge = nonbond_list(ia,is)%charge
+  !-----------------------------------------------------------------------------
 
-             recip_11 = 0.0_DP
-             recip_21 = 0.0_DP
-             recip_31 = 0.0_DP
-             recip_22 = 0.0_DP
-             recip_23 = 0.0_DP
-             recip_33 = 0.0_DP
+  SUBROUTINE Compute_System_Ewald_Reciprocal_Energy_Pregen(this_box)
+    !***************************************************************************
+    !
+    !***************************************************************************
 
-             !$OMP PARALLEL DO DEFAULT(SHARED) &
-             !$OMP SCHEDULE(STATIC) &
-             !$OMP PRIVATE(i,arg,factor) &
-             !$OMP REDUCTION(+:recip_11, recip_21, recip_31) &
-             !$OMP REDUCTION(+:recip_22, recip_23, recip_33)
-             DO i = 1, nvecs(this_box)
-
-                arg = hx(i,this_box)*atom_list(ia,this_locate,is)%rxp + &
-                      hy(i,this_box)*atom_list(ia,this_locate,is)%ryp + &
-                      hz(i,this_box)*atom_list(ia,this_locate,is)%rzp
-
-                factor = Cn(i,this_box)*2.0_DP*(-cos_sum(i,this_box)*DSIN(arg) + &
-                         sin_sum(i,this_box)*DCOS(arg))*charge
-
-                recip_11 = recip_11 + factor*hx(i,this_box)*piix
-                recip_21 = recip_21 + factor* 0.5_DP*(hx(i,this_box)*piiy+hy(i,this_box)*piix)
-                recip_31 = recip_31 + factor* 0.5_DP*(hx(i,this_box)*piiz+hz(i,this_box)*piix)
-                recip_22 = recip_22 + factor*hy(i,this_box)*piiy
-                recip_23 = recip_23 + factor* 0.5_DP*(hy(i,this_box)*piiz+hz(i,this_box)*piiy)
-                recip_33 = recip_33 + factor*hz(i,this_box)*piiz
-
-             END DO
-             !$OMP END PARALLEL DO
+    USE Type_Definitions
+    USE Global_Variables
 
-             W_tensor_recip(1,1,this_box) = W_tensor_recip(1,1,this_box) + recip_11
-             W_tensor_recip(2,1,this_box) = W_tensor_recip(2,1,this_box) + recip_21
-             W_tensor_recip(3,1,this_box) = W_tensor_recip(3,1,this_box) + recip_31
-             W_tensor_recip(2,2,this_box) = W_tensor_recip(2,2,this_box) + recip_22
-             W_tensor_recip(2,3,this_box) = W_tensor_recip(2,3,this_box) + recip_23
-             W_tensor_recip(3,3,this_box) = W_tensor_recip(3,3,this_box) + recip_33
+    IMPLICIT NONE
 
+!    !$ include 'omp_lib.h'
 
-          END DO
+    INTEGER :: i, is, im, ia, this_locate, this_box
 
+    REAL(DP) :: factor
+    REAL(DP) :: rxp,ryp,rzp
+
+    INTEGER :: nlive_count, n_charged_atoms, n_charged_live, n_charged_live_p4
+    INTEGER, DIMENSION(MAXVAL(nmols(:,this_box))) :: live_locates
+    REAL(DP), DIMENSION(IAND(SUM(nmols(:,this_box)*natoms)+padconst_8byte,padmask_8byte),4) :: rpq
+    REAL(DP), DIMENSION(3,3) :: H_inv
+    REAL(DP), DIMENSION(MAXVAL(natoms)) :: species_charges
+    INTEGER, DIMENSION(MAXVAL(natoms)) :: which_charged_atoms
+    REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE :: sincos
+    INTEGER :: kxyz_max(3), kxyz_maxmax, kxyz, kx, ky, kz, this_kxyz_max, hxp, hyp, hzp
+    REAL(DP) :: charge
+    REAL(DP) :: ihp, sin1, cos1, nsin1, ncos1, nsin2, ncos2, sin2, cos2, sin12, cos12, sin3, cos3
+    REAL(DP) :: sin_sum_i, cos_sum_i
+    INTEGER :: istart, iend, i_dim, ni
+    LOGICAL :: l_ortho
+    INTEGER :: chunkstart,chunkend,chunksize,ithread,nthreads
+    REAL(DP) :: E_reciprocal
+
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+
+    H_inv = twoPI*box_list(this_box)%length_inv
+    kxyz_maxmax = box_list(this_box)%kxyz_maxmax
+    kxyz_max = box_list(this_box)%kxyz_max
+    E_reciprocal = 0.0_DP
+    n_charged_live = 0
+    istart = 1
+
+    !$OMP PARALLEL DEFAULT(SHARED) &
+    !$OMP PRIVATE(i,cos_sum_i,sin_sum_i) &
+    !$OMP PRIVATE(factor) &
+    !$OMP PRIVATE(is,im,ia,this_locate,ni,i_dim) &
+    !$OMP PRIVATE(rxp,ryp,rzp,charge) &
+    !$OMP PRIVATE(ihp, sin1, cos1, nsin1, ncos1, nsin2, ncos2, sin2, cos2, sin12, cos12, sin3, cos3) &
+    !$OMP PRIVATE(hxp,hyp,hzp,this_kxyz_max) &
+    !$OMP PRIVATE(kxyz,kx,ky,kz) &
+    !$OMP PRIVATE(chunkstart,chunkend,chunksize,ithread,nthreads)
+
+    !$ nthreads = OMP_GET_NUM_THREADS()
+    !$ ithread = OMP_GET_THREAD_NUM()
+
+    !istart = 1
+    DO is = 1, nspecies
+       IF (nmols(is,this_box)==0 .OR. .NOT. has_charge(is)) CYCLE
+       !$OMP SINGLE
+       istart = istart + n_charged_live
+       nlive_count = 0
+       DO im = 1, nmols(is,this_box)
+                this_locate = locate(im,is,this_box)
+                IF (.NOT. molecule_list(this_locate,is)%live) CYCLE
+                nlive_count = nlive_count + 1
+                live_locates(nlive_count) = this_locate
        END DO
-
+       n_charged_atoms = 0
+       DO ia = 1, natoms(is)
+                charge = nonbond_list(ia,is)%charge
+                IF (charge == 0.0_DP) CYCLE
+                n_charged_atoms = n_charged_atoms + 1
+                which_charged_atoms(n_charged_atoms) = ia
+                species_charges(n_charged_atoms) = charge
+       END DO
+       n_charged_live = nlive_count*n_charged_atoms
+       iend = istart + n_charged_live - 1
+       !$OMP END SINGLE
+       IF (n_charged_live == 0) CYCLE
+       !$OMP WORKSHARE
+       rpq(istart:iend,4) = RESHAPE(SPREAD(species_charges(1:n_charged_atoms),1,nlive_count), (/ n_charged_live /))
+       rpq(istart:iend,1) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(1)), &
+               (/ n_charged_live /))
+       rpq(istart:iend,2) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(2)), &
+               (/ n_charged_live /))
+       rpq(istart:iend,3) = RESHAPE(TRANSPOSE(atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(3)), &
+               (/ n_charged_live /))
+       !$OMP END WORKSHARE
+    END DO
+    !$OMP SINGLE
+    n_charged_live = iend
+    n_charged_live_p4 = IAND(n_charged_live+padconst_8byte,padmask_8byte)
+    ALLOCATE(sincos(n_charged_live_p4,2,-kxyz_maxmax:kxyz_maxmax,3))
+    !IF (i_mcstep == 1) THEN
+    !        WRITE(*,*)
+    !        WRITE(*,*) i_mcstep
+    !        WRITE(*,*) kxyz_max
+    !        WRITE(*,*) "Good rpq 1!", n_charged_live
+    !        DO i = 1, n_charged_live
+    !            WRITE(*,*) rpq(i,:)
+    !        END DO
+    !        WRITE(*,*)
+    !END IF
+    !IF (ANY(rpq(:n_charged_live,:) .NE. rpq(:n_charged_live,:))) THEN
+    !        WRITE(*,*)
+    !        WRITE(*,*) i_mcstep
+    !        WRITE(*,*) kxyz_max
+    !        WRITE(*,*) "Bad rpq 1!", n_charged_live
+    !        DO i = 1, n_charged_live
+    !            WRITE(*,*) rpq(i,:)
+    !        END DO
+    !        WRITE(*,*)
+    !END IF
+    !IF (i_mcstep == 1) THEN
+    !    WRITE(*,*) n_charged_live, n_charged_live_p4
+    !    WRITE(*,*) "ithread, nthreads, chunksize, chunkstart, chunkend"
+    !END IF
+    !$OMP END SINGLE
+    chunkstart = 1
+    chunkend = n_charged_live
+    !$ chunksize = IAND((n_charged_live+nthreads-1)/nthreads+padconst_8byte,padmask_8byte)
+    !$ chunkstart = ithread*chunksize+1
+    !$ chunkend = MIN((ithread+1)*chunksize,n_charged_live)
+    IF (l_ortho) THEN
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(rxp,ryp,rzp,charge)
+            DO i = chunkstart, chunkend
+                rxp = rpq(i,1)
+                ryp = rpq(i,2)
+                rzp = rpq(i,3)
+                rxp = H_inv(1,1)*rxp
+                ryp = H_inv(2,2)*ryp
+                rzp = H_inv(3,3)*rzp
+                rpq(i,1) = rxp
+                rpq(i,2) = ryp
+                rpq(i,3) = rzp
+            END DO
+            !$OMP END SIMD
+    ELSE
+            !DIR$ VECTOR ALIGNED
+            !$OMP SIMD PRIVATE(hxp,hyp,hzp,rxp,ryp,rzp)
+            DO i = chunkstart, chunkend
+                rxp = rpq(i,1)
+                ryp = rpq(i,2)
+                rzp = rpq(i,3)
+                hxp = H_inv(1,1)*rxp
+                hyp = H_inv(1,2)*rxp + H_inv(2,2)*ryp
+                hzp = H_inv(1,3)*rxp + H_inv(2,3)*ryp + H_inv(3,3)*rzp
+                rpq(i,1) = hxp
+                rpq(i,2) = hyp
+                rpq(i,3) = hzp
+            END DO
+            !$OMP END SIMD
+    END IF
+    DO i_dim = 1, 3
+        this_kxyz_max = kxyz_max(i_dim)
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(ihp,sin1,cos1)
+        DO i = chunkstart, chunkend
+                 ihp = rpq(i,i_dim)
+                 sincos(i,1,0,i_dim) = 0.0_DP
+                 sincos(i,2,0,i_dim) = 1.0_DP
+                 sin1 = SIN(ihp)
+                 cos1 = COS(ihp)
+                 sincos(i,1, 1,i_dim) = sin1
+                 sincos(i,2, 1,i_dim) = cos1
+        END DO
+        !$OMP END SIMD
+        IF (i_dim == 3) THEN
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(sin1,cos1,nsin1,ncos1,nsin2,ncos2)
+                DO i = chunkstart, chunkend
+                        sin1 = sincos(i,1,1,3)
+                        cos1 = sincos(i,2,1,3)
+                        nsin1 = sin1
+                        ncos1 = cos1
+                        ! Ptolemy's identities
+                        DO ni = 2, this_kxyz_max
+                                nsin2 = nsin1*cos1 + ncos1*sin1
+                                ncos2 = ncos1*cos1 - nsin1*sin1
+                                sincos(i,1, ni,3) = nsin2
+                                sincos(i,2, ni,3) = ncos2
+                                nsin1 = nsin2
+                                ncos1 = ncos2
+                        END DO
+                END DO
+                !$OMP END SIMD
+        ELSE
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(sin1,cos1,nsin1,ncos1,nsin2,ncos2)
+                DO i = chunkstart, chunkend
+                        sin1 = sincos(i,1,1,i_dim)
+                        cos1 = sincos(i,2,1,i_dim)
+                        sincos(i,1,-1,i_dim) = -sin1
+                        sincos(i,2,-1,i_dim) = cos1
+                        nsin1 = sin1
+                        ncos1 = cos1
+                        ! Ptolemy's identities
+                        DO ni = 2, this_kxyz_max
+                                nsin2 = nsin1*cos1 + ncos1*sin1
+                                ncos2 = ncos1*cos1 - nsin1*sin1
+                                sincos(i,1, ni,i_dim) = nsin2
+                                sincos(i,1,-ni,i_dim) = -nsin2
+                                sincos(i,2, ni,i_dim) = ncos2
+                                sincos(i,2,-ni,i_dim) = ncos2
+                                nsin1 = nsin2
+                                ncos1 = ncos2
+                        END DO
+                END DO
+                !$OMP END SIMD
+        END IF
+    END DO
+    IF (n_charged_live_p4 .NE. n_charged_live) THEN
+            !$OMP SINGLE
+            sincos(n_charged_live+1:n_charged_live_p4,:,:,:) = 0.0_DP
+            rpq(n_charged_live+1:n_charged_live_p4,4) = 0.0_DP
+            !$OMP END SINGLE NOWAIT
+    END IF
+    !$OMP BARRIER
+    !$OMP DO SCHEDULE(STATIC)
+    DO i = 1, nvecs(this_box)
+        kxyz = box_list(this_box)%kspace_vector_ints(i)
+        CALL Extract_Kvector_Ints(kxyz,kx,ky,kz)
+        sin_sum_i = 0.0_DP
+        cos_sum_i = 0.0_DP
+        !DIR$ ASSUME (MOD(n_charged_live_p4,dimpad_8byte) .EQ. 0)
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,charge) &
+        !$OMP REDUCTION(+:sin_sum_i,cos_sum_i)
+        DO ia = 1, n_charged_live_p4
+                sin1 = sincos(ia,1,kx,1)
+                cos1 = sincos(ia,2,kx,1)
+                sin2 = sincos(ia,1,ky,2)
+                cos2 = sincos(ia,2,ky,2)
+                sin3 = sincos(ia,1,kz,3)
+                cos3 = sincos(ia,2,kz,3)
+                charge = rpq(ia,4)
+                sin12 = sin1*cos2 + cos1*sin2
+                cos12 = cos1*cos2 - sin1*sin2
+                sin1 = sin12*cos3 + cos12*sin3
+                cos1 = cos12*cos3 - sin12*sin3
+                sin_sum_i = sin_sum_i + charge*sin1
+                cos_sum_i = cos_sum_i + charge*cos1
+        END DO
+        !$OMP END SIMD
+        box_list(this_box)%sincos_sum(i,1) = sin_sum_i
+        box_list(this_box)%sincos_sum(i,2) = cos_sum_i
+    END DO
+    !$OMP END DO
+    !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(sin_sum_i,cos_sum_i,factor) &
+    !$OMP REDUCTION(+:E_reciprocal)
+    DO i = 1, nvecs(this_box)
+        sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+        cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+        factor = box_list(this_box)%kspace_vectors(i,4)
+        sin_sum_i = sin_sum_i*sin_sum_i
+        sin_sum_i = sin_sum_i + cos_sum_i*cos_sum_i
+        E_reciprocal = E_reciprocal + factor*sin_sum_i
     END DO
+    !$OMP END DO SIMD
+    !$OMP END PARALLEL
+    energy(this_box)%reciprocal = E_reciprocal*charge_factor
 
-  W_tensor_recip(1,2,this_box) = W_tensor_recip(2,1,this_box)
-  W_tensor_recip(1,3,this_box) = W_tensor_recip(3,1,this_box)
-  W_tensor_recip(3,2,this_box) = W_tensor_recip(2,3,this_box)
 
-  END SUBROUTINE Compute_System_Ewald_Reciprocal_Force
+
+
+  END SUBROUTINE Compute_System_Ewald_Reciprocal_Energy_Pregen
 
   !-----------------------------------------------------------------------------
 
@@ -4132,102 +7780,390 @@ SUBROUTINE Compute_System_Ewald_Reciprocal_Energy(this_box)
     REAL(DP) :: un, const_val
     REAL(DP) :: charge, hdotr, E_reciprocal
 
-    ! individual k-space vector stuff
-    INTEGER ::  position
-    INTEGER, ALLOCATABLE :: im_locate(:,:)
+    INTEGER :: i_dim
+    REAL(DP) :: cos_sum_i,sin_sum_i,this_cos_mol,this_sin_mol
+    REAL(DP) :: sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12
+    REAL(DP) :: trigsum, factor
+    REAL(DP), DIMENSION(3,3) :: H_inv
+    INTEGER :: im_locate_shift, im_locate_shift_vec(nspecies)
+    INTEGER :: nlive_count, nlive_count_p4, n_charged_atoms, n_charged_atoms_p4
+    REAL(DP), DIMENSION(IAND(MAXVAL(natoms)+padconst_8byte,padmask_8byte)) :: charges
+    INTEGER, DIMENSION(MAXVAL(natoms)) :: which_charged_atoms
+    INTEGER, DIMENSION(MAXVAL(nmols(:,this_box))) :: live_locates
+    LOGICAL :: molvectorized, l_ortho
+    REAL(DP), DIMENSION(:,:,:), ALLOCATABLE :: rp
+    REAL(DP), DIMENSION(:,:,:,:,:), ALLOCATABLE :: sincos
+    REAL(DP), DIMENSION(:,:), ALLOCATABLE :: species_cos_mol, species_sin_mol
+    INTEGER :: kxyz,kx,ky,kz,kxyz_maxmax,kxyz_max(3)
+
+    !! individual k-space vector stuff
+    !INTEGER ::  position
+    !INTEGER, ALLOCATABLE :: im_locate(:,:)
 
-    ! openmp stuff
-!   INTEGER :: omp_get_num_threads, omp_get_thread_num
 
     ! Initialize variables
-    const_val = 1.0_DP/(2.0_DP * alpha_ewald(this_box) * alpha_ewald(this_box))
     E_reciprocal = 0.0_DP
-    !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-    cos_sum(:,this_box) = 0.0_DP
-    sin_sum(:,this_box) = 0.0_DP
-    !$OMP END PARALLEL WORKSHARE
-
-    ! Create an index, im_locate, for each live molecule in this_box
-    ! im_locate will be used to access cos_mol and sin_mol
-    ALLOCATE(im_locate(MAXVAL(max_molecules),nspecies))
-    DO is = 1, nspecies
-       DO im = 1, nmols(is,this_box)
-          this_locate = locate(im,is,this_box)
-          IF (.NOT. molecule_list(this_locate,is)%live) CYCLE
+    l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+    kxyz_maxmax = box_list(this_box)%kxyz_maxmax
+    kxyz_max = box_list(this_box)%kxyz_max
+    H_inv = twoPI*box_list(this_box)%length_inv
+    im_locate_shift_vec(1) = 0
+    im_locate_shift = max_molecules(1)
+    DO is = 2, nspecies
+        im_locate_shift_vec(is) = im_locate_shift
+        im_locate_shift = im_locate_shift + max_molecules(is)
+    END DO
+    !$OMP PARALLEL DEFAULT(SHARED) &
+    !$OMP PRIVATE(i,is,im,i_dim,ia,this_locate,kxyz,kx,ky,kz) &
+    !$OMP PRIVATE(cos_sum_i,sin_sum_i,this_cos_mol,this_sin_mol) &
+    !$OMP PRIVATE(sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,charge) &
+    !$OMP PRIVATE(trigsum,factor)
 
-          ! create index
-          IF (is == 1) THEN
-             im_locate(im,is) = this_locate
-          ELSE
-             im_locate(im,is) = SUM(max_molecules(1:is-1)) + this_locate
-          END IF
 
-       END DO
-    END DO
+
+    !$OMP WORKSHARE
+    box_list(this_box)%sincos_sum = 0.0_DP
+    !$OMP END WORKSHARE
+
 
     ! Loop over each species, molecule
     DO is = 1, nspecies
        ! skip nonpolar species
-       IF (.NOT. has_charge(is)) CYCLE
-
+       IF (nmols(is,this_box) == 0 .OR. .NOT. has_charge(is)) CYCLE
+       !$OMP SINGLE
+       nlive_count = 0
        DO im = 1, nmols(is,this_box)
-          this_locate = locate(im,is,this_box) ! index to atom_list, molecule_list
-          IF( .NOT. molecule_list(this_locate,is)%live) CYCLE
-
-          position = im_locate(im,is) ! index to cos_mol, sin_mol
-
-          !$OMP PARALLEL DO DEFAULT(SHARED) &
-          !$OMP PRIVATE(i,ia,hdotr,charge) &
-          !$OMP SCHEDULE(STATIC)
-
-          ! loop over all the k vectors of this box
-          DO i = 1, nvecs(this_box)
-
-             cos_mol(i,position) = 0.0_DP
-             sin_mol(i,position) = 0.0_DP
-
-             DO ia = 1, natoms(is)
-                ! compute hdotr
-                hdotr = hx(i,this_box) * atom_list(ia,this_locate,is)%rxp + &
-                        hy(i,this_box) * atom_list(ia,this_locate,is)%ryp + &
-                        hz(i,this_box) * atom_list(ia,this_locate,is)%rzp
-
+                this_locate = locate(im,is,this_box)
+                IF (.NOT. molecule_list(this_locate,is)%live) CYCLE
+                nlive_count = nlive_count + 1
+                live_locates(nlive_count) = this_locate
+       END DO
+       nlive_count_p4 = IAND(nlive_count+padconst_8byte,padmask_8byte)
+       n_charged_atoms = 0
+       charges = 0.0_DP
+       DO ia = 1, natoms(is)
                 charge = nonbond_list(ia,is)%charge
-
-                cos_mol(i,position) = cos_mol(i,position) + charge * DCOS(hdotr)
-                sin_mol(i,position) = sin_mol(i,position) + charge * DSIN(hdotr)
-             END DO
-
-             cos_sum(i,this_box) = cos_sum(i,this_box) &
-                                 + cos_mol(i,position)
-             sin_sum(i,this_box) = sin_sum(i,this_box) &
-                                 + sin_mol(i,position)
-          END DO
-
-          !$OMP END PARALLEL DO
-
+                IF (charge == 0.0_DP) CYCLE
+                n_charged_atoms = n_charged_atoms + 1
+                which_charged_atoms(n_charged_atoms) = ia
+                charges(n_charged_atoms) = charge
        END DO
+       n_charged_atoms_p4 = IAND(n_charged_atoms+padconst_8byte,padmask_8byte)
+       molvectorized = nlive_count > n_charged_atoms ! vectorize over molecules instead of atoms
+       IF (ALLOCATED(rp)) DEALLOCATE(rp)
+       IF (ALLOCATED(sincos)) DEALLOCATE(sincos)
+       IF (ALLOCATED(species_cos_mol)) DEALLOCATE(species_cos_mol)
+       IF (ALLOCATED(species_sin_mol)) DEALLOCATE(species_sin_mol)
+       IF (molvectorized) THEN
+               ALLOCATE(rp(nlive_count_p4,3,n_charged_atoms))
+               ALLOCATE(sincos(nlive_count_p4,n_charged_atoms,2,-kxyz_maxmax:kxyz_maxmax,3))
+               ALLOCATE(species_cos_mol(nlive_count_p4,nvecs(this_box)))
+               ALLOCATE(species_sin_mol(nlive_count_p4,nvecs(this_box)))
+       ELSE
+               ALLOCATE(rp(n_charged_atoms_p4,3,nlive_count))
+               ALLOCATE(sincos(n_charged_atoms_p4,nlive_count,2,-kxyz_maxmax:kxyz_maxmax,3))
+       END IF
+       im_locate_shift = im_locate_shift_vec(is)
+       !$OMP END SINGLE
+       IF (molvectorized) THEN
+                !$OMP WORKSHARE
+                sincos = 0.0_DP
+                sincos(:,:,2,0,:) = 1.0_DP
+                rp = 0.0_DP
+                rp(1:nlive_count,1,1:n_charged_atoms) = TRANSPOSE(&
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(1))
+                rp(1:nlive_count,2,1:n_charged_atoms) = TRANSPOSE(&
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(2))
+                rp(1:nlive_count,3,1:n_charged_atoms) = TRANSPOSE(&
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(3))
+                !$OMP END WORKSHARE
+                IF (l_ortho) THEN
+                       !$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
+                       DO ia = 1, n_charged_atoms
+                               DO i_dim = 1, 3
+                                        CALL Fill_sincos_ortho(nlive_count_p4,ia,i_dim)
+                               END DO
+                       END DO
+                       !$OMP END DO
+                ELSE
+                       !$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
+                       DO ia = 1, n_charged_atoms
+                               DO i_dim = 1, 3
+                                        CALL Fill_sincos_nonortho(nlive_count_p4,ia,i_dim)
+                               END DO
+                       END DO
+                       !$OMP END DO
+                END IF
+                IF (nlive_count_p4 > nlive_count) THEN
+                        !$OMP WORKSHARE
+                        sincos(nlive_count+1:nlive_count_p4,:,:,:,:) = 0.0_DP
+                        !$OMP END WORKSHARE
+                END IF
+                SELECT CASE(n_charged_atoms)
+                CASE(1)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,1)
+                        END DO
+                        !$OMP END DO
+                CASE(2)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,2)
+                        END DO
+                        !$OMP END DO
+                CASE(3)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,3)
+                        END DO
+                        !$OMP END DO
+                CASE(4)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,4)
+                        END DO
+                        !$OMP END DO
+                CASE(5)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,5)
+                        END DO
+                        !$OMP END DO
+                CASE(6)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,6)
+                        END DO
+                        !$OMP END DO
+                CASE(7)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,7)
+                        END DO
+                        !$OMP END DO
+                CASE(8)
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,8)
+                        END DO
+                        !$OMP END DO
+                CASE DEFAULT
+                        !$OMP DO SCHEDULE(STATIC)
+                        DO i = 1, nvecs(this_box)
+                                CALL Molvectorized_Loop(i,n_charged_atoms)
+                        END DO
+                        !$OMP END DO
+                END SELECT
+                !$OMP WORKSHARE
+                cos_mol(1:nvecs(this_box),im_locate_shift+live_locates(1:nlive_count)) = &
+                        TRANSPOSE(species_cos_mol(1:nlive_count,1:nvecs(this_box)))
+                sin_mol(1:nvecs(this_box),im_locate_shift+live_locates(1:nlive_count)) = &
+                        TRANSPOSE(species_sin_mol(1:nlive_count,1:nvecs(this_box)))
+                !$OMP END WORKSHARE
+       ELSE ! Vectorized over atoms
+                !$OMP WORKSHARE
+                sincos = 0.0_DP
+                sincos(:,:,2,0,:) = 1.0_DP
+                rp = 0.0_DP
+                rp(1:n_charged_atoms,1,1:nlive_count) = &
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(1)
+                rp(1:n_charged_atoms,2,1:nlive_count) = &
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(2)
+                rp(1:n_charged_atoms,3,1:nlive_count) = &
+                        atom_list(which_charged_atoms(1:n_charged_atoms),live_locates(1:nlive_count),is)%rp(3)
+                !$OMP END WORKSHARE
+                IF (l_ortho) THEN
+                        !$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
+                        DO im = 1, nlive_count
+                                DO i_dim = 1, 3
+                                        CALL Fill_sincos_ortho(n_charged_atoms_p4,im,i_dim)
+                                END DO
+                        END DO
+                        !$OMP END DO
+                ELSE
+                        !$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
+                        DO im = 1, nlive_count
+                                DO i_dim = 1, 3
+                                        CALL Fill_sincos_nonortho(n_charged_atoms_p4,im,i_dim)
+                                END DO
+                        END DO
+                        !$OMP END DO
+                END IF
+                !$OMP DO SCHEDULE(STATIC)
+                DO i = 1, nvecs(this_box)
+                        kxyz = box_list(this_box)%kspace_vector_ints(i)
+                        CALL Extract_Kvector_Ints(kxyz,kx,ky,kz)
+                        sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+                        cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+                        DO im = 1, nlive_count
+                                this_cos_mol = 0.0_DP
+                                this_sin_mol = 0.0_DP
+                                !DIR$ ASSUME (MOD(n_charged_atoms_p4,dimpad_8byte) .EQ. 0)
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP SIMD &
+                                !$OMP PRIVATE(sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,charge) &
+                                !$OMP REDUCTION(+:this_cos_mol,this_sin_mol)
+                                DO ia = 1, n_charged_atoms_p4
+                                        sin1 = sincos(ia,im,1,kx,1)
+                                        cos1 = sincos(ia,im,2,kx,1)
+                                        sin2 = sincos(ia,im,1,ky,2)
+                                        cos2 = sincos(ia,im,2,ky,2)
+                                        sin3 = sincos(ia,im,1,kz,3)
+                                        cos3 = sincos(ia,im,2,kz,3)
+                                        sin12 = sin1*cos2 + cos1*sin2
+                                        cos12 = cos1*cos2 - sin1*sin2
+                                        sin1 = sin12*cos3 + cos12*sin3
+                                        cos1 = cos12*cos3 - sin12*sin3
+                                        charge = charges(ia)
+                                        this_cos_mol = this_cos_mol + charge*cos1
+                                        this_sin_mol = this_sin_mol + charge*sin1
+                                END DO
+                                !$OMP END SIMD
+                                this_locate = im_locate_shift + live_locates(im)
+                                cos_mol(i,this_locate) = this_cos_mol
+                                sin_mol(i,this_locate) = this_sin_mol
+                                cos_sum_i = cos_sum_i + this_cos_mol
+                                sin_sum_i = sin_sum_i + this_sin_mol
+                        END DO
+                        box_list(this_box)%sincos_sum(i,1) = sin_sum_i
+                        box_list(this_box)%sincos_sum(i,2) = cos_sum_i
+                END DO
+                !$OMP END DO
+       END IF
     END DO
 
     ! At the end of all the loops we have computed cos_sum, sin_sum, cos_mol and
     ! sin_mol for each of the k-vectors. Now let us calculate the reciprocal
     ! space energy
 
-    !$OMP PARALLEL DO DEFAULT(SHARED) &
-    !$OMP PRIVATE(i,  un)  &
-    !$OMP SCHEDULE(STATIC) &
+    !DIR$ VECTOR ALIGNED
+    !$OMP DO SIMD SCHEDULE(SIMD:STATIC) &
+    !$OMP PRIVATE(sin_sum_i,cos_sum_i,factor,trigsum) &
     !$OMP REDUCTION(+:E_reciprocal)
-
     DO i = 1, nvecs(this_box)
-       un =  cos_sum(i,this_box) * cos_sum(i,this_box) &
-          +  sin_sum(i,this_box) * sin_sum(i,this_box)
-       E_reciprocal = E_reciprocal + Cn(i,this_box) * un
+            sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+            cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+            factor = box_list(this_box)%kspace_vectors(i,4)
+            trigsum = sin_sum_i*sin_sum_i + cos_sum_i*cos_sum_i
+            E_reciprocal = E_reciprocal + factor*trigsum
     END DO
-
-    !$OMP END PARALLEL DO
+    !$OMP END DO SIMD
+    !$OMP END PARALLEL
 
     energy(this_box)%reciprocal = E_reciprocal * charge_factor
 
+    CONTAINS
+            SUBROUTINE Molvectorized_Loop(i,nca)
+                    !DIR$ ATTRIBUTES FORCEINLINE :: Molvectorized_Loop
+                    INTEGER, INTENT(IN) :: i, nca
+                    INTEGER :: im,ia
+                    REAL(DP) :: sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,charge
+                    REAL(DP) :: cos_sum_i, sin_sum_i, this_cos_mol, this_sin_mol
+                    kxyz = box_list(this_box)%kspace_vector_ints(i)
+                    CALL Extract_Kvector_Ints(kxyz,kx,ky,kz)
+                    sin_sum_i = box_list(this_box)%sincos_sum(i,1)
+                    cos_sum_i = box_list(this_box)%sincos_sum(i,2)
+                    !DIR$ VECTOR ALIGNED
+                    !$OMP SIMD PRIVATE(this_cos_mol,this_sin_mol) &
+                    !$OMP PRIVATE(sin1,cos1,sin2,cos2,sin3,cos3,sin12,cos12,charge) &
+                    !$OMP REDUCTION(+:cos_sum_i,sin_sum_i)
+                    DO im = 1, nlive_count_p4
+                            this_cos_mol = 0.0_DP
+                            this_sin_mol = 0.0_DP
+                            DO ia = 1, nca
+                                    sin1 = sincos(im,ia,1,kx,1)
+                                    cos1 = sincos(im,ia,2,kx,1)
+                                    sin2 = sincos(im,ia,1,ky,2)
+                                    cos2 = sincos(im,ia,2,ky,2)
+                                    sin3 = sincos(im,ia,1,kz,3)
+                                    cos3 = sincos(im,ia,2,kz,3)
+                                    sin12 = sin1*cos2 + cos1*sin2
+                                    cos12 = cos1*cos2 - sin1*sin2
+                                    sin1 = sin12*cos3 + cos12*sin3
+                                    cos1 = cos12*cos3 - sin12*sin3
+                                    charge = charges(ia)
+                                    this_cos_mol = this_cos_mol + charge*cos1
+                                    this_sin_mol = this_sin_mol + charge*sin1
+                            END DO
+                            species_cos_mol(im,i) = this_cos_mol
+                            species_sin_mol(im,i) = this_sin_mol
+                            cos_sum_i = cos_sum_i + this_cos_mol
+                            sin_sum_i = sin_sum_i + this_sin_mol
+                    END DO
+                    !$OMP END SIMD
+                    box_list(this_box)%sincos_sum(i,1) = sin_sum_i
+                    box_list(this_box)%sincos_sum(i,2) = cos_sum_i
+            END SUBROUTINE Molvectorized_Loop
+            SUBROUTINE Fill_sincos_ortho(imax,j,i_dim)
+                    !DIR$ ATTRIBUTES FORCEINLINE :: Fill_sincos_ortho
+                    INTEGER, INTENT(IN) :: imax, j, i_dim
+                    INTEGER :: ni, i
+                    REAL(DP) :: ki,sin1,cos1,nsin1,nsin2,ncos1,ncos2
+                    !DIR$ ASSUME (MOD(imax,dimpad_8byte) .EQ. 0)
+                    !DIR$ VECTOR ALIGNED
+                    !$OMP SIMD PRIVATE(ki,sin1,cos1,nsin1,nsin2,ncos1,ncos2)
+                    DO i = 1, imax
+                             ki = H_inv(i_dim,i_dim)*rp(i,i_dim,j)
+                             sin1 = SIN(ki)
+                             cos1 = COS(ki)
+                             sincos(i,j,1, 1,i_dim) = sin1
+                             sincos(i,j,1,-1,i_dim) = -sin1
+                             sincos(i,j,2, 1,i_dim) = cos1
+                             sincos(i,j,2,-1,i_dim) = cos1
+                             nsin1 = sin1
+                             ncos1 = cos1
+                             ! Ptolemy's identities
+                             DO ni = 2, kxyz_max(i_dim)
+                                     nsin2 = nsin1*cos1 + ncos1*sin1
+                                     ncos2 = ncos1*cos1 - nsin1*sin1
+                                     nsin1 = nsin2
+                                     ncos1 = ncos2
+                                     sincos(i,j,1, ni,i_dim) = nsin2
+                                     sincos(i,j,1,-ni,i_dim) = -nsin2
+                                     sincos(i,j,2, ni,i_dim) = ncos2
+                                     sincos(i,j,2,-ni,i_dim) = ncos2
+                             END DO
+                    END DO
+                    !$OMP END SIMD
+            END SUBROUTINE Fill_sincos_ortho
+            SUBROUTINE Fill_sincos_nonortho(imax,j,i_dim)
+                    !DIR$ ATTRIBUTES FORCEINLINE :: Fill_sincos_nonortho
+                    INTEGER, INTENT(IN) :: imax, j, i_dim
+                    INTEGER :: ni, i
+                    REAL(DP) :: ki,sin1,cos1,nsin1,nsin2,ncos1,ncos2
+                    REAL(DP) :: rxp,ryp,rzp
+                    !DIR$ ASSUME (MOD(imax,dimpad_8byte) .EQ. 0)
+                    !DIR$ VECTOR ALIGNED
+                    !$OMP SIMD PRIVATE(rxp,ryp,rzp,ki,sin1,cos1,nsin1,nsin2,ncos1,ncos2)
+                    DO i = 1, imax
+                             rxp = rp(i,1,j)
+                             ryp = rp(i,2,j)
+                             rzp = rp(i,3,j)
+                             ki = H_inv(1,i_dim)*rxp + H_inv(2,i_dim)*ryp + H_inv(3,i_dim)*rzp
+                             sin1 = SIN(ki)
+                             cos1 = COS(ki)
+                             sincos(i,j,1, 1,i_dim) = sin1
+                             sincos(i,j,1,-1,i_dim) = -sin1
+                             sincos(i,j,2, 1,i_dim) = cos1
+                             sincos(i,j,2,-1,i_dim) = cos1
+                             nsin1 = sin1
+                             ncos1 = cos1
+                             ! Ptolemy's identities
+                             DO ni = 2, kxyz_max(i_dim)
+                                     nsin2 = nsin1*cos1 + ncos1*sin1
+                                     ncos2 = ncos1*cos1 - nsin1*sin1
+                                     nsin1 = nsin2
+                                     ncos1 = ncos2
+                                     sincos(i,j,1, ni,i_dim) = nsin2
+                                     sincos(i,j,1,-ni,i_dim) = -nsin2
+                                     sincos(i,j,2, ni,i_dim) = ncos2
+                                     sincos(i,j,2,-ni,i_dim) = ncos2
+                             END DO
+                    END DO
+                    !$OMP END SIMD
+            END SUBROUTINE Fill_sincos_nonortho
+
   END SUBROUTINE Compute_System_Ewald_Reciprocal_Energy
 
   !-----------------------------------------------------------------------------
diff --git a/Src/file_names.f90 b/Src/file_names.f90
index a66b5234..2f1ac07b 100755
--- a/Src/file_names.f90
+++ b/Src/file_names.f90
@@ -160,12 +160,12 @@ MODULE File_Names
 CHARACTER(FILENAME_LEN) :: lattice_file
 
 ! Variables associated with widom property files
-INTEGER :: wprop_file_unit_base = 180
+INTEGER :: wprop_file_unit_base = 1000 !180
 INTEGER, DIMENSION(:,:), ALLOCATABLE :: wprop_file_unit
 CHARACTER(FILENAME_LEN), DIMENSION(:,:), ALLOCATABLE :: wprop_filenames
 LOGICAL, DIMENSION(:,:), ALLOCATABLE :: first_open_wprop
 
-INTEGER :: wprop2_file_unit_base = 190
+INTEGER :: wprop2_file_unit_base = 2000 !190
 INTEGER, DIMENSION(:,:), ALLOCATABLE :: wprop2_file_unit
 CHARACTER(FILENAME_LEN), DIMENSION(:,:), ALLOCATABLE :: wprop2_filenames
 LOGICAL, DIMENSION(:,:), ALLOCATABLE :: first_open_wprop2
diff --git a/Src/fragment_driver.f90 b/Src/fragment_driver.f90
index 54e7efcd..c5db22a9 100755
--- a/Src/fragment_driver.f90
+++ b/Src/fragment_driver.f90
@@ -83,9 +83,9 @@ SUBROUTINE Fragment_Driver
            WRITE(frag_file_unit,*) temperature(ibox), energy(ibox)%total
            DO ia = 1, natoms(is)
               WRITE(frag_file_unit,*) nonbond_list(ia,is)%element, &
-                                      atom_list(ia,im,is)%rxp, &
-                                      atom_list(ia,im,is)%ryp, &
-                                      atom_list(ia,im,is)%rzp
+                                      atom_list(ia,im,is)%rp(1), &
+                                      atom_list(ia,im,is)%rp(2), &
+                                      atom_list(ia,im,is)%rp(3)
            END DO
            
         END IF
diff --git a/Src/fragment_growth.f90 b/Src/fragment_growth.f90
index 10a57f97..9b40ef72 100755
--- a/Src/fragment_growth.f90
+++ b/Src/fragment_growth.f90
@@ -70,7 +70,7 @@ MODULE Fragment_Growth
 
 !*******************************************************************************
 SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
-              ln_pseq,ln_pbias,nrg_ring_frag_total,cbmc_overlap)
+              ln_pseq,ln_pbias,nrg_ring_frag_total,cbmc_overlap,E_interfrag)
 !*******************************************************************************
 !
 ! PURPOSE: build the molecule from scratch
@@ -104,6 +104,7 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
   ! Declare and Initialize Variables
   !*****************************************************************************
 
+  !DIR$ ATTRIBUTES ALIGN : 32 :: trial_atom_rp, trial_cell_coords, bitcell_overlap, xyz_rand_dp, nrg_sp_vec, xyz_rand, rtrial
   ! Arguments
   INTEGER :: this_im  ! molecule index
   INTEGER :: is       ! species index
@@ -116,6 +117,7 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
   REAL(DP), INTENT(OUT) :: nrg_ring_frag_total ! potential energy of the
                                                ! isolated ring fragment
   LOGICAL, INTENT(INOUT) :: cbmc_overlap ! did all trials have core overlap?
+  REAL(DP), INTENT(OUT), OPTIONAL :: E_interfrag ! total intramolecular, interfragment energy of molecule
 
   ! Local declarations
   INTEGER :: i, j, this_atom      ! atom indices
@@ -136,7 +138,7 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
                                            ! first fragment
   REAL(DP) :: dx, dy, dz
 
-  REAL(DP), DIMENSION(kappa_ins) :: xcom_trial, ycom_trial, zcom_trial
+  REAL(DP), DIMENSION(species_list(is)%kappa_ins_pad8) :: xcom_trial, ycom_trial, zcom_trial
 
   LOGICAL :: overlap ! TRUE if there is core overlap between a trial atom 
                      ! position and a an atom already in the box
@@ -144,45 +146,132 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
   CHARACTER  :: this_file*120, symbol*1
 
   TYPE(Molecule_Class), POINTER :: this_molecule
-  TYPE(Atom_Class), POINTER :: these_atoms(:)
+  TYPE(Atom_Class), CONTIGUOUS, POINTER :: these_atoms(:)
 
 
   ! Variables associated with the CBMC part
-  INTEGER :: itrial, trial, frag_type, n_frag_atoms
+  INTEGER :: itrial, trial, frag_type, ifrag_natoms
+  INTEGER :: itrial_start
 
-  REAL(DP) :: weight(kappa_ins), rand_no, E_dihed
+  REAL(DP) :: weight(species_list(is)%kappa_ins_pad8), rand_no, E_dihed
   REAL(DP) :: E_intra_vdw, E_intra_qq, E_inter_vdw, E_inter_qq, E_total
-  REAL(DP) :: nrg(kappa_ins), nrg_kBT, nrg_ring_frag
+  REAL(DP) :: nrg(species_list(is)%kappa_ins_pad8), nrg_kBT, nrg_ring_frag
 
-  LOGICAL :: del_overlap, overlap_trial(kappa_ins)
+  LOGICAL :: del_overlap, overlap_trial(species_list(is)%kappa_ins_pad8)
 
-  Type(Atom_Class) :: rtrial(MAXVAL(natoms),0:MAX(kappa_ins,kappa_rot,kappa_dih))
+  !Type(Atom_Class) :: rtrial(MAXVAL(natoms),0:MAX(kappa_ins,kappa_rot,kappa_dih))
   ! Slit pore variables
 
   LOGICAL :: framework_overlap
   REAL(DP) :: E_framework
 
   ! Inner-volume variables
-  REAL(DP) :: radius, radius2, theta, phi
+  REAL(DP) :: radius, radius2, theta, phi, rsinphi, zmax, hzmax, inner_dz, zscale, log_inner_radius
 
 !  ! DEBUGging variables
 !  INTEGER :: M_XYZ_unit
   REAL(DP) :: overlap_time_s, overlap_time_e, overlap_time
   LOGICAL :: omp_flag
   LOGICAL :: need_max_dcom
+  LOGICAL :: l_store_dp_trials, l_ortho
+
+  LOGICAL :: l_get_bitcell
+
+  INTEGER :: n_good_trials, n_good_trials_old
+
+  REAL(DP) :: rtrial0(3,MAXVAL(frag_list(1:nfragments(is),is)%natoms))
+  REAL(DP) :: rtrial(species_list(is)%kappa_ins_pad8,3,MAXVAL(frag_list(1:nfragments(is),is)%natoms))
+  REAL(DP), DIMENSION(species_list(is)%kappa_ins_pad8,3) :: xyz_rand_dp
+  REAL(SP), DIMENSION(species_list(is)%kappa_ins_pad8,3) :: xyz_rand
+  REAL(DP) :: xl, hxl, yl, hyl, zl, hzl, length_dp(3,3), drxcom, drycom, drzcom, isp, max_dcomsq
+  REAL(DP), DIMENSION(MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: drxcom_vec, drycom_vec, drzcom_vec
+  REAL(DP), DIMENSION(MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: drxref_vec, dryref_vec, drzref_vec
+  REAL(DP), DIMENSION(3,MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: drref_mat
+  REAL(DP) :: dscom_big_atom(3), drxref, dryref, drzref, sxcom_dp, sycom_dp, szcom_dp
+  REAL(SP) :: dsxref, dsyref, dszref 
+  REAL(SP), DIMENSION(3,MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: dsref_mat
+  REAL(SP) :: length_sp(3,3), dscom(3), dsref(3), dsxcom, dsycom, dszcom, sxcom, sycom, szcom
+  REAL(SP) :: sxp, syp, szp, rsl, hrsl, rsp
+  REAL(SP) :: rxp, ryp, rzp, this_atom_rp(3)
+  REAL(4), DIMENSION(species_list(is)%kappa_ins_pad8,3,MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: trial_atom_rp
+  INTEGER, DIMENSION(species_list(is)%kappa_ins_pad8,3,MAXVAL(frag_list(1:nfragments(is),is)%natoms)) :: trial_cell_coords
+
+  REAL(SP) :: nrg_sp, nrg_sp_vec(species_list(is)%kappa_ins_pad8), overlap_nrg_sp
+  REAL(DP) :: nrg_dp, cweight
+
+  INTEGER :: zbcdf, zlbc, ybcdf, ylbc, xlbc, ia_frag
+  INTEGER :: bitcell_bit, bitcell_int
+  INTEGER, DIMENSION(species_list(is)%kappa_ins_pad8) :: which_good_trials
+  INTEGER :: rlc, gtrial
+  INTEGER, DIMENSION(3) :: this_atom_ci
+
+  !LOGICAL(1) :: bitcell_overlap(species_list(is)%kappa_ins_pad32,MAXVAL(frag_list(1:nfragments(is),is)%natoms))
+  LOGICAL(1) :: bitcell_overlap(species_list(is)%kappa_ins_pad32)
+
+  LOGICAL :: l_widom_cells, this_bitcell_overlap
+
+  REAL(DP) :: overlap_nrg
+  INTEGER :: i_dim, ia
+
+  INTEGER :: bitcell_int_ior, bitcell_bit_min, bitcell_bit_max, bitcell_int1_min, bitcell_int1_max
+  INTEGER :: bitcell_int2_min, bitcell_int2_max
+
+  REAL(DP) :: t1, t2, t12, cbmc_time_s, cbmc_time_e, cbmc_time_e2, cbmc_time_e3, cbmc_time_e4
+  REAL(DP) :: rng_time_s, rng_time_e, rng_time, cell_list_cbmc_nrg_time, trial_loop_time_s, trial_loop_time
+  REAL(DP) :: cell_list_cbmc_nrg_time_s, cell_list_cbmc_nrg_time_e, noncell_cbmc_nrg_time_s, noncell_cbmc_nrg_time
+  REAL(DP) :: noncell_cbmc_nrg_time_e
+
+  INTEGER :: kappa_ins, kappa_ins_pad8, kappa_ins_pad32, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
+
+  REAL(DP) :: sz_rand_dp, zpart_width, zpart_shift
+  LOGICAL, PARAMETER :: l_zscan = .FALSE.
+
+  INTEGER :: n_small_atoms, i_big_atom, ia_frag_big_atom, which_small_atoms(MAXVAL(frag_list(1:nfragments(is),is)%natoms))
+  LOGICAL :: l_widom_cavity_biasing
+  !INTEGER :: n_zparts
+  !INTEGER, PARAMETER :: zpartition_kappa = 32 ! must be a multiple of 8 (or less than 8 to turn off z-partitioning)
+  IF (widom_timing) THEN
+          omp_flag = .FALSE.
+          !$ omp_flag = .TRUE.
+          IF (.NOT. omp_flag) CALL CPU_TIME(cbmc_time_s)
+          !$ cbmc_time_s = omp_get_wtime()
+  END IF
+  kappa_ins = species_list(is)%kappa_ins
+  kappa_ins_pad8 = species_list(is)%kappa_ins_pad8
+  kappa_ins_pad32 = species_list(is)%kappa_ins_pad32
+  kappa_dih = species_list(is)%kappa_dih
+  kappa_dih_pad8 = species_list(is)%kappa_dih_pad8
+  kappa_dih_pad32 = species_list(is)%kappa_dih_pad32
+
+
+  !DIR$ ASSUME_ALIGNED trial_atom_rp:array_align_bytes, trial_cell_coords:array_align_bytes, bitcell_overlap:array_align_bytes, xyz_rand_dp:array_align_bytes
+  !DIR$ ASSUME_ALIGNED nrg_sp_vec:array_align_bytes, xyz_rand:array_align_bytes, rtrial:array_align_bytes
+  !DIR$ ASSUME (MOD(kappa_ins_pad8,dimpad_4byte) .EQ. 0)
+  !DIR$ ASSUME (MOD(kappa_ins_pad32,dimpad_1byte) .EQ. 0)
+
+
+
+
+
+
+  n_good_trials = species_list(is)%kappa_ins
+
   omp_flag = .FALSE.
   !$ omp_flag = .TRUE.
 
+  cbmc_overlap = .TRUE.
+
 
   ! Initialize variables
   IF (widom_active) THEN
           this_molecule => widom_molecule
           these_atoms => widom_atoms
+          l_store_dp_trials = .NOT. cbmc_cell_list_flag
   ELSE
           this_molecule => molecule_list(this_im,is)
           these_atoms => atom_list(:,this_im,is)
+          l_store_dp_trials = .TRUE.
   END IF
-  n_frag_atoms = 0
   ln_pbias = 0.0_DP
   this_box = this_molecule%which_box ! which box this_im is in
   these_atoms%exist = .FALSE. ! mark all the atoms as deleted 
@@ -197,6 +286,18 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
 
   weight(:) = 0.0_DP
 
+  overlap_nrg = max_kBT / beta(this_box)
+  overlap_nrg_sp = REAL(overlap_nrg,SP)
+
+  l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+  l_widom_cells = widom_active .AND. l_sectors
+  l_get_bitcell = widom_active .AND. bitcell_flag
+  length_dp = box_list(this_box)%length
+  length_sp = REAL(length_dp,4)
+  !IF (this_im == 2) THEN
+  !        WRITE(*,*) "second molecule placement"
+  !END IF
+
   !*****************************************************************************
   ! Step 1) Select which fragment will be inserted first
   !*****************************************************************************
@@ -267,6 +368,21 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
 
   ! If get_fragorder = .FALSE., then frag_start is not yet defined
   frag_start = frag_order(1)
+  ifrag_natoms = frag_list(frag_start,is)%natoms
+  l_widom_cavity_biasing = l_get_bitcell .AND. cavity_biasing_flag
+  IF (l_widom_cavity_biasing) THEN
+          i_big_atom = frag_list(frag_start,is)%i_big_atom
+          ia_frag_big_atom = frag_list(frag_start,is)%ia_frag_big_atom
+          n_small_atoms = 0
+          DO i = 1, ifrag_natoms
+                IF (i == ia_frag_big_atom) CYCLE
+                n_small_atoms = n_small_atoms + 1
+                which_small_atoms(n_small_atoms) = i
+          END DO
+          IF (n_small_atoms .EQ. 0) l_get_bitcell = .FALSE.
+  ELSE
+          n_small_atoms = ifrag_natoms
+  END IF
 
   ! If inserting a molecule, choose the first fragment's conformation from the 
   ! reservoir
@@ -290,16 +406,17 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
         this_atom = frag_list(frag_start,is)%atoms(i)
         
         nl = (frag_position_library(frag_type)-1) + &
-                                           frag_list(frag_start,is)%natoms*(this_fragment -1) + i 
-        these_atoms(this_atom)%rxp = &
-                                    !  frag_coords(i,this_fragment,frag_type)%rxp
-                                       library_coords(nl)%rxp
-        these_atoms(this_atom)%ryp = &
-                                     ! frag_coords(i,this_fragment,frag_type)%ryp
-                                        library_coords(nl)%ryp
-        these_atoms(this_atom)%rzp = &
-                                     ! frag_coords(i,this_fragment,frag_type)%rzp
-                                        library_coords(nl)%rzp
+                                           frag_list(frag_start,is)%natoms*(this_fragment -1) + i
+        these_atoms(this_atom)%rp(1:3) = library_coords(1:3,nl)
+        !these_atoms(this_atom)%rp(1) = &
+        !                            !  frag_coords(i,this_fragment,frag_type)%rp(1)
+        !                               library_coords(nl)%rp(1)
+        !these_atoms(this_atom)%rp(2) = &
+        !                             ! frag_coords(i,this_fragment,frag_type)%rp(2)
+        !                                library_coords(nl)%rp(2)
+        !these_atoms(this_atom)%rp(3) = &
+        !                             ! frag_coords(i,this_fragment,frag_type)%rp(3)
+        !                                library_coords(nl)%rp(3)
       END DO
   END IF
   ! Turn on the molecule and its individual atoms
@@ -337,16 +454,17 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
 
      this_atom = frag_list(frag_start,is)%atoms(i)
      
-     rtrial(this_atom,0)%rxp = these_atoms(this_atom)%rxp
-     rtrial(this_atom,0)%ryp = these_atoms(this_atom)%ryp
-     rtrial(this_atom,0)%rzp = these_atoms(this_atom)%rzp
+     !rtrial(this_atom,0)%rp(1) = these_atoms(this_atom)%rp(1)
+     !rtrial(this_atom,0)%rp(2) = these_atoms(this_atom)%rp(2)
+     !rtrial(this_atom,0)%rp(3) = these_atoms(this_atom)%rp(3)
+     rtrial0(:,i) = these_atoms(this_atom)%rp(1:3)
 
   END DO
 
   ! Store the COM
-  xcom_old = this_molecule%xcom
-  ycom_old = this_molecule%ycom
-  zcom_old = this_molecule%zcom
+  xcom_old = this_molecule%rcom(1)
+  ycom_old = this_molecule%rcom(2)
+  zcom_old = this_molecule%rcom(3)
 
   ! We will place this fragment based only on its external weight
 
@@ -355,23 +473,23 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
      CALL Compute_Max_COM_Distance(this_im,is)
      IF(.NOT. del_flag) THEN
 
-        dx = molecule_list(imreplace,isreplace)%xcom &
-           - this_molecule%xcom
-        dy = molecule_list(imreplace,isreplace)%ycom &
-           - this_molecule%ycom
-        dz = molecule_list(imreplace,isreplace)%zcom &
-           - this_molecule%zcom
+        dx = molecule_list(imreplace,isreplace)%rcom(1) &
+           - this_molecule%rcom(1)
+        dy = molecule_list(imreplace,isreplace)%rcom(2) &
+           - this_molecule%rcom(2)
+        dz = molecule_list(imreplace,isreplace)%rcom(3) &
+           - this_molecule%rcom(3)
 
-        this_molecule%xcom = molecule_list(imreplace,isreplace)%xcom
-        this_molecule%ycom = molecule_list(imreplace,isreplace)%ycom
-        this_molecule%zcom = molecule_list(imreplace,isreplace)%zcom
+        this_molecule%rcom(1) = molecule_list(imreplace,isreplace)%rcom(1)
+        this_molecule%rcom(2) = molecule_list(imreplace,isreplace)%rcom(2)
+        this_molecule%rcom(3) = molecule_list(imreplace,isreplace)%rcom(3)
 
         DO i = 1, frag_list(frag_start,is)%natoms
 
            this_atom = frag_list(frag_start,is)%atoms(i)
-           these_atoms(this_atom)%rxp = rtrial(this_atom,0)%rxp + dx
-           these_atoms(this_atom)%ryp = rtrial(this_atom,0)%ryp + dy
-           these_atoms(this_atom)%rzp = rtrial(this_atom,0)%rzp + dz
+           these_atoms(this_atom)%rp(1) = rtrial0(1,i) + dx
+           these_atoms(this_atom)%rp(2) = rtrial0(2,i) + dy
+           these_atoms(this_atom)%rp(3) = rtrial0(3,i) + dz
         END DO
 
      END IF
@@ -379,212 +497,546 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
   ELSE
      need_max_dcom = .TRUE.
 
-     ! Loop over the multiple trial coordinates
-     trial_loop: DO itrial = 1, kappa_ins
-
-        IF ( del_flag .AND. (itrial == 1 )) THEN
-        
-           ! Use the COM of the current position
-           x_anchor = xcom_old
-           y_anchor = ycom_old
-           z_anchor = zcom_old
-        
-        ELSE
-
-           ! Select a random trial coordinate
-           IF (box_list(this_box)%int_box_shape == int_cubic .OR. &
-               box_list(this_box)%int_box_shape == int_ortho) THEN
-              
-              x_anchor = (0.5_DP - rranf()) * box_list(this_box)%length(1,1)
-              y_anchor = (0.5_DP - rranf()) * box_list(this_box)%length(2,2)
-              z_anchor = (0.5_DP - rranf()) * box_list(this_box)%length(3,3)
-
-           ELSE
-
-              !Generate random positions in fractional coordinates
-
-              x_anchor = 0.5_DP - rranf()
-              y_anchor = 0.5_DP - rranf()
-              z_anchor = 0.5_DP - rranf()
-
-              !transform back to cartesian
-
-              x_anchor = box_list(this_box)%length(1,1)*x_anchor + &
-                       box_list(this_box)%length(1,2)*y_anchor +   &
-                       box_list(this_box)%length(1,3)*z_anchor
-
-              y_anchor = box_list(this_box)%length(2,1)*x_anchor + &
-                       box_list(this_box)%length(2,2)*y_anchor +   &
-                       box_list(this_box)%length(2,3)*z_anchor
-
-              z_anchor = box_list(this_box)%length(3,1)*x_anchor + &
-                       box_list(this_box)%length(3,2)*y_anchor +   &
-                       box_list(this_box)%length(3,3)*z_anchor
-
-           END IF
-
-           IF (species_list(is)%insertion == 'RESTRICTED' .AND. box_list(this_box)%int_inner_shape /= int_none) THEN
-              IF (box_list(this_box)%int_inner_shape == int_sphere) THEN
-                 radius2 = x_anchor**2 + y_anchor**2 + z_anchor**2
-                 IF (radius2 > box_list(this_box)%inner_radius2) THEN
-                    theta = 2.0_DP*PI*rranf()
-                    phi = ACOS(2.0_DP*rranf()-1.0_DP)
-                    radius = (rranf())**(1.0_DP/3.0_DP) * box_list(this_box)%inner_radius
-                    x_anchor = radius * COS(theta) * SIN(phi)
-                    y_anchor = radius * SIN(theta) * SIN(phi)
-                    z_anchor = radius * COS(phi)
-                 END IF
-              ELSE IF (box_list(this_box)%int_inner_shape == int_cylinder) THEN
-                 radius2 = x_anchor**2 + y_anchor**2
-                 IF (radius2 > box_list(this_box)%inner_radius2) THEN
-                    theta = 2.0_DP*PI*rranf()
-                    radius = SQRT(rranf()) * box_list(this_box)%inner_radius
-                    x_anchor = radius * COS(theta)
-                    y_anchor = radius * SIN(theta)
-                 END IF
-              ELSE IF (box_list(this_box)%int_inner_shape == int_slitpore) THEN
-                 IF (ABS(z_anchor) > box_list(this_box)%inner_zmax) THEN
-                    z_anchor = (0.5_DP - rranf()) * box_list(this_box)%inner_zmax
-                 END IF
-              ELSE IF (box_list(this_box)%int_inner_shape == int_interface) THEN
-                 IF (ABS(z_anchor) > box_list(this_box)%inner_zmax .OR. &
-                     ABS(z_anchor) < box_list(this_box)%inner_zmin) THEN
-                    z_anchor = rranf() * (box_list(this_box)%inner_zmax - box_list(this_box)%inner_zmin)
-                    z_anchor = z_anchor + box_list(this_box)%inner_zmin
-                    IF (rranf() > 0.5_DP) THEN
-                       z_anchor = - z_anchor 
-                    END IF
-                 END IF
-              END IF
-           END IF
-
-        END IF
-!widom_timing        IF (.NOT. omp_flag) CALL cpu_time(overlap_time_s)
-!widom_timing        !$ overlap_time_s = omp_get_wtime()
-
-        ! Place the fragment (and all its atoms) at the trial coordinate
-        atom_loop: DO i = 1, frag_list(frag_start,is)%natoms
-        
-           this_atom = frag_list(frag_start,is)%atoms(i)
-        
-           these_atoms(this_atom)%rxp = & 
-                                   rtrial(this_atom,0)%rxp - xcom_old + x_anchor
-           these_atoms(this_atom)%ryp = & 
-                                   rtrial(this_atom,0)%ryp - ycom_old + y_anchor
-           these_atoms(this_atom)%rzp = &
-                                   rtrial(this_atom,0)%rzp - zcom_old + z_anchor
-           IF (l_sectors .AND. widom_active) THEN
-                   IF (check_overlap(this_atom,this_im,is)) THEN
-                           IF (itrial > 1) THEN
-                                   weight(itrial) = weight(itrial-1)
-                           ELSE
-                                   weight(itrial) = 0.0_DP
-                           END IF
-                           overlap_trial(itrial) = .TRUE.
-!widom_timing                           n_clo = n_clo + 1_INT64
-!widom_timing                           IF (.NOT. omp_flag) CALL cpu_time(overlap_time_e)
-!widom_timing                           !$ overlap_time_e = omp_get_wtime()
-!widom_timing                           cell_list_time = cell_list_time + (overlap_time_e - overlap_time_s)
-                           CYCLE trial_loop
-                   END IF
-           END IF
-           
-           rtrial(this_atom,itrial)%rxp = these_atoms(this_atom)%rxp
-           rtrial(this_atom,itrial)%ryp = these_atoms(this_atom)%ryp
-           rtrial(this_atom,itrial)%rzp = these_atoms(this_atom)%rzp
+     IF (widom_timing) THEN
+             IF (.NOT. omp_flag) CALL CPU_TIME(rng_time_s)
+             !$ rng_time_s = omp_get_wtime()
+     END IF
+     !CALL vector_rranf(TRANSFER(xyz_rand_dp,xyz_rand_dp)) ! Don't do this; it causes a wasteful memcopy to be generated
+     IF (l_widom_cavity_biasing) THEN
+             CALL cavity_biased_rranf(xyz_rand_dp,i_big_atom,this_box)
+             DO i_dim = 1, 3
+                     !$DIR$ VECTOR ALIGNED
+                     xyz_rand(:,i_dim) = REAL(xyz_rand_dp(:,i_dim),SP)
+             END DO
+     ELSE IF (l_widom_cells .AND. l_zscan .AND. kappa_ins >= 8) THEN
+             CALL array_boxscan_rranf(xyz_rand_dp,kappa_ins)
+             !DIR$ VECTOR ALIGNED
+             xyz_rand = REAL(xyz_rand_dp,SP)
+     ELSE IF (compatibility_mode) THEN
+             IF (del_flag) xyz_rand_dp(1,:) = 0.0_DP
+             DO i = MERGE(2,1,del_flag), kappa_ins
+                DO i_dim = 1, 3
+                        xyz_rand_dp(i,i_dim) = 1.0_DP - rranf()
+                END DO
+             END DO
+             xyz_rand = REAL(xyz_rand_dp,SP)
+     ELSE
+             DO i_dim = 1, 3
+                CALL vector_rranf(xyz_rand_dp(:,i_dim))
+                IF (l_widom_cells) THEN
+                        !DIR$ VECTOR ALIGNED
+                        xyz_rand(:,i_dim) = REAL(xyz_rand_dp(:,i_dim),SP)
+                END IF
+                !DO itrial = 1, kappa_ins
+                !        xyz_rand_dp(itrial,i_dim) = rranf()
+                !END DO
+             END DO
+     END IF
+     !IF (l_widom_cells) xyz_rand = REAL(xyz_rand_dp,SP)
+     IF (widom_timing) THEN
+             IF (.NOT. omp_flag) CALL CPU_TIME(rng_time_e)
+             !$ rng_time_e = omp_get_wtime()
+             rng_time = rng_time_e - rng_time_s
+             rng_ins_time = rng_ins_time + rng_time
+     END IF
+     xl = box_list(this_box)%length(1,1)
+     hxl = 0.5_DP * xl
+     yl = box_list(this_box)%length(2,2)
+     hyl = 0.5_DP * yl
+     zl = box_list(this_box)%length(3,3)
+     hzl = 0.5_DP * zl
+     drxcom_vec = 0.0_DP
+     drycom_vec = 0.0_DP
+     drzcom_vec = 0.0_DP
+     IF (l_widom_cavity_biasing) THEN
+             dscom_big_atom = MATMUL(&
+                     box_list(this_box)%length_inv,&
+                     rtrial0(:,ia_frag_big_atom) - this_molecule%rcom(1:3))
+     END IF
+     drxcom_vec(1:frag_list(frag_start,is)%natoms) = &
+             rtrial0(1,1:frag_list(frag_start,is)%natoms) - xcom_old
+     drycom_vec(1:frag_list(frag_start,is)%natoms) = &
+             rtrial0(2,1:frag_list(frag_start,is)%natoms) - ycom_old
+     drzcom_vec(1:frag_list(frag_start,is)%natoms) = &
+             rtrial0(3,1:frag_list(frag_start,is)%natoms) - zcom_old
+     IF (l_widom_cavity_biasing) THEN
+             drxref_vec(1:ifrag_natoms) = &
+                     rtrial0(1,1:ifrag_natoms) - rtrial0(1,ia_frag_big_atom)
+             dryref_vec(1:ifrag_natoms) = &
+                     rtrial0(2,1:ifrag_natoms) - rtrial0(2,ia_frag_big_atom)
+             drzref_vec(1:ifrag_natoms) = &
+                     rtrial0(3,1:ifrag_natoms) - rtrial0(3,ia_frag_big_atom)
+     ELSE
+             drxref_vec = drxcom_vec
+             dryref_vec = drycom_vec
+             drzref_vec = drzcom_vec
+     END IF
+     IF (widom_timing) THEN
+          IF (.NOT. omp_flag) CALL CPU_TIME(cbmc_time_e)
+          !$ cbmc_time_e = omp_get_wtime()
+          t12 = cbmc_time_e - cbmc_time_s
+          cbmc_setup_ins_time = cbmc_setup_ins_time + t12
+     END IF
+     IF (species_list(is)%insertion == 'RESTRICTED' .AND. &
+             box_list(this_box)%int_inner_shape /= int_none .AND. &
+             .NOT. widom_active) THEN
+        n_good_trials = kappa_ins
+        SELECT CASE(box_list(this_box)%int_inner_shape)
+        CASE(int_sphere)
+                log_inner_radius = LOG(box_list(this_box)%inner_radius)
+                !DIR$ VECTOR ALIGNED
+                DO itrial = 1, kappa_ins
+                        theta = 2.0_DP*PI*xyz_rand_dp(itrial,1)
+                        phi = ACOS(2.0_DP*xyz_rand_dp(itrial,2)-1.0_DP)
+                        radius = EXP(LOG(xyz_rand_dp(itrial,3))/3.0_DP + log_inner_radius)
+                        rsinphi = SIN(phi)*radius
+                        xcom_trial(itrial) = COS(theta) * rsinphi
+                        ycom_trial(itrial) = SIN(theta) * rsinphi
+                        zcom_trial(itrial) = radius * COS(phi)
+                END DO
+        CASE(int_cylinder)
+                ! WARNING: this does not work for triclinic boxes
+                !DIR$ VECTOR ALIGNED
+                DO itrial = 1, kappa_ins
+                        theta = 2.0_DP*PI*xyz_rand_dp(itrial,1)
+                        radius = SQRT(xyz_rand_dp(itrial,2)) * box_list(this_box)%inner_radius
+                        xcom_trial(itrial) = radius * COS(theta)
+                        ycom_trial(itrial) = radius * SIN(theta)
+                        zcom_trial(itrial) = zl*xyz_rand_dp(itrial,3) - hzl 
+                END DO
+        CASE(int_slitpore)
+                zmax = box_list(this_box)%inner_zmax
+                hzmax = 0.5_DP * zmax
+                !DIR$ VECTOR ALIGNED
+                DO itrial = 1, kappa_ins
+                        xcom_trial(itrial) = xyz_rand_dp(itrial,1)*xl - hxl
+                        ycom_trial(itrial) = xyz_rand_dp(itrial,2)*yl - hyl
+                        zcom_trial(itrial) = xyz_rand_dp(itrial,3)*zmax - hzmax
+                END DO
+        CASE(int_interface)
+                inner_dz = box_list(this_box)%inner_zmax - box_list(this_box)%inner_zmin
+                !DIR$ VECTOR ALIGNED
+                DO itrial = 1, kappa_ins
+                        xcom_trial(itrial) = xyz_rand_dp(itrial,1)*xl - hxl
+                        ycom_trial(itrial) = xyz_rand_dp(itrial,2)*yl - hyl
+                        zscale = xyz_rand_dp(itrial,3)*2.0_DP - 1.0_DP
+                        zcom_trial(itrial) = SIGN(box_list(this_box)%inner_zmin,zscale) + zscale*inner_dz
+                END DO
+        END SELECT
+     ELSE! IF (l_widom_cells) THEN
+             IF (l_get_bitcell) THEN
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(t1)
+                             !$ t1 = omp_get_wtime()
+                     END IF
+                     zbcdf = box_list(this_box)%bitcell_dimfactor(3)
+                     zlbc = box_list(this_box)%length_bitcells(3)
+                     ybcdf = box_list(this_box)%bitcell_dimfactor(2)
+                     ylbc = box_list(this_box)%length_bitcells(2)
+                     xlbc = box_list(this_box)%length_bitcells(1)
+
+                     IF (l_ortho) THEN
+                             IF (l_widom_cavity_biasing) THEN
+                                     dsref_mat(1,1:n_small_atoms) = drxref_vec(which_small_atoms(1:n_small_atoms))/xl
+                                     dsref_mat(2,1:n_small_atoms) = dryref_vec(which_small_atoms(1:n_small_atoms))/yl
+                                     dsref_mat(3,1:n_small_atoms) = drzref_vec(which_small_atoms(1:n_small_atoms))/zl
+                             ELSE
+                                     dsref_mat(1,1:n_small_atoms) = drxref_vec(1:n_small_atoms)/xl
+                                     dsref_mat(2,1:n_small_atoms) = dryref_vec(1:n_small_atoms)/yl
+                                     dsref_mat(3,1:n_small_atoms) = drzref_vec(1:n_small_atoms)/zl
+                             END IF
+                     ELSE
+                             IF (l_widom_cavity_biasing) THEN
+                                     drref_mat(1,1:n_small_atoms) = drxref_vec(which_small_atoms(1:n_small_atoms))
+                                     drref_mat(2,1:n_small_atoms) = dryref_vec(which_small_atoms(1:n_small_atoms))
+                                     drref_mat(3,1:n_small_atoms) = drzref_vec(which_small_atoms(1:n_small_atoms))
+                                     dsref_mat(:,1:n_small_atoms) = REAL(MATMUL(box_list(this_box)%length_inv, &
+                                             drref_mat(:,1:n_small_atoms)),SP)
+                             ELSE
+                                     drref_mat(1,1:n_small_atoms) = drxref_vec(1:n_small_atoms)
+                                     drref_mat(2,1:n_small_atoms) = dryref_vec(1:n_small_atoms)
+                                     drref_mat(3,1:n_small_atoms) = drzref_vec(1:n_small_atoms)
+                                     dsref_mat(:,1:n_small_atoms) = REAL(MATMUL(box_list(this_box)%length_inv, &
+                                             drref_mat(:,1:n_small_atoms)),SP)
+                             END IF
+                     END IF
+                     SELECT CASE(n_small_atoms)
+                     CASE(1)
+                             CALL Bitcell_Overlap_Detection(1,box_list(this_box)%bitcell_int32_vec)
+                     CASE(2)
+                             CALL Bitcell_Overlap_Detection(2,box_list(this_box)%bitcell_int32_vec)
+                     CASE(3)
+                             CALL Bitcell_Overlap_Detection(3,box_list(this_box)%bitcell_int32_vec)
+                     CASE(4)
+                             CALL Bitcell_Overlap_Detection(4,box_list(this_box)%bitcell_int32_vec)
+                     CASE(5)
+                             CALL Bitcell_Overlap_Detection(5,box_list(this_box)%bitcell_int32_vec)
+                     CASE DEFAULT
+                             CALL Bitcell_Overlap_Detection(n_small_atoms,box_list(this_box)%bitcell_int32_vec)
+                     END SELECT
+                     n_good_trials = 0
+                     !DIR$ LOOP COUNT = 1000
+                     DO itrial = 1, kappa_ins
+                        IF (bitcell_overlap(itrial)) CYCLE
+                        n_good_trials = n_good_trials+1
+                        which_good_trials(n_good_trials) = itrial
+                        xyz_rand(n_good_trials,:) = xyz_rand(itrial,:)
+                     END DO
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(t2)
+                             !$ t2 = omp_get_wtime()
+                             t12 = t2 - t1
+                             bitcell_overlap_ins_time = bitcell_overlap_ins_time + t12
+                             bitcell_overlap_ins_checks = bitcell_overlap_ins_checks + kappa_ins
+                             bitcell_overlap_ins_overlaps = bitcell_overlap_ins_overlaps + (kappa_ins-n_good_trials)
+                     END IF
+                     IF (n_good_trials == 0) THEN
+                        CALL Set_CBMC_Flag(.FALSE.)
+                        RETURN
+                     END IF
+             ELSE
+                     n_good_trials = kappa_ins
+             END IF
+             IF (l_widom_cells) THEN
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(t1)
+                             !$ t1 = omp_get_wtime()
+                     END IF
+                     !DIR$ LOOP COUNT = 5
+                     DO ia_frag = 1, frag_list(frag_start,is)%natoms
+                             this_atom = frag_list(frag_start,is)%atoms(ia_frag)
+                             drxref = drxref_vec(ia_frag)
+                             dryref = dryref_vec(ia_frag)
+                             drzref = drzref_vec(ia_frag)
+                             IF (l_ortho) THEN
+                                dsxref = REAL(drxref / length_dp(1,1),4)
+                                dsyref = REAL(dryref / length_dp(2,2),4)
+                                dszref = REAL(drzref / length_dp(3,3),4)
+                                dsref = (/ dsxref, dsyref, dszref /)
+                             ELSE
+                                dsref = REAL(MATMUL(box_list(this_box)%length_inv, &
+                                        (/ drxref, dryref, drzref /)),4)
+                             END IF
+                             DO i_dim = 1, 3
+                                     rsl = MERGE(length_sp(i_dim,i_dim),1.0,l_ortho)
+                                     hrsl = 0.5*rsl
+                                     rlc = box_list(this_box)%real_length_cells(i_dim)
+                                     !DIR$ VECTOR ALIGNED
+                                     DO itrial = 1, n_good_trials
+                                        rsp = xyz_rand(itrial,i_dim) + dsref(i_dim)
+                                        IF (rsp < 0.0) rsp = rsp + 1.0
+                                        IF (rsp >= 1.0) rsp = rsp - 1.0
+                                        trial_cell_coords(itrial,i_dim,ia_frag) = &
+                                                INT(rsp*rlc) - box_list(this_box)%sectorbound(i_dim)
+                                        rsp = rsp*rsl - hrsl
+                                        trial_atom_rp(itrial,i_dim,ia_frag) = rsp
+                                     END DO
+                             END DO
+                             IF (.NOT. l_ortho) THEN
+                                     !DIR$ VECTOR ALIGNED
+                                     DO itrial = 1, n_good_trials
+                                        sxp = trial_atom_rp(itrial,1,ia_frag)
+                                        rxp = length_sp(1,1)*sxp
+                                        syp = trial_atom_rp(itrial,2,ia_frag)
+                                        rxp = rxp + length_sp(1,2)*syp
+                                        ryp = length_sp(2,2)*syp
+                                        szp = trial_atom_rp(itrial,3,ia_frag)
+                                        rxp = rxp + length_sp(1,3)*szp
+                                        ryp = ryp + length_sp(2,3)*szp
+                                        rzp = length_sp(3,3)*szp
+                                        trial_atom_rp(itrial,1,ia_frag) = rxp
+                                        trial_atom_rp(itrial,2,ia_frag) = ryp
+                                        trial_atom_rp(itrial,3,ia_frag) = rzp
+                                     END DO
+                             END IF
+                     END DO
+                     n_good_trials_old = n_good_trials
+                     n_good_trials = 0
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(trial_loop_time_s)
+                             !$ trial_loop_time_s = omp_get_wtime()
+                             cell_list_cbmc_nrg_time = 0.0_DP
+                     END IF
+                     trial_loop: DO itrial = 1, n_good_trials_old
+                        DO ia_frag = 1, frag_list(frag_start,is)%natoms
+                                ia = frag_list(frag_start,is)%atoms(ia_frag)
+                                this_atom_rp = trial_atom_rp(itrial,:,ia_frag)
+                                this_atom_ci = trial_cell_coords(itrial,:,ia_frag)
+                                IF (check_overlap(this_atom_rp(1:3),this_atom_ci,ia,is,this_box)) THEN
+                                        CYCLE trial_loop
+                                END IF
+                        END DO
+                        IF (cbmc_cell_list_flag) THEN
+                                IF (widom_timing) THEN
+                                        IF (.NOT. omp_flag) CALL cpu_time(cell_list_cbmc_nrg_time_s)
+                                        !$ cell_list_cbmc_nrg_time_s = omp_get_wtime()
+                                        cell_list_cbmc_nrg_ins_checks = cell_list_cbmc_nrg_ins_checks + 1
+                                END IF
+                                nrg_sp = SUM(Compute_Cell_List_CBMC_nrg(&
+                                        trial_atom_rp(itrial,1,1:ifrag_natoms),trial_atom_rp(itrial,2,1:ifrag_natoms),& 
+                                        trial_atom_rp(itrial,3,1:ifrag_natoms), &
+                                        trial_cell_coords(itrial,1,1:ifrag_natoms),trial_cell_coords(itrial,2,1:ifrag_natoms), &
+                                        trial_cell_coords(itrial,3,1:ifrag_natoms), &
+                                        frag_list(frag_start,is)%atoms(1:ifrag_natoms),is,this_box))
+                                IF (widom_timing) THEN
+                                        IF (.NOT. omp_flag) CALL cpu_time(cell_list_cbmc_nrg_time_e)
+                                        !$ cell_list_cbmc_nrg_time_e = omp_get_wtime()
+                                        cell_list_cbmc_nrg_time = cell_list_cbmc_nrg_time + &
+                                                (cell_list_cbmc_nrg_time_e - cell_list_cbmc_nrg_time_s)
+                                END IF
+                                IF (nrg_sp > overlap_nrg_sp) CYCLE trial_loop
+                        END IF
+                        n_good_trials = n_good_trials + 1
+                        which_good_trials(n_good_trials) = MERGE(which_good_trials(itrial),itrial,l_get_bitcell)
+                        IF (cbmc_cell_list_flag) nrg_sp_vec(n_good_trials) = nrg_sp
+                     END DO trial_loop 
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(t2)
+                             !$ t2 = omp_get_wtime()
+                             t12 = t2 - t1
+                             trial_loop_time = t2 - trial_loop_time_s
+                             trial_loop_ins_time = trial_loop_ins_time + trial_loop_time
+                             cell_list_ins_time = cell_list_ins_time + t12
+                             cell_list_cbmc_nrg_ins_time = cell_list_cbmc_nrg_ins_time + cell_list_cbmc_nrg_time
+                             cell_list_ins_checks = cell_list_ins_checks + n_good_trials_old
+                     END IF
+                     IF (n_good_trials == 0) THEN
+                        CALL Set_CBMC_Flag(.FALSE.)
+                        RETURN
+                     END IF
+             END IF
+             IF (l_ortho .AND. l_store_dp_trials) THEN
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_s)
+                             !$ noncell_cbmc_nrg_time_s = omp_get_wtime()
+                             noncell_cbmc_nrg_time = 0.0_DP
+                     END IF
+                     IF (l_widom_cavity_biasing) THEN
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials
+                                gtrial = which_good_trials(itrial)
+                                sxcom_dp = xyz_rand_dp(gtrial,1) - dscom_big_atom(1)
+                                sycom_dp = xyz_rand_dp(gtrial,2) - dscom_big_atom(2)
+                                szcom_dp = xyz_rand_dp(gtrial,3) - dscom_big_atom(3)
+                                IF (sxcom_dp < 0.0_DP) sxcom_dp = sxcom_dp + 1.0_DP
+                                IF (sxcom_dp >= 1.0_DP) sxcom_dp = sxcom_dp - 1.0_DP
+                                IF (sycom_dp < 0.0_DP) sycom_dp = sycom_dp + 1.0_DP
+                                IF (sycom_dp >= 1.0_DP) sycom_dp = sycom_dp - 1.0_DP
+                                IF (szcom_dp < 0.0_DP) szcom_dp = szcom_dp + 1.0_DP
+                                IF (szcom_dp >= 1.0_DP) szcom_dp = szcom_dp - 1.0_DP
+                                xcom_trial(itrial) = sxcom_dp*xl - hxl
+                                ycom_trial(itrial) = sycom_dp*yl - hyl
+                                zcom_trial(itrial) = szcom_dp*zl - hzl
+                             END DO
+                     ELSE IF (l_widom_cells) THEN
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials
+                                gtrial = which_good_trials(itrial)
+                                sxcom_dp = xyz_rand_dp(gtrial,1)
+                                sycom_dp = xyz_rand_dp(gtrial,2)
+                                szcom_dp = xyz_rand_dp(gtrial,3)
+                                xcom_trial(itrial) = sxcom_dp*xl - hxl
+                                ycom_trial(itrial) = sycom_dp*yl - hyl
+                                zcom_trial(itrial) = szcom_dp*zl - hzl
+                             END DO
+                     ELSE
+                             !DIR$ VECTOR ALIGNED
+                             xcom_trial(1:n_good_trials) = &
+                                     xyz_rand_dp(1:n_good_trials,1)*xl - hxl
+                             !DIR$ VECTOR ALIGNED
+                             ycom_trial(1:n_good_trials) = &
+                                     xyz_rand_dp(1:n_good_trials,2)*yl - hyl
+                             !DIR$ VECTOR ALIGNED
+                             zcom_trial(1:n_good_trials) = &
+                                     xyz_rand_dp(1:n_good_trials,3)*zl - hzl
+                     END IF
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_e)
+                             !$ noncell_cbmc_nrg_time_e = omp_get_wtime()
+                             noncell_cbmc_nrg_time = noncell_cbmc_nrg_time + &
+                                     (noncell_cbmc_nrg_time_e-noncell_cbmc_nrg_time_s)
+                     END IF
+             ELSE IF (l_store_dp_trials) THEN
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_s)
+                             !$ noncell_cbmc_nrg_time_s = omp_get_wtime()
+                             noncell_cbmc_nrg_time = 0.0_DP
+                     END IF
+                     IF (l_widom_cavity_biasing) THEN
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials
+                                gtrial = which_good_trials(itrial)
+                                isp = xyz_rand_dp(gtrial,1) - 0.5_DP - dscom_big_atom(1)
+                                IF (ABS(isp) > 0.5_DP) isp = isp - SIGN(1.0_DP,isp)
+                                x_anchor = length_dp(1,1)*isp
+                                isp = xyz_rand_dp(gtrial,2) - 0.5_DP - dscom_big_atom(2)
+                                IF (ABS(isp) > 0.5_DP) isp = isp - SIGN(1.0_DP,isp)
+                                x_anchor = x_anchor + length_dp(1,2)*isp
+                                y_anchor = length_dp(2,2)*isp
+                                isp = xyz_rand_dp(gtrial,3) - 0.5_DP - dscom_big_atom(3)
+                                IF (ABS(isp) > 0.5_DP) isp = isp - SIGN(1.0_DP,isp)
+                                x_anchor = x_anchor + length_dp(1,3)*isp
+                                y_anchor = y_anchor + length_dp(2,3)*isp
+                                z_anchor = length_dp(3,3)*isp
+                                xcom_trial(itrial) = x_anchor
+                                ycom_trial(itrial) = y_anchor
+                                zcom_trial(itrial) = z_anchor
+                             END DO
+                     ELSE IF (l_widom_cells) THEN
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials
+                                gtrial = which_good_trials(itrial)
+                                isp = xyz_rand_dp(gtrial,1) - 0.5_DP
+                                x_anchor = length_dp(1,1)*isp
+                                isp = xyz_rand_dp(gtrial,2) - 0.5_DP
+                                x_anchor = x_anchor + length_dp(1,2)*isp
+                                y_anchor = length_dp(2,2)*isp
+                                isp = xyz_rand_dp(gtrial,3) - 0.5_DP
+                                x_anchor = x_anchor + length_dp(1,3)*isp
+                                y_anchor = y_anchor + length_dp(2,3)*isp
+                                z_anchor = length_dp(3,3)*isp
+                                xcom_trial(itrial) = x_anchor
+                                ycom_trial(itrial) = y_anchor
+                                zcom_trial(itrial) = z_anchor
+                             END DO
+                     ELSE
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials
+                                isp = xyz_rand_dp(itrial,1) - 0.5_DP
+                                x_anchor = length_dp(1,1)*isp
+                                isp = xyz_rand_dp(itrial,2) - 0.5_DP
+                                x_anchor = x_anchor + length_dp(1,2)*isp
+                                y_anchor = length_dp(2,2)*isp
+                                isp = xyz_rand_dp(itrial,3) - 0.5_DP
+                                x_anchor = x_anchor + length_dp(1,3)*isp
+                                y_anchor = y_anchor + length_dp(2,3)*isp
+                                z_anchor = length_dp(3,3)*isp
+                                xcom_trial(itrial) = x_anchor
+                                ycom_trial(itrial) = y_anchor
+                                zcom_trial(itrial) = z_anchor
+                             END DO
+                     END IF
+                     IF (widom_timing) THEN
+                             IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_e)
+                             !$ noncell_cbmc_nrg_time_e = omp_get_wtime()
+                             noncell_cbmc_nrg_time = noncell_cbmc_nrg_time + &
+                                     (noncell_cbmc_nrg_time_e-noncell_cbmc_nrg_time_s)
+                     END IF
+             END IF
+     END IF
+     max_dcomsq = 0.0_DP
+     DO i = 1, frag_list(frag_start,is)%natoms
+        drxcom = drxcom_vec(i)
+        drycom = drycom_vec(i)
+        drzcom = drzcom_vec(i)
+        drxcom = drxcom*drxcom + drycom*drycom + drzcom*drzcom
+        max_dcomsq = MAX(max_dcomsq,drxcom)
+     END DO
+     this_molecule%rcom(4) = SQRT(max_dcomsq)
+     !DIR$ ASSUME_ALIGNED nrg:array_align_bytes, weight:array_align_bytes, overlap_trial:array_align_bytes
+     IF (l_store_dp_trials) THEN
+             IF (widom_timing) THEN
+                     IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_s)
+                     !$ noncell_cbmc_nrg_time_s = omp_get_wtime()
+             END IF
+             DO i = 1, frag_list(frag_start,is)%natoms
+                rtrial(1:n_good_trials,1,i) = xcom_trial(1:n_good_trials) + drxcom_vec(i)
+                rtrial(1:n_good_trials,2,i) = ycom_trial(1:n_good_trials) + drycom_vec(i)
+                rtrial(1:n_good_trials,3,i) = zcom_trial(1:n_good_trials) + drzcom_vec(i)
+             END DO
+             IF (del_flag) THEN
+                     rtrial(1,1:3,1:frag_list(frag_start,is)%natoms) = &
+                             rtrial0(1:3,1:frag_list(frag_start,is)%natoms)
+                     xcom_trial(1) = xcom_old
+                     ycom_trial(1) = ycom_old
+                     zcom_trial(1) = zcom_old
+             END IF
+             DO itrial = 1, n_good_trials
+                ! Place the fragment (and all its atoms) at the trial coordinate
+                DO i = 1, frag_list(frag_start,is)%natoms
+                   this_atom = frag_list(frag_start,is)%atoms(i)
+                   these_atoms(this_atom)%rp = rtrial(itrial,1:3,i)
+                END DO
+
+                this_molecule%rcom(1) = xcom_trial(itrial)
+                this_molecule%rcom(2) = ycom_trial(itrial)
+                this_molecule%rcom(3) = zcom_trial(itrial)
+
+                !IF (need_max_dcom) THEN
+                !        CALL Compute_Max_COM_Distance(this_im,is)
+                !        need_max_dcom = .FALSE.
+                !END IF
+
+                ! Note that the COM position is always chosen inside the simulation box 
+                ! so there is no need to call Fold_Molecule.
+                   
+                ! Calculate the intermolecular energy of the fragment. Note that
+                ! cbmc_flag has been set to true so that the following call will compute
+                ! interaction energy of the growing molecule within a small distance
+                overlap = .FALSE.
+                IF (widom_active) THEN
+                        CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,&
+                                E_inter_vdw,overlap)
+                        ! in this case, E_inter_vdw already includes qq energy
+                        overlap = overlap .OR. E_inter_vdw >= overlap_nrg
+                        overlap_trial(itrial) = overlap
+                        cbmc_overlap = cbmc_overlap .AND. overlap
+                        IF (.NOT. overlap) nrg(itrial) = E_inter_vdw
+                ELSE
+                        CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,&
+                                E_inter_vdw,E_inter_qq,overlap)
+                        nrg_dp = MERGE(infinity_DP, E_inter_vdw + E_inter_qq, overlap)
+                        overlap = overlap .OR. nrg_dp >= overlap_nrg
+                        overlap_trial(itrial) = overlap
+                        cbmc_overlap = cbmc_overlap .AND. overlap
+                        IF (.NOT. overlap) nrg(itrial) = nrg_dp
+                END IF
+             END DO
+             IF (widom_timing) THEN
+                     IF (.NOT. omp_flag) CALL cpu_time(noncell_cbmc_nrg_time_e)
+                     !$ noncell_cbmc_nrg_time_e = omp_get_wtime()
+                     noncell_cbmc_nrg_time = noncell_cbmc_nrg_time + &
+                             (noncell_cbmc_nrg_time_e-noncell_cbmc_nrg_time_s)
+                     noncell_cbmc_nrg_ins_time = noncell_cbmc_nrg_ins_time + noncell_cbmc_nrg_time
+                     nrg_ins_overlaps = nrg_ins_overlaps + COUNT(overlap_trial(1:n_good_trials))
+             END IF
+             ! Reject the move if all trials tripped overlap
+             IF (cbmc_overlap) THEN
+                CALL Set_CBMC_Flag(.FALSE.)
+                RETURN
+             END IF
+
+             !DIR$ VECTOR ALIGNED
+             DO itrial = 1, n_good_trials
+                nrg_dp = nrg(itrial)
+                weight(itrial) = MERGE(0.0_DP,EXP(-beta(this_box)*nrg_dp),overlap_trial(itrial))
+             END DO
+     ELSE
+             !DIR$ VECTOR ALIGNED
+             !$OMP SIMD PRIVATE(nrg_dp,overlap)
+             DO itrial = 1, n_good_trials
+                nrg_dp = REAL(nrg_sp_vec(itrial),DP)
+                nrg(itrial) = nrg_dp
+                weight(itrial) = EXP(-beta(this_box)*nrg_dp)
+             END DO
+             !$OMP END SIMD
+     END IF
+     IF (widom_timing) THEN
+          IF (.NOT. omp_flag) CALL CPU_TIME(cbmc_time_e2)
+          !$ cbmc_time_e2 = omp_get_wtime()
+          t12 = cbmc_time_e2 - cbmc_time_e
+          cbmc_returnzone_ins_time = cbmc_returnzone_ins_time + t12
+          cbmc_nonoverlap_ins_count = cbmc_nonoverlap_ins_count + 1_INT64
+     END IF
+     ! Store the cumulative weight of each trial
+     cweight = weight(1)
+     DO itrial = 2, n_good_trials
+        cweight = cweight + weight(itrial)
+        weight(itrial) = cweight
+     END DO
         
-        END DO atom_loop
-!widom_timing        n_not_clo = n_not_clo + 1_INT64
-
-        xcom_trial(itrial) = x_anchor
-        ycom_trial(itrial) = y_anchor
-        zcom_trial(itrial) = z_anchor
-        this_molecule%xcom = x_anchor
-        this_molecule%ycom = y_anchor
-        this_molecule%zcom = z_anchor
-
-        IF (need_max_dcom) THEN
-                CALL Compute_Max_COM_Distance(this_im,is)
-                need_max_dcom = .FALSE.
-        END IF
 
-        ! Note that the COM position is always chosen inside the simulation box 
-        ! so there is no need to call Fold_Molecule.
-           
-        ! Calculate the intermolecular energy of the fragment. Note that
-        ! cbmc_flag has been set to true so that the following call will compute
-        ! interaction energy of the growing molecule within a small distance
-        overlap = .FALSE.
-        IF (widom_active) THEN
-                CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,&
-                        E_inter_vdw,overlap)
-                ! in this case, E_inter_vdw already includes qq energy
-                nrg(itrial) = nrg(itrial) + E_inter_vdw 
-        ELSE
-                CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,&
-                        E_inter_vdw,E_inter_qq,overlap)
-                nrg(itrial) = nrg(itrial) + E_inter_vdw + E_inter_qq 
-        END IF
 
-        IF (overlap) THEN
-           ! atoms are too close, set the weight to zero
-           weight(itrial) = 0.0_DP
-           overlap_trial(itrial) = .TRUE.
-        ELSE
-           nrg_kBT = beta(this_box) * nrg(itrial)
-
-           IF ( nrg_kBT >= max_kBT) THEN
-              ! the energy is too high, set the weight to zero
-              weight(itrial) = 0.0_DP
-              overlap_trial(itrial) = .TRUE.
-!widom_timing              n_nrg_overlap = n_nrg_overlap + 1_INT64
-           ELSE
-              weight(itrial) = DEXP(-nrg_kBT)
-           END IF
-        END IF
-
-!        ! BEGIN DEBUGGING OUTPUT
-!        ! Write out the fragment coordinates for each trial position
-!        M_XYZ_unit = movie_xyz_unit + this_box
-!        DO i = 1, frag_list(frag_start,is)%natoms
-!           this_atom = frag_list(frag_start,is)%atoms(i)
-!           WRITE(M_XYZ_unit,*) &
-!              TRIM(nonbond_list(this_atom,is)%element) // &
-!              TRIM(int_to_string(itrial)), & 
-!              rtrial(this_atom,itrial)%rxp, &
-!              rtrial(this_atom,itrial)%ryp, &
-!              rtrial(this_atom,itrial)%rzp
-!        END DO
-!        IF (itrial==1) THEN
-!           WRITE(*,'(2(A,X,I5,X))'), 'i_mcstep', i_mcstep, 'lm', this_im
-!           WRITE(*,'(4(A12,X))') 'POS:trial', 'energy', 'weight', 'overlap'
-!        END IF
-!        WRITE(*,'(I12,X,E12.6,X,E12.6,X,L12)') itrial, beta(this_box)*nrg(itrial), weight(itrial), overlap_trial(itrial)
-!        ! END DEBUGGING OUTPUT
-        ! Store the cumulative weight of each trial
-        IF (itrial > 1 ) weight(itrial) = weight(itrial-1) + weight(itrial)
-
-!widom_timing        IF (.NOT. omp_flag) CALL cpu_time(overlap_time_e)
-!widom_timing        !$ overlap_time_e = omp_get_wtime()
-!widom_timing        overlap_time = overlap_time_e - overlap_time_s
-!widom_timing
-!widom_timing        IF (overlap) THEN
-!widom_timing                normal_overlap_time = normal_overlap_time + overlap_time
-!widom_timing        ELSE IF (overlap_trial(itrial)) THEN
-!widom_timing                nrg_overlap_time = nrg_overlap_time + overlap_time
-!widom_timing        ELSE
-!widom_timing                non_overlap_time = non_overlap_time + overlap_time
-!widom_timing        END IF
-     
-     END DO trial_loop
-
-
-     ! Reject the move if all trials tripped overlap
-     IF (ALL(overlap_trial)) THEN
-        cbmc_overlap = .TRUE.
-        CALL Set_CBMC_Flag(.FALSE.)
-        RETURN
-     END IF
 
      ! Select one of the trial coordinates
 
@@ -597,18 +1049,17 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
      ELSE
 
         ! Choose one from Golden sampling for an insertion move
-        rand_no = rranf() * weight(kappa_ins)
+        rand_no = rranf() * cweight
      
-        DO i = 1, kappa_ins
+        DO i = 1, n_good_trials
            IF ( rand_no < weight(i)) EXIT
         END DO
      
         trial = i
 
-        IF ( trial == kappa_ins + 1 ) THEN
+        IF ( trial == n_good_trials + 1 ) THEN
            ! None of the trials were picked. Could be due to the fact that all 
            ! the trials had a very small cumulative weight
-
            cbmc_overlap = .TRUE.
            CALL Set_CBMC_Flag(.FALSE.)
            RETURN
@@ -617,33 +1068,70 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
      END IF
 
      ! Compute the weight of the selected trial coordinate
-     ln_pbias = ln_pbias - beta(this_box) * nrg(trial) - DLOG(weight(kappa_ins))
+     ln_pbias = ln_pbias - beta(this_box) * nrg(trial) - DLOG(cweight)
+     IF (l_widom_cavity_biasing) ln_pbias = ln_pbias - cavdatalist(i_big_atom,this_box)%ln_cavfrac
   
      ! This line is not used
-     e_total = nrg(trial)
+     !e_total = nrg(trial)
 
      ! We chose the ith trial coordinate for placement. Store the ith trial
      ! coordinates in the atom_list array. Note that for the deletion move,
      ! trial=1 has the current coordinates of the fragment, so the molecule
      ! is not moved.
-  
-     DO i = 1, frag_list(frag_start,is)%natoms
-     
-        this_atom = frag_list(frag_start,is)%atoms(i)
-     
-        these_atoms(this_atom)%rxp = rtrial(this_atom,trial)%rxp
-        these_atoms(this_atom)%ryp = rtrial(this_atom,trial)%ryp
-        these_atoms(this_atom)%rzp = rtrial(this_atom,trial)%rzp
-     
-     END DO
-     this_molecule%xcom = xcom_trial(trial)
-     this_molecule%ycom = ycom_trial(trial)
-     this_molecule%zcom = zcom_trial(trial)
+     IF (l_store_dp_trials) THEN
+             this_molecule%rcom(1) = xcom_trial(trial)
+             this_molecule%rcom(2) = ycom_trial(trial)
+             this_molecule%rcom(3) = zcom_trial(trial)
+             DO i = 1, frag_list(frag_start,is)%natoms
+                this_atom = frag_list(frag_start,is)%atoms(i)
+                these_atoms(this_atom)%rp = rtrial(trial,1:3,i)
+             
+                !these_atoms(this_atom)%rp(1) = rtrial(this_atom,trial)%rp(1)
+                !these_atoms(this_atom)%rp(2) = rtrial(this_atom,trial)%rp(2)
+                !these_atoms(this_atom)%rp(3) = rtrial(this_atom,trial)%rp(3)
+             
+             END DO
+     ELSE
+             IF (l_ortho) THEN
+                     IF (l_widom_cavity_biasing) THEN
+                             this_molecule%rcom(1:3) = &
+                                     (xyz_rand_dp(which_good_trials(trial),1:3) - 0.5_DP - dscom_big_atom)
+                             this_molecule%rcom(1:3) = MERGE(&
+                                     this_molecule%rcom(1:3) - SIGN(1.0_DP,this_molecule%rcom(1:3)), &
+                                     this_molecule%rcom(1:3), &
+                                     ABS(this_molecule%rcom(1:3)) > 0.5_DP)
+                             this_molecule%rcom(1:3) = this_molecule%rcom(1:3) * &
+                                     (/ length_dp(1,1) , length_dp(2,2), length_dp(3,3) /)
+                     ELSE
+                             this_molecule%rcom(1:3) = (xyz_rand_dp(which_good_trials(trial),1:3) - 0.5_DP) * &
+                                     (/ length_dp(1,1) , length_dp(2,2), length_dp(3,3) /)
+                     END IF
+             ELSE
+                     IF (l_widom_cavity_biasing) THEN
+                             this_molecule%rcom(1:3) = &
+                                     (xyz_rand_dp(which_good_trials(trial),1:3) - 0.5_DP - dscom_big_atom)
+                             this_molecule%rcom(1:3) = MERGE(&
+                                     this_molecule%rcom(1:3) - SIGN(1.0_DP,this_molecule%rcom(1:3)), &
+                                     this_molecule%rcom(1:3), &
+                                     ABS(this_molecule%rcom(1:3)) > 0.5_DP)
+                             this_molecule%rcom(1:3) = MATMUL(length_dp, &
+                                     this_molecule%rcom(1:3))
+                     ELSE
+                             this_molecule%rcom(1:3) = MATMUL(length_dp, &
+                                     xyz_rand_dp(which_good_trials(trial),1:3) - 0.5_DP)
+                     END IF
+             END IF
+             DO i = 1, frag_list(frag_start,is)%natoms
+                this_atom = frag_list(frag_start,is)%atoms(i)
+                these_atoms(this_atom)%rp(1) = this_molecule%rcom(1) + drxcom_vec(i)
+                these_atoms(this_atom)%rp(2) = this_molecule%rcom(2) + drycom_vec(i)
+                these_atoms(this_atom)%rp(3) = this_molecule%rcom(3) + drzcom_vec(i)
+             END DO
+     END IF
 
   END IF
   
   ! Mark this fragment is placed
-  e_total = 0.0_DP
   frag_placed(frag_start) = 1
 
   ! We have our first segment placed in the system. 
@@ -675,19 +1163,41 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
   ! Now we will place rest of the segments based on the initial fragment placed
 
   ! Why will the dihedral energy be anything other than zero?
-  CALL Compute_Molecule_Dihedral_Energy(this_im,is,E_dihed)
-  E_total = E_dihed
+  IF (compatibility_mode) THEN
+          CALL Compute_Molecule_Dihedral_Energy(this_im,is,E_dihed)
+          E_total = E_dihed
+  ELSE
+          E_total = 0.0_DP
+  END IF
 
   ! If we've gotten this far, cbmc_overlap = FALSE
+  cbmc_overlap = .FALSE.
   del_overlap = .FALSE.
 
   ! The first fragment in frag_order has already been placed.
   ! We will call Fragment_Placement to place the remaining fragments in
   ! frag_order, starting with fragment frag_order(2)
   frag_start = 2
+  !E_total = 0.0_DP
+  IF (widom_timing) THEN
+       IF (.NOT. omp_flag) CALL CPU_TIME(cbmc_time_e3)
+       !$ cbmc_time_e3 = omp_get_wtime()
+       t12 = cbmc_time_e3 - cbmc_time_e2
+       cbmc_endzone_ins_time = cbmc_endzone_ins_time + t12
+  END IF
   CALL Fragment_Placement(this_box,this_im,is,frag_start,frag_total, &
                           frag_order,frag_placed,this_lambda,E_total,ln_pbias, &
                           nrg_ring_frag_total,cbmc_overlap,del_overlap)
+  IF (widom_timing) THEN
+       IF (.NOT. omp_flag) CALL CPU_TIME(cbmc_time_e4)
+       !$ cbmc_time_e4 = omp_get_wtime()
+       t12 = cbmc_time_e4 - cbmc_time_e3
+       cbmc_fragment_placement_time = cbmc_fragment_placement_time + t12
+       IF (species_list(is)%nfragments > 1) THEN
+               cbmc_dih_time = cbmc_dih_time + t12
+               cbmc_dih_count = cbmc_dih_count + 1_INT64
+       END IF
+  END IF
 
   ! Note that cbmc_overlap may be TRUE and the cbmc_flag will be properly 
   ! assigned FALSE while exiting the code.
@@ -699,10 +1209,74 @@ SUBROUTINE Build_Molecule(this_im,is,this_box,frag_order,this_lambda, &
 
   IF (del_overlap) cbmc_overlap = .TRUE.
 
+  IF (PRESENT(E_interfrag)) E_interfrag = E_total
+
   ! Mark cbmc_flag as FALSE so that intermolecular nonbonded interactions
   ! are properly computed for the molecule
 
   CALL Set_CBMC_Flag(.FALSE.)
+
+  CONTAINS
+          SUBROUTINE Bitcell_Overlap_Detection(nsa,bitcell_int32_vec)
+                  !DIR$ ATTRIBUTES FORCEINLINE :: Bitcell_Overlap_Detection
+                  INTEGER, INTENT(IN) :: nsa
+                  INTEGER(INT32), DIMENSION(0:), CONTIGUOUS, INTENT(IN) :: bitcell_int32_vec
+                  INTEGER :: isa
+                  REAL(SP) :: sxref, syref, szref
+                  !DIR$ LOOP COUNT = 1000
+                  !DIR$ VECTOR ALIGNED
+                  !$OMP SIMD PRIVATE(sxp,syp,szp,bitcell_bit,bitcell_int,this_bitcell_overlap,sxref,syref,szref) 
+                  DO itrial = 1, kappa_ins
+                     sxref = xyz_rand(itrial,1)
+                     syref = xyz_rand(itrial,2)
+                     szref = xyz_rand(itrial,3)
+                     sxp = sxref + dsref_mat(1,1)
+                     syp = syref + dsref_mat(2,1)
+                     szp = szref + dsref_mat(3,1)
+                     ! Note: it is important that the < checks and shifts come before the >= checks and shifts
+                     !       due to floating point rounding.  It's fine for sxp, syp, or szp to be zero or even
+                     !       just barely less than zero due to the behavior of INT(), but if they end up equal to
+                     !       1.0, that can cause bitcell_int to be out of bounds or otherwise severely incorrect.
+                     !       This was found out the hard way.
+                     IF (sxp < 0.0) sxp = sxp + 1.0
+                     IF (syp < 0.0) syp = syp + 1.0
+                     IF (szp < 0.0) szp = szp + 1.0
+                     IF (sxp >= 1.0) sxp = sxp - 1.0
+                     IF (syp >= 1.0) syp = syp - 1.0
+                     IF (szp >= 1.0) szp = szp - 1.0
+                     bitcell_bit = INT(sxp*xlbc)
+                     bitcell_int = ISHFT(bitcell_bit,-5) + &
+                             INT(syp*ylbc)*ybcdf + &
+                             INT(szp*zlbc)*zbcdf
+                     bitcell_bit = IAND(bitcell_bit,31_INT32) ! same as modulo 32
+                     !bitcell_int = box_list(this_box)%bitcell_int32_vec(bitcell_int)
+                     bitcell_int = bitcell_int32_vec(bitcell_int)
+                     this_bitcell_overlap = BTEST(bitcell_int,bitcell_bit)
+                     ! Iteration 2+
+                     DO isa = 2, nsa
+                             sxp = sxref + dsref_mat(1,isa)
+                             syp = syref + dsref_mat(2,isa)
+                             szp = szref + dsref_mat(3,isa)
+                             IF (sxp < 0.0) sxp = sxp + 1.0
+                             IF (syp < 0.0) syp = syp + 1.0
+                             IF (szp < 0.0) szp = szp + 1.0
+                             IF (sxp >= 1.0) sxp = sxp - 1.0
+                             IF (syp >= 1.0) syp = syp - 1.0
+                             IF (szp >= 1.0) szp = szp - 1.0
+                             bitcell_bit = INT(sxp*xlbc)
+                             bitcell_int = ISHFT(bitcell_bit,-5) + &
+                                     INT(syp*ylbc)*ybcdf + &
+                                     INT(szp*zlbc)*zbcdf
+                             bitcell_bit = IAND(bitcell_bit,31_INT32) ! same as modulo 32
+                             bitcell_int = bitcell_int32_vec(bitcell_int)
+                             !bitcell_int = box_list(this_box)%bitcell_int32_vec(bitcell_int)
+                             this_bitcell_overlap = this_bitcell_overlap .OR. BTEST(bitcell_int,bitcell_bit)
+                     END DO
+                     ! Store result
+                     bitcell_overlap(itrial) = this_bitcell_overlap
+                  END DO
+                  !$OMP END SIMD
+          END SUBROUTINE Bitcell_Overlap_Detection
   
 END SUBROUTINE Build_Molecule
 
@@ -755,12 +1329,21 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
   ! Variables associated with the CBMC part
 
   INTEGER :: itrial, trial, frag_type
-  REAL(DP) :: weight(MAX(kappa_ins,kappa_rot,kappa_dih)), rand_no
+  REAL(DP) :: weight(MAX(species_list(is)%kappa_ins,species_list(is)%kappa_rot,species_list(is)%kappa_dih)), rand_no
   REAL(DP) :: e_dihed, E_intra_vdw, E_intra_qq, E_inter_vdw
   REAL(DP) :: E_inter_qq, E_total
-  REAL(DP) :: nrg(MAX(kappa_ins,kappa_rot,kappa_dih)), nrg_kBT, time0, time1, nrg_ring_frag
+  REAL(DP) :: nrg(MAX(species_list(is)%kappa_ins,species_list(is)%kappa_rot,species_list(is)%kappa_dih)), nrg_kBT, time0, time1, nrg_ring_frag
   LOGICAL :: del_overlap
-  TYPE(Atom_Class) :: rtrial(MAXVAL(natoms),0:MAX(kappa_ins,kappa_rot,kappa_dih))
+  TYPE(Atom_Class) :: rtrial(MAXVAL(natoms),0:MAX(species_list(is)%kappa_ins,species_list(is)%kappa_rot,species_list(is)%kappa_dih))
+
+  INTEGER :: kappa_ins, kappa_ins_pad8, kappa_ins_pad32, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
+  kappa_ins = species_list(is)%kappa_ins
+  kappa_ins_pad8 = species_list(is)%kappa_ins_pad8
+  kappa_ins_pad32 = species_list(is)%kappa_ins_pad32
+  kappa_dih = species_list(is)%kappa_dih
+  kappa_dih_pad8 = species_list(is)%kappa_dih_pad8
+  kappa_dih_pad32 = species_list(is)%kappa_dih_pad32
+
 
 
   IF (widom_active) THEN
@@ -825,17 +1408,17 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
 
   DO itrial = 1, kappa_ins
      IF (del_flag .and.(itrial.eq.1)) THEN
-        rtrial(first_atom,itrial)%rxp =  these_atoms(first_atom)%rxp
-        rtrial(first_atom,itrial)%ryp =  these_atoms(first_atom)%ryp
-        rtrial(first_atom,itrial)%rzp =  these_atoms(first_atom)%rzp
+        rtrial(first_atom,itrial)%rp(1) =  these_atoms(first_atom)%rp(1)
+        rtrial(first_atom,itrial)%rp(2) =  these_atoms(first_atom)%rp(2)
+        rtrial(first_atom,itrial)%rp(3) =  these_atoms(first_atom)%rp(3)
      ELSE
         IF (box_list(this_box)%int_box_shape == int_cubic) THEN
            
-           rtrial(first_atom,itrial)%rxp = (0.5_DP - rranf()) * &
+           rtrial(first_atom,itrial)%rp(1) = (0.5_DP - rranf()) * &
                 box_list(this_box)%length(1,1)
-           rtrial(first_atom,itrial)%ryp = (0.5_DP - rranf()) * &
+           rtrial(first_atom,itrial)%rp(2) = (0.5_DP - rranf()) * &
                 box_list(this_box)%length(2,2)
-           rtrial(first_atom,itrial)%rzp = (0.5_DP - rranf()) * &
+           rtrial(first_atom,itrial)%rp(3) = (0.5_DP - rranf()) * &
                 box_list(this_box)%length(3,3)
            
         END IF
@@ -843,15 +1426,15 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
      ENDIF
 
   ! Store them in correct array for the energy call
-     these_atoms(first_atom)%rxp = rtrial(first_atom,itrial)%rxp
-     these_atoms(first_atom)%ryp = rtrial(first_atom,itrial)%ryp
-     these_atoms(first_atom)%rzp = rtrial(first_atom,itrial)%rzp
+     these_atoms(first_atom)%rp(1) = rtrial(first_atom,itrial)%rp(1)
+     these_atoms(first_atom)%rp(2) = rtrial(first_atom,itrial)%rp(2)
+     these_atoms(first_atom)%rp(3) = rtrial(first_atom,itrial)%rp(3)
 
 
      ! Compute the energy of this atom    
-     this_molecule%xcom = rtrial(first_atom,itrial)%rxp 
-     this_molecule%ycom = rtrial(first_atom,itrial)%ryp 
-     this_molecule%zcom = rtrial(first_atom,itrial)%rzp
+     this_molecule%rcom(1) = rtrial(first_atom,itrial)%rp(1) 
+     this_molecule%rcom(2) = rtrial(first_atom,itrial)%rp(2) 
+     this_molecule%rcom(3) = rtrial(first_atom,itrial)%rp(3)
 
      CALL Compute_Max_COM_Distance(this_im,is)
 
@@ -907,9 +1490,9 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
 
 ! Now we have the position of the first bead of   
 ! Assign this position to proper these_atoms
-  these_atoms(first_atom)%rxp = rtrial(first_atom,trial)%rxp
-  these_atoms(first_atom)%ryp = rtrial(first_atom,trial)%ryp
-  these_atoms(first_atom)%rzp = rtrial(first_atom,trial)%rzp
+  these_atoms(first_atom)%rp(1) = rtrial(first_atom,trial)%rp(1)
+  these_atoms(first_atom)%rp(2) = rtrial(first_atom,trial)%rp(2)
+  these_atoms(first_atom)%rp(3) = rtrial(first_atom,trial)%rp(3)
 
 ! NR: End selection of the first bead of the fragment
 
@@ -935,32 +1518,34 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
         nl = (frag_position_library(frag_type)-1) + &
                                            frag_list(1,is)%natoms*(this_fragment-1)+i
 
-                    
-         rtrial(this_atom,0)%rxp = library_coords(nl)%rxp -&
-                                   library_coords(nlo)%rxp +&
-                                   these_atoms(first_atom)%rxp
-         rtrial(this_atom,0)%ryp = library_coords(nl)%ryp -&
-                                   library_coords(nlo)%ryp +& 
-                                   these_atoms(first_atom)%ryp
-         rtrial(this_atom,0)%rzp = library_coords(nl)%rzp -& 
-                                   library_coords(nlo)%rzp +& 
-                                   these_atoms(first_atom)%rzp
-      !  rtrial(this_atom,0)%rxp = frag_coords(i,this_fragment,frag_type)%rxp-&
-      !                            frag_coords(1,this_fragment,frag_type)%rxp+&
-      !                            these_atoms(first_atom)%rxp 
-      !  rtrial(this_atom,0)%ryp = frag_coords(i,this_fragment,frag_type)%ryp-&
-      !                            frag_coords(1,this_fragment,frag_type)%ryp+&
-      !                            these_atoms(first_atom)%ryp
-      !  rtrial(this_atom,0)%rzp = frag_coords(i,this_fragment,frag_type)%rzp-&
-      !                            frag_coords(1,this_fragment,frag_type)%rzp+&
-      !                            these_atoms(first_atom)%rzp 
+         rtrial(this_atom,0)%rp(1:3) = library_coords(1:3,nl) - &
+                 library_coords(1:3,nlo) + &
+                 these_atoms(first_atom)%rp(1:3)
+         !rtrial(this_atom,0)%rp(1) = library_coords(nl)%rp(1) -&
+         !                          library_coords(nlo)%rp(1) +&
+         !                          these_atoms(first_atom)%rp(1)
+         !rtrial(this_atom,0)%rp(2) = library_coords(nl)%rp(2) -&
+         !                          library_coords(nlo)%rp(2) +& 
+         !                          these_atoms(first_atom)%rp(2)
+         !rtrial(this_atom,0)%rp(3) = library_coords(nl)%rp(3) -& 
+         !                          library_coords(nlo)%rp(3) +& 
+         !                          these_atoms(first_atom)%rp(3)
+      !  rtrial(this_atom,0)%rp(1) = frag_coords(i,this_fragment,frag_type)%rp(1)-&
+      !                            frag_coords(1,this_fragment,frag_type)%rp(1)+&
+      !                            these_atoms(first_atom)%rp(1) 
+      !  rtrial(this_atom,0)%rp(2) = frag_coords(i,this_fragment,frag_type)%rp(2)-&
+      !                            frag_coords(1,this_fragment,frag_type)%rp(2)+&
+      !                            these_atoms(first_atom)%rp(2)
+      !  rtrial(this_atom,0)%rp(3) = frag_coords(i,this_fragment,frag_type)%rp(3)-&
+      !                            frag_coords(1,this_fragment,frag_type)%rp(3)+&
+      !                            these_atoms(first_atom)%rp(3) 
      END DO
   ELSE
      DO i = 1,frag_list(frag_start,is)%natoms
         this_atom = frag_list(frag_start,is)%atoms(i)
-        rtrial(this_atom,0)%rxp = these_atoms(this_atom)%rxp
-        rtrial(this_atom,0)%ryp = these_atoms(this_atom)%ryp
-        rtrial(this_atom,0)%rzp = these_atoms(this_atom)%rzp
+        rtrial(this_atom,0)%rp(1) = these_atoms(this_atom)%rp(1)
+        rtrial(this_atom,0)%rp(2) = these_atoms(this_atom)%rp(2)
+        rtrial(this_atom,0)%rp(3) = these_atoms(this_atom)%rp(3)
      END DO
   END IF
 
@@ -970,30 +1555,30 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
 ! Rotate this fragment about the first bead along different axes
 ! select one from trials
 
-  DO itrial = 1, kappa_rot
+  DO itrial = 1, species_list(is)%kappa_rot
      IF(del_flag .and. (itrial.eq.1)) THEN
        DO i=1,frag_list(frag_start,is)%natoms
           this_atom = frag_list(frag_start,is)%atoms(i)
-          rtrial(this_atom,itrial)%rxp = these_atoms(this_atom)%rxp
-          rtrial(this_atom,itrial)%ryp = these_atoms(this_atom)%ryp
-          rtrial(this_atom,itrial)%rzp = these_atoms(this_atom)%rzp
+          rtrial(this_atom,itrial)%rp(1) = these_atoms(this_atom)%rp(1)
+          rtrial(this_atom,itrial)%rp(2) = these_atoms(this_atom)%rp(2)
+          rtrial(this_atom,itrial)%rp(3) = these_atoms(this_atom)%rp(3)
        END DO
        CALL Get_COM(this_im,is)
     ELSE
        DO i = 1,frag_list(frag_start,is)%natoms
           this_atom = frag_list(frag_start,is)%atoms(i)  
-          these_atoms(this_atom)%rxp = rtrial(this_atom,0)%rxp
-          these_atoms(this_atom)%ryp = rtrial(this_atom,0)%ryp
-          these_atoms(this_atom)%rzp = rtrial(this_atom,0)%rzp
+          these_atoms(this_atom)%rp(1) = rtrial(this_atom,0)%rp(1)
+          these_atoms(this_atom)%rp(2) = rtrial(this_atom,0)%rp(2)
+          these_atoms(this_atom)%rp(3) = rtrial(this_atom,0)%rp(3)
        ENDDO
        CALL Rotate_XYZ_Axes(this_im,is,frag_start,.true.,.true.,.true.,1)
        CALL Get_COM(this_im,is)
        !       CALL Fold_Molecule(this_im,is,this_box) 
        DO i = 1,frag_list(frag_start,is)%natoms
           this_atom = frag_list(frag_start,is)%atoms(i)
-          rtrial(this_atom,itrial)%rxp = these_atoms(this_atom)%rxp
-          rtrial(this_atom,itrial)%ryp = these_atoms(this_atom)%ryp
-          rtrial(this_atom,itrial)%rzp = these_atoms(this_atom)%rzp
+          rtrial(this_atom,itrial)%rp(1) = these_atoms(this_atom)%rp(1)
+          rtrial(this_atom,itrial)%rp(2) = these_atoms(this_atom)%rp(2)
+          rtrial(this_atom,itrial)%rp(3) = these_atoms(this_atom)%rp(3)
        END DO
        
     ENDIF
@@ -1006,13 +1591,13 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
 !     DO i = 1,frag_list(frag_start,is)%natoms
 !           this_atom = frag_list(frag_start,is)%atoms(i)
 !           IF (this_atom .eq. 1) then 
-!               Write(8,*) 'O', these_atoms(this_atom)%rxp, &
-!                               these_atoms(this_atom)%ryp, &
-!                               these_atoms(this_atom)%rzp
+!               Write(8,*) 'O', these_atoms(this_atom)%rp(1), &
+!                               these_atoms(this_atom)%rp(2), &
+!                               these_atoms(this_atom)%rp(3)
 !           ELSE
-!               Write(8,*) 'C', these_atoms(this_atom)%rxp, &
-!                               these_atoms(this_atom)%ryp, &
-!                               these_atoms(this_atom)%rzp
+!               Write(8,*) 'C', these_atoms(this_atom)%rp(1), &
+!                               these_atoms(this_atom)%rp(2), &
+!                               these_atoms(this_atom)%rp(3)
 !           END IF
 !     END DO  
 
@@ -1050,7 +1635,7 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
      IF (itrial > 1 ) weight(itrial) = weight(itrial-1) + weight(itrial)
   ENDDO ! End the generation of trial orientations and associating waight
 
-  IF (weight(kappa_rot) == 0.0_DP) THEN
+  IF (weight(species_list(is)%kappa_rot) == 0.0_DP) THEN
       cbmc_overlap = .TRUE.
       CALL Set_CBMC_Flag(.FALSE.)
       RETURN
@@ -1060,8 +1645,8 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
       trial = 1
   ELSE
    ! Choose one from Golden sampling for an insertion move
-      rand_no = rranf() * weight(kappa_rot)
-      DO i = 1, kappa_rot
+      rand_no = rranf() * weight(species_list(is)%kappa_rot)
+      DO i = 1, species_list(is)%kappa_rot
          IF ( rand_no < weight(i)) EXIT
       END DO
       trial = i
@@ -1069,20 +1654,20 @@ SUBROUTINE Build_Rigid_Fragment(this_im,is,this_box,frag_order,this_lambda, &
 
 !  Write(8,*) 'selected', trial
 
-  ln_pbias = ln_pbias - beta(this_box) * nrg(trial) - DLOG(weight(kappa_rot))
+  ln_pbias = ln_pbias - beta(this_box) * nrg(trial) - DLOG(weight(species_list(is)%kappa_rot))
 
 ! Assign positions to these_atoms
   DO i=1,frag_list(frag_start,is)%natoms
      this_atom =  frag_list(frag_start,is)%atoms(i)
-     these_atoms(this_atom)%rxp = rtrial(this_atom,trial)%rxp
-     these_atoms(this_atom)%ryp = rtrial(this_atom,trial)%ryp
-     these_atoms(this_atom)%rzp = rtrial(this_atom,trial)%rzp
+     these_atoms(this_atom)%rp(1) = rtrial(this_atom,trial)%rp(1)
+     these_atoms(this_atom)%rp(2) = rtrial(this_atom,trial)%rp(2)
+     these_atoms(this_atom)%rp(3) = rtrial(this_atom,trial)%rp(3)
   END DO
 
 ! IF ( .NOT. del_flag) THEN
-! write(*,*) these_atoms%rxp
-! write(*,*) these_atoms%rzp
-! write(*,*) these_atoms%ryp
+! write(*,*) these_atoms%rp(1)
+! write(*,*) these_atoms%rp(3)
+! write(*,*) these_atoms%rp(2)
 ! NR: Now we have place the first fragment.
 ! write(*,*)
 !	End if
@@ -1170,6 +1755,15 @@ SUBROUTINE Cut_Regrow(this_im,is,frag_live,frag_dead,frag_order,frag_total, &
   TYPE(Molecule_Class), POINTER :: this_molecule
   TYPE(Atom_Class), POINTER :: these_atoms(:)
 
+  INTEGER :: kappa_ins, kappa_ins_pad8, kappa_ins_pad32, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
+  kappa_ins = species_list(is)%kappa_ins
+  kappa_ins_pad8 = species_list(is)%kappa_ins_pad8
+  kappa_ins_pad32 = species_list(is)%kappa_ins_pad32
+  kappa_dih = species_list(is)%kappa_dih
+  kappa_dih_pad8 = species_list(is)%kappa_dih_pad8
+  kappa_dih_pad32 = species_list(is)%kappa_dih_pad32
+
+
 
   IF (widom_active) THEN
           this_molecule => widom_molecule
@@ -1364,6 +1958,15 @@ SUBROUTINE Fragment_Order(this_frag,is,frag_total,frag_order,live,ln_pseq)
   INTEGER :: hanging_connections(nfragments(is)) ! to hold ids of frags ready to add
   REAL(DP) :: randno, prob(nfragments(is)), cum_prob(nfragments(is))
 
+  INTEGER :: kappa_ins, kappa_ins_pad8, kappa_ins_pad32, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
+  kappa_ins = species_list(is)%kappa_ins
+  kappa_ins_pad8 = species_list(is)%kappa_ins_pad8
+  kappa_ins_pad32 = species_list(is)%kappa_ins_pad32
+  kappa_dih = species_list(is)%kappa_dih
+  kappa_dih_pad8 = species_list(is)%kappa_dih_pad8
+  kappa_dih_pad32 = species_list(is)%kappa_dih_pad32
+
+
   !*****************************************************************************
   !   Step 1) Determine the number & identity of hanging connections
   !*****************************************************************************
@@ -1485,35 +2088,99 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
   INTEGER :: anchor_frag_connect, atom_ifrag, atom_frag_connect, ii, trial
   INTEGER :: frag_type, dumcount
 
-  INTEGER :: counted(nfragments(is)), connection(nfragments(is)), atom_id(natoms(is))
+  INTEGER, DIMENSION(nfragments(is)) :: counted, connection
 
   INTEGER :: ispecies, jmol, k
 
-  INTEGER :: nl, nlo      ! number of the line where start the x,y,x coords of
-                          ! config and fragment randomly selected
+  INTEGER :: nl, nl_base ! index of library_coords column immediately before the start of the
+                     ! coordinates of the chosen fragment conformation
 
   INTEGER :: total_frags, this_fragment, nfrag_atoms
   REAL(DP) :: x_this,y_this,z_this, vec1(3), vec2(3), aligner_ifrag(3,3)
   REAL(DP) :: hanger_ifrag(3,3), aligner_frag_connect(3,3), hanger_frag_connect(3,3)
 
   REAL(DP) :: tempx, tempy, tempz, theta, e_dihed
-  REAL(DP) :: weight(kappa_dih), nrg(kappa_dih)
+  REAL(DP) :: weight(species_list(is)%kappa_dih_pad8), nrg_dp_vec(species_list(is)%kappa_dih_pad8)
+  REAL(DP) :: oldweight(species_list(is)%kappa_dih_pad8)
   REAL(DP) :: E_intra_qq, E_intra_vdw, prob_pick
-  REAL(DP) :: e_prev, temp_var, E_ang, E_inter_vdw, E_inter_qq
+  REAL(DP) :: temp_var, E_ang, E_inter_vdw, E_inter_qq
   REAL(DP) :: nrg_kBT, p_acc, nrg_intra_vdw, nrg_intra_qq, nrg_inter_vdw, nrg_inter_qq
   REAL(DP) :: trial_weight
-  REAL(DP) :: nrg_ring_frag, nrg_dihed(kappa_dih)
+  REAL(DP) :: nrg_ring_frag
 
-  LOGICAL :: overlap, overlap_trial(kappa_dih)
+  LOGICAL :: overlap, overlap_trial(species_list(is)%kappa_dih_pad8)
+  LOGICAL :: old_overlap_trial(species_list(is)%kappa_dih)
   LOGICAL, DIMENSION(natoms(is)) :: grown_exist, new_exist, combined_exist
 
   CHARACTER :: this_file*120, element*1 
 
-  TYPE(Atom_Class) :: config_list(natoms(is))
-  TYPE(Atom_Class) :: config_temp_list(natoms(is),kappa_dih)
+  TYPE(Atom_Class) :: config_list(natoms(is)) ! superceded by config_list_rp and new_rp
+  TYPE(Atom_Class) :: config_temp_list(natoms(is),species_list(is)%kappa_dih) ! superceded by trial_atom_rp
 
   TYPE(Molecule_Class), POINTER :: this_molecule
-  TYPE(Atom_Class), POINTER :: these_atoms(:)
+  TYPE(Atom_Class), POINTER, CONTIGUOUS :: these_atoms(:)
+  INTEGER :: ifrag_outer, itrial, itrial_orig
+
+  INTEGER :: ifrag_natoms
+  INTEGER :: clrp_anchor_ifrag, clrp_anchor_frag_connect
+  INTEGER, DIMENSION(natoms(is)) :: clrp_atom_id, atom_id, inv_atom_id
+
+  INTEGER :: i_dim, ja, ia_new
+
+  REAL(DP), DIMENSION(library_coords_dim1,natoms(is)) :: config_list_rp
+  REAL(DP), DIMENSION(3,natoms(is)) :: new_rp
+  REAL(SP), DIMENSION(species_list(is)%kappa_dih_pad8,3,natoms(is)) :: trial_atom_rp, trial_atom_rsp
+  INTEGER, DIMENSION(species_list(is)%kappa_dih_pad8,3,natoms(is)) :: trial_cell_coords
+  REAL(DP) :: theta_step, theta0, inv_nfrag_atoms_dp, max_dcomsq, jcharge_dp
+
+  REAL(SP), DIMENSION(3,3) :: hanger_frag_connect_sp, inv_H_sp, H_sp
+  INTEGER :: zbcdf,ybcdf,zlbc,ylbc,xlbc
+  REAL(SP), DIMENSION(species_list(is)%kappa_dih_pad8) :: sintheta_sp_vec, costheta_sp_vec, nrg_sp_vec, nrg_dihed_vec
+  REAL(DP) :: costheta0, sintheta0, costheta, sintheta
+  REAL(SP) :: costheta0_sp, sintheta0_sp, sintheta_sp, costheta_sp, overlap_nrg, nrg_sp
+  REAL(SP), DIMENSION(3) :: rp00, rp0, inv_l_vec, l_sp_vec, this_atom_rp
+  REAL(SP) :: x0, y0, z0, tempy_sp, tempz_sp, rxp, ryp, rzp, hl_sp, irsp
+  REAL(SP) :: sxp, syp, szp, rlc, l_sp, inv_l, rcutsq, jrp(3), vdwscale, chargescale, cfqq
+  REAL(SP) :: rsq, drp, rsq_vec(species_list(is)%kappa_dih_pad8), rij
+  REAL(SP) :: eps, sigsq, sigbyr2, sigbyr6, negsigsq, negsigbyr2, rterm
+  REAL(SP) :: epsig_n, epsig_m, mie_n, mie_m, lnrsq, invr, alpha_ewald_sp
+  REAL(SP) :: const1, const2, const3, const4, shift_p1, shift_p2, dsf_const, dsf_factor2_sp, sf_const1, sf_const2
+  REAL(SP) :: roffsq_rijsq
+  REAL(SP) :: anchor_frag_connect_rp(3), anchor_ifrag_rp(3), atom3_rp(3), m_sp(3)
+  INTEGER :: atom1, atom2, atom3, atom4, dihedral_atoms(4), idihed, idihed_rb
+  REAL(DP) :: r32_base(3), r32_dp(3), vecdp(3), m_dp(3), last_phi_dp, nrg_dp, cweight
+  REAL(SP) :: m_normsq, rx32, ry32, rz32, rx34, ry34, rz34, ni, cosphi, invnorm
+  REAL(SP) :: rb_c(5), cosphi2, cosphi3, cosphi4, cosphi5
+  REAL(SP), DIMENSION(species_list(is)%kappa_dih_pad8) :: cosphi_vec, phi_vec, nrg_sp_inter
+  REAL(SP) :: last_phi, phi_step_sp, theta_step_sp, a0, phi
+  REAL(DP) :: sinlintheta, coslintheta, sincosmat(2,2)
+  INTEGER :: bitcell_bit, bitcell_int, this_atom_ci(3), jtype, itype
+  INTEGER :: this_int_vdw_style, this_int_vdw_sum_style, this_int_charge_sum_style
+  LOGICAL :: l_ortho, l_widom_cells, l_get_bitcell, ij_get_coul
+  LOGICAL(1), DIMENSION(species_list(is)%kappa_dih_pad32,natoms(is)) :: bitcell_overlap
+  INTEGER :: n_good_trials, n_good_trials_pad8, n_good_trials_old
+  INTEGER :: anchor_atoms(2), anchor_atoms_reversed(2)
+  INTEGER, DIMENSION(species_list(is)%kappa_dih_pad8) :: which_good_trials
+
+  REAL(DP) :: rijsq_dp, Eij_intra_vdw, Eij_intra_qq, Eij_inter_qq, Eij_inter_vdw
+  LOGICAL :: get_vdw, get_qq, l_skip_dihed_vec(species_list(is)%ndihedrals_energetic)
+  INTEGER :: interfrag_nonbond_pairlist(2,natoms(is)*natoms(is)/(2*species_list(is)%nfragments))
+  INTEGER :: n_interfrag_nonbond_pairs
+
+  REAL(DP) :: nrg_vdw_dp, nrg_qq_dp, e_prev
+
+  REAL(DP), DIMENSION(species_list(is)%kappa_dih) :: nrg, nrg_dihed, weightnorm, oldweightnorm
+  REAL(DP) :: ran_no, prob_pick_compat, ln_pbias_compat
+
+  INTEGER :: kappa_ins, kappa_ins_pad8, kappa_ins_pad32, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
+  LOGICAL :: l_checkoverlap
+  kappa_ins = species_list(is)%kappa_ins
+  kappa_ins_pad8 = species_list(is)%kappa_ins_pad8
+  kappa_ins_pad32 = species_list(is)%kappa_ins_pad32
+  kappa_dih = species_list(is)%kappa_dih
+  kappa_dih_pad8 = species_list(is)%kappa_dih_pad8
+  kappa_dih_pad32 = species_list(is)%kappa_dih_pad32
+
 
   IF (widom_active) THEN
           this_molecule => widom_molecule
@@ -1522,24 +2189,108 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
           this_molecule => molecule_list(this_im,is)
           these_atoms => atom_list(1:natoms(is),this_im,is)
   END IF
+  overlap_nrg = max_kBT/beta(this_box)
+  this_int_vdw_sum_style = int_vdw_sum_style(this_box)
+  n_interfrag_nonbond_pairs = 0
+
+  SELECT CASE(this_int_vdw_sum_style)
+  CASE(vdw_cut_switch)
+          const1 = switch_factor1(this_box)*switch_factor2(this_box)
+          const2 = switch_factor1(this_box)*2
+          const3 = ron_switch_sq(this_box)
+          const4 = roff_switch_sq(this_box)
+  END SELECT
+
+  IF (compatibility_mode) THEN
+          e_prev = e_total
+          ln_pbias_compat = ln_pbias
+  END IF
 
 
 !  ! DEBUGging variables
 !  INTEGER :: M_XYZ_unit
 
 
-  config_list(:)%rxp = 0.0_DP
-  config_list(:)%ryp = 0.0_DP
-  config_list(:)%rzp = 0.0_DP
-  dumcount = 0
-  e_prev = e_total
-  weight(:) = 0.0_DP
+  l_ortho = box_list(this_box)%int_box_shape <= int_ortho
+  l_widom_cells = widom_active .AND. l_sectors
+  l_get_bitcell = widom_active .AND. bitcell_flag
+  !IF (frag_total > 1 .AND. i_mcstep>0) THEN
+  !        WRITE(*,*) this_im, frag_start, frag_total
+  !        WRITE(*,*) frag_order
+  !        WRITE(*,*) these_atoms%exist
+  !END IF
 
-  DO i = frag_start, frag_total
+  l_skip_dihed_vec = .FALSE. ! currently marks dihedral as already being computed and able to be skipped early
+  ! l_skip_dihed_vec gets inverted later to mark which dihedrals aren't interfragment dihedrals
+  !DIR$ LOOP COUNT = 0, 1, 2, 3
+  DO ifrag_outer = frag_start, frag_total
+     config_list_rp = 0.0_DP
+     dumcount = 0
+     weight(:) = 0.0_DP
 
-     ifrag = frag_order(i)
+     ifrag = frag_order(ifrag_outer)
 
      grown_exist = these_atoms%exist
+     ! Note that there has to be only one connection of ifrag that is already 
+     ! placed. Let us find out which fragment that is
+
+     total_connect = 0
+     connection(:) = 0
+
+     DO j = 1, frag_list(ifrag,is)%nconnect
+        
+        frag_connect = frag_list(ifrag,is)%frag_connect(j)
+        
+        IF (frag_placed(frag_connect) == 1 ) THEN
+           total_connect = total_connect + 1
+           connection(total_connect) = frag_connect
+        END IF
+        
+     END DO
+
+     IF (total_connect > 1 ) THEN
+        err_msg = ''
+        err_msg(1) = 'More than one connections of' // &
+                     TRIM(Int_To_String(ifrag)) // 'exist'
+        CALL Clean_Abort(err_msg,'Fragment_Placement')
+     END IF
+     
+     ! If here then only one connection found
+
+     frag_connect = connection(1)
+     ! find anchor atom ids for both the fragments
+
+     CALL Get_Common_Fragment_Atoms(is,ifrag,frag_connect,anchor_ifrag, &
+             anchor_frag_connect)
+     nfrag_atoms = 0
+     atom_id(:) = 0
+     ifrag_natoms = frag_list(ifrag,is)%natoms
+     
+     DO j = 1, ifrag_natoms
+       
+        this_atom = frag_list(ifrag,is)%atoms(j)
+        IF (this_atom .EQ. anchor_ifrag) THEN
+                clrp_anchor_ifrag = j
+        ELSE IF (this_atom .EQ. anchor_frag_connect) THEN
+                clrp_anchor_frag_connect = j
+        ELSE
+                nfrag_atoms = nfrag_atoms + 1
+                atom_id(nfrag_atoms) = this_atom
+                clrp_atom_id(nfrag_atoms) = j
+        END IF
+        
+        !IF ( (this_atom /= anchor_ifrag) .AND. &
+        !     (this_atom /= anchor_frag_connect)) THEN
+        !   nfrag_atoms = nfrag_atoms + 1
+        !   atom_id(nfrag_atoms) = this_atom
+        !END IF
+        
+     END DO
+    
+     ! Note that frag_connect already has anchor of ifrag placed both for fixed 
+     ! and variable bond length cases so all we have to do is obtain the 
+     ! coordinates of these atoms in the configuration and align it to their 
+     ! coordinates in the simulation box.
      
      !**************************************************************************
      ! Step 1) Select a fragment conformation
@@ -1550,14 +2301,16 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
         ! For a deletion move, use the existing conformation for each additional
         ! fragment
 
-        DO j = 1, frag_list(ifrag,is)%natoms
+        DO j = 1, ifrag_natoms
            this_atom = frag_list(ifrag,is)%atoms(j)
 
            these_atoms(this_atom)%exist = .TRUE.
+           config_list_rp(1:3,j) = these_atoms(this_atom)%rp(1:3)
+           IF (compatibility_mode) config_list(this_atom)%rp = these_atoms(this_atom)%rp
 
-           config_list(this_atom)%rxp = these_atoms(this_atom)%rxp
-           config_list(this_atom)%ryp = these_atoms(this_atom)%ryp
-           config_list(this_atom)%rzp = these_atoms(this_atom)%rzp
+           !config_list(this_atom)%rp(1) = these_atoms(this_atom)%rp(1)
+           !config_list(this_atom)%rp(2) = these_atoms(this_atom)%rp(2)
+           !config_list(this_atom)%rp(3) = these_atoms(this_atom)%rp(3)
         END DO
 
         ! For a ring fragment, calculate the fragment intramolecular energy
@@ -1576,32 +2329,50 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
 
         total_frags = frag_list(ifrag,is)%nconfig
         this_fragment = INT(rranf() * total_frags) + 1
+        IF (compatibility_mode) this_fragment = INT(rranf() * total_frags) + 1
         
-        ! Select a fragment conformation from the reservoir
-        ! The reservoir was populated with a Boltzmann distribution, so now we
-        ! can pull from it with a uniform probability
-        
-        this_fragment = INT(rranf() * total_frags) + 1
         ! Read in the coordinates
         frag_type = frag_list(ifrag,is)%type
 
-        DO j = 1, frag_list(ifrag,is)%natoms
-           
-           this_atom = frag_list(ifrag,is)%atoms(j)
-           
-           these_atoms(this_atom)%exist = .TRUE.
-           nl = (frag_position_library(frag_type)-1) + &
-                        frag_list(ifrag,is)%natoms*(this_fragment-1)+ j
-           config_list(this_atom)%rxp = &
-                 library_coords(nl)%rxp
-              !frag_coords(j,this_fragment,frag_type)%rxp
-           config_list(this_atom)%ryp = &
-                 library_coords(nl)%ryp
-              !frag_coords(j,this_fragment,frag_type)%ryp
-           config_list(this_atom)%rzp = &
-                 library_coords(nl)%rzp
-              !frag_coords(j,this_fragment,frag_type)%rzp
-        END DO
+        nl_base = (frag_position_library(frag_type)-1) + &
+                     frag_list(ifrag,is)%natoms*(this_fragment-1)
+        config_list_rp(:,1:ifrag_natoms) = &
+                library_coords(:,nl_base+1:nl_base+ifrag_natoms)
+        these_atoms(frag_list(ifrag,is)%atoms(1:ifrag_natoms))%exist = .TRUE.
+        IF (compatibility_mode) THEN
+                DO j = 1, frag_list(ifrag,is)%natoms
+                   nl = nl_base + j
+                   
+                   this_atom = frag_list(ifrag,is)%atoms(j)
+                   
+                   these_atoms(this_atom)%exist = .TRUE.
+                   config_list(this_atom)%rp = library_coords(1:3,nl)
+                   !config_list(this_atom)%rp(1) = &
+                   !      library_coords(1,nl)
+                   !   !frag_coords(j,this_fragment,frag_type)%rp(1)
+                   !config_list(this_atom)%rp(2) = &
+                   !      library_coords(2,nl)
+                   !   !frag_coords(j,this_fragment,frag_type)%rp(2)
+                   !config_list(this_atom)%rp(3) = &
+                   !      library_coords(3,nl)
+                   !   !frag_coords(j,this_fragment,frag_type)%rp(3)
+                END DO
+        END IF
+        !DO j = 1, frag_list(ifrag,is)%natoms
+        !   
+        !   this_atom = frag_list(ifrag,is)%atoms(j)
+        !   
+        !   these_atoms(this_atom)%exist = .TRUE.
+        !   config_list(this_atom)%rp(1) = &
+        !         library_coords(nl)%rp(1)
+        !      !frag_coords(j,this_fragment,frag_type)%rp(1)
+        !   config_list(this_atom)%rp(2) = &
+        !         library_coords(nl)%rp(2)
+        !      !frag_coords(j,this_fragment,frag_type)%rp(2)
+        !   config_list(this_atom)%rp(3) = &
+        !         library_coords(nl)%rp(3)
+        !      !frag_coords(j,this_fragment,frag_type)%rp(3)
+        !END DO
      
         ! For a ring fragment, access the fragment intramolecular energy 
 
@@ -1610,69 +2381,19 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
                               ! nrg_frag(this_fragment,frag_type)
            nrg_ring_frag_tot = nrg_ring_frag_tot + nrg_ring_frag
         END IF
-        
 !why was this ever here?        CLOSE(UNIT=11)
 
      END IF
      combined_exist = these_atoms%exist
      new_exist = combined_exist .AND. .NOT. grown_exist
+     these_atoms%exist = new_exist
      
      !**************************************************************************
      ! Step 2) Align the fragment to the growing molecule
      !**************************************************************************
      !
 
-     ! Note that there has to be only one connection of ifrag that is already 
-     ! placed. Let us find out which fragment that is
-
-     total_connect = 0
-     connection(:) = 0
-
-     DO j = 1, frag_list(ifrag,is)%nconnect
-        
-        frag_connect = frag_list(ifrag,is)%frag_connect(j)
-        
-        IF (frag_placed(frag_connect) == 1 ) THEN
-           total_connect = total_connect + 1
-           connection(total_connect) = frag_connect
-        END IF
-        
-     END DO
-
-     IF (total_connect > 1 ) THEN
-        err_msg = ''
-        err_msg(1) = 'More than one connections of' // &
-                     TRIM(Int_To_String(ifrag)) // 'exist'
-        CALL Clean_Abort(err_msg,'Fragment_Placement')
-     END IF
-     
-     ! If here then only one connection found
-
-     frag_connect = connection(1)
-    
-     ! Note that frag_connect already has anchor of ifrag placed both for fixed 
-     ! and variable bond length cases so all we have to do is obtain the 
-     ! coordinates of these atoms in the configuration and align it to their 
-     ! coordinates in the simulation box.
-     
-     ! find anchor atom ids for both the fragments
-
-     CALL Get_Common_Fragment_Atoms(is,ifrag,frag_connect,anchor_ifrag, &
-             anchor_frag_connect)
-     nfrag_atoms = 0
-     atom_id(:) = 0
      
-     DO j = 1, frag_list(ifrag,is)%natoms
-       
-        this_atom = frag_list(ifrag,is)%atoms(j)
-        
-        IF ( (this_atom /= anchor_ifrag) .AND. &
-             (this_atom /= anchor_frag_connect)) THEN
-           nfrag_atoms = nfrag_atoms + 1
-           atom_id(nfrag_atoms) = this_atom
-        END IF
-        
-     END DO
      
 !     anchor_ifrag = frag_list(ifrag,is)%anchor(1)
 !     anchor_frag_connect = frag_list(frag_connect,is)%anchor(1)
@@ -1680,16 +2401,16 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
      ! Find one atom of ifrag and frag_connect that will be used for generating 
      ! xy plane
 
-     DO j = 1, frag_list(ifrag,is)%natoms
+     !DO j = 1, frag_list(ifrag,is)%natoms
 
-        this_atom = frag_list(ifrag,is)%atoms(j)
+     !   this_atom = frag_list(ifrag,is)%atoms(j)
 
-        IF ( (this_atom /= anchor_ifrag) .AND. &
-             (this_atom /= anchor_frag_connect)) EXIT
+     !   IF ( (this_atom /= anchor_ifrag) .AND. &
+     !        (this_atom /= anchor_frag_connect)) EXIT
 
-     END DO
+     !END DO
 
-     atom_ifrag = this_atom
+     !atom_ifrag = this_atom
 
      ! Similarly for frag_connect
 
@@ -1719,46 +2440,56 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
 
      ! for ifrag
 
-     vec1(1) = config_list(anchor_ifrag)%rxp &
-             - config_list(anchor_frag_connect)%rxp
-     vec1(2) = config_list(anchor_ifrag)%ryp &
-             - config_list(anchor_frag_connect)%ryp
-     vec1(3) = config_list(anchor_ifrag)%rzp &
-             - config_list(anchor_frag_connect)%rzp
+     new_rp(1:3,1:nfrag_atoms) = config_list_rp(1:3,clrp_atom_id(1:nfrag_atoms))
 
+     vec1(1:3) = config_list_rp(1:3,clrp_anchor_ifrag) - config_list_rp(1:3,clrp_anchor_frag_connect)
+     vec2(1:3) = new_rp(1:3,1) - config_list_rp(1:3,clrp_anchor_frag_connect)
 
-     vec2(1) = config_list(atom_ifrag)%rxp &
-             - config_list(anchor_frag_connect)%rxp
-     vec2(2) = config_list(atom_ifrag)%ryp &
-             - config_list(anchor_frag_connect)%ryp
-     vec2(3) = config_list(atom_ifrag)%rzp &
-             - config_list(anchor_frag_connect)%rzp
+     !vec1(1) = config_list(anchor_ifrag)%rp(1) &
+     !        - config_list(anchor_frag_connect)%rp(1)
+     !vec1(2) = config_list(anchor_ifrag)%rp(2) &
+     !        - config_list(anchor_frag_connect)%rp(2)
+     !vec1(3) = config_list(anchor_ifrag)%rp(3) &
+     !        - config_list(anchor_frag_connect)%rp(3)
+
+
+     !vec2(1) = config_list(atom_ifrag)%rp(1) &
+     !        - config_list(anchor_frag_connect)%rp(1)
+     !vec2(2) = config_list(atom_ifrag)%rp(2) &
+     !        - config_list(anchor_frag_connect)%rp(2)
+     !vec2(3) = config_list(atom_ifrag)%rp(3) &
+     !        - config_list(anchor_frag_connect)%rp(3)
        
 
      CALL Get_Aligner_Hanger(vec1, vec2, aligner_ifrag,hanger_ifrag)
      
      ! Calculate this only for inserting a molecule
-     
+     ! The following line was moved out of the following IF statement because it's used again later
+     r32_base = these_atoms(anchor_ifrag)%rp(1:3) - &
+             these_atoms(anchor_frag_connect)%rp(1:3)
      IF ( .NOT. del_flag) THEN
         
         
-        vec1(1) = these_atoms(anchor_ifrag)%rxp - &
-             these_atoms(anchor_frag_connect)%rxp
-        
-        vec1(2) = these_atoms(anchor_ifrag)%ryp - &
-             these_atoms(anchor_frag_connect)%ryp
-        
-        vec1(3) = these_atoms(anchor_ifrag)%rzp - &
-             these_atoms(anchor_frag_connect)%rzp
+        vec1(1:3) = r32_base
+        !vec1(1) = these_atoms(anchor_ifrag)%rp(1) - &
+        !     these_atoms(anchor_frag_connect)%rp(1)
+        !
+        !vec1(2) = these_atoms(anchor_ifrag)%rp(2) - &
+        !     these_atoms(anchor_frag_connect)%rp(2)
+        !
+        !vec1(3) = these_atoms(anchor_ifrag)%rp(3) - &
+        !     these_atoms(anchor_frag_connect)%rp(3)
+        vec2(1:3) = these_atoms(atom_frag_connect)%rp(1:3) - &
+                these_atoms(anchor_frag_connect)%rp(1:3)
         
-        vec2(1) = these_atoms(atom_frag_connect)%rxp - &
-             these_atoms(anchor_frag_connect)%rxp
-        
-        vec2(2) = these_atoms(atom_frag_connect)%ryp - &
-             these_atoms(anchor_frag_connect)%ryp
-        
-        vec2(3) = these_atoms(atom_frag_connect)%rzp - &
-             these_atoms(anchor_frag_connect)%rzp     
+        !vec2(1) = these_atoms(atom_frag_connect)%rp(1) - &
+        !     these_atoms(anchor_frag_connect)%rp(1)
+        !
+        !vec2(2) = these_atoms(atom_frag_connect)%rp(2) - &
+        !     these_atoms(anchor_frag_connect)%rp(2)
+        !
+        !vec2(3) = these_atoms(atom_frag_connect)%rp(3) - &
+        !     these_atoms(anchor_frag_connect)%rp(3)     
      
 
         CALL Get_Aligner_Hanger(vec1, vec2, aligner_frag_connect,hanger_frag_connect)
@@ -1771,32 +2502,35 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
      ! Apply aligner of ifrag
      
 
-     tempx = config_list(anchor_frag_connect)%rxp
-     tempy = config_list(anchor_frag_connect)%ryp
-     tempz = config_list(anchor_frag_connect)%rzp
-     
-     config_list(:)%rxp = config_list(:)%rxp - tempx
-     config_list(:)%ryp = config_list(:)%ryp - tempy
-     config_list(:)%rzp = config_list(:)%rzp - tempz
+     !tempx = config_list(anchor_frag_connect)%rp(1)
+     !tempy = config_list(anchor_frag_connect)%rp(2)
+     !tempz = config_list(anchor_frag_connect)%rp(3)
+     !
+     !config_list(:)%rp(1) = config_list(:)%rp(1) - tempx
+     !config_list(:)%rp(2) = config_list(:)%rp(2) - tempy
+     !config_list(:)%rp(3) = config_list(:)%rp(3) - tempz
+     new_rp(1:3,1:nfrag_atoms) = new_rp(1:3,1:nfrag_atoms) - SPREAD(config_list_rp(1:3,clrp_anchor_frag_connect),2,nfrag_atoms)
+
+     new_rp(1:3,1:nfrag_atoms) = MATMUL(aligner_ifrag,new_rp(1:3,1:nfrag_atoms))
          
-     DO j = 1, frag_list(ifrag,is)%natoms
-        
-        this_atom = frag_list(ifrag,is)%atoms(j)
-        
-        tempx = config_list(this_atom)%rxp
-        tempy = config_list(this_atom)%ryp
-        tempz = config_list(this_atom)%rzp
-        
-        config_list(this_atom)%rxp = tempx * aligner_ifrag(1,1) &
-                                   + tempy * aligner_ifrag(1,2) &
-                                   + tempz * aligner_ifrag(1,3)
-        config_list(this_atom)%ryp = tempx * aligner_ifrag(2,1) &
-                                   + tempy * aligner_ifrag(2,2) &
-                                   + tempz * aligner_ifrag(2,3)
-        config_list(this_atom)%rzp = tempx * aligner_ifrag(3,1) &
-                                   + tempy * aligner_ifrag(3,2) &
-                                   + tempz * aligner_ifrag(3,3)
-     END DO
+     !DO j = 1, frag_list(ifrag,is)%natoms
+     !   
+     !   this_atom = frag_list(ifrag,is)%atoms(j)
+     !   
+     !   tempx = config_list(this_atom)%rp(1)
+     !   tempy = config_list(this_atom)%rp(2)
+     !   tempz = config_list(this_atom)%rp(3)
+     !   
+     !   config_list(this_atom)%rp(1) = tempx * aligner_ifrag(1,1) &
+     !                              + tempy * aligner_ifrag(1,2) &
+     !                              + tempz * aligner_ifrag(1,3)
+     !   config_list(this_atom)%rp(2) = tempx * aligner_ifrag(2,1) &
+     !                              + tempy * aligner_ifrag(2,2) &
+     !                              + tempz * aligner_ifrag(2,3)
+     !   config_list(this_atom)%rp(3) = tempx * aligner_ifrag(3,1) &
+     !                              + tempy * aligner_ifrag(3,2) &
+     !                              + tempz * aligner_ifrag(3,3)
+     !END DO
      
      !**************************************************************************
      ! Step 3) Select kappa_dih orientations for each additional fragment
@@ -1808,10 +2542,13 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
      ! anchoring atoms do not change due to this rotation. For the deletion 
      ! move, the first trial must be the one corresponding to the actual 
      ! coordinates, hence there should be no rotation about x-axis.
+     theta_step_sp = species_list(is)%theta_step_sp
+     theta_step = species_list(is)%theta_step
 
      IF ( del_flag ) THEN
 
-        theta = 0.0_DP
+        !theta = 0.0_DP
+        theta0 = 0.0_DP !-threehalfPI_DP
 
         ! also note that we will transform the position based on hanger_ifrag so
         ! that the original positions are recovered
@@ -1822,309 +2559,1159 @@ SUBROUTINE Fragment_Placement(this_box, this_im, is, frag_start, frag_total, &
 
         ! Select a random theta with uniform probability
         
-        theta = twopi * rranf()
+        !theta = twopi * rranf()
+        IF (compatibility_mode) THEN
+                theta0 = twopi*rranf()
+        ELSE
+                theta0 = theta_step * rranf() !- threehalfPI_DP
+        END IF
      END IF
 
-     ! Now that we have a starting theta, the other dihedral positions are 
-     ! uniformly spaced around the 2pi disc. We need to calculate the atomic
-     ! coordinates:
+     IF (compatibility_mode) THEN
+             these_atoms%exist = combined_exist
+             theta = theta0
+             DO j = 1, frag_list(ifrag,is)%natoms
 
-     ! Loop over the trial dihedrals
-     ii = 1
-     DO 
-!     DO ii = 1, kappa_dih
-
-        config_temp_list(:,ii)%rxp  = config_list(:)%rxp
-        config_temp_list(:,ii)%ryp  = 0.0_DP
-        config_temp_list(:,ii)%rzp  = 0.0_DP
-        ! Loop over atoms
-        DO j = 1, frag_list(ifrag,is)%natoms
-           
-           this_atom = frag_list(ifrag,is)%atoms(j)
-           
-           tempx = config_list(this_atom)%rxp
-           tempy = config_list(this_atom)%ryp
-           tempz = config_list(this_atom)%rzp
-           config_temp_list(this_atom,ii)%ryp =  DCOS(theta) * tempy &
-                                              +  DSIN(theta) * tempz
-           config_temp_list(this_atom,ii)%rzp = -DSIN(theta) * tempy &
-                                              +  DCOS(theta) * tempz
+                this_atom = frag_list(ifrag,is)%atoms(j)
 
-        END DO
-
-        ! Loop over atoms (again)
-        
+                IF ( (this_atom /= anchor_ifrag) .AND. &
+                     (this_atom /= anchor_frag_connect)) EXIT
 
-        DO j = 1, frag_list(ifrag,is)%natoms
-           
-           this_atom = frag_list(ifrag,is)%atoms(j)
-           
-           tempx = config_temp_list(this_atom,ii)%rxp
-           tempy = config_temp_list(this_atom,ii)%ryp
-           tempz = config_temp_list(this_atom,ii)%rzp
-           
-           config_temp_list(this_atom,ii)%rxp = tempx*hanger_frag_connect(1,1) &
-                                              + tempy*hanger_frag_connect(1,2) &
-                                              + tempz*hanger_frag_connect(1,3)
-           
-           config_temp_list(this_atom,ii)%ryp = tempx*hanger_frag_connect(2,1) &
-                                              + tempy*hanger_frag_connect(2,2) &
-                                              + tempz*hanger_frag_connect(2,3) 
-           
-           config_temp_list(this_atom,ii)%rzp = tempx*hanger_frag_connect(3,1) &
-                                              + tempy*hanger_frag_connect(3,2) &
-                                              + tempz*hanger_frag_connect(3,3)
+             END DO
 
-    
-           IF ( this_atom /= anchor_ifrag) THEN
-              IF ( this_atom /= anchor_frag_connect) THEN
-                 
-                 config_temp_list(this_atom,ii)%rxp = &
-                      config_temp_list(this_atom,ii)%rxp + &
-                      these_atoms(anchor_frag_connect)%rxp
-                 
-                 config_temp_list(this_atom,ii)%ryp = &
-                      config_temp_list(this_atom,ii)%ryp + &
-                      these_atoms(anchor_frag_connect)%ryp
-                 
-                 config_temp_list(this_atom,ii)%rzp = &
-                      config_temp_list(this_atom,ii)%rzp + &
-                      these_atoms(anchor_frag_connect)%rzp
-                 
-                 these_atoms(this_atom)%rxp = &
-                      config_temp_list(this_atom,ii)%rxp 
-                 
-                 these_atoms(this_atom)%ryp = &
-                      config_temp_list(this_atom,ii)%ryp 
+             atom_ifrag = this_atom
+             tempx = config_list(anchor_frag_connect)%rp(1)
+             tempy = config_list(anchor_frag_connect)%rp(2)
+             tempz = config_list(anchor_frag_connect)%rp(3)
+             
+             config_list(:)%rp(1) = config_list(:)%rp(1) - tempx
+             config_list(:)%rp(2) = config_list(:)%rp(2) - tempy
+             config_list(:)%rp(3) = config_list(:)%rp(3) - tempz
                  
-                 these_atoms(this_atom)%rzp = &
-                      config_temp_list(this_atom,ii)%rzp  
+             DO j = 1, frag_list(ifrag,is)%natoms
+                
+                this_atom = frag_list(ifrag,is)%atoms(j)
+                
+                tempx = config_list(this_atom)%rp(1)
+                tempy = config_list(this_atom)%rp(2)
+                tempz = config_list(this_atom)%rp(3)
+                
+                config_list(this_atom)%rp(1) = tempx * aligner_ifrag(1,1) &
+                                           + tempy * aligner_ifrag(1,2) &
+                                           + tempz * aligner_ifrag(1,3)
+                config_list(this_atom)%rp(2) = tempx * aligner_ifrag(2,1) &
+                                           + tempy * aligner_ifrag(2,2) &
+                                           + tempz * aligner_ifrag(2,3)
+                config_list(this_atom)%rp(3) = tempx * aligner_ifrag(3,1) &
+                                           + tempy * aligner_ifrag(3,2) &
+                                           + tempz * aligner_ifrag(3,3)
+             END DO
+             ii = 1
+             DO 
+        !     DO ii = 1, kappa_dih
+
+                config_temp_list(:,ii)%rp(1)  = config_list(:)%rp(1)
+                config_temp_list(:,ii)%rp(2)  = 0.0_DP
+                config_temp_list(:,ii)%rp(3)  = 0.0_DP
+                ! Loop over atoms
+                DO j = 1, frag_list(ifrag,is)%natoms
+                   
+                   this_atom = frag_list(ifrag,is)%atoms(j)
+                   
+                   tempx = config_list(this_atom)%rp(1)
+                   tempy = config_list(this_atom)%rp(2)
+                   tempz = config_list(this_atom)%rp(3)
+                   config_temp_list(this_atom,ii)%rp(2) =  DCOS(theta) * tempy &
+                                                      +  DSIN(theta) * tempz
+                   config_temp_list(this_atom,ii)%rp(3) = -DSIN(theta) * tempy &
+                                                      +  DCOS(theta) * tempz
+
+                END DO
+
+                ! Loop over atoms (again)
+                
+
+                DO j = 1, frag_list(ifrag,is)%natoms
+                   
+                   this_atom = frag_list(ifrag,is)%atoms(j)
+                   
+                   tempx = config_temp_list(this_atom,ii)%rp(1)
+                   tempy = config_temp_list(this_atom,ii)%rp(2)
+                   tempz = config_temp_list(this_atom,ii)%rp(3)
+                   
+                   config_temp_list(this_atom,ii)%rp(1) = tempx*hanger_frag_connect(1,1) &
+                                                      + tempy*hanger_frag_connect(1,2) &
+                                                      + tempz*hanger_frag_connect(1,3)
+                   
+                   config_temp_list(this_atom,ii)%rp(2) = tempx*hanger_frag_connect(2,1) &
+                                                      + tempy*hanger_frag_connect(2,2) &
+                                                      + tempz*hanger_frag_connect(2,3) 
+                   
+                   config_temp_list(this_atom,ii)%rp(3) = tempx*hanger_frag_connect(3,1) &
+                                                      + tempy*hanger_frag_connect(3,2) &
+                                                      + tempz*hanger_frag_connect(3,3)
 
-              END IF
-           END IF
-        END DO
+            
+                   IF ( this_atom /= anchor_ifrag) THEN
+                      IF ( this_atom /= anchor_frag_connect) THEN
+                         
+                         !config_temp_list(this_atom,ii)%rxp = &
+                         !     config_temp_list(this_atom,ii)%rxp + &
+                         !     these_atoms(anchor_frag_connect)%rxp
+                         !
+                         !config_temp_list(this_atom,ii)%ryp = &
+                         !     config_temp_list(this_atom,ii)%ryp + &
+                         !     these_atoms(anchor_frag_connect)%ryp
+                         !
+                         !config_temp_list(this_atom,ii)%rzp = &
+                         !     config_temp_list(this_atom,ii)%rzp + &
+                         !     these_atoms(anchor_frag_connect)%rzp
+
+                         config_temp_list(this_atom,ii)%rp = &
+                                 config_temp_list(this_atom,ii)%rp + &
+                                 these_atoms(anchor_frag_connect)%rp
+                         these_atoms(this_atom)%rp = &
+                                 config_temp_list(this_atom,ii)%rp
+                         
+                         !these_atoms(this_atom)%rxp = &
+                         !     config_temp_list(this_atom,ii)%rp(1) 
+                         !
+                         !these_atoms(this_atom)%ryp = &
+                         !     config_temp_list(this_atom,ii)%rp(2) 
+                         !
+                         !these_atoms(this_atom)%rzp = &
+                         !     config_temp_list(this_atom,ii)%rp(3)  
+
+                      END IF
+                   END IF
+                END DO
+
+                ! Exit the loop if we've computed atomic coords for all trial dihedrals
+                IF( ii == kappa_dih ) EXIT
+
+                ! Increment the counter and dihedral angle
+                ii = ii + 1
+                theta = theta + twopi / REAL(kappa_dih,DP)
+
+             END DO
+             !**************************************************************************
+             ! Step 4) Compute the energy of the fragment 
+             !**************************************************************************
+             ! 
+             ! Initialize the energies
+
+             nrg(:) = 0.0_DP
+             nrg_dihed(:) = 0.0_DP
+             overlap_trial(:) = .FALSE.
+
+             trial_loop2: DO ii = 1, kappa_dih
+
+                ! Reload the coordinates for the atoms of this fragment
+                DO j = 1, nfrag_atoms
+                   
+                        this_atom = atom_id(j)
+                   
+                        these_atoms(this_atom)%rp(1) = &
+                           config_temp_list(this_atom,ii)%rp(1) 
+                        these_atoms(this_atom)%rp(2) = &
+                           config_temp_list(this_atom,ii)%rp(2) 
+                        these_atoms(this_atom)%rp(3) = &
+                           config_temp_list(this_atom,ii)%rp(3)
+                        IF (l_sectors .AND. widom_active) THEN
+                                IF (check_overlap(this_atom,this_im,is)) THEN
+                                       IF (ii > 1) THEN
+                                               weight(ii) = weight(ii-1)
+                                       ELSE
+                                               weight(ii) = 0.0_DP
+                                       END IF
+                                       overlap_trial(ii) = .TRUE.
+                                       !IF (ii == kappa_dih) these_atoms(atom_id(1:nfrag_atoms))%exist = .TRUE.
+                                       CYCLE trial_loop2
+                                END IF
+                        END IF
+                END DO
+                ! Turn all the atoms off
+                overlap = .FALSE.
+                these_atoms(atom_id(1:nfrag_atoms))%exist = .FALSE.
+                
+                ! Initialize the energies
+                nrg_intra_vdw = 0.0_DP
+                nrg_intra_qq = 0.0_DP
+                nrg_inter_vdw = 0.0_DP
+                nrg_inter_qq = 0.0_DP
+                   
+                ! Compute the atomic energies as the fragment is slowly turned on
+                DO j = 1, nfrag_atoms
+                      
+                   these_atoms(atom_id(j))%exist = .TRUE.
+                      
+                   CALL Compute_Atom_Nonbond_Intra_Energy(atom_id(j),this_im,is, &
+                        E_intra_vdw,E_intra_qq,E_inter_qq,overlap)
+                   IF (overlap) THEN
+                      IF (ii > 1) THEN
+                              weight(ii) = weight(ii-1)
+                      ELSE
+                              weight(ii) = 0.0_DP
+                      END IF
+                      overlap_trial(ii) = .TRUE.
+                      ! if it is the last trial, the atom exist flag may not be
+                      ! properly set to true
+                      IF (ii == kappa_dih) these_atoms(atom_id(1:nfrag_atoms))%exist = .TRUE.
+                      CYCLE trial_loop2
+                   END IF
+                   
+                   nrg_intra_vdw = nrg_intra_vdw + E_intra_vdw
+                   nrg_intra_qq  = nrg_intra_qq  + E_intra_qq
+                   nrg_inter_qq  = nrg_inter_qq  + E_inter_qq
+                      
+                END DO
+
+                these_atoms%exist = new_exist
+                IF (.NOT. (cbmc_cell_list_flag .AND. widom_active)) THEN
+                        CALL Get_COM(this_im,is)
+                        CALL Compute_Max_COM_Distance(this_im,is)
+                END IF
+                IF (widom_active) THEN
+                        CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,&
+                                nrg_inter_vdw,overlap)
+                        ! in this case, nrg_inter_vdw already includes qq energy
+                        E_inter_qq = 0.0_DP
+                ELSE
+                        CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,&
+                                nrg_inter_vdw,E_inter_qq,overlap)
+                END IF
+                these_atoms%exist = combined_exist
 
-        ! Exit the loop if we've computed atomic coords for all trial dihedrals
-        IF( ii == kappa_dih ) EXIT
+                IF (overlap) THEN
+                   ! atoms are too close, set the weight to zero
+                   weight(ii) = 0.0_DP
+                   overlap_trial(ii) = .TRUE.
+                ELSE
+                   nrg_inter_qq = nrg_inter_qq + E_inter_qq
+                   CALL Compute_Molecule_Dihedral_Energy(this_im,is,e_dihed)
 
-        ! Increment the counter and dihedral angle
-        ii = ii + 1
-        theta = theta + twopi / REAL(kappa_dih,DP)
+                   nrg_dihed(ii) = e_dihed
 
-     END DO
+                   nrg(ii) = e_dihed + nrg_intra_vdw + nrg_intra_qq  + &          
+                        nrg_inter_vdw + nrg_inter_qq - e_prev
 
-     !**************************************************************************
-     ! Step 4) Compute the energy of the fragment 
-     !**************************************************************************
-     ! 
-     ! Initialize the energies
+                   IF (frag_list(ifrag,is)%ring) THEN
+                      ! subtract the biasing energy used to sample intramolecular DOFs
+                      nrg(ii) = nrg(ii) - nrg_ring_frag
+                   END IF
+                   
+                   nrg_kBT = beta(this_box) * nrg(ii)
+                   
+                   IF ( nrg_kBT >= max_kBT) THEN
+                      ! the energy is too high, set the weight to zero
+                      weight(ii) = 0.0_DP
+                      overlap_trial(ii) = .TRUE.
+                   ELSE
+                      weight(ii) = DEXP(-nrg_kBT)
+                   END IF
+                END IF
 
-     nrg(:) = 0.0_DP
-     nrg_dihed(:) = 0.0_DP
-     overlap_trial(:) = .FALSE.
+        !        ! BEGIN DEBUGGING OUTPUT
+        !        ! Write out the fragment coordinates for each trial position
+        !        M_XYZ_unit = movie_xyz_unit + this_box
+        !        DO j = 1, frag_list(ifrag,is)%natoms
+        !           this_atom = frag_list(ifrag,is)%atoms(j)
+        !           WRITE(M_XYZ_unit,*) &
+        !              TRIM(nonbond_list(this_atom,is)%element) // &
+        !              TRIM(int_to_string(ii)), & 
+        !              these_atoms(this_atom)%rxp, &
+        !              these_atoms(this_atom)%ryp, &
+        !              these_atoms(this_atom)%rzp
+        !        END DO
+        !        IF (ii==1) THEN
+        !           WRITE(*,'(A,X,I5)'), 'i_mcstep', i_mcstep
+        !           WRITE(*,'(4(A12,X))') 'DIH' // TRIM(int_to_string(i)) // ':trial ', 'energy', 'weight', 'overlap'
+        !        END IF
+        !        WRITE(*,'(I12,X,E12.6,X,E12.6,X,L12)') ii, beta(this_box)*nrg(ii), weight(ii), overlap_trial(ii)
+        !        ! END DEBUGGING OUTPUT
+                         
+                ! Track the cumulative weights for Golden sampling
+                IF ( ii > 1 ) weight(ii) = weight(ii-1) + weight(ii)
+
+        !        WRITE(*,*) 'weight',overlap, ii, nrg_kBT
+        !        WRITE(*,*) 'energy', e_prev, nrg(ii)
+                   
+             END DO trial_loop2
+             oldweight = weight
+             these_atoms%exist = new_exist
+             old_overlap_trial = overlap_trial(1:kappa_dih)
 
-     trial_loop: DO ii = 1, kappa_dih
+     END IF
 
-        ! Reload the coordinates for the atoms of this fragment
-        DO j = 1, nfrag_atoms
-           
-                this_atom = atom_id(j)
-           
-                these_atoms(this_atom)%rxp = &
-                   config_temp_list(this_atom,ii)%rxp 
-                these_atoms(this_atom)%ryp = &
-                   config_temp_list(this_atom,ii)%ryp 
-                these_atoms(this_atom)%rzp = &
-                   config_temp_list(this_atom,ii)%rzp
-                IF (l_sectors .AND. widom_active) THEN
-                        IF (check_overlap(this_atom,this_im,is)) THEN
-                               IF (ii > 1) THEN
-                                       weight(ii) = weight(ii-1)
-                               ELSE
-                                       weight(ii) = 0.0_DP
-                               END IF
-                               overlap_trial(ii) = .TRUE.
-                               !IF (ii == kappa_dih) these_atoms(atom_id(1:nfrag_atoms))%exist = .TRUE.
-                               CYCLE trial_loop
+     ! Now that we have a starting theta, the other dihedral positions are 
+     ! uniformly spaced around the 2pi disc. We need to calculate the atomic
+     ! coordinates:
+
+     ! RS: Note that both the original code and my refactor of it actually rotate the dihdral
+     !     in the negative phi direction. For the dihedrals between frag_connect and ifrag,
+     !     phi = phi0 - theta.  This never mattered before, but it's important when calculating dihedral phi
+     !     for a given trial based on the phi and theta of another trial.
+
+     ! Vectorized version start
+     IF (.NOT. (cbmc_cell_list_flag .AND. l_widom_cells)) THEN
+             inv_nfrag_atoms_dp = 1.0_DP / nfrag_atoms
+             ! ignore actual mass for fragment growth.  Use average position of new atoms instead
+             this_molecule%rcom(1:3) = SUM(new_rp(1:3,1:nfrag_atoms),2)*inv_nfrag_atoms_dp
+             max_dcomsq = 0.0_DP
+             DO j = 1, nfrag_atoms
+                vec2 = new_rp(1:3,j) - this_molecule%rcom(1:3)
+                max_dcomsq = MAX(max_dcomsq,DOT_PRODUCT(vec2,vec2))
+             END DO
+             this_molecule%rcom(4) = SQRT(max_dcomsq) ! constant for all trials
+     END IF
+     hanger_frag_connect_sp = REAL(hanger_frag_connect,SP)
+     IF (l_get_bitcell) THEN
+             zbcdf = box_list(this_box)%bitcell_dimfactor(3)
+             zlbc = box_list(this_box)%length_bitcells(3)
+             ybcdf = box_list(this_box)%bitcell_dimfactor(2)
+             ylbc = box_list(this_box)%length_bitcells(2)
+             xlbc = box_list(this_box)%length_bitcells(1)
+     END IF
+     ! Use Ptolemy's identities to compute SIN and COS of randomly shifted angles from precomputed array and SIN and COS of the shift
+     IF (del_flag) THEN
+             sintheta_sp_vec = species_list(is)%sincos_lintheta_sp(:,1)
+             costheta_sp_vec = species_list(is)%sincos_lintheta_sp(:,2)
+             costheta0 = 1.0_DP
+             sintheta0 = 0.0_DP
+     ELSE
+             costheta0 = COS(theta0)
+             sintheta0 = SIN(theta0)
+             costheta0_sp = REAL(costheta0,SP)
+             sintheta0_sp = REAL(sintheta0,SP)
+             !DIR$ VECTOR ALIGNED
+             !$OMP SIMD PRIVATE(sintheta_sp,costheta_sp)
+             DO itrial = 1, kappa_dih_pad8
+                sintheta_sp = species_list(is)%sincos_lintheta_sp(itrial,1)
+                costheta_sp = species_list(is)%sincos_lintheta_sp(itrial,2)
+                sintheta_sp_vec(itrial) = sintheta_sp*costheta0_sp + costheta_sp*sintheta0_sp
+                costheta_sp_vec(itrial) = costheta_sp*costheta0_sp - sintheta_sp*sintheta0_sp
+             END DO
+             !$OMP END SIMD
+     END IF
+
+     rp00 = REAL(these_atoms(anchor_frag_connect)%rp(1:3),SP)
+     IF (l_ortho .AND. l_widom_cells) THEN
+             DO i_dim = 1,3
+                inv_l_vec(i_dim) = REAL(1.0_DP/box_list(this_box)%length(i_dim,i_dim),SP)
+                l_sp_vec(i_dim) = REAL(box_list(this_box)%length(i_dim,i_dim),SP)
+             END DO
+     ELSE IF (l_widom_cells) THEN
+             inv_H_sp = REAL(box_list(this_box)%length_inv,SP)
+             H_sp = REAL(box_list(this_box)%length,SP)
+     END IF
+     !DIR$ ASSUME (MOD(kappa_dih_pad8,dimpad_4byte) == 0)
+     !DIR$ LOOP COUNT = 3
+     DO j = 1, nfrag_atoms
+             x0 = REAL(new_rp(1,j),SP)
+             y0 = REAL(new_rp(2,j),SP)
+             z0 = REAL(new_rp(3,j),SP)
+             !DIR$ ASSUME (MOD(kappa_dih_pad8,dimpad_4byte) == 0)
+             !DIR$ LOOP COUNT = 8, 16, 32
+             !DIR$ VECTOR ALIGNED
+             DO ii = 1, kappa_dih_pad8
+                costheta_sp = costheta_sp_vec(ii)
+                sintheta_sp = sintheta_sp_vec(ii)
+                trial_atom_rp(ii,2,j) = costheta_sp*y0 + sintheta_sp*z0
+                trial_atom_rp(ii,3,j) = costheta_sp*z0 - sintheta_sp*y0
+             END DO
+             rp0 = rp00
+             rp0 = rp0 + x0*hanger_frag_connect_sp(1:3,1)
+             !DIR$ LOOP COUNT = 8, 16, 32
+             !DIR$ VECTOR ALIGNED
+             DO ii = 1, kappa_dih_pad8
+                tempy_sp = trial_atom_rp(ii,2,j)
+                rxp = rp0(1) + tempy_sp*hanger_frag_connect_sp(1,2)
+                ryp = rp0(2) + tempy_sp*hanger_frag_connect_sp(2,2)
+                rzp = rp0(3) + tempy_sp*hanger_frag_connect_sp(3,2)
+                tempz_sp = trial_atom_rp(ii,3,j)
+                rxp = rxp + tempz_sp*hanger_frag_connect_sp(1,3)
+                ryp = ryp + tempz_sp*hanger_frag_connect_sp(2,3)
+                rzp = rzp + tempz_sp*hanger_frag_connect_sp(3,3)
+                trial_atom_rp(ii,1,j) = rxp
+                trial_atom_rp(ii,2,j) = ryp
+                trial_atom_rp(ii,3,j) = rzp
+             END DO
+             IF (.NOT. l_widom_cells) CYCLE ! The rest of the loop body is relevant only if doing Widom insertions with cell list
+             IF (l_ortho) THEN
+                     DO i_dim = 1,3
+                        inv_l = inv_l_vec(i_dim)
+                        l_sp = l_sp_vec(i_dim)
+                        hl_sp = 0.5 * l_sp
+                        !DIR$ LOOP COUNT = 8, 16, 32
+                        !DIR$ VECTOR ALIGNED
+                        DO ii = 1, kappa_dih_pad8
+                                irsp = REAL(trial_atom_rp(ii,i_dim,j),SP)
+                                irsp = irsp*inv_l + 0.5
+                                ! Note: it is important that the < checks and shifts come before the >= checks and shifts
+                                !       due to floating point rounding.  It's fine for irsp to be zero or even
+                                !       just barely less than zero due to the behavior of INT(), but if it ends up equal to
+                                !       1.0, that can cause bitcell_int to be out of bounds or otherwise severely incorrect.
+                                !       This was found out the hard way.
+                                IF (irsp < 0.0) irsp = irsp + 1.0
+                                IF (irsp >= 1.0) irsp = irsp - 1.0
+                                trial_atom_rsp(ii,i_dim,j) = irsp
+                                !irsp = irsp*l_sp - hl_sp
+                                !sp_config_temp_list_rp(ii,this_atom,i_dim) = irsp
+                        END DO
+                     END DO
+             ELSE
+                     !DIR$ LOOP COUNT = 8, 16, 32
+                     !DIR$ VECTOR ALIGNED
+                     DO ii = 1, kappa_dih_pad8
+                        rxp = REAL(trial_atom_rp(ii,1,j),SP)
+                        ! Adding extra 0.5 because the fractional coordinates we want here assume the box is centered at 0.5
+                        ! and the real coordinates are for a box centered at 0.0
+                        ! Fractional coordinates elsewhere in Cassandra typically use the same centering as the real coordinates
+                        ! (centered at 0.0), but it isn't convenient for bitcell overlap detection, so it's done differently here.
+                        sxp = 0.5 + inv_H_sp(1,1)*rxp
+                        syp = 0.5 + inv_H_sp(2,1)*rxp
+                        szp = 0.5 + inv_H_sp(3,1)*rxp
+                        ryp = REAL(trial_atom_rp(ii,2,j),SP)
+                        sxp = sxp + inv_H_sp(1,2)*ryp
+                        syp = syp + inv_H_sp(2,2)*ryp
+                        szp = szp + inv_H_sp(3,2)*ryp
+                        rzp = REAL(trial_atom_rp(ii,3,j),SP)
+                        sxp = sxp + inv_H_sp(1,3)*rzp
+                        syp = syp + inv_H_sp(2,3)*rzp
+                        szp = szp + inv_H_sp(3,3)*rzp
+                        IF (sxp < 0.0) sxp = sxp + 1.0
+                        IF (sxp >= 1.0) sxp = sxp - 1.0
+                        IF (syp < 0.0) syp = syp + 1.0
+                        IF (syp >= 1.0) syp = syp - 1.0
+                        IF (szp < 0.0) szp = szp + 1.0
+                        IF (szp >= 1.0) szp = szp - 1.0
+                        trial_atom_rsp(ii,1,j) = sxp
+                        trial_atom_rsp(ii,2,j) = syp
+                        trial_atom_rsp(ii,3,j) = szp
+                     END DO
+                     !trial_atom_rsp(1:kappa_dih,:) = &
+                     !        MATMUL(sp_config_temp_list_rp(1:kappa_dih,:),TRANSPOSE(REAL(box_list(this_box)%length_inv,SP)))
+             END IF
+             IF (l_get_bitcell) THEN
+                     !DIR$ LOOP COUNT = 8, 16, 32
+                     !DIR$ VECTOR ALIGNED
+                     DO ii = 1, kappa_dih
+                        sxp = trial_atom_rsp(ii,1,j)
+                        syp = trial_atom_rsp(ii,2,j)
+                        szp = trial_atom_rsp(ii,3,j)
+                        bitcell_bit = INT(sxp*xlbc)
+                        bitcell_int = ISHFT(bitcell_bit,-5) + &
+                                INT(syp*ylbc)*ybcdf + &
+                                INT(szp*zlbc)*zbcdf
+                        bitcell_bit = IAND(bitcell_bit,MASKR(5,INT32)) ! same as modulo 32
+                        bitcell_int = box_list(this_box)%bitcell_int32_vec(bitcell_int)
+                        bitcell_overlap(ii,j) = BTEST(bitcell_int,bitcell_bit)
+                     END DO
+             END IF
+     END DO
+     IF (l_get_bitcell) THEN
+             bitcell_overlap(:,1) = .NOT. ANY(bitcell_overlap(:,1:nfrag_atoms),2)
+             n_good_trials = 0
+             !DIR$ LOOP COUNT = 8, 16, 32
+             DO itrial = 1, kappa_dih
+                IF (bitcell_overlap(itrial,1)) THEN
+                        n_good_trials = n_good_trials+1
+                        which_good_trials(n_good_trials) = itrial
+                        trial_atom_rsp(n_good_trials,:,1:nfrag_atoms) = trial_atom_rsp(itrial,:,1:nfrag_atoms)
+                END IF
+             END DO
+             IF (n_good_trials == 0) THEN
+                cbmc_overlap = .TRUE.
+                RETURN
+             END IF
+     ELSE
+             n_good_trials = kappa_dih
+     END IF
+     n_good_trials_pad8 = IAND(n_good_trials+padconst_4byte,padmask_4byte)
+     !DIR$ ASSUME (MOD(n_good_trials_pad8,dimpad_4byte) == 0)
+     IF (l_widom_cells) THEN
+             DO j = 1, nfrag_atoms
+                     IF (l_ortho) THEN
+                             DO i_dim = 1, 3
+                                     rlc = box_list(this_box)%real_length_cells(i_dim)
+                                     l_sp = l_sp_vec(i_dim)
+                                     hl_sp = 0.5 * l_sp
+                                     !DIR$ LOOP COUNT = 8, 16, 32
+                                     !DIR$ VECTOR ALIGNED
+                                     DO itrial = 1, n_good_trials_pad8
+                                        irsp = trial_atom_rsp(itrial,i_dim,j)
+                                        trial_cell_coords(itrial,i_dim,j) = &
+                                                INT(irsp*rlc) - box_list(this_box)%sectorbound(i_dim)
+                                        irsp = irsp*l_sp - hl_sp
+                                        trial_atom_rsp(itrial,i_dim,j) = irsp
+                                     END DO
+                             END DO
+                     ELSE
+                             !DIR$ LOOP COUNT = 8, 16, 32
+                             !DIR$ VECTOR ALIGNED
+                             DO itrial = 1, n_good_trials_pad8
+                                sxp = trial_atom_rsp(itrial,1,j)
+                                trial_cell_coords(itrial,1,j) = &
+                                        INT(sxp*box_list(this_box)%real_length_cells(1)) - &
+                                        box_list(this_box)%sectorbound(1)
+                                sxp = sxp - 0.5
+                                rxp = H_sp(1,1)*sxp
+                                ryp = H_sp(2,1)*sxp
+                                rzp = H_sp(3,1)*sxp
+                                syp = trial_atom_rsp(itrial,2,j)
+                                trial_cell_coords(itrial,2,j) = &
+                                        INT(syp*box_list(this_box)%real_length_cells(2)) - &
+                                        box_list(this_box)%sectorbound(2)
+                                syp = syp - 0.5
+                                rxp = rxp + H_sp(1,2)*syp
+                                ryp = ryp + H_sp(2,2)*syp
+                                rzp = rzp + H_sp(3,2)*syp
+                                szp = trial_atom_rsp(itrial,3,j)
+                                trial_cell_coords(itrial,3,j) = &
+                                        INT(szp*box_list(this_box)%real_length_cells(3)) - &
+                                        box_list(this_box)%sectorbound(3)
+                                szp = szp - 0.5
+                                rxp = rxp + H_sp(1,3)*szp
+                                ryp = ryp + H_sp(2,3)*szp
+                                rzp = rzp + H_sp(3,3)*szp
+                                trial_atom_rsp(itrial,1,j) = rxp
+                                trial_atom_rsp(itrial,2,j) = ryp
+                                trial_atom_rsp(itrial,3,j) = rzp
+                             END DO
+                     END IF
+             END DO
+     END IF
+     n_good_trials_old = n_good_trials
+     n_good_trials = 0
+     !DIR$ LOOP COUNT = 1, 2, 3, 4, 8, 16, 32
+     trial_loop: DO itrial = 1, n_good_trials_old
+        IF (l_widom_cells) THEN
+                !DIR$ LOOP COUNT = 3
+                DO j = 1, nfrag_atoms
+                        ia = atom_id(j)
+                        this_atom_rp = trial_atom_rsp(itrial,:,j)
+                        this_atom_ci = trial_cell_coords(itrial,:,j)
+                        IF (check_overlap(this_atom_rp(1:3),this_atom_ci,ia,is,this_box)) THEN
+                                CYCLE trial_loop
                         END IF
+                END DO
+        END IF
+        itrial_orig = MERGE(which_good_trials(itrial),itrial,l_get_bitcell)
+        IF (cbmc_cell_list_flag .AND. l_widom_cells) THEN
+                nrg_sp = SUM(Compute_Cell_List_CBMC_nrg(&
+                        trial_atom_rsp(itrial,1,1:nfrag_atoms),trial_atom_rsp(itrial,2,1:nfrag_atoms),& 
+                        trial_atom_rsp(itrial,3,1:nfrag_atoms), &
+                        trial_cell_coords(itrial,1,1:nfrag_atoms),trial_cell_coords(itrial,2,1:nfrag_atoms), &
+                        trial_cell_coords(itrial,3,1:nfrag_atoms), &
+                        atom_id(1:nfrag_atoms),is,this_box))
+        ELSE
+                this_molecule%rcom(1:3) = 0.0_DP
+                !DIR$ LOOP COUNT = 3
+                DO j = 1, nfrag_atoms
+                        ia = atom_id(j)
+                        these_atoms(ia)%rp(1:3) = REAL(trial_atom_rp(itrial_orig,1:3,j),DP)
+                        this_molecule%rcom(1:3) = this_molecule%rcom(1:3) + inv_nfrag_atoms_dp*these_atoms(ia)%rp(1:3)
+                END DO
+                IF (widom_active) THEN
+                        CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,nrg_vdw_dp,overlap)
+                        IF (overlap) CYCLE trial_loop
+                        nrg_sp = REAL(nrg_vdw_dp,SP)
+                ELSE
+                        CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,nrg_vdw_dp,nrg_qq_dp,overlap)
+                        IF (overlap) CYCLE trial_loop
+                        nrg_sp = REAL(nrg_vdw_dp + nrg_qq_dp,SP)
                 END IF
+        END IF
+        !!DIR$ LOOP COUNT = 3
+        !DO j = 1, nfrag_atoms
+        !        ia = atom_id(j)
+        !        IF (cbmc_cell_list_flag .AND. l_widom_cells) THEN
+        !                this_atom_rp = trial_atom_rsp(itrial,:,j)
+        !                this_atom_ci = trial_cell_coords(itrial,:,j)
+        !                nrg_sp = nrg_sp + Compute_Cell_List_CBMC_nrg(this_atom_rp(1:3),this_atom_ci,ia,is,this_box)
+        !        ELSE
+        !                these_atoms(ia)%rp(1:3) = REAL(trial_atom_rp(itrial_orig,1:3,j),DP)
+        !                IF (j > 1) THEN
+        !                        this_molecule%rcom(1:3) = this_molecule%rcom(1:3) + inv_nfrag_atoms_dp*these_atoms(ia)%rp(1:3)
+        !                ELSE
+        !                        this_molecule%rcom(1:3) = inv_nfrag_atoms_dp*these_atoms(ia)%rp(1:3)
+        !                END IF
+        !        END IF
+        !END DO
+        !IF (.NOT. widom_active) THEN
+        !        CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,nrg_vdw_dp,nrg_qq_dp,overlap)
+        !        IF (overlap) CYCLE trial_loop
+        !        nrg_sp = REAL(nrg_vdw_dp + nrg_qq_dp,SP)
+        !ELSE IF (.NOT. cbmc_cell_list_flag) THEN
+        !        CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,nrg_vdw_dp,overlap)
+        !        IF (overlap) CYCLE trial_loop
+        !        nrg_sp = REAL(nrg_vdw_dp,SP)
+        !END IF
+        IF (nrg_sp > overlap_nrg .AND. .NOT. compatibility_mode) CYCLE trial_loop
+        n_good_trials = n_good_trials + 1
+        nrg_sp_vec(n_good_trials) = nrg_sp
+        which_good_trials(n_good_trials) = itrial_orig
+        IF (n_good_trials .NE. itrial_orig) THEN
+                !DIR$ ASSUME (itrial_orig > n_good_trials)
+                trial_atom_rp(n_good_trials,:,1:nfrag_atoms) = &
+                        trial_atom_rp(itrial_orig,:,1:nfrag_atoms)
+        END IF
+        !IF (cbmc_cell_list_flag) THEN
+        !        nrg_sp = 0.0
+        !        DO j = 1, nfrag_atoms
+        !                ia = atom_id(j)
+        !                this_atom_rp = trial_atom_rsp(itrial,:,)
+        !                this_atom_ci = trial_cell_coords(itrial,:,j)
+        !                nrg_sp = nrg_sp + Compute_Cell_List_CBMC_nrg(this_atom_rp(1:3),this_atom_ci,ia,is,this_box)
+        !        END DO
+        !        nrg_sp_vec(n_good_trials) = nrg_sp
+        !END IF
+     END DO trial_loop
+     IF (n_good_trials == 0) THEN
+             cbmc_overlap = .TRUE.
+             RETURN
+     END IF
+     IF (compatibility_mode) nrg_sp_inter = nrg_sp_vec
+     DO j = 1, nfrag_atoms
+        inv_atom_id(atom_id(j)) = j
+     END DO
+     rcutsq = REAL(rcut_cbmcsq(this_box),SP)
+     anchor_atoms = (/ anchor_frag_connect, anchor_ifrag /)
+     anchor_atoms_reversed = anchor_atoms(2:1:-1)
+     n_good_trials_pad8 = IAND(n_good_trials+padconst_4byte,padmask_4byte)
+     !DIR$ ASSUME (MOD(n_good_trials_pad8,dimpad_4byte) .EQ. 0)
+     overlap_trial(1:n_good_trials_pad8) = .FALSE.
+     DO ja = 1, natoms(is)
+        IF (.NOT. grown_exist(ja) .OR. ((.NOT. compatibility_mode) .AND. ANY(ja .EQ. anchor_atoms))) CYCLE
+        jrp = these_atoms(ja)%rp(1:3)
+        jtype = nonbond_list(ja,is)%atom_type_number
+        jcharge_dp = nonbond_list(ja,is)%charge
+        !DIR$ LOOP COUNT = 3
+        DO ia_new = 1, nfrag_atoms
+                ia = atom_id(ia_new)
+                vdwscale = vdw_intra_scale(ia,ja,is)
+                chargescale = charge_intra_scale(ia,ja,is)
+                cfqq = REAL(nonbond_list(ia,is)%charge * jcharge_dp * charge_factor,SP)
+                itype = nonbond_list(ia,is)%atom_type_number
+                this_int_vdw_style = MIN(itype*jtype,1)*int_vdw_style(this_box)
+                this_int_vdw_sum_style = int_vdw_sum_style(this_box)
+                ij_get_coul = cfqq .NE. 0.0 .AND. int_charge_style(this_box) .NE. charge_none .AND. &
+                        species_list(is)%l_coul_cbmc
+                l_checkoverlap = .NOT. (vdwscale == 0.0 .OR. this_int_vdw_style == 0)
+                IF (.NOT. (l_checkoverlap .OR. ij_get_coul)) CYCLE
+                !IF ((vdwscale == 0.0 .OR. this_int_vdw_style == 0) .AND. .NOT. ij_get_coul) CYCLE
+                this_int_charge_sum_style = MERGE(MERGE(charge_sf,int_charge_sum_style(this_box),cbmc_charge_sf_flag),0,ij_get_coul)
+                !DIR$ LOOP COUNT = 8, 16
+                !DIR$ VECTOR ALIGNED
+                DO itrial = 1, n_good_trials_pad8
+                        drp = trial_atom_rp(itrial,1,ia_new) - jrp(1)
+                        rsq = drp*drp
+                        drp = trial_atom_rp(itrial,2,ia_new) - jrp(2)
+                        rsq = rsq + drp*drp
+                        drp = trial_atom_rp(itrial,3,ia_new) - jrp(3)
+                        rsq = rsq + drp*drp
+                        IF (l_checkoverlap) overlap_trial(itrial) = overlap_trial(itrial) .OR. rsq < sp_rcut_lowsq
+                        rsq_vec(itrial) = rsq
+                END DO
+                SELECT CASE(this_int_vdw_style)
+                CASE(vdw_lj)
+                        SELECT CASE(this_int_vdw_sum_style)
+                        CASE(vdw_charmm)
+                                eps = vdwscale*ppvdwp_table2_sp(1,itype,jtype,this_box) ! epsilon
+                                sigsq = ppvdwp_table2_sp(2,itype,jtype,this_box) ! sigma**2
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        sigbyr2 = MERGE(sigsq/rsq,0.0,rsq<rcutsq)
+                                        sigbyr6 = sigbyr2*sigbyr2*sigbyr2
+                                        nrg_sp = nrg_sp + eps * (sigbyr6*sigbyr6 - sigbyr6 - sigbyr6) ! eps was not multiplied by 4
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        CASE(vdw_cut_shift)
+                                eps = vdwscale*ppvdwp_table2_sp(1,itype,jtype,this_box) ! 4*epsilon
+                                negsigsq = ppvdwp_table2_sp(2,itype,jtype,this_box) ! -(sigma**2)
+                                shift_p1 = vdwscale*ppvdwp_table2_sp(3,itype,jtype,this_box)
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        negsigbyr2 = MERGE(negsigsq/rsq,0.0,rsq<rcutsq)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_sp = nrg_sp + eps*rterm - MERGE(shift_p1,0.0,rsq<rcutsq)
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        CASE(vdw_cut_switch)
+                                eps = vdwscale*ppvdwp_table2_sp(1,itype,jtype,this_box) ! 4*epsilon
+                                negsigsq = ppvdwp_table2_sp(2,itype,jtype,this_box) ! -(sigma**2)
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        negsigbyr2 = MERGE(negsigsq/rsq,0.0,rsq<rcutsq)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        IF (rsq .GE. const3) THEN
+                                                roffsq_rijsq = const4 - rsq
+                                                rterm = roffsq_rijsq*roffsq_rijsq * &
+                                                        (const1+const2*rsq) * rterm
+                                        END IF
+                                        nrg_sp = nrg_sp + eps*rterm
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        CASE(vdw_cut_shift_force)
+                                eps = vdwscale*ppvdwp_table2_sp(1,itype,jtype,this_box) ! 4*epsilon
+                                negsigsq = ppvdwp_table2_sp(2,itype,jtype,this_box) ! -(sigma**2)
+                                shift_p1 = vdwscale*ppvdwp_table2_sp(3,itype,jtype,this_box)
+                                shift_p2 = vdwscale*ppvdwp_table2_sp(4,itype,jtype,this_box)
+                                shift_p1 = shift_p1 + REAL(rcut_vdw(this_box),SP)*shift_p2
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        negsigbyr2 = negsigsq/rsq
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        IF (rsq < rcutsq) THEN
+                                                nrg_sp = nrg_sp + eps*rterm + SQRT(rsq)*shift_p2 - shift_p1
+                                        END IF
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        CASE(vdw_cut,vdw_cut_tail)
+                                eps = vdwscale*ppvdwp_table2_sp(1,itype,jtype,this_box) ! 4*epsilon
+                                negsigsq = ppvdwp_table2_sp(2,itype,jtype,this_box) ! -(sigma**2)
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        negsigbyr2 = MERGE(negsigsq/rsq,0.0,rsq<rcutsq)
+                                        rterm = negsigbyr2*negsigbyr2*negsigbyr2
+                                        rterm = rterm + rterm*rterm
+                                        nrg_sp = nrg_sp + eps*rterm
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        END SELECT
+                CASE(vdw_mie)
+                        epsig_n = vdwscale*ppvdwp_table2_sp(1,jtype,itype,this_box)
+                        epsig_m = vdwscale*ppvdwp_table2_sp(2,jtype,itype,this_box)
+                        ! In the table, mie exponents are already pre-multiplied by -0.5
+                        mie_n = ppvdwp_table2_sp(3,jtype,itype,this_box)
+                        mie_m = ppvdwp_table2_sp(4,jtype,itype,this_box)
+                        SELECT CASE(this_int_vdw_sum_style)
+                        CASE(vdw_cut_shift)
+                                shift_p1 = vdwscale*ppvdwp_table2_sp(5,itype,jtype,this_box)
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        lnrsq = LOG(rsq)
+                                        IF (rsq<rcutsq) THEN
+                                                nrg_sp = nrg_sp + epsig_n*EXP(lnrsq*mie_n) - epsig_m*EXP(lnrsq*mie_m) - shift_p1
+                                        END IF
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        CASE(vdw_cut,vdw_cut_tail)
+                                !DIR$ LOOP COUNT = 8, 16
+                                !DIR$ VECTOR ALIGNED
+                                DO itrial = 1, n_good_trials_pad8
+                                        rsq = rsq_vec(itrial)
+                                        nrg_sp = nrg_sp_vec(itrial)
+                                        lnrsq = LOG(rsq)
+                                        IF (rsq<rcutsq) THEN
+                                                nrg_sp = nrg_sp + epsig_n*EXP(lnrsq*mie_n) - epsig_m*EXP(lnrsq*mie_m)
+                                        END IF
+                                        nrg_sp_vec(itrial) = nrg_sp
+                                END DO
+                        END SELECT
+                END SELECT
+                SELECT CASE(this_int_charge_sum_style)
+                CASE(charge_ewald)
+                        alpha_ewald_sp = REAL(alpha_ewald(this_box),SP)
+                        !DIR$ LOOP COUNT = 8, 16
+                        !DIR$ VECTOR ALIGNED
+                        DO itrial = 1, n_good_trials_pad8
+                                rsq = rsq_vec(itrial)
+                                nrg_sp = nrg_sp_vec(itrial)
+                                invr = MERGE(1.0/SQRT(rsq),0.0,rsq<rcutsq)
+                                rij = rsq*invr
+                                nrg_sp = nrg_sp + (chargescale - ERF(alpha_ewald_sp*rij))*cfqq*invr
+                                nrg_sp_vec(itrial) = nrg_sp
+                        END DO
+                CASE(charge_dsf)
+                        dsf_const = -dsf_factor2(this_box)*(rcut_coul(this_box)+dsf_factor1(this_box))
+                        dsf_factor2_sp = REAL(dsf_factor2(this_box),SP)
+                        !DIR$ LOOP COUNT = 8, 16
+                        !DIR$ VECTOR ALIGNED
+                        DO itrial = 1, n_good_trials_pad8
+                                rsq = rsq_vec(itrial)
+                                !nrg_sp = nrg_sp_vec(itrial)
+                                invr = 1.0/SQRT(rsq)
+                                rij = rsq*invr
+                                nrg_sp = dsf_const + (chargescale - ERF(alpha_dsf(this_box)*rij))*invr + dsf_factor2_sp*rij
+                                IF (rsq>=rcutsq) nrg_sp = 0.0
+                                nrg_sp = cfqq*nrg_sp + nrg_sp_vec(itrial)
+                                nrg_sp_vec(itrial) = nrg_sp
+                        END DO
+                CASE(charge_cut)
+                        cfqq = cfqq*chargescale
+                        !DIR$ LOOP COUNT = 8, 16
+                        !DIR$ VECTOR ALIGNED
+                        DO itrial = 1, n_good_trials_pad8
+                                rsq = rsq_vec(itrial)
+                                nrg_sp = nrg_sp_vec(itrial)
+                                invr = MERGE(1.0/SQRT(rsq),0.0,rsq<rcutsq)
+                                nrg_sp = nrg_sp + cfqq*invr
+                                nrg_sp_vec(itrial) = nrg_sp
+                        END DO
+                CASE(charge_sf)
+                        sf_const1 = REAL(2.0_DP/rcut_cbmc(this_box),SP)
+                        sf_const2 = 1.0/rcutsq
+                        !DIR$ LOOP COUNT = 8, 16
+                        !DIR$ VECTOR ALIGNED
+                        DO itrial = 1, n_good_trials_pad8
+                                rsq = rsq_vec(itrial)
+                                nrg_sp = nrg_sp_vec(itrial)
+                                invr = 1.0/SQRT(rsq)
+                                rij = rsq*invr
+                                rij = rij*sf_const2 - sf_const1 ! rij no longer storing atomic separation distance
+                                rij = rij + chargescale*invr
+                                rij = MERGE(rij,0.0,rsq<rcutsq)
+                                nrg_sp = nrg_sp + cfqq*rij
+                                nrg_sp_vec(itrial) = nrg_sp
+                        END DO
+                END SELECT
         END DO
-        ! Turn all the atoms off
-        overlap = .FALSE.
-        these_atoms(atom_id(1:nfrag_atoms))%exist = .FALSE.
-        
-        ! Initialize the energies
-        nrg_intra_vdw = 0.0_DP
-        nrg_intra_qq = 0.0_DP
-        nrg_inter_vdw = 0.0_DP
-        nrg_inter_qq = 0.0_DP
-           
-        ! Compute the atomic energies as the fragment is slowly turned on
-        DO j = 1, nfrag_atoms
-              
-           these_atoms(atom_id(j))%exist = .TRUE.
-              
-           CALL Compute_Atom_Nonbond_Intra_Energy(atom_id(j),this_im,is, &
-                E_intra_vdw,E_intra_qq,E_inter_qq,overlap)
-           IF (overlap) THEN
-              IF (ii > 1) THEN
-                      weight(ii) = weight(ii-1)
-              ELSE
-                      weight(ii) = 0.0_DP
-              END IF
-              overlap_trial(ii) = .TRUE.
-              ! if it is the last trial, the atom exist flag may not be
-              ! properly set to true
-              IF (ii == kappa_dih) these_atoms(atom_id(1:nfrag_atoms))%exist = .TRUE.
-              CYCLE trial_loop
-           END IF
-           
-           nrg_intra_vdw = nrg_intra_vdw + E_intra_vdw
-           nrg_intra_qq  = nrg_intra_qq  + E_intra_qq
-           nrg_inter_qq  = nrg_inter_qq  + E_inter_qq
-              
+     END DO
+     ! Having intramolecular overlap is unlikely in most cases
+     IF (ANY(overlap_trial(1:n_good_trials))) THEN
+             IF (ALL(overlap_trial(1:n_good_trials))) THEN
+                     cbmc_overlap = .TRUE.
+                     RETURN
+             END IF
+             n_good_trials_old = n_good_trials
+             n_good_trials = 0
+             DO itrial = 1, n_good_trials_old
+                IF (.NOT. overlap_trial(itrial)) THEN
+                        n_good_trials = n_good_trials + 1
+                        which_good_trials(n_good_trials) = MERGE(itrial,which_good_trials(itrial),n_good_trials_old==kappa_dih)
+                        trial_atom_rp(n_good_trials,1:3,1:nfrag_atoms) = &
+                                trial_atom_rp(itrial,1:3,1:nfrag_atoms)
+                        nrg_sp_vec(n_good_trials) = nrg_sp_vec(itrial)
+                END IF
+             END DO
+             n_good_trials_pad8 = IAND(n_good_trials+padconst_4byte,padmask_4byte)
+     END IF
+     !DIR$ ASSUME (MOD(n_good_trials_pad8,dimpad_4byte) == 0)
+     nrg_dihed_vec = 0.0
+     anchor_frag_connect_rp = REAL(these_atoms(anchor_frag_connect)%rp(1:3),SP)
+     anchor_ifrag_rp = REAL(these_atoms(anchor_ifrag)%rp(1:3),SP)
+     !DIR$ LOOP COUNT = 0, 9, 18
+     DO idihed_rb = 1, species_list(is)%ndihedrals_rb
+        IF (l_skip_dihed_vec(idihed_rb)) CYCLE ! it would CYCLE anyway due to other conditionals but this is faster
+        dihedral_atoms = dihedral_list(idihed_rb,is)%atom(1:4)
+        IF (ALL(dihedral_atoms(2:3) .EQ. anchor_atoms) .AND. &
+                grown_exist(dihedral_atoms(1)) .AND. &
+                new_exist(dihedral_atoms(4))) THEN
+                atom1 = dihedral_atoms(1)
+                atom2 = dihedral_atoms(2)
+                atom3 = dihedral_atoms(3)
+                atom4 = inv_atom_id(dihedral_atoms(4))
+                r32_dp = r32_base
+                atom3_rp = anchor_ifrag_rp
+        ELSE IF (ALL(dihedral_atoms(2:3) .EQ. anchor_atoms_reversed) .AND. &
+                new_exist(dihedral_atoms(1)) .AND. &
+                grown_exist(dihedral_atoms(4))) THEN
+                atom1 = dihedral_atoms(4)
+                atom2 = dihedral_atoms(3)
+                atom3 = dihedral_atoms(2)
+                atom4 = inv_atom_id(dihedral_atoms(1))
+                r32_dp = -r32_base
+                atom3_rp = anchor_frag_connect_rp
+        ELSE
+                CYCLE
+        END IF
+        l_skip_dihed_vec(idihed_rb) = .TRUE.
+        vecdp = these_atoms(atom1)%rp(1:3) - &
+                these_atoms(atom2)%rp(1:3) ! r12
+        m_dp(1) = vecdp(2)*r32_dp(3) - vecdp(3)*r32_dp(2)
+        m_dp(2) = vecdp(3)*r32_dp(1) - vecdp(1)*r32_dp(3)
+        m_dp(3) = vecdp(1)*r32_dp(2) - vecdp(2)*r32_dp(1)
+        m_normsq = REAL(DOT_PRODUCT(m_dp,m_dp),SP)
+        m_sp = REAL(m_dp,SP)
+        rx32 = REAL(r32_dp(1),SP)
+        ry32 = REAL(r32_dp(2),SP)
+        rz32 = REAL(r32_dp(3),SP)
+        !DIR$ LOOP COUNT = 8, 16
+        !DIR$ VECTOR ALIGNED
+        DO itrial = 1, n_good_trials_pad8
+                rx34 = trial_atom_rp(itrial,1,atom4)
+                ry34 = trial_atom_rp(itrial,2,atom4)
+                rz34 = trial_atom_rp(itrial,3,atom4)
+                rx34 = atom3_rp(1) - rx34
+                ry34 = atom3_rp(2) - ry34
+                rz34 = atom3_rp(3) - rz34
+                ni = ry32*rz34 - rz32*ry34 ! nx
+                cosphi = m_sp(1)*ni ! currently accumulator corresponding to mdn
+                invnorm = ni*ni ! invnorm similarly needs to undergo further operations
+                ni = rz32*rx34 - rx32*rz34 ! ny
+                cosphi = cosphi + m_sp(2)*ni
+                invnorm = invnorm + ni*ni
+                ni = rx32*ry34 - ry32*rx34 ! nz
+                invnorm = invnorm + ni*ni
+                cosphi = cosphi + m_sp(3)*ni
+                invnorm = invnorm * m_normsq
+                invnorm = 1.0_SP / SQRT(invnorm) ! finally reciprocal of norm product
+                cosphi = cosphi*invnorm ! now cosphi is actually COS(phi)
+                cosphi_vec(itrial) = cosphi
         END DO
-
-        these_atoms%exist = new_exist
-        IF (.NOT. (cbmc_cell_list_flag .AND. widom_active)) THEN
-                CALL Get_COM(this_im,is)
-                CALL Compute_Max_COM_Distance(this_im,is)
+        rb_c(1:5) = dihedral_list(idihed_rb,is)%rb_c_sp(1:5)
+        !DIR$ LOOP COUNT = 8, 16
+        !DIR$ VECTOR ALIGNED
+        DO itrial = 1, n_good_trials_pad8
+                cosphi = cosphi_vec(itrial)
+                cosphi2 = cosphi*cosphi
+                cosphi3 = cosphi*cosphi2
+                cosphi4 = cosphi2*cosphi2
+                cosphi5 = cosphi2*cosphi3
+                nrg_dihed_vec(itrial) = nrg_dihed_vec(itrial) + &
+                        rb_c(1)*cosphi + &
+                        rb_c(2)*cosphi2 + &
+                        rb_c(3)*cosphi3 + &
+                        rb_c(4)*cosphi4 + &
+                        rb_c(5)*cosphi5
+        END DO
+        IF (compatibility_mode) THEN
+                nrg_dihed_vec = nrg_dihed_vec + dihedral_list(idihed_rb,is)%rb_c_sp(0)
         END IF
-        IF (widom_active) THEN
-                CALL Compute_Molecule_Nonbond_Inter_Energy_Widom(this_im,is,&
-                        nrg_inter_vdw,overlap)
-                ! in this case, nrg_inter_vdw already includes qq energy
-                E_inter_qq = 0.0_DP
+     END DO
+     !DIR$ LOOP COUNT = 0, 9, 18
+     DO idihed = idihed_rb, species_list(is)%ndihedrals_energetic
+        IF (l_skip_dihed_vec(idihed)) CYCLE ! it would CYCLE anyway due to other conditionals but this is faster
+        dihedral_atoms = dihedral_list(idihed_rb,is)%atom(1:4)
+        IF (ALL(dihedral_atoms(2:3) .EQ. anchor_atoms) .AND. &
+                grown_exist(dihedral_atoms(1)) .AND. &
+                new_exist(dihedral_atoms(4))) THEN
+                these_atoms(dihedral_atoms(4))%rp(1:3) = &
+                        trial_atom_rp(n_good_trials,1:3,inv_atom_id(dihedral_atoms(4)))
+        ELSE IF (ALL(dihedral_atoms(2:3) .EQ. anchor_atoms_reversed) .AND. &
+                new_exist(dihedral_atoms(1)) .AND. &
+                grown_exist(dihedral_atoms(4))) THEN
+                these_atoms(dihedral_atoms(1))%rp(1:3) = &
+                        trial_atom_rp(n_good_trials,1:3,inv_atom_id(dihedral_atoms(1)))
         ELSE
-                CALL Compute_Molecule_Nonbond_Inter_Energy(this_im,is,&
-                        nrg_inter_vdw,E_inter_qq,overlap)
+                CYCLE
         END IF
-        these_atoms%exist = combined_exist
-
-        IF (overlap) THEN
-           ! atoms are too close, set the weight to zero
-           weight(ii) = 0.0_DP
-           overlap_trial(ii) = .TRUE.
+        l_skip_dihed_vec(idihed) = .TRUE.
+        CALL Get_Dihedral_Angle(idihed,this_im,is,last_phi_dp)
+        SELECT CASE(dihedral_list(idihed,is)%int_dipot_type)
+        CASE(int_charmm)
+                last_phi = REAL(last_phi_dp - dihedral_list(idihed,is)%dihedral_param(3),SP)
+                phi_step_sp = REAL(theta_step * dihedral_list(idihed,is)%dihedral_param(2),SP)
+        CASE(int_harmonic)
+                last_phi = REAL(last_phi_dp - dihedral_list(idihed,is)%dihedral_param(2) - twoPI,SP)
+                phi_step_sp = theta_step_sp
+        END SELECT
+        a0 = dihedral_list(idihed,is)%dihedral_param_sp(1)
+        IF (l_widom_cells) THEN
+                DO itrial = 1, n_good_trials-1
+                        phi_vec(itrial) = (which_good_trials(n_good_trials)-which_good_trials(itrial)) * &
+                                phi_step_sp + last_phi
+                END DO
         ELSE
-           nrg_inter_qq = nrg_inter_qq + E_inter_qq
-           CALL Compute_Molecule_Dihedral_Energy(this_im,is,e_dihed)
-
-           nrg_dihed(ii) = e_dihed
-
-           nrg(ii) = e_dihed + nrg_intra_vdw + nrg_intra_qq  + &          
-                nrg_inter_vdw + nrg_inter_qq - e_prev
-
-           IF (frag_list(ifrag,is)%ring) THEN
-              ! subtract the biasing energy used to sample intramolecular DOFs
-              nrg(ii) = nrg(ii) - nrg_ring_frag
-           END IF
-           
-           nrg_kBT = beta(this_box) * nrg(ii)
-           
-           IF ( nrg_kBT >= max_kBT) THEN
-              ! the energy is too high, set the weight to zero
-              weight(ii) = 0.0_DP
-              overlap_trial(ii) = .TRUE.
-           ELSE
-              weight(ii) = DEXP(-nrg_kBT)
-           END IF
+                DO itrial = 1, n_good_trials-1
+                        phi_vec(itrial) = (n_good_trials-itrial) * &
+                                phi_step_sp + last_phi
+                END DO
         END IF
-
-!        ! BEGIN DEBUGGING OUTPUT
-!        ! Write out the fragment coordinates for each trial position
-!        M_XYZ_unit = movie_xyz_unit + this_box
-!        DO j = 1, frag_list(ifrag,is)%natoms
-!           this_atom = frag_list(ifrag,is)%atoms(j)
-!           WRITE(M_XYZ_unit,*) &
-!              TRIM(nonbond_list(this_atom,is)%element) // &
-!              TRIM(int_to_string(ii)), & 
-!              these_atoms(this_atom)%rxp, &
-!              these_atoms(this_atom)%ryp, &
-!              these_atoms(this_atom)%rzp
-!        END DO
-!        IF (ii==1) THEN
-!           WRITE(*,'(A,X,I5)'), 'i_mcstep', i_mcstep
-!           WRITE(*,'(4(A12,X))') 'DIH' // TRIM(int_to_string(i)) // ':trial ', 'energy', 'weight', 'overlap'
-!        END IF
-!        WRITE(*,'(I12,X,E12.6,X,E12.6,X,L12)') ii, beta(this_box)*nrg(ii), weight(ii), overlap_trial(ii)
-!        ! END DEBUGGING OUTPUT
-                 
-        ! Track the cumulative weights for Golden sampling
-        IF ( ii > 1 ) weight(ii) = weight(ii-1) + weight(ii)
-
-!        WRITE(*,*) 'weight',overlap, ii, nrg_kBT
-!        WRITE(*,*) 'energy', e_prev, nrg(ii)
-           
-     END DO trial_loop
-
-     !**************************************************************************
-     ! Step 5) Select a trial dihedral using the weighted probabilities
-     !**************************************************************************
-     ! 
-     ! 
-
+        ! I ignore any constant terms in dihedral energy because only relative energy matters.
+        SELECT CASE(dihedral_list(idihed,is)%int_dipot_type)
+        CASE(int_charmm)
+                nrg_dihed_vec(1:n_good_trials) = nrg_dihed_vec(1:n_good_trials) + &
+                        a0*COS(phi_vec(1:n_good_trials))
+        CASE(int_harmonic)
+                DO itrial = 1, n_good_trials
+                        phi = ABS(phi_vec(itrial))
+                        IF (phi > PI_SP) phi = phi - twoPI_SP
+                        nrg_dihed_vec(itrial) = nrg_dihed_vec(itrial) + phi*phi*a0
+                END DO
+        END SELECT
+     END DO
+     ! Compute trial weights
+     !DIR$ LOOP COUNT = 8, 16
+     !DIR$ VECTOR ALIGNED
+     DO itrial = 1, n_good_trials_pad8
+        nrg_sp = nrg_sp_vec(itrial) + nrg_dihed_vec(itrial)
+        nrg_dp = REAL(nrg_sp,DP)
+        nrg_dp_vec(itrial) = nrg_dp
+        !IF (compatibility_mode) nrg_dp = nrg_dp - e_prev
+        weight(itrial) = EXP(-beta(this_box)*nrg_dp)
+     END DO
+     cweight = weight(1)
+     !DIR$ LOOP COUNT = 1, 2, 3, 4, 8
+     DO itrial = 2, n_good_trials
+        cweight = cweight + weight(itrial)
+        weight(itrial) = cweight
+     END DO
+     IF (compatibility_mode) THEN
+             IF (ALL(old_overlap_trial(1:kappa_dih)) .NEQV. (cweight == 0.0_DP)) THEN
+                     WRITE(*,*) cweight
+             END IF
+     END IF
      ! If the cumulative weight is 0, then all trial dihedrals had core overlap
      ! Reject the move if all trials tripped overlap
-     IF (ALL(overlap_trial)) THEN
-        cbmc_overlap = .TRUE.
-        RETURN
+     IF (cweight == 0.0_DP ) THEN
+             cbmc_overlap = .TRUE.
+             RETURN
      END IF
-
-
+     weightnorm(1:n_good_trials) = weight(1:n_good_trials) / cweight
+     oldweightnorm(1:kappa_dih) = oldweight(1:kappa_dih) / oldweight(kappa_dih)
      IF (del_flag) THEN
-
-        ! Use trial 1, which holds the current coordinates of the fragment
-        trial = 1
-
+             trial = 1
+             itrial = 1
+             IF (n_good_trials .NE. kappa_dih .AND. which_good_trials(1) .NE. 1) THEN
+                     !WRITE(*,*) "Warning! Deletion cbmc overlap!"
+                     cbmc_overlap = .TRUE.
+                     RETURN
+             END IF
+             IF (compatibility_mode) ii = 1
      ELSE
+             IF (compatibility_mode) THEN
+                     ran_no = rranf()
+                     prob_pick = ran_no*cweight
+             ELSE
+                     prob_pick = rranf() * cweight
+             END IF
+             DO itrial = 1, n_good_trials
+                IF (prob_pick < weight(itrial)) EXIT
+             END DO
+             ! The above loop should always exit "early", so itrial should never be > n_good_trials
+             ! However, maybe rounding could make prob_pick equal to cweight, so cap itrial at n_good_trials
+             itrial = MIN(itrial,n_good_trials)
+             trial = MERGE(itrial, which_good_trials(itrial), n_good_trials==kappa_dih)
+             IF (compatibility_mode) THEN
+                     prob_pick_compat = ran_no*oldweight(kappa_dih)
+                     DO ii = 1, kappa_dih
+                        IF (prob_pick_compat < oldweight(ii)) EXIT
+                     END DO
+                     IF (ii .NE. trial) THEN
+                             WRITE(*,*) 
+                             WRITE(*,*) ifrag_outer, ifrag, frag_connect, frag_total
+                             WRITE(*,*) trial, ii, prob_pick, prob_pick_compat
+                             WRITE(*,*) weightnorm
+                             WRITE(*,*) oldweightnorm
+                             WRITE(*,*) weight
+                             WRITE(*,*) oldweight
+                             WRITE(*,*) n_good_trials
+                             WRITE(*,*) nrg_dp_vec
+                             WRITE(*,*) nrg_dihed_vec
+                             WRITE(*,*) nrg_sp_vec
+                             WRITE(*,*) nrg
+                             WRITE(*,*) nrg_dihed - e_prev
+                             WRITE(*,*) overlap_trial
+                             WRITE(*,*) old_overlap_trial
+                             WRITE(*,*) nrg_sp_inter
+                             WRITE(*,*) nrg_sp_vec - nrg_sp_inter
+                             WRITE(*,*) infinity_sp, 1.0/infinity_sp
+                             WRITE(*,*) 
+                     END IF
+             END IF
+     END IF
+     ! place atoms for chosen trial
+     sinlintheta = species_list(is)%sincos_lintheta_dp(trial,1)
+     coslintheta = species_list(is)%sincos_lintheta_dp(trial,2)
+     sintheta = sinlintheta*costheta0 + coslintheta*sintheta0
+     costheta = coslintheta*costheta0 - sinlintheta*sintheta0
+     sincosmat(1,:) = (/ costheta, sintheta /)
+     sincosmat(2,:) = (/ -sintheta, costheta /)
+     new_rp(2:3,1:nfrag_atoms) = MATMUL(sincosmat,new_rp(2:3,1:nfrag_atoms))
+     new_rp(1:3,1:nfrag_atoms) = MATMUL(hanger_frag_connect,new_rp(1:3,1:nfrag_atoms))
+     DO j = 1, nfrag_atoms
+        these_atoms(atom_id(j))%rp(1:3) = new_rp(1:3,j) + these_atoms(anchor_frag_connect)%rp(1:3)
+     END DO
+     these_atoms%exist = combined_exist
+     IF (widom_active) THEN
+             ! Note, this part could be precomputed in the simulation setup instead, since the interfragment
+             !      intramolecular nonbonded pairs are the same for each run of this subroutine for a given species
+             !      unless you are doing a partial deletion and regrowth, in which case some of the pairs would be
+             !      unneeded, but they shouldn't make the computation wrong since the extra pairs would be the same for
+             !      the partial deletion and the regrowth, and the extra energies would cancel out when computing dE.
+             ! I just didn't take the time to do so.
+             DO ja = 1, natoms(is)
+                IF ((.NOT. grown_exist(ja)) .OR. ANY(ja == anchor_atoms)) CYCLE
+                interfrag_nonbond_pairlist(1,n_interfrag_nonbond_pairs+1:n_interfrag_nonbond_pairs+nfrag_atoms) = &
+                        SPREAD(ja,1,nfrag_atoms)
+                interfrag_nonbond_pairlist(2,n_interfrag_nonbond_pairs+1:n_interfrag_nonbond_pairs+nfrag_atoms) = &
+                        atom_id(1:nfrag_atoms)
+                n_interfrag_nonbond_pairs = n_interfrag_nonbond_pairs + nfrag_atoms
+             END DO
+     END IF
+     ! Recover the individual probability for the accepted trial
+     ln_pbias = ln_pbias - beta(this_box) * nrg_dp_vec(itrial) - LOG(cweight)
+     ! mark this fragment as placed
+     frag_placed(ifrag) = 1
+     ! Vectorized version end
+
+     IF (.NOT. widom_active) e_prev = nrg_dihed(ii)
+     IF (compatibility_mode) THEN
+             ln_pbias_compat = ln_pbias_compat - beta(this_box)*nrg(ii) - LOG(oldweight(kappa_dih))
+             !IF (frag_total > 1) THEN
+             !        WRITE(*,*) ifrag_outer, ifrag, ln_pbias, ln_pbias_compat
+             !END IF
+     END IF
 
-        ! Select a trial from the weighted distribution
-        prob_pick = rranf() * weight(kappa_dih)
-
-        DO ii = 1, kappa_dih
-
-           IF ( prob_pick < weight(ii) ) EXIT
-
-        END DO
+     ! Loop over the trial dihedrals
 
-        trial = ii
+     !**************************************************************************
+     ! Step 4) Compute the energy of the fragment 
+     !**************************************************************************
+     ! 
 
-     END IF
-     IF (trial == kappa_dih + 1) THEN
-             cbmc_overlap = .TRUE.
-             RETURN
-     END IF
+     !**************************************************************************
+     ! Step 5) Select a trial dihedral using the weighted probabilities
+     !**************************************************************************
+     ! 
+     ! 
 
-     ! Recover the individual probability for the accepted trial
-     ln_pbias = ln_pbias - beta(this_box) * nrg(trial) - DLOG(weight(kappa_dih))
      
-     ! Give the coordinates of this conformation to these_atoms
-     DO j = 1, frag_list(ifrag,is)%natoms
-        
-        this_atom = frag_list(ifrag,is)%atoms(j)
-        
-        IF (this_atom /= anchor_ifrag) THEN
-           IF (this_atom /= anchor_frag_connect) THEN
-              these_atoms(this_atom)%rxp = &
-                 config_temp_list(this_atom,trial)%rxp
-              these_atoms(this_atom)%ryp = &
-                 config_temp_list(this_atom,trial)%ryp
-              these_atoms(this_atom)%rzp = &
-                 config_temp_list(this_atom,trial)%rzp
-           END IF
-        END IF
-     END DO
-     
-     ! also store the total energy up to this point
-     e_prev =  nrg_dihed(trial)
      
-     ! mark this fragment as placed
-     frag_placed(ifrag) = 1
 
     
 
   END DO
-  IF (frag_total > 1 .AND. .NOT. full_cell_list_flag) THEN
+  CALL Set_CBMC_Flag(.FALSE.)
+  ! apply this part to other CBMC moves, not just Widom insertions, if we determine it's correct for them
+  IF (widom_active .AND. n_interfrag_nonbond_pairs > 0) THEN
+          DO i = 1, n_interfrag_nonbond_pairs
+                ia = interfrag_nonbond_pairlist(1,i)
+                ja = interfrag_nonbond_pairlist(2,i)
+                vec2 = these_atoms(ja)%rp(1:3) - these_atoms(ia)%rp(1:3)
+                rijsq_dp = DOT_PRODUCT(vec2,vec2)
+                get_vdw = rijsq_dp < rcut_vdwsq(this_box) .AND. ALL(nonbond_list((/ia,ja/),is)%atom_type_number .NE. 0)
+                get_qq = rijsq_dp < rcut_coulsq(this_box) .AND. ALL(nonbond_list((/ia,ja/),is)%charge .NE. 0.0_DP)
+                IF (.NOT. (get_vdw .OR. get_qq)) CYCLE
+                CALL Compute_AtomPair_Energy(vec2(1),vec2(2),vec2(3),rijsq_dp,is,this_im,ia,is,this_im,ja, &
+                        get_vdw,get_qq, Eij_intra_vdw, Eij_intra_qq, Eij_inter_vdw, Eij_inter_qq)
+                E_total = E_total + Eij_intra_vdw + Eij_intra_qq + Eij_inter_qq + Eij_inter_vdw
+          END DO
+  END IF
+  IF (widom_active .AND. ANY(l_skip_dihed_vec)) THEN
+          ! Previously indicated dihedral should be skipped early for biasing energy because it was already done
+          !    and there's no way it would be needed twice (at least with the present CBMC algorithms)
+          l_skip_dihed_vec = .NOT. l_skip_dihed_vec ! inverting l_skip_dihed_vec
+          ! l_skip_dihed_vec now indicates which dihedrals to skip when computing interfragment dihedral energy
+          !    because they aren't interfragment dihedrals
+          CALL Compute_Molecule_Dihedral_Energy(this_im,is,E_dihed,l_skip_dihed_vec)
+          E_total = E_total + E_dihed
+  END IF
+  !IF (frag_total > 1 .AND. .NOT. (full_cell_list_flag .AND. widom_active)) THEN
+  IF (.NOT. (full_cell_list_flag .AND. widom_active)) THEN
           CALL Get_COM(this_im,is)
           CALL Compute_Max_COM_Distance(this_im,is)
   END IF
@@ -2240,13 +3827,14 @@ SUBROUTINE Single_Fragment_Regrowth(this_im,is)
       
       this_atom = frag_list(1,is)%atoms(i)
       nl = (frag_position_library(frag_type)-1) + &
-                                           frag_list(1,is)%natoms*(this_fragment -1) + i      
-      these_atoms(this_atom)%rxp =  library_coords(nl)%rxp
-                                             !frag_coords(i,this_fragment,frag_type)%rxp
-      these_atoms(this_atom)%ryp =  library_coords(nl)%ryp
-                                             !frag_coords(i,this_fragment,frag_type)%ryp
-      these_atoms(this_atom)%rzp =  library_coords(nl)%rzp
-                                              !frag_coords(i,this_fragment,frag_type)%rzp
+                                           frag_list(1,is)%natoms*(this_fragment -1) + i
+      these_atoms(this_atom)%rp(1:3) = library_coords(1:3,nl) 
+      !these_atoms(this_atom)%rp(1) =  library_coords(nl)%rp(1)
+      !                                       !frag_coords(i,this_fragment,frag_type)%rp(1)
+      !these_atoms(this_atom)%rp(2) =  library_coords(nl)%rp(2)
+      !                                       !frag_coords(i,this_fragment,frag_type)%rp(2)
+      !these_atoms(this_atom)%rp(3) =  library_coords(nl)%rp(3)
+      !                                        !frag_coords(i,this_fragment,frag_type)%rp(3)
    END DO
 
 
@@ -2267,14 +3855,14 @@ SUBROUTINE Single_Fragment_Regrowth(this_im,is)
    DO i = 1, frag_list(1,is)%natoms
       this_atom = frag_list(1,is)%atoms(i)
 
-      these_atoms(this_atom)%rxp = these_atoms(this_atom)%rxp + &
-           this_molecule%xcom_old - this_molecule%xcom
+      these_atoms(this_atom)%rp(1) = these_atoms(this_atom)%rp(1) + &
+           this_molecule%rcom_old(1) - this_molecule%rcom(1)
 
-      these_atoms(this_atom)%ryp = these_atoms(this_atom)%ryp + &
-           this_molecule%ycom_old - this_molecule%ycom
+      these_atoms(this_atom)%rp(2) = these_atoms(this_atom)%rp(2) + &
+           this_molecule%rcom_old(2) - this_molecule%rcom(2)
 
-      these_atoms(this_atom)%rzp = these_atoms(this_atom)%rzp + &
-           this_molecule%zcom_old - this_molecule%zcom
+      these_atoms(this_atom)%rp(3) = these_atoms(this_atom)%rp(3) + &
+           this_molecule%rcom_old(3) - this_molecule%rcom(3)
 
    END DO
    CALL Get_COM(this_im,is)
diff --git a/Src/gcmc_control.f90 b/Src/gcmc_control.f90
index 4b84b68d..5162cf98 100755
--- a/Src/gcmc_control.f90
+++ b/Src/gcmc_control.f90
@@ -70,6 +70,8 @@ SUBROUTINE GCMC_Control
   ! Determine whether widom insertions are done and get relevant details if they are
   CALL Get_Widom_Info
 
+  CALL Get_Lookup_Info
+
   ! Load molecular conectivity and force field paramters. Note that Get_Nspecies 
   ! must be called before this routine.  
   CALL Get_Molecule_Info
@@ -135,8 +137,6 @@ SUBROUTINE GCMC_Control
 
   CALL Get_CBMC_Info
 
-  CALL Get_Lookup_Info
-
   CALL Setup_Atompair_Tables
 
 END SUBROUTINE GCMC_Control
diff --git a/Src/gemc_control.f90 b/Src/gemc_control.f90
index 6c38ecbe..05ebcfd7 100755
--- a/Src/gemc_control.f90
+++ b/Src/gemc_control.f90
@@ -63,6 +63,8 @@ SUBROUTINE GEMC_Control
   ! Determine whether widom insertions are done and get relevant details if they are
   CALL Get_Widom_Info
 
+  CALL Get_Lookup_Info
+
   ! Load molecular conectivity and force field paramters. Note that Get_Nspecies 
   ! must be called before this routine.  
   CALL Get_Molecule_Info
@@ -140,8 +142,6 @@ SUBROUTINE GEMC_Control
   ! angles exist
   CALL Get_Dihedral_Atoms_To_Place  
 
-  CALL Get_Lookup_Info
-
   CALL Setup_Atompair_Tables
 
 END SUBROUTINE GEMC_Control
diff --git a/Src/get_com.f90 b/Src/get_com.f90
index 8b3b4a12..ef7aa701 100755
--- a/Src/get_com.f90
+++ b/Src/get_com.f90
@@ -87,9 +87,9 @@ SUBROUTINE Get_COM(alive,is)
   
   total_mass = 0.0_DP
   
-  this_molecule%xcom = 0.0_DP
-  this_molecule%ycom = 0.0_DP
-  this_molecule%zcom = 0.0_DP
+  this_molecule%rcom(1) = 0.0_DP
+  this_molecule%rcom(2) = 0.0_DP
+  this_molecule%rcom(3) = 0.0_DP
   
   DO k = 1, natoms(is)
      
@@ -102,21 +102,21 @@ SUBROUTINE Get_COM(alive,is)
         
         total_mass = total_mass + this_mass
         
-        this_molecule%xcom = this_molecule%xcom + this_mass * &
-             these_atoms(k)%rxp
-        this_molecule%ycom = this_molecule%ycom + this_mass * &
-             these_atoms(k)%ryp
-        this_molecule%zcom = this_molecule%zcom + this_mass * &
-             these_atoms(k)%rzp
+        this_molecule%rcom(1) = this_molecule%rcom(1) + this_mass * &
+             these_atoms(k)%rp(1)
+        this_molecule%rcom(2) = this_molecule%rcom(2) + this_mass * &
+             these_atoms(k)%rp(2)
+        this_molecule%rcom(3) = this_molecule%rcom(3) + this_mass * &
+             these_atoms(k)%rp(3)
           
      END IF
      
      
   END DO
   
-  this_molecule%xcom = this_molecule%xcom / total_mass
-  this_molecule%ycom = this_molecule%ycom / total_mass
-  this_molecule%zcom = this_molecule%zcom / total_mass
+  this_molecule%rcom(1) = this_molecule%rcom(1) / total_mass
+  this_molecule%rcom(2) = this_molecule%rcom(2) / total_mass
+  this_molecule%rcom(3) = this_molecule%rcom(3) / total_mass
   
 END SUBROUTINE Get_COM
 
@@ -255,9 +255,9 @@ SUBROUTINE Compute_Max_Com_Distance(alive,is)
   
   ! obtain the com of the molecule
 
-  xcom = this_molecule%xcom
-  ycom = this_molecule%ycom
-  zcom = this_molecule%zcom
+  xcom = this_molecule%rcom(1)
+  ycom = this_molecule%rcom(2)
+  zcom = this_molecule%rcom(3)
 
   ! set the maximum distance initially to zero
 
@@ -272,9 +272,9 @@ SUBROUTINE Compute_Max_Com_Distance(alive,is)
 
      IF(these_atoms(iatom)%exist) THEN
         
-        dx = these_atoms(iatom)%rxp - xcom
-        dy = these_atoms(iatom)%ryp - ycom
-        dz = these_atoms(iatom)%rzp - zcom
+        dx = these_atoms(iatom)%rp(1) - xcom
+        dy = these_atoms(iatom)%rp(2) - ycom
+        dz = these_atoms(iatom)%rp(3) - zcom
         
         ! No need to apply periodic boundary conditions 
         
@@ -289,7 +289,7 @@ SUBROUTINE Compute_Max_Com_Distance(alive,is)
 
   ! store the maximum distance 
 
-  this_molecule%max_dcom = DSQRT(dmax)
+  this_molecule%rcom(4) = DSQRT(dmax)
   this_molecule%min_dcom = DSQRT(dmin)
 
 END SUBROUTINE Compute_Max_Com_Distance
diff --git a/Src/global_variables.f90 b/Src/global_variables.f90
index f61fe1cc..a707650a 100755
--- a/Src/global_variables.f90
+++ b/Src/global_variables.f90
@@ -85,7 +85,7 @@ MODULE Global_Variables
   INTEGER, PARAMETER :: sim_gemc_ig = 8
   INTEGER, PARAMETER :: sim_mcf = 9
   INTEGER, PARAMETER :: sim_pregen = 10
-  LOGICAL :: timed_run, openmp_flag, en_flag, verbose_log, input_is_logfile
+  LOGICAL :: timed_run, openmp_flag, en_flag, verbose_log, input_is_logfile, open_mc_flag
   CHARACTER(10) :: sim_length_units
   INTEGER (KIND=INT64):: steps_per_sweep
 
@@ -121,15 +121,20 @@ MODULE Global_Variables
   INTEGER, PARAMETER :: charge_ewald = 3
   INTEGER, PARAMETER :: charge_minimum = 4
   INTEGER, PARAMETER :: charge_dsf = 5
+  INTEGER, PARAMETER :: charge_sf = 6
+
+  LOGICAL :: cbmc_charge_sf_flag = .TRUE.
 
   REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut_cbmc, rcut_cbmcsq
   REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut_vdw, rcut_coul, ron_charmm, roff_charmm, rcut_max
   REAL(DP), DIMENSION(:), ALLOCATABLE :: ron_switch, roff_switch, roff_switch_sq, switch_factor1
   REAL(DP), DIMENSION(:), ALLOCATABLE :: switch_factor2, ron_switch_sq
-  REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut_vdwsq, rcut_coulsq, ron_charmmsq, roff_charmmsq
+  REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut_vdwsq, inv_rcut_vdwsq, rcut_coulsq, ron_charmmsq, roff_charmmsq
+  REAL(SP), DIMENSION(:), ALLOCATABLE :: rcut_vdwsq_sp, inv_rcut_vdwsq_sp
   REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut9, rcut3
   REAL(DP), DIMENSION(:), ALLOCATABLE :: rcut_vdw3, rcut_vdw6
   REAL(DP) :: edens_cut, rcut_clus, rcut_low, rcut_lowsq
+  REAL(SP) :: sp_rcut_lowsq
   LOGICAL, DIMENSION(:), ALLOCATABLE :: l_half_len_cutoff
 
  ! Mixing Rules variables :
@@ -179,9 +184,13 @@ MODULE Global_Variables
  !***************************************************************
   !Conversion factors and constants
 
-  REAL(DP), PARAMETER :: PI=3.1415926536_DP
-  REAL(DP), PARAMETER :: twoPI = 6.2831853072_DP
-  REAL(DP), PARAMETER :: rootPI = 1.7724538509_DP
+  !REAL(DP), PARAMETER :: PI=3.1415926536_DP
+  !REAL(DP), PARAMETER :: twoPI = 6.2831853072_DP
+  !REAL(DP), PARAMETER :: rootPI = 1.7724538509_DP
+  !REAL(DP), PARAMETER :: halfPI = 0.5_DP*PI
+  !REAL(DP), PARAMETER :: threehalfPI_DP = 3.0_DP*halfPI
+  !REAL(SP), PARAMETER :: twoPI_SP = REAL(twoPI,SP)
+  !REAL(SP), PARAMETER :: PI_SP = REAL(PI,SP)
 
   !KBOLTZ is Boltzmann's constant in atomic units amu A^2 / (K ps^2)
   REAL(DP), PARAMETER :: kboltz = 0.8314472_DP
@@ -232,8 +241,8 @@ MODULE Global_Variables
   REAL(DP), PARAMETER :: errel = 1.0E-5_DP
 
   ! Parameter identifying number of trials
-
-  INTEGER :: kappa_ins, kappa_rot, kappa_dih
+  ! Moved to Species_Class as its attributes
+  !INTEGER :: kappa_ins, kappa_rot, kappa_dih, kappa_dih_pad8, kappa_dih_pad32
 
   ! Parameters identifying move in Ewald calculations
 
@@ -289,6 +298,8 @@ MODULE Global_Variables
   INTEGER, DIMENSION(:), ALLOCATABLE :: ndihedrals, nimpropers
   INTEGER, DIMENSION(:), ALLOCATABLE :: nfragments, fragment_bonds
   INTEGER, DIMENSION(:), ALLOCATABLE :: natoms_to_read
+  INTEGER :: max_max_molecules, sum_max_molecules
+  INTEGER :: max_max_molecules_p4, sum_max_molecules_p4
 
   ! array to hold the total number of molecules of each species in a given box
 
@@ -317,6 +328,7 @@ MODULE Global_Variables
   CHARACTER(23), DIMENSION(:), ALLOCATABLE :: atom_type_list
 
   INTEGER, DIMENSION(:), ALLOCATABLE :: nbr_vdw_params
+  INTEGER, DIMENSION(:), ALLOCATABLE :: n_vdw_p_list
 
   ! Information of the position line where starts the coordinates storage of
   ! each fragment type
@@ -392,7 +404,12 @@ MODULE Global_Variables
 
   ! Array for storing coordinates of fragments
   !TYPE(Frag_Library_Class), DIMENSION(:), ALLOCATABLE :: frag_library
-  TYPE(Library_Coords_Class), DIMENSION(:), ALLOCATABLE :: library_coords
+  !TYPE(Library_Coords_Class), DIMENSION(:), ALLOCATABLE :: library_coords
+  REAL(DP), DIMENSION(:,:), ALLOCATABLE :: library_coords
+  ! library_coords_dim1 is the size of library_coords in the first dimension
+  !      since 1, 2, and 3 on the first axis correspond to x, y, and z, it must be at least 3,
+  !      but 4 or 8 might be desirable for the sake of vectorization or alignment.
+  INTEGER, PARAMETER :: library_coords_dim1 = 4
 
   ! Array for storing the energy of each configuration of each fragment
   ! nrg_frag has dimension (number of fragment )
@@ -440,15 +457,7 @@ MODULE Global_Variables
   ! nvecs will have dimensions of nbr_boxes
   INTEGER, DIMENSION(:), ALLOCATABLE, TARGET :: nvecs
 
-  INTEGER, PARAMETER  :: maxk = 100000
-
-  ! Dimensions of (maxk, nbr_boxes)
-  REAL(DP), DIMENSION(:,:), ALLOCATABLE, TARGET :: hx, hy, hz, hsq, Cn
-
-  ! the following arrays will have dimensions of (MAXVAL(nvecs),nbr_boxes)
-
-  REAL(DP), DIMENSION(:,:), ALLOCATABLE, TARGET :: cos_sum, sin_sum, cos_sum_old, sin_sum_old
-  REAL(DP), DIMENSION(:,:), ALLOCATABLE, TARGET :: cos_sum_start, sin_sum_start
+  !INTEGER, PARAMETER  :: maxk = 100000
 
   !*********************************************************************************************************
   ! Information on trial and probabilities of trial moves
@@ -506,8 +515,6 @@ MODULE Global_Variables
   INTEGER, DIMENSION(:,:), ALLOCATABLE :: prop_per_file
   LOGICAL, DIMENSION(:,:), ALLOCATABLE :: first_open
 
-  LOGICAL :: cpcollect  !  logical determining if the chemical potential info is collected
-
   LOGICAL :: accept
 
   LOGICAL :: cbmc_flag, del_flag, phi_Flag, angle_Flag, imp_Flag
@@ -518,6 +525,8 @@ MODULE Global_Variables
 
   INTEGER :: imreplace, isreplace
 
+  INTEGER :: atompairdim, mol_dim
+
 
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 ! variables for the neighbor list
@@ -585,7 +594,7 @@ MODULE Global_Variables
 INTEGER :: n_lat_atoms
 
 !!! Pair_Nrg_Variables
-REAL(DP), ALLOCATABLE :: pair_vdw_temp(:), pair_qq_temp(:)
+REAL(DP), ALLOCATABLE :: pair_vdw_temp(:,:), pair_qq_temp(:,:)
 
 !!!! DSF variables
 REAL(DP), ALLOCATABLE, DIMENSION(:) :: alpha_dsf
@@ -609,15 +618,27 @@ MODULE Global_Variables
   ! sector_index_map is indexed by (x index, y index, z index, box index) to get sector index
   INTEGER, DIMENSION(:,:,:,:), ALLOCATABLE, TARGET :: sector_index_map, sector_index_map_cbmc, sector_index_map_full
   ! sector_n_atoms is indexed by (sector index) to get number of atoms in a sector
-  INTEGER, DIMENSION(:), ALLOCATABLE, TARGET :: sector_n_atoms, sector_n_atoms_cbmc, sector_n_atoms_full
+  INTEGER(4), DIMENSION(:), ALLOCATABLE, TARGET :: sector_n_atoms, sector_n_atoms_cbmc, sector_n_atoms_full
   ! sector_has_atoms is indexed by (x index, y index, z index, box index)
   LOGICAL, DIMENSION(:,:,:,:), ALLOCATABLE :: sector_has_atoms
 
   LOGICAL :: l_sectors, cbmc_cell_list_flag, full_cell_list_flag
-  ! sectorbound, length_cells, & cell_length_inv are indexed by (box dimension, box index)
+  ! sectorbound and length_cells are indexed by (box dimension, box index)
   INTEGER, DIMENSION(:,:), ALLOCATABLE :: sectorbound, sectorbound_cbmc, sectorbound_full
   INTEGER, DIMENSION(:,:), ALLOCATABLE :: length_cells, length_cells_cbmc, length_cells_full
-  REAL(DP), DIMENSION(:,:), ALLOCATABLE, TARGET :: cell_length_inv, cell_length_inv_cbmc, cell_length_inv_full
+  REAL(DP), DIMENSION(:,:,:), ALLOCATABLE, TARGET :: cell_length_inv, cell_length_inv_cbmc, cell_length_inv_full
+
+
+  ! indexed like (n_adj_cell_atoms,dimension in 1:4, x index, y index, z index, box index)
+  ! there is no 4th dimension of coordinates; this stores charge instead
+  REAL(SP), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: adj_cell_rsp, cbmc_cell_rsp
+  INTEGER(INT32), DIMENSION(:,:,:,:,:), ALLOCATABLE :: adj_cell_ti, cbmc_cell_ti, cbmc_cell_atomtypes
+  INTEGER(4), DIMENSION(:,:,:,:), ALLOCATABLE :: n_adj_cell_atoms, cbmc_cell_n_interact
+  INTEGER, DIMENSION(3) :: adj_cellmaxbound
+
+  INTEGER :: max_adj_cell_atoms, cbmc_max_interact
+  REAL(SP), DIMENSION(:,:), ALLOCATABLE :: cell_length_recip, real_length_cells
+
 
   INTEGER, DIMENSION(3) :: sectormaxbound, sectormaxbound_cbmc, sectormaxbound_full
 
@@ -654,12 +675,40 @@ MODULE Global_Variables
 !widom_timing  !$OMP THREADPRIVATE(n_clo, n_not_clo, n_nrg_overlap)
 !widom_timing  !$OMP THREADPRIVATE(cell_list_time, normal_overlap_time, non_overlap_time, nrg_overlap_time)
 
+  ! widom timing variables and parameters
+  LOGICAL, PARAMETER :: widom_timing = .TRUE.
+  REAL(DP) :: trial_loop_ins_time, cell_list_ins_time, cell_list_cbmc_nrg_ins_time
+  REAL(DP) :: noncell_cbmc_nrg_ins_time, rng_ins_time, cbmc_setup_ins_time
+  REAL(DP) :: cbmc_returnzone_ins_time, cbmc_endzone_ins_time
+  REAL(DP) :: cbmc_fragment_placement_time, cbmc_dih_time, bitcell_overlap_ins_time
+  REAL(DP) :: widom_ewald_recip_time, total_cbmc_time
+  INTEGER(INT64) :: cbmc_nonoverlap_ins_count, cbmc_dih_count, bitcell_overlap_ins_checks
+  INTEGER(INT64) :: cell_list_ins_checks, cell_list_cbmc_nrg_ins_checks, bitcell_overlap_ins_overlaps
+  INTEGER(INT64) :: nrg_ins_overlaps
+  !$OMP THREADPRIVATE(trial_loop_ins_time, cell_list_ins_time, cell_list_cbmc_nrg_ins_time)
+  !$OMP THREADPRIVATE(noncell_cbmc_nrg_ins_time,rng_ins_time,cbmc_setup_ins_time)
+  !$OMP THREADPRIVATE(cbmc_returnzone_ins_time, cbmc_endzone_ins_time)
+  !$OMP THREADPRIVATE(bitcell_overlap_ins_time, bitcell_overlap_ins_checks, bitcell_overlap_ins_overlaps)
+  !$OMP THREADPRIVATE(cbmc_nonoverlap_ins_count, cbmc_dih_count)
+  !$OMP THREADPRIVATE(cell_list_ins_checks, cell_list_cbmc_nrg_ins_checks)
+  !$OMP THREADPRIVATE(total_cbmc_time, widom_ewald_recip_time, nrg_ins_overlaps)
+  REAL(DP) :: trial_loop_ins_time_redux, cell_list_ins_time_redux, cell_list_cbmc_nrg_ins_time_redux
+  REAL(DP) :: noncell_cbmc_nrg_ins_time_redux, rng_ins_time_redux, cbmc_setup_ins_time_redux
+  REAL(DP) :: cbmc_returnzone_ins_time_redux, cbmc_endzone_ins_time_redux
+  REAL(DP) :: cbmc_fragment_placement_time_redux, cbmc_dih_time_redux, bitcell_overlap_ins_time_redux
+  INTEGER(INT64) :: cbmc_nonoverlap_ins_count_redux, cbmc_dih_count_redux, bitcell_overlap_ins_checks_redux
+  INTEGER(INT64) :: cell_list_ins_checks_redux, cell_list_cbmc_nrg_ins_checks_redux, bitcell_overlap_ins_overlaps_redux
+  REAL(DP) :: noncbmc_time_total, total_cbmc_time_redux, widom_ewald_recip_time_redux, nrg_ins_overlaps_redux
+
   !!! atompair energy table global variables
   INTEGER :: atompair_nrg_res
+  REAL(SP) :: atompair_nrg_res_sp
   LOGICAL :: precalc_atompair_nrg
   REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE, TARGET :: atompair_nrg_table
+  REAL(SP), DIMENSION(:,:,:), ALLOCATABLE :: atompair_nrg_table_reduced
   REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE, TARGET :: typepair_nrg_table
-  REAL(DP) :: rsq_step
+  REAL(DP) :: rsq_step, inv_rsq_step
+  REAL(SP) :: inv_rsq_step_sp
   REAL(DP) :: rsq_shifter
   INTEGER, DIMENSION(:), ALLOCATABLE :: typepair_solute_indices, typepair_solvent_indices
   INTEGER, DIMENSION(:), ALLOCATABLE :: solute_atomtypes, solvent_atomtypes
@@ -675,6 +724,10 @@ MODULE Global_Variables
   REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE, TARGET :: rsqmin_atompair_w_sum
   INTEGER(KIND=INT64), DIMENSION(:,:,:,:), ALLOCATABLE, TARGET :: rsqmin_atompair_freq
   REAL(DP), DIMENSION(:,:,:), ALLOCATABLE, TARGET :: atompair_rminsq_table
+  REAL(SP), DIMENSION(:,:,:), ALLOCATABLE :: sp_atompair_rminsq_table
+  REAL(DP), DIMENSION(:,:), ALLOCATABLE :: solvent_max_rminsq
+  REAL(DP), DIMENSION(:,:,:), ALLOCATABLE :: solvent_min_rminsq
+  REAL(SP), DIMENSION(:,:), ALLOCATABLE :: solvent_max_rminsq_sp
   INTEGER, DIMENSION(:), ALLOCATABLE :: typepair_wsolute_indices, wsolute_atomtypes
   REAL(DP) :: maxrminsq, rsqmin_step, rsqmin_shifter
   INTEGER :: rsqmin_res, wsolute_ntypes, wsolute_maxind
@@ -687,8 +740,103 @@ MODULE Global_Variables
   !
   REAL(DP), DIMENSION(0:1000)  :: type_charge_min, type_charge_max
   REAL(DP), DIMENSION(:,:), ALLOCATABLE, TARGET :: rminsq_table
+  REAL(SP), DIMENSION(:,:), ALLOCATABLE :: sp_rminsq_table
   REAL(DP) :: U_max_base, max_rmin
   LOGICAL :: calc_rmin_flag
+  REAL(DP), DIMENSION(:), ALLOCATABLE :: atomtype_max_rminsq
+  REAL(DP), DIMENSION(:,:), ALLOCATABLE :: atomtype_min_rminsq
+  REAL(SP), DIMENSION(:), ALLOCATABLE :: atomtype_max_rminsq_sp
+
+
+  INTEGER, DIMENSION(:,:), ALLOCATABLE :: nlive
+
+  REAL(DP), DIMENSION(:,:,:,:), ALLOCATABLE :: ppvdwp_table, ppvdwp_table2
+  REAL(SP), DIMENSION(:,:,:,:), ALLOCATABLE :: ppvdwp_table_sp, ppvdwp_table2_sp
+  TYPE(VdW256), DIMENSION(:,:,:), ALLOCATABLE :: ppvdwp_list
+  LOGICAL :: l_nonuniform_exponents
+
+  LOGICAL, PARAMETER :: l_not_all_live = .FALSE.
+
+  INTEGER :: global_nthreads
+
+  INTEGER, DIMENSION(:,:,:), ALLOCATABLE :: chunksize_array, nthreads_used_array
+  INTEGER, DIMENSION(:,:), ALLOCATABLE :: chunks_set_nmols
+
+  LOGICAL :: l_debug_print
+
+  REAL(DP), DIMENSION(:), ALLOCATABLE :: zero_field, rijsq_field
+  INTEGER, DIMENSION(:), ALLOCATABLE :: vec123
+  REAL(DP), DIMENSION(:,:,:), ALLOCATABLE :: live_xcom, live_ycom, live_zcom, live_max_dcom
+  !TYPE(Atom256), DIMENSION(:,:,:,:), ALLOCATABLE :: live_atom_list
+  REAL(DP), DIMENSION(:,:,:,:,:), ALLOCATABLE :: live_atom_rsp
+  LOGICAL, DIMENSION(:,:,:,:), ALLOCATABLE :: live_atom_exist
+  INTEGER :: maxnmols, maxboxnatoms
+  LOGICAL :: l_not_all_exist
+
+
+
+  LOGICAL :: bitcell_flag
+  REAL(DP) :: min_ideal_bitcell_length
+  INTEGER :: solvents_or_types_maxind
+
+
+  ! Moved to Species_Class as its attributes
+  !INTEGER :: kappa_ins_pad8, kappa_ins_pad64
+
+  LOGICAL :: l_zerotype_present
+
+
+
+
+  ! Moved to Species_Class as its attributes
+  !REAL(DP), DIMENSION(:,:), ALLOCATABLE :: sincos_lintheta_dp
+  !REAL(SP), DIMENSION(:,:), ALLOCATABLE :: sincos_lintheta_sp
+
+
+  ! Use these parameters below to efficiently round positive integers up (not down) to the nearest multiple of 8, 16, etc.
+  ! If the original integer is already a multiple of 8, 16, etc., the answer is the same as the original number.
+  ! It actually works as long as the correct answer isn't negative, even if the original integer is 0 or slightly negative
+  ! Example: n_pad8 = IAND(n+7,pad8mask), n_pad64 = IAND(n+63,pad64mask)
+  ! In those examples, n_pad8 is a multiple of 8, n_pad64 is a multiple of 64, and n is the original number to be "padded"
+  ! Note that 7 is equivalent to MASKR(3) and 63 is equivalent to MASKR(6)
+  ! This technique only works for padding to positive multiples of power of 2
+  INTEGER(INT32), PARAMETER :: pad8mask = NOT(MASKR(3,INT32))
+  INTEGER(INT32), PARAMETER :: pad16mask = NOT(MASKR(4,INT32))
+  INTEGER(INT32), PARAMETER :: pad32mask = NOT(MASKR(5,INT32))
+  INTEGER(INT32), PARAMETER :: pad64mask = NOT(MASKR(6,INT32))
+
+
+  INTEGER(INT64), PARAMETER :: recip_sqrt_magic_number = INT(Z'5FE6EB50C7B537A9',INT64)
+
+  INTEGER :: nspecies_present
+  INTEGER, DIMENSION(:), ALLOCATABLE :: which_species_present
+
+  INTEGER, DIMENSION(3) :: dummy3vec
+
+
+  INTEGER, DIMENSION(:), ALLOCATABLE :: which_solvent_atomtypes, which_solvent_atomtypes_inv
+  INTEGER, DIMENSION(:), ALLOCATABLE :: which_wsolute_atomtypes, which_wsolute_atomtypes_inv
+  INTEGER :: n_solvent_atomtypes, n_wsolute_atomtypes
+
+
+  INTEGER :: n_big_atoms
+
+  TYPE(Cavity_Data_Class), DIMENSION(:,:), ALLOCATABLE :: cavdatalist
+
+  LOGICAL :: cavity_biasing_flag
+
+  LOGICAL :: early_end
+
+  ! Use lossless compression for vector of cavity voxel locations.
+  ! This can reduce the required memory and probably improve cache hit rate and memory access speed.
+  LOGICAL, PARAMETER :: l_compress = .TRUE.
+  ! l_vectorized controls whether intermolecular pairwise energy calculations are vectorized
+  ! compatibility_mode attempts to emulate prior CBMC implementation results, although
+  ! having it .FALSE. should still yield results that are just as valid.
+  ! At time of writing, compatibility_mode also results in printing values when the new implementation (with some behaviors modified
+  ! to try to emulate old results better)
+  ! still results in a different dihedral trial being chosen than with the more extensive emulation of the old implementation.
+  LOGICAL, PARAMETER :: l_vectorized = .TRUE., compatibility_mode = .TRUE.
 
 END MODULE Global_Variables
 
diff --git a/Src/input_routines.f90 b/Src/input_routines.f90
index 9ffaa7c7..9b63ed4a 100755
--- a/Src/input_routines.f90
+++ b/Src/input_routines.f90
@@ -306,6 +306,7 @@ SUBROUTINE Get_Sim_Type
 
         line_nbr = line_nbr + 1
         CALL Parse_String(inputunit,line_nbr,1,nbr_entries,line_array,ierr)
+        open_mc_flag = .FALSE.
 
         IF(line_array(1) == 'nvt' .OR. line_array(1) == 'NVT' .OR. &
            line_array(1) == 'nvt_mc' .OR. line_array(1) == 'NVT_MC') THEN
@@ -321,12 +322,15 @@ SUBROUTINE Get_Sim_Type
         ELSEIF(line_array(1) == 'gemc' .OR. line_array(1) == 'GEMC') THEN
            sim_type = 'GEMC'
            int_sim_type = sim_gemc
+           open_mc_flag = .TRUE.
         ELSEIF(line_array(1) == 'gemc_npt' .OR. line_array(1) == 'GEMC_NPT') THEN
            sim_type = 'GEMC_NPT'
            int_sim_type = sim_gemc_npt
+           open_mc_flag = .TRUE.
         ELSEIF(line_array(1) == 'gcmc' .OR. line_array(1) == 'GCMC') THEN
            sim_type = 'GCMC'
            int_sim_type = sim_gcmc
+           open_mc_flag = .TRUE.
         ELSEIF(line_array(1) == 'fragment' .OR. &
                line_array(1) == 'nvt_mc_fragment' .OR. line_array(1) == 'NVT_MC_Fragment') THEN
            sim_type = 'NVT_MC_Fragment'
@@ -384,7 +388,7 @@ SUBROUTINE Get_Pair_Style
 !                 This will alert the code if pair interaction energy arrays
 !                 need to be stored.
 !******************************************************************************
-  INTEGER :: ierr,line_nbr,nbr_entries, iassign, ibox, k
+  INTEGER :: ierr,line_nbr,nbr_entries, iassign, ibox, k, is
   CHARACTER(STRING_LEN) :: line_string, line_array(60)
 
   REAL(DP), ALLOCATABLE :: ewald_tol(:)
@@ -408,6 +412,8 @@ SUBROUTINE Get_Pair_Style
 
   ALLOCATE(l_half_len_cutoff(nbr_boxes))
   l_half_len_cutoff = .FALSE.
+  ALLOCATE(n_vdw_p_list(nbr_boxes))
+  n_vdw_p_list = 0
 
   ! vdw style
   line_nbr = 0
@@ -436,6 +442,7 @@ SUBROUTINE Get_Pair_Style
            ! the sum method
 
            IF ( line_array(1) == 'LJ' .OR. line_array(1) == 'Lj' .OR. line_array(1) == 'lj' ) THEN
+              n_vdw_p_list(ibox) = 2
               vdw_style(ibox) = 'LJ'
               int_vdw_style(ibox) = vdw_lj
               WRITE(logunit,'(A,2x,A,A,I3)') 'VDW style used is: ',vdw_style(ibox), 'in box:', ibox
@@ -535,6 +542,7 @@ SUBROUTINE Get_Pair_Style
               ENDIF
 
            ELSEIF ( line_array(1) == 'MIE' .OR. line_array(1) == 'Mie' .OR. line_array(1) == 'mie' ) THEN
+              n_vdw_p_list(ibox) = 4
               vdw_style(ibox) = 'MIE'
               int_vdw_style(ibox) = vdw_mie
               WRITE(logunit,'(A,2x,A,A,I3)') 'VDW style used is: ',vdw_style(ibox), 'in box:', ibox
@@ -873,6 +881,10 @@ SUBROUTINE Get_Pair_Style
      ENDIF
 
   END DO
+  rcut_vdwsq = rcut_vdw*rcut_vdw
+  inv_rcut_vdwsq = 1.0_DP/rcut_vdwsq
+  rcut_vdwsq_sp = REAL(rcut_vdwsq,SP)
+  inv_rcut_vdwsq_sp = REAL(inv_rcut_vdwsq,SP)
 
   !Now determine the mixing rule to use
    CALL Get_Mixing_Rules
@@ -1192,10 +1204,15 @@ SUBROUTINE Get_Molecule_Info
   ! the number of molecules of each species in a simulation at the end of a step is still capped at max_molecules - tp_correction
   max_molecules = max_molecules + tp_correction
 
+  sum_max_molecules = SUM(max_molecules)
+  sum_max_molecules_p4 = IAND(sum_max_molecules+padconst_8byte,padmask_8byte)
+  max_max_molecules = MAXVAL(max_molecules)
+  max_max_molecules_p4 = IAND(max_max_molecules+padconst_8byte,padmask_8byte)
+
   ! Allocate arrays that depend on max_molecules, natoms, and nspecies
   ! N.B.: MAXVAL instrinsic function selects the largest value from an array
 
-  ALLOCATE( atom_list(MAXVAL(natoms), MAXVAL(max_molecules), nspecies), Stat = AllocateStatus )
+  ALLOCATE( atom_list(MAXVAL(natoms), max_max_molecules, nspecies), Stat = AllocateStatus )
   IF (AllocateStatus /= 0) THEN
      write(*,*)'memory could not be allocated for atom_list array'
      write(*,*)'stopping'
@@ -1250,6 +1267,8 @@ SUBROUTINE Get_Molecule_Info
      STOP
   END IF
   uncombined_dihedral_list%l_rb_formatted = .FALSE.
+  CALL dihedral_list%init
+  CALL uncombined_dihedral_list%init
 
   ALLOCATE( improper_list(MAXVAL(nimpropers), nspecies), Stat = AllocateStatus )
   IF (AllocateStatus /= 0) THEN
@@ -1258,14 +1277,14 @@ SUBROUTINE Get_Molecule_Info
      STOP
   END IF
 
-  ALLOCATE( molecule_list(MAXVAL(max_molecules), nspecies), Stat = AllocateStatus )
+  ALLOCATE( molecule_list(max_max_molecules, nspecies), Stat = AllocateStatus )
   IF (AllocateStatus /= 0) THEN
      write(*,*)'memory could not be allocated for molecule_list array'
      write(*,*)'stopping'
      STOP
   END IF
 
-  ALLOCATE( locate(MAXVAL(max_molecules),nspecies,0:nbr_boxes), Stat = AllocateStatus )
+  ALLOCATE( locate(max_max_molecules_p4,nspecies,0:nbr_boxes), Stat = AllocateStatus )
   IF (AllocateStatus /= 0) THEN
      write(*,*)'memory could not be allocated for locate array'
      write(*,*)'stopping'
@@ -1273,14 +1292,14 @@ SUBROUTINE Get_Molecule_Info
   END IF
 
   IF (l_pair_nrg) THEN
-     ALLOCATE( pair_nrg_vdw(SUM(max_molecules),SUM(max_molecules)), Stat = AllocateStatus)
+     ALLOCATE( pair_nrg_vdw(sum_max_molecules_p4,sum_max_molecules), Stat = AllocateStatus)
      IF (AllocateStatus /= 0 ) THEN
         write(*,*) 'memmory could not be allocated for pair_nrg_vdw array'
         write(*,*) 'aborting'
         STOP
      END IF
 
-     ALLOCATE( pair_nrg_qq(SUM(max_molecules),SUM(max_molecules)), Stat = AllocateStatus)
+     ALLOCATE( pair_nrg_qq(sum_max_molecules_p4,sum_max_molecules), Stat = AllocateStatus)
      IF (AllocateStatus /= 0 ) THEN
         write(*,*) 'memmory could not be allocated for pair_nrg_qq array'
         write(*,*) 'aborting'
@@ -1299,7 +1318,7 @@ SUBROUTINE Get_Molecule_Info
 
   max_index = MAX(MAXVAL(nbonds),MAXVAL(nangles),MAXVAL(ndihedrals),MAXVAL(nimpropers))
 
-  ALLOCATE( internal_coord_list(max_index, MAXVAL(max_molecules), nspecies), Stat = AllocateStatus )
+  ALLOCATE( internal_coord_list(max_index, max_max_molecules, nspecies), Stat = AllocateStatus )
   IF (AllocateStatus /= 0) THEN
      write(*,*)'memory could not be allocated for internal_coord_list array'
      write(*,*)'stopping'
@@ -1390,6 +1409,10 @@ SUBROUTINE Get_Molecule_Info
         END IF
      END IF
   END DO
+  species_list(1)%superlocate_base = 0
+  DO is = 2, nspecies
+       species_list(is)%superlocate_base = SUM(max_molecules(1:is-1))
+  END DO
 
   WRITE(logunit,'(A80)') '********************************************************************************'
 
@@ -2348,6 +2371,7 @@ SUBROUTINE Get_Dihedral_Info(is)
         END DO
         species_list(is)%ndihedrals_uncombined = ndihedrals(is)
         ndihedrals(is) = n_combined_dihedrals
+        CALL dihedral_list(1:species_list(is)%ndihedrals_energetic,is)%SP_Convert
 
         EXIT
 
@@ -2795,7 +2819,7 @@ SUBROUTINE Get_Fragment_Info(is)
            END DO
 
            IF (iatoms_bond >= 2 ) THEN
-              ! this atom is connected to more than two bonds
+              ! this atom is connected to at least two bonds
               ! in the fragment and is an anchor
               nanchors = nanchors + 1
               anchor_id(nanchors) = i_atom
@@ -3310,7 +3334,7 @@ SUBROUTINE Get_Fragment_Coords
 !******************************************************************************
 
   INTEGER :: nfrag_types, this_fragment, is, ifrag, ifrag_type, this_config
-  INTEGER :: iconfig, ia, this_atom, ntcoords,  nl, nfl, aux
+  INTEGER :: iconfig, ia, this_atom, ntcoords,  nl, nfl, aux, nl_base
 
   REAL(DP) :: x_this, y_this, z_this
   REAL(DP) :: this_temperature, this_nrg
@@ -3401,7 +3425,7 @@ SUBROUTINE Get_Fragment_Coords
     END DO
 
 
-    ALLOCATE(library_coords(ntcoords),STAT = AllocateStatus)
+    ALLOCATE(library_coords(library_coords_dim1,ntcoords),STAT = AllocateStatus)
       IF (Allocatestatus /= 0 ) THEN
          err_msg = ''
          err_msg(1) = 'Error allocating library_coords'
@@ -3439,12 +3463,13 @@ SUBROUTINE Get_Fragment_Coords
 
   ! Load coordinates
 
-  library_coords(:)%rxp = 0.0_DP
-  library_coords(:)%ryp = 0.0_DP
-  library_coords(:)%rzp = 0.0_DP
-  !frag_library(:)%frag_coords(:,:)%rxp = 0.0_DP
-  !frag_library(:)%frag_coords(:,:)%ryp = 0.0_DP
-  ! frag_library(:)%frag_coords(:,:)%rzp = 0.0_DP
+  !library_coords(:)%rp(1) = 0.0_DP
+  !library_coords(:)%rp(2) = 0.0_DP
+  !library_coords(:)%rp(3) = 0.0_DP
+  library_coords = 0.0_DP
+  !frag_library(:)%frag_coords(:,:)%rp(1) = 0.0_DP
+  !frag_library(:)%frag_coords(:,:)%rp(2) = 0.0_DP
+  ! frag_library(:)%frag_coords(:,:)%rp(3) = 0.0_DP
 
 
 
@@ -3469,19 +3494,22 @@ SUBROUTINE Get_Fragment_Coords
               ! read in the energy of the fragment
               READ(10,*) this_temperature, this_nrg
               nrg_frag(ifrag_type)%this_config_energy(iconfig) = this_nrg
-              ! read coordinates
+              nl_base = (frag_position_library(ifrag_type)-1)+ &
+                        (iconfig-1)*natoms_this_frag(ifrag_type)
               DO ia = 1, frag_list(ifrag,is)%natoms
-
-                 READ(10,*) symbol, x_this, y_this, z_this
-      !           frag_library(ifrag_type)%frag_coords(ia,iconfig)%rxp = x_this
-      !           frag_library(ifrag_type)%frag_coords(ia,iconfig)%ryp = y_this
-      !           frag_library(ifrag_type)%frag_coords(ia,iconfig)%rzp = z_this
-                  nl = (frag_position_library(ifrag_type)-1)+ &
-                            (iconfig-1)*natoms_this_frag(ifrag_type) +ia
-                  library_coords(nl)%rxp = x_this
-                  library_coords(nl)%ryp = y_this
-                  library_coords(nl)%rzp = z_this
+                nl = nl_base + ia
+                READ(10,*) symbol, library_coords(1,nl), library_coords(2,nl), library_coords(3,nl)
               END DO
+              ! read coordinates
+              !DO ia = 1, frag_list(ifrag,is)%natoms
+
+              !   READ(10,*) symbol, x_this, y_this, z_this
+              !    nl = (frag_position_library(ifrag_type)-1)+ &
+              !              (iconfig-1)*natoms_this_frag(ifrag_type) +ia
+              !    library_coords(nl)%rp(1) = x_this
+              !    library_coords(nl)%rp(2) = y_this
+              !    library_coords(nl)%rp(3) = z_this
+              !END DO
            END DO
 
            WRITE(logunit,*) TRIM(res_file(ifrag,is))
@@ -4000,6 +4028,7 @@ SUBROUTINE Get_Box_Info
      err_msg(1) = 'Memory could not be allocated for nmols'
      CALL Clean_Abort(err_msg,'Get_Box_Info')
   END IF
+  ALLOCATE(nlive(nspecies,nbr_boxes))
 
   ALLOCATE(vdw_style(nbr_boxes) , charge_style(nbr_boxes))
   ALLOCATE(vdw_sum_style(nbr_boxes) , charge_sum_style(nbr_boxes))
@@ -4011,7 +4040,8 @@ SUBROUTINE Get_Box_Info
   ALLOCATE(rcut_vdw(nbr_boxes) , rcut_coul(nbr_boxes))
   ALLOCATE(ron_charmm(nbr_boxes) , roff_charmm(nbr_boxes))
   ALLOCATE(ron_switch(nbr_boxes) , roff_switch(nbr_boxes))
-  ALLOCATE(rcut_max(nbr_boxes), rcut_vdwsq(nbr_boxes))
+  ALLOCATE(rcut_max(nbr_boxes), rcut_vdwsq(nbr_boxes), rcut_vdwsq_sp(nbr_boxes), inv_rcut_vdwsq_sp(nbr_boxes))
+  ALLOCATE(inv_rcut_vdwsq(nbr_boxes))
   ALLOCATE(ron_switch_sq(nbr_boxes) , roff_switch_sq(nbr_boxes))
   ALLOCATE(ron_charmmsq(nbr_boxes) , roff_charmmsq(nbr_boxes))
   ALLOCATE(switch_factor1(nbr_boxes) , switch_factor2(nbr_boxes))
@@ -5593,14 +5623,17 @@ SUBROUTINE Get_Widom_Info
 END SUBROUTINE Get_Widom_Info
 
 SUBROUTINE Get_Lookup_Info
-        INTEGER :: line_nbr, ierr, max_atoms
-        CHARACTER(STRING_LEN) :: line_string
+        INTEGER :: line_nbr, ierr, max_atoms, nbr_entries
+        CHARACTER(STRING_LEN) :: line_string, line_array(60)
         REWIND(inputunit)
         line_nbr = 0
         line_string = ""
         l_sectors = .FALSE.
         cbmc_cell_list_flag = .FALSE.
         full_cell_list_flag = .FALSE.
+        bitcell_flag = .FALSE.
+        cavity_biasing_flag = .FALSE.
+        min_ideal_bitcell_length = 0.0_DP
         DO
                 line_nbr = line_nbr + 1
                 CALL Read_String(inputunit,line_string,ierr)
@@ -5630,36 +5663,58 @@ SUBROUTINE Get_Lookup_Info
                                 line_string(1:4) == 'full')) RETURN
                         l_sectors = .TRUE.
                         ALLOCATE(sectorbound(3,nbr_boxes))
+                        ALLOCATE(cell_length_recip(3,nbr_boxes))
+                        ALLOCATE(real_length_cells(3,nbr_boxes))
                         ALLOCATE(length_cells(3,nbr_boxes))
-                        ALLOCATE(cell_length_inv(3,nbr_boxes))
+                        ALLOCATE(cell_length_inv(3,3,nbr_boxes))
                         max_occ_sectors = 0
                         sectorbound = 0
                         sectormaxbound = 0
                         length_cells = 0
                         max_sector_natoms = 1
+                        WRITE(logunit,*) " overlap cell neighbor list method enabled"
                         IF (line_string(1:4) ==  'cbmc' .OR. &
                                 line_string(1:4) == 'full') THEN
                                 cbmc_cell_list_flag = .TRUE.
                                 ALLOCATE(sectorbound_cbmc(3,nbr_boxes))
                                 ALLOCATE(length_cells_cbmc(3,nbr_boxes))
-                                ALLOCATE(cell_length_inv_cbmc(3,nbr_boxes))
+                                ALLOCATE(cell_length_inv_cbmc(3,3,nbr_boxes))
                                 max_occ_sectors_cbmc = 0
                                 sectorbound_cbmc = 0
                                 sectormaxbound_cbmc = 0
                                 length_cells_cbmc = 0
                                 max_sector_natoms_cbmc = 1
+                                WRITE(logunit,*) " CBMC cell neighbor list method enabled"
                         END IF
                         IF (line_string(1:4) ==  'full') THEN
                                 full_cell_list_flag = .TRUE.
                                 ALLOCATE(sectorbound_full(3,nbr_boxes))
                                 ALLOCATE(length_cells_full(3,nbr_boxes))
-                                ALLOCATE(cell_length_inv_full(3,nbr_boxes))
+                                ALLOCATE(cell_length_inv_full(3,3,nbr_boxes))
                                 max_occ_sectors_full = 0
                                 sectorbound_full = 0
                                 sectormaxbound_full = 0
                                 length_cells_full = 0
                                 max_sector_natoms_full = 1
                         END IF
+                        line_nbr = line_nbr + 1
+                        CALL Parse_String(inputunit,line_nbr,0,nbr_entries,line_array,ierr)
+                        IF (nbr_entries < 1) RETURN
+                        SELECT CASE (line_array(1))
+                        CASE ("bit_cell", "bitcell", "bit_cell_overlap", "bitcell_overlap", "bovine", "BOVINE", "voxel",&
+                                        "bit_voxel", "cavity_biasing", "cavbias")
+                                bitcell_flag = .TRUE.
+                                WRITE(logunit,*) " BOVINE method enabled"
+                                IF (nbr_entries > 1) min_ideal_bitcell_length = String_To_Double(line_array(2))
+                                IF (line_array(1)(1:3) == "cav") THEN
+                                        cavity_biasing_flag = .TRUE.
+                                        WRITE(logunit,*) " Cavity biasing enabled"
+                                END IF
+                        CASE DEFAULT
+                                err_msg = ''
+                                err_msg(1) = 'Entry 1 on line ' // Int_To_String(line_nbr) // ' of the input file is invalid.'
+                                CALL clean_abort(err_msg,'Get_Lookup_Info')
+                        END SELECT
                         RETURN
                 END IF
         END DO
@@ -5839,7 +5894,7 @@ SUBROUTINE Get_Pregen_Info
                                                         TRIM(pregen_xyz_filenames(ibox)) // &
                                                         ' given as entry  ' // TRIM(Int_To_String(xyz_pos)) // ' on line number ' // &
                                                         TRIM(Int_To_String(line_nbr)) // ' of the input file'
-                                                err_msg(2) = 'Verify that xyz file ' // TRIM(pregen_xyz_filenames(ibox)) // 'exists'
+                                                err_msg(2) = 'Verify that xyz file ' // TRIM(pregen_xyz_filenames(ibox)) // ' exists'
                                                 CALL clean_abort(err_msg,'Get_Pregen_Info')
                                         END IF
                                         pregen_H_filenames(ibox) = line_array(H_pos)
@@ -5936,7 +5991,8 @@ SUBROUTINE Get_CBMC_Info
   INTEGER :: ibox, is
   INTEGER :: ierr, line_nbr, nbr_entries
   CHARACTER(STRING_LEN) :: line_string,line_array(60)
-  LOGICAL :: need_kappa_ins, need_kappa_dih
+  REAL(DP) :: theta_step, theta
+  INTEGER :: i
 
 !******************************************************************************
   WRITE(logunit,*)
@@ -5946,35 +6002,28 @@ SUBROUTINE Get_CBMC_Info
   REWIND(inputunit)
   ierr = 0
   line_nbr = 0
+  cbmc_charge_sf_flag = .FALSE.
 
-  kappa_ins = 0
-  kappa_rot = 0
-  kappa_dih = 0
-  need_kappa_ins = .FALSE.
-  need_kappa_dih = .FALSE.
   rcut_CBMC(:) = 0.0_DP
-
-  DO is = 1, nspecies
-     species_list(is)%l_coul_cbmc = .TRUE.
-  END DO
+  species_list%nfragments = nfragments
 
   ! Are CBMC parameters needed?
   DO ibox = 1, nbr_boxes
     IF (start_type(ibox) == 'make_config' .OR. start_type(ibox) == 'add_to_config') THEN
-       need_kappa_ins = .TRUE.
        DO is = 1, nspecies
-          IF (nfragments(is) > 1 .AND. nmols_to_make(is,ibox) > 0) THEN
-             need_kappa_dih = .TRUE.
+          IF (nmols_to_make(is,ibox) > 0) THEN
+                IF (species_list(is)%nfragments > 1) species_list(is)%need_kappa_dih = .TRUE.
+                species_list(is)%need_kappa_ins = .TRUE.
           END IF
        END DO
     END IF
   END DO
 
   IF (int_sim_type == sim_gcmc .OR. int_sim_type == sim_gemc .OR. int_sim_type == sim_gemc_npt .OR. widom_flag) THEN
-     need_kappa_ins = .TRUE.
      DO is = 1, nspecies
-        IF (nfragments(is) > 1 .AND. (species_list(is)%insertion == 'CBMC' .OR. tp_correction(is) .NE. 0)) THEN
-           need_kappa_dih = .TRUE.
+        IF (species_list(is)%insertion == 'CBMC' .OR. tp_correction(is) .NE. 0) THEN
+                species_list(is)%need_kappa_ins = .TRUE.
+                IF (species_list(is)%nfragments > 1) species_list(is)%need_kappa_dih = .TRUE.
         END IF
      END DO
   END IF
@@ -5982,13 +6031,13 @@ SUBROUTINE Get_CBMC_Info
   IF (prob_regrowth > tiny_number) THEN
      DO is = 1, nspecies
         IF (nfragments(is) > 1 .AND. prob_growth_species(is) > tiny_number) THEN
-           need_kappa_dih = .TRUE.
+           species_list(is)%need_kappa_dih = .TRUE.
         END IF
      END DO
   END IF
 
   ! Look for CBMC parameters
-  IF (need_kappa_ins .OR. need_kappa_dih) THEN
+  IF (ANY(species_list%need_kappa_ins .OR. species_list%need_kappa_dih)) THEN
 
      DO
         line_nbr = line_nbr + 1
@@ -6008,23 +6057,38 @@ SUBROUTINE Get_CBMC_Info
                  EXIT
               END IF
 
-              IF (line_array(1) == 'kappa_ins' .OR. line_array(1) == 'Kappa_Ins') THEN
-                 kappa_ins = String_To_Int(line_array(2))
-                 WRITE(logunit,'(A,T35,I12)') 'Kappa for first fragment insertion ', kappa_ins
-              ELSE IF (line_array(1) == 'kappa_rot' .OR. line_array(1) == 'Kappa_Rot') THEN
-                 kappa_rot = String_To_Int(line_array(2))
+              SELECT CASE(line_array(1))
+              CASE('kappa_ins','Kappa_Ins')
+              !IF (line_array(1) == 'kappa_ins' .OR. line_array(1) == 'Kappa_Ins') THEN
+                 DO is = 1, nspecies
+                        IF (is < nbr_entries) THEN
+                                species_list(is)%kappa_ins = String_To_Int(line_array(is+1))
+                        ELSE
+                                species_list(is)%kappa_ins = species_list(nbr_entries-1)%kappa_ins
+                        END IF
+                 END DO
+              CASE('kappa_rot','Kappa_Rot')
+              !ELSE IF (line_array(1) == 'kappa_rot' .OR. line_array(1) == 'Kappa_Rot') THEN
                  WRITE(logunit,'(X,A)') 'Orientational bias not supported. Kappa set to zero'
-                 kappa_rot = 0
-              ELSE IF (line_array(1) == 'kappa_dih' .OR. line_array(1) == 'Kappa_Dih') THEN
-                 kappa_dih = String_To_Int(line_array(2))
-                 WRITE(logunit,'(A,T35,I12)') 'Kappa for dihedral selection ', kappa_dih
-              ELSE IF (line_array(1) == 'rcut_cbmc' .OR. line_array(1) == 'Rcut_CBMC') THEN
+                 species_list%kappa_rot = 0
+              CASE('kappa_dih','Kappa_Dih')
+              !ELSE IF (line_array(1) == 'kappa_dih' .OR. line_array(1) == 'Kappa_Dih') THEN
+                 DO is = 1, nspecies
+                        IF (is < nbr_entries) THEN
+                                species_list(is)%kappa_dih = String_To_Int(line_array(is+1))
+                        ELSE
+                                species_list(is)%kappa_dih = species_list(nbr_entries-1)%kappa_dih
+                        END IF
+                 END DO
+              CASE('rcut_cbmc','Rcut_CBMC')
+              !ELSE IF (line_array(1) == 'rcut_cbmc' .OR. line_array(1) == 'Rcut_CBMC') THEN
                  DO ibox = 1, nbr_boxes
                     rcut_CBMC(ibox) = String_To_Double(line_array(ibox+1))
                     WRITE(logunit,'(X,A,F12.2)') 'Cutoff for CBMC for box '// TRIM(Int_To_String(ibox)) // &
                        ' is ', rcut_CBMC(ibox)
                  END DO
-              ELSE IF (line_array(1) == 'l_coul_cbmc' .OR. line_array(1) == 'L_Coul_CBMC') THEN
+              CASE('l_coul_cbmc','L_Coul_CBMC')
+              !ELSE IF (line_array(1) == 'l_coul_cbmc' .OR. line_array(1) == 'L_Coul_CBMC') THEN
                  DO is = 1, nspecies
                     IF (line_array(is+1) == 'true' .OR. line_array(is+1) == 'TRUE') THEN
                        species_list(is)%l_coul_cbmc = .TRUE.
@@ -6038,40 +6102,51 @@ SUBROUTINE Get_CBMC_Info
                        CALL Clean_Abort(err_msg,'Get_CBMC_Info')
                     END IF
                  END DO
-              ELSE IF (line_array(1) == 'energy_table' .OR. line_array(1) == "nrg_table" .OR. &
-                      line_array(1) == "atompair_nrg_table" .OR. line_array(1) == "atompair_energy_table") THEN
+              CASE('energy_table','nrg_table','atompair_nrg_table','atompair_energy_table')
+              !ELSE IF (line_array(1) == 'energy_table' .OR. line_array(1) == "nrg_table" .OR. &
+              !        line_array(1) == "atompair_nrg_table" .OR. line_array(1) == "atompair_energy_table") THEN
                       precalc_atompair_nrg = .TRUE.
                       IF (nbr_entries >= 2) THEN
                               atompair_nrg_res = String_To_Int(line_array(2))
                       ELSE
                               atompair_nrg_res = 1000
                       END IF
+                      atompair_nrg_res_sp = REAL(atompair_nrg_res,SP)
                       WRITE(logunit,'(X,A)') 'Atom pair energy table with rsq resolution = ' // &
                               TRIM(Int_To_String(atompair_nrg_res)) // ' will be used.'
-              ELSE
+              CASE('cbmc_charge_sf')
+                      cbmc_charge_sf_flag = .TRUE.
+                      WRITE(logunit,'(X,A)') "CBMC trials will use shifted force charge sum style."
+              CASE DEFAULT
+              !ELSE
                  err_msg = ''
                  err_msg(1) = 'Keyword ' // TRIM(line_array(1)) // ' on line number ' // &
                               TRIM(Int_To_String(line_nbr)) // ' of the input file is not supported'
                  err_msg(2) = 'Supported keywords are:'
                  err_msg(3) = 'kappa_ins, kappa_dih, rcut_cbmc, l_coul_cbmc, nrg_table'
                  CALL Clean_Abort(err_msg,'Get_CBMC_Info')
-              END IF
+              END SELECT
+              !END IF
 
            END DO
 
-           ! kappa_dih must be positive
-           IF (need_kappa_dih .AND. kappa_dih <= 0) THEN
-              err_msg = ''
-              err_msg(1) = 'kappa_dih must be positive'
-              CALL clean_abort(err_msg,'Get_CBMC_Info')
-           END IF
-
-           ! kappa_ins must be positive
-           IF (need_kappa_ins .AND. kappa_ins <= 0) THEN
-              err_msg = ''
-              err_msg(1) = 'kappa_ins must be positive'
-              CALL clean_abort(err_msg,'Get_CBMC_Info')
-           END IF
+           DO is = 1, nspecies
+                   ! kappa_dih must be positive
+                   IF (species_list(is)%need_kappa_dih .AND. species_list(is)%kappa_dih <= 0) THEN
+                      err_msg = ''
+                      err_msg(1) = 'kappa_dih must be positive for species ' // TRIM(Int_To_String(is))
+                      CALL clean_abort(err_msg,'Get_CBMC_Info')
+                   END IF
+                   ! kappa_ins must be positive
+                   IF (species_list(is)%need_kappa_ins .AND. species_list(is)%kappa_ins <= 0) THEN
+                      err_msg = ''
+                      err_msg(1) = 'kappa_ins must be positive for species ' // TRIM(Int_To_String(is))
+                      CALL clean_abort(err_msg,'Get_CBMC_Info')
+                   END IF
+                   WRITE(logunit,'(A)') "CBMC kappa values for species " // TRIM(Int_To_String(is)) // ":"
+                   CALL species_list(is)%Write_CBMC_Kappas(logunit)
+           END DO
+           CALL species_list%setup_cbmc_kappas
 
            ! rcut_cbmc must be positive
            DO ibox = 1, nbr_boxes
@@ -6460,8 +6535,6 @@ SUBROUTINE Get_Property_Info
 ! will be written in respective files.
 !******************************************************************************
 
-USE Global_Variables, ONLY: cpcollect
-
   INTEGER :: ierr, line_nbr, nbr_properties, max_properties, nbr_entries
   INTEGER :: i, j, this_box, ibox, is, average_id, ifrac
   CHARACTER(STRING_LEN) :: line_string, line_array(60)
@@ -6482,7 +6555,6 @@ SUBROUTINE Get_Property_Info
   line_nbr = 0
   nbr_prop_files(:) = 0
   max_properties = 0
-  cpcollect = .FALSE.
   need_pressure = .FALSE.
 
   DO
@@ -6530,10 +6602,6 @@ SUBROUTINE Get_Property_Info
                     line_array(1) == 'density' .OR. line_array(1) == 'Density') THEN
               ! there are as many properties to be written as there are species
               nbr_properties = nbr_properties + nspecies
-! chem_pot routines need testing
-!           ELSE IF (line_array(1) == 'chemical_potential' .OR. line_array(1) == 'Chemical_Potential') THEN
-!              nbr_properties = nbr_properties + nspecies
-!              cpcollect = .TRUE.
            ELSE IF (line_array(1) == 'pressure' .OR. line_array(1) == 'Pressure') THEN
               nbr_properties = nbr_properties + 1
               need_pressure = .TRUE.
@@ -6819,17 +6887,6 @@ SUBROUTINE Get_Property_Info
      END IF
   END DO
 
-  IF(cpcollect) THEN
-
-     DEALLOCATE(locate)
-     DEALLOCATE(molecule_list)
-     DEALLOCATE(atom_list)
-
-     ALLOCATE(locate(MAXVAL(max_molecules)+1,nspecies,0:nbr_boxes))
-     ALLOCATE(molecule_list(MAXVAL(max_molecules)+1,nspecies))
-     ALLOCATE(atom_list(MAXVAL(natoms),MAXVAL(max_molecules)+1,nspecies))
-
-  END IF
 
   IF (need_pressure) THEN
      ALLOCATE(W_tensor_charge(3,3,nbr_boxes) , W_tensor_recip(3,3,nbr_boxes))
@@ -6984,6 +7041,7 @@ SUBROUTINE Get_Rcutoff_Low
         CALL Parse_String(inputunit,line_nbr,1,nbr_entries,line_array,ierr)
         rcut_low = String_To_Double(line_array(1))
         rcut_lowsq = rcut_low * rcut_low
+        sp_rcut_lowsq = REAL(rcut_lowsq,SP)
 
         WRITE(logunit,'(A25,2X,F6.3,2X,A10)') 'MC low cutoff distance is ', rcut_low, ' Angstrom'
         ALLOCATE(tol_list(1))
diff --git a/Src/internal_coordinate_routines.f90 b/Src/internal_coordinate_routines.f90
index a0a9b799..8ddc6888 100755
--- a/Src/internal_coordinate_routines.f90
+++ b/Src/internal_coordinate_routines.f90
@@ -167,9 +167,9 @@ SUBROUTINE Get_Bond_Length(this_bond,im,is,r21)
 
 ! Calculate vectors, bond lenth
 
-   rx21 = these_atoms(atom1)%rxp - these_atoms(atom2)%rxp
-   ry21 = these_atoms(atom1)%ryp - these_atoms(atom2)%ryp
-   rz21 = these_atoms(atom1)%rzp - these_atoms(atom2)%rzp
+   rx21 = these_atoms(atom1)%rp(1) - these_atoms(atom2)%rp(1)
+   ry21 = these_atoms(atom1)%rp(2) - these_atoms(atom2)%rp(2)
+   rz21 = these_atoms(atom1)%rp(3) - these_atoms(atom2)%rp(3)
 
 !   this_box = molecule_list(im,is)%which_box
 !   IF (l_cubic(this_box) == .FALSE.) THEN
@@ -233,15 +233,15 @@ SUBROUTINE Get_Bond_Angle(this_angle,im,is,theta)
    
 !Vector r21 points from atom 1 to atom 2. Below, the components of this vector are calculated.
 
-   rx21 = these_atoms(atom2)%rxp - these_atoms(atom1)%rxp
-   ry21 = these_atoms(atom2)%ryp - these_atoms(atom1)%ryp
-   rz21 = these_atoms(atom2)%rzp - these_atoms(atom1)%rzp
+   rx21 = these_atoms(atom2)%rp(1) - these_atoms(atom1)%rp(1)
+   ry21 = these_atoms(atom2)%rp(2) - these_atoms(atom1)%rp(2)
+   rz21 = these_atoms(atom2)%rp(3) - these_atoms(atom1)%rp(3)
                                                                                    
 ! Vector r32 points from atom 2 to atom 3. Below the components are calculated.
 
-   rx32 = these_atoms(atom3)%rxp - these_atoms(atom2)%rxp
-   ry32 = these_atoms(atom3)%ryp - these_atoms(atom2)%ryp
-   rz32 = these_atoms(atom3)%rzp - these_atoms(atom2)%rzp
+   rx32 = these_atoms(atom3)%rp(1) - these_atoms(atom2)%rp(1)
+   ry32 = these_atoms(atom3)%rp(2) - these_atoms(atom2)%rp(2)
+   rz32 = these_atoms(atom3)%rp(3) - these_atoms(atom2)%rp(3)
    
 !   this_box = molecule_list(im,is)%which_box
 !   IF (l_cubic(this_box) == .FALSE.) THEN
@@ -353,21 +353,21 @@ SUBROUTINE Get_Dihedral_Angle_COS(this_dihedral,im,is,cosphi,r12dn)
 
 ! Vector r12 points from atom 2 to atom 1.  Below, the components of this vector are calculated.
 
-   rx12 = these_atoms(atom1)%rxp - these_atoms(atom2)%rxp
-   ry12 = these_atoms(atom1)%ryp - these_atoms(atom2)%ryp
-   rz12 = these_atoms(atom1)%rzp - these_atoms(atom2)%rzp
+   rx12 = these_atoms(atom1)%rp(1) - these_atoms(atom2)%rp(1)
+   ry12 = these_atoms(atom1)%rp(2) - these_atoms(atom2)%rp(2)
+   rz12 = these_atoms(atom1)%rp(3) - these_atoms(atom2)%rp(3)
    
 ! Vector r32 points from atom 2 to atom 3.  Below, the components of this vector are calculated.
 
-   rx32 = these_atoms(atom3)%rxp - these_atoms(atom2)%rxp
-   ry32 = these_atoms(atom3)%ryp - these_atoms(atom2)%ryp
-   rz32 = these_atoms(atom3)%rzp - these_atoms(atom2)%rzp
+   rx32 = these_atoms(atom3)%rp(1) - these_atoms(atom2)%rp(1)
+   ry32 = these_atoms(atom3)%rp(2) - these_atoms(atom2)%rp(2)
+   rz32 = these_atoms(atom3)%rp(3) - these_atoms(atom2)%rp(3)
 
 ! Vector r34 points from atom 4 to atom 3. Below the components of this vector are calculated.
 
-   rx34 = these_atoms(atom3)%rxp - these_atoms(atom4)%rxp
-   ry34 = these_atoms(atom3)%ryp - these_atoms(atom4)%ryp
-   rz34 = these_atoms(atom3)%rzp - these_atoms(atom4)%rzp
+   rx34 = these_atoms(atom3)%rp(1) - these_atoms(atom4)%rp(1)
+   ry34 = these_atoms(atom3)%rp(2) - these_atoms(atom4)%rp(2)
+   rz34 = these_atoms(atom3)%rp(3) - these_atoms(atom4)%rp(3)
 
 
 !   this_box = molecule_list(im,is)%which_box
@@ -457,21 +457,21 @@ SUBROUTINE Get_Improper_Angle(this_improper,im,is,phi)
 
 ! Vector r12 points from atom 2 to atom 1.  Below, the components of this vector are calculated.
 
-   rx12 = these_atoms(atom1)%rxp - these_atoms(atom2)%rxp
-   ry12 = these_atoms(atom1)%ryp - these_atoms(atom2)%ryp
-   rz12 = these_atoms(atom1)%rzp - these_atoms(atom2)%rzp
+   rx12 = these_atoms(atom1)%rp(1) - these_atoms(atom2)%rp(1)
+   ry12 = these_atoms(atom1)%rp(2) - these_atoms(atom2)%rp(2)
+   rz12 = these_atoms(atom1)%rp(3) - these_atoms(atom2)%rp(3)
    
 ! Vector r32 points from atom 2 to atom 3.  Below, the components of this vector are calculated.
 
-   rx32 = these_atoms(atom3)%rxp - these_atoms(atom2)%rxp
-   ry32 = these_atoms(atom3)%ryp - these_atoms(atom2)%ryp
-   rz32 = these_atoms(atom3)%rzp - these_atoms(atom2)%rzp
+   rx32 = these_atoms(atom3)%rp(1) - these_atoms(atom2)%rp(1)
+   ry32 = these_atoms(atom3)%rp(2) - these_atoms(atom2)%rp(2)
+   rz32 = these_atoms(atom3)%rp(3) - these_atoms(atom2)%rp(3)
 
 ! Vector r34 points from atom 4 to atom 3. Below the components of this vector are calculated.
 
-   rx34 = these_atoms(atom3)%rxp - these_atoms(atom4)%rxp
-   ry34 = these_atoms(atom3)%ryp - these_atoms(atom4)%ryp
-   rz34 = these_atoms(atom3)%rzp - these_atoms(atom4)%rzp
+   rx34 = these_atoms(atom3)%rp(1) - these_atoms(atom4)%rp(1)
+   ry34 = these_atoms(atom3)%rp(2) - these_atoms(atom4)%rp(2)
+   rz34 = these_atoms(atom3)%rp(3) - these_atoms(atom4)%rp(3)
 
 !   this_box = molecule_list(im,is)%which_box
 !   IF (l_cubic(this_box) == .FALSE.) THEN
diff --git a/Src/load_next_frame.f90 b/Src/load_next_frame.f90
deleted file mode 100644
index 93b3bdae..00000000
--- a/Src/load_next_frame.f90
+++ /dev/null
@@ -1,330 +0,0 @@
-!*****************************************************************************************
-!
-!
-!*****************************************************************************************
-SUBROUTINE Load_Next_Frame(end_reached)
-        USE Global_Variables
-        USE File_Names
-        USE Simulation_Properties
-        USE IO_Utilities
-        USE Energy_Routines
-        USE Internal_Coordinate_Routines
-        USE Type_Definitions
-        USE XTC_Routines
-        !$ USE OMP_LIB
-        IMPLICIT NONE
-
-        INTEGER :: ibox, is, im
-        LOGICAL :: end_reached
-        REAL(DP), DIMENSION(:,:), ALLOCATABLE, SAVE :: frame_xyz
-        REAL(DP), DIMENSION(3,3), SAVE :: this_length
-
-        end_reached = .FALSE.
-
-        nmols = 0
-        locate = 0
-        molecule_list(:,:)%live = .FALSE.
-        atom_list(:,:,:)%exist = .FALSE.
-        molecule_list(:,:)%molecule_type = int_none
-        molecule_list(:,:)%which_box = 0
-
-
-        DO ibox = 1, nbr_boxes
-                IF (has_Hfile(ibox)) THEN
-                        this_length = Read_H_Frame()
-                        IF (end_reached) RETURN
-                ELSEIF (.NOT. ALLOCATED(frame_xyz)) THEN
-                        ALLOCATE(frame_xyz(natoms_to_read(ibox),3))
-                END IF
-                IF (has_xyz(ibox)) THEN
-                        frame_xyz = Read_xyz_Frame()
-                        IF (end_reached) RETURN
-                ELSEIF (has_xtc(ibox)) THEN
-                        IF (Read_xtc_Frame(ibox)) THEN
-                                end_reached = .TRUE.
-                                EXIT
-                        END IF
-                        this_length = Get_xtc_Box(ibox)
-                        frame_xyz = Get_xtc_Coords(ibox)
-                END IF
-                CALL Set_Frame_Box
-                CALL Set_Frame_Coords
-        END DO
-
-        DO is = 1, nspecies
-                DO im = max_molecules(is), SUM(nmols(is,1:nbr_boxes)) + 1, -1
-                        nmols(is,0) = nmols(is,0) + 1
-                        locate(nmols(is,0),is,0) = im
-                END DO
-        END DO
-
-
-   CONTAINS
-        SUBROUTINE Set_Frame_Box
-
-                !REAL(DP), DIMENSION(3,3), INTENT(IN) :: this_length
-                INTEGER :: nvecsmax_old
-                INTEGER :: AllocateStatus
-
-                LOGICAL :: l_size_change
-
-
-                REAL(DP) :: frame_volume
-
-                IF (end_reached) RETURN
-
-                l_size_change = (.NOT. ALL(box_list(ibox)%length .EQ. this_length))
-
-                IF (l_size_change) THEN
-                        box_list(ibox)%length = this_length
-                        CALL Compute_Cell_Dimensions(ibox)
-                END IF
-
-                IF (l_size_change .AND. l_half_len_cutoff(ibox)) THEN
-                        rcut_vdw(ibox) = 0.5 * MIN(box_list(ibox)%face_distance(1), &
-                                                   box_list(ibox)%face_distance(2), &
-                                                   box_list(ibox)%face_distance(3))
-                        rcut_vdwsq(ibox) = rcut_vdw(ibox) * rcut_vdw(ibox)
-                        IF (int_charge_sum_style(ibox) /= charge_none) THEN
-                                rcut_coul(ibox) = rcut_vdw(ibox)
-                                rcut_coulsq(ibox) = rcut_vdwsq(ibox)
-                        END IF
-
-                        rcut_vdw3(ibox) = rcut_vdwsq(ibox) * rcut_vdw(ibox)
-                        rcut_vdw6(ibox) = rcut_vdw3(ibox) * rcut_vdw3(ibox)
-                        rcut3(ibox) = rcut_vdw3(ibox)
-                        rcut9(ibox) = rcut3(ibox) * rcut_vdw6(ibox)
-
-                        rcut_max(ibox) = rcut_vdw(ibox)
-                        IF ( int_charge_sum_style(ibox) == charge_ewald) THEN
-                                ! alpha_ewald(ibox) = ewald_p_sqrt(ibox) / rcut_coul(ibox)
-                                h_ewald_cut(ibox) = 2.0_DP * ewald_p(ibox) / rcut_coul(ibox)
-                        END IF
-                END IF
-                IF (l_size_change .AND. int_charge_sum_style(ibox) == charge_ewald) THEN
-                        nvecsmax_old = MAXVAL(nvecs)
-                        CALL Ewald_Reciprocal_Lattice_Vector_Setup(ibox)
-                        IF (MAXVAL(nvecs) > nvecsmax_old) THEN
-                                IF (ALLOCATED(cos_sum)) DEALLOCATE(cos_sum)
-                                IF (ALLOCATED(sin_sum)) DEALLOCATE(sin_sum)
-                                IF (ALLOCATED(cos_sum_old)) DEALLOCATE(cos_sum_old)
-                                IF (ALLOCATED(sin_sum_old)) DEALLOCATE(sin_sum_old)
-                                IF (ALLOCATED(cos_sum_start)) DEALLOCATE(cos_sum_start)
-                                IF (ALLOCATED(sin_sum_start)) DEALLOCATE(sin_sum_start)
-                                IF (ALLOCATED(cos_mol)) DEALLOCATE(cos_mol)
-                                IF (ALLOCATED(sin_mol)) DEALLOCATE(sin_mol)
-                                ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for cos_sum'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for sin_sum'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(cos_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for cos_mol'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(sin_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for sin_mol'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for cos_mol_old'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(sin_sum_old(SIZE(sin_sum,1),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for sin_mol_old'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(cos_sum_start(SIZE(cos_sum,1),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for cos_mol_start'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-                                ALLOCATE(sin_sum_start(SIZE(sin_sum,1),nbr_boxes), Stat = AllocateStatus)
-                                IF (AllocateStatus /= 0) THEN
-                                        err_msg = ''
-                                        err_msg(1) = 'Memory could not be allocated for sin_mol_start'
-                                        CALL Clean_Abort(err_msg,'Read_H_frame')
-                                END IF
-
-                        END IF
-                END IF
-
-        END SUBROUTINE Set_Frame_Box
-
-        FUNCTION Read_H_frame()
-                REAL(DP), DIMENSION(3,3) :: Read_H_frame
-                INTEGER :: nspecies_thisframe
-                INTEGER :: is_H, is
-                INTEGER :: nmols_H
-                INTEGER :: i, io
-                INTEGER :: old_natoms_to_read
-
-                READ(pregen_H_unit(ibox),*,IOSTAT=io)
-                IF (io < 0) THEN
-                        end_reached = .TRUE.
-                        RETURN
-                END IF
-                READ(pregen_H_unit(ibox),*)Read_H_frame(1,1), &
-                        Read_H_frame(1,2), &
-                        Read_H_frame(1,3)
-                READ(pregen_H_unit(ibox),*)Read_H_frame(2,1), &
-                        Read_H_frame(2,2), &
-                        Read_H_frame(2,3)
-                READ(pregen_H_unit(ibox),*)Read_H_frame(3,1), &
-                        Read_H_frame(3,2), &
-                        Read_H_frame(3,3)
-
-                READ(pregen_H_unit(ibox),*)
-                READ(pregen_H_unit(ibox),*)nspecies_thisframe
-                nmols_to_read(:,ibox) = 0
-                DO i = 1,nspecies_thisframe 
-                        READ(pregen_H_unit(ibox),*)is_H, nmols_H
-                        nmols_to_read(is_H,ibox) = nmols_H
-                END DO
-                atom_ibounds(2,:,ibox) = natoms*nmols_to_read(:,ibox)
-                old_natoms_to_read = natoms_to_read(ibox)
-                natoms_to_read(ibox) = SUM(atom_ibounds(2,:,ibox))
-                DO is = 2, nspecies
-                        atom_ibounds(2,is,ibox) = SUM(atom_ibounds(2,is-1:is,ibox))
-                END DO
-                atom_ibounds(1,1,ibox) = 1
-                IF (nspecies > 1) atom_ibounds(1,2:nspecies,ibox) = atom_ibounds(2,1:(nspecies-1),ibox)+1
-                IF (natoms_to_read(ibox) .NE. old_natoms_to_read) THEN
-                        IF (ALLOCATED(frame_xyz)) DEALLOCATE(frame_xyz)
-                        ALLOCATE(frame_xyz(natoms_to_read(ibox),3))
-                END IF
-
-        END FUNCTION Read_H_frame
-
-        SUBROUTINE Set_Frame_Coords
-
-                !REAL(DP), DIMENSION(natoms_to_read(ibox),3), INTENT(IN) :: frame_xyz
-                INTEGER :: is, ia, imol, this_im, locate_base
-
-
-                REAL(DP) :: xcom_old, ycom_old, zcom_old
-                REAL(DP) :: xcom_new, ycom_new, zcom_new
-                REAL(DP) :: this_lambda, e_lrc
-                LOGICAL :: overlap
-
-                TYPE(Atom_Class), POINTER :: al_ptr(:,:)
-                INTEGER :: newshape(2), sloc, eloc, aib(2)
-
-                IF (end_reached) RETURN
-
-                this_lambda = 1.0_DP
-                ! Read in the coordinates of the molecules
-                DO is = 1, nspecies
-                        IF (nmols_to_read(is,ibox) < 1) CYCLE
-                        locate_base = SUM(nmols(is,1:nbr_boxes))
-                        DO imol = 1, nmols_to_read(is,ibox)
-                                locate(imol,is,ibox) = imol+locate_base
-                        END DO
-                        sloc = locate_base + 1
-                        eloc = locate_base +nmols_to_read(is,ibox)
-                        aib = atom_ibounds(:,is,ibox)
-                        al_ptr => atom_list(1:natoms(is),sloc:eloc,is)
-                        newshape(1) = natoms(is)
-                        newshape(2) = nmols_to_read(is,ibox)
-                        al_ptr%rxp = &
-                                RESHAPE(frame_xyz(aib(1):aib(2),1), newshape) 
-                        al_ptr%ryp = &
-                                RESHAPE(frame_xyz(aib(1):aib(2),2), newshape) 
-                        al_ptr%rzp = &
-                                RESHAPE(frame_xyz(aib(1):aib(2),3), newshape) 
-                        al_ptr%exist = .TRUE.
-                        !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) &
-                        !$OMP PRIVATE(xcom_old, ycom_old, zcom_old, xcom_new, ycom_new, zcom_new)
-                        DO this_im = sloc, eloc
-                                molecule_list(this_im,is)%live = .TRUE.
-                                ! By default all the molecules are normal
-                                molecule_list(this_im,is)%molecule_type = int_normal
-                                molecule_list(this_im,is)%frac = this_lambda
-                                ! assign the molecule the box id
-                                molecule_list(this_im,is)%which_box = ibox
-                                ! ensure that the molecular COM is inside the central simulation box
-                                ! Calculate COM and distance from the outermost atom to the COM
-                                CALL Get_COM(this_im,is)
-                                CALL Compute_Max_Com_Distance(this_im,is)
-
-                                xcom_old = molecule_list(this_im,is)%xcom
-                                ycom_old = molecule_list(this_im,is)%ycom
-                                zcom_old = molecule_list(this_im,is)%zcom
-
-                                ! Apply PBC
-                                CALL Apply_PBC_Anint(ibox,xcom_old,ycom_old,zcom_old, &
-                                                          xcom_new, ycom_new, zcom_new)
-
-                                ! COM in the central simulation box
-                                molecule_list(this_im,is)%xcom = xcom_new
-                                molecule_list(this_im,is)%ycom = ycom_new
-                                molecule_list(this_im,is)%zcom = zcom_new
-
-                                ! displace atomic coordinates
-                                atom_list(1:natoms(is),this_im,is)%rxp = &
-                                        atom_list(1:natoms(is),this_im,is)%rxp + xcom_new - xcom_old
-                                atom_list(1:natoms(is),this_im,is)%ryp = &
-                                        atom_list(1:natoms(is),this_im,is)%ryp + ycom_new - ycom_old
-                                atom_list(1:natoms(is),this_im,is)%rzp = &
-                                        atom_list(1:natoms(is),this_im,is)%rzp + zcom_new - zcom_old
-                        END DO
-                        !$OMP END PARALLEL DO
-                        nmols(is,ibox) = nmols(is,ibox) + nmols_to_read(is,ibox)
-                END DO
-
-                CALL Get_Internal_Coords
-
-                IF (int_vdw_sum_style(ibox) == vdw_cut_tail) THEN 
-                        CALL Compute_Beads(ibox)
-                        CALL Compute_LR_Correction(ibox,e_lrc)
-                        energy(ibox)%lrc = e_lrc
-                END IF
-                
-                IF (int_charge_sum_style(ibox) == charge_ewald) THEN
-                        CALL Compute_System_Ewald_Reciprocal_Energy(ibox)
-                END IF
-
-
-        END SUBROUTINE Set_Frame_Coords
-
-        FUNCTION Read_xyz_frame()
-                REAL(DP), DIMENSION(natoms_to_read(ibox),3) :: Read_xyz_frame
-                INTEGER :: this_unit, io, i
-                CHARACTER(6) :: this_element
-
-                this_unit = pregen_xyz_unit(ibox)
-
-                READ(this_unit,*,IOSTAT=io)
-                IF (io < 0) THEN
-                        end_reached = .TRUE.
-                        RETURN
-                END IF
-                READ(this_unit,*)
-                DO i = 1, natoms_to_read(ibox)
-                        READ(this_unit,*) this_element, &
-                                Read_xyz_frame(i,1), &
-                                Read_xyz_frame(i,2), &
-                                Read_xyz_frame(i,3)
-                END DO
-                
-        END FUNCTION Read_xyz_frame
-
-
-END SUBROUTINE Load_Next_Frame
-
diff --git a/Src/main.f90 b/Src/main.f90
index b90ddc4f..99a61b3f 100755
--- a/Src/main.f90
+++ b/Src/main.f90
@@ -82,8 +82,10 @@ PROGRAM Main
   USE Energy_Routines
   USE Simulation_Properties
   USE Fragment_Growth
-  USE Internal_Coordinate_Routines
   USE Pair_Emax_Estimation
+  !$ USE OMP_LIB
+  USE Internal_Coordinate_Routines
+  USE ISO_FORTRAN_ENV
 
   IMPLICIT NONE
 
@@ -116,6 +118,13 @@ PROGRAM Main
   CALL DATE_AND_TIME(date,time,zone,begin_values)
   CALL cpu_time(start_time)
 
+  l_debug_print = .FALSE.
+
+  openmp_flag = .FALSE.
+  !$ openmp_flag = .TRUE.
+  global_nthreads = 1
+  !$ global_nthreads = OMP_GET_MAX_THREADS()
+
 !Get the input file name as a command line parameter
   count = IARGC()
   IF (count < 1) THEN
@@ -149,6 +158,31 @@ PROGRAM Main
      err_msg(2) = logfile
      CALL Clean_Abort(err_msg,'Read_Inputfile')
   ENDIF
+  IF (widom_timing) THEN
+          !$OMP PARALLEL
+          trial_loop_ins_time = 0.0_DP
+          cell_list_ins_time = 0.0_DP
+          cell_list_cbmc_nrg_ins_time = 0.0_DP
+          noncell_cbmc_nrg_ins_time = 0.0_DP
+          rng_ins_time = 0.0_DP
+          cbmc_setup_ins_time = 0.0_DP
+          cbmc_returnzone_ins_time = 0.0_DP
+          cbmc_endzone_ins_time = 0.0_DP
+          cbmc_fragment_placement_time = 0.0_DP
+          cbmc_dih_time = 0.0_DP
+          cbmc_nonoverlap_ins_count = 0
+          cbmc_dih_count = 0
+          cell_list_ins_checks = 0
+          cell_list_cbmc_nrg_ins_checks = 0
+          bitcell_overlap_ins_time = 0.0_DP
+          bitcell_overlap_ins_checks = 0
+          bitcell_overlap_ins_overlaps = 0
+          total_cbmc_time = 0.0_DP
+          widom_ewald_recip_time = 0.0_DP
+          nrg_ins_overlaps = 0
+          !$OMP END PARALLEL
+          noncbmc_time_total = 0.0_DP
+  END IF
 
   WRITE(logunit,'(A80)')'********************************************************************************'
   WRITE(logunit,'(A80)')'               ______                                __                        '
@@ -186,6 +220,8 @@ PROGRAM Main
 
   CALL HOSTNM(name)
   WRITE(logunit,'(a,a)') 'machine: ', TRIM(name)
+  WRITE(logunit,'(a,a)') 'compiler version: ', TRIM(COMPILER_VERSION())
+  WRITE(logunit,'(a,a)') 'compiler options: ', TRIM(COMPILER_OPTIONS())
   WRITE(logunit,'(A80)') '********************************************************************************'
 
   ! Standard level of output to logfile, or verbose output
@@ -334,8 +370,8 @@ PROGRAM Main
   ! Ewald stuff
   IF ( int_charge_sum_style(1) == charge_ewald) THEN
 
-     ALLOCATE(hx(maxk,nbr_boxes),hy(maxk,nbr_boxes),hz(maxk,nbr_boxes), &
-          hsq(maxk,nbr_boxes), Cn(maxk,nbr_boxes),Stat=AllocateStatus)
+     !ALLOCATE(hx(maxk,nbr_boxes),hy(maxk,nbr_boxes),hz(maxk,nbr_boxes), &
+     !     hsq(maxk,nbr_boxes), Cn(maxk,nbr_boxes),Stat=AllocateStatus)
      ALLOCATE(nvecs(nbr_boxes))
 
      IF (AllocateStatus /=0) THEN
@@ -348,19 +384,19 @@ PROGRAM Main
         CALL  Ewald_Reciprocal_Lattice_Vector_Setup(ibox)
      END DO
 
-     ! Here we can allocate the memory for cos_sum, sin_sum etc
-
-     ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(cos_sum_old(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(sin_sum_old(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(cos_sum_start(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(sin_sum_start(MAXVAL(nvecs),nbr_boxes))
-     ALLOCATE(cos_mol(MAXVAL(nvecs), SUM(max_molecules)))
-     ALLOCATE(sin_mol(MAXVAL(nvecs), SUM(max_molecules)))
-     ! initialize these vectors
-     cos_mol(:,:) = 0.0_DP
-     sin_mol(:,:) = 0.0_DP
+     !! Here we can allocate the memory for cos_sum, sin_sum etc
+
+     !ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes))
+     !ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes))
+     !ALLOCATE(cos_sum_old(MAXVAL(nvecs),nbr_boxes))
+     !ALLOCATE(sin_sum_old(MAXVAL(nvecs),nbr_boxes))
+     IF (int_sim_type .NE. sim_pregen) THEN
+             ALLOCATE(cos_mol(IAND(MAXVAL(nvecs)+padconst_8byte,padmask_8byte), 0:SUM(max_molecules)))
+             ALLOCATE(sin_mol(IAND(MAXVAL(nvecs)+padconst_8byte,padmask_8byte), 0:SUM(max_molecules)))
+             ! initialize these arrays
+             cos_mol(:,:) = 0.0_DP
+             sin_mol(:,:) = 0.0_DP
+     END IF
 
   END IF
 
@@ -543,6 +579,80 @@ PROGRAM Main
 
   CALL Write_Subroutine_Times
 
+  IF (widom_timing) THEN
+          trial_loop_ins_time_redux = 0.0_DP
+          cell_list_ins_time_redux = 0.0_DP
+          cell_list_cbmc_nrg_ins_time_redux = 0.0_DP
+          noncell_cbmc_nrg_ins_time_redux = 0.0_DP
+          rng_ins_time_redux = 0.0_DP
+          cbmc_setup_ins_time_redux = 0.0_DP
+          cbmc_returnzone_ins_time_redux = 0.0_DP
+          cbmc_endzone_ins_time_redux = 0.0_DP
+          cbmc_fragment_placement_time_redux = 0.0_DP
+          cbmc_dih_time_redux = 0.0_DP
+          cbmc_nonoverlap_ins_count_redux = 0
+          cbmc_dih_count_redux = 0
+          cell_list_ins_checks_redux = 0
+          cell_list_cbmc_nrg_ins_checks_redux = 0
+          bitcell_overlap_ins_time_redux = 0.0_DP
+          bitcell_overlap_ins_checks_redux = 0
+          bitcell_overlap_ins_overlaps_redux = 0
+          total_cbmc_time_redux = 0.0_DP
+          nrg_ins_overlaps_redux = 0.0_DP
+          !$OMP PARALLEL &
+          !$OMP REDUCTION(+:trial_loop_ins_time_redux, cell_list_ins_time_redux, cell_list_cbmc_nrg_ins_time_redux) &
+          !$OMP REDUCTION(+:noncell_cbmc_nrg_ins_time_redux,rng_ins_time_redux,cbmc_setup_ins_time_redux) &
+          !$OMP REDUCTION(+:cbmc_returnzone_ins_time_redux, cbmc_endzone_ins_time_redux) &
+          !$OMP REDUCTION(+:cbmc_nonoverlap_ins_count_redux, cbmc_dih_count_redux) &
+          !$OMP REDUCTION(+:cell_list_ins_checks_redux, cell_list_cbmc_nrg_ins_checks_redux) &
+          !$OMP REDUCTION(+:bitcell_overlap_ins_time_redux, bitcell_overlap_ins_checks_redux) &
+          !$OMP REDUCTION(+:bitcell_overlap_ins_overlaps_redux, total_cbmc_time_redux) &
+          !$OMP REDUCTION(+:widom_ewald_recip_time_redux,nrg_ins_overlaps_redux)
+          trial_loop_ins_time_redux = trial_loop_ins_time
+          cell_list_ins_time_redux = cell_list_ins_time
+          cell_list_cbmc_nrg_ins_time_redux = cell_list_cbmc_nrg_ins_time
+          noncell_cbmc_nrg_ins_time_redux = noncell_cbmc_nrg_ins_time
+          rng_ins_time_redux = rng_ins_time
+          cbmc_setup_ins_time_redux = cbmc_setup_ins_time
+          cbmc_returnzone_ins_time_redux = cbmc_returnzone_ins_time
+          cbmc_endzone_ins_time_redux = cbmc_endzone_ins_time
+          cbmc_fragment_placement_time_redux = cbmc_fragment_placement_time
+          cbmc_dih_time_redux = cbmc_dih_time
+          cbmc_nonoverlap_ins_count_redux = cbmc_nonoverlap_ins_count
+          cbmc_dih_count_redux = cbmc_dih_count
+          cell_list_ins_checks_redux = cell_list_ins_checks
+          cell_list_cbmc_nrg_ins_checks_redux = cell_list_cbmc_nrg_ins_checks
+          bitcell_overlap_ins_time_redux = bitcell_overlap_ins_time
+          bitcell_overlap_ins_checks_redux = bitcell_overlap_ins_checks
+          bitcell_overlap_ins_overlaps_redux = bitcell_overlap_ins_overlaps
+          total_cbmc_time_redux = total_cbmc_time
+          widom_ewald_recip_time_redux = widom_ewald_recip_time
+          nrg_ins_overlaps_redux = nrg_ins_overlaps
+          !$OMP END PARALLEL
+          WRITE(logunit,*) "WIDOM_TIME report:"
+          WRITE(logunit,*) "noncbmc_time_total", noncbmc_time_total
+          WRITE(logunit,*) "widom_ewald_recip_time", widom_ewald_recip_time_redux
+          WRITE(logunit,*) "total_cbmc_time", total_cbmc_time_redux
+          WRITE(logunit,*) "trial_loop_ins_time", trial_loop_ins_time_redux
+          WRITE(logunit,*) "cell_list_ins_time", cell_list_ins_time_redux
+          WRITE(logunit,*) "cell_list_cbmc_nrg_ins_time", cell_list_cbmc_nrg_ins_time_redux
+          WRITE(logunit,*) "noncell_cbmc_nrg_ins_time", noncell_cbmc_nrg_ins_time_redux
+          WRITE(logunit,*) "rng_ins_time", rng_ins_time_redux
+          WRITE(logunit,*) "cbmc_setup_ins_time", cbmc_setup_ins_time_redux
+          WRITE(logunit,*) "cbmc_returnzone_ins_time", cbmc_returnzone_ins_time_redux
+          WRITE(logunit,*) "cbmc_endzone_ins_time", cbmc_endzone_ins_time_redux
+          WRITE(logunit,*) "cbmc_fragment_placement_time", cbmc_fragment_placement_time_redux
+          WRITE(logunit,*) "cbmc_dih_time", cbmc_dih_time_redux
+          WRITE(logunit,*) "cbmc_nonoverlap_ins_count", cbmc_nonoverlap_ins_count_redux
+          WRITE(logunit,*) "cbmc_dih_count", cbmc_dih_count_redux
+          WRITE(logunit,*) "cell_list_ins_checks", cell_list_ins_checks_redux
+          WRITE(logunit,*) "cell_list_cbmc_nrg_ins_checks", cell_list_cbmc_nrg_ins_checks_redux
+          WRITE(logunit,*) "bitcell_overlap_ins_time", bitcell_overlap_ins_time_redux
+          WRITE(logunit,*) "bitcell_overlap_ins_checks", bitcell_overlap_ins_checks_redux
+          WRITE(logunit,*) "bitcell_overlap_ins_overlaps", bitcell_overlap_ins_overlaps_redux
+          WRITE(logunit,*) "nrg_ins_overlaps", nrg_ins_overlaps_redux
+  END IF
+
   WRITE(logunit,*)
   WRITE(logunit,'(A80)') '********************************************************************************'
   WRITE(logunit,'(A80)') '************************ Cassandra simulation complete *************************'
diff --git a/Src/make_config.f90 b/Src/make_config.f90
index c2fc1e95..f0e8ee96 100755
--- a/Src/make_config.f90
+++ b/Src/make_config.f90
@@ -128,20 +128,20 @@ SUBROUTINE Make_Config(ibox)
                 
                 IF(box_list(ibox)%box_shape == 'cubic') THEN
                    ! -- all the cell lengths are identical,
-                   atom_list(1,alive,is)%rxp = (rranf() - 0.5_DP) * box_list(ibox)%length(1,1)
-                   atom_list(1,alive,is)%ryp = (rranf() - 0.5_DP) * box_list(ibox)%length(2,2)
-                   atom_list(1,alive,is)%rzp = (rranf() - 0.5_DP) * box_list(ibox)%length(3,3)                      
+                   atom_list(1,alive,is)%rp(1) = (rranf() - 0.5_DP) * box_list(ibox)%length(1,1)
+                   atom_list(1,alive,is)%rp(2) = (rranf() - 0.5_DP) * box_list(ibox)%length(2,2)
+                   atom_list(1,alive,is)%rp(3) = (rranf() - 0.5_DP) * box_list(ibox)%length(3,3)                      
                 END IF
                 
                 ! insert the rest of the molecule
                 DO ia = 2,natoms(is)
                    
-                   atom_list(ia,alive,is)%rxp = atom_list(1,alive,is)%rxp + init_list(ia,1,is)%rxp - &
-                        init_list(1,1,is)%rxp
-                   atom_list(ia,alive,is)%ryp = atom_list(1,alive,is)%ryp + init_list(ia,1,is)%ryp - &
-                        init_list(1,1,is)%ryp
-                   atom_list(ia,alive,is)%rzp = atom_list(1,alive,is)%rzp + init_list(ia,1,is)%rzp - &
-                        init_list(1,1,is)%rzp
+                   atom_list(ia,alive,is)%rp(1) = atom_list(1,alive,is)%rp(1) + init_list(ia,1,is)%rp(1) - &
+                        init_list(1,1,is)%rp(1)
+                   atom_list(ia,alive,is)%rp(2) = atom_list(1,alive,is)%rp(2) + init_list(ia,1,is)%rp(2) - &
+                        init_list(1,1,is)%rp(2)
+                   atom_list(ia,alive,is)%rp(3) = atom_list(1,alive,is)%rp(3) + init_list(ia,1,is)%rp(3) - &
+                        init_list(1,1,is)%rp(3)
                 END DO
                 
                 ! Obtain COM of the molecule
@@ -167,9 +167,9 @@ SUBROUTINE Make_Config(ibox)
                       
                       DO ja = 1, natoms(is2)
                          
-                         rxijp = atom_list(ia,alive,is)%rxp - atom_list(ja,this_im,is2)%rxp
-                         ryijp = atom_list(ia,alive,is)%ryp - atom_list(ja,this_im,is2)%ryp
-                         rzijp = atom_list(ia,alive,is)%rzp - atom_list(ja,this_im,is2)%rzp
+                         rxijp = atom_list(ia,alive,is)%rp(1) - atom_list(ja,this_im,is2)%rp(1)
+                         ryijp = atom_list(ia,alive,is)%rp(2) - atom_list(ja,this_im,is2)%rp(2)
+                         rzijp = atom_list(ia,alive,is)%rp(3) - atom_list(ja,this_im,is2)%rp(3)
                          
                          CALL Minimum_Image_Separation(ibox,rxijp,ryijp,rzijp,rxij,ryij,rzij)
                          
@@ -235,9 +235,9 @@ SUBROUTINE Rotate_Molecule_Eulerian
       
       ! shift the origin to the COM of the molecule
       
-      atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp - molecule_list(alive,is)%xcom
-      atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp - molecule_list(alive,is)%ycom
-      atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp - molecule_list(alive,is)%zcom
+      atom_list(:,alive,is)%rp(1) = atom_list(:,alive,is)%rp(1) - molecule_list(alive,is)%rcom(1)
+      atom_list(:,alive,is)%rp(2) = atom_list(:,alive,is)%rp(2) - molecule_list(alive,is)%rcom(2)
+      atom_list(:,alive,is)%rp(3) = atom_list(:,alive,is)%rp(3) - molecule_list(alive,is)%rcom(3)
       
       ! Construct the rotation matrix that needs to be applied to each of the vectors
       ! This is the A matrix in Goldstein notation
@@ -257,24 +257,24 @@ SUBROUTINE Rotate_Molecule_Eulerian
       
       DO ia = 1, natoms(is)
          
-         rxpnew = rot11*atom_list(ia,alive,is)%rxp + rot12*atom_list(ia,alive,is)%ryp + &
-              rot13*atom_list(ia,alive,is)%rzp
-         rypnew = rot21*atom_list(ia,alive,is)%rxp + rot22*atom_list(ia,alive,is)%ryp + &
-              rot23*atom_list(ia,alive,is)%rzp
-         rzpnew = rot31*atom_list(ia,alive,is)%rxp + rot32*atom_list(ia,alive,is)%ryp + &
-              rot33*atom_list(ia,alive,is)%rzp
+         rxpnew = rot11*atom_list(ia,alive,is)%rp(1) + rot12*atom_list(ia,alive,is)%rp(2) + &
+              rot13*atom_list(ia,alive,is)%rp(3)
+         rypnew = rot21*atom_list(ia,alive,is)%rp(1) + rot22*atom_list(ia,alive,is)%rp(2) + &
+              rot23*atom_list(ia,alive,is)%rp(3)
+         rzpnew = rot31*atom_list(ia,alive,is)%rp(1) + rot32*atom_list(ia,alive,is)%rp(2) + &
+              rot33*atom_list(ia,alive,is)%rp(3)
          
-         atom_list(ia,alive,is)%rxp = rxpnew
-         atom_list(ia,alive,is)%ryp = rypnew
-         atom_list(ia,alive,is)%rzp = rzpnew
+         atom_list(ia,alive,is)%rp(1) = rxpnew
+         atom_list(ia,alive,is)%rp(2) = rypnew
+         atom_list(ia,alive,is)%rp(3) = rzpnew
          
       END DO
       
       ! Shift the origin back to (0,0,0)
       
-      atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp + molecule_list(alive,is)%xcom
-      atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp + molecule_list(alive,is)%ycom
-      atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp + molecule_list(alive,is)%zcom
+      atom_list(:,alive,is)%rp(1) = atom_list(:,alive,is)%rp(1) + molecule_list(alive,is)%rcom(1)
+      atom_list(:,alive,is)%rp(2) = atom_list(:,alive,is)%rp(2) + molecule_list(alive,is)%rcom(2)
+      atom_list(:,alive,is)%rp(3) = atom_list(:,alive,is)%rp(3) + molecule_list(alive,is)%rcom(3)
       
     END SUBROUTINE Rotate_Molecule_Eulerian
     
diff --git a/Src/min.f90 b/Src/min.f90
index f663620b..9ea50194 100755
--- a/Src/min.f90
+++ b/Src/min.f90
@@ -191,49 +191,49 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
 
   IF (l_cubic(this_box)) THEN
      
-     IF(molecule_list(alive,is)%xcom .GT. box_list(this_box)%hlength(1,1)) THEN
-        molecule_list(alive,is)%xcom = &
-             molecule_list(alive,is)%xcom - box_list(this_box)%length(1,1)
-        atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp - box_list(this_box)%length(1,1)
+     IF(molecule_list(alive,is)%rcom(1) .GT. box_list(this_box)%hlength(1,1)) THEN
+        molecule_list(alive,is)%rcom(1) = &
+             molecule_list(alive,is)%rcom(1) - box_list(this_box)%length(1,1)
+        atom_list(:,alive,is)%rp(1) = atom_list(:,alive,is)%rp(1) - box_list(this_box)%length(1,1)
         
-     ELSE IF(molecule_list(alive,is)%xcom .LT. -box_list(this_box)%hlength(1,1)) THEN
-        molecule_list(alive,is)%xcom = &
-             molecule_list(alive,is)%xcom + box_list(this_box)%length(1,1)
-        atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp + box_list(this_box)%length(1,1)
+     ELSE IF(molecule_list(alive,is)%rcom(1) .LT. -box_list(this_box)%hlength(1,1)) THEN
+        molecule_list(alive,is)%rcom(1) = &
+             molecule_list(alive,is)%rcom(1) + box_list(this_box)%length(1,1)
+        atom_list(:,alive,is)%rp(1) = atom_list(:,alive,is)%rp(1) + box_list(this_box)%length(1,1)
      END IF
 
-     IF(molecule_list(alive,is)%ycom .GT. box_list(this_box)%hlength(2,2)) THEN
-        molecule_list(alive,is)%ycom = &
-             molecule_list(alive,is)%ycom - box_list(this_box)%length(2,2)
-        atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp - box_list(this_box)%length(2,2)
+     IF(molecule_list(alive,is)%rcom(2) .GT. box_list(this_box)%hlength(2,2)) THEN
+        molecule_list(alive,is)%rcom(2) = &
+             molecule_list(alive,is)%rcom(2) - box_list(this_box)%length(2,2)
+        atom_list(:,alive,is)%rp(2) = atom_list(:,alive,is)%rp(2) - box_list(this_box)%length(2,2)
 
-     ELSE IF(molecule_list(alive,is)%ycom .LT. -box_list(this_box)%hlength(2,2)) THEN
+     ELSE IF(molecule_list(alive,is)%rcom(2) .LT. -box_list(this_box)%hlength(2,2)) THEN
 
-        molecule_list(alive,is)%ycom = &
-             molecule_list(alive,is)%ycom + box_list(this_box)%length(2,2)
-        atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp + box_list(this_box)%length(2,2)
+        molecule_list(alive,is)%rcom(2) = &
+             molecule_list(alive,is)%rcom(2) + box_list(this_box)%length(2,2)
+        atom_list(:,alive,is)%rp(2) = atom_list(:,alive,is)%rp(2) + box_list(this_box)%length(2,2)
 
      END IF
 
-     IF(molecule_list(alive,is)%zcom .GT. box_list(this_box)%hlength(3,3)) THEN
+     IF(molecule_list(alive,is)%rcom(3) .GT. box_list(this_box)%hlength(3,3)) THEN
 
-        molecule_list(alive,is)%zcom = &
-             molecule_list(alive,is)%zcom - box_list(this_box)%length(3,3)
-        atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp - box_list(this_box)%length(3,3)
+        molecule_list(alive,is)%rcom(3) = &
+             molecule_list(alive,is)%rcom(3) - box_list(this_box)%length(3,3)
+        atom_list(:,alive,is)%rp(3) = atom_list(:,alive,is)%rp(3) - box_list(this_box)%length(3,3)
         
-     ELSE IF(molecule_list(alive,is)%zcom .LT. -box_list(this_box)%hlength(3,3)) THEN
+     ELSE IF(molecule_list(alive,is)%rcom(3) .LT. -box_list(this_box)%hlength(3,3)) THEN
         
-        molecule_list(alive,is)%zcom = &
-             molecule_list(alive,is)%zcom + box_list(this_box)%length(3,3)
-        atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp + box_list(this_box)%length(3,3)
+        molecule_list(alive,is)%rcom(3) = &
+             molecule_list(alive,is)%rcom(3) + box_list(this_box)%length(3,3)
+        atom_list(:,alive,is)%rp(3) = atom_list(:,alive,is)%rp(3) + box_list(this_box)%length(3,3)
      END IF
 
 
   ELSE
 
-  thisx = molecule_list(alive,is)%xcom
-  thisy = molecule_list(alive,is)%ycom
-  thisz = molecule_list(alive,is)%zcom
+  thisx = molecule_list(alive,is)%rcom(1)
+  thisy = molecule_list(alive,is)%rcom(2)
+  thisz = molecule_list(alive,is)%rcom(3)
 
   CALL Cartesian_To_Fractional(thisx,thisy,thisz,frac_comx, frac_comy, frac_comz, this_box)
 
@@ -335,27 +335,27 @@ SUBROUTINE Fold_Molecule_In_Fractional_Coords(this_im, this_is,this_box)
 INTEGER :: i
 
 
-     thisx=molecule_list(this_im,this_is)%xcom
-     thisy=molecule_list(this_im,this_is)%ycom
-     thisz=molecule_list(this_im,this_is)%zcom
+     thisx=molecule_list(this_im,this_is)%rcom(1)
+     thisy=molecule_list(this_im,this_is)%rcom(2)
+     thisz=molecule_list(this_im,this_is)%rcom(3)
 
      CALL Apply_PBC_Anint(this_box,thisx,thisy,thisz,thisx2,thisy2,thisz2)
      
-     molecule_list(this_im,this_is)%xcom = thisx2
-     molecule_list(this_im,this_is)%ycom = thisy2
-     molecule_list(this_im,this_is)%zcom = thisz2
+     molecule_list(this_im,this_is)%rcom(1) = thisx2
+     molecule_list(this_im,this_is)%rcom(2) = thisy2
+     molecule_list(this_im,this_is)%rcom(3) = thisz2
 
      DO i = 1, natoms(this_is)
 
-        thisx=atom_list(i,this_im,this_is)%rxp
-        thisy=atom_list(i,this_im,this_is)%ryp
-        thisz=atom_list(i,this_im,this_is)%rzp
+        thisx=atom_list(i,this_im,this_is)%rp(1)
+        thisy=atom_list(i,this_im,this_is)%rp(2)
+        thisz=atom_list(i,this_im,this_is)%rp(3)
 
         CALL Apply_PBC_Anint(this_box,thisx,thisy,thisz,thisx2,thisy2,thisz2)
 
-        atom_list(i,this_im,this_is)%rxp = thisx2
-        atom_list(i,this_im,this_is)%ryp = thisy2
-        atom_list(i,this_im,this_is)%rzp = thisz2
+        atom_list(i,this_im,this_is)%rp(1) = thisx2
+        atom_list(i,this_im,this_is)%rp(2) = thisy2
+        atom_list(i,this_im,this_is)%rp(3) = thisz2
 
      END DO
 
diff --git a/Src/minimum_image_separation.f90 b/Src/minimum_image_separation.f90
index c9a81a67..c0dad7da 100755
--- a/Src/minimum_image_separation.f90
+++ b/Src/minimum_image_separation.f90
@@ -187,49 +187,49 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
 
   IF (l_cubic(this_box)) THEN
      
-     IF(this_molecule%xcom .GT. box_list(this_box)%hlength(1,1)) THEN
-        this_molecule%xcom = &
-             this_molecule%xcom - box_list(this_box)%length(1,1)
-        these_atoms%rxp = these_atoms%rxp - box_list(this_box)%length(1,1)
+     IF(this_molecule%rcom(1) .GT. box_list(this_box)%hlength(1,1)) THEN
+        this_molecule%rcom(1) = &
+             this_molecule%rcom(1) - box_list(this_box)%length(1,1)
+        these_atoms%rp(1) = these_atoms%rp(1) - box_list(this_box)%length(1,1)
         
-     ELSE IF(this_molecule%xcom .LT. -box_list(this_box)%hlength(1,1)) THEN
-        this_molecule%xcom = &
-             this_molecule%xcom + box_list(this_box)%length(1,1)
-        these_atoms%rxp = these_atoms%rxp + box_list(this_box)%length(1,1)
+     ELSE IF(this_molecule%rcom(1) .LT. -box_list(this_box)%hlength(1,1)) THEN
+        this_molecule%rcom(1) = &
+             this_molecule%rcom(1) + box_list(this_box)%length(1,1)
+        these_atoms%rp(1) = these_atoms%rp(1) + box_list(this_box)%length(1,1)
      END IF
 
-     IF(this_molecule%ycom .GT. box_list(this_box)%hlength(2,2)) THEN
-        this_molecule%ycom = &
-             this_molecule%ycom - box_list(this_box)%length(2,2)
-        these_atoms%ryp = these_atoms%ryp - box_list(this_box)%length(2,2)
+     IF(this_molecule%rcom(2) .GT. box_list(this_box)%hlength(2,2)) THEN
+        this_molecule%rcom(2) = &
+             this_molecule%rcom(2) - box_list(this_box)%length(2,2)
+        these_atoms%rp(2) = these_atoms%rp(2) - box_list(this_box)%length(2,2)
 
-     ELSE IF(this_molecule%ycom .LT. -box_list(this_box)%hlength(2,2)) THEN
+     ELSE IF(this_molecule%rcom(2) .LT. -box_list(this_box)%hlength(2,2)) THEN
 
-        this_molecule%ycom = &
-             this_molecule%ycom + box_list(this_box)%length(2,2)
-        these_atoms%ryp = these_atoms%ryp + box_list(this_box)%length(2,2)
+        this_molecule%rcom(2) = &
+             this_molecule%rcom(2) + box_list(this_box)%length(2,2)
+        these_atoms%rp(2) = these_atoms%rp(2) + box_list(this_box)%length(2,2)
 
      END IF
 
-     IF(this_molecule%zcom .GT. box_list(this_box)%hlength(3,3)) THEN
+     IF(this_molecule%rcom(3) .GT. box_list(this_box)%hlength(3,3)) THEN
 
-        this_molecule%zcom = &
-             this_molecule%zcom - box_list(this_box)%length(3,3)
-        these_atoms%rzp = these_atoms%rzp - box_list(this_box)%length(3,3)
+        this_molecule%rcom(3) = &
+             this_molecule%rcom(3) - box_list(this_box)%length(3,3)
+        these_atoms%rp(3) = these_atoms%rp(3) - box_list(this_box)%length(3,3)
         
-     ELSE IF(this_molecule%zcom .LT. -box_list(this_box)%hlength(3,3)) THEN
+     ELSE IF(this_molecule%rcom(3) .LT. -box_list(this_box)%hlength(3,3)) THEN
         
-        this_molecule%zcom = &
-             this_molecule%zcom + box_list(this_box)%length(3,3)
-        these_atoms%rzp = these_atoms%rzp + box_list(this_box)%length(3,3)
+        this_molecule%rcom(3) = &
+             this_molecule%rcom(3) + box_list(this_box)%length(3,3)
+        these_atoms%rp(3) = these_atoms%rp(3) + box_list(this_box)%length(3,3)
      END IF
 
 
   ELSE
 
-     thisx = this_molecule%xcom
-     thisy = this_molecule%ycom
-     thisz = this_molecule%zcom
+     thisx = this_molecule%rcom(1)
+     thisy = this_molecule%rcom(2)
+     thisz = this_molecule%rcom(3)
   
 
      CALL Cartesian_To_Fractional(thisx,thisy,thisz,frac_comx, frac_comy, frac_comz, this_box)
@@ -239,34 +239,34 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
         frac_comx = frac_comx-1.0_DP
 !        CALL Fold_Molecule_In_Fractional_Coords(alive,is,this_box)  
         ! store old COM coordinates for displacement
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
 
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
         ! update COM
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
 
         ! move atomic coordinates
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciax = fraciax-1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
  
 
@@ -275,34 +275,34 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
 !        CALL Fold_Molecule_In_Fractional_Coords(alive,is, this_box)
         frac_comx = frac_comx+1.0_DP
 
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
 
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
         
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
 
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciax = fraciax+1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
 
      END IF
@@ -312,64 +312,64 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
      IF(frac_comy .GT. 0.5) THEN
         
 !        CALL Fold_Molecule_In_Fractional_Coords(alive,is,this_box)  
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
 
         frac_comy = frac_comy-1.0_DP
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
         
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciay = fraciay-1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
 
      ELSE IF(frac_comy .LT. -0.5) THEN
 !        CALL Fold_Molecu  le_In_Fractional_Coords(alive,is,this_box)  
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
         frac_comy = frac_comy+1.0_DP
         
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
 
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciay = fraciay+1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
 
 
@@ -378,68 +378,68 @@ SUBROUTINE Fold_Molecule(alive,is,this_box)
      IF(frac_comz .GT. 0.5) THEN
 
 !        CALL Fold_Molecule_In_Fractional_Coords(alive,is,this_box)  
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
     
         frac_comz = frac_comz-1.0_DP
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
         
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciaz = fraciaz-1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
 
      ELSE IF(frac_comz .LT. -0.5) THEN
 
 !        CALL Fold_Molecule_In_Fractional_Coords(alive,is,this_box)  
-        xcom_old = this_molecule%xcom
-        ycom_old = this_molecule%ycom
-        zcom_old = this_molecule%zcom
+        xcom_old = this_molecule%rcom(1)
+        ycom_old = this_molecule%rcom(2)
+        zcom_old = this_molecule%rcom(3)
 
         frac_comz = frac_comz+1.0_DP
 
         CALL Fractional_To_Cartesian(frac_comx, frac_comy, frac_comz, thisx, thisy, thisz,this_box)
-        this_molecule%xcom = thisx
-        this_molecule%ycom = thisy
-        this_molecule%zcom = thisz
+        this_molecule%rcom(1) = thisx
+        this_molecule%rcom(2) = thisy
+        this_molecule%rcom(3) = thisz
 
-        these_atoms%rxp = these_atoms%rxp + &
-             this_molecule%xcom - xcom_old
-        these_atoms%ryp = these_atoms%ryp + &
-             this_molecule%ycom - ycom_old
-        these_atoms%rzp = these_atoms%rzp + &
-             this_molecule%zcom - zcom_old
+        these_atoms%rp(1) = these_atoms%rp(1) + &
+             this_molecule%rcom(1) - xcom_old
+        these_atoms%rp(2) = these_atoms%rp(2) + &
+             this_molecule%rcom(2) - ycom_old
+        these_atoms%rp(3) = these_atoms%rp(3) + &
+             this_molecule%rcom(3) - zcom_old
 
 
 
 !     DO i = 1, natoms(is)
-!        thisiax=atom_list(i,alive,is)%rxp
-!        thisiay=atom_list(i,alive,is)%ryp
-!        thisiaz=atom_list(i,alive,is)%rzp
+!        thisiax=atom_list(i,alive,is)%rp(1)
+!        thisiay=atom_list(i,alive,is)%rp(2)
+!        thisiaz=atom_list(i,alive,is)%rp(3)
 !        CALL Cartesian_To_Fractional(thisiax,thisiay,thisiaz,fraciax,fraciay,fraciaz,this_box)
 !        fraciaz = fraciaz+1.0_DP
 !        CALL Fractional_To_Cartesian(fraciax,fraciay,fraciaz,thisiax,thisiay,thisiaz,this_box)
-!        atom_list(i,alive,is)%rxp = thisiax
-!        atom_list(i,alive,is)%ryp = thisiay
-!        atom_list(i,alive,is)%rzp = thisiaz
+!        atom_list(i,alive,is)%rp(1) = thisiax
+!        atom_list(i,alive,is)%rp(2) = thisiay
+!        atom_list(i,alive,is)%rp(3) = thisiaz
 !     END DO
      END IF
 
diff --git a/Src/move_angle.f90 b/Src/move_angle.f90
index f92593f3..38fd918f 100755
--- a/Src/move_angle.f90
+++ b/Src/move_angle.f90
@@ -76,7 +76,7 @@ SUBROUTINE Angle_Distortion
 
   ! Pair Energy variables
 
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:),sin_mol_old(:)
+  !REAL(DP), ALLOCATABLE :: cos_mol_old(:),sin_mol_old(:)
   INTEGER :: position
   inter_overlap = .false.
   intra_overlap = .false.
@@ -257,27 +257,27 @@ SUBROUTINE Angle_Distortion
   
   ! Move all the atoms with respect to iatom2
   
-  iatom2_rxp = atom_list(iatom2,lm,is)%rxp
-  iatom2_ryp = atom_list(iatom2,lm,is)%ryp
-  iatom2_rzp = atom_list(iatom2,lm,is)%rzp
+  iatom2_rxp = atom_list(iatom2,lm,is)%rp(1)
+  iatom2_ryp = atom_list(iatom2,lm,is)%rp(2)
+  iatom2_rzp = atom_list(iatom2,lm,is)%rp(3)
   
-  atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp - iatom2_rxp
-  atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp - iatom2_ryp
-  atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp - iatom2_rzp
+  atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) - iatom2_rxp
+  atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) - iatom2_ryp
+  atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) - iatom2_rzp
   
   ! We will generate a perpendicular frame at atom2 such that x - axis
   ! is aligned along iatom2 --- > iatom3 and y - axis is in the plane
   ! defined by iatom1 - iatom2 - iatom3. We will assume that iatom1 is moving.
   
-  vec23(1) = atom_list(iatom3,lm,is)%rxp - atom_list(iatom2,lm,is)%rxp
-  vec23(2) = atom_list(iatom3,lm,is)%ryp - atom_list(iatom2,lm,is)%ryp
-  vec23(3) = atom_list(iatom3,lm,is)%rzp - atom_list(iatom2,lm,is)%rzp
+  vec23(1) = atom_list(iatom3,lm,is)%rp(1) - atom_list(iatom2,lm,is)%rp(1)
+  vec23(2) = atom_list(iatom3,lm,is)%rp(2) - atom_list(iatom2,lm,is)%rp(2)
+  vec23(3) = atom_list(iatom3,lm,is)%rp(3) - atom_list(iatom2,lm,is)%rp(3)
 
   ! vector from iatom2 to iatom1
 
-  vec21(1) = atom_list(iatom1,lm,is)%rxp - atom_list(iatom2,lm,is)%rxp
-  vec21(2) = atom_list(iatom1,lm,is)%ryp - atom_list(iatom2,lm,is)%ryp
-  vec21(3) = atom_list(iatom1,lm,is)%rzp - atom_list(iatom2,lm,is)%rzp
+  vec21(1) = atom_list(iatom1,lm,is)%rp(1) - atom_list(iatom2,lm,is)%rp(1)
+  vec21(2) = atom_list(iatom1,lm,is)%rp(2) - atom_list(iatom2,lm,is)%rp(2)
+  vec21(3) = atom_list(iatom1,lm,is)%rp(3) - atom_list(iatom2,lm,is)%rp(3)
 
   ! Normalize these vectors
 
@@ -320,15 +320,15 @@ SUBROUTINE Angle_Distortion
      
      this_atom = atoms_to_place_list(j)
 
-     tempx = atom_list(this_atom,lm,is)%rxp
-     tempy = atom_list(this_atom,lm,is)%ryp
-     tempz = atom_list(this_atom,lm,is)%rzp
+     tempx = atom_list(this_atom,lm,is)%rp(1)
+     tempy = atom_list(this_atom,lm,is)%rp(2)
+     tempz = atom_list(this_atom,lm,is)%rp(3)
 
-     atom_list(this_atom,lm,is)%rxp = tempx * aligner(1,1) + tempy * aligner(1,2) + &
+     atom_list(this_atom,lm,is)%rp(1) = tempx * aligner(1,1) + tempy * aligner(1,2) + &
           tempz * aligner(1,3)
-     atom_list(this_atom,lm,is)%ryp = tempx * aligner(2,1) + tempy * aligner(2,2) + &
+     atom_list(this_atom,lm,is)%rp(2) = tempx * aligner(2,1) + tempy * aligner(2,2) + &
           tempz * aligner(2,3)
-     atom_list(this_atom,lm,is)%rzp = tempx * aligner(3,1) + tempy * aligner(3,2) + &
+     atom_list(this_atom,lm,is)%rp(3) = tempx * aligner(3,1) + tempy * aligner(3,2) + &
           tempz * aligner(3,3)
 
   END DO
@@ -342,7 +342,7 @@ SUBROUTINE Angle_Distortion
   ! is to be performed in the counter clockwise direction if the atom1 is positioned
   ! in the 3rd or 4th quadrant.
 
-  IF ( atom_list(iatom1,lm,is)%ryp >= 0.0_DP ) THEN
+  IF ( atom_list(iatom1,lm,is)%rp(2) >= 0.0_DP ) THEN
      delta_theta = -delta_theta
   END IF
 
@@ -354,11 +354,11 @@ SUBROUTINE Angle_Distortion
 
      this_atom = atoms_to_place_list(j)
 
-     tempx = atom_list(this_atom,lm,is)%rxp
-     tempy = atom_list(this_atom,lm,is)%ryp
+     tempx = atom_list(this_atom,lm,is)%rp(1)
+     tempy = atom_list(this_atom,lm,is)%rp(2)
      
-     atom_list(this_atom,lm,is)%rxp = tempx * cos_dtheta + tempy * sin_dtheta
-     atom_list(this_atom,lm,is)%ryp = -tempx * sin_dtheta + tempy * cos_dtheta
+     atom_list(this_atom,lm,is)%rp(1) = tempx * cos_dtheta + tempy * sin_dtheta
+     atom_list(this_atom,lm,is)%rp(2) = -tempx * sin_dtheta + tempy * cos_dtheta
 
   END DO
 
@@ -382,22 +382,22 @@ SUBROUTINE Angle_Distortion
 
      this_atom = atoms_to_place_list(j)
 
-     tempx = atom_list(this_atom,lm,is)%rxp
-     tempy = atom_list(this_atom,lm,is)%ryp
-     tempz = atom_list(this_atom,lm,is)%rzp
+     tempx = atom_list(this_atom,lm,is)%rp(1)
+     tempy = atom_list(this_atom,lm,is)%rp(2)
+     tempz = atom_list(this_atom,lm,is)%rp(3)
 
-     atom_list(this_atom,lm,is)%rxp = tempx * hanger(1,1) + tempy * hanger(1,2) + &
+     atom_list(this_atom,lm,is)%rp(1) = tempx * hanger(1,1) + tempy * hanger(1,2) + &
           tempz * hanger(1,3)
-     atom_list(this_atom,lm,is)%ryp = tempx * hanger(2,1) + tempy * hanger(2,2) + &
+     atom_list(this_atom,lm,is)%rp(2) = tempx * hanger(2,1) + tempy * hanger(2,2) + &
           tempz * hanger(2,3)
-     atom_list(this_atom,lm,is)%rzp = tempx * hanger(3,1) + tempy * hanger(3,2) + &
+     atom_list(this_atom,lm,is)%rp(3) = tempx * hanger(3,1) + tempy * hanger(3,2) + &
           tempz * hanger(3,3)
 
   END DO
 
-  atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + iatom2_rxp
-  atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + iatom2_ryp
-  atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + iatom2_rzp
+  atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + iatom2_rxp
+  atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + iatom2_ryp
+  atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + iatom2_rzp
 
   ! Calculate the energies after the move. First compute intramolecular and intermolecular
   ! nonbonded interactions so that the move can be immediately rejected if an overlap is detected.
@@ -458,11 +458,10 @@ SUBROUTINE Angle_Distortion
 
      IF ( int_charge_sum_style(ibox) == charge_ewald .and. has_charge(is)) THEN
         
-        ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
-        CALL Get_Position_Alive(lm,is,position)
+        !ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
         
-        cos_mol_old(:) = cos_mol(1:nvecs(ibox),position)
-        sin_mol_old(:) = sin_mol(1:nvecs(ibox),position)
+        !cos_mol(1:nvecs(ibox),0) = cos_mol(1:nvecs(ibox),position)
+        !sin_mol(1:nvecs(ibox),0) = sin_mol(1:nvecs(ibox),position)
         
         CALL Update_System_Ewald_Reciprocal_Energy(lm,is,ibox, &
              int_intra,E_reciprocal_move)
@@ -506,8 +505,8 @@ SUBROUTINE Angle_Distortion
 
         CALL Fold_Molecule(lm,is,ibox)
         
-        IF(ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-        IF(ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+        !IF(ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+        !IF(ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
         
         IF (l_pair_nrg) DEALLOCATE(pair_vdw_temp,pair_qq_temp)
         
@@ -522,15 +521,17 @@ SUBROUTINE Angle_Distortion
         
         IF (int_charge_sum_style(ibox) == charge_ewald .AND. has_charge(is)) THEN
            ! Also reset the old cos_sum and sin_sum for reciprocal space vectors
+           CALL Get_Position_Alive(lm,is,position)
            !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-           cos_sum(:,ibox) = cos_sum_old(:,ibox)
-           sin_sum(:,ibox) = sin_sum_old(:,ibox)
+           box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+           !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+           !sin_sum(:,ibox) = sin_sum_old(:,ibox)
            
-           cos_mol(1:nvecs(ibox),position) = cos_mol_old(:)
-           sin_mol(1:nvecs(ibox),position) = sin_mol_old(:)
+           cos_mol(1:nvecs(ibox),position) = cos_mol(1:nvecs(ibox),0)
+           sin_mol(1:nvecs(ibox),position) = sin_mol(1:nvecs(ibox),0)
            !$OMP END PARALLEL WORKSHARE
            
-           DEALLOCATE(cos_mol_old,sin_mol_old)
+           !DEALLOCATE(cos_mol_old,sin_mol_old)
 
         END IF
         
diff --git a/Src/move_atom.f90 b/Src/move_atom.f90
index 7bc78ac5..7a50575b 100755
--- a/Src/move_atom.f90
+++ b/Src/move_atom.f90
@@ -73,6 +73,7 @@ SUBROUTINE Atom_Displacement
   INTEGER :: ibox, ndisp_species, is, im
   INTEGER :: lm, this_atom, iatom, ref_atom, nmolecules_species, mcstep
   INTEGER :: nmols_tot, nmols_box(nbr_boxes)
+  INTEGER :: im_locate
 
   INTEGER, DIMENSION(:), ALLOCATABLE :: species_id
 
@@ -187,9 +188,9 @@ SUBROUTINE Atom_Displacement
   ref_atom = species_list(is)%disp_atom_ref(this_atom)
 !  write(*,*) iatom, ref_atom
 !  write(*,*) atom_list(iatom,lm,is)
-  atom_list(iatom,lm,is)%rxp = atom_list(iatom,lm,is)%rxp - atom_list(ref_atom,lm,is)%rxp
-  atom_list(iatom,lm,is)%ryp = atom_list(iatom,lm,is)%ryp - atom_list(ref_atom,lm,is)%ryp
-  atom_list(iatom,lm,is)%rzp = atom_list(iatom,lm,is)%rzp - atom_list(ref_atom,lm,is)%rzp
+  atom_list(iatom,lm,is)%rp(1) = atom_list(iatom,lm,is)%rp(1) - atom_list(ref_atom,lm,is)%rp(1)
+  atom_list(iatom,lm,is)%rp(2) = atom_list(iatom,lm,is)%rp(2) - atom_list(ref_atom,lm,is)%rp(2)
+  atom_list(iatom,lm,is)%rp(3) = atom_list(iatom,lm,is)%rp(3) - atom_list(ref_atom,lm,is)%rp(3)
 
   ! Displace iatom
 
@@ -208,9 +209,9 @@ SUBROUTINE Atom_Displacement
 
   ! change coordinates of iatom to lab frame of reference
 
-  atom_list(iatom,lm,is)%rxp = atom_list(iatom,lm,is)%rxp + atom_list(ref_atom,lm,is)%rxp
-  atom_list(iatom,lm,is)%ryp = atom_list(iatom,lm,is)%ryp + atom_list(ref_atom,lm,is)%ryp
-  atom_list(iatom,lm,is)%rzp = atom_list(iatom,lm,is)%rzp + atom_list(ref_atom,lm,is)%rzp
+  atom_list(iatom,lm,is)%rp(1) = atom_list(iatom,lm,is)%rp(1) + atom_list(ref_atom,lm,is)%rp(1)
+  atom_list(iatom,lm,is)%rp(2) = atom_list(iatom,lm,is)%rp(2) + atom_list(ref_atom,lm,is)%rp(2)
+  atom_list(iatom,lm,is)%rp(3) = atom_list(iatom,lm,is)%rp(3) + atom_list(ref_atom,lm,is)%rp(3)
 !   write(*,*) atom_list(iatom,lm,is)
   CALL Get_COM(lm,is)
   CALL Compute_Max_COM_Distance(lm,is)
@@ -292,8 +293,18 @@ SUBROUTINE Atom_Displacement
      ! to here except restore the cos_sum and sin_sum arrays
 
      IF (int_charge_sum_style(ibox) == charge_ewald) THEN
-        cos_sum(:,ibox) = cos_sum_old(:,ibox)
-        sin_sum(:,ibox) = sin_sum_old(:,ibox)
+        IF (is .EQ. 1) THEN
+                im_locate = im
+        ELSE
+                im_locate = SUM(max_molecules(1:is-1)) + im
+        END IF
+        !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+        box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+        cos_mol(1:nvecs(ibox),im_locate) = cos_mol(1:nvecs(ibox),0)
+        sin_mol(1:nvecs(ibox),im_locate) = sin_mol(1:nvecs(ibox),0)
+        !$OMP END PARALLEL WORKSHARE
+        !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+        !sin_sum(:,ibox) = sin_sum_old(:,ibox)
      END IF
 
   END IF
@@ -327,9 +338,9 @@ SUBROUTINE Change_Phi_Theta(this_atom,im,is,theta_bound)
 
 
   ! Get spherical coordinates
-  this_x = atom_list(this_atom,im,is)%rxp
-  this_y = atom_list(this_atom,im,is)%ryp
-  this_z = atom_list(this_atom,im,is)%rzp
+  this_x = atom_list(this_atom,im,is)%rp(1)
+  this_y = atom_list(this_atom,im,is)%rp(2)
+  this_z = atom_list(this_atom,im,is)%rp(3)
 
   rho = this_x * this_x + this_y * this_y 
   bond_length = this_z * this_z + rho
@@ -364,9 +375,9 @@ SUBROUTINE Change_Phi_Theta(this_atom,im,is,theta_bound)
   phi = phi + dphi
   
   ! new coordinates
-  atom_list(this_atom,im,is)%rxp = bond_length * DSIN(theta) * DCOS(phi)
-  atom_list(this_atom,im,is)%ryp = bond_length * DSIN(theta) * DSIN(phi)
-  atom_list(this_atom,im,is)%rzp = bond_length * DCOS(theta)
+  atom_list(this_atom,im,is)%rp(1) = bond_length * DSIN(theta) * DCOS(phi)
+  atom_list(this_atom,im,is)%rp(2) = bond_length * DSIN(theta) * DSIN(phi)
+  atom_list(this_atom,im,is)%rp(3) = bond_length * DCOS(theta)
 
 END SUBROUTINE Change_Phi_Theta
   
diff --git a/Src/move_delete.f90 b/Src/move_delete.f90
index e44315bb..d6e98d60 100755
--- a/Src/move_delete.f90
+++ b/Src/move_delete.f90
@@ -199,14 +199,14 @@ SUBROUTINE Deletion
   !   * the number of trial dihedrals, kappa_dih, for each dihedral.
 
   ln_pbias = ln_pbias + ln_pseq
-  ln_pbias = ln_pbias + DLOG(REAL(kappa_ins,DP))
+  ln_pbias = ln_pbias + species_list(is)%log_kappa_ins
 
-  IF (kappa_rot /= 0 ) THEN
-     ln_pbias = ln_pbias + DLOG(REAL(kappa_rot,DP))
+  IF (species_list(is)%kappa_rot > 0 ) THEN
+     ln_pbias = ln_pbias + species_list(is)%log_kappa_rot
   END IF
 
-  IF (kappa_dih /= 0 ) THEN
-     ln_pbias = ln_pbias + REAL(nfragments(is)-1,DP) * DLOG(REAL(kappa_dih,DP))
+  IF (species_list(is)%need_kappa_dih) THEN
+     ln_pbias = ln_pbias + species_list(is)%ln_pbias_dih_const
   END IF
   
   !*****************************************************************************
@@ -286,7 +286,7 @@ SUBROUTINE Deletion
 
      DO i = 1, natoms(is)
         i_type = nonbond_list(i,is)%atom_type_number
-        nint_beads(i_type,ibox) = nint_beads(i_type,ibox) - 1
+        IF (i_type > 0) nint_beads(i_type,ibox) = nint_beads(i_type,ibox) - 1
      END DO
 
      CALL Compute_LR_correction(ibox,e_lrc)
@@ -394,8 +394,10 @@ SUBROUTINE Deletion
            (has_charge(is)) ) THEN
         ! Restore cos_sum and sin_sum. Note that these were changed when
         ! difference in reciprocal energies was computed
-        cos_sum(:,ibox) = cos_sum_old(:,ibox)
-        sin_sum(:,ibox) = sin_sum_old(:,ibox)
+        DEALLOCATE(box_list(ibox)%sincos_sum)
+        CALL MOVE_ALLOC(box_list(ibox)%sincos_sum_old,box_list(ibox)%sincos_sum)
+        !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+        !sin_sum(:,ibox) = sin_sum_old(:,ibox)
      END IF
 
      IF ( int_vdw_sum_style(ibox) == vdw_cut_tail ) THEN
diff --git a/Src/move_dihedral.f90 b/Src/move_dihedral.f90
index efa4b629..a190b5b9 100755
--- a/Src/move_dihedral.f90
+++ b/Src/move_dihedral.f90
@@ -77,7 +77,7 @@ SUBROUTINE Rotate_Dihedral
 
 !  !$ include 'omp_lib.h'
   
-  INTEGER :: ibox, is, im, dihedral_to_move, lm
+  INTEGER :: ibox, is, im, dihedral_to_move, lm, im_locate
   INTEGER :: i, j, this_atom, mcstep
   INTEGER :: atom1, atom2, atom3, atom4, iatom1, iatom2, iatom3, iatom4
   INTEGER :: natoms_to_place
@@ -278,27 +278,27 @@ SUBROUTINE Rotate_Dihedral
 
   ! Move all the atoms with respect to iatom2
 
-  iatom2_rxp = atom_list(iatom2,lm,is)%rxp
-  iatom2_ryp = atom_list(iatom2,lm,is)%ryp
-  iatom2_rzp = atom_list(iatom2,lm,is)%rzp
+  iatom2_rxp = atom_list(iatom2,lm,is)%rp(1)
+  iatom2_ryp = atom_list(iatom2,lm,is)%rp(2)
+  iatom2_rzp = atom_list(iatom2,lm,is)%rp(3)
 
-  atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp - iatom2_rxp
-  atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp - iatom2_ryp
-  atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp - iatom2_rzp
+  atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) - iatom2_rxp
+  atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) - iatom2_ryp
+  atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) - iatom2_rzp
 
   ! We will generate a perpendicular frame at atom2 such that x - axis
   ! is aligned along iatom2 --- > iatom3 and y - axis is in the plane
   ! defined by iatom1 - iatom2 - iatom3
 
-  vec23(1) = atom_list(iatom3,lm,is)%rxp - atom_list(iatom2,lm,is)%rxp
-  vec23(2) = atom_list(iatom3,lm,is)%ryp - atom_list(iatom2,lm,is)%ryp
-  vec23(3) = atom_list(iatom3,lm,is)%rzp - atom_list(iatom2,lm,is)%rzp
+  vec23(1) = atom_list(iatom3,lm,is)%rp(1) - atom_list(iatom2,lm,is)%rp(1)
+  vec23(2) = atom_list(iatom3,lm,is)%rp(2) - atom_list(iatom2,lm,is)%rp(2)
+  vec23(3) = atom_list(iatom3,lm,is)%rp(3) - atom_list(iatom2,lm,is)%rp(3)
 
   ! vector from iatom2 to iatom1
 
-  vec21(1) = atom_list(iatom1,lm,is)%rxp - atom_list(iatom2,lm,is)%rxp
-  vec21(2) = atom_list(iatom1,lm,is)%ryp - atom_list(iatom2,lm,is)%ryp
-  vec21(3) = atom_list(iatom1,lm,is)%rzp - atom_list(iatom2,lm,is)%rzp
+  vec21(1) = atom_list(iatom1,lm,is)%rp(1) - atom_list(iatom2,lm,is)%rp(1)
+  vec21(2) = atom_list(iatom1,lm,is)%rp(2) - atom_list(iatom2,lm,is)%rp(2)
+  vec21(3) = atom_list(iatom1,lm,is)%rp(3) - atom_list(iatom2,lm,is)%rp(3)
 
   ! Normalize these vectors
 
@@ -339,15 +339,15 @@ SUBROUTINE Rotate_Dihedral
      
      this_atom = atoms_to_place_list(j)
 
-     tempx = atom_list(this_atom,lm,is)%rxp
-     tempy = atom_list(this_atom,lm,is)%ryp
-     tempz = atom_list(this_atom,lm,is)%rzp
+     tempx = atom_list(this_atom,lm,is)%rp(1)
+     tempy = atom_list(this_atom,lm,is)%rp(2)
+     tempz = atom_list(this_atom,lm,is)%rp(3)
 
-     atom_list(this_atom,lm,is)%rxp = tempx * aligner(1,1) + tempy * aligner(1,2) + &
+     atom_list(this_atom,lm,is)%rp(1) = tempx * aligner(1,1) + tempy * aligner(1,2) + &
           tempz * aligner(1,3)
-     atom_list(this_atom,lm,is)%ryp = tempx * aligner(2,1) + tempy * aligner(2,2) + &
+     atom_list(this_atom,lm,is)%rp(2) = tempx * aligner(2,1) + tempy * aligner(2,2) + &
           tempz * aligner(2,3)
-     atom_list(this_atom,lm,is)%rzp = tempx * aligner(3,1) + tempy * aligner(3,2) + &
+     atom_list(this_atom,lm,is)%rp(3) = tempx * aligner(3,1) + tempy * aligner(3,2) + &
           tempz * aligner(3,3)
 
   END DO
@@ -365,13 +365,13 @@ SUBROUTINE Rotate_Dihedral
 
      this_atom = atoms_to_place_list(j)
 
-     tempy = atom_list(this_atom,lm,is)%ryp
-     tempz = atom_list(this_atom,lm,is)%rzp
+     tempy = atom_list(this_atom,lm,is)%rp(2)
+     tempz = atom_list(this_atom,lm,is)%rp(3)
 
      ! apply the transformation
      
-     atom_list(this_atom,lm,is)%ryp =  cosphi * tempy + sinphi * tempz
-     atom_list(this_atom,lm,is)%rzp = -sinphi * tempy + cosphi * tempz
+     atom_list(this_atom,lm,is)%rp(2) =  cosphi * tempy + sinphi * tempz
+     atom_list(this_atom,lm,is)%rp(3) = -sinphi * tempy + cosphi * tempz
 
   END DO
 
@@ -393,22 +393,22 @@ SUBROUTINE Rotate_Dihedral
 
      this_atom = atoms_to_place_list(j)
 
-     tempx = atom_list(this_atom,lm,is)%rxp
-     tempy = atom_list(this_atom,lm,is)%ryp
-     tempz = atom_list(this_atom,lm,is)%rzp
+     tempx = atom_list(this_atom,lm,is)%rp(1)
+     tempy = atom_list(this_atom,lm,is)%rp(2)
+     tempz = atom_list(this_atom,lm,is)%rp(3)
 
-     atom_list(this_atom,lm,is)%rxp = tempx * hanger(1,1) + tempy * hanger(1,2) + &
+     atom_list(this_atom,lm,is)%rp(1) = tempx * hanger(1,1) + tempy * hanger(1,2) + &
           tempz * hanger(1,3)
-     atom_list(this_atom,lm,is)%ryp = tempx * hanger(2,1) + tempy * hanger(2,2) + &
+     atom_list(this_atom,lm,is)%rp(2) = tempx * hanger(2,1) + tempy * hanger(2,2) + &
           tempz * hanger(2,3)
-     atom_list(this_atom,lm,is)%rzp = tempx * hanger(3,1) + tempy * hanger(3,2) + &
+     atom_list(this_atom,lm,is)%rp(3) = tempx * hanger(3,1) + tempy * hanger(3,2) + &
           tempz * hanger(3,3)
 
   END DO
 
-  atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + iatom2_rxp
-  atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + iatom2_ryp
-  atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + iatom2_rzp
+  atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + iatom2_rxp
+  atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + iatom2_ryp
+  atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + iatom2_rzp
 
   ! Now compute the energy of this molecule in the new conformation. First compute the intramolecular
   ! nonbonded interactions so that if an overlap is detected, the move can be immediately rejected.
@@ -512,9 +512,17 @@ SUBROUTINE Rotate_Dihedral
 
         IF (int_charge_sum_style(ibox) == charge_ewald) THEN
            ! Also reset the old cos_sum and sin_sum for reciprocal space vectors
+           IF (is == 1) THEN
+                   im_locate = im
+           ELSE
+                   im_locate = SUM(max_molecules(1:is-1)) + im
+           END IF
            !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-           cos_sum(:,ibox) = cos_sum_old(:,ibox)
-           sin_sum(:,ibox) = sin_sum_old(:,ibox)
+           !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+           !sin_sum(:,ibox) = sin_sum_old(:,ibox)
+           box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+           cos_mol(1:nvecs(ibox),im_locate) = cos_mol(1:nvecs(ibox),0)
+           sin_mol(1:nvecs(ibox),im_locate) = sin_mol(1:nvecs(ibox),0)
            !$OMP END PARALLEL WORKSHARE
 
         END IF
diff --git a/Src/move_identity_switch.f90 b/Src/move_identity_switch.f90
index eae99e4f..248b058c 100755
--- a/Src/move_identity_switch.f90
+++ b/Src/move_identity_switch.f90
@@ -282,35 +282,35 @@ SUBROUTINE Identity_Switch
    ! Step 8) Switch the two molecules by COM translation
    !
    !*****************************************************************************
-   xcom_i = molecule_list(lm_i,is)%xcom
-   ycom_i = molecule_list(lm_i,is)%ycom
-   zcom_i = molecule_list(lm_i,is)%zcom
+   xcom_i = molecule_list(lm_i,is)%rcom(1)
+   ycom_i = molecule_list(lm_i,is)%rcom(2)
+   zcom_i = molecule_list(lm_i,is)%rcom(3)
 
    ALLOCATE(dx_xcom_i(natoms(is)), dy_ycom_i(natoms(is)), dz_zcom_i(natoms(is)))
    ALLOCATE(dx_xcom_j(natoms(js)), dy_ycom_j(natoms(js)), dz_zcom_j(natoms(js)))
-   dx_xcom_i = xcom_i - atom_list(:,lm_i,is)%rxp
-   dy_ycom_i = ycom_i - atom_list(:,lm_i,is)%ryp
-   dz_zcom_i = zcom_i - atom_list(:,lm_i,is)%rzp
+   dx_xcom_i = xcom_i - atom_list(:,lm_i,is)%rp(1)
+   dy_ycom_i = ycom_i - atom_list(:,lm_i,is)%rp(2)
+   dz_zcom_i = zcom_i - atom_list(:,lm_i,is)%rp(3)
 
-   dx_xcom_j =  molecule_list(lm_j,js)%xcom - atom_list(:,lm_j,js)%rxp
-   dy_ycom_j =  molecule_list(lm_j,js)%ycom - atom_list(:,lm_j,js)%ryp
-   dz_zcom_j =  molecule_list(lm_j,js)%zcom - atom_list(:,lm_j,js)%rzp
+   dx_xcom_j =  molecule_list(lm_j,js)%rcom(1) - atom_list(:,lm_j,js)%rp(1)
+   dy_ycom_j =  molecule_list(lm_j,js)%rcom(2) - atom_list(:,lm_j,js)%rp(2)
+   dz_zcom_j =  molecule_list(lm_j,js)%rcom(3) - atom_list(:,lm_j,js)%rp(3)
 
    !Switch first molecule
-   molecule_list(lm_i,is)%xcom = molecule_list(lm_j,js)%xcom
-   molecule_list(lm_i,is)%ycom = molecule_list(lm_j,js)%ycom
-   molecule_list(lm_i,is)%zcom = molecule_list(lm_j,js)%zcom
-   atom_list(:,lm_i,is)%rxp = molecule_list(lm_i,is)%xcom - dx_xcom_i
-   atom_list(:,lm_i,is)%ryp = molecule_list(lm_i,is)%ycom - dy_ycom_i
-   atom_list(:,lm_i,is)%rzp = molecule_list(lm_i,is)%zcom - dz_zcom_i
+   molecule_list(lm_i,is)%rcom(1) = molecule_list(lm_j,js)%rcom(1)
+   molecule_list(lm_i,is)%rcom(2) = molecule_list(lm_j,js)%rcom(2)
+   molecule_list(lm_i,is)%rcom(3) = molecule_list(lm_j,js)%rcom(3)
+   atom_list(:,lm_i,is)%rp(1) = molecule_list(lm_i,is)%rcom(1) - dx_xcom_i
+   atom_list(:,lm_i,is)%rp(2) = molecule_list(lm_i,is)%rcom(2) - dy_ycom_i
+   atom_list(:,lm_i,is)%rp(3) = molecule_list(lm_i,is)%rcom(3) - dz_zcom_i
 
    !Switch second molecule
-   molecule_list(lm_j,js)%xcom = xcom_i
-   molecule_list(lm_j,js)%ycom = ycom_i
-   molecule_list(lm_j,js)%zcom = zcom_i
-   atom_list(:,lm_j,js)%rxp = xcom_i - dx_xcom_j
-   atom_list(:,lm_j,js)%ryp = ycom_i - dy_ycom_j
-   atom_list(:,lm_j,js)%rzp = zcom_i - dz_zcom_j
+   molecule_list(lm_j,js)%rcom(1) = xcom_i
+   molecule_list(lm_j,js)%rcom(2) = ycom_i
+   molecule_list(lm_j,js)%rcom(3) = zcom_i
+   atom_list(:,lm_j,js)%rp(1) = xcom_i - dx_xcom_j
+   atom_list(:,lm_j,js)%rp(2) = ycom_i - dy_ycom_j
+   atom_list(:,lm_j,js)%rp(3) = zcom_i - dz_zcom_j
    DEALLOCATE(dx_xcom_i, dy_ycom_i, dz_zcom_i)
    DEALLOCATE(dx_xcom_j, dy_ycom_j, dz_zcom_j)
 
@@ -392,7 +392,10 @@ SUBROUTINE Identity_Switch
       dE = 0.0_DP
       dE_i = 0.0_DP
       dE_j = 0.0_DP
-
+      ! Note by RS: the Ewald part of this move was wrong and is still wrong at least for multiple boxes,
+      !         but I made changes so that it references data structures I implemented instead of those that
+      !         no longer exist so it doesn't interfere with compilation.
+      !         I'm currently just trying to improve the Ewald summation implementation, not fix this broken move type.
       !Ewald charge code:
       IF ((int_charge_sum_style(box) == charge_ewald) .AND. (has_charge(is) .OR. has_charge(js))) THEN
          !TODO:Eventually rewrite Update_System_Ewald_Reciprocal_Energy!
@@ -402,8 +405,10 @@ SUBROUTINE Identity_Switch
 
             ALLOCATE(cos_sum_old_idsw(nvecs(box),nbr_boxes), sin_sum_old_idsw(nvecs(box),nbr_boxes))
             !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-            cos_sum_old_idsw(1:nvecs(box),box) = cos_sum(1:nvecs(box),box)
-            sin_sum_old_idsw(1:nvecs(box),box) = sin_sum(1:nvecs(box),box)
+            cos_sum_old_idsw(1:nvecs(box),box) = box_list(box)%sincos_sum(1:nvecs(box),2)
+            sin_sum_old_idsw(1:nvecs(box),box) = box_list(box)%sincos_sum(1:nvecs(box),1)
+            !cos_sum_old_idsw(1:nvecs(box),box) = cos_sum(1:nvecs(box),box)
+            !sin_sum_old_idsw(1:nvecs(box),box) = sin_sum(1:nvecs(box),box)
             !$OMP END PARALLEL WORKSHARE
 
             IF (has_charge(is)) THEN
@@ -581,8 +586,8 @@ SUBROUTINE Identity_Switch
         CALL Revert_Switch
         IF ((int_charge_sum_style(box) == charge_ewald) .AND. (has_charge(is) .OR. has_charge(js))) THEN
            !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-           cos_sum(1:nvecs(box), box) = cos_sum_old_idsw(1:nvecs(box),box)
-           sin_sum(1:nvecs(box), box) = sin_sum_old_idsw(1:nvecs(box),box)
+           box_list(box)%sincos_sum(1:nvecs(box),2) = cos_sum_old_idsw(1:nvecs(box),box)
+           box_list(box)%sincos_sum(1:nvecs(box),1) = sin_sum_old_idsw(1:nvecs(box),box)
            !$OMP END PARALLEL WORKSHARE
 
            DEALLOCATE(cos_sum_old_idsw, sin_sum_old_idsw)
@@ -696,9 +701,9 @@ SUBROUTINE Bias_Rotate(lm,is,ibox,rotations,P_bias,rot_overlap)
 
           !save rotation coordinates
           DO ia = 1, natoms(is)
-            rtrial(ia,i_rot)%rxp = atom_list(ia,lm,is)%rxp
-            rtrial(ia,i_rot)%ryp = atom_list(ia,lm,is)%ryp
-            rtrial(ia,i_rot)%rzp = atom_list(ia,lm,is)%rzp
+            rtrial(ia,i_rot)%rp(1) = atom_list(ia,lm,is)%rp(1)
+            rtrial(ia,i_rot)%rp(2) = atom_list(ia,lm,is)%rp(2)
+            rtrial(ia,i_rot)%rp(3) = atom_list(ia,lm,is)%rp(3)
           END DO
 
           overlap = .FALSE.
@@ -754,9 +759,9 @@ SUBROUTINE Bias_Rotate(lm,is,ibox,rotations,P_bias,rot_overlap)
       ! We chose the ith trial coordinate for placement. Store the ith trial
       ! coordinates in the atom_list array.
       DO ia = 1, natoms(is)
-        atom_list(ia,lm,is)%rxp = rtrial(ia,i_rot)%rxp
-        atom_list(ia,lm,is)%ryp = rtrial(ia,i_rot)%ryp
-        atom_list(ia,lm,is)%rzp = rtrial(ia,i_rot)%rzp
+        atom_list(ia,lm,is)%rp(1) = rtrial(ia,i_rot)%rp(1)
+        atom_list(ia,lm,is)%rp(2) = rtrial(ia,i_rot)%rp(2)
+        atom_list(ia,lm,is)%rp(3) = rtrial(ia,i_rot)%rp(3)
       END DO
 
       !COM was not stored, so recalculate
diff --git a/Src/move_insert.f90 b/Src/move_insert.f90
index 6b7ffa6c..af17145c 100755
--- a/Src/move_insert.f90
+++ b/Src/move_insert.f90
@@ -181,14 +181,14 @@ SUBROUTINE Insertion
   !   * the number of trial dihedrals, kappa_dih, for each dihedral.
 
   ln_pbias = ln_pbias + ln_pseq
-  ln_pbias = ln_pbias + DLOG(REAL(kappa_ins,DP))
+  ln_pbias = ln_pbias + species_list(is)%log_kappa_ins
 
-  IF (kappa_rot /= 0 ) THEN
-     ln_pbias = ln_pbias + DLOG(REAL(kappa_rot,DP))
+  IF (species_list(is)%kappa_rot > 0 ) THEN
+     ln_pbias = ln_pbias + species_list(is)%log_kappa_rot
   END IF
 
-  IF (kappa_dih /= 0 ) THEN
-     ln_pbias = ln_pbias + REAL(nfragments(is)-1,DP) * DLOG(REAL(kappa_dih,DP))
+  IF (species_list(is)%need_kappa_dih) THEN
+     ln_pbias = ln_pbias + species_list(is)%ln_pbias_dih_const
   END IF
 
   !*****************************************************************************
@@ -300,7 +300,7 @@ SUBROUTINE Insertion
 
      DO i = 1, natoms(is)
         i_type = nonbond_list(i,is)%atom_type_number
-        nint_beads(i_type,ibox) = nint_beads(i_type,ibox) + 1
+        IF (i_type > 0) nint_beads(i_type,ibox) = nint_beads(i_type,ibox) + 1
      END DO
 
      CALL Compute_LR_correction(ibox,E_lrc)
@@ -405,8 +405,10 @@ SUBROUTINE Insertion
           has_charge(is) ) THEN
         ! Restore cos_sum and sin_sum. Note that these were changed when the
         ! difference in reciprocal energies was computed.
-        cos_sum(:,ibox) = cos_sum_old(:,ibox)
-        sin_sum(:,ibox) = sin_sum_old(:,ibox)
+        DEALLOCATE(box_list(ibox)%sincos_sum)
+        CALL MOVE_ALLOC(box_list(ibox)%sincos_sum_old,box_list(ibox)%sincos_sum)
+        !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+        !sin_sum(:,ibox) = sin_sum_old(:,ibox)
      END IF
 
      IF ( int_vdw_sum_style(ibox) == vdw_cut_tail ) THEN
diff --git a/Src/move_mol_swap.f90 b/Src/move_mol_swap.f90
index 5fad441e..cdc58dd3 100755
--- a/Src/move_mol_swap.f90
+++ b/Src/move_mol_swap.f90
@@ -103,7 +103,7 @@ SUBROUTINE GEMC_Particle_Transfer
 
   INTEGER :: position
 
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:), cos_mol_new(:), sin_mol_new(:)
+  !REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:), cos_mol_new(:), sin_mol_new(:)
   REAL(DP) :: time0, time1, randno
   
   LOGICAL :: l_charge_in, l_charge_out
@@ -312,21 +312,21 @@ SUBROUTINE GEMC_Particle_Transfer
   CALL Save_Old_Cartesian_Coordinates(alive,is)
   CALL Compute_Molecule_Dihedral_Energy(alive,is,E_dihed_out)
 
-  ! Save the interaction energies
+  !! Save the interaction energies
   IF (l_pair_nrg) CALL Store_Molecule_Pair_Interaction_Arrays(alive,is, &
        box_out, E_inter_vdw_out, E_inter_qq_out)
   
-  ! Save the k-vectors
-  IF (int_charge_sum_style(box_in)  == charge_ewald .AND.&
-      has_charge(is)) THEN
-     ALLOCATE(cos_mol_old(nvecs(box_out)), sin_mol_old(nvecs(box_out)))
-     CALL Get_Position_Alive(alive,is,position)
-     
-     !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-     cos_mol_old(:) = cos_mol(1:nvecs(box_out),position)
-     sin_mol_old(:) = sin_mol(1:nvecs(box_out),position)
-     !$OMP END PARALLEL WORKSHARE
-  END IF
+  !! Save the k-vectors
+  !IF (int_charge_sum_style(box_in)  == charge_ewald .AND.&
+  !    has_charge(is)) THEN
+  !   ALLOCATE(cos_mol_old(nvecs(box_out)), sin_mol_old(nvecs(box_out)))
+  !   CALL Get_Position_Alive(alive,is,position)
+  !   
+  !   !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+  !   cos_mol_old(:) = cos_mol(1:nvecs(box_out),position)
+  !   sin_mol_old(:) = sin_mol(1:nvecs(box_out),position)
+  !   !$OMP END PARALLEL WORKSHARE
+  !END IF
 
   ! Switch the box identity of alive
   molecule_list(alive,is)%which_box = box_in
@@ -353,6 +353,9 @@ SUBROUTINE GEMC_Particle_Transfer
   ! overlap and also when the weight of all trials is zero.
 
   IF (.NOT. cbmc_overlap) THEN
+        !! Save the interaction energies
+        !IF (l_pair_nrg) CALL Store_Molecule_Pair_Interaction_Arrays(alive,is, &
+        !     box_out, E_inter_vdw_out, E_inter_qq_out)
         CALL Compute_Molecule_Nonbond_Inter_Energy(alive,is, &
              E_inter_vdw_in,E_inter_qq_in,inter_overlap)
   END IF
@@ -373,12 +376,13 @@ SUBROUTINE GEMC_Particle_Transfer
      atom_list(:,alive,is)%exist = .TRUE.
      molecule_list(alive,is)%frac = 1.0_DP
 
+     !IF (l_pair_nrg .AND. .NOT. cbmc_overlap) THEN
      IF (l_pair_nrg) THEN
         CALL Reset_Molecule_Pair_Interaction_Arrays(alive,is,box_out)
      END IF
 
-     IF(ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-     IF(ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+     !IF(ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+     !IF(ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
 
      accept = .FALSE.
 
@@ -412,6 +416,12 @@ SUBROUTINE GEMC_Particle_Transfer
     dE_inter_in = dE_inter_in + E_periodic_qq
 
     call cpu_time(time0)
+    IF (int_charge_style(box_out) == charge_coul .AND. has_charge(is)) THEN
+       IF (int_charge_sum_style(box_out) == charge_ewald) THEN
+            CALL Update_System_Ewald_Reciprocal_Energy(alive,is,box_out, &
+                    int_deletion,E_reciprocal_out)
+       END IF
+    END IF
 
     IF (int_charge_style(box_in) == charge_coul .AND. has_charge(is)) THEN
        
@@ -419,7 +429,7 @@ SUBROUTINE GEMC_Particle_Transfer
 
             ! Note that this call will change cos_mol, sin_mol of alive and this
             ! will have to be restored below while computing the energy of box_out
-            ! without molecule alive. 
+            ! without molecule alive.  -- RS: just compute deletion first. no need to restore between them.
             CALL Update_System_Ewald_Reciprocal_Energy(alive,is,box_in, &
                  int_insertion,E_reciprocal_in)
        
@@ -439,7 +449,7 @@ SUBROUTINE GEMC_Particle_Transfer
 
        DO i = 1, natoms(is)
           i_type = nonbond_list(i,is)%atom_type_number
-          nint_beads(i_type,box_in) = nint_beads(i_type,box_in) + 1
+          IF (i_type > 0) nint_beads(i_type,box_in) = nint_beads(i_type,box_in) + 1
        END DO
           
        CALL Compute_LR_Correction(box_in,E_lrc_in)
@@ -450,18 +460,6 @@ SUBROUTINE GEMC_Particle_Transfer
     dE_in = dE_intra_in + dE_inter_in
     dE_frag_in = E_angle_in + E_ring_frag_in
 
-    IF(cpcollect) THEN
-    
-       potw = 1.0_DP / (P_forward * kappa_ins*kappa_rot*kappa_dih &
-            ** (nfragments(is)-1)) 
-       CP_energy = dE_in - dE_frag_in
-     
-       chpot(is,box_in) = chpot(is,box_in) &
-        + potw * (box_list(box_in)%volume &
-        / (REAL(nmols(is,box_in)))) * DEXP(-beta(box_in) * CP_energy)
-
-    END IF
-
     !*****************************************************************************
     ! Step 6) Calculate the change in box_out's potential energy from deleting
     !         alive
@@ -530,33 +528,32 @@ SUBROUTINE GEMC_Particle_Transfer
     dE_inter_out = - E_inter_vdw_out - E_inter_qq_out
 
     IF (int_charge_style(box_out) == charge_coul .AND. has_charge(is)) THEN
-       IF (int_charge_sum_style(box_in) == charge_ewald .AND. &
-           int_charge_sum_style(box_out) == charge_ewald) THEN
-          ! Restore the cos_mol and sin_mol as they changed above
-          ! but restoring will destroy the newly computed vector so now here allocate
-          ! cos_mol_new
-          ! sin_mol_new vectors so that if the move is accepted we can restore these
+       IF (int_charge_sum_style(box_out) == charge_ewald) THEN
+          !! Restore the cos_mol and sin_mol as they changed above
+          !! but restoring will destroy the newly computed vector so now here allocate
+          !! cos_mol_new
+          !! sin_mol_new vectors so that if the move is accepted we can restore these
        
-          call cpu_time(time0)
+          !call cpu_time(time0)
        
-          ALLOCATE(cos_mol_new(nvecs(box_in)))
-          ALLOCATE(sin_mol_new(nvecs(box_in)))
+          !ALLOCATE(cos_mol_new(nvecs(box_in)))
+          !ALLOCATE(sin_mol_new(nvecs(box_in)))
        
-          !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-          cos_mol_new(:) = cos_mol(1:nvecs(box_in),position)
-          sin_mol_new(:) = sin_mol(1:nvecs(box_in),position)
+          !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+          !cos_mol_new(:) = cos_mol(1:nvecs(box_in),position)
+          !sin_mol_new(:) = sin_mol(1:nvecs(box_in),position)
 
 
-          cos_mol(1:nvecs(box_out),position) = cos_mol_old(1:nvecs(box_out))
-          sin_mol(1:nvecs(box_out),position) = sin_mol_old(1:nvecs(box_out))
-          !$OMP END PARALLEL WORKSHARE
+          !cos_mol(1:nvecs(box_out),position) = cos_mol_old(1:nvecs(box_out))
+          !sin_mol(1:nvecs(box_out),position) = sin_mol_old(1:nvecs(box_out))
+          !!$OMP END PARALLEL WORKSHARE
        
-          call cpu_time(time1)
+          !call cpu_time(time1)
        
-         ! copy_time = copy_time + time1-time0
+         !! copy_time = copy_time + time1-time0
        
-          CALL Update_System_Ewald_Reciprocal_Energy(alive,is, &
-               box_out,int_deletion,E_reciprocal_out)
+          !CALL Update_System_Ewald_Reciprocal_Energy(alive,is, &
+          !     box_out,int_deletion,E_reciprocal_out)
        
           dE_inter_out = dE_inter_out + (E_reciprocal_out - energy(box_out)%reciprocal)
        
@@ -572,7 +569,7 @@ SUBROUTINE GEMC_Particle_Transfer
        nbeads_out(:) = nint_beads(:,box_out)
        DO i = 1, natoms(is)
           i_type = nonbond_list(i,is)%atom_type_number
-          nint_beads(i_type,box_out) = nint_beads(i_type,box_out) - 1
+          IF (i_type > 0) nint_beads(i_type,box_out) = nint_beads(i_type,box_out) - 1
        END DO
 
        CALL Compute_LR_correction(box_out,E_lrc_out)
@@ -637,25 +634,25 @@ SUBROUTINE GEMC_Particle_Transfer
        molecule_list(alive,is) = new_molecule_list
        CALL Fold_Molecule(alive,is,box_in)
 
-       IF (int_charge_sum_style(box_in) == charge_ewald .AND. &
-           has_charge(is)) THEN
-          call cpu_time(time0)
-          !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-          cos_mol(1:nvecs(box_in),position) = cos_mol_new(:)
-          sin_mol(1:nvecs(box_in),position) = sin_mol_new(:)
-          !$OMP END PARALLEL WORKSHARE
-          
-          DEALLOCATE(cos_mol_new,sin_mol_new)
-          
-          call cpu_time(time1)
-!          copy_time = copy_time + time1-time0
-       END IF
+       !IF (int_charge_sum_style(box_in) == charge_ewald .AND. &
+       !    has_charge(is)) THEN
+       !   call cpu_time(time0)
+       !   !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+       !   cos_mol(1:nvecs(box_in),position) = cos_mol_new(:)
+       !   sin_mol(1:nvecs(box_in),position) = sin_mol_new(:)
+       !   !$OMP END PARALLEL WORKSHARE
+       !   
+       !   DEALLOCATE(cos_mol_new,sin_mol_new)
+       !   
+       !   call cpu_time(time1)
+!      !    copy_time = copy_time + time1-time0
+       !END IF
 
        IF (l_pair_nrg) DEALLOCATE(pair_vdw_temp,pair_qq_temp)
-       IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-       IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
-       IF (ALLOCATED(cos_mol_new)) DEALLOCATE(cos_mol_new)
-       IF (ALLOCATED(sin_mol_new)) DEALLOCATE(sin_mol_new)
+       !IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+       !IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+       !IF (ALLOCATED(cos_mol_new)) DEALLOCATE(cos_mol_new)
+       !IF (ALLOCATED(sin_mol_new)) DEALLOCATE(sin_mol_new)
 
        ! Restore the coordinates of the molecule due to successful insertion
        CALL Get_Internal_Coordinates(alive,is)
@@ -717,29 +714,27 @@ SUBROUTINE GEMC_Particle_Transfer
        nmols(is,box_in) = nmols(is,box_in) - 1
 
 
-       IF (has_charge(is)) THEN
+       IF (has_charge(is) .AND. ANY(int_charge_sum_style((/box_in,box_out/))==charge_ewald)) THEN
            ! Restore the reciprocal space k vectors
+           !$OMP PARALLEL DEFAULT(SHARED)
            IF (int_charge_sum_style(box_in) == charge_ewald) THEN
-              !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-              cos_sum(1:nvecs(box_in),box_in) = cos_sum_old(1:nvecs(box_in),box_in)
-              sin_sum(1:nvecs(box_in),box_in) = sin_sum_old(1:nvecs(box_in),box_in)
-              !$OMP END PARALLEL WORKSHARE
+              !$OMP WORKSHARE
+              box_list(box_in)%sincos_sum = box_list(box_in)%sincos_sum_old
+              !$OMP END WORKSHARE NOWAIT
       
-              DEALLOCATE(cos_mol_new,sin_mol_new)
            END IF
       
            IF (int_charge_sum_style(box_out) == charge_ewald) THEN
-              !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-              cos_sum(1:nvecs(box_out),box_out) = cos_sum_old(1:nvecs(box_out),box_out)
-              sin_sum(1:nvecs(box_out),box_out) = sin_sum_old(1:nvecs(box_out),box_out)
+              CALL Get_Position_Alive(alive,is,position)
+              !$OMP WORKSHARE
+              box_list(box_out)%sincos_sum = box_list(box_out)%sincos_sum_old
               
-              cos_mol(1:nvecs(box_out),position) = cos_mol_old(:)
-              sin_mol(1:nvecs(box_out),position) = sin_mol_old(:)
-              !$OMP END PARALLEL WORKSHARE
+              cos_mol(1:nvecs(box_out),position) = cos_mol(1:nvecs(box_out),0)
+              sin_mol(1:nvecs(box_out),position) = sin_mol(1:nvecs(box_out),0)
+              !$OMP END WORKSHARE
       
-              DEALLOCATE(cos_mol_old)
-              DEALLOCATE(sin_mol_old)
            END IF
+           !$OMP END PARALLEL
        END IF
 
        IF (l_pair_nrg) THEN
diff --git a/Src/move_regrow.f90 b/Src/move_regrow.f90
index a17d0572..047ca4c0 100755
--- a/Src/move_regrow.f90
+++ b/Src/move_regrow.f90
@@ -99,7 +99,7 @@ SUBROUTINE Cut_N_Grow
 
   INTEGER :: start, locate_im, count, this_species, position, this_im
 !  REAL(DP), ALLOCATABLE :: pair_vdw_temp(:), pair_qq_temp(:)
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
+  !REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
 
   LOGICAL :: l_charge
 
@@ -278,8 +278,8 @@ SUBROUTINE Cut_N_Grow
 
      IF (l_pair_nrg) CALL Reset_Molecule_Pair_Interaction_Arrays(lm,is,ibox)
      
-     IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-     IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+     !IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+     !IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
      
      IF (verbose_log) THEN
        WRITE(logunit,'(X,I19,X,A10,X,I5,X,I3,X,I3,X,L8,X,9X,X,A9)') &
@@ -337,15 +337,14 @@ SUBROUTINE Cut_N_Grow
 
   IF (int_charge_sum_style(ibox)  == charge_ewald .AND.&
       has_charge(is)) THEN
-     ! store cos_mol and sin_mol arrays
-     
-     ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
-     CALL Get_Position_Alive(lm,is,position)
-     
-     !$OMP PARALLEL WORKSHARE  DEFAULT(SHARED)
-     cos_mol_old(:) = cos_mol(1:nvecs(ibox),position)
-     sin_mol_old(:) = sin_mol(1:nvecs(ibox),position)
-     !$OMP END PARALLEL WORKSHARE
+     !! store cos_mol and sin_mol arrays
+     !
+     !ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
+     !
+     !!$OMP PARALLEL WORKSHARE  DEFAULT(SHARED)
+     !cos_mol_old(:) = cos_mol(1:nvecs(ibox),position)
+     !sin_mol_old(:) = sin_mol(1:nvecs(ibox),position)
+     !!$OMP END PARALLEL WORKSHARE
 
      ! Compute the change in Ewald reciprocal energy due to the move
      CALL Update_System_Ewald_Reciprocal_Energy(lm,is,ibox, &
@@ -472,8 +471,8 @@ SUBROUTINE Cut_N_Grow
 
      regrowth_success(frag_total,is) = regrowth_success(frag_total,is) + 1
 
-     IF (int_charge_sum_style(ibox)  == charge_ewald .AND.&
-      has_charge(is)) DEALLOCATE(cos_mol_old,sin_mol_old)
+     !IF (int_charge_sum_style(ibox)  == charge_ewald .AND.&
+     ! has_charge(is)) DEALLOCATE(cos_mol_old,sin_mol_old)
      IF (l_pair_nrg) DEALLOCATE(pair_vdw_temp,pair_qq_temp)
 
       ! Fold the molecule in case the COM has moved out of cell boundary
@@ -485,11 +484,13 @@ SUBROUTINE Cut_N_Grow
      IF (int_charge_sum_style(ibox)  == charge_ewald .AND.&
          has_charge(is)) THEN
         
+        CALL Get_Position_Alive(lm,is,position)
         !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-        cos_sum(1:nvecs(ibox),ibox) = cos_sum_old(1:nvecs(ibox),ibox)
-        sin_sum(1:nvecs(ibox),ibox) = sin_sum_old(1:nvecs(ibox),ibox)
-        cos_mol(1:nvecs(ibox),position) =cos_mol_old(:)
-        sin_mol(1:nvecs(ibox),position) =sin_mol_old(:)
+        box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+        !cos_sum(1:nvecs(ibox),ibox) = cos_sum_old(1:nvecs(ibox),ibox)
+        !sin_sum(1:nvecs(ibox),ibox) = sin_sum_old(1:nvecs(ibox),ibox)
+        cos_mol(1:nvecs(ibox),position) =cos_mol(1:nvecs(ibox),0)
+        sin_mol(1:nvecs(ibox),position) =sin_mol(1:nvecs(ibox),0)
         !$OMP END PARALLEL WORKSHARE
         
      END IF
diff --git a/Src/move_ring_flip.f90 b/Src/move_ring_flip.f90
index 80cbc74f..e5bf4f06 100755
--- a/Src/move_ring_flip.f90
+++ b/Src/move_ring_flip.f90
@@ -123,13 +123,13 @@ SUBROUTINE Flip_Move
   ! Align plane of atom1, atom2 and this_atom such that atom1 is at the origin, atom2 is 
   ! along the x-axis and this_atom is in xy plane
 
-  vec1(1) = atom_list(atom2,im,is)%rxp - atom_list(atom1,im,is)%rxp
-  vec1(2) = atom_list(atom2,im,is)%ryp - atom_list(atom1,im,is)%ryp
-  vec1(3) = atom_list(atom2,im,is)%rzp - atom_list(atom1,im,is)%rzp
+  vec1(1) = atom_list(atom2,im,is)%rp(1) - atom_list(atom1,im,is)%rp(1)
+  vec1(2) = atom_list(atom2,im,is)%rp(2) - atom_list(atom1,im,is)%rp(2)
+  vec1(3) = atom_list(atom2,im,is)%rp(3) - atom_list(atom1,im,is)%rp(3)
   
-  vec2(1) = atom_list(this_atom,im,is)%rxp - atom_list(atom1,im,is)%rxp
-  vec2(2) = atom_list(this_atom,im,is)%ryp - atom_list(atom1,im,is)%ryp
-  vec2(3) = atom_list(this_atom,im,is)%rzp - atom_list(atom1,im,is)%rzp
+  vec2(1) = atom_list(this_atom,im,is)%rp(1) - atom_list(atom1,im,is)%rp(1)
+  vec2(2) = atom_list(this_atom,im,is)%rp(2) - atom_list(atom1,im,is)%rp(2)
+  vec2(3) = atom_list(this_atom,im,is)%rp(3) - atom_list(atom1,im,is)%rp(3)
 
 
   CALL Get_Aligner_Hanger(vec1,vec2,aligner,hanger)
@@ -185,9 +185,9 @@ SUBROUTINE Flip_Move
      atom_i = atoms_to_move(i)
 
 
-     a(1) = atom_list(atom_i,im,is)%rxp - atom_list(atom1,im,is)%rxp
-     a(2) = atom_list(atom_i,im,is)%ryp - atom_list(atom1,im,is)%ryp
-     a(3) = atom_list(atom_i,im,is)%rzp - atom_list(atom1,im,is)%rzp
+     a(1) = atom_list(atom_i,im,is)%rp(1) - atom_list(atom1,im,is)%rp(1)
+     a(2) = atom_list(atom_i,im,is)%rp(2) - atom_list(atom1,im,is)%rp(2)
+     a(3) = atom_list(atom_i,im,is)%rp(3) - atom_list(atom1,im,is)%rp(3)
      
      ! first transformation
 
@@ -204,9 +204,9 @@ SUBROUTINE Flip_Move
 
      c = MATMUL(hanger,b)
 
-     atom_list(atom_i,im,is)%rxp = c(1) + atom_list(atom1,im,is)%rxp
-     atom_list(atom_i,im,is)%ryp = c(2) + atom_list(atom1,im,is)%ryp
-     atom_list(atom_i,im,is)%rzp = c(3) + atom_list(atom1,im,is)%rzp
+     atom_list(atom_i,im,is)%rp(1) = c(1) + atom_list(atom1,im,is)%rp(1)
+     atom_list(atom_i,im,is)%rp(2) = c(2) + atom_list(atom1,im,is)%rp(2)
+     atom_list(atom_i,im,is)%rp(3) = c(3) + atom_list(atom1,im,is)%rp(3)
         
   END DO
 
diff --git a/Src/move_rotate.f90 b/Src/move_rotate.f90
index 85d07cce..fadfcb3b 100755
--- a/Src/move_rotate.f90
+++ b/Src/move_rotate.f90
@@ -79,8 +79,8 @@ SUBROUTINE Rotate
   LOGICAL :: inter_overlap, overlap, accept_or_reject
 
   ! Pair_Energy arrays and Ewald implementation
-  INTEGER :: position
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
+  INTEGER :: pos
+  !REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
 
   ! Framework energy related variables
   REAL(DP) :: E_framework, E_framework_move, E_correction_move
@@ -199,6 +199,8 @@ SUBROUTINE Rotate
   END IF
 
   IF (inter_overlap)  THEN
+     l_debug_print = .TRUE.
+     CALL Compute_Molecule_Nonbond_Inter_Energy(lm,is,E_vdw,E_qq,inter_overlap)
      err_msg = ""
      err_msg(1) = "Attempted to rotate molecule " // TRIM(Int_To_String(im)) // &
                   " of species " // TRIM(Int_To_String(is))
@@ -241,13 +243,12 @@ SUBROUTINE Rotate
 
      IF ((int_charge_sum_style(ibox) == charge_ewald) .AND. (has_charge(is))) THEN
         
-        ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
-        CALL Get_Position_Alive(lm,is,position)
+        !ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
      
-        !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-        cos_mol_old(:) = cos_mol(1:nvecs(ibox),position)
-        sin_mol_old(:) = sin_mol(1:nvecs(ibox),position)
-        !$OMP END PARALLEL WORKSHARE
+        !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+        !cos_mol(1:nvecs(ibox),0) = cos_mol(1:nvecs(ibox),pos)
+        !sin_mol(1:nvecs(ibox),0) = sin_mol(1:nvecs(ibox),pos)
+        !!$OMP END PARALLEL WORKSHARE
 
         CALL Update_System_Ewald_Reciprocal_Energy(lm,is,ibox,int_rotation,E_reciprocal_move)
         dE = E_reciprocal_move - energy(ibox)%reciprocal
@@ -290,8 +291,8 @@ SUBROUTINE Rotate
         CALL Get_COM(lm,is)
 
         IF (l_pair_nrg) DEALLOCATE(pair_vdw_temp,pair_qq_temp)
-        IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-        IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+        !IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+        !IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
        
      ELSE
 
@@ -301,14 +302,16 @@ SUBROUTINE Rotate
         
         IF ((int_charge_sum_style(ibox) == charge_ewald) .AND. (has_charge(is))) THEN
            
+           CALL Get_Position_Alive(lm,is,pos)
            !$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
-           cos_sum(:,ibox) = cos_sum_old(:,ibox)
-           sin_sum(:,ibox) = sin_sum_old(:,ibox)
-           cos_mol(1:nvecs(ibox),position) =cos_mol_old(:)
-           sin_mol(1:nvecs(ibox),position) =sin_mol_old(:)
+           box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+           !cos_sum(:,ibox) = cos_sum_old(:,ibox)
+           !sin_sum(:,ibox) = sin_sum_old(:,ibox)
+           cos_mol(1:nvecs(ibox),pos) =cos_mol(1:nvecs(ibox),0)
+           sin_mol(1:nvecs(ibox),pos) =sin_mol(1:nvecs(ibox),0)
            !$OMP END PARALLEL WORKSHARE
 
-           DEALLOCATE(cos_mol_old,sin_mol_old)
+           !DEALLOCATE(cos_mol_old,sin_mol_old)
 
         END IF
         
@@ -427,38 +430,38 @@ SUBROUTINE Rotate_Molecule_Axis(dxrot,dyrot,dzrot)
 
     ! Move the origin to the COM of this molecule
 
-    atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp - molecule_list(lm,is)%xcom
-    atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp - molecule_list(lm,is)%ycom
-    atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp - molecule_list(lm,is)%zcom
+    atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) - molecule_list(lm,is)%rcom(1)
+    atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) - molecule_list(lm,is)%rcom(2)
+    atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) - molecule_list(lm,is)%rcom(3)
 
     ! Apply the rotation matrix to these coordinates
 
 
     DO ia = 1, natoms(is)
 
-       rxpnew = rot11*atom_list(ia,lm,is)%rxp + rot12*atom_list(ia,lm,is)%ryp + &
-            rot13*atom_list(ia,lm,is)%rzp
-       rypnew = rot21*atom_list(ia,lm,is)%rxp + rot22*atom_list(ia,lm,is)%ryp + &
-            rot23*atom_list(ia,lm,is)%rzp
-       rzpnew = rot31*atom_list(ia,lm,is)%rxp + rot32*atom_list(ia,lm,is)%ryp + &
-            rot33*atom_list(ia,lm,is)%rzp
+       rxpnew = rot11*atom_list(ia,lm,is)%rp(1) + rot12*atom_list(ia,lm,is)%rp(2) + &
+            rot13*atom_list(ia,lm,is)%rp(3)
+       rypnew = rot21*atom_list(ia,lm,is)%rp(1) + rot22*atom_list(ia,lm,is)%rp(2) + &
+            rot23*atom_list(ia,lm,is)%rp(3)
+       rzpnew = rot31*atom_list(ia,lm,is)%rp(1) + rot32*atom_list(ia,lm,is)%rp(2) + &
+            rot33*atom_list(ia,lm,is)%rp(3)
 
-       dxrot(ia) = rxpnew - atom_list(ia,lm,is)%rxp
-       dyrot(ia) = rypnew - atom_list(ia,lm,is)%ryp
-       dzrot(ia) = rzpnew - atom_list(ia,lm,is)%rzp
+       dxrot(ia) = rxpnew - atom_list(ia,lm,is)%rp(1)
+       dyrot(ia) = rypnew - atom_list(ia,lm,is)%rp(2)
+       dzrot(ia) = rzpnew - atom_list(ia,lm,is)%rp(3)
 
-       atom_list(ia,lm,is)%rxp = rxpnew
-       atom_list(ia,lm,is)%ryp = rypnew
-       atom_list(ia,lm,is)%rzp = rzpnew
+       atom_list(ia,lm,is)%rp(1) = rxpnew
+       atom_list(ia,lm,is)%rp(2) = rypnew
+       atom_list(ia,lm,is)%rp(3) = rzpnew
 
     END DO
     
     
     ! Shift the origin back to the space fixed axes.
 
-    atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + molecule_list(lm,is)%xcom
-    atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + molecule_list(lm,is)%ycom
-    atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + molecule_list(lm,is)%zcom
+    atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + molecule_list(lm,is)%rcom(1)
+    atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + molecule_list(lm,is)%rcom(2)
+    atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + molecule_list(lm,is)%rcom(3)
 
   END SUBROUTINE Rotate_Molecule_Axis
   !-----------------------------------------------------------------------------------------------
@@ -483,9 +486,9 @@ SUBROUTINE Rotate_Molecule_Eulerian
 
     ! shift the origin to the COM of the molecule
 
-    atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp - molecule_list(lm,is)%xcom
-    atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp - molecule_list(lm,is)%ycom
-    atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp - molecule_list(lm,is)%zcom
+    atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) - molecule_list(lm,is)%rcom(1)
+    atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) - molecule_list(lm,is)%rcom(2)
+    atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) - molecule_list(lm,is)%rcom(3)
 
     ! Construct the rotation matrix that needs to be applied to each of the vectors
     ! This is the A matrix in Goldstein notation
@@ -506,27 +509,27 @@ SUBROUTINE Rotate_Molecule_Eulerian
 
     DO ia = 1, natoms(is)
 
-       rxpnew = rot11*atom_list(ia,lm,is)%rxp & 
-              + rot12*atom_list(ia,lm,is)%ryp &
-              + rot13*atom_list(ia,lm,is)%rzp
-       rypnew = rot21*atom_list(ia,lm,is)%rxp &
-              + rot22*atom_list(ia,lm,is)%ryp &
-              + rot23*atom_list(ia,lm,is)%rzp
-       rzpnew = rot31*atom_list(ia,lm,is)%rxp &
-              + rot32*atom_list(ia,lm,is)%ryp &
-              + rot33*atom_list(ia,lm,is)%rzp
+       rxpnew = rot11*atom_list(ia,lm,is)%rp(1) & 
+              + rot12*atom_list(ia,lm,is)%rp(2) &
+              + rot13*atom_list(ia,lm,is)%rp(3)
+       rypnew = rot21*atom_list(ia,lm,is)%rp(1) &
+              + rot22*atom_list(ia,lm,is)%rp(2) &
+              + rot23*atom_list(ia,lm,is)%rp(3)
+       rzpnew = rot31*atom_list(ia,lm,is)%rp(1) &
+              + rot32*atom_list(ia,lm,is)%rp(2) &
+              + rot33*atom_list(ia,lm,is)%rp(3)
 
-       atom_list(ia,lm,is)%rxp = rxpnew
-       atom_list(ia,lm,is)%ryp = rypnew
-       atom_list(ia,lm,is)%rzp = rzpnew
+       atom_list(ia,lm,is)%rp(1) = rxpnew
+       atom_list(ia,lm,is)%rp(2) = rypnew
+       atom_list(ia,lm,is)%rp(3) = rzpnew
 
     END DO    
 
     ! Shift the origin back to (0,0,0)
 
-    atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + molecule_list(lm,is)%xcom
-    atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + molecule_list(lm,is)%ycom
-    atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + molecule_list(lm,is)%zcom
+    atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + molecule_list(lm,is)%rcom(1)
+    atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + molecule_list(lm,is)%rcom(2)
+    atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + molecule_list(lm,is)%rcom(3)
     
   END SUBROUTINE Rotate_Molecule_Eulerian
 
diff --git a/Src/move_translate.f90 b/Src/move_translate.f90
index 5cabcb87..9f20d8de 100755
--- a/Src/move_translate.f90
+++ b/Src/move_translate.f90
@@ -80,8 +80,8 @@ SUBROUTINE Translate
   LOGICAL :: inter_overlap, overlap, accept_or_reject
 
   ! Pair_Energy arrays and Ewald implementation
-  INTEGER :: position
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
+  INTEGER :: pos
+  !REAL(DP), ALLOCATABLE :: cos_mol_old(:), sin_mol_old(:)
 
 ! Done with that section
 
@@ -211,13 +211,13 @@ SUBROUTINE Translate
   dz = ( 2.0_DP * rranf() - 1.0_DP) * max_disp(is,ibox)
 
   ! Move atoms by the above vector dx,dy,dz and also update the COM
-  atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + dx
-  atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + dy
-  atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + dz
+  atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + dx
+  atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + dy
+  atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + dz
 
-  molecule_list(lm,is)%xcom = molecule_list(lm,is)%xcom + dx
-  molecule_list(lm,is)%ycom = molecule_list(lm,is)%ycom + dy
-  molecule_list(lm,is)%zcom = molecule_list(lm,is)%zcom + dz
+  molecule_list(lm,is)%rcom(1) = molecule_list(lm,is)%rcom(1) + dx
+  molecule_list(lm,is)%rcom(2) = molecule_list(lm,is)%rcom(2) + dy
+  molecule_list(lm,is)%rcom(3) = molecule_list(lm,is)%rcom(3) + dz
 
 
   !**************************************************************************
@@ -241,13 +241,12 @@ SUBROUTINE Translate
 
      IF ((int_charge_sum_style(ibox) == charge_ewald) .AND. (has_charge(is))) THEN
 
-        ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
-        CALL Get_Position_Alive(lm,is,position)
+        !ALLOCATE(cos_mol_old(nvecs(ibox)),sin_mol_old(nvecs(ibox)))
         
-        !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-        cos_mol_old(:) = cos_mol(1:nvecs(ibox),position)
-        sin_mol_old(:) = sin_mol(1:nvecs(ibox),position)
-        !$OMP END PARALLEL WORKSHARE
+        !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+        !cos_mol(1:nvecs(ibox),0) = cos_mol(1:nvecs(ibox),position)
+        !sin_mol(1:nvecs(ibox),0) = sin_mol(1:nvecs(ibox),position)
+        !!$OMP END PARALLEL WORKSHARE
 
         CALL Update_System_Ewald_Reciprocal_Energy(lm,is,ibox,int_translation,E_reciprocal_move)
         dE = E_reciprocal_move - energy(ibox)%reciprocal
@@ -285,8 +284,8 @@ SUBROUTINE Translate
         nequil_success(is,ibox)%displacement = nequil_success(is,ibox)%displacement + 1
 
         IF (l_pair_nrg) DEALLOCATE(pair_vdw_temp,pair_qq_temp)
-        IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
-        IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
+        !IF (ALLOCATED(cos_mol_old)) DEALLOCATE(cos_mol_old)
+        !IF (ALLOCATED(sin_mol_old)) DEALLOCATE(sin_mol_old)
 
      ELSE
 
@@ -296,13 +295,13 @@ SUBROUTINE Translate
         IF ((int_charge_sum_style(ibox) == charge_ewald) .AND. (has_charge(is))) THEN
            ! Also reset the old cos_sum and sin_sum for reciprocal space vectors. Note
            ! that old vectors were set while difference in ewald reciprocal energy was computed.
-           !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)           
-           cos_sum(:,ibox) = cos_sum_old(:,ibox)
-           sin_sum(:,ibox) = sin_sum_old(:,ibox)
-           cos_mol(1:nvecs(ibox),position) =cos_mol_old(:)
-           sin_mol(1:nvecs(ibox),position) =sin_mol_old(:)
+           CALL Get_Position_Alive(lm,is,pos)
+           !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+           box_list(ibox)%sincos_sum = box_list(ibox)%sincos_sum_old
+           cos_mol(1:nvecs(ibox),pos) =cos_mol(1:nvecs(ibox),0)
+           sin_mol(1:nvecs(ibox),pos) =sin_mol(1:nvecs(ibox),0)
            !$OMP END PARALLEL WORKSHARE
-           DEALLOCATE(cos_mol_old,sin_mol_old)
+           !DEALLOCATE(cos_mol_old,sin_mol_old)
         END IF
         IF (l_pair_nrg)  CALL Reset_Molecule_Pair_Interaction_Arrays(lm,is,ibox)
      ENDIF
diff --git a/Src/move_vol_swap.f90 b/Src/move_vol_swap.f90
index ead3d36b..661a4ab3 100755
--- a/Src/move_vol_swap.f90
+++ b/Src/move_vol_swap.f90
@@ -50,24 +50,27 @@ SUBROUTINE GEMC_NVT_Volume
   ! Local variables
 
   INTEGER :: box_grw, box_shk
-  INTEGER :: ibox, nvecs_grw, nvecs_shk, nvecs_max
+  INTEGER :: ibox, nvecs_grw, nvecs_shk, nvecs_max, nvecs_max_new_p4
   INTEGER :: is, im, lm
 
   REAL(DP) :: delta_volume, ln_pacc, dE_grw, dE_shk
   REAL(DP) :: success_ratio
-  REAL(DP), DIMENSION(maxk) :: hx_grw, hy_grw, hz_grw, Cn_grw
-  REAL(DP), DIMENSION(maxk) :: hx_shk, hy_shk, hz_shk, Cn_shk  
   REAL(DP) :: v_ratio_o, v_total, vol_factor
 
   LOGICAL :: overlap, accept_or_reject
 
-  TYPE(Box_Class) :: box_list_grw, box_list_shk
+  REAL(DP), DIMENSION(3,3) :: length_grw, length_shk, length_inv_grw, length_inv_shk
+  REAL(DP) :: volume_grw, volume_shk
   TYPE(Energy_Class) :: energy_grw, energy_shk
 
-  INTEGER :: position
+  INTEGER :: pos
+  INTEGER :: istart, iend, im_locate_shift, nboxmols
+  INTEGER, DIMENSION(MAXVAL(SUM(nmols,1))) :: posvec
 
   REAL(DP), ALLOCATABLE :: pair_nrg_vdw_old(:,:), pair_nrg_qq_old(:,:)
   REAL(DP), ALLOCATABLE :: cos_mol_old(:,:), sin_mol_old(:,:)
+  REAL(DP), ALLOCATABLE :: kspace_vectors_grw(:,:), kspace_vectors_shk(:,:)
+  INTEGER, ALLOCATABLE :: kspace_vector_ints_grw(:), kspace_vector_ints_shk(:)
 
   REAL(DP) :: rcut_vdw_grw, rcut_coul_grw, rcut3_grw, rcut9_grw, alpha_ewald_grw
   REAL(DP) :: h_ewald_cut_grw, rcut_vdwsq_grw, rcut_coulsq_grw, rcut_vdw3_grw
@@ -94,8 +97,14 @@ SUBROUTINE GEMC_NVT_Volume
 
   ! store old cell matrix 
 
-  box_list_grw = box_list(box_grw)
-  box_list_shk = box_list(box_shk)
+  !box_list_grw = box_list(box_grw)
+  !box_list_shk = box_list(box_shk)
+  length_grw = box_list(box_grw)%length
+  length_inv_grw = box_list(box_grw)%length_inv
+  length_shk = box_list(box_shk)%length
+  length_inv_shk = box_list(box_shk)%length_inv
+  volume_shk = box_list(box_shk)%volume
+  volume_grw = box_list(box_grw)%volume
 
   ! Store the old configurations of all atoms and COMs
   CALL Save_Cartesian_Coordinates_Box(box_grw)
@@ -103,39 +112,45 @@ SUBROUTINE GEMC_NVT_Volume
 
   ! store the pair interactions
   IF (l_pair_nrg) THEN
-     ALLOCATE(pair_nrg_vdw_old(SUM(max_molecules),SUM(max_molecules)))
-     ALLOCATE(pair_nrg_qq_old(SUM(max_molecules),SUM(max_molecules)))
-
-     !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-     pair_nrg_vdw_old(:,:) = pair_nrg_vdw(:,:)
-     pair_nrg_qq_old(:,:) = pair_nrg_qq(:,:)
-     !!$OMP END PARALLEL WORKSHARE
+     CALL MOVE_ALLOC(pair_nrg_vdw,pair_nrg_vdw_old)
+     CALL MOVE_ALLOC(pair_nrg_qq,pair_nrg_qq_old)
+     ALLOCATE(pair_nrg_vdw(sum_max_molecules_p4,sum_max_molecules))
+     ALLOCATE(pair_nrg_qq(sum_max_molecules_p4,sum_max_molecules))
+     !ALLOCATE(pair_nrg_vdw_old(sum_max_molecules_p4,sum_max_molecules))
+     !ALLOCATE(pair_nrg_qq_old(sum_max_molecules_p4,sum_max_molecules))
+
+     !!!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+     !pair_nrg_vdw_old(:,:) = pair_nrg_vdw(:,:)
+     !pair_nrg_qq_old(:,:) = pair_nrg_qq(:,:)
+     !!!$OMP END PARALLEL WORKSHARE
   END IF
 
   ! store cos_mol and sin_mol
   IF ( int_charge_sum_style(box_grw) == charge_ewald .OR. &
        int_charge_sum_style(box_shk) == charge_ewald ) THEN
+     CALL MOVE_ALLOC(cos_mol,cos_mol_old)
+     CALL MOVE_ALLOC(sin_mol,sin_mol_old)
      
-     ALLOCATE(cos_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-     
-     IF (AllocateStatus /= 0 ) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for cos_mol_old'
-        CALL Clean_Abort(err_msg,'gemc_nvt_volume.f90')
-     END IF
-     
-     ALLOCATE(sin_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-
-     IF (AllocateStatus /= 0 ) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for sin_mol_old'
-        CALL Clean_Abort(err_msg,'gemc_nvt_volume.f90')
-     END IF
-     
-     !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-     cos_mol_old(:,:) = cos_mol(:,:)
-     sin_mol_old(:,:) = sin_mol(:,:)
-     !!$OMP END PARALLEL WORKSHARE
+     !ALLOCATE(cos_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
+     !
+     !IF (AllocateStatus /= 0 ) THEN
+     !   err_msg = ''
+     !   err_msg(1) = 'Memory could not be allocated for cos_mol_old'
+     !   CALL Clean_Abort(err_msg,'gemc_nvt_volume.f90')
+     !END IF
+     !
+     !ALLOCATE(sin_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
+
+     !IF (AllocateStatus /= 0 ) THEN
+     !   err_msg = ''
+     !   err_msg(1) = 'Memory could not be allocated for sin_mol_old'
+     !   CALL Clean_Abort(err_msg,'gemc_nvt_volume.f90')
+     !END IF
+     !
+     !!!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
+     !cos_mol_old(:,:) = cos_mol(:,:)
+     !sin_mol_old(:,:) = sin_mol(:,:)
+     !!!$OMP END PARALLEL WORKSHARE
 
   END IF
 
@@ -281,8 +296,8 @@ SUBROUTINE GEMC_NVT_Volume
 
   ! Rescale the COM and all atomic positions
 
-  CALL Scale_COM_Cartesian(box_grw,box_list_grw)
-  CALL Scale_COM_Cartesian(box_shk,box_list_shk)
+  CALL Scale_COM_Cartesian(box_grw,length_inv_grw)
+  CALL Scale_COM_Cartesian(box_shk,length_inv_shk)
 
   ! Now let us compute the energy change due to the combined move
 
@@ -299,68 +314,40 @@ SUBROUTINE GEMC_NVT_Volume
      nvecs_grw = nvecs(box_grw)
      nvecs_shk = nvecs(box_shk)
      nvecs_max = MAXVAL(nvecs)
-
-     !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
-     cos_sum_old(:,:) = cos_sum(:,:)
-     sin_sum_old(:,:) = sin_sum(:,:)
-
-     hx_grw(:) = hx(:,box_grw)
-     hy_grw(:) = hy(:,box_grw)
-     hz_grw(:) = hz(:,box_grw)
-     Cn_grw(:) = Cn(:,box_grw)
-
-     hx_shk(:) = hx(:,box_shk)
-     hy_shk(:) = hy(:,box_shk)
-     hz_shk(:) = hz(:,box_shk)
-     Cn_shk(:) = Cn(:,box_shk)
-
-     !!$OMP END PARALLEL WORKSHARE
-
-     ! Determine the new k vectors for this box. The call will change Cn, hx, hy and hz and hence will
+     IF (ALLOCATED(box_list(box_grw)%sincos_sum_old)) DEALLOCATE(box_list(box_grw)%sincos_sum_old)
+     IF (ALLOCATED(box_list(box_shk)%sincos_sum_old)) DEALLOCATE(box_list(box_shk)%sincos_sum_old)
+
+     CALL MOVE_ALLOC(box_list(box_grw)%kspace_vectors,kspace_vectors_grw)
+     CALL MOVE_ALLOC(box_list(box_shk)%kspace_vectors,kspace_vectors_shk)
+     CALL MOVE_ALLOC(box_list(box_grw)%kspace_vector_ints,kspace_vector_ints_grw)
+     CALL MOVE_ALLOC(box_list(box_shk)%kspace_vector_ints,kspace_vector_ints_shk)
+     CALL MOVE_ALLOC(box_list(box_grw)%sincos_sum,box_list(box_grw)%sincos_sum_old)
+     CALL MOVE_ALLOC(box_list(box_shk)%sincos_sum,box_list(box_shk)%sincos_sum_old)
+     !!!$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
+     !cos_sum_old(:,:) = cos_sum(:,:)
+     !sin_sum_old(:,:) = sin_sum(:,:)
+
+     !hx_grw(:) = hx(:,box_grw)
+     !hy_grw(:) = hy(:,box_grw)
+     !hz_grw(:) = hz(:,box_grw)
+     !Cn_grw(:) = Cn(:,box_grw)
+
+     !hx_shk(:) = hx(:,box_shk)
+     !hy_shk(:) = hy(:,box_shk)
+     !hz_shk(:) = hz(:,box_shk)
+     !Cn_shk(:) = Cn(:,box_shk)
+
+     !!!$OMP END PARALLEL WORKSHARE
+
+     ! Determine the new k vectors for both boxes. The call will change Cn, hx, hy and hz and hence will
      ! change cos_sum and sin_sum.
      
      CALL Ewald_Reciprocal_Lattice_Vector_Setup(box_grw)
      CALL Ewald_Reciprocal_Lattice_Vector_Setup(box_shk)
+     nvecs_max_new_p4 = IAND(MAXVAL(nvecs)+padconst_8byte,padmask_8byte)
+     ALLOCATE(sin_mol(nvecs_max_new_p4,0:SUM(max_molecules)))
+     ALLOCATE(cos_mol(nvecs_max_new_p4,0:SUM(max_molecules)))
 
-     ! reallocate arrays
-
-     DEALLOCATE(cos_sum,sin_sum)
-
-     IF (ALLOCATED(cos_mol)) DEALLOCATE(cos_mol)
-     IF (ALLOCATED(sin_mol)) DEALLOCATE(sin_mol)
-
-     ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes),Stat=AllocateStatus)
-     
-     IF (AllocateStatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for cos_sum'
-        CALL Clean_Abort(err_msg,'GEMC_NVT_VOLUME')
-     END IF
-     
-     
-     ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes), Stat = AllocateStatus)
-     
-     IF (Allocatestatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for sin_sum'
-        CALL Clean_Abort(err_msg,'GEMC_NVT_VOLUME')
-     END IF
-
-     ALLOCATE(cos_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-     
-     IF (AllocateStatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for cos_mol'
-        CALL Clean_Abort(err_msg,'gemc_volume_change.f90')
-     END IF
-     
-     ALLOCATE(sin_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-     
-     IF (AllocateStatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for sin_mol'
-        CALL Clean_Abort(err_msg,'gemc_volume_change.f90')
-     END IF
         
   END IF
   
@@ -369,8 +356,9 @@ SUBROUTINE GEMC_NVT_Volume
   ! as this box will likely to have more overlaps
 
   CALL Compute_System_Total_Energy(box_shk, .TRUE., overlap)
+  !CALL Compute_System_Total_Energy(box_shk, .FALSE., overlap)
   
-   IF (overlap) THEN 
+  IF (overlap) THEN 
       CALL Reset_Coords
 
       IF (verbose_log) THEN
@@ -378,9 +366,10 @@ SUBROUTINE GEMC_NVT_Volume
                i_mcstep, 'vol_swap', box_shk, '>', box_grw, accept, 'overlap'
       END IF
 
-   ELSE
+  ELSE
       
       CALL Compute_System_Total_Energy(box_grw, .TRUE.,overlap)
+      !CALL Compute_System_Total_Energy(box_grw, .FALSE.,overlap)
      
       ! actually there should be no overlap for the box whose dimensions
       ! increase but we will include this check only for safety.
@@ -398,17 +387,19 @@ SUBROUTINE GEMC_NVT_Volume
          
          dE_grw = energy(box_grw)%total - energy_grw%total
          dE_shk = energy(box_shk)%total - energy_shk%total
+         !dE_grw = energy(box_grw)%inter - energy_grw%inter
+         !dE_shk = energy(box_shk)%inter - energy_shk%inter
          
          IF (f_dv) THEN
             ln_pacc = beta(box_grw) * dE_grw + beta(box_shk) * dE_shk &
-                    - REAL(SUM(nmols(:,box_grw)),DP) * DLOG( box_list(box_grw)%volume / box_list_grw%volume) &
-                    - REAL(SUM(nmols(:,box_shk)),DP) * DLOG( box_list(box_shk)%volume / box_list_shk%volume)
+                    - REAL(SUM(nmols(:,box_grw)),DP) * DLOG( box_list(box_grw)%volume / volume_grw) &
+                    - REAL(SUM(nmols(:,box_shk)),DP) * DLOG( box_list(box_shk)%volume / volume_shk)
             
          ELSE IF(f_vratio) THEN
             
             ln_pacc = beta(box_grw) * dE_grw + beta(box_shk) * dE_shk &
-                    - REAL(SUM(nmols(:,box_grw))+1,DP) * DLOG( box_list(box_grw)%volume / box_list_grw%volume) &
-                    - REAL(SUM(nmols(:,box_shk))+1,DP) * DLOG( box_list(box_shk)%volume / box_list_shk%volume)
+                    - REAL(SUM(nmols(:,box_grw))+1,DP) * DLOG( box_list(box_grw)%volume / volume_grw) &
+                    - REAL(SUM(nmols(:,box_shk))+1,DP) * DLOG( box_list(box_shk)%volume / volume_shk)
             
          END IF
 
@@ -421,29 +412,29 @@ SUBROUTINE GEMC_NVT_Volume
             ivol_success(box_grw) = ivol_success(box_grw) + 1
             ivol_success(box_shk) = ivol_success(box_shk) + 1
             ! energy,positions and box dimensions are already updated
-            IF (int_charge_sum_style(box_grw) == charge_ewald) THEN
-               
-               ! cos_sum and sin_sum were deallocated, destroying the terms
-               ! for boxes other than box_grw, box_shk. so restore these
+            IF ((int_charge_sum_style(box_grw) == charge_ewald .OR. l_pair_nrg) .AND. nbr_boxes>2) THEN
                
-               DO ibox = 1, nbr_boxes
-                  
-                  IF ( .NOT. ( (ibox /= box_grw) .OR. (ibox /= box_shk))) THEN
-                     ! transfer cos_sum and sin_sum for other boxes
-                     ! Note that direct assignment of cos_sum_old to cos_sum
-                     ! will result into an error as these two arrays are of
-                     ! different dimensions
-                     cos_sum(1:nvecs(ibox),ibox) = cos_sum_old(1:nvecs(ibox),ibox)
-                     sin_sum(1:nvecs(ibox),ibox) = sin_sum_old(1:nvecs(ibox),ibox)
-                     
-                  END IF
-                  
-               END DO
-               ! Now deallocate cos_sum_old and sin_sum_old so that they have the same dimensions
-               ! as sin_sum and cos_sum                  
-               DEALLOCATE(cos_sum_old,sin_sum_old)
-               ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes))
-               ALLOCATE(sin_sum_old(SIZE(sin_sum,1),nbr_boxes))
+               !! cos_sum and sin_sum were deallocated, destroying the terms
+               !! for boxes other than box_grw, box_shk. so restore these
+               !
+               !DO ibox = 1, nbr_boxes
+               !   
+               !   IF ( .NOT. ( (ibox /= box_grw) .OR. (ibox /= box_shk))) THEN
+               !      ! transfer cos_sum and sin_sum for other boxes
+               !      ! Note that direct assignment of cos_sum_old to cos_sum
+               !      ! will result into an error as these two arrays are of
+               !      ! different dimensions
+               !      cos_sum(1:nvecs(ibox),ibox) = cos_sum_old(1:nvecs(ibox),ibox)
+               !      sin_sum(1:nvecs(ibox),ibox) = sin_sum_old(1:nvecs(ibox),ibox)
+               !      
+               !   END IF
+               !   
+               !END DO
+               !! Now deallocate cos_sum_old and sin_sum_old so that they have the same dimensions
+               !! as sin_sum and cos_sum                  
+               !DEALLOCATE(cos_sum_old,sin_sum_old)
+               !ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes))
+               !ALLOCATE(sin_sum_old(SIZE(sin_sum,1),nbr_boxes))
                
                ! Now assign cos_mol and sin_mol for the molecules present in other
                ! boxes. Note that cos_mol for box_grw and box_shk have been assigned
@@ -451,28 +442,46 @@ SUBROUTINE GEMC_NVT_Volume
                
                DO ibox = 1, nbr_boxes
                   IF (ibox == box_grw .OR. ibox == box_shk) CYCLE
-                        
+                  nboxmols = SUM(nmols(:,ibox))
+                  IF (nboxmols < 1) CYCLE
+                  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(is,pos,im_locate_shift,iend,istart)
+                  !$OMP DO SCHEDULE(STATIC)
                   DO is = 1, nspecies
-                     DO im = 1, nmols(is,ibox)
-                        lm = locate(im,is,ibox)
-                        
-                        CALL Get_Position_Alive(lm,is,position)
-                        
-                        !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-                        cos_mol(1:nvecs(ibox),position) = cos_mol_old(1:nvecs(ibox),position)
-                        sin_mol(1:nvecs(ibox),position) = sin_mol_old(1:nvecs(ibox),position)
-                        
-                        cos_mol(nvecs(ibox)+1:MAXVAL(nvecs),position) = 0.0_DP
-                        sin_mol(nvecs(ibox)+1:MAXVAL(nvecs),position) = 0.0_DP
-                        !!$OMP END PARALLEL WORKSHARE
-                        
-                     END DO
+                     IF (is==1) THEN
+                             im_locate_shift = 0
+                     ELSE
+                             im_locate_shift = SUM(max_molecules(1:is-1))
+                     END IF
+                     iend = SUM(nmols(1:is,ibox))
+                     istart = iend - nmols(is,ibox) + 1
+                     posvec(istart:iend) = im_locate_shift + locate(1:nmols(is,ibox),is,ibox)
+                  END DO
+                  !$OMP END DO
+                  !$OMP DO SCHEDULE(STATIC)
+                  DO im = 1, nboxmols
+                        pos = posvec(im)
+                        IF (int_charge_sum_style(ibox) == charge_ewald) THEN
+                                !DIR$ VECTOR ALIGNED
+                                cos_mol(1:nvecs(ibox),pos) = cos_mol_old(1:nvecs(ibox),pos)
+                                !DIR$ VECTOR ALIGNED
+                                sin_mol(1:nvecs(ibox),pos) = sin_mol_old(1:nvecs(ibox),pos)
+                                cos_mol(nvecs(ibox)+1:,pos) = 0.0_DP
+                                sin_mol(nvecs(ibox)+1:,pos) = 0.0_DP
+                        END IF
+                        IF (l_pair_nrg) THEN
+                                ! Technically unnecessary but generally faster (and harmless) to copy whole column 
+                                ! instead of only the elements in posvec
+                                !DIR$ VECTOR ALIGNED
+                                pair_nrg_vdw(:,pos) = pair_nrg_vdw_old(:,pos)
+                                !DIR$ VECTOR ALIGNED
+                                pair_nrg_qq(:,pos) = pair_nrg_qq_old(:,pos)
+                        END IF
                   END DO
+                  !$OMP END DO
+                  !$OMP END PARALLEL
                END DO
-               DEALLOCATE(cos_mol_old,sin_mol_old)
-            END IF  ! ends if of 316
+            END IF
             
-            IF (l_pair_nrg) DEALLOCATE(pair_nrg_vdw_old,pair_nrg_qq_old)
   
          ELSE
             
@@ -487,10 +496,10 @@ SUBROUTINE GEMC_NVT_Volume
 
       END IF
 
-   END IF
+  END IF
 
   ! Update the maximum volume modulus of equilibration runs
-   IF (MOD(nvolumes(box_grw),nvol_update) == 0 ) THEN
+  IF (MOD(nvolumes(box_grw),nvol_update) == 0 ) THEN
       IF ( int_run_type == run_equil) THEN
 
          success_ratio = REAL(ivol_success(box_grw),DP)/REAL(nvol_update,DP)
@@ -520,7 +529,7 @@ SUBROUTINE GEMC_NVT_Volume
          
       END IF
 
-   END IF
+  END IF
 
    
  CONTAINS
@@ -532,22 +541,26 @@ SUBROUTINE Reset_Coords
      CALL Reset_Cartesian_Coordinates_Box(box_grw)
      CALL Reset_Cartesian_Coordinates_Box(box_shk)
      
-     ! box list and energy 
+     ! box list and energy
+     box_list(box_grw)%length = length_grw
+     box_list(box_shk)%length = length_shk
+
+     CALL Compute_Cell_Dimensions(box_grw)
+     CALL Compute_Cell_Dimensions(box_shk)
      
-     box_list(box_grw) = box_list_grw
-     box_list(box_shk) = box_list_shk
+     !box_list(box_grw) = box_list_grw
+     !box_list(box_shk) = box_list_shk
      
      energy(box_grw) = energy_grw
      energy(box_shk) = energy_shk
 
      IF (l_pair_nrg) THEN
-        
-        !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-        pair_nrg_vdw(:,:) = pair_nrg_vdw_old(:,:)
-        pair_nrg_qq(:,:) = pair_nrg_qq_old(:,:)
-        !!$OMP END PARALLEL WORKSHARE
-        
-        DEALLOCATE(pair_nrg_vdw_old,pair_nrg_qq_old)
+             !pair_nrg_vdw(:,:) = pair_nrg_vdw_old(:,:)
+             !pair_nrg_qq(:,:) = pair_nrg_qq_old(:,:)
+             !DEALLOCATE(pair_nrg_vdw_old,pair_nrg_qq_old)
+        DEALLOCATE(pair_nrg_vdw,pair_nrg_qq)
+        CALL MOVE_ALLOC(pair_nrg_vdw_old,pair_nrg_vdw)
+        CALL MOVE_ALLOC(pair_nrg_qq_old,pair_nrg_qq)
      END IF
 
      IF (l_half_len_cutoff(box_grw)) THEN
@@ -602,69 +615,24 @@ SUBROUTINE Reset_Coords
         
        nvecs(box_grw) = nvecs_grw
        nvecs(box_shk) = nvecs_shk
+       DEALLOCATE(box_list(box_shk)%kspace_vectors)
+       DEALLOCATE(box_list(box_grw)%kspace_vectors)
+       DEALLOCATE(box_list(box_shk)%kspace_vector_ints)
+       DEALLOCATE(box_list(box_grw)%kspace_vector_ints)
+       DEALLOCATE(box_list(box_shk)%sincos_sum)
+       DEALLOCATE(box_list(box_grw)%sincos_sum)
+       DEALLOCATE(sin_mol,cos_mol)
+       CALL MOVE_ALLOC(sin_mol_old,sin_mol)
+       CALL MOVE_ALLOC(cos_mol_old,cos_mol)
+       CALL MOVE_ALLOC(kspace_vectors_grw,box_list(box_grw)%kspace_vectors)
+       CALL MOVE_ALLOC(kspace_vectors_shk,box_list(box_shk)%kspace_vectors)
+       CALL MOVE_ALLOC(kspace_vector_ints_grw,box_list(box_grw)%kspace_vector_ints)
+       CALL MOVE_ALLOC(kspace_vector_ints_shk,box_list(box_shk)%kspace_vector_ints)
+       CALL MOVE_ALLOC(box_list(box_grw)%sincos_sum_old,box_list(box_grw)%sincos_sum)
+       CALL MOVE_ALLOC(box_list(box_shk)%sincos_sum_old,box_list(box_shk)%sincos_sum)
        
-       DEALLOCATE(cos_sum,sin_sum)
-       DEALLOCATE(cos_mol,sin_mol)
        
-       ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes),stat = AllocateStatus)
-       
-       IF (Allocatestatus /= 0) THEN
-          err_msg = ''
-          err_msg(1) = 'Memory could not be allocated for cos_sum'
-          err_msg(2) = 'volume move rejected'
-          CALL Clean_Abort(err_msg,'Volume_Change')
-       END IF
-       
-       ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes),Stat = Allocatestatus)
-       IF (Allocatestatus /= 0) THEN
-          err_msg = ''
-          err_msg(1) = 'Memory could not be allocated in the volume rejection'
-          CALL Clean_Abort(err_msg,'Volume_Change')
-       END IF
-       
-       ALLOCATE(cos_mol(MAXVAL(nvecs),SUM(max_molecules)),Stat = Allocatestatus)
-       IF (Allocatestatus /= 0) THEN
-          err_msg = ''
-          err_msg(1) = 'Memory could not be allocated for cos_mol in the volume rejection'
-          CALL Clean_Abort(err_msg,'GEMC NVT Volume_Change')
-       END IF
-       
-       ALLOCATE(sin_mol(MAXVAL(nvecs),SUM(max_molecules)),Stat = Allocatestatus)
-       IF (Allocatestatus /= 0) THEN
-          err_msg = ''
-          err_msg(1) = 'Memory could not be allocated for sin_mol in the volume rejection'
-          CALL Clean_Abort(err_msg,'GEMC NVT Volume_Change')
-       END IF
-          
-       !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-       
-       cos_sum(:,:) = cos_sum_old(:,:)
-       sin_sum(:,:) = sin_sum_old(:,:)
-       
-       cos_mol(1:SIZE(cos_mol_old,1),:) = cos_mol_old(:,:)
-       sin_mol(1:SIZE(sin_mol_old,1),:) = sin_mol_old(:,:)
-       
-       hx(:,box_grw) = hx_grw(:)
-       hy(:,box_grw) = hy_grw(:)
-       hz(:,box_grw) = hz_grw(:)
-       Cn(:,box_grw) = Cn_grw(:)
-       
-       hx(:,box_shk) = hx_shk(:)
-       hy(:,box_shk) = hy_shk(:)
-       hz(:,box_shk) = hz_shk(:)
-       Cn(:,box_shk) = Cn_shk(:)
-
-       !!$OMP END PARALLEL WORKSHARE
-       
-       DEALLOCATE(cos_mol_old,sin_mol_old)
-       
-       ! here we make sure that cos_sum_old and sin_sum_old have the same dimensions
-       ! as cos_sum and sin_sum
-       
-       DEALLOCATE(cos_sum_old,sin_sum_old)
-       ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes),sin_sum_old(SIZE(sin_sum,1),nbr_boxes))
-       
-    END IF
+     END IF
     
   END SUBROUTINE Reset_Coords
 
diff --git a/Src/move_volume.f90 b/Src/move_volume.f90
index 41788b59..5f5bdd0e 100755
--- a/Src/move_volume.f90
+++ b/Src/move_volume.f90
@@ -67,6 +67,7 @@ SUBROUTINE Volume_Change
   USE Random_Generators
   USE Energy_Routines
   USE IO_Utilities
+  USE Volume
   
   
   IMPLICIT NONE
@@ -74,14 +75,13 @@ SUBROUTINE Volume_Change
   !  !$ include 'omp_lib.h'
   
   INTEGER :: is, im, alive, this_box, i, total_molecules, nvecs_old, ibox
-  INTEGER :: nvecs_max, k, iatom, mcstep
+  INTEGER :: nvecs_max, k, iatom, mcstep, nvecs_max_new_p4
   
   INTEGER :: ia
   
   REAL(DP) :: x_box(nbr_boxes), randno
   REAL(DP) :: random_displacement, s(3), delta_volume, ln_pacc, success_ratio
   REAL(DP) :: this_volume
-  REAL(DP), DIMENSION(maxk) :: hx_old, hy_old, hz_old, Cn_old
   
   REAL(DP) :: dE
 
@@ -90,26 +90,27 @@ SUBROUTINE Volume_Change
   
   LOGICAL :: overlap, xz_change, accept_or_reject
   
-  TYPE(Box_Class) :: box_list_old
+  REAL(DP), DIMENSION(3,3) :: length_old, length_inv_old
+  REAL(DP) :: volume_old
   
   TYPE(Energy_Class) :: energy_old, virial_old
   
   REAL(DP), ALLOCATABLE :: pair_nrg_vdw_old(:,:), pair_nrg_qq_old(:,:)
   
   INTEGER, ALLOCATABLE :: my_species_id(:), my_locate_id(:), my_position_id(:)
-  INTEGER :: position, my_box
+  INTEGER :: lm, pos, my_box
+  INTEGER :: istart, iend, im_locate_shift, nboxmols
+  INTEGER, DIMENSION(MAXVAL(SUM(nmols,1))) :: posvec
   
-  REAL(DP), ALLOCATABLE :: cos_mol_old(:,:), sin_mol_old(:,:)
+  REAL(DP), DIMENSION(:,:), ALLOCATABLE :: cos_mol_old, sin_mol_old, kspace_vectors_old
+  INTEGER, DIMENSION(:), ALLOCATABLE :: kspace_vector_ints_old
   
   REAL(DP) :: rcut_vdw_old, rcut_coul_old, rcut3_old, rcut9_old, alpha_ewald_old
   REAL(DP) :: h_ewald_cut_old, rcut_vdwsq_old, rcut_coulsq_old, rcut_vdw3_old
   REAL(DP) :: rcut_vdw6_old, rcut_max_old
+
   
-  REAL(DP) :: time1,time0
   
-  CHARACTER(7) :: box_str, cutoff_str
-  ! Framework related stuff
-  REAL(DP) :: pore_width_old, ratio_width, area, half_pore_width_old
   
   
   ! Done with that section
@@ -186,50 +187,24 @@ SUBROUTINE Volume_Change
   
   END DO
   
-  !  call cpu_time(time0)
   
   IF (l_pair_nrg) THEN
+
+     CALL MOVE_ALLOC(pair_nrg_vdw,pair_nrg_vdw_old)
+     CALL MOVE_ALLOC(pair_nrg_qq,pair_nrg_qq_old)
+
      
-     ALLOCATE(pair_nrg_vdw_old(SUM(max_molecules),SUM(max_molecules)))
-     ALLOCATE(pair_nrg_qq_old(SUM(max_molecules),SUM(max_molecules)))
+     ALLOCATE(pair_nrg_vdw(sum_max_molecules_p4,sum_max_molecules))
+     ALLOCATE(pair_nrg_qq(sum_max_molecules_p4,sum_max_molecules))
      
-     !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-     pair_nrg_vdw_old(:,:) = pair_nrg_vdw(:,:)
-     pair_nrg_qq_old(:,:) = pair_nrg_qq(:,:)
-     !!$OMP END PARALLEL WORKSHARE
      
   END IF
   
-  IF (int_charge_sum_style(this_box) == charge_ewald) THEN
-     
-     ! store the cos_mol and sin_mol arrays
-  
-     ALLOCATE(cos_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-  
-     IF (AllocateStatus /=0 ) THEN
-        err_msg = ''
-        err_msg(1) = 'cos_mol_old cannot be allocated'
-        err_msg(2) = 'in Volume_Change'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
-  
-     ALLOCATE(sin_mol_old(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-  
-     IF (AllocateStatus /=0 ) THEN
-        err_msg = ''
-        err_msg(1) = 'sin_mol_old cannot be allocated'
-        err_msg(2) = 'in Volume_Change'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
-  
-     
-     cos_mol_old(:,:) = cos_mol(:,:)
-     sin_mol_old(:,:) = sin_mol(:,:)
-  
-  END IF
   
   ! Store the box_list matrix
-  box_list_old = box_list(this_box)
+  length_old = box_list(this_box)%length
+  length_inv_old = box_list(this_box)%length_inv
+  volume_old = box_list(this_box)%volume
   
   ! Change the box size
   ! Assume box is cubic
@@ -292,54 +267,56 @@ SUBROUTINE Volume_Change
      END IF
   END IF
      
-  delta_volume = box_list(this_box)%volume - box_list_old%volume
+  delta_volume = box_list(this_box)%volume - volume_old
   
   ! we scale the coordinates of the COM of each of the molecules based on
   ! the old and new cell basis vectors. The idea is to keep the fractional
   ! coordinates of the COM the same before and after the move.
-  
-  DO is = 1, nspecies
 
-     DO im = 1, nmols(is,this_box)
-        
-        alive = locate(im,is,this_box)
-        
-        ! obtain the new coordinates of the COM for this molecule
-        
-        ! first determine the fractional coordinate
-        
-        DO i = 1,3
-           s(i) = box_list_old%length_inv(i,1) * molecule_list(alive,is)%xcom &
-                + box_list_old%length_inv(i,2) * molecule_list(alive,is)%ycom &
-                + box_list_old%length_inv(i,3) * molecule_list(alive,is)%zcom
-        END DO
-        
-        ! now obtain the new positions of COMs
-        molecule_list(alive,is)%xcom = box_list(this_box)%length(1,1) * s(1) &
-                                     + box_list(this_box)%length(1,2) * s(2) &
-                                     + box_list(this_box)%length(1,3) * s(3)
-        
-        molecule_list(alive,is)%ycom = box_list(this_box)%length(2,1) * s(1) &
-                                     + box_list(this_box)%length(2,2) * s(2) &
-                                     + box_list(this_box)%length(2,3) * s(3)
-        
-        molecule_list(alive,is)%zcom = box_list(this_box)%length(3,1) * s(1) &
-                                     + box_list(this_box)%length(3,2) * s(2) &
-                                     + box_list(this_box)%length(3,3) * s(3)
-        
-        ! Obtain the new positions of atoms in this molecule
-        atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp + &
-             molecule_list(alive,is)%xcom - molecule_list(alive,is)%xcom_old
-        
-        atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp + &
-             molecule_list(alive,is)%ycom - molecule_list(alive,is)%ycom_old
-        
-        atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp + &
-             molecule_list(alive,is)%zcom - molecule_list(alive,is)%zcom_old
-        
-     END DO
-     
-  END DO
+  CALL Scale_COM_Cartesian(this_box,length_inv_old)
+  
+  !DO is = 1, nspecies
+
+  !   DO im = 1, nmols(is,this_box)
+  !      
+  !      alive = locate(im,is,this_box)
+  !      
+  !      ! obtain the new coordinates of the COM for this molecule
+  !      
+  !      ! first determine the fractional coordinate
+  !      
+  !      DO i = 1,3
+  !         s(i) = box_list_old%length_inv(i,1) * molecule_list(alive,is)%xcom &
+  !              + box_list_old%length_inv(i,2) * molecule_list(alive,is)%ycom &
+  !              + box_list_old%length_inv(i,3) * molecule_list(alive,is)%zcom
+  !      END DO
+  !      
+  !      ! now obtain the new positions of COMs
+  !      molecule_list(alive,is)%xcom = box_list(this_box)%length(1,1) * s(1) &
+  !                                   + box_list(this_box)%length(1,2) * s(2) &
+  !                                   + box_list(this_box)%length(1,3) * s(3)
+  !      
+  !      molecule_list(alive,is)%ycom = box_list(this_box)%length(2,1) * s(1) &
+  !                                   + box_list(this_box)%length(2,2) * s(2) &
+  !                                   + box_list(this_box)%length(2,3) * s(3)
+  !      
+  !      molecule_list(alive,is)%zcom = box_list(this_box)%length(3,1) * s(1) &
+  !                                   + box_list(this_box)%length(3,2) * s(2) &
+  !                                   + box_list(this_box)%length(3,3) * s(3)
+  !      
+  !      ! Obtain the new positions of atoms in this molecule
+  !      atom_list(:,alive,is)%rxp = atom_list(:,alive,is)%rxp + &
+  !           molecule_list(alive,is)%xcom - molecule_list(alive,is)%xcom_old
+  !      
+  !      atom_list(:,alive,is)%ryp = atom_list(:,alive,is)%ryp + &
+  !           molecule_list(alive,is)%ycom - molecule_list(alive,is)%ycom_old
+  !      
+  !      atom_list(:,alive,is)%rzp = atom_list(:,alive,is)%rzp + &
+  !           molecule_list(alive,is)%zcom - molecule_list(alive,is)%zcom_old
+  !      
+  !   END DO
+  !   
+  !END DO
   ! Energy change section,
   ! Store the old values of energy(this_box) and recompute the new components
 
@@ -358,72 +335,36 @@ SUBROUTINE Volume_Change
 
      nvecs_old = nvecs(this_box)
      nvecs_max = MAXVAL(nvecs)
-     
-     !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
-     cos_sum_old(:,:) = cos_sum(:,:)
-     sin_sum_old(:,:) = sin_sum(:,:)
-     hx_old(:) = hx(:,this_box)
-     hy_old(:) = hy(:,this_box)
-     hz_old(:) = hz(:,this_box)
-     Cn_old(:) = Cn(:,this_box)
-     !!$OMP END PARALLEL WORKSHARE
-
-
-     ! Determine the new k vectors for this box. The call will change Cn, hx, hy and hz and hence will
-     ! change cos_sum and sin_sum.
-     CALL Ewald_Reciprocal_Lattice_Vector_Setup(this_box)
 
-     ! cos_sum and sin_sum need to be re-allocated since the number of vectors has changed
-     ! The operation destoys the cos_sum and sin_sum for other boxes but can be easily restored
-     ! from cos_sum_old and sin_sum_old. Note that these terms for this_box will be calculated
-     ! via the call to total system energy routine.
-     ! Similar reasoning goes for cos_mol and sin_mol
+     CALL MOVE_ALLOC(cos_mol,cos_mol_old)
+     CALL MOVE_ALLOC(sin_mol,sin_mol_old)
      
-     IF (ALLOCATED(cos_sum)) DEALLOCATE(cos_sum)
-     IF (ALLOCATED(sin_sum)) DEALLOCATE(sin_sum)
-     IF (ALLOCATED(cos_mol)) DEALLOCATE(cos_mol)
-     IF (ALLOCATED(sin_mol)) DEALLOCATE(sin_mol)
-
-     ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes), Stat = AllocateStatus)
-     
-     IF (Allocatestatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for cos_sum'
-        err_msg(2) = 'allocation_cos_sin'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
-     
-
-     ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes), Stat = AllocateStatus)
-
-     IF (Allocatestatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for sin_sum'
-        err_msg(2) = 'allocation_cos_sin'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
-
-     ALLOCATE(cos_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-
-     IF (Allocatestatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for cos_mol'
-        err_msg(2) = 'allocation_cos_sin'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
-
-     ALLOCATE(sin_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
+     IF (ALLOCATED(box_list(this_box)%sincos_sum_old)) DEALLOCATE(box_list(this_box)%sincos_sum_old)
+     CALL MOVE_ALLOC(box_list(this_box)%sincos_sum,box_list(this_box)%sincos_sum_old)
+     CALL MOVE_ALLOC(box_list(this_box)%kspace_vectors,kspace_vectors_old)
+     CALL MOVE_ALLOC(box_list(this_box)%kspace_vector_ints,kspace_vector_ints_old)
+     !!!$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
+     !cos_sum_old(:,:) = cos_sum(:,:)
+     !sin_sum_old(:,:) = sin_sum(:,:)
+     !hx_old(:) = hx(:,this_box)
+     !hy_old(:) = hy(:,this_box)
+     !hz_old(:) = hz(:,this_box)
+     !Cn_old(:) = Cn(:,this_box)
+     !!!$OMP END PARALLEL WORKSHARE
+
+
+     ! Determine the new k vectors for this box.
+     CALL Ewald_Reciprocal_Lattice_Vector_Setup(this_box)
+     nvecs_max_new_p4 = IAND(MAXVAL(nvecs)+padconst_8byte,padmask_8byte)
+     ALLOCATE(cos_mol(nvecs_max_new_p4,0:SUM(max_molecules)))
+     ALLOCATE(sin_mol(nvecs_max_new_p4,0:SUM(max_molecules)))
 
-     IF (Allocatestatus /= 0) THEN
-        err_msg = ''
-        err_msg(1) = 'Memory could not be allocated for sin_mol'
-        err_msg(2) = 'allocation_cos_sin'
-        CALL Clean_Abort(err_msg,'Volume_Change')
-     END IF
      
   END IF
 
   CALL Compute_System_Total_Energy(this_box,.TRUE.,overlap)
+  ! Internal degrees of freedom remain the same.
+  !CALL Compute_System_Total_Energy(this_box,.FALSE.,overlap)
 
   IF (overlap) THEN
      ! reject move
@@ -438,11 +379,12 @@ SUBROUTINE Volume_Change
      
      ! change in the energy of the system 
      dE = energy(this_box)%total - energy_old%total
+     !dE = energy(this_box)%inter - energy_old%inter
      
      ! based on the energy, calculate the acceptance ratio
      ln_pacc = beta(this_box) * dE &
              + beta(this_box) * pressure(this_box)%setpoint * delta_volume &
-             - total_molecules * DLOG(box_list(this_box)%volume/box_list_old%volume)
+             - total_molecules * DLOG(box_list(this_box)%volume/volume_old)
      accept = accept_or_reject(ln_pacc)
      
      IF ( accept ) THEN
@@ -450,57 +392,58 @@ SUBROUTINE Volume_Change
         nvol_success(this_box) = nvol_success(this_box) + 1
         ivol_success(this_box) = ivol_success(this_box) + 1
         ! energy, positions and box dimensions are already updated
-
-        
-        IF (int_charge_sum_style(this_box) == charge_ewald) THEN
-           ! put the sin_sum and cos_sum for other boxes
-           ! Note that hx,hy,hz and Cn are updated when the total energy routine is called above.
-           ! there is no need to update these arrays.
-           DO ibox = 1, nbr_boxes
-              IF (ibox /= this_box ) THEN
-                 !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-                 sin_sum(1:nvecs(ibox),ibox) = sin_sum_old(1:nvecs(ibox),ibox)
-                 cos_sum(1:nvecs(ibox),ibox) = cos_sum_old(1:nvecs(ibox),ibox)
-                 !!$OMP END PARALLEL WORKSHARE
-              END IF
-           END DO
-           
-           ! Now deallocate cos_sum_old and sin_sum_old so that they have the same dimensions
-           ! as sin_sum and cos_sum 
+        IF ((l_pair_nrg .OR. int_charge_sum_style(this_box) == charge_ewald) .AND. nbr_boxes>1) THEN
            
-           DEALLOCATE(cos_sum_old,sin_sum_old)
-           ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes))
-           ALLOCATE(sin_sum_old(SIZE(sin_sum,1),nbr_boxes))
-           DEALLOCATE(cos_sum_start,sin_sum_start)
-           ALLOCATE(cos_sum_start(SIZE(cos_sum,1),nbr_boxes))
-           ALLOCATE(sin_sum_start(SIZE(sin_sum,1),nbr_boxes))
-
-           ! cos_mol
            
-           !              CALL cpu_time(time0)
+           ! Now assign cos_mol and sin_mol for the molecules present in other
+           ! boxes. Note that cos_mol and sin_mol for this_box were assigned
+           ! during Compute_System_Total_Energy.
            
            DO ibox = 1, nbr_boxes
-              ! skip the molecules in 'this_box'
-              IF (ibox == this_box )CYCLE 
+              IF (ibox == this_box) CYCLE
+              nboxmols = SUM(nmols(:,ibox))
+              IF (nboxmols < 1) CYCLE
+              !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(is,pos,im_locate_shift,iend,istart)
+              !$OMP DO SCHEDULE(STATIC)
               DO is = 1, nspecies
-                 DO im = 1, nmols(is,ibox)
-                    alive = locate(im,is,ibox)
-                    IF (.NOT. molecule_list(alive,is)%live) CYCLE
-                    CALL Get_Position_Alive(alive,is,position)
-                    
-                    cos_mol(1:nvecs(ibox),position) = cos_mol_old(1:nvecs(ibox),position)
-                    !                    cos_mol(nvecs(ibox)+1,nvecs(this_box)) = 0.0_DP
-                    
-                    sin_mol(1:nvecs(ibox),position) = sin_mol_old(1:nvecs(ibox),position)
-                    !                   sin_mol(nvecs(ibox)+1,nvecs(this_box)) = 0.0_DP
-                    
-                 END DO
+                 IF (is==1) THEN
+                         im_locate_shift = 0
+                 ELSE
+                         im_locate_shift = SUM(max_molecules(1:is-1))
+                 END IF
+                 iend = SUM(nmols(1:is,ibox))
+                 istart = iend - nmols(is,ibox) + 1
+                 posvec(istart:iend) = im_locate_shift + locate(1:nmols(is,ibox),is,ibox)
               END DO
+              !$OMP END DO
+              !$OMP DO SCHEDULE(STATIC)
+              DO im = 1, nboxmols
+                    pos = posvec(im)
+                    IF (int_charge_sum_style(this_box) == charge_ewald) THEN
+                            !DIR$ VECTOR ALIGNED
+                            cos_mol(1:nvecs(ibox),pos) = cos_mol_old(1:nvecs(ibox),pos)
+                            !DIR$ VECTOR ALIGNED
+                            sin_mol(1:nvecs(ibox),pos) = sin_mol_old(1:nvecs(ibox),pos)
+                            cos_mol(nvecs(ibox)+1:,pos) = 0.0_DP
+                            sin_mol(nvecs(ibox)+1:,pos) = 0.0_DP
+                    END IF
+                    IF (l_pair_nrg) THEN
+                            ! Technically unnecessary but generally faster (and harmless) to copy whole column 
+                            ! instead of only the elements in posvec
+                            !DIR$ VECTOR ALIGNED
+                            pair_nrg_vdw(:,pos) = pair_nrg_vdw_old(:,pos)
+                            !DIR$ VECTOR ALIGNED
+                            pair_nrg_qq(:,pos) = pair_nrg_qq_old(:,pos)
+                    END IF
+              END DO
+              !$OMP END DO
+              !$OMP END PARALLEL
            END DO
-           
            DEALLOCATE(cos_mol_old,sin_mol_old)
         END IF
 
+        
+
      ELSE
         
         ! Reject the move
@@ -572,7 +515,8 @@ SUBROUTINE Reset_Coords
       
       ! Reset the box dimensions
       
-      box_list(this_box) = box_list_old
+      box_list(this_box)%length = length_old
+      CALL Compute_Cell_Dimensions(this_box)
 
       
       ! Reset the energy components
@@ -580,11 +524,10 @@ SUBROUTINE Reset_Coords
       energy(this_box) = energy_old
 
       IF (l_pair_nrg) THEN
+         DEALLOCATE(pair_nrg_vdw,pair_nrg_qq)
+         CALL MOVE_ALLOC(pair_nrg_vdw_old,pair_nrg_vdw)
+         CALL MOVE_ALLOC(pair_nrg_qq_old,pair_nrg_qq)
 
-         pair_nrg_vdw(:,:) = pair_nrg_vdw_old(:,:)
-         pair_nrg_qq(:,:) = pair_nrg_qq_old(:,:)
-         
-         DEALLOCATE(pair_nrg_vdw_old,pair_nrg_qq_old)
          
       END IF
       
@@ -617,61 +560,18 @@ SUBROUTINE Reset_Coords
          ! reset the terms related to Ewald reciprocal energy
          ! reset the total number of kvectors
          nvecs(this_box) = nvecs_old
+         DEALLOCATE(box_list(this_box)%sincos_sum)
+         DEALLOCATE(box_list(this_box)%kspace_vectors)
+         DEALLOCATE(box_list(this_box)%kspace_vector_ints)
+         DEALLOCATE(sin_mol,cos_mol)
+         CALL MOVE_ALLOC(sin_mol_old,sin_mol)
+         CALL MOVE_ALLOC(cos_mol_old,cos_mol)
+
+         CALL MOVE_ALLOC(box_list(this_box)%sincos_sum_old,box_list(this_box)%sincos_sum)
+         CALL MOVE_ALLOC(kspace_vectors_old,box_list(this_box)%kspace_vectors)
+         CALL MOVE_ALLOC(kspace_vector_ints_old,box_list(this_box)%kspace_vector_ints)
 
          
-         DEALLOCATE(cos_sum,sin_sum)
-         DEALLOCATE(cos_mol,sin_mol)
-         
-         ALLOCATE(cos_sum(MAXVAL(nvecs),nbr_boxes),Stat = Allocatestatus)
-         
-         IF (Allocatestatus /= 0) THEN
-            err_msg = ''
-            err_msg(1) = 'Memory could not be allocated for cos_sum'
-            err_msg(2) = 'volume move rejected'
-            CALL Clean_Abort(err_msg,'Volume_Change')
-         END IF
-         
-         ALLOCATE(sin_sum(MAXVAL(nvecs),nbr_boxes),Stat = Allocatestatus)
-         IF (Allocatestatus /= 0) THEN
-            err_msg = ''
-            err_msg(1) = 'Memory could not be allocated in the volume rejection'
-            CALL Clean_Abort(err_msg,'Volume_Change')
-         END IF
-         
-         ALLOCATE(cos_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-         
-         IF (Allocatestatus /= 0) THEN
-            err_msg = ''
-            err_msg(1) = 'Memory could not be in the volume rejection'
-            CALL Clean_Abort(err_msg,'Volume_Change')
-         END IF
-         
-         ALLOCATE(sin_mol(MAXVAL(nvecs),SUM(max_molecules)), Stat = AllocateStatus)
-         
-         IF (Allocatestatus /= 0) THEN
-            err_msg = ''
-            err_msg(1) = 'Memory could not be allocated in the volume rejection'
-            CALL Clean_Abort(err_msg,'Volume_Change')
-         END IF
-         
-         !!$OMP PARALLEL WORKSHARE DEFAULT(SHARED) 
-         cos_mol(1:SIZE(cos_mol_old,1),:) = cos_mol_old(:,:)
-         sin_mol(1:SIZE(sin_mol_old,1),:) = sin_mol_old(:,:)
-         cos_sum(:,:) = cos_sum_old(:,:)
-         sin_sum(:,:) = sin_sum_old(:,:)
-         hx(:,this_box) = hx_old(:)
-         hy(:,this_box) = hy_old(:)
-         hz(:,this_box) = hz_old(:)
-         Cn(:,this_box) = Cn_old(:)
-         !!$OMP END PARALLEL WORKSHARE
-         
-         ! here we make sure that cos_sum_old and sin_sum_old have the same dimensions
-         ! as cos_sum and sin_sum
-         DEALLOCATE(cos_mol_old,sin_mol_old)
-         DEALLOCATE(cos_sum_old,sin_sum_old)
-         ALLOCATE(cos_sum_old(SIZE(cos_sum,1),nbr_boxes),sin_sum_old(SIZE(sin_sum,1),nbr_boxes))
-         DEALLOCATE(cos_sum_start,sin_sum_start)
-         ALLOCATE(cos_sum_start(SIZE(cos_sum,1),nbr_boxes),sin_sum_start(SIZE(sin_sum,1),nbr_boxes))
          
       END IF
     END SUBROUTINE Reset_Coords
diff --git a/Src/nptmc_control.f90 b/Src/nptmc_control.f90
index bb34d04b..50d4ecb9 100755
--- a/Src/nptmc_control.f90
+++ b/Src/nptmc_control.f90
@@ -92,6 +92,8 @@ SUBROUTINE NPTMC_Control
   ! Determine whether widom insertions are done and get relevant details if they are
   CALL Get_Widom_Info
 
+  CALL Get_Lookup_Info
+
 
   ! Load molecular conectivity and force field paramters. Note that Get_Nspecies 
   ! must be called before this routine.  
@@ -155,8 +157,6 @@ SUBROUTINE NPTMC_Control
   ! angles exist
   CALL Get_Dihedral_Atoms_To_Place  
 
-  CALL Get_Lookup_Info
-
   CALL Setup_Atompair_Tables
 
 END SUBROUTINE NPTMC_Control
diff --git a/Src/nptmc_driver.f90 b/Src/nptmc_driver.f90
index 4866a6ac..da5fb5dd 100755
--- a/Src/nptmc_driver.f90
+++ b/Src/nptmc_driver.f90
@@ -287,12 +287,6 @@ SUBROUTINE NPTMC_Driver
 
      END IF
 
-     !IF ( cpcollect ) THEN
-     !   DO is = 1,nspecies
-     !      CALL Chempot(1, is)
-     !   END DO
-     !END IF
-
 
      ! do widom insertions, if applicable to this simulation and step
      IF (widom_flag) THEN
diff --git a/Src/nvtmc_control.f90 b/Src/nvtmc_control.f90
index cb243f85..e56ac19c 100755
--- a/Src/nvtmc_control.f90
+++ b/Src/nvtmc_control.f90
@@ -92,6 +92,8 @@ SUBROUTINE NVTMC_Control
   ! Determine whether widom insertions are done and get relevant details if they are
   CALL Get_Widom_Info
 
+  CALL Get_Lookup_Info
+
   ! Load molecular conectivity and force field paramters. Note that Get_Nspecies 
   ! must be called before this routine.  
   CALL Get_Molecule_Info
@@ -144,8 +146,6 @@ SUBROUTINE NVTMC_Control
   ! Dihedral moves
   CALL Get_Dihedral_Atoms_To_Place
 
-  CALL Get_Lookup_Info
-
   CALL Setup_Atompair_Tables
 
 END SUBROUTINE NVTMC_Control
diff --git a/Src/pair_emax_estimation.f90 b/Src/pair_emax_estimation.f90
index 22ccf9c6..4127b241 100644
--- a/Src/pair_emax_estimation.f90
+++ b/Src/pair_emax_estimation.f90
@@ -89,6 +89,8 @@ SUBROUTINE Estimate_Pair_rminsq
                 INTEGER(KIND=INT64), DIMENSION(:), POINTER :: rsqmin_freq_ptr
                 INTEGER(KIND=INT64), DIMENSION(solvent_maxind,wsolute_maxind,nbr_boxes,nbr_tols) :: atompair_nskips
                 REAL(DP), DIMENSION(solvent_maxind,wsolute_maxind,nbr_boxes,nbr_tols) :: atompair_wmax
+                CHARACTER(FILENAME_LEN) :: this_path
+                INTEGER :: this_unit
 
                 IF (.NOT. ALLOCATED(atompair_rminsq_ind_table)) THEN
                         ALLOCATE(atompair_rminsq_ind_table(solvent_maxind,wsolute_maxind,nbr_boxes,nbr_tols))
@@ -138,6 +140,101 @@ SUBROUTINE Estimate_Pair_rminsq
                 END DO
                 !$OMP END DO
                 !$OMP END PARALLEL
+                this_unit = emax_file_unit
+                this_path = ""
+                this_path = TRIM(run_name) // ".rsqmin.wmax"
+                OPEN(unit=this_unit,file=this_path,ACTION='WRITE')
+                DO ibox = 1, nbr_boxes
+                        WRITE(this_unit,*) "Box ", ibox
+                        DO ti_solute = 1, wsolute_maxind
+                                WRITE(this_unit,*)
+                                WRITE(this_unit,*) "Solute atom ", ti_solute
+                                DO ti_solvent = 1, solvent_maxind
+                                        WRITE(this_unit,*) rsqmin_atompair_w_max(:,ti_solvent,ti_solute,ibox)
+                                END DO
+                        END DO
+                END DO
+                WRITE(this_unit,*) "END"
+                CLOSE(this_unit)
+                DO ibox = 1, nbr_boxes
+                        DO is = 1, nspecies
+                                IF (.NOT. species_list(is)%test_particle(ibox)) CYCLE
+                                bsolute = species_list(is)%wsolute_base
+                                rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                                        rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) / &
+                                        species_list(is)%widom_sum(ibox)
+                        END DO
+                END DO
+                this_path = ""
+                this_path = TRIM(run_name) // ".rsqmin.wmax_frac"
+                OPEN(unit=this_unit,file=this_path,ACTION='WRITE')
+                DO ibox = 1, nbr_boxes
+                        WRITE(this_unit,*) "Box ", ibox
+                        DO ti_solute = 1, wsolute_maxind
+                                WRITE(this_unit,*)
+                                WRITE(this_unit,*) "Solute atom ", ti_solute
+                                DO ti_solvent = 1, solvent_maxind
+                                        WRITE(this_unit,*) rsqmin_atompair_w_max(:,ti_solvent,ti_solute,ibox)
+                                END DO
+                        END DO
+                END DO
+                WRITE(this_unit,*) "END"
+                CLOSE(this_unit)
+                DO ibox = 1, nbr_boxes
+                        DO is = 1, nspecies
+                                IF (.NOT. species_list(is)%test_particle(ibox)) CYCLE
+                                bsolute = species_list(is)%wsolute_base
+                                rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                                        rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) * &
+                                        ntrials(is,ibox)%widom
+                        END DO
+                END DO
+                this_path = ""
+                this_path = TRIM(run_name) // ".rsqmin.wmax_norm"
+                OPEN(unit=this_unit,file=this_path,ACTION='WRITE')
+                DO ibox = 1, nbr_boxes
+                        WRITE(this_unit,*) "Box ", ibox
+                        DO ti_solute = 1, wsolute_maxind
+                                WRITE(this_unit,*)
+                                WRITE(this_unit,*) "Solute atom ", ti_solute
+                                DO ti_solvent = 1, solvent_maxind
+                                        WRITE(this_unit,*) rsqmin_atompair_w_max(:,ti_solvent,ti_solute,ibox)
+                                END DO
+                        END DO
+                END DO
+                WRITE(this_unit,*) "END"
+                CLOSE(this_unit)
+                this_path = ""
+                this_path = TRIM(run_name) // ".rsqmin.wfrac"
+                OPEN(unit=this_unit,file=this_path,ACTION='WRITE')
+                DO ibox = 1, nbr_boxes
+                        WRITE(this_unit,*) "Box ", ibox
+                        DO ti_solute = 1, wsolute_maxind
+                                WRITE(this_unit,*)
+                                WRITE(this_unit,*) "Solute atom ", ti_solute
+                                DO ti_solvent = 1, solvent_maxind
+                                        WRITE(this_unit,*) rsqmin_atompair_wfrac(:,ti_solvent,ti_solute,ibox)
+                                END DO
+                        END DO
+                END DO
+                WRITE(this_unit,*) "END"
+                CLOSE(this_unit)
+                this_path = ""
+                this_path = TRIM(run_name) // ".rsqmin.freq"
+                OPEN(unit=this_unit,file=this_path,ACTION='WRITE')
+                DO ibox = 1, nbr_boxes
+                        WRITE(this_unit,*) "Box ", ibox
+                        DO ti_solute = 1, wsolute_maxind
+                                WRITE(this_unit,*)
+                                WRITE(this_unit,*) "Solute atom ", ti_solute
+                                DO ti_solvent = 1, solvent_maxind
+                                        WRITE(this_unit,*) rsqmin_atompair_freq(:,ti_solvent,ti_solute,ibox)
+                                END DO
+                        END DO
+                END DO
+                WRITE(this_unit,*) "END"
+                CLOSE(this_unit)
+
 
                 DO ibox = 1, nbr_boxes
                         DO is = 1, nspecies
@@ -200,6 +297,13 @@ SUBROUTINE Read_Pair_rminsq
                 REAL(DP) :: this_rcut_lowsq, this_rsqmin_step, this_rsqmin_shifter, this_tol, min_rmin
                 INTEGER :: this_solvent_maxind, this_wsolute_maxind, this_nbr_boxes
                 INTEGER, DIMENSION(:), POINTER :: atompair_rminsq_ind_table_ptr
+                INTEGER, DIMENSION(wsolute_maxind) :: ti_which_big_atom, big_atom_ti_list
+                INTEGER :: ifrag, ia_frag, ia_frag_ti, biggest_atom, biggest_atom_ti, i_big_atom, is, wsb
+                REAL(DP) :: ia_frag_rminsq_sum, biggest_atom_rminsq_sum
+
+
+
+
                 this_unit = emax_file_unit
                 IF (.NOT. ALLOCATED(atompair_rminsq_ind_table)) THEN
                         ALLOCATE(atompair_rminsq_ind_table(solvent_maxind,wsolute_maxind,nbr_boxes,nbr_tols))
@@ -239,8 +343,59 @@ SUBROUTINE Read_Pair_rminsq
                         * this_rsqmin_step + this_rsqmin_shifter)
                 atompair_rminsq_table = REAL(atompair_rminsq_ind_table(:,:,:,1),DP) * this_rsqmin_step + this_rsqmin_shifter
                 min_rmin = SQRT(MINVAL(atompair_rminsq_table))
+                n_big_atoms = 0
+                IF (cavity_biasing_flag) THEN
+                        ti_which_big_atom = -999999
+                        DO is = 1, nspecies
+                                IF (.NOT. species_list(is)%l_wsolute) CYCLE
+                                wsb = species_list(is)%wsolute_base
+                                DO ifrag = 1, nfragments(is)
+                                        biggest_atom = 1
+                                        biggest_atom_ti = frag_list(ifrag,is)%atoms(1) + wsb
+                                        biggest_atom_rminsq_sum = SUM(atompair_rminsq_table(:,biggest_atom_ti,:))
+                                        DO ia_frag = 2, frag_list(ifrag,is)%natoms
+                                                ia_frag_ti = frag_list(ifrag,is)%atoms(ia_frag) + wsb
+                                                ia_frag_rminsq_sum = SUM(atompair_rminsq_table(:,ia_frag_ti,:))
+                                                IF (ia_frag_rminsq_sum>biggest_atom_rminsq_sum) THEN
+                                                        biggest_atom = ia_frag
+                                                        biggest_atom_ti = ia_frag_ti
+                                                        biggest_atom_rminsq_sum = ia_frag_rminsq_sum
+                                                END IF
+                                        END DO
+                                        i_big_atom = ti_which_big_atom(biggest_atom_ti)
+                                        IF (i_big_atom < 0) THEN
+                                                n_big_atoms = n_big_atoms+1
+                                                i_big_atom = n_big_atoms
+                                                ti_which_big_atom(biggest_atom_ti) = n_big_atoms
+                                                big_atom_ti_list(n_big_atoms) = biggest_atom_ti
+                                        END IF
+                                        frag_list(ifrag,is)%i_big_atom = i_big_atom
+                                        frag_list(ifrag,is)%ia_frag_big_atom = biggest_atom
+                                END DO
+                        END DO
+                END IF
+                ALLOCATE(solvent_max_rminsq(solvent_maxind,nbr_boxes))
+                ALLOCATE(solvent_min_rminsq(solvent_maxind,0:n_big_atoms,nbr_boxes))
+                ALLOCATE(solvent_max_rminsq_sp(solvent_maxind,nbr_boxes))
+                solvent_max_rminsq = MAXVAL(atompair_rminsq_table,2) ! 2-D result
+                solvent_min_rminsq(:,0,:) = MINVAL(atompair_rminsq_table,2) ! 2-D result
+                IF (cavity_biasing_flag) solvent_min_rminsq(:,1:n_big_atoms,:) = atompair_rminsq_table(:,big_atom_ti_list(1:n_big_atoms),:)
+                box_list%rcut_low_max = SQRT(MAXVAL(MAXVAL(solvent_min_rminsq,1),1))
+                box_list%ideal_bitcell_length = &
+                        MAX(min_ideal_bitcell_length,box_list%rcut_low_max/SQRT(902.0_DP)) ! vector with one element per box
+                sp_atompair_rminsq_table = REAL(atompair_rminsq_table,SP)
+                solvent_max_rminsq_sp = REAL(solvent_max_rminsq,SP)
+                solvents_or_types_maxind = solvent_maxind
                 WRITE(logunit,'(A,F6.3,A)') "Finished reading atompair rminsq table with atompair rmin values in the interval"
                 WRITE(logunit, '(8x,A,F5.3,A,F6.3,A)') "[", min_rmin, ",", max_rmin, "] Angstroms"
+                IF (bitcell_flag) THEN
+                        DO ibox = 1, nbr_boxes
+                                WRITE(logunit, '(A,F5.3,A)') "For box " // TRIM(Int_To_String(ibox)) // ", computed ideal bitcell length = ", &
+                                        box_list(ibox)%rcut_low_max/SQRT(902.0_DP), " Angstroms"
+                                WRITE(logunit, '(A,F5.3,A)') "Setting box " // TRIM(Int_To_String(ibox)) // " ideal bitcell length = ", &
+                                        box_list(ibox)%ideal_bitcell_length, " Angstroms"
+                        END DO
+                END IF
                 WRITE(logunit,'(A80)') "********************************************************************************"
         END SUBROUTINE Read_Pair_rminsq
 
diff --git a/Src/pair_nrg_routines.f90 b/Src/pair_nrg_routines.f90
index c6b8d5cf..f7f4cf93 100755
--- a/Src/pair_nrg_routines.f90
+++ b/Src/pair_nrg_routines.f90
@@ -44,6 +44,7 @@ MODULE Pair_Nrg_Routines
 
   USE Type_Definitions
   USE Global_Variables
+  !$ USE OMP_LIB
 
   IMPLICIT NONE
 
@@ -73,11 +74,7 @@ SUBROUTINE Get_Position_Alive(alive,is,position)
     INTEGER, INTENT(IN) :: alive, is
     INTEGER, INTENT(OUT) :: position
 
-    IF ( is == 1) THEN
-       position = alive
-    ELSE
-       position = SUM(max_molecules(1:is-1)) + alive
-    END IF
+    position = species_list(is)%superlocate_base + alive
 
   END SUBROUTINE Get_Position_Alive
   !********************************************************
@@ -163,19 +160,21 @@ SUBROUTINE Store_Molecule_Pair_Interaction_Arrays(alive,is,this_box,E_vdw,E_qq,
     INTEGER, OPTIONAL:: n_cls_mol, id_cls_mol(:), is_cls_mol(:)
     INTEGER, OPTIONAL :: box_cls_mol(:)
 
-    INTEGER :: n_mols, stride, position, imol
+    INTEGER :: n_mols, stride, imol
     REAL(DP), INTENT(OUT) :: E_vdw, E_qq
 
     REAL(DP), OPTIONAL, INTENT(OUT) :: box_nrg_vdw(:), box_nrg_qq(:)
 
     INTEGER :: locate_1, locate_2, this_species, this_im, locate_im
+    INTEGER :: vlen, sl_base, i_base, i
+    REAL(DP) :: nrg_vdw, nrg_qq
 
 
     IF ( .NOT. present(n_cls_mol)) THEN
        ! only a single molecule storage is necessary
        n_mols = 1
-       ALLOCATE(pair_vdw_temp(SUM(max_molecules)))
-       ALLOCATE(pair_qq_temp(SUM(max_molecules)))
+       ALLOCATE(pair_vdw_temp(SUM(nmols(:,this_box)),1))
+       ALLOCATE(pair_qq_temp(SUM(nmols(:,this_box)),1))
 
     ELSE
 
@@ -183,8 +182,8 @@ SUBROUTINE Store_Molecule_Pair_Interaction_Arrays(alive,is,this_box,E_vdw,E_qq,
        ! we will form an array that is n_cls_mol * SUM(max_molecules)
        n_mols = n_cls_mol
 
-       ALLOCATE(pair_vdw_temp(n_mols*SUM(max_molecules)))
-       ALLOCATE(pair_qq_temp(n_mols*SUM(max_molecules)))
+       ALLOCATE(pair_vdw_temp(MAXVAL(SUM(nmols(:,1:),1)),n_mols))
+       ALLOCATE(pair_qq_temp(MAXVAL(SUM(nmols(:,1:),1)),n_mols))
 
        IF ( present(box_cls_mol)) THEN
           box_nrg_vdw(:) = 0.0_DP
@@ -196,18 +195,10 @@ SUBROUTINE Store_Molecule_Pair_Interaction_Arrays(alive,is,this_box,E_vdw,E_qq,
     E_vdw = 0.0_DP
     E_qq = 0.0_DP
    
-    pair_vdw_temp(:) = 0.0_DP
-    pair_qq_temp(:) = 0.0_DP
-
-   
 
     DO imol = 1, n_mols
        
-       IF ( .NOT. present(n_cls_mol)) THEN
-       
-          CALL Get_Position_Alive(alive,is,locate_1)
-
-       ELSE
+       IF (present(n_cls_mol)) THEN
 
           alive = id_cls_mol(imol)
           is = is_cls_mol(imol)
@@ -215,41 +206,56 @@ SUBROUTINE Store_Molecule_Pair_Interaction_Arrays(alive,is,this_box,E_vdw,E_qq,
        END IF
 
        IF ( present(box_cls_mol) ) THEN
-
           this_box = box_cls_mol(imol)
           E_vdw = 0.0_DP
           E_qq = 0.0_DP
 
        END IF
           
-       CALL Get_Position_Alive(alive,is,locate_1)
+       !CALL Get_Position_Alive(alive,is,locate_1)
+       locate_1 = species_list(is)%superlocate_base+alive
 
        !Get_Position_Alive is used in conjunction with pair_vdw/pair_qq arrays
 
-       stride = (imol-1) * SUM(max_molecules)
+       i_base = 0
        
        speciesLoop: DO this_species = 1, nspecies
+                sl_base = species_list(this_species)%superlocate_base
+                vlen = nmols(this_species,this_box)
+                IF (vlen == 0) CYCLE
+                IF (open_mc_flag) THEN
+                        !$OMP PARALLEL
+                        !$OMP DO SIMD SCHEDULE(STATIC) PRIVATE(nrg_vdw,nrg_qq,locate_2) &
+                        !$OMP REDUCTION(+: E_vdw, E_qq)
+                        DO i = 1, vlen
+                                locate_2 = locate(i,this_species,this_box)+sl_base
+                                nrg_vdw = pair_nrg_vdw(locate_2,locate_1)
+                                nrg_qq = pair_nrg_qq(locate_2,locate_1)
+                                pair_vdw_temp(i_base+i,imol) = nrg_vdw
+                                pair_qq_temp(i_base+i,imol) = nrg_qq
+                                E_vdw = E_vdw + nrg_vdw
+                                E_qq = E_qq + nrg_qq
+                        END DO
+                        !$OMP END DO SIMD
+                        !$OMP END PARALLEL
+                ELSE
+                        IF (this_box > 1) sl_base = sl_base + SUM(nmols(this_species,1:this_box-1))
+                        !$OMP PARALLEL
+                        !$OMP DO SIMD SCHEDULE(STATIC) PRIVATE(nrg_vdw,nrg_qq) &
+                        !$OMP REDUCTION(+: E_vdw, E_qq)
+                        DO i = 1, vlen
+                                nrg_vdw = pair_nrg_vdw(i+sl_base,locate_1)
+                                nrg_qq = pair_nrg_qq(i+sl_base,locate_1)
+                                pair_vdw_temp(i_base+i,imol) = nrg_vdw
+                                pair_qq_temp(i_base+i,imol) = nrg_qq
+                                E_vdw = E_vdw + nrg_vdw
+                                E_qq = E_qq + nrg_qq
+                        END DO
+                        !$OMP END DO SIMD
+                        !$OMP END PARALLEL
+                END IF
+                i_base = i_base + vlen
           
-          molidLoop: DO this_im = 1, nmols(this_species, this_box)
-             
-             locate_im = locate(this_im,this_species,this_box)
-             
-             IF (molecule_list(locate_im,this_species)%live) THEN
-                   
-                   CALL Get_Position_Alive(locate_im,this_species,locate_2)
-                   
-                   position = locate_2 + stride
-                   
-                   pair_vdw_temp(position) = pair_nrg_vdw(locate_2,locate_1)
-                   pair_qq_temp(position) = pair_nrg_qq(locate_2,locate_1)
-                   
-                   E_vdw = E_vdw + pair_vdw_temp(position)
-                   E_qq = E_qq + pair_qq_temp(position)
-                   
-                
-             END IF
-             
-          END DO molidLoop
           
        END DO speciesLoop
 
@@ -290,6 +296,8 @@ SUBROUTINE Reset_Molecule_Pair_Interaction_Arrays(alive,is,this_box, n_cls_mol,
     INTEGER :: n_mols, imol, stride
 
     INTEGER :: locate_1, this_species, this_im, locate_im, locate_2
+    INTEGER :: vlen, sl_base, i_base, i
+    REAL(DP) :: nrg_vdw, nrg_qq
 
 
     IF ( present(n_cls_mol)) THEN
@@ -310,32 +318,47 @@ SUBROUTINE Reset_Molecule_Pair_Interaction_Arrays(alive,is,this_box, n_cls_mol,
        END IF
        
        
-       CALL Get_Position_Alive(alive,is,locate_1)
+       !CALL Get_Position_Alive(alive,is,locate_1)
+       locate_1 = species_list(is)%superlocate_base+alive
+       i_base = 0
 
-       stride = (imol - 1) * SUM(max_molecules)
-       
        DO this_species = 1, nspecies
-          
-          DO this_im = 1, nmols(this_species, this_box)
-             
-             locate_im = locate(this_im,this_species,this_box)
-             
-             IF (molecule_list(locate_im,this_species)%live) THEN
-                
-                   
-                   CALL Get_Position_Alive(locate_im,this_species,locate_2)
-                   
-                   pair_nrg_vdw(locate_1,locate_2) = pair_vdw_temp(locate_2 + stride)
-                   pair_nrg_vdw(locate_2,locate_1) = pair_vdw_temp(locate_2 + stride)
-                   
-                   pair_nrg_qq(locate_1,locate_2) = pair_qq_temp(locate_2 + stride)
-                   pair_nrg_qq(locate_2,locate_1) = pair_qq_temp(locate_2 + stride)
-                
-                
-             END IF
-             
-          END DO
-          
+                sl_base = species_list(this_species)%superlocate_base
+                vlen = nmols(this_species,this_box)
+                IF (vlen == 0) CYCLE
+                IF (open_mc_flag) THEN
+                        !$OMP PARALLEL WORKSHARE
+                        pair_nrg_vdw(locate(1:vlen,this_species,this_box)+sl_base,locate_1) = pair_vdw_temp(i_base+1:i_base+vlen,imol)
+                        pair_nrg_qq(locate(1:vlen,this_species,this_box)+sl_base,locate_1) = pair_qq_temp(i_base+1:i_base+vlen,imol)
+                        pair_nrg_vdw(locate_1,locate(1:vlen,this_species,this_box)+sl_base) = pair_vdw_temp(i_base+1:i_base+vlen,imol)
+                        pair_nrg_qq(locate_1,locate(1:vlen,this_species,this_box)+sl_base) = pair_qq_temp(i_base+1:i_base+vlen,imol)
+                        !$OMP END PARALLEL WORKSHARE
+                ELSE
+                        IF (this_box > 1) sl_base = sl_base + SUM(nmols(this_species,1:this_box-1))
+                        !$OMP PARALLEL
+                        !$OMP DO SIMD SCHEDULE(STATIC) PRIVATE(nrg_vdw,nrg_qq)
+                        DO i = 1, locate_1-sl_base-1
+                                nrg_vdw = pair_vdw_temp(i_base+i,imol)
+                                nrg_qq = pair_qq_temp(i_base+i,imol)
+                                pair_nrg_vdw(i+sl_base,locate_1) = nrg_vdw
+                                pair_nrg_qq(i+sl_base,locate_1) = nrg_qq
+                                pair_nrg_vdw(locate_1,i+sl_base) = nrg_vdw
+                                pair_nrg_qq(locate_1,i+sl_base) = nrg_qq
+                        END DO
+                        !$OMP END DO SIMD NOWAIT
+                        !$OMP DO SIMD SCHEDULE(STATIC) PRIVATE(nrg_vdw,nrg_qq)
+                        DO i = locate_1-sl_base+1, vlen
+                                nrg_vdw = pair_vdw_temp(i_base+i,imol)
+                                nrg_qq = pair_qq_temp(i_base+i,imol)
+                                pair_nrg_vdw(i+sl_base,locate_1) = nrg_vdw
+                                pair_nrg_qq(i+sl_base,locate_1) = nrg_qq
+                                pair_nrg_vdw(locate_1,i+sl_base) = nrg_vdw
+                                pair_nrg_qq(locate_1,i+sl_base) = nrg_qq
+                        END DO
+                        !$OMP END DO SIMD
+                        !$OMP END PARALLEL
+                END IF
+                i_base = i_base + vlen
        END DO
        
     END DO
diff --git a/Src/participation.f90 b/Src/participation.f90
index 59ae4feb..ab7d1791 100755
--- a/Src/participation.f90
+++ b/Src/participation.f90
@@ -1185,7 +1185,6 @@ SUBROUTINE Write_Ring_Fragment_MCF_Dihedral_Info(ifrag,is)
        !        dihedral_list(this_dihedral,is)%dihedral_param(7)/kjmol_to_atomic, &
        !        dihedral_list(this_dihedral,is)%dihedral_param(8), &
        !        dihedral_list(this_dihedral,is)%dihedral_param(9) * (180_DP/PI)
-               
        CASE(int_harmonic)
           
           WRITE(201,'(I5,2X, 4(I4,2X), A8,2X, 2(F8.3,2X))') i, &
@@ -1212,9 +1211,6 @@ SUBROUTINE Write_Ring_Fragment_MCF_Dihedral_Info(ifrag,is)
                dihedral_list(this_dihedral,is)%rb_c(5)/kjmol_to_atomic
        END SELECT
 
-          
-       !END IF
-
     END DO
 
   END SUBROUTINE Write_Ring_Fragment_MCF_Dihedral_Info
diff --git a/Src/pregen_control.f90 b/Src/pregen_control.f90
index 26b2b7bc..40992b46 100644
--- a/Src/pregen_control.f90
+++ b/Src/pregen_control.f90
@@ -36,6 +36,8 @@ SUBROUTINE Pregen_Control
         ! Determine whether widom insertions are done and get relevant details if they are
         CALL Get_Widom_Info
 
+        CALL Get_Lookup_Info
+
         ! Load molecular connectivity and force field parameters.  Note that Get_Nspecies
         ! must be called before this routine
         CALL Get_Molecule_Info
@@ -92,8 +94,6 @@ SUBROUTINE Pregen_Control
         ! Connect to pregenerated trajectory files and get other related settings, if any
         CALL Get_Pregen_Info
 
-        CALL Get_Lookup_Info
-
         CALL Setup_Atompair_Tables
         
 
diff --git a/Src/pregen_driver.f90 b/Src/pregen_driver.f90
index f4bf3d1b..293beeed 100755
--- a/Src/pregen_driver.f90
+++ b/Src/pregen_driver.f90
@@ -40,6 +40,7 @@ SUBROUTINE Pregen_Driver
   USE Energy_Routines
   USE Read_Write_Checkpoint
   USE Simulation_Properties
+  USE Trajectory_Reader_Routines
   USE XTC_Routines, ONLY : Close_XTC
 
   IMPLICIT NONE
@@ -52,7 +53,7 @@ SUBROUTINE Pregen_Driver
   REAL(DP) :: rand_no
   REAL(DP) :: time_start, now_time, thermo_time, coord_time, block_avg_time
 
-  LOGICAL :: overlap, early_end
+  LOGICAL :: overlap
   LOGICAL :: write_flag, complete
 
   TYPE(Energy_Class) :: energy_old
@@ -118,7 +119,7 @@ SUBROUTINE Pregen_Driver
      ! Load and advance to next frame
      !*****************************************************************************
 
-     CALL Load_Next_Frame(early_end)
+     CALL Load_Next_Frame
 
      IF (early_end) THEN
              WRITE(logunit,*)
diff --git a/Src/random_generators.f90 b/Src/random_generators.f90
index 9eae4740..527d29e8 100755
--- a/Src/random_generators.f90
+++ b/Src/random_generators.f90
@@ -31,7 +31,7 @@ MODULE random_generators
 ! of the L'Ecuyer Randomb number generator
 ! Modified by Andrew Paluch, 1 March 2009.
 USE ISO_FORTRAN_ENV
-USE Type_Definitions, ONLY : DP
+USE Type_Definitions
 !$ USE OMP_LIB
 IMPLICIT NONE
 ! The intrinsic function "selected_real_kind" takes two arguments. The first is the number 
@@ -41,7 +41,13 @@ MODULE random_generators
 ! values in case the seeds are not initialized by the user
 INTEGER (KIND=INT64), SAVE  :: s1 = 153587801, s2 = -759022222, s3 = 1288503317, &
                             s4 = -1718083407, s5 = -123456789
-!$OMP threadprivate(s1,s2,s3,s4,s5)
+INTEGER(INT64), DIMENSION(dimpad_8byte,5) :: s_arr
+INTEGER(INT64), PRIVATE, PARAMETER :: const1 = -2_INT64, &
+        const2 = -512_INT64, &
+        const3 = -4096_INT64, &
+        const4 = -131072_INT64, &
+        const5 = -8388608_INT64
+!$OMP threadprivate(s1,s2,s3,s4,s5,s_arr)
 
 CONTAINS
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -55,16 +61,21 @@ SUBROUTINE init_seeds(i1, i3)
 !
 ! For our simulations, we will specify only two of the five seeds, and leave the others
 ! as the default values
+! The above statement does not apply to SIMD lane-specific seeds
 
 IMPLICIT NONE
-INTEGER (KIND=INT64), INTENT(IN) :: i1, i3
+INTEGER (KIND=INT64), INTENT(IN), OPTIONAL :: i1, i3
 INTEGER (KIND=INT64), ALLOCATABLE, DIMENSION(:,:) :: thread_seeds
-INTEGER :: nthreads, ithread
+INTEGER :: nthreads, ithread, i, j
 nthreads = 1
-s1 = i1
-s3 = i3
-IF (IAND(s1,      -2_INT64) == 0) s1 = i1 - 8388607_INT64
-IF (IAND(s3,   -4096_INT64) == 0) s3 = i3 - 8388607_INT64
+IF (PRESENT(i1)) THEN
+        s1 = i1
+        IF (IAND(s1,      const1) == 0) s1 = i1 - 8388607_INT64
+END IF
+IF (PRESENT(i3)) THEN
+        s3 = i3
+        IF (IAND(s3,   const3) == 0) s3 = i3 - 8388607_INT64
+END IF
 !$ nthreads = OMP_GET_MAX_THREADS()
 ALLOCATE(thread_seeds(5,0:nthreads-1))
 thread_seeds(1,0) = s1
@@ -77,15 +88,30 @@ SUBROUTINE init_seeds(i1, i3)
         thread_seeds(3,ithread) = rranint()
 END DO
 ithread = 0
-!$OMP PARALLEL PRIVATE(ithread)
+!$OMP PARALLEL PRIVATE(ithread,i,j)
 !$ ithread = OMP_GET_THREAD_NUM()
 s1 = thread_seeds(1,ithread)
 s2 = thread_seeds(2,ithread)
 s3 = thread_seeds(3,ithread)
 s4 = thread_seeds(4,ithread)
 s5 = thread_seeds(5,ithread)
-IF (IAND(s1,      -2_INT64) == 0) s1 = s1 - 8388607_INT64
-IF (IAND(s3,   -4096_INT64) == 0) s3 = s3 - 8388607_INT64
+IF (IAND(s1,      const1) == 0) s1 = s1 - 8388607_INT64
+IF (IAND(s3,   const3) == 0) s3 = s3 - 8388607_INT64
+s_arr = 0_INT64
+DO WHILE (ANY(IAND(s_arr,MASKL(41,INT64))+1_INT64==1_INT64))
+        DO j = 1, 5
+                DO i = 1, dimpad_8byte
+                        s_arr(i,j) = rranint()
+                END DO
+        END DO
+END DO
+s1 = thread_seeds(1,ithread)
+s2 = thread_seeds(2,ithread)
+s3 = thread_seeds(3,ithread)
+s4 = thread_seeds(4,ithread)
+s5 = thread_seeds(5,ithread)
+IF (IAND(s1,      const1) == 0) s1 = s1 - 8388607_INT64
+IF (IAND(s3,   const3) == 0) s3 = s3 - 8388607_INT64
 !$OMP END PARALLEL
 RETURN
 END SUBROUTINE init_seeds
@@ -104,15 +130,15 @@ FUNCTION rranf()
 INTEGER (KIND=INT64) :: b
 
 b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
-s1 = IEOR( ISHFT( IAND(s1,-2_INT64), 10), b)
+s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
 b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
-s2 = IEOR( ISHFT( IAND(s2,-512_INT64), 5), b)
+s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
 b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
-s3 = IEOR( ISHFT( IAND(s3,-4096_INT64), 29), b)
+s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
 b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
-s4 = IEOR( ISHFT( IAND(s4,-131072_INT64), 23), b)
+s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
 b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
-s5 = IEOR( ISHFT( IAND(s5,-8388608_INT64), 8), b)
+s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
 
 ! pconst is the reciprocal of (2^64 - 1)
 rranf = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5) *5.4210108624275221E-20_DP + 0.5_DP
@@ -120,6 +146,1155 @@ FUNCTION rranf()
 IF(rranf .GE. 1.0_DP) WRITE(logunit,*) 'rranf = 1.0'
 
 END FUNCTION rranf
+SUBROUTINE vector_rranint(rranint_vec)
+
+        IMPLICIT NONE
+
+        INTEGER (KIND=INT64) :: b
+        INTEGER(INT64), DIMENSION(:), CONTIGUOUS :: rranint_vec
+        INTEGER :: vec_len, jmax, lenmod4, i, j
+        INTEGER(INT64) :: s1,s2,s3,s4,s5
+        vec_len = SIZE(rranint_vec)
+        jmax = SHIFTR(vec_len,log2dimpad_8byte)-1
+        lenmod4 = IAND(vec_len,padconst_8byte)
+
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5)
+        DO i = 1, dimpad_8byte
+                s1 = s_arr(i,1)
+                s2 = s_arr(i,2)
+                s3 = s_arr(i,3)
+                s4 = s_arr(i,4)
+                s5 = s_arr(i,5)
+                !DIR$ LOOP COUNT = 250000
+                DO j = 0, jmax
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+
+                        rranint_vec(j*dimpad_8byte+i) = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5)
+                END DO
+                s_arr(i,1) = s1
+                s_arr(i,2) = s2
+                s_arr(i,3) = s3
+                s_arr(i,4) = s4
+                s_arr(i,5) = s5
+        END DO
+        !$OMP END SIMD
+        DO i = 1, lenmod4
+                rranint_vec((jmax+1)*4+i) = rranint()
+        END DO
+
+END SUBROUTINE vector_rranint
+
+SUBROUTINE vector_rranf(rranf_vec)
+
+        IMPLICIT NONE
+
+        INTEGER (KIND=INT64) :: b
+        REAL(DP), DIMENSION(:), CONTIGUOUS :: rranf_vec
+        INTEGER :: vec_len, jmax, lenmod4, i, j
+        INTEGER(INT64) :: s1,s2,s3,s4,s5,intres
+        REAL(DP), PARAMETER :: one_dp = 1.0_DP
+        vec_len = SIZE(rranf_vec)
+        jmax = SHIFTR(vec_len,log2dimpad_8byte)-1
+        lenmod4 = IAND(vec_len,padconst_8byte)
+
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres) SIMDLEN(dimpad_8byte)
+        DO i = 1, dimpad_8byte
+                s1 = s_arr(i,1)
+                s2 = s_arr(i,2)
+                s3 = s_arr(i,3)
+                s4 = s_arr(i,4)
+                s5 = s_arr(i,5)
+                !DIR$ LOOP COUNT = 250
+                DO j = 0, jmax
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+
+                        ! We don't use the following line because conversion from INT64 to DP is not vectorized well unless
+                        ! you have AVX-512, which we don't, and neither do any AMD processors, currently.
+                        !rranf_vec(j*4+i) = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5)*5.4210108624275221E-20_DP + 0.5_DP
+                        !intres = IEOR(IEOR(IEOR(IEOR(s1,s2),s3),s4),s5)
+                        rranf_vec(j*dimpad_8byte+i) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+                END DO
+                s_arr(i,1) = s1
+                s_arr(i,2) = s2
+                s_arr(i,3) = s3
+                s_arr(i,4) = s4
+                s_arr(i,5) = s5
+        END DO
+        !$OMP END SIMD
+        DO i = 1, lenmod4
+                rranf_vec((jmax+1)*dimpad_8byte+i) = rranf()
+        END DO
+
+END SUBROUTINE vector_rranf
+
+SUBROUTINE cavity_biased_rranf(rranf_arr,i_big_atom,ibox)
+
+        USE Global_Variables, ONLY: box_list, cavdatalist, l_compress
+
+        IMPLICIT NONE
+
+        INTEGER (KIND=INT64) :: b
+        REAL(DP), DIMENSION(:,:), CONTIGUOUS, INTENT(OUT) :: rranf_arr
+        INTEGER, INTENT(IN) :: i_big_atom, ibox
+        INTEGER, DIMENSION(SIZE(rranf_arr,1)) :: cavlocvec_int32
+        INTEGER(INT64), DIMENSION(SIZE(rranf_arr,1)) :: cavlocvec
+        REAL(DP) :: lbcr(3), cavxyzloc, ncavs_dp
+        INTEGER :: vec_len, lenmod4, i, j, jmax
+        INTEGER(INT64) :: s1,s2,s3,s4,s5,intres
+        INTEGER(INT64) :: cavloc, ncavs
+        INTEGER(INT32) :: cavloc_int32, cavxyzloc_int32
+
+        INTEGER(INT64), PARAMETER :: one_dp_as_int = TRANSFER(1.0_DP,const1)
+        INTEGER(INT64), PARAMETER :: ncavs_threshold = SHIFTL(1_INT64,31)
+        INTEGER(INT32), PARAMETER :: adjustment_mask_int32 = &
+                IOR(IOR(1,SHIFTL(1,11)),SHIFTL(1,21))
+        INTEGER(INT64), PARAMETER :: adjustment_mask_int64 = &
+                IOR(IOR(1_INT64,SHIFTL(1_INT64,21)),SHIFTL(1_INT64,42))
+        INTEGER :: ncavs_fine_int32, excess_cavloc_int32, excess_mod8_int32, adjustment_int32
+        INTEGER(INT64) :: ncavs_fine, excess_cavloc, excess_mod8_int64, adjustment_int64
+        LOGICAL(8) :: l_coarse
+
+
+        vec_len = SIZE(rranf_arr,1)
+        jmax = SHIFTR(vec_len,log2dimpad_8byte)-1
+        ! This subroutine is only valid for vec_vlen as a multiple of dimpad_8byte, so no remainder.
+        ! At time of writing, it is only used in a case in which vec_vlen is padded to a multple of dimpad_4byte.
+        !lenmod4 = IAND(vec_len,3_INT32)
+
+        ncavs_dp = cavdatalist(i_big_atom,ibox)%ncavs_dp
+        lbcr = 1.0_DP/REAL(box_list(ibox)%length_bitcells,DP)
+
+        ncavs = cavdatalist(i_big_atom,ibox)%ncavs
+        IF (l_compress .AND. ncavs<ncavs_threshold) THEN
+                ncavs_fine_int32 = INT(cavdatalist(i_big_atom,ibox)%ncavs_fine,INT32)
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc_int32,cavxyzloc) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres, one_dp_as_int)
+                                ! only using cavxyzloc because it's a float, despite it not being used the same as in later steps
+                                cavxyzloc = ncavs_dp
+                                cavxyzloc = TRANSFER(intres,cavxyzloc)*cavxyzloc - cavxyzloc
+                                ! cavxyzloc is now random float in range [0.0,ncavs_dp)
+                                ! convert cavxyzloc to 32-bit integer in range [0,ncavs-1]
+                                cavloc_int32 = INT(cavxyzloc,INT32) ! now we have the randomly chosen cavity voxel index
+                                cavlocvec_int32(j*dimpad_8byte+i) = cavloc_int32
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        ELSE IF (l_compress) THEN
+                ncavs_fine = cavdatalist(i_big_atom,ibox)%ncavs_fine
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc,cavxyzloc) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres, one_dp_as_int)
+                                ! only using cavxyzloc because it's a float, despite it not being used the same as in later steps
+                                cavxyzloc = ncavs_dp
+                                cavxyzloc = TRANSFER(intres,cavxyzloc)*cavxyzloc - cavxyzloc
+                                ! cavxyzloc is now random float in range [0.0,ncavs_dp)
+                                ! convert cavxyzloc to 32-bit integer in range [0,ncavs-1]
+                                cavloc = INT(cavxyzloc,INT64) ! now we have the randomly chosen cavity voxel index
+                                cavlocvec(j*dimpad_8byte+i) = cavloc
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        END IF
+        IF (box_list(ibox)%l_cavloc_int32 .AND. l_compress) THEN
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(cavloc_int32,excess_cavloc_int32,excess_mod8_int32,adjustment_int32)
+                DO i = 1, vec_len
+                        cavloc_int32 = cavlocvec_int32(i)
+                        excess_cavloc_int32 = MAX(cavloc_int32-ncavs_fine_int32,0)
+                        cavloc_int32 = MIN(cavloc_int32,ncavs_fine_int32)
+                        cavloc_int32 = cavloc_int32 + SHIFTR(excess_cavloc_int32,3)
+                        cavloc_int32 = cavdatalist(i_big_atom,ibox)%cavity_locs_int32(cavloc_int32) ! fetch voxel coordinates from random index
+                        excess_mod8_int32 = IAND(excess_cavloc_int32,7_INT32)
+                        adjustment_int32 = IOR(excess_mod8_int32,SHIFTL(excess_mod8_int32,10))
+                        adjustment_int32 = IOR(adjustment_int32,SHIFTL(excess_mod8_int32,19))
+                        adjustment_int32 = IAND(adjustment_int32,adjustment_mask_int32)
+                        cavloc_int32 = IOR(cavloc_int32, adjustment_int32)
+                        cavlocvec_int32(i) = cavloc_int32
+                END DO
+                !$OMP END SIMD
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc_int32,cavxyzloc,cavxyzloc_int32) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                cavloc_int32 = cavlocvec_int32(j*dimpad_8byte+i)
+
+                                ! cavloc now stores the 3-D integer voxel grid coordinates of the chosen cavity voxel as a single
+                                ! 64-bit integer.
+                                ! The first-dimension coordinate is stored in bits 0-10, the second-dimension coordinate is stored
+                                ! in the next 10 bits (11-20), and the third-dimension coordinate is stored in bits 21-30.
+                                ! bit 31 should be zero
+
+
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use two bitshifts to zero all bits to the left of bit position 10 to yield only first coordinate
+                                cavxyzloc_int32 = SHIFTL(cavloc_int32,21)
+                                cavxyzloc_int32 = SHIFTR(cavxyzloc_int32,21)
+                                ! We want a random float in range [cavxyzloc_int32, cavxyzloc_int32+1).
+                                ! decrement by 1 because the random float we will add has range [1.0, 2.0) instead of [0.0,1.0)
+                                cavxyzloc_int32 = cavxyzloc_int32 - 1
+                                cavxyzloc = REAL(cavxyzloc_int32,DP) ! convert b to float
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                ! addition or IOR or IEOR works to combine exponent and significand fraction
+                                ! because the ones don't overlap and sign bit is 0
+                                intres = IOR(intres,one_dp_as_int) 
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in first dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(1)
+                                rranf_arr(j*dimpad_8byte+i,1) = cavxyzloc ! write first-dimension fractional coordinate to memory
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                ! use two bitshifts to move second-dimension coordinate to first 10 bits & clear other bits
+                                cavxyzloc_int32 = SHIFTL(cavloc_int32,11) ! discards bits to the left of second-dimension coordinate
+                                cavxyzloc_int32 = SHIFTR(cavxyzloc_int32,22) ! moves second-dimension coordinate to proper place while discarding bits to right
+                                cavxyzloc_int32 = cavxyzloc_int32 - 1
+                                cavxyzloc = REAL(cavxyzloc_int32,DP)
+                                ! Move third-dimension coordinate to first 10 bits and clear other bits.
+                                ! Shift in place this time to keep result in cavloc_int32 because the other coordinates were already
+                                ! extracted
+                                cavloc_int32 = SHIFTR(cavloc_int32,21) ! only one bitshift is needed because nothing is to the left
+                                cavloc_int32 = cavloc_int32 - 1
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in second dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(2)
+                                rranf_arr(j*dimpad_8byte+i,2) = cavxyzloc ! write second-dimension fractional coordinate to memory
+                                cavxyzloc = REAL(cavloc_int32,DP)
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in third dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(3)
+                                rranf_arr(j*dimpad_8byte+i,3) = cavxyzloc ! write third-dimension fractional coordinate to memory
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        ELSE IF (box_list(ibox)%l_cavloc_int32) THEN
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc_int32,cavxyzloc,cavxyzloc_int32) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres, one_dp_as_int)
+                                ! only using cavxyzloc because it's a float, despite it not being used the same as in later steps
+                                cavxyzloc = ncavs_dp
+                                cavxyzloc = TRANSFER(intres,cavxyzloc)*cavxyzloc - cavxyzloc
+                                ! cavxyzloc is now random float in range [0.0,ncavs_dp)
+                                ! convert cavxyzloc to 32-bit integer in range [0,ncavs-1]
+                                cavloc_int32 = INT(cavxyzloc,INT32) ! now we have the randomly chosen cavity voxel index
+                                cavloc_int32 = cavdatalist(i_big_atom,ibox)%cavity_locs_int32(cavloc_int32) ! fetch voxel coordinates from random index
+
+                                ! cavloc now stores the 3-D integer voxel grid coordinates of the chosen cavity voxel as a single
+                                ! 64-bit integer.
+                                ! The first-dimension coordinate is stored in bits 0-10, the second-dimension coordinate is stored
+                                ! in the next 10 bits (11-20), and the third-dimension coordinate is stored in bits 21-30.
+                                ! bit 31 should be zero
+
+
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use two bitshifts to zero all bits to the left of bit position 10 to yield only first coordinate
+                                cavxyzloc_int32 = SHIFTL(cavloc_int32,21)
+                                cavxyzloc_int32 = SHIFTR(cavxyzloc_int32,21)
+                                ! We want a random float in range [cavxyzloc_int32, cavxyzloc_int32+1).
+                                ! decrement by 1 because the random float we will add has range [1.0, 2.0) instead of [0.0,1.0)
+                                cavxyzloc_int32 = cavxyzloc_int32 - 1
+                                cavxyzloc = REAL(cavxyzloc_int32,DP) ! convert b to float
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                ! addition or IOR or IEOR works to combine exponent and significand fraction
+                                ! because the ones don't overlap and sign bit is 0
+                                intres = IOR(intres,one_dp_as_int) 
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in first dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(1)
+                                rranf_arr(j*dimpad_8byte+i,1) = cavxyzloc ! write first-dimension fractional coordinate to memory
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                ! use two bitshifts to move second-dimension coordinate to first 10 bits & clear other bits
+                                cavxyzloc_int32 = SHIFTL(cavloc_int32,11) ! discards bits to the left of second-dimension coordinate
+                                cavxyzloc_int32 = SHIFTR(cavxyzloc_int32,22) ! moves second-dimension coordinate to proper place while discarding bits to right
+                                cavxyzloc_int32 = cavxyzloc_int32 - 1
+                                cavxyzloc = REAL(cavxyzloc_int32,DP)
+                                ! Move third-dimension coordinate to first 10 bits and clear other bits.
+                                ! Shift in place this time to keep result in cavloc_int32 because the other coordinates were already
+                                ! extracted
+                                cavloc_int32 = SHIFTR(cavloc_int32,21) ! only one bitshift is needed because nothing is to the left
+                                cavloc_int32 = cavloc_int32 - 1
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in second dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(2)
+                                rranf_arr(j*dimpad_8byte+i,2) = cavxyzloc ! write second-dimension fractional coordinate to memory
+                                cavxyzloc = REAL(cavloc_int32,DP)
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in third dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(3)
+                                rranf_arr(j*dimpad_8byte+i,3) = cavxyzloc ! write third-dimension fractional coordinate to memory
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        ELSE IF (l_compress) THEN
+                IF (ncavs<ncavs_threshold) THEN
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP SIMD PRIVATE(cavloc_int32,excess_cavloc_int32,excess_mod8_int64,adjustment_int64,cavloc)
+                        DO i = 1, vec_len
+                                cavloc_int32 = cavlocvec_int32(i)
+                                excess_cavloc_int32 = MAX(cavloc_int32-ncavs_fine_int32,0)
+                                cavloc_int32 = MIN(cavloc_int32,ncavs_fine_int32)
+                                cavloc_int32 = cavloc_int32 + SHIFTR(excess_cavloc_int32,3)
+                                cavloc = cavdatalist(i_big_atom,ibox)%cavity_locs(cavloc_int32) ! fetch voxel coordinates from random index
+                                excess_mod8_int64 = INT(IAND(excess_cavloc_int32,7_INT32),INT64)
+                                adjustment_int64 = IOR(excess_mod8_int64,SHIFTL(excess_mod8_int64,20))
+                                adjustment_int64 = IOR(adjustment_int64,SHIFTL(excess_mod8_int64,40))
+                                adjustment_int64 = IAND(adjustment_int64,adjustment_mask_int64)
+                                cavloc = IOR(cavloc, adjustment_int64)
+                                cavlocvec(i) = cavloc
+                        END DO
+                        !$OMP END SIMD
+                ELSE
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP SIMD PRIVATE(excess_cavloc,excess_mod8_int64,adjustment_int64,cavloc)
+                        DO i = 1, vec_len
+                                cavloc = cavlocvec(i)
+                                excess_cavloc = cavloc-ncavs_fine
+                                !l_coarse = cavloc>=ncavs_fine
+                                !excess_cavloc = MERGE(excess_cavloc,0_INT64,l_coarse)
+                                !cavloc = MERGE(ncavs_fine,cavloc,l_coarse)
+                                excess_cavloc = MAX(excess_cavloc,0_INT64)
+                                cavloc = MIN(cavloc,ncavs_fine)
+                                cavloc = cavloc + SHIFTR(excess_cavloc,3)
+                                cavloc = cavdatalist(i_big_atom,ibox)%cavity_locs(cavloc) ! fetch voxel coordinates from random index
+                                excess_mod8_int64 = IAND(excess_cavloc,7_INT64)
+                                adjustment_int64 = IOR(excess_mod8_int64,SHIFTL(excess_mod8_int64,20))
+                                adjustment_int64 = IOR(adjustment_int64,SHIFTL(excess_mod8_int64,40))
+                                adjustment_int64 = IAND(adjustment_int64,adjustment_mask_int64)
+                                cavloc = IOR(cavloc, adjustment_int64)
+                                cavlocvec(i) = cavloc
+                        END DO
+                        !$OMP END SIMD
+                END IF
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc,cavxyzloc) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                cavloc = cavlocvec(j*dimpad_8byte+i)
+
+                                ! cavloc now stores the 3-D integer voxel grid coordinates of the chosen cavity voxel as a single
+                                ! 64-bit integer.
+                                ! The first-dimension coordinate is stored in bits 0-20, the second-dimension coordinate is stored
+                                ! in the next 21 bits (21-41), and the third-dimension coordinate is stored in the remaining bits
+
+
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use two bitshifts to zero all bits to the left of bit position 20 to yield only first coordinate
+                                b = SHIFTL(cavloc,43)
+                                b = SHIFTR(b,43)
+                                ! We want a random float in range [b, b+1) (using current value of b)
+                                ! decrement b by 1 because the random float we will add has range [1.0, 2.0) instead of [0.0,1.0)
+                                b = b - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP) ! convert b to float
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                ! addition or IOR or IEOR works to combine exponent and significand fraction
+                                ! because the ones don't overlap and sign bit is 0
+                                intres = IOR(intres,one_dp_as_int) 
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in first dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(1)
+                                rranf_arr(j*dimpad_8byte+i,1) = cavxyzloc ! write first-dimension fractional coordinate to memory
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                ! use two bitshifts to move second-dimension coordinate to first 21 bits & clear other bits
+                                b = SHIFTL(cavloc,22) ! discards bits to the left of second-dimension coordinate
+                                b = SHIFTR(b,43) ! moves second-dimension coordinate to proper place while discarding bits to right
+                                b = b - 1_INT64
+                                ! Move third-dimension coordinate to first 22 bits and clear other bits.
+                                ! Shift in place this time to keep result in cavloc because the other coordinates were already
+                                ! extracted and b is also used to hold an intermediate in random integer generation
+                                cavloc = SHIFTR(cavloc,42) ! only one bitshift is needed because nothing is to the left
+                                cavloc = cavloc - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in second dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(2)
+                                rranf_arr(j*dimpad_8byte+i,2) = cavxyzloc ! write second-dimension fractional coordinate to memory
+                                cavxyzloc = REAL(INT(cavloc,INT32),DP)
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in third dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(3)
+                                rranf_arr(j*dimpad_8byte+i,3) = cavxyzloc ! write third-dimension fractional coordinate to memory
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        ELSE IF (ncavs<ncavs_threshold) THEN
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc,cavxyzloc) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres, one_dp_as_int)
+                                ! only using cavxyzloc because it's a float, despite it not being used the same as in later steps
+                                cavxyzloc = ncavs_dp
+                                cavxyzloc = TRANSFER(intres,cavxyzloc)*cavxyzloc - cavxyzloc
+                                ! cavxyzloc is now random float in range [0.0,ncavs_dp)
+                                ! convert cavxyzloc to integer in range [0,ncavs-1]
+                                ! convert to 32-bit integer, then 64-bit integer because conversion from 64-bit REAL to 64-bit
+                                ! integer is not vectorized with AVX2. Since ncavs is small enough, this works.
+                                cavloc = INT(INT(cavxyzloc,INT32),INT64) ! now we have the randomly chosen cavity voxel index
+                                cavloc = cavdatalist(i_big_atom,ibox)%cavity_locs(cavloc) ! fetch voxel coordinates from random index
+
+                                ! cavloc now stores the 3-D integer voxel grid coordinates of the chosen cavity voxel as a single
+                                ! 64-bit integer.
+                                ! The first-dimension coordinate is stored in bits 0-20, the second-dimension coordinate is stored
+                                ! in the next 21 bits (21-41), and the third-dimension coordinate is stored in the remaining bits
+
+
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use two bitshifts to zero all bits to the left of bit position 20 to yield only first coordinate
+                                b = SHIFTL(cavloc,43)
+                                b = SHIFTR(b,43)
+                                ! We want a random float in range [b, b+1) (using current value of b)
+                                ! decrement b by 1 because the random float we will add has range [1.0, 2.0) instead of [0.0,1.0)
+                                b = b - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP) ! convert b to float
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                ! addition or IOR or IEOR works to combine exponent and significand fraction
+                                ! because the ones don't overlap and sign bit is 0
+                                intres = IOR(intres,one_dp_as_int) 
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in first dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(1)
+                                rranf_arr(j*dimpad_8byte+i,1) = cavxyzloc ! write first-dimension fractional coordinate to memory
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                ! use two bitshifts to move second-dimension coordinate to first 21 bits & clear other bits
+                                b = SHIFTL(cavloc,22) ! discards bits to the left of second-dimension coordinate
+                                b = SHIFTR(b,43) ! moves second-dimension coordinate to proper place while discarding bits to right
+                                b = b - 1_INT64
+                                ! Move third-dimension coordinate to first 22 bits and clear other bits.
+                                ! Shift in place this time to keep result in cavloc because the other coordinates were already
+                                ! extracted and b is also used to hold an intermediate in random integer generation
+                                cavloc = SHIFTR(cavloc,42) ! only one bitshift is needed because nothing is to the left
+                                cavloc = cavloc - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in second dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(2)
+                                rranf_arr(j*dimpad_8byte+i,2) = cavxyzloc ! write second-dimension fractional coordinate to memory
+                                cavxyzloc = REAL(INT(cavloc,INT32),DP)
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in third dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(3)
+                                rranf_arr(j*dimpad_8byte+i,3) = cavxyzloc ! write third-dimension fractional coordinate to memory
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        ELSE
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres,cavloc,cavxyzloc) SIMDLEN(dimpad_8byte)
+                DO i = 1, dimpad_8byte
+                        s1 = s_arr(i,1)
+                        s2 = s_arr(i,2)
+                        s3 = s_arr(i,3)
+                        s4 = s_arr(i,4)
+                        s5 = s_arr(i,5)
+                        !DIR$ LOOP COUNT = 256
+                        DO j = 0, jmax
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres, one_dp_as_int)
+                                ! only using cavxyzloc because it's a float, despite it not being used the same as in later steps
+                                cavxyzloc = ncavs_dp
+                                cavxyzloc = TRANSFER(intres,cavxyzloc)*cavxyzloc - cavxyzloc
+                                ! cavxyzloc is now random float in range [0.0,ncavs_dp)
+                                ! convert cavxyzloc to integer in range [0,ncavs-1]
+                                cavloc = INT(cavxyzloc,INT64) ! now we have the randomly chosen cavity voxel index
+                                cavloc = cavdatalist(i_big_atom,ibox)%cavity_locs(cavloc) ! fetch voxel coordinates from random index
+
+                                ! cavloc now stores the 3-D integer voxel grid coordinates of the chosen cavity voxel as a single
+                                ! 64-bit integer.
+                                ! The first-dimension coordinate is stored in bits 0-20, the second-dimension coordinate is stored
+                                ! in the next 21 bits (21-41), and the third-dimension coordinate is stored in the remaining bits
+
+
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use two bitshifts to zero all bits to the left of bit position 20 to yield only first coordinate
+                                b = SHIFTL(cavloc,43)
+                                b = SHIFTR(b,43)
+                                ! We want a random float in range [b, b+1) (using current value of b)
+                                ! decrement b by 1 because the random float we will add has range [1.0, 2.0) instead of [0.0,1.0)
+                                b = b - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP) ! convert b to float
+
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                ! addition or IOR or IEOR works to combine exponent and significand fraction
+                                ! because the ones don't overlap and sign bit is 0
+                                intres = IOR(intres,one_dp_as_int) 
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in first dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(1)
+                                rranf_arr(j*dimpad_8byte+i,1) = cavxyzloc ! write first-dimension fractional coordinate to memory
+
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                ! use two bitshifts to move second-dimension coordinate to first 21 bits & clear other bits
+                                b = SHIFTL(cavloc,22) ! discards bits to the left of second-dimension coordinate
+                                b = SHIFTR(b,43) ! moves second-dimension coordinate to proper place while discarding bits to right
+                                b = b - 1_INT64
+                                ! Move third-dimension coordinate to first 22 bits and clear other bits.
+                                ! Shift in place this time to keep result in cavloc because the other coordinates were already
+                                ! extracted and b is also used to hold an intermediate in random integer generation
+                                cavloc = SHIFTR(cavloc,42) ! only one bitshift is needed because nothing is to the left
+                                cavloc = cavloc - 1_INT64
+                                cavxyzloc = REAL(INT(b,INT32),DP)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in second dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(2)
+                                rranf_arr(j*dimpad_8byte+i,2) = cavxyzloc ! write second-dimension fractional coordinate to memory
+                                cavxyzloc = REAL(INT(cavloc,INT32),DP)
+
+                                b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53)
+                                s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                                b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                                s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                                intres = IEOR(s1,s2)
+                                b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                                s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                                intres = IEOR(intres,s3)
+                                b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                                s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                                intres = IEOR(intres,s4)
+                                b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                                s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                                intres = IEOR(intres,s5)
+                                ! use 52 most significant bits of random integer (intres) as significand fraction of DP float
+                                ! to obtain random floating point number in range [1.0, 2.0)
+                                intres = SHIFTR(intres,12)
+                                intres = IOR(intres,one_dp_as_int)
+                                ! add resulting random float to floating point representation of decremented voxel coordinate
+                                cavxyzloc = cavxyzloc + TRANSFER(intres,cavxyzloc)
+                                ! divide by box's length in voxels in third dimension to obtain fractional coordinate
+                                cavxyzloc = cavxyzloc*lbcr(3)
+                                rranf_arr(j*dimpad_8byte+i,3) = cavxyzloc ! write third-dimension fractional coordinate to memory
+                        END DO
+                        s_arr(i,1) = s1
+                        s_arr(i,2) = s2
+                        s_arr(i,3) = s3
+                        s_arr(i,4) = s4
+                        s_arr(i,5) = s5
+                END DO
+                !$OMP END SIMD
+        END IF
+
+END SUBROUTINE cavity_biased_rranf
+
+
+
+SUBROUTINE array_boxscan_rranf(rranf_arr,kappa_ins)
+
+        IMPLICIT NONE
+
+        INTEGER (KIND=INT64) :: b
+        REAL(DP), DIMENSION(:,:), CONTIGUOUS, INTENT(OUT) :: rranf_arr
+        INTEGER, INTENT(IN) :: kappa_ins
+        REAL(DP) :: zpart_width, zshift
+        INTEGER :: vec_len, lenmod4, i, j, n_zparts_p2, n_zparts, jmax1, jmax2
+        INTEGER(INT64) :: s1,s2,s3,s4,s5,intres
+
+        n_zparts = SHIFTR(kappa_ins,log2dimpad_8byte)
+        n_zparts_p2 = 31-LEADZ(n_zparts)
+        n_zparts = ISHFT(1_INT32,n_zparts_p2)
+        !n_zparts = IAND(n_zparts,NOT(MASKR(n_zparts_p2,INT32)))
+        jmax1 = n_zparts-1
+        zpart_width = TRANSFER(TRANSFER(1.0_DP,0_INT64)-ISHFT(INT(n_zparts_p2,INT64),52),zpart_width)
+        !zpart_width = 1.0_DP/n_zparts
+        vec_len = SIZE(rranf_arr,1)
+        jmax2 = SHIFTR(vec_len,log2dimpad_8byte)-1
+        lenmod4 = IAND(vec_len,padconst_8byte)
+
+        !DIR$ VECTOR ALIGNED
+        !$OMP SIMD PRIVATE(b,s1,s2,s3,s4,s5,intres)
+        DO i = 1, dimpad_8byte
+                s1 = s_arr(i,1)
+                s2 = s_arr(i,2)
+                s3 = s_arr(i,3)
+                s4 = s_arr(i,4)
+                s5 = s_arr(i,5)
+                zshift = zpart_width
+                !DIR$ LOOP COUNT = 256
+                DO j = 0, jmax1
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+
+                        ! We don't use the following line because conversion from INT64 to DP is not vectorized well unless
+                        ! you have AVX-512, which we don't, and neither do any AMD processors, currently.
+                        !rranf_vec(j*4+i) = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5)*5.4210108624275221E-20_DP + 0.5_DP
+                        !intres = IEOR(IEOR(IEOR(IEOR(s1,s2),s3),s4),s5)
+                        rranf_arr(j*dimpad_8byte+i,1) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+
+
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+                        rranf_arr(j*dimpad_8byte+i,2) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+                        rranf_arr(j*dimpad_8byte+i,3) = TRANSFER(IOR(TRANSFER(zpart_width,intres),ISHFT(intres,-12)),1.0_DP)-zshift
+                        zshift = zshift-zpart_width
+                END DO
+                DO j = jmax1+1, jmax2
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+
+                        ! We don't use the following line because conversion from INT64 to DP is not vectorized well unless
+                        ! you have AVX-512, which we don't, and neither do any AMD processors, currently.
+                        !rranf_vec(j*4+i) = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5)*5.4210108624275221E-20_DP + 0.5_DP
+                        !intres = IEOR(IEOR(IEOR(IEOR(s1,s2),s3),s4),s5)
+                        rranf_arr(j*dimpad_8byte+i,1) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+
+
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+                        rranf_arr(j*dimpad_8byte+i,2) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+
+
+                        b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
+                        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
+                        b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
+                        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
+                        intres = IEOR(s1,s2)
+                        b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
+                        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
+                        intres = IEOR(intres,s3)
+                        b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
+                        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
+                        intres = IEOR(intres,s4)
+                        b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
+                        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
+                        intres = IEOR(intres,s5)
+                        rranf_arr(j*dimpad_8byte+i,3) = TRANSFER(IOR(TRANSFER(1.0_DP,intres),ISHFT(intres,-12)),1.0_DP)-1.0_DP
+                END DO
+                s_arr(i,1) = s1
+                s_arr(i,2) = s2
+                s_arr(i,3) = s3
+                s_arr(i,4) = s4
+                s_arr(i,5) = s5
+        END DO
+        !$OMP END SIMD
+        DO j = 1, 3
+                DO i = 1, lenmod4
+                        rranf_arr((jmax2+1)*dimpad_8byte+i,j) = rranf()
+                END DO
+        END DO
+
+END SUBROUTINE array_boxscan_rranf
 
 
   INTEGER(KIND=INT64) FUNCTION rranint() 
@@ -129,15 +1304,15 @@ INTEGER(KIND=INT64) FUNCTION rranint()
         INTEGER (KIND=INT64) :: b
 
         b  = ISHFT( IEOR( ISHFT(s1,1), s1), -53_INT64)
-        s1 = IEOR( ISHFT( IAND(s1,-2_INT64), 10), b)
+        s1 = IEOR( ISHFT( IAND(s1,const1), 10), b)
         b  = ISHFT( IEOR( ISHFT(s2,24), s2), -50)
-        s2 = IEOR( ISHFT( IAND(s2,-512_INT64), 5), b)
+        s2 = IEOR( ISHFT( IAND(s2,const2), 5), b)
         b  = ISHFT( IEOR( ISHFT(s3,3), s3), -23)
-        s3 = IEOR( ISHFT( IAND(s3,-4096_INT64), 29), b)
+        s3 = IEOR( ISHFT( IAND(s3,const3), 29), b)
         b  = ISHFT( IEOR( ISHFT(s4,5), s4), -24)
-        s4 = IEOR( ISHFT( IAND(s4,-131072_INT64), 23), b)
+        s4 = IEOR( ISHFT( IAND(s4,const4), 23), b)
         b  = ISHFT( IEOR( ISHFT(s5,3), s5), -33)
-        s5 = IEOR( ISHFT( IAND(s5,-8388608_INT64), 8), b)
+        s5 = IEOR( ISHFT( IAND(s5,const5), 8), b)
 
         rranint = IEOR( IEOR( IEOR( IEOR(s1,s2), s3), s4), s5)
 
diff --git a/Src/read_write_checkpoint.f90 b/Src/read_write_checkpoint.f90
index 1363d0c1..b3e69042 100755
--- a/Src/read_write_checkpoint.f90
+++ b/Src/read_write_checkpoint.f90
@@ -34,7 +34,7 @@ MODULE Read_Write_Checkpoint
   USE Global_Variables
   USE File_Names
   USE Simulation_Properties
-  USE Random_Generators, ONLY : s1,s2,s3,s4,s5, rranf
+  USE Random_Generators, ONLY : s1,s2,s3,s4,s5,s_arr, init_seeds, rranf
   USE IO_Utilities
   USE Internal_Coordinate_Routines
 
@@ -46,6 +46,7 @@ SUBROUTINE Write_Checkpoint
 
     INTEGER :: ibox, is, ii, jj, im, this_im, ia, nmolecules_is, this_box
     INTEGER :: total_molecules_is, this_unit, position
+    INTEGER :: i
 
     LOGICAL :: lopen
     REAL(DP) :: this_lambda = 1.0_DP
@@ -107,6 +108,10 @@ SUBROUTINE Write_Checkpoint
 
     WRITE(chkptunit,*) '**** SEEDS *******'
     WRITE(chkptunit,*) s1,s2,s3,s4,s5
+    WRITE(chkptunit,*) "SIMD " // TRIM(Int_To_String(dimpad_8byte))
+    DO i = 1, dimpad_8byte
+        WRITE(chkptunit,*) s_arr(i,1), s_arr(i,2), s_arr(i,3), s_arr(i,4), s_arr(i,5)
+    END DO
 
     WRITE(chkptunit,*) '******* Info for total number of molecules'
     ! write number of molecules of each of the species
@@ -127,9 +132,9 @@ SUBROUTINE Write_Checkpoint
              DO ia = 1, natoms(is)
 !                WRITE(chkptunit,'(A,T10,3(F15.10,1X),T70,I3)') nonbond_list(ia,is)%element, &
                 WRITE(chkptunit,*) nonbond_list(ia,is)%element, &
-                     atom_list(ia,this_im,is)%rxp, &
-                     atom_list(ia,this_im,is)%ryp, &
-                     atom_list(ia,this_im,is)%rzp, &
+                     atom_list(ia,this_im,is)%rp(1), &
+                     atom_list(ia,this_im,is)%rp(2), &
+                     atom_list(ia,this_im,is)%rp(3), &
                      this_box
              END DO
 
@@ -149,12 +154,14 @@ SUBROUTINE Read_Checkpoint
     INTEGER :: sp_nmoltotal(nspecies)
     INTEGER :: this_species, nfrac_global, i, this_rxnum, j, m, alive
     INTEGER :: this_unit
+    INTEGER :: checkpoint_simd, nbr_entries, ierr
+    CHARACTER(STRING_LEN) :: line_array(60)
 
     INTEGER, DIMENSION(:), ALLOCATABLE :: total_molecules, n_int
 
     REAL(DP) :: this_lambda = 1.0_DP
     REAL(DP) :: E_self, xcom_old, ycom_old, zcom_old
-    REAL(DP) :: xcom_new, ycom_new, zcom_new
+    REAL(DP) :: xcom_new, ycom_new, zcom_new, rp(3)
 
     LOGICAL :: overlap
     LOGICAL :: lopen
@@ -258,10 +265,36 @@ SUBROUTINE Read_Checkpoint
 
     READ(restartunit,*)
     READ(restartunit,*) s1,s2,s3,s4,s5
+    CALL Init_Seeds
+    CALL Parse_String(restartunit,-1,0,nbr_entries,line_array,ierr)
+    IF (nbr_entries == 2) THEN
+            IF (line_array(1) .NE. 'SIMD') THEN
+                    err_msg = ""
+                    err_msg(1) = 'Unrecognized keyword ' // TRIM(line_array(1))
+                    err_msg(2) = 'after checkpoint RNG seeds.'
+                    err_msg(3) = 'This line should be empty or have "SIMD" followed by the number of SIMD lanes.'
+                    CALL Clean_Abort(err_msg,'Read_Checkpoint')
+            END IF
+            checkpoint_simd = String_To_Int(line_array(2))
+            IF (checkpoint_simd == dimpad_8byte) THEN
+                    WRITE(logunit,*) "Reading SIMD RNG seeds"
+                    DO i = 1, dimpad_8byte
+                        READ(restartunit,*) s_arr(i,1), s_arr(i,2), s_arr(i,3), s_arr(i,4), s_arr(i,5)
+                    END DO
+            ELSE
+                    WRITE(logunit,*) "Ignoring SIMD RNG seeds due to conflicting number of SIMD lanes"
+                    DO i = 1, checkpoint_simd
+                        READ(restartunit,*)
+                    END DO
+            END IF
+            READ(restartunit,*)
+    ELSE
+            WRITE(logunit,*) "No SIMD RNG seeds were found. Using freshly generated SIMD RNG seeds."
+    END IF
     WRITE(logunit,*) 'Seed info read successfully'
 
     ! read total number of molecules of each of the species
-    READ(restartunit,*)
+    !READ(restartunit,*) ! line advanced already when checking for SIMD seeds
     DO is = 1, nspecies
        READ(restartunit,*) this_species, sp_nmoltotal(is)
        IF (sp_nmoltotal(is) > 0) species_list(is)%l_solvent = .TRUE.
@@ -282,10 +315,16 @@ SUBROUTINE Read_Checkpoint
 
           DO ia = 1, natoms(is)
              READ(restartunit,*)nonbond_list(ia,is)%element, &
-                  atom_list(ia,im,is)%rxp, &
-                  atom_list(ia,im,is)%ryp, &
-                  atom_list(ia,im,is)%rzp, &
+                  rp(1), &
+                  rp(2), &
+                  rp(3), &
                   this_box
+             IF (box_list(ibox)%basis_changed) THEN
+                     atom_list(ia,im,is)%rp(1:3) = &
+                             MATMUL(box_list(this_box)%basis_converter,rp)
+             ELSE
+                     atom_list(ia,im,is)%rp(1:3) = rp
+             END IF
              ! set exist flags for this atom
              atom_list(ia,im,is)%exist = .TRUE.
           END DO
@@ -326,9 +365,9 @@ SUBROUTINE Read_Checkpoint
              !
              CALL Get_COM(this_im,is)
 
-             xcom_old = molecule_list(this_im,is)%xcom
-             ycom_old = molecule_list(this_im,is)%ycom
-             zcom_old = molecule_list(this_im,is)%zcom
+             xcom_old = molecule_list(this_im,is)%rcom(1)
+             ycom_old = molecule_list(this_im,is)%rcom(2)
+             zcom_old = molecule_list(this_im,is)%rcom(3)
 
              ! Apply PBC
 
@@ -340,8 +379,8 @@ SUBROUTINE Read_Checkpoint
                      xcom_new, ycom_new, zcom_new)
 
 !!$                IF (this_box == 2) THEN
-!!$                   write(203,*) atom_list(1,this_im,is)%rxp, atom_list(1,this_im,is)%ryp, &
-!!$                        atom_list(1,this_im,is)%rzp
+!!$                   write(203,*) atom_list(1,this_im,is)%rp(1), atom_list(1,this_im,is)%rp(2), &
+!!$                        atom_list(1,this_im,is)%rp(3)
 !!$                END IF
 !!$                write(*,*) 'cubic'
 
@@ -356,17 +395,17 @@ SUBROUTINE Read_Checkpoint
 
              ! COM in the central simulation box
 
-             molecule_list(this_im,is)%xcom = xcom_new
-             molecule_list(this_im,is)%ycom = ycom_new
-             molecule_list(this_im,is)%zcom = zcom_new
+             molecule_list(this_im,is)%rcom(1) = xcom_new
+             molecule_list(this_im,is)%rcom(2) = ycom_new
+             molecule_list(this_im,is)%rcom(3) = zcom_new
 
              ! displace atomic coordinates
 
-             atom_list(1:natoms(is),this_im,is)%rxp = atom_list(1:natoms(is),this_im,is)%rxp + &
+             atom_list(1:natoms(is),this_im,is)%rp(1) = atom_list(1:natoms(is),this_im,is)%rp(1) + &
                   xcom_new - xcom_old
-             atom_list(1:natoms(is),this_im,is)%ryp = atom_list(1:natoms(is),this_im,is)%ryp + &
+             atom_list(1:natoms(is),this_im,is)%rp(2) = atom_list(1:natoms(is),this_im,is)%rp(2) + &
                   ycom_new - ycom_old
-             atom_list(1:natoms(is),this_im,is)%rzp = atom_list(1:natoms(is),this_im,is)%rzp + &
+             atom_list(1:natoms(is),this_im,is)%rp(3) = atom_list(1:natoms(is),this_im,is)%rp(3) + &
                   zcom_new - zcom_old
 
              CALL Compute_Max_Com_Distance(this_im,is)
@@ -407,6 +446,7 @@ SUBROUTINE Read_Config(ibox)
     REAL(DP) :: E_recip, E_self, E_intra
     REAL(DP) :: E_old, xcom_old, ycom_old, zcom_old
     REAL(DP) :: xcom_new, ycom_new, zcom_new
+    REAL(DP) :: rp(3)
     LOGICAL :: overlap
 
     Type(Energy_Class) :: inrg
@@ -440,9 +480,15 @@ SUBROUTINE Read_Config(ibox)
           DO ia = 1, natoms(is)
 
              READ(old_config_unit,*)nonbond_list(ia,is)%element, &
-                  atom_list(ia,this_im,is)%rxp, &
-                  atom_list(ia,this_im,is)%ryp, &
-                  atom_list(ia,this_im,is)%rzp
+                  rp(1), &
+                  rp(2), &
+                  rp(3)
+             IF (box_list(ibox)%basis_changed) THEN
+                     atom_list(ia,this_im,is)%rp(1:3) = &
+                             MATMUL(box_list(ibox)%basis_converter,rp)
+             ELSE
+                     atom_list(ia,this_im,is)%rp(1:3) = rp
+             END IF
              ! set the frac and exist flags for this atom
              molecule_list(this_im,is)%frac = this_lambda
              atom_list(ia,this_im,is)%exist = .TRUE.
@@ -456,9 +502,9 @@ SUBROUTINE Read_Config(ibox)
        !
           CALL Get_COM(this_im,is)
 
-          xcom_old = molecule_list(this_im,is)%xcom
-          ycom_old = molecule_list(this_im,is)%ycom
-          zcom_old = molecule_list(this_im,is)%zcom
+          xcom_old = molecule_list(this_im,is)%rcom(1)
+          ycom_old = molecule_list(this_im,is)%rcom(2)
+          zcom_old = molecule_list(this_im,is)%rcom(3)
 
           ! Apply PBC
           IF (l_cubic(ibox)) THEN
@@ -470,22 +516,22 @@ SUBROUTINE Read_Config(ibox)
           END IF
 
           ! COM in the central simulation box
-          molecule_list(this_im,is)%xcom = xcom_new
-          molecule_list(this_im,is)%ycom = ycom_new
-          molecule_list(this_im,is)%zcom = zcom_new
+          molecule_list(this_im,is)%rcom(1) = xcom_new
+          molecule_list(this_im,is)%rcom(2) = ycom_new
+          molecule_list(this_im,is)%rcom(3) = zcom_new
 
           ! COM in the central simulation box
-          molecule_list(this_im,is)%xcom = xcom_new
-          molecule_list(this_im,is)%ycom = ycom_new
-          molecule_list(this_im,is)%zcom = zcom_new
+          molecule_list(this_im,is)%rcom(1) = xcom_new
+          molecule_list(this_im,is)%rcom(2) = ycom_new
+          molecule_list(this_im,is)%rcom(3) = zcom_new
 
           ! displace atomic coordinates
-          atom_list(1:natoms(is),this_im,is)%rxp = &
-               atom_list(1:natoms(is),this_im,is)%rxp + xcom_new - xcom_old
-          atom_list(1:natoms(is),this_im,is)%ryp = &
-               atom_list(1:natoms(is),this_im,is)%ryp + ycom_new - ycom_old
-          atom_list(1:natoms(is),this_im,is)%rzp = &
-               atom_list(1:natoms(is),this_im,is)%rzp + zcom_new - zcom_old
+          atom_list(1:natoms(is),this_im,is)%rp(1) = &
+               atom_list(1:natoms(is),this_im,is)%rp(1) + xcom_new - xcom_old
+          atom_list(1:natoms(is),this_im,is)%rp(2) = &
+               atom_list(1:natoms(is),this_im,is)%rp(2) + ycom_new - ycom_old
+          atom_list(1:natoms(is),this_im,is)%rp(3) = &
+               atom_list(1:natoms(is),this_im,is)%rp(3) + zcom_new - zcom_old
 
           nmols(is,ibox) = nmols(is,ibox) + 1
 
diff --git a/Src/ring_fragment_driver.f90 b/Src/ring_fragment_driver.f90
index 5f99545b..edd758be 100755
--- a/Src/ring_fragment_driver.f90
+++ b/Src/ring_fragment_driver.f90
@@ -120,8 +120,8 @@ SUBROUTINE Ring_Fragment_Driver
 
         DO ia = 1, natoms(is)
            WRITE(frag_file_unit,*) nonbond_list(ia,is)%element, &
-                atom_list(ia,im,is)%rxp, atom_list(ia,im,is)%ryp, &
-                atom_list(ia,im,is)%rzp
+                atom_list(ia,im,is)%rp(1), atom_list(ia,im,is)%rp(2), &
+                atom_list(ia,im,is)%rp(3)
         END DO
 
      END IF
diff --git a/Src/rotation_routines.f90 b/Src/rotation_routines.f90
index 34286944..60e7d082 100755
--- a/Src/rotation_routines.f90
+++ b/Src/rotation_routines.f90
@@ -88,9 +88,9 @@ SUBROUTINE Rotate_Molecule_Eulerian(alive,is)
 
       IF (these_atoms(ia)%exist) THEN
          
-         these_atoms(ia)%rxp = these_atoms(ia)%rxp - this_molecule%xcom
-         these_atoms(ia)%ryp = these_atoms(ia)%ryp - this_molecule%ycom
-         these_atoms(ia)%rzp = these_atoms(ia)%rzp - this_molecule%zcom
+         these_atoms(ia)%rp(1) = these_atoms(ia)%rp(1) - this_molecule%rcom(1)
+         these_atoms(ia)%rp(2) = these_atoms(ia)%rp(2) - this_molecule%rcom(2)
+         these_atoms(ia)%rp(3) = these_atoms(ia)%rp(3) - this_molecule%rcom(3)
          
       END IF
 
@@ -117,18 +117,18 @@ SUBROUTINE Rotate_Molecule_Eulerian(alive,is)
 
       IF (these_atoms(ia)%exist) THEN
          
-         rxpnew = rot11*these_atoms(ia)%rxp + rot12*these_atoms(ia)%ryp + &
-              rot13*these_atoms(ia)%rzp
-         rypnew = rot21*these_atoms(ia)%rxp + rot22*these_atoms(ia)%ryp + &
-              rot23*these_atoms(ia)%rzp
-         rzpnew = rot31*these_atoms(ia)%rxp + rot32*these_atoms(ia)%ryp + &
-              rot33*these_atoms(ia)%rzp
+         rxpnew = rot11*these_atoms(ia)%rp(1) + rot12*these_atoms(ia)%rp(2) + &
+              rot13*these_atoms(ia)%rp(3)
+         rypnew = rot21*these_atoms(ia)%rp(1) + rot22*these_atoms(ia)%rp(2) + &
+              rot23*these_atoms(ia)%rp(3)
+         rzpnew = rot31*these_atoms(ia)%rp(1) + rot32*these_atoms(ia)%rp(2) + &
+              rot33*these_atoms(ia)%rp(3)
          
          ! Shift the origin back to (0,0,0)
          
-         these_atoms(ia)%rxp = rxpnew + this_molecule%xcom
-         these_atoms(ia)%ryp = rypnew + this_molecule%ycom
-         these_atoms(ia)%rzp = rzpnew + this_molecule%zcom
+         these_atoms(ia)%rp(1) = rxpnew + this_molecule%rcom(1)
+         these_atoms(ia)%rp(2) = rypnew + this_molecule%rcom(2)
+         these_atoms(ia)%rp(3) = rzpnew + this_molecule%rcom(3)
          
       END IF
       
@@ -188,9 +188,9 @@ SUBROUTINE Rotate_XYZ_Axes(alive,is,frag_start,lx,ly,lz,mtype)
 
    atom_orig = frag_list(frag_start,is)%atoms(1)
 
-   x_orig = these_atoms(atom_orig)%rxp
-   y_orig = these_atoms(atom_orig)%ryp
-   z_orig = these_atoms(atom_orig)%rzp
+   x_orig = these_atoms(atom_orig)%rp(1)
+   y_orig = these_atoms(atom_orig)%rp(2)
+   z_orig = these_atoms(atom_orig)%rp(3)
 
    istart = 2
 
@@ -199,9 +199,9 @@ SUBROUTINE Rotate_XYZ_Axes(alive,is,frag_start,lx,ly,lz,mtype)
  
       IF (these_atoms(ia)%exist) THEN
          
-         these_atoms(ia)%rxp = these_atoms(ia)%rxp - x_orig
-         these_atoms(ia)%ryp = these_atoms(ia)%ryp - y_orig
-         these_atoms(ia)%rzp = these_atoms(ia)%rzp - z_orig
+         these_atoms(ia)%rp(1) = these_atoms(ia)%rp(1) - x_orig
+         these_atoms(ia)%rp(2) = these_atoms(ia)%rp(2) - y_orig
+         these_atoms(ia)%rp(3) = these_atoms(ia)%rp(3) - z_orig
          
       END IF
 
@@ -228,16 +228,16 @@ SUBROUTINE Rotate_XYZ_Axes(alive,is,frag_start,lx,ly,lz,mtype)
 
       IF (these_atoms(ia)%exist) THEN
          
-         rxpnew = rot11*these_atoms(ia)%rxp + rot12*these_atoms(ia)%ryp + &
-              rot13*these_atoms(ia)%rzp
-         rypnew = rot21*these_atoms(ia)%rxp + rot22*these_atoms(ia)%ryp + &
-              rot23*these_atoms(ia)%rzp
-         rzpnew = rot31*these_atoms(ia)%rxp + rot32*these_atoms(ia)%ryp + &
-              rot33*these_atoms(ia)%rzp
+         rxpnew = rot11*these_atoms(ia)%rp(1) + rot12*these_atoms(ia)%rp(2) + &
+              rot13*these_atoms(ia)%rp(3)
+         rypnew = rot21*these_atoms(ia)%rp(1) + rot22*these_atoms(ia)%rp(2) + &
+              rot23*these_atoms(ia)%rp(3)
+         rzpnew = rot31*these_atoms(ia)%rp(1) + rot32*these_atoms(ia)%rp(2) + &
+              rot33*these_atoms(ia)%rp(3)
          
-         these_atoms(ia)%rxp = rxpnew
-         these_atoms(ia)%ryp = rypnew
-         these_atoms(ia)%rzp = rzpnew
+         these_atoms(ia)%rp(1) = rxpnew
+         these_atoms(ia)%rp(2) = rypnew
+         these_atoms(ia)%rp(3) = rzpnew
 
       END IF
 
@@ -246,9 +246,9 @@ SUBROUTINE Rotate_XYZ_Axes(alive,is,frag_start,lx,ly,lz,mtype)
    ! Shift the origin back to (0,0,0)
    DO i=istart,frag_list(frag_start,is)%natoms
       ia = frag_list(frag_start,is)%atoms(i)
-      these_atoms(ia)%rxp = these_atoms(ia)%rxp + x_orig
-      these_atoms(ia)%ryp = these_atoms(ia)%ryp + y_orig
-      these_atoms(ia)%rzp = these_atoms(ia)%rzp + z_orig
+      these_atoms(ia)%rp(1) = these_atoms(ia)%rp(1) + x_orig
+      these_atoms(ia)%rp(2) = these_atoms(ia)%rp(2) + y_orig
+      these_atoms(ia)%rp(3) = these_atoms(ia)%rp(3) + z_orig
    END DO
  END SUBROUTINE Rotate_XYZ_Axes
 
diff --git a/Src/save_revert_coordinates.f90 b/Src/save_revert_coordinates.f90
index a49ad05c..786c73a5 100755
--- a/Src/save_revert_coordinates.f90
+++ b/Src/save_revert_coordinates.f90
@@ -70,22 +70,22 @@ SUBROUTINE Save_Old_Cartesian_Coordinates(im,is)
 
 ! Save the parent coordinates
 
-  atom_list(:,im,is)%rxp_old = atom_list(:,im,is)%rxp
-  atom_list(:,im,is)%ryp_old = atom_list(:,im,is)%ryp
-  atom_list(:,im,is)%rzp_old = atom_list(:,im,is)%rzp
+  atom_list(:,im,is)%rp_old(1) = atom_list(:,im,is)%rp(1)
+  atom_list(:,im,is)%rp_old(2) = atom_list(:,im,is)%rp(2)
+  atom_list(:,im,is)%rp_old(3) = atom_list(:,im,is)%rp(3)
 
 
 ! Save the COM and Eulerian angles
   
-  molecule_list(im,is)%xcom_old = molecule_list(im,is)%xcom
-  molecule_list(im,is)%ycom_old = molecule_list(im,is)%ycom
-  molecule_list(im,is)%zcom_old = molecule_list(im,is)%zcom
+  molecule_list(im,is)%rcom_old(1) = molecule_list(im,is)%rcom(1)
+  molecule_list(im,is)%rcom_old(2) = molecule_list(im,is)%rcom(2)
+  molecule_list(im,is)%rcom_old(3) = molecule_list(im,is)%rcom(3)
 
   molecule_list(im,is)%euler1_old = molecule_list(im,is)%euler1
   molecule_list(im,is)%euler2_old = molecule_list(im,is)%euler2
   molecule_list(im,is)%euler3_old = molecule_list(im,is)%euler3
 
-  molecule_list(im,is)%max_dcom_old = molecule_list(im,is)%max_dcom
+  molecule_list(im,is)%rcom_old(4) = molecule_list(im,is)%rcom(4)
 
 END SUBROUTINE Save_Old_Cartesian_Coordinates
 
@@ -202,22 +202,22 @@ SUBROUTINE Revert_Old_Cartesian_Coordinates(im,is)
   
 ! Revert to the old x,y and z parent coordinates of the atoms
 
-  atom_list(:,im,is)%rxp = atom_list(:,im,is)%rxp_old
-  atom_list(:,im,is)%ryp = atom_list(:,im,is)%ryp_old
-  atom_list(:,im,is)%rzp = atom_list(:,im,is)%rzp_old
+  atom_list(:,im,is)%rp(1) = atom_list(:,im,is)%rp_old(1)
+  atom_list(:,im,is)%rp(2) = atom_list(:,im,is)%rp_old(2)
+  atom_list(:,im,is)%rp(3) = atom_list(:,im,is)%rp_old(3)
 
 
 ! Revert to the COM and Eulerian angles for the molecule
 
-  molecule_list(im,is)%xcom = molecule_list(im,is)%xcom_old
-  molecule_list(im,is)%ycom = molecule_list(im,is)%ycom_old
-  molecule_list(im,is)%zcom = molecule_list(im,is)%zcom_old
+  molecule_list(im,is)%rcom(1) = molecule_list(im,is)%rcom_old(1)
+  molecule_list(im,is)%rcom(2) = molecule_list(im,is)%rcom_old(2)
+  molecule_list(im,is)%rcom(3) = molecule_list(im,is)%rcom_old(3)
 
   molecule_list(im,is)%euler1 = molecule_list(im,is)%euler1_old
   molecule_list(im,is)%euler2 = molecule_list(im,is)%euler2_old
   molecule_list(im,is)%euler3 = molecule_list(im,is)%euler3_old
 
-  molecule_list(im,is)%max_dcom = molecule_list(im,is)%max_dcom_old
+  molecule_list(im,is)%rcom(4) = molecule_list(im,is)%rcom_old(4)
         
 END SUBROUTINE Revert_Old_Cartesian_Coordinates
 
diff --git a/Src/sector_routines.f90 b/Src/sector_routines.f90
index 91bfc338..4c0c8636 100755
--- a/Src/sector_routines.f90
+++ b/Src/sector_routines.f90
@@ -28,211 +28,1962 @@ MODULE Sector_Routines
   !********************************************************************************
   USE Global_Variables
   USE Type_Definitions
+  USE Io_Utilities
   !$ USE OMP_LIB
 
   IMPLICIT NONE
 
+  LOGICAL :: l_firstframe = .FALSE. ! initialize this to true if you want to write voxel grid data for visualization
+
+
+  INTERFACE check_overlap
+          MODULE PROCEDURE check_overlap_ams, check_overlap_coordinates
+  END INTERFACE
   
 
 
 CONTAINS
   SUBROUTINE Sector_Setup
-          INTEGER, DIMENSION(3) :: sectormaxbound_old !, map_bound
-          INTEGER, DIMENSION(3,nbr_boxes) :: sectorbound_old
-          INTEGER :: i_sector, ci(3), dx, dy, dz, xshift, yshift, zshift, nsec_old, nsec, secind
+          INTEGER, DIMENSION(3) :: adj_cellmaxbound_old
+          INTEGER :: i_sector, dx, dy, dz, xshift, yshift, zshift, nsec_old, nsec, secind
           INTEGER :: sector_ID
-          INTEGER :: i, ibox, is, imol, im, ia
+          INTEGER :: i, ibox, is, is_present, imol, im, ia
           INTEGER, DIMENSION(3) :: sector_atom_ID
           TYPE(Atom_Class), POINTER :: atom_ptr
-          REAL(DP) :: xp, yp, zp, cp(3)
-!          INTEGER, DIMENSION(3,nbr_boxes) :: cbmc_truth_cube_bound, cut_truth_cube_bound
-          INTEGER :: xi, yi, zi, cim(3)
-          INTEGER :: max_occ_sectors_old, total_atoms
+          REAL(DP) :: xp, yp, zp
+          INTEGER :: xi, yi, zi, cim(3), xyzi(3), i_dim
+          INTEGER, DIMENSION(2,3) :: tgt_slice, src_slice, bit_tgt_slice, bit_src_slice
           INTEGER, DIMENSION(:), ALLOCATABLE :: xi_pm, yi_pm, zi_pm
           INTEGER :: dummy
-          LOGICAL :: asflag
 
-          sectorbound_old = sectorbound
-          sectormaxbound_old = sectormaxbound
-          nsec_old = MAXVAL(PRODUCT(length_cells,1))
+          REAL(SP), DIMENSION(maxboxnatoms,4,nbr_boxes) :: sp_live_atom_rsp
+          INTEGER, DIMENSION(maxboxnatoms,3,nbr_boxes) :: live_atom_cp
+          INTEGER(INT32), DIMENSION(maxboxnatoms,nbr_boxes) :: live_atom_ti, live_atom_atomtypes
+          INTEGER, DIMENSION(4,maxboxnatoms,nbr_boxes) :: ci_list
+          INTEGER(INT32) :: bsolvent
+          INTEGER :: ci(4), nca
+
+          INTEGER :: xi2, yi2, zi2, vlen, n_i_exist, inlive, inatoms, istart, iend
+          INTEGER :: cp_ub, cp_lb, lc, cp
+          INTEGER :: dxi, dyi, dzi
+          INTEGER, DIMENSION(nbr_boxes) :: box_vlen
+
+          REAL(SP) :: rp_ub, rp_lb, lbox, clr, rsp, isp, rxp, ryp, rzp
+          REAL(SP) :: h11,h21,h31,h12,h22,h32,h13,h23,h33
+          REAL(SP), DIMENSION(3) :: boxlen, unwrap_shifter
+
+          LOGICAL, DIMENSION(maxboxnatoms) :: i_exist
+          LOGICAL :: l_ortho
+
+          INTEGER, DIMENSION(:,:,:,:), ALLOCATABLE :: n_cell_atoms
+          INTEGER(INT32), DIMENSION(:,:,:,:,:), ALLOCATABLE :: this_cell_ti, this_cell_atomtypes
+          INTEGER(INT32), DIMENSION(:,:,:,:), ALLOCATABLE :: this_cell_atom_i
+          REAL(SP), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: this_cell_rsp
+
+          LOGICAL :: need_atom_ti, need_atomtypes, need_charges
+
+          REAL(SP) :: cell_H(3,3), length_cells_recip(3), hlcr(3), hl(3)
+
+          INTEGER(INT64), DIMENSION(-28:28,-28:28,solvents_or_types_maxind,0:n_big_atoms,nbr_boxes) :: bitcell_int64
+          LOGICAL(1), DIMENSION(solvents_or_types_maxind,0:n_big_atoms) :: l_inrange_vec, l_inrange_vec_old, l_switch
+          REAL(DP) :: xyzi_dp(3), dxyzi_dp(3), xyzi_dp_spread(3,3,2), drp(3), rsq, bitcell_H(3,3), bitcell_H_diag(3)
+          INTEGER(INT64) :: bitmask
+          INTEGER(INT64), DIMENSION(solvents_or_types_maxind,0:n_big_atoms) :: priv_bitcell_int64
+          INTEGER, DIMENSION(maxboxnatoms,4,nbr_boxes) :: live_atom_bcp
+          INTEGER, DIMENSION(4,maxboxnatoms) :: live_atom_bcp_T
+          INTEGER :: bcp,bcpx,bcpy,bcpz,bcps,ti,lbp32(3)
+          INTEGER(1), DIMENSION(:,:,:,:), ALLOCATABLE :: bitcell_int8_array
+          INTEGER(1), DIMENSION(:,:,:), ALLOCATABLE :: bitcell_int8_array_2
+
+          INTEGER :: max_adj_cell_atoms_old, cbmc_max_interact_old, max_neighbors
+          INTEGER :: bt(3), sbe(3)
+
+          REAL(SP) :: rcutsq, cell_rsxp, cell_rsyp, cell_rszp
+          REAL(SP), DIMENSION(maxboxnatoms) :: rsq_vec
+          REAL(SP), DIMENSION(maxboxnatoms,4) :: cbmc_cell_rsp_priv
+          INTEGER, DIMENSION(maxboxnatoms) :: ti_priv, atomtype_priv, which_interact, which_i_exist
+
+          INTEGER :: bcd(2:3)
+
+          LOGICAL(1), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: cell_l_inrange!, adj_cell_l_inrange
+
+          INTEGER :: isolvent, lbc, border_range(2), n_interact, adj_iend
+          REAL(SP) :: bclr, dsp, drxp, dryp, drzp
+
+          INTEGER(INT64) :: superpopcnt
+
+          REAL(DP), DIMENSION(3,3) :: bitcell_H_T, bitcell_H_T_sign, cell_H_T, cell_H_T_sign
+          REAL(DP), DIMENSION(3,3,2) :: bitcell_H_T_spread, bitcell_H_T_posneg_sign, cell_H_T_spread, cell_H_T_posneg_sign
+
+          REAL(DP), DIMENSION(3) :: bitcell_xyzortho_bbox_length
+          INTEGER :: yi_mult, zi_mult
+          REAL(DP), DIMENSION(3,3) :: cell_H_dp
+          REAL(SP), DIMENSION(3) :: xyzi_sp, cell_rp
+          INTEGER :: bcp_shift, xcp_base_shift_stride, xcp_base_stride, zcp_base_shift, ycp_base_shift, xcp_base_shift
+          INTEGER :: zcp_base, ycp_base, xcp_base, j, xcp, ycp, zcp, xub, ylb, zlb, xlb, yub, zub
+          INTEGER(INT64) :: xfer_int64
+          INTEGER :: sbe_ti(2:3), sbe_ti_mat(2:3,solvents_or_types_maxind,0:n_big_atoms)
+          REAL(DP) :: bfd(3), bfdr(3)
+
+          INTEGER, DIMENSION(3) :: int8shape, int8ub, lbp16, int16shape, int16ub, int16shape_coarse, int16ub_coarse
+          INTEGER(INT64) :: xub_int64
+
+          INTEGER(INT16), DIMENSION(:,:,:), ALLOCATABLE :: bitcell_int16_array, coarse_voxel_array
+          INTEGER(INT64), DIMENSION(:), ALLOCATABLE :: zcavcount, zcavcount_coarse
+          INTEGER(INT64) :: ncavs, ncavs_coarse, ncavs_combined, ncavs_fine, icav, locbase, loc, xi_int64
+          INTEGER :: locbase_int32, loc_int32
+
+          INTEGER(INT16) :: oneshift_int16, ncavs_int16
+
+          INTEGER :: i_big_atom
+
+          !LOGICAL(2), DIMENSION(0:MASKR(15)) :: ncavlvec
+
+          INTEGER :: big_atom_start
+          INTEGER(INT16), DIMENSION(0:15), PARAMETER :: oneshift_int16_vec = &
+                  ISHFT(1_INT16,(/0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15/))
+          INTEGER(INT16), DIMENSION(0:7), PARAMETER :: oneshift_int16_vec_coarse = oneshift_int16_vec(::2)
+          INTEGER :: yi_chunkstart, yi_chunkstride, yi2_chunkstride, yi_chunkend, chunksize, chunksize_p16
+          INTEGER(INT16), DIMENSION(ISHFT(1,13)) :: bitcell_int16_vec
+          INTEGER(INT16) :: cavbits_int16
+          INTEGER(INT16) :: fine00,fine10,fine01,fine11,coarse,coarse2
+          INTEGER(INT16), PARAMETER :: mask_int16 = INT(Z'5555',INT16)
+
+          need_atom_ti = read_atompair_rminsq .OR. (cbmc_cell_list_flag .AND. precalc_atompair_nrg)
+          need_atomtypes = calc_rmin_flag .OR. (cbmc_cell_list_flag .AND. .NOT. precalc_atompair_nrg)
+          need_charges = ANY(int_charge_style .NE. charge_none) .AND. cbmc_cell_list_flag .AND. .NOT. precalc_atompair_nrg
+
+          !$OMP PARALLEL PRIVATE(ci,nca) &
+          !$OMP PRIVATE(int8shape,int8ub,lbp16,int16shape,int16ub,xub_int64,ncavs,icav,locbase_int32,locbase,xi_int64,oneshift_int16,i_big_atom) &
+          !$OMP PRIVATE(ncavs_int16,yi_chunkstart, yi_chunkstride, yi_chunkend, chunksize, chunksize_p16) &
+          !$OMP PRIVATE(bitcell_int16_vec, cavbits_int16, loc, loc_int32) &
+          !$OMP PRIVATE(ibox,istart,is,inlive,inatoms,bsolvent,vlen,iend,box_vlen) &
+          !$OMP PRIVATE(xyzi,xyzi_sp,xyzi_dp,xyzi_dp_spread,dxyzi_dp,drp,cell_rp,zi_mult,yi_mult) &
+          !$OMP PRIVATE(l_ortho,l_inrange_vec,l_inrange_vec_old,xi,yi,zi,i_dim,l_switch,bitmask) &
+          !$OMP PRIVATE(isolvent,priv_bitcell_int64,zlb,ylb,xub,xlb,yub,zub) &
+          !$OMP PRIVATE(lbox,clr,bclr,cp_ub,cp_lb,lc,rp_ub,rp_lb,bcp_shift) &
+          !$OMP PRIVATE(bcp,bcpx,bcpy,bcpz,bcps,ti) &
+          !$OMP PRIVATE(boxlen,lbc,tgt_slice,src_slice,bit_tgt_slice,bit_src_slice,border_range) &
+          !$OMP PRIVATE(bt,rcutsq,hl,length_cells_recip,hlcr) &
+          !$OMP PRIVATE(h11,h21,h31,h12,h22,h32,h13,h23,h33) &
+          !$OMP PRIVATE(dzi,dyi,dxi,zi2,yi2,xi2,cell_rsxp,cell_rsyp,cell_rszp) &
+          !$OMP PRIVATE(dsp,drxp,dryp,drzp,rsq_vec,cbmc_cell_rsp_priv,ti_priv,atomtype_priv) &
+          !$OMP PRIVATE(rxp,ryp,rzp,isp,n_interact,adj_iend,which_interact,bcd,sbe,xcp,ycp,zcp,i,j) &
+          !$OMP PRIVATE(zcp_base_shift,ycp_base_shift,xcp_base_shift,zcp_base,ycp_base,xcp_base,xfer_int64) &
+          !$OMP PRIVATE(fine00,fine10,fine01,fine11,coarse,coarse2) &
+          !$OMP PRIVATE(int16shape_coarse,int16ub_coarse,yi2_chunkstride) &
+          !$OMP PRIVATE(sbe_ti,sbe_ti_mat,bfd,bfdr)
+          IF (bitcell_flag) THEN
+                  !$OMP WORKSHARE
+                  bitcell_int64 = 0_INT64
+                  !$OMP END WORKSHARE
+          END IF
+          DO ibox = 1, nbr_boxes
+                istart = 1
+                DO is_present = 1, nspecies_present
+                        is = which_species_present(is_present)
+                        inlive = nlive(is,ibox)
+                        IF (inlive < 1) CYCLE
+                        inatoms = natoms(is)
+                        bsolvent = INT(species_list(is)%solvent_base,2)
+                        vlen = inlive*inatoms
+                        iend = istart + vlen - 1
+                        !$OMP WORKSHARE
+                        sp_live_atom_rsp(istart:iend,1:3,ibox) = RESHAPE(REAL(live_atom_rsp(1:inatoms,1:inlive,1:3,is_present,ibox),SP), &
+                                (/ vlen, 3 /))
+                        !$OMP END WORKSHARE
+                        IF (l_not_all_exist) THEN
+                                !$OMP WORKSHARE
+                                i_exist(istart:iend) = RESHAPE(live_atom_exist(1:inatoms,1:inlive,is_present,ibox), (/ vlen /))
+                                !$OMP END WORKSHARE
+                        END IF
+                        IF (need_atom_ti) THEN
+                                !$OMP WORKSHARE
+                                live_atom_ti(istart:iend,ibox) = RESHAPE(SPREAD(bsolvent+INT(vec123(1:inatoms),INT32),2,inlive), (/ vlen /))
+                                !$OMP END WORKSHARE
+                        END IF
+                        IF (need_atomtypes) THEN
+                                !$OMP WORKSHARE
+                                live_atom_atomtypes(istart:iend,ibox) = RESHAPE(SPREAD( &
+                                        nonbond_list(1:inatoms,is)%atom_type_number,2,inlive), (/ vlen /))
+                                !$OMP END WORKSHARE
+                        END IF
+                        IF (need_charges) THEN
+                                !$OMP WORKSHARE
+                                sp_live_atom_rsp(istart:iend,4,ibox) = RESHAPE(SPREAD(REAL( &
+                                        nonbond_list(1:inatoms,is)%charge,SP),2,inlive), (/ vlen /))
+                                !$OMP END WORKSHARE
+                        END IF
+                        istart = istart + vlen
+                END DO
+                vlen = istart - 1
+                box_vlen(ibox) = vlen
+                IF (l_not_all_exist) THEN
+                        IF (.NOT. ALL(i_exist(1:vlen))) THEN
+                                !$OMP SINGLE
+                                n_i_exist = 0
+                                DO i = 1, vlen
+                                        IF (i_exist(i)) THEN
+                                                n_i_exist = n_i_exist + 1
+                                                which_i_exist(n_i_exist) = i
+                                        END IF
+                                END DO
+                                !$OMP END SINGLE
+                                !$OMP WORKSHARE
+                                sp_live_atom_rsp(1:n_i_exist,:,ibox) = sp_live_atom_rsp(which_i_exist(1:n_i_exist),:,ibox)
+                                live_atom_ti(1:n_i_exist,ibox) = live_atom_ti(which_i_exist(1:n_i_exist),ibox)
+                                live_atom_atomtypes(1:n_i_exist,ibox) = live_atom_atomtypes(which_i_exist(1:n_i_exist),ibox)
+                                !$OMP END WORKSHARE
+                                box_vlen(ibox) = n_i_exist
+                        END IF
+                END IF
+          END DO
+
           DO ibox = 1, nbr_boxes
+                l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                !$OMP SINGLE
                 DO i = 1, 3
-                          length_cells(i,ibox) = INT(box_list(ibox)%length(i,i)/max_rmin)
-                          IF (MOD(length_cells(i,ibox),2) .EQ. 0) length_cells(i,ibox) = length_cells(i,ibox) - 1
-                          cell_length_inv(i,ibox) = REAL(length_cells(i,ibox),DP) / box_list(ibox)%length(i,i)
-                          sectorbound(i,ibox) = length_cells(i,ibox)/2
+                          IF (bitcell_flag) THEN
+                                  IF (i == 1) THEN
+                                          box_list(ibox)%length_bitcells(i) = MIN(8*INT(box_list(ibox)%face_distance(i) / &
+                                                  (8.0_DP*box_list(ibox)%ideal_bitcell_length)),SHIFTL(1,12))
+                                  ELSE
+                                          box_list(ibox)%length_bitcells(i) = MIN(INT(box_list(ibox)%face_distance(i) / &
+                                                  box_list(ibox)%ideal_bitcell_length),SHIFTL(1,12))
+
+                                  END IF
+                                  bitcell_H(:,i) = box_list(ibox)%length(:,i) / box_list(ibox)%length_bitcells(i)
+                                  box_list(ibox)%bit_cell_length_recip(i) = REAL(REAL( &
+                                          box_list(ibox)%length_bitcells(i),DP)/box_list(ibox)%length(i,i),SP)
+                          END IF
+                          box_list(ibox)%length_cells(i) = INT(box_list(ibox)%face_distance(i)/max_rmin)
+                          IF (MOD(box_list(ibox)%length_cells(i),2) .EQ. 0) box_list(ibox)%length_cells(i) = box_list(ibox)%length_cells(i) - 1
+                          box_list(ibox)%cell_H_dp(:,i) = box_list(ibox)%length(:,i) / box_list(ibox)%length_cells(i)
+                          box_list(ibox)%cell_H_diag(i) = box_list(ibox)%cell_H_dp(i,i)
+                          box_list(ibox)%cell_length_inv(i,:) = REAL(box_list(ibox)%length_cells(i),DP) * box_list(ibox)%length_inv(i,:)
+                          box_list(ibox)%cell_length_recip(i) = REAL(REAL(box_list(ibox)%length_cells(i),DP)/box_list(ibox)%length(i,i),SP) ! kind 4
+                          box_list(ibox)%sectorbound(i) = box_list(ibox)%length_cells(i)/2
+                          box_list(ibox)%sp_diag_length(i) = REAL(box_list(ibox)%length(i,i),SP)
+                END DO
+                IF (bitcell_flag) THEN
+                        bitcell_xyzortho_bbox_length = SUM(ABS(bitcell_H),2)
+                        box_list(ibox)%real_length_bitcells = REAL(box_list(ibox)%length_bitcells,SP)
+                        box_list(ibox)%bitcell_face_distance = box_list(ibox)%face_distance / box_list(ibox)%length_bitcells
+                        box_list(ibox)%bitcell_face_distance_recip = box_list(ibox)%length_bitcells / box_list(ibox)%face_distance
+                        bfd = box_list(ibox)%bitcell_face_distance
+                        bfdr = box_list(ibox)%bitcell_face_distance_recip
+                        DO i = 1, 3
+                                box_list(ibox)%setbit_extent(i) = &
+                                        MIN(INT(SQRT((box_list(ibox)%rcut_low_max*bfdr(i))**2 + 1.0_DP - SUM(bfd*bfdr(i))))-1,28)
+                        END DO
+                        ! MIN(...,28) above is unlikely to be necessary but is included just to be safe.
+                END IF
+                box_list(ibox)%cell_H_sp = REAL(box_list(ibox)%cell_H_dp,SP)
+                cell_H_dp = box_list(ibox)%cell_H_dp
+                box_list(ibox)%cell_xyzortho_bbox_length = SUM(ABS(cell_H_dp),2)
+                box_list(ibox)%real_length_cells = REAL(box_list(ibox)%length_cells,SP)
+                box_list(ibox)%cell_face_distance = box_list(ibox)%face_distance / box_list(ibox)%length_cells
+                IF (cbmc_cell_list_flag) THEN
+                        box_list(ibox)%border_thickness = &
+                                INT(CEILING(rcut_cbmc(ibox)*box_list(ibox)%length_cells/box_list(ibox)%face_distance))
+                        IF (ALLOCATED(box_list(ibox)%cbmc_cell_mask)) THEN
+                                IF (ANY(box_list(ibox)%border_thickness .NE. UBOUND(box_list(ibox)%cbmc_cell_mask))) THEN
+                                        DEALLOCATE(box_list(ibox)%cbmc_cell_mask)
+                                END IF
+                        END IF
+                        IF (.NOT. ALLOCATED(box_list(ibox)%cbmc_cell_mask)) THEN
+                                ALLOCATE(box_list(ibox)%cbmc_cell_mask( &
+                                        -box_list(ibox)%border_thickness(1):box_list(ibox)%border_thickness(1), &
+                                        -box_list(ibox)%border_thickness(2):box_list(ibox)%border_thickness(2), &
+                                        -box_list(ibox)%border_thickness(3):box_list(ibox)%border_thickness(3)))
+                        END IF
+                ELSE
+                        box_list(ibox)%border_thickness = 1
+                END IF
+                !$OMP END SINGLE
+                IF (cbmc_cell_list_flag) THEN
+                        zub = box_list(ibox)%border_thickness(3)
+                        yub = box_list(ibox)%border_thickness(2)
+                        xub = box_list(ibox)%border_thickness(1)
+                        zlb = -zub
+                        ylb = -yub
+                        xlb = -xub
+                        !$OMP DO COLLAPSE(3) SCHEDULE(STATIC)
+                        DO zi = zlb, zub
+                        DO yi = ylb, yub
+                        DO xi = xlb, xub
+                                xyzi = (/ xi, yi, zi /)
+                                xyzi_dp = REAL(xyzi,DP)
+                                IF (l_ortho) THEN
+                                        !dxyzi_dp = SIGN(MAX(ABS(xyzi_dp)-1.0_DP,0.0_DP),xyzi_DP)
+                                        !drp = dxyzi_dp*box_list(ibox)%cell_xyzortho_bbox_length
+                                        drp = MAX(ABS(xyzi_dp)-1.0_DP,0.0_DP)*box_list(ibox)%cell_xyzortho_bbox_length
+                                ELSE
+                                        !cellcenter_drp = MATMUL(cell_H_dp,xyzi_dp)
+                                        !drp = SIGN(MAX(ABS(cellcenter_drp)-box_list(ibox)%cell_xyzortho_bbox_length,0.0_DP),cellcenter_drp)
+                                        drp = MAX(ABS(MATMUL(cell_H_dp,xyzi_dp))-box_list(ibox)%cell_xyzortho_bbox_length,0.0_DP)
+                                END IF
+                                box_list(ibox)%cbmc_cell_mask(xi,yi,zi) = &
+                                        MERGE(NOT(0),0,DOT_PRODUCT(drp,drp)<rcut_cbmcsq(ibox))
+                        END DO
+                        END DO
+                        END DO
+                        !$OMP END DO
+                END IF
+                IF (bitcell_flag) THEN
+                        vlen = solvents_or_types_maxind
+                        sbe = box_list(ibox)%setbit_extent
+                        zlb = MERGE(0,-sbe(3),l_ortho)
+                        ylb = MERGE(0,-sbe(2),l_ortho)
+                        xub = MERGE(0,sbe(1)+1,l_ortho)
+                        xlb = -sbe(1)
+                        yub = sbe(2)
+                        zub = sbe(3)
+                        !$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
+                        DO zi = zlb, zub
+                        DO yi = ylb, yub
+                        priv_bitcell_int64 = 0_INT64
+                        l_inrange_vec = .FALSE.
+                        DO xi = xlb, xub
+                                xyzi = (/ xi, yi, zi /)
+                                xyzi_dp = REAL(xyzi,DP)
+                                IF (l_ortho) THEN
+                                        !dxyzi_dp = SIGN(ABS(xyzi_dp)+1.0_DP,xyzi_dp)
+                                        !dxyzi_dp = ABS(xyzi_dp)+1.0_DP
+                                        drp = ABS(xyzi_dp)*bitcell_xyzortho_bbox_length + bitcell_xyzortho_bbox_length
+                                ELSE
+                                        ! This is a simplified, less accurate way of finding the maximum possible atomic separation
+                                        ! squared for the two triclinic bitcells for triclinic boxes.  The inaccuracy is acceptable because it can
+                                        ! never underestimate the maximum separation (overestimation is safe, just less optimized),
+                                        ! and the inaccuracy should be small, especially if the box is only slightly tilted.
+                                        ! It basically finds the maximum possible absolute value of the x, y, and z displacements
+                                        ! separately, and uses the resulting displacement vector to compute the separation distance squared.
+                                        ! Inaccuracy can arise from the maximum absolute displacement vector lying outside the
+                                        ! bitcell/fractional coordinate displacement inequality constraints of the multivariate
+                                        ! optimization problem we're trying to solve.
+                                        !cellcenter_drp = MATMUL(bitcell_H,xyzi_dp)
+                                        !drp = SIGN(ABS(cellcenter_drp)+bitcell_xyzortho_bbox_length,cellcenter_drp)
+                                        ! below is absolute value of actual displacement because sign doesn't matter
+                                        drp = ABS(MATMUL(bitcell_H,xyzi_dp))+bitcell_xyzortho_bbox_length
+                                        ! below is old way that should work but is probably slower than what we use despite giving
+                                        ! the same results
+                                        !drp = MAXVAL( &
+                                        !        ABS(SUM((SPREAD(SPREAD(xyzi_dp,2,3),3,2)+bitcell_H_T_posneg_sign)*bitcell_H_T_spread,1)),2)
+                                        ! below is old way that doesn't work for all triclinic cells
+                                        ! drp = MATMUL(bitcell_H,dxyzi_dp)
+                                END IF
+                                rsq = DOT_PRODUCT(drp,drp)
+                                l_inrange_vec_old = l_inrange_vec
+                                IF (read_atompair_rminsq) THEN
+                                        l_inrange_vec(1:vlen,:) = rsq<solvent_min_rminsq(:,:,ibox)
+                                ELSE IF (calc_rmin_flag) THEN
+                                        l_inrange_vec(1:vlen,:) = rsq<atomtype_min_rminsq
+                                ELSE
+                                        l_inrange_vec(1,0) = rsq < rcut_lowsq
+                                END IF
+                                l_switch = l_inrange_vec .AND. .NOT. l_inrange_vec_old
+                                IF (ANY(l_switch)) THEN
+                                        IF (l_ortho) THEN
+                                                bitmask = IAND(MASKL(36-xi,INT64),MASKR(29-xi,INT64))
+                                        ELSE
+                                                bitmask = MASKL(36-xi,8)
+                                        END IF
+                                        DO i_big_atom = 0, n_big_atoms
+                                                DO isolvent = 1, vlen
+                                                        IF (l_switch(isolvent,i_big_atom)) THEN
+                                                                priv_bitcell_int64(isolvent,i_big_atom) = bitmask
+                                                        END IF
+                                                END DO
+                                        END DO
+                                END IF
+                                IF (l_ortho) CYCLE
+                                l_switch = l_inrange_vec_old .AND. .NOT. l_inrange_vec
+                                IF (ANY(l_switch)) THEN
+                                        bitmask = MASKR(28+xi,8)
+                                        DO i_big_atom = 0, n_big_atoms
+                                                DO isolvent = 1, vlen
+                                                        IF (l_switch(isolvent,i_big_atom)) THEN
+                                                                priv_bitcell_int64(isolvent,i_big_atom) = &
+                                                                        IAND(priv_bitcell_int64(isolvent,i_big_atom),bitmask)
+                                                        END IF
+                                                END DO
+                                        END DO
+                                END IF
+                        END DO
+                        bitcell_int64(yi,zi,1:vlen,:,ibox) = priv_bitcell_int64(1:vlen,:)
+                        END DO
+                        END DO
+                        !$OMP END DO
+                        IF (l_ortho) THEN
+                                ! Apply symmetry for orthogonal voxels
+                                !$OMP WORKSHARE
+                                bitcell_int64(-1:-sbe(2):-1,-1:-sbe(3):-1,:,:,ibox) = &
+                                        bitcell_int64(1:sbe(2),1:sbe(3),:,:,ibox)
+                                bitcell_int64(0:sbe(2),-1:-sbe(3):-1,:,:,ibox) = &
+                                        bitcell_int64(0:sbe(2),1:sbe(3),:,:,ibox)
+                                bitcell_int64(-1:-sbe(2):-1,0:sbe(3),:,:,ibox) = &
+                                        bitcell_int64(1:sbe(2),0:sbe(3),:,:,ibox)
+                                !$OMP END WORKSHARE
+                        END IF
+                END IF
+          END DO
+          !$OMP BARRIER
+          DO ibox = 1, nbr_boxes
+                vlen = box_vlen(ibox)
+                l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                DO i_dim = 1, 3
+                        IF (l_ortho) THEN
+                                lbox = box_list(ibox)%sp_diag_length(i_dim)
+                                clr = box_list(ibox)%cell_length_recip(i_dim)
+                                bclr = box_list(ibox)%bit_cell_length_recip(i_dim)
+                        ELSE
+                                lbox = 1.0
+                                clr = box_list(ibox)%real_length_cells(i_dim)
+                                bclr = box_list(ibox)%real_length_bitcells(i_dim)
+                        END IF
+                        cp_ub = box_list(ibox)%sectorbound(i_dim)
+                        cp_lb = -cp_ub
+                        lc = box_list(ibox)%length_cells(i_dim)
+                        rp_ub = 0.5*lbox
+                        rp_lb = -rp_ub
+                        bcp_shift = MERGE(4,32,i_dim==1)
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP DO SIMD PRIVATE(rsp,cp) SCHEDULE(SIMD:STATIC)
+                        DO i = 1, vlen
+                                rsp = sp_live_atom_rsp(i,i_dim,ibox)
+                                IF (rsp > rp_ub) THEN
+                                        rsp = rsp - lbox
+                                ELSE IF (rsp < rp_lb) THEN
+                                        rsp = rsp + lbox
+                                END IF
+                                sp_live_atom_rsp(i,i_dim,ibox) = rsp
+                                cp = NINT(rsp*clr)
+                                ! Wrapping cell coordinates is rarely necessary, but it is done anyway just in case
+                                ! rsp is exactly on a box boundary, which would yield out-of-bounds cell coordinates
+                                ! if not corrected.  The risk is increased because single-precision floats are used.
+                                IF (cp > cp_ub) THEN
+                                        cp = cp - lc
+                                ELSE IF (cp < cp_lb) THEN
+                                        cp = cp + lc
+                                END IF
+                                live_atom_cp(i,i_dim,ibox) = cp
+                                ! bit cell int64 array placement start
+                                ! make sure this conditional is brought out of the loop by compiler.
+                                ! bitcell lower bound (min bitcell index in box) is 32 (int 4, bit 0 for first axis)
+                                !   in order to avoid performing integer division or modulo on negative int,
+                                !   which would throw off the indexing.  The methods used require
+                                !   integer division to function like float division followed by floor.
+                                !IF (bitcell_flag) THEN
+                                !        live_atom_bcp(i,i_dim,ibox) = INT((rsp+rp_ub)*bclr) + 4
+                                !END IF
+                                IF (bitcell_flag) THEN
+                                        live_atom_bcp(i,i_dim,ibox) = INT((rsp+rp_ub)*bclr) + bcp_shift
+                                END IF
+                        END DO
+                        !$OMP END DO SIMD
                 END DO
           END DO
-          ! cell_length = 1.0_DP / cell_length_inv
+          !$OMP BARRIER
+          !$OMP SINGLE
+          sectormaxbound = box_list(1)%sectorbound + box_list(1)%border_thickness
+          DO ibox = 2, nbr_boxes
+                sectormaxbound = MAX(sectormaxbound,box_list(ibox)%sectorbound + box_list(ibox)%border_thickness)
+          END DO
 
-          sectormaxbound = MAXVAL(sectorbound, 2)
-          !map_bound = sectormaxbound*3+1
-          IF (.NOT. ALL( sectormaxbound <= sectormaxbound_old)) THEN
-                  IF (ALLOCATED(sector_index_map)) DEALLOCATE(sector_index_map)
-                  ALLOCATE(sector_index_map(-sectormaxbound(1):sectormaxbound(1), &
+          adj_cellmaxbound_old = adj_cellmaxbound
+          DO ibox = 1, nbr_boxes
+                adj_cellmaxbound = MAX(adj_cellmaxbound,box_list(ibox)%sectorbound)
+          END DO
+          ALLOCATE(n_cell_atoms( &
+                  -sectormaxbound(1):sectormaxbound(1), &
+                  -sectormaxbound(2):sectormaxbound(2), &
+                  -sectormaxbound(3):sectormaxbound(3), &
+                  nbr_boxes), Stat=AllocateStatus)
+          IF (Allocatestatus /= 0) THEN
+            err_msg = ''
+            err_msg(1) = 'Memory could not be allocated for n_cell_atoms'
+            CALL Clean_Abort(err_msg, 'Sector_Setup')
+          END IF
+          IF (ALLOCATED(n_adj_cell_atoms)) THEN
+                  IF (ANY(adj_cellmaxbound > adj_cellmaxbound_old)) THEN
+                          DEALLOCATE(n_adj_cell_atoms, Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be deallocated from n_adj_cell_atoms'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                  END IF
+          END IF
+          IF (.NOT. ALLOCATED(n_adj_cell_atoms)) THEN
+                  ALLOCATE(n_adj_cell_atoms( &
+                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                          nbr_boxes), Stat=AllocateStatus)
+                  IF (Allocatestatus /= 0) THEN
+                    err_msg = ''
+                    err_msg(1) = 'Memory could not be allocated for n_adj_cell_atoms'
+                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                  END IF
+                  !WRITE(*,*) "Allocated n_adj_cell_atoms with:"
+                  !WRITE(*,*) "SHAPE", SHAPE(n_adj_cell_atoms)
+                  !WRITE(*,*) "Occupying ", SIZEOF(n_adj_cell_atoms), " bytes"
+                  !WRITE(*,*) "on step ", i_mcstep
+          END IF
+          max_adj_cell_atoms_old = max_adj_cell_atoms
+          max_adj_cell_atoms = 0
+          max_neighbors = 0
+          !$OMP END SINGLE
+          !$OMP WORKSHARE
+          n_cell_atoms = 0
+          !$OMP END WORKSHARE
+          !$OMP SINGLE
+          DO ibox = 1, nbr_boxes
+                DO i = 1, box_vlen(ibox)
+                        ci(1:3) = live_atom_cp(i,:,ibox)
+                        nca = n_cell_atoms(ci(1),ci(2),ci(3),ibox)+1
+                        ci(4) = nca
+                        n_cell_atoms(ci(1),ci(2),ci(3),ibox) = nca
+                        ci_list(:,i,ibox) = ci
+                END DO
+          END DO
+          !$OMP END SINGLE
+          !$OMP WORKSHARE
+          max_sector_natoms = MAXVAL(n_cell_atoms)
+          !$OMP END WORKSHARE
+          !$OMP SINGLE
+          ALLOCATE(this_cell_rsp(max_sector_natoms, 4, &
+                  -sectormaxbound(1):sectormaxbound(1), &
+                  -sectormaxbound(2):sectormaxbound(2), &
+                  -sectormaxbound(3):sectormaxbound(3), &
+                  nbr_boxes), Stat=AllocateStatus)
+          IF (Allocatestatus /= 0) THEN
+            err_msg = ''
+            err_msg(1) = 'Memory could not be allocated for this_cell_rsp'
+            CALL Clean_Abort(err_msg, 'Sector_Setup')
+          END IF
+          IF (need_atom_ti) THEN
+                  ALLOCATE(this_cell_ti(max_sector_natoms, &
+                          -sectormaxbound(1):sectormaxbound(1), &
                           -sectormaxbound(2):sectormaxbound(2), &
                           -sectormaxbound(3):sectormaxbound(3), &
-                          nbr_boxes))
-                  IF (ALLOCATED(sector_has_atoms)) DEALLOCATE(sector_has_atoms)
-                  ALLOCATE(sector_has_atoms(-sectormaxbound(1):sectormaxbound(1), &
+                          nbr_boxes), Stat=AllocateStatus)
+                  IF (Allocatestatus /= 0) THEN
+                    err_msg = ''
+                    err_msg(1) = 'Memory could not be allocated for this_cell_ti'
+                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                  END IF
+          END IF
+          IF (need_atomtypes) THEN
+                  ALLOCATE(this_cell_atomtypes(max_sector_natoms, &
+                          -sectormaxbound(1):sectormaxbound(1), &
                           -sectormaxbound(2):sectormaxbound(2), &
                           -sectormaxbound(3):sectormaxbound(3), &
-                          nbr_boxes))
+                          nbr_boxes), Stat=AllocateStatus)
+                  IF (Allocatestatus /= 0) THEN
+                    err_msg = ''
+                    err_msg(1) = 'Memory could not be allocated for this_cell_atomtypes'
+                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                  END IF
           END IF
-          max_occ_sectors_old = max_occ_sectors
-          total_atoms = DOT_PRODUCT(SUM(nmols(:,1:),2), natoms)
-          max_occ_sectors=MAX(max_occ_sectors_old,MIN(total_atoms,SUM(PRODUCT(length_cells,1))))
-          IF (max_occ_sectors > max_occ_sectors_old) THEN
-                  IF (ALLOCATED(sector_n_atoms)) DEALLOCATE(sector_n_atoms)
-                  IF (ALLOCATED(sector_atoms)) DEALLOCATE(sector_atoms)
-                  ALLOCATE(sector_n_atoms(0:max_occ_sectors))
-                  ALLOCATE(sector_atoms(max_sector_natoms,max_occ_sectors,3))
+          IF (bitcell_flag) THEN
+                  ALLOCATE(this_cell_atom_i(max_sector_natoms, &
+                          -sectormaxbound(1):sectormaxbound(1), &
+                          -sectormaxbound(2):sectormaxbound(2), &
+                          -sectormaxbound(3):sectormaxbound(3)), &
+                          Stat=AllocateStatus)
+                  IF (Allocatestatus /= 0) THEN
+                    err_msg = ''
+                    err_msg(1) = 'Memory could not be allocated for this_cell_atom_i'
+                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                  END IF
           END IF
-          !$OMP PARALLEL WORKSHARE DEFAULT(SHARED)
-          sector_n_atoms = 0
-          n_occ_sectors = 0
-          sector_has_atoms = .FALSE.
-          !$OMP END PARALLEL WORKSHARE
-          asflag = .TRUE.
-          AllocationSizeLoop: DO
-          ! place atoms in sectors
-          BoxLoop:DO ibox = 1, nbr_boxes
-                SpeciesLoop:DO is = 1, nspecies
-                        sector_atom_ID(3) = is
-                        !$OMP PARALLEL DO DEFAULT(SHARED) &
-                        !$OMP PRIVATE(i,im,ia,atom_ptr,cp) SCHEDULE(DYNAMIC)
-                        MoleculeLoop:DO imol = 1, nmols(is,ibox)
-                                im = locate(imol, is, ibox)
-                                IF (.NOT. molecule_list(im,is)%live) CYCLE MoleculeLoop
-                                AtomLoop:DO ia = 1, natoms(is)
-                                        atom_ptr => atom_list(ia,im,is)
-                                        IF (.NOT. atom_ptr%exist) CYCLE AtomLoop
-                                        cp(1) = atom_ptr%rxp
-                                        cp(2) = atom_ptr%ryp
-                                        cp(3) = atom_ptr%rzp
-                                        atom_ptr%ci = IDNINT(cp*cell_length_inv(:,ibox))
-                                        DO i = 1,3
-                                                IF (atom_ptr%ci(i) > sectorbound(i,ibox)) THEN
-                                                        atom_ptr%ci(i) = atom_ptr%ci(i) - length_cells(i,ibox)
-                                                ELSE IF (atom_ptr%ci(i) < -sectorbound(i,ibox)) THEN
-                                                        atom_ptr%ci(i) = atom_ptr%ci(i) + length_cells(i,ibox)
+          !$OMP END SINGLE
+          DO ibox = 1, nbr_boxes
+                vlen = box_vlen(ibox)
+                !$OMP DO SCHEDULE(STATIC)
+                DO i = 1, vlen
+                        ci = ci_list(:,i,ibox)
+                        this_cell_rsp(ci(4),1:4,ci(1),ci(2),ci(3),ibox) = &
+                                sp_live_atom_rsp(i,:,ibox)
+                        IF (need_atom_ti) THEN
+                                this_cell_ti(ci(4),ci(1),ci(2),ci(3),ibox) = &
+                                        live_atom_ti(i,ibox)
+                        END IF
+                        IF (need_atomtypes) THEN
+                                this_cell_atomtypes(ci(4),ci(1),ci(2),ci(3),ibox) = &
+                                        live_atom_atomtypes(i,ibox)
+                        END IF
+                        IF (bitcell_flag) THEN
+                                this_cell_atom_i(ci(4),ci(1),ci(2),ci(3)) = i
+                        END IF
+                END DO
+                !$OMP END DO
+                l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                IF (l_ortho) THEN
+                        DO i_dim = 1, 3
+                                boxlen(i_dim) = REAL(box_list(ibox)%length(i_dim,i_dim),SP)
+                        END DO
+                ELSE
+                        boxlen = 1.0
+                END IF
+                IF (bitcell_flag) THEN
+                        sbe = box_list(ibox)%setbit_extent
+                        !$OMP SINGLE
+                        lbp32 = box_list(ibox)%length_bitcells
+                        ALLOCATE(bitcell_int8_array( &
+                                0:7+lbp32(1)/8, &
+                                0:63+lbp32(2), &
+                                0:63+lbp32(3),&
+                                0:n_big_atoms), Stat=AllocateStatus)
+                        IF (Allocatestatus /= 0) THEN
+                          err_msg = ''
+                          err_msg(1) = 'Memory could not be allocated for bitcell_int8_array'
+                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                        END IF
+                        lbp32(1) = IAND(lbp32(1)+31,NOT(31))
+                        !$OMP END SINGLE
+                        !$OMP WORKSHARE
+                        bitcell_int8_array = 0_INT8
+                        !$OMP END WORKSHARE
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP DO SIMD PRIVATE(bcp) SCHEDULE(SIMD:STATIC)
+                        DO i = 1, vlen
+                                bcp = live_atom_bcp(i,1,ibox)
+                                live_atom_bcp(i,1,ibox) = ISHFT(bcp,-3) ! bcp / 8
+                                live_atom_bcp(i,4,ibox) = IAND(bcp,7) ! MOD(bcp,8)
+                        END DO
+                        !$OMP END DO SIMD
+
+
+
+                        !DIR$ ASSUME_ALIGNED live_atom_bcp_T:array_align_bytes
+                        !$OMP WORKSHARE
+                        live_atom_bcp_T = TRANSPOSE(live_atom_bcp(:,:,ibox))
+                        xcp_base_shift_stride = INT(CEILING( &
+                                72.0_DP*REAL(box_list(ibox)%length_cells(1),DP)/REAL(box_list(ibox)%length_bitcells(1),DP)))
+                        xcp_base_stride = 2*xcp_base_shift_stride
+                        !$OMP END WORKSHARE
+                        bfd = box_list(ibox)%bitcell_face_distance
+                        bfdr = box_list(ibox)%bitcell_face_distance_recip
+                        IF (read_atompair_rminsq) THEN
+                                sbe_ti_mat(2,:,:) = MIN(INT(SQRT(solvent_min_rminsq(:,:,ibox)*bfdr(2)*bfdr(2) - bfd(1)*bfdr(2) - &
+                                        bfd(3)*bfdr(2)))-1,28)
+                                sbe_ti_mat(3,:,:) = MIN(INT(SQRT(solvent_min_rminsq(:,:,ibox)*bfdr(3)*bfdr(3) - bfd(1)*bfdr(3) - &
+                                        bfd(2)*bfdr(3)))-1,28)
+                        ELSE IF (calc_rmin_flag) THEN
+                                sbe_ti_mat(2,:,:) = MIN(INT(SQRT(atomtype_min_rminsq*bfdr(2)*bfdr(2) - bfd(1)*bfdr(2) - &
+                                        bfd(3)*bfdr(2)))-1,28)
+                                sbe_ti_mat(3,:,:) = MIN(INT(SQRT(atomtype_min_rminsq*bfdr(3)*bfdr(3) - bfd(1)*bfdr(3) - &
+                                        bfd(2)*bfdr(3)))-1,28)
+                        ELSE
+                                sbe_ti = sbe(2:3)
+                                ti = 1
+                        END IF
+                        ! Domain decomposition allows some but not all cells to have overlap masks applied in parallel
+                        ! The OMP DO must be executed 8 times
+                        DO zcp_base_shift = 0,2,2
+                        DO ycp_base_shift = 0,2,2
+                        DO xcp_base_shift = 0,xcp_base_shift_stride,xcp_base_shift_stride
+                        zub = box_list(ibox)%sectorbound(3)
+                        zlb = zcp_base_shift-zub
+                        yub = box_list(ibox)%sectorbound(2)
+                        ylb = ycp_base_shift-yub
+                        xub = box_list(ibox)%sectorbound(1)
+                        xlb = xcp_base_shift-xub
+                        !$OMP DO COLLAPSE(3) SCHEDULE(STATIC)
+                        DO zcp_base = zlb, zub, 4
+                        DO ycp_base = ylb, yub, 4
+                        DO xcp_base = xlb, xub, xcp_base_stride
+                        DO zcp = zcp_base, MIN(zcp_base+1,box_list(ibox)%sectorbound(3))
+                        DO ycp = ycp_base, MIN(ycp_base+1,box_list(ibox)%sectorbound(2))
+                        DO xcp = xcp_base, MIN(xcp_base+xcp_base_shift_stride-1,box_list(ibox)%sectorbound(1))
+                        DO j = 1, n_cell_atoms(xcp,ycp,zcp,ibox)
+                                i = this_cell_atom_i(j,xcp,ycp,zcp)
+                                bcpx = live_atom_bcp_T(1,i)
+                                bcpy = live_atom_bcp_T(2,i)
+                                bcpz = live_atom_bcp_T(3,i)
+                                bcps = live_atom_bcp_T(4,i)
+                                IF (read_atompair_rminsq) THEN
+                                        ti = live_atom_ti(i,ibox)
+                                ELSE IF (calc_rmin_flag) THEN
+                                        ti = which_solvent_atomtypes_inv(live_atom_atomtypes(i,ibox))
+                                END IF
+                                ! i_big_atom 0 is the "small" atom used for BOVINE (usually not for cavity biasing)
+                                DO i_big_atom = 0, n_big_atoms
+                                        IF (read_atompair_rminsq .OR. calc_rmin_flag) sbe_ti = sbe_ti_mat(:,ti,i_big_atom)
+                                        DO zi = -sbe_ti(3), sbe_ti(3)
+                                                DO yi = -sbe_ti(2), sbe_ti(2)
+                                                        bitmask = bitcell_int64(yi,zi,ti,i_big_atom,ibox)
+                                                        IF (bitmask .EQ. 0_INT64) CYCLE ! IOR with 0 changes nothing
+                                                        bitmask = ISHFT(bitmask,bcps)
+                                                        xfer_int64 = TRANSFER(bitcell_int8_array(bcpx:bcpx+7,bcpy+yi,bcpz+zi,i_big_atom),xfer_int64)
+                                                        bitmask = IOR(xfer_int64,bitmask)
+                                                        IF (bitmask == xfer_int64) CYCLE ! No point in writing what is already there
+                                                        bitcell_int8_array(bcpx:bcpx+7,bcpy+yi,bcpz+zi,i_big_atom) = &
+                                                                TRANSFER(bitmask,bitcell_int8_array)
+                                                END DO
+                                        END DO
+                                END DO
+                        END DO
+                        END DO
+                        END DO
+                        END DO
+                        END DO
+                        END DO
+                        END DO
+                        !$OMP END DO
+                        END DO
+                        END DO
+                        END DO
+                END IF
+                ! Apply PBC to cell list structure and the overlap bitmaps
+                DO xi = -1, 1
+                        DO yi = -1, 1
+                                DO zi = -1, 1
+                                        IF (xi == 0 .AND. yi == 0 .AND. zi == 0) CYCLE
+                                        xyzi = (/ xi, yi, zi /)
+                                        unwrap_shifter = xyzi*boxlen
+                                        DO i_dim = 1, 3
+                                                IF (bitcell_flag) THEN
+                                                        lbc = box_list(ibox)%length_bitcells(i_dim)
+                                                        IF (i_dim == 1) lbc = ISHFT(lbc,-3) ! same as lbc / 8 since lbc>0
+                                                END IF
+                                                IF (xyzi(i_dim) == 0) THEN
+                                                        tgt_slice(:,i_dim) = (/ -1, 1 /) * box_list(ibox)%sectorbound(i_dim)
+                                                        src_slice(:,i_dim) = tgt_slice(:,i_dim)
+                                                        IF (bitcell_flag) THEN
+                                                                IF (i_dim == 1) THEN
+                                                                        bit_tgt_slice(:,1) = (/ 4, 3+lbc /)
+                                                                ELSE
+                                                                        bit_tgt_slice(:,i_dim) = (/ 32, &
+                                                                                31 + lbc /)
+                                                                END IF
+                                                                bit_src_slice(:,i_dim) = bit_tgt_slice(:,i_dim)
+                                                        END IF
+                                                ELSE
+                                                        IF (xyzi(i_dim) == 1) THEN
+                                                                border_range = (/ 1, box_list(ibox)%border_thickness(i_dim) /)
+                                                        ELSE
+                                                                border_range = (/ box_list(ibox)%border_thickness(i_dim), 1 /)
+                                                        END IF
+                                                        tgt_slice(:,i_dim) = box_list(ibox)%sectorbound(i_dim)*xyzi(i_dim) + &
+                                                                xyzi(i_dim)*border_range
+                                                        src_slice(:,i_dim) = -box_list(ibox)%sectorbound(i_dim)*xyzi(i_dim) + &
+                                                                xyzi(i_dim)*(border_range-1)
+                                                        IF (bitcell_flag) THEN
+                                                                IF (i_dim == 1) THEN
+                                                                        IF (xyzi(i_dim) == 1) THEN
+                                                                                bit_src_slice(:,1) = lbc + (/ 4, 7 /)
+                                                                                bit_tgt_slice(:,1) = (/ 4, 7 /)
+                                                                        ELSE
+                                                                                bit_src_slice(:,1) = (/ 0, 3 /)
+                                                                                bit_tgt_slice(:,1) = lbc + (/ 0, 3 /)
+                                                                        END IF
+                                                                ELSE
+                                                                        IF (xyzi(i_dim) == 1) THEN
+                                                                                bit_src_slice(:,i_dim) = lbc + (/ 32, 63 /)
+                                                                                bit_tgt_slice(:,i_dim) = (/ 32, 63 /)
+                                                                        ELSE
+                                                                                bit_src_slice(:,i_dim) = (/ 0, 31 /)
+                                                                                bit_tgt_slice(:,i_dim) = lbc + (/ 0, 31 /)
+                                                                        END IF
+                                                                END IF
+                                                        END IF
                                                 END IF
                                         END DO
-                                END DO AtomLoop
-                        END DO MoleculeLoop
-                        !$OMP END PARALLEL DO
-                        MoleculeLoop2:DO imol = 1, nmols(is,ibox)
-                                im = locate(imol, is, ibox)
-                                sector_atom_ID(2) = im
-                                IF (.NOT. molecule_list(im,is)%live) CYCLE MoleculeLoop2
-                                AtomLoop2:DO ia = 1, natoms(is)
-                                        IF (.NOT. atom_list(ia,im,is)%exist) CYCLE AtomLoop2
-                                        sector_atom_ID(1) = ia
-                                        ci = atom_list(ia,im,is)%ci
-                                        IF (sector_has_atoms(ci(1),ci(2),ci(3),ibox)) THEN
-                                                secind = sector_index_map(ci(1),ci(2),ci(3),ibox)
-                                                sector_n_atoms(secind) = sector_n_atoms(secind)+1
+                                        IF (bitcell_flag) THEN
+                                                !$OMP WORKSHARE
+                                                bitcell_int8_array( &
+                                                        bit_tgt_slice(1,1):bit_tgt_slice(2,1), &
+                                                        bit_tgt_slice(1,2):bit_tgt_slice(2,2), &
+                                                        bit_tgt_slice(1,3):bit_tgt_slice(2,3),:) &
+                                                        = IOR(bitcell_int8_array( &
+                                                        bit_tgt_slice(1,1):bit_tgt_slice(2,1), &
+                                                        bit_tgt_slice(1,2):bit_tgt_slice(2,2), &
+                                                        bit_tgt_slice(1,3):bit_tgt_slice(2,3),:), &
+                                                        bitcell_int8_array( &
+                                                        bit_src_slice(1,1):bit_src_slice(2,1), &
+                                                        bit_src_slice(1,2):bit_src_slice(2,2), &
+                                                        bit_src_slice(1,3):bit_src_slice(2,3),:))
+                                                !$OMP END WORKSHARE NOWAIT
+                                        END IF
+                                        DO i_dim = 1, 3
+                                                !$OMP WORKSHARE
+                                                this_cell_rsp(:,i_dim, &
+                                                        tgt_slice(1,1):tgt_slice(2,1), &
+                                                        tgt_slice(1,2):tgt_slice(2,2), &
+                                                        tgt_slice(1,3):tgt_slice(2,3), &
+                                                        ibox) = &
+                                                        this_cell_rsp(:,i_dim, &
+                                                        src_slice(1,1):src_slice(2,1), &
+                                                        src_slice(1,2):src_slice(2,2), &
+                                                        src_slice(1,3):src_slice(2,3), &
+                                                        ibox) + unwrap_shifter(i_dim)
+                                                !$OMP END WORKSHARE
+                                        END DO
+                                        !$OMP WORKSHARE
+                                        this_cell_rsp(:,4, &
+                                                tgt_slice(1,1):tgt_slice(2,1), &
+                                                tgt_slice(1,2):tgt_slice(2,2), &
+                                                tgt_slice(1,3):tgt_slice(2,3), &
+                                                ibox) = &
+                                                this_cell_rsp(:,4, &
+                                                src_slice(1,1):src_slice(2,1), &
+                                                src_slice(1,2):src_slice(2,2), &
+                                                src_slice(1,3):src_slice(2,3), &
+                                                ibox) ! charge
+                                        n_cell_atoms(&
+                                                tgt_slice(1,1):tgt_slice(2,1), &
+                                                tgt_slice(1,2):tgt_slice(2,2), &
+                                                tgt_slice(1,3):tgt_slice(2,3), &
+                                                ibox) = &
+                                                n_cell_atoms(&
+                                                src_slice(1,1):src_slice(2,1), &
+                                                src_slice(1,2):src_slice(2,2), &
+                                                src_slice(1,3):src_slice(2,3), &
+                                                ibox)
+                                        !$OMP END WORKSHARE
+                                        IF (need_atom_ti) THEN
+                                                !$OMP WORKSHARE
+                                                this_cell_ti(:, &
+                                                        tgt_slice(1,1):tgt_slice(2,1), &
+                                                        tgt_slice(1,2):tgt_slice(2,2), &
+                                                        tgt_slice(1,3):tgt_slice(2,3), &
+                                                        ibox) = &
+                                                        this_cell_ti(:, &
+                                                        src_slice(1,1):src_slice(2,1), &
+                                                        src_slice(1,2):src_slice(2,2), &
+                                                        src_slice(1,3):src_slice(2,3), &
+                                                        ibox)
+                                                !$OMP END WORKSHARE
+                                        END IF
+                                        IF (need_atomtypes) THEN
+                                                !$OMP WORKSHARE
+                                                this_cell_atomtypes(:, &
+                                                        tgt_slice(1,1):tgt_slice(2,1), &
+                                                        tgt_slice(1,2):tgt_slice(2,2), &
+                                                        tgt_slice(1,3):tgt_slice(2,3), &
+                                                        ibox) = &
+                                                        this_cell_atomtypes(:, &
+                                                        src_slice(1,1):src_slice(2,1), &
+                                                        src_slice(1,2):src_slice(2,2), &
+                                                        src_slice(1,3):src_slice(2,3), &
+                                                        ibox)
+                                                !$OMP END WORKSHARE
+                                        END IF
+                                END DO
+                        END DO
+                END DO
+                IF (bitcell_flag) THEN
+                        bcd(2) = &
+                                ISHFT(lbp32(1),-5)
+                        bcd(3) = &
+                                bcd(2) * lbp32(2)
+                        vlen = bcd(3) * lbp32(3)
+                        !$OMP SINGLE
+                        box_list(ibox)%bitcell_dimfactor = bcd
+                        IF (ALLOCATED(box_list(ibox)%bitcell_int32_vec)) THEN
+                                IF (vlen > SIZE(box_list(ibox)%bitcell_int32_vec)) THEN
+                                        DEALLOCATE(box_list(ibox)%bitcell_int32_vec, Stat=Allocatestatus)
+                                        IF (Allocatestatus /= 0) THEN
+                                          err_msg = ''
+                                          err_msg(1) = 'Memory could not be deallocated from bitcell_int32_vec'
+                                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                        END IF
+                                END IF
+                        END IF
+                        IF (.NOT. ALLOCATED(box_list(ibox)%bitcell_int32_vec)) THEN
+                                ALLOCATE(box_list(ibox)%bitcell_int32_vec(0:vlen-1), Stat=Allocatestatus)
+                                IF (Allocatestatus /= 0) THEN
+                                  err_msg = ''
+                                  err_msg(1) = 'Memory could not be allocated for bitcell_int32_vec'
+                                  CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                END IF
+                                WRITE(*,*) "Allocated bitcell_int32_vec with:"
+                                WRITE(*,*) "SIZE", SIZE(box_list(ibox)%bitcell_int32_vec)
+                                WRITE(*,*) "Occupying ", SIZEOF(box_list(ibox)%bitcell_int32_vec), " bytes"
+                                WRITE(*,*) "on step ", i_mcstep
+                        END IF
+                        ! bitcell_int8_array_2 shouldn't be necessary, but I sometimes get segfaults from 
+                        !    an array supposedly not being allocated if I don't use it as an 
+                        !    intermediary. Feel free to remove it and transfer straight from 
+                        !    bitcell_int8_array (with the same slicing) if you can do so without segfaults.
+                        ALLOCATE(bitcell_int8_array_2(ISHFT(lbp32(1),-3),lbp32(2),lbp32(3)), Stat=Allocatestatus)
+                        IF (Allocatestatus /= 0) THEN
+                          err_msg = ''
+                          err_msg(1) = 'Memory could not be allocated for bitcell_int8_array_2'
+                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                        END IF
+                        !$OMP END SINGLE
+                        !$OMP WORKSHARE
+                        bitcell_int8_array_2 = bitcell_int8_array( &
+                                4:3+ISHFT(lbp32(1),-3), &
+                                32:31+lbp32(2), &
+                                32:31+lbp32(3),0)
+                        box_list(ibox)%bitcell_int32_vec(0:vlen-1) = TRANSFER(bitcell_int8_array_2, &
+                                box_list(ibox)%bitcell_int32_vec)
+                        !$OMP END WORKSHARE
+                        IF (cavity_biasing_flag) THEN
+                                ! determine array dimensions and bounds
+                                int8shape = box_list(ibox)%length_bitcells
+                                xub = box_list(ibox)%length_bitcells(1)-1
+                                xub_int64 = INT(xub,INT64)
+                                int8shape(1) = ISHFT(int8shape(1),-3)
+                                int8ub = int8shape - 1
+                                lbp16 = box_list(ibox)%length_bitcells
+                                lbp16(1) = IAND(lbp16(1)+15,NOT(15))
+                                int16shape = lbp16
+                                int16shape(1) = ISHFT(int16shape(1),-4)
+                                int16ub = int16shape - 1
+                                int16shape_coarse = int16shape
+                                int16shape_coarse(2:3) = SHIFTR(int16shape_coarse(2:3),1)
+                                int16ub_coarse = int16shape_coarse - 1
+                                yi_chunkstride = MIN(MASKR(15)/lbp16(1),lbp16(2))
+                                yi2_chunkstride = MIN(MASKR(16)/lbp16(1),int16shape_coarse(2))
+                                zub = int8ub(3)
+                                !$OMP SINGLE
+                                ! can cavity voxel coordinates be represented with 32-bit integers?
+                                box_list(ibox)%l_cavloc_int32 = &
+                                        ALL(box_list(ibox)%length_bitcells <= SHIFTL(1, 10+(/1,0,0/)))
+                                big_atom_start = MERGE(1,0,read_atompair_rminsq .OR. calc_rmin_flag)
+                                IF (.NOT. ALLOCATED(cavdatalist)) ALLOCATE(cavdatalist(big_atom_start:n_big_atoms,nbr_boxes))
+                                DEALLOCATE(bitcell_int8_array_2)
+                                ALLOCATE(bitcell_int8_array_2(&
+                                        0:ISHFT(lbp16(1),-3)-1,&
+                                        0:lbp16(2)-1,&
+                                        0:lbp16(3)-1), Stat=Allocatestatus)
+                                IF (Allocatestatus /= 0) THEN
+                                  err_msg = ''
+                                  err_msg(1) = 'Memory could not be allocated for bitcell_int8_array_2'
+                                  CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                END IF
+                                ALLOCATE(bitcell_int16_array(&
+                                        0:int16ub(1),&
+                                        0:int16ub(2),&
+                                        0:int16ub(3)), Stat=Allocatestatus)
+                                IF (Allocatestatus /= 0) THEN
+                                  err_msg = ''
+                                  err_msg(1) = 'Memory could not be allocated for bitcell_int16_array'
+                                  CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                END IF
+                                ALLOCATE(zcavcount(&
+                                        0:lbp16(3)))
+                                IF (l_compress) THEN
+                                        ALLOCATE(coarse_voxel_array(&
+                                                0:int16ub_coarse(1),&
+                                                0:int16ub_coarse(2),&
+                                                0:int16ub_coarse(3)), Stat=Allocatestatus)
+                                        IF (Allocatestatus /= 0) THEN
+                                          err_msg = ''
+                                          err_msg(1) = 'Memory could not be allocated for coarse_voxel_array'
+                                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                        END IF
+                                        ALLOCATE(zcavcount_coarse(&
+                                                0:int16shape_coarse(3)))
+                                END IF
+                                !$OMP END SINGLE
+                                DO i_big_atom = big_atom_start, n_big_atoms
+                                        !$OMP WORKSHARE
+                                        bitcell_int8_array_2 = 0_INT8
+                                        bitcell_int8_array_2(:int8ub(1),:,:) = &
+                                                NOT(bitcell_int8_array( &
+                                                4:3+int8shape(1), &
+                                                32:31+int8shape(2), &
+                                                32:31+int8shape(3),i_big_atom))
+                                        bitcell_int16_array = &
+                                                RESHAPE(&
+                                                TRANSFER(bitcell_int8_array_2,bitcell_int16_array),&
+                                                SHAPE(bitcell_int16_array))
+                                        !zcavcount(1:) = SUM(INT(SUM(POPCNT(bitcell_int16_array),1),INT64),1)
+                                        !zcavcount(0) = 0_INT64
+                                        !$OMP END WORKSHARE
+                                        IF (l_compress) THEN
+                                                ! Facilitate lossless compression of cavity voxel coordinate array.
+                                                ! Identify 2x2x2 blocks of cavity voxel array that are entirely cavity space.
+                                                ! Clear the fine voxels that make up these blocks.
+                                                ! Set the coarse voxels representing these blocks.
+                                                ! These 2x2x2 blocks can only be recognized if they are aligned with coarse voxel
+                                                ! boundaries.
+                                                ! In the coarse voxel bitmap, only even (or zero) bit positions (0, 2, 4, 6, ...)
+                                                ! mean anything.
+                                                !$OMP DO SCHEDULE(STATIC)
+                                                DO zi2 = 0, int16ub_coarse(3)
+                                                        zi = SHIFTL(zi2,1)
+                                                        DO yi2 = 0, int16ub_coarse(2)
+                                                                yi = SHIFTL(yi2,1)
+                                                                !DIR$ LOOP COUNT MIN=1, MAX=1000, AVG=16
+                                                                DO i = 0, int16ub(1)
+                                                                        fine00 = bitcell_int16_array(i,yi,zi)
+                                                                        fine10 = bitcell_int16_array(i,yi+1,zi)
+                                                                        fine01 = bitcell_int16_array(i,yi,zi+1)
+                                                                        fine11 = bitcell_int16_array(i,yi+1,zi+1)
+                                                                        coarse = IAND(fine00,fine10)
+                                                                        coarse = IAND(coarse,fine01)
+                                                                        coarse = IAND(coarse,fine11)
+                                                                        coarse2 = SHIFTR(coarse,1)
+                                                                        coarse = IAND(coarse,coarse2)
+                                                                        coarse = IAND(coarse,mask_int16)
+                                                                        coarse2 = SHIFTL(coarse,1)
+                                                                        coarse2 = IOR(coarse,coarse2)
+                                                                        coarse2 = NOT(coarse2)
+                                                                        fine00 = IAND(fine00,coarse2)
+                                                                        fine10 = IAND(fine10,coarse2)
+                                                                        fine01 = IAND(fine01,coarse2)
+                                                                        fine11 = IAND(fine11,coarse2)
+                                                                        coarse_voxel_array(i,yi2,zi2) = coarse
+                                                                        bitcell_int16_array(i,yi,zi) = fine00
+                                                                        bitcell_int16_array(i,yi+1,zi) = fine10
+                                                                        bitcell_int16_array(i,yi,zi+1) = fine01
+                                                                        bitcell_int16_array(i,yi+1,zi+1) = fine11
+                                                                END DO
+                                                        END DO
+                                                        ! Count the coarse cavity voxels
+                                                        ncavs = 0_INT64
+                                                        DO yi_chunkstart = 0, int16ub_coarse(2), yi2_chunkstride
+                                                                yi_chunkend = MIN(yi_chunkstart + yi2_chunkstride - 1, int16ub_coarse(2))
+                                                                chunksize = (yi_chunkend + 1 - yi_chunkstart) * int16shape(1)
+                                                                bitcell_int16_vec(1:chunksize) = &
+                                                                        TRANSFER(coarse_voxel_array(:,yi_chunkstart:yi_chunkend,zi2),bitcell_int16_vec)
+                                                                chunksize_p16 = IAND(chunksize+padconst_2byte,padmask_2byte)
+                                                                IF (chunksize_p16 > chunksize) THEN
+                                                                        bitcell_int16_vec(chunksize+1:chunksize_p16) = 0_INT16
+                                                                END IF
+                                                                ncavs_int16 = 0_INT16
+                                                                !DIR$ ASSUME (MOD(chunksize_p16,dimpad_2byte) .EQ. 0)
+                                                                !DIR$ VECTOR ALIGNED
+                                                                !$OMP SIMD PRIVATE(cavbits_int16) REDUCTION(+:ncavs_int16)
+                                                                DO i = 1, chunksize_p16
+                                                                        cavbits_int16 = bitcell_int16_vec(i)
+                                                                        ncavs_int16 = ncavs_int16 + IAND(cavbits_int16,1_INT16)
+                                                                        DO j = 2, 14, 2
+                                                                                ncavs_int16 = ncavs_int16 + &
+                                                                                        IAND(SHIFTR(cavbits_int16,j),1_INT16)
+                                                                        END DO
+                                                                END DO
+                                                                !$OMP END SIMD
+                                                                ncavs = ncavs + INT(ncavs_int16,INT64)
+                                                        END DO
+                                                        zcavcount_coarse(zi2+1) = ncavs
+                                                END DO
+                                                !$OMP END DO
+                                        END IF
+                                        ! Count the fine cavity voxels
+                                        !$OMP DO SCHEDULE(STATIC)
+                                        DO zi = 0, zub
+                                                ncavs = 0_INT64
+                                                DO yi_chunkstart = 0, int16ub(2), yi_chunkstride
+                                                        yi_chunkend = MIN(yi_chunkstart + yi_chunkstride - 1, int16ub(2))
+                                                        chunksize = (yi_chunkend + 1 - yi_chunkstart) * int16shape(1)
+                                                        ! Chunks are small enough the int16 reduction can't overflow
+                                                        bitcell_int16_vec(1:chunksize) = &
+                                                                TRANSFER(bitcell_int16_array(:,yi_chunkstart:yi_chunkend,zi),bitcell_int16_vec)
+                                                        chunksize_p16 = IAND(chunksize+padconst_2byte,padmask_2byte)
+                                                        IF (chunksize_p16 > chunksize) THEN
+                                                                bitcell_int16_vec(chunksize+1:chunksize_p16) = 0_INT16
+                                                        END IF
+                                                        ncavs_int16 = 0_INT16
+                                                        !DIR$ ASSUME (MOD(chunksize_p16,dimpad_2byte) .EQ. 0)
+                                                        !DIR$ VECTOR ALIGNED
+                                                        !$OMP SIMD PRIVATE(cavbits_int16) REDUCTION(+:ncavs_int16)
+                                                        DO i = 1, chunksize_p16
+                                                                cavbits_int16 = bitcell_int16_vec(i)
+                                                                ncavs_int16 = ncavs_int16 + IAND(cavbits_int16,1_INT16)
+                                                                DO j = 1, 15
+                                                                        ncavs_int16 = ncavs_int16 + &
+                                                                                IAND(SHIFTR(cavbits_int16,j),1_INT16)
+                                                                END DO
+                                                        END DO
+                                                        !$OMP END SIMD
+                                                        ncavs = ncavs + INT(ncavs_int16,INT64)
+                                                END DO
+                                                zcavcount(zi+1) = ncavs
+                                        END DO
+                                        !$OMP END DO
+                                        !$OMP SINGLE
+                                        zcavcount(0) = 0_INT64
+                                        ncavs = zcavcount(1)
+                                        DO zi = 2, lbp16(3)
+                                                ncavs = ncavs + zcavcount(zi)
+                                                zcavcount(zi) = ncavs
+                                        END DO
+                                        ncavs_combined = ncavs
+                                        IF (l_compress) THEN
+                                                ncavs_fine = ncavs
+                                                zcavcount_coarse(0) = 0_INT64
+                                                ncavs_coarse = zcavcount_coarse(1)
+                                                DO zi2 = 2, int16shape_coarse(3)
+                                                        ncavs_coarse = ncavs_coarse + zcavcount_coarse(zi2)
+                                                        zcavcount_coarse(zi2) = ncavs_coarse
+                                                END DO
+                                                ! zcavcount_coarse must store starting index for coarse cavity locations in each z-slice
+                                                zcavcount_coarse = zcavcount_coarse + ncavs_fine
+                                                ! ncavs is number of cavity-positive voxels in original cavity bitmap
+                                                ! (before compression)
+                                                ncavs = ncavs + SHIFTL(ncavs_coarse,3)
+                                                ncavs_combined = ncavs_combined + ncavs_coarse
+                                                cavdatalist(i_big_atom,ibox)%ncavs_coarse = ncavs_coarse
+                                                cavdatalist(i_big_atom,ibox)%ncavs_fine = ncavs_fine
+                                                cavdatalist(i_big_atom,ibox)%ncavs_combined = ncavs_combined
+                                        END IF
+                                        IF (ALLOCATED(cavdatalist(i_big_atom,ibox)%cavity_locs)) THEN
+                                                IF (box_list(ibox)%l_cavloc_int32 .OR. &
+                                                        ncavs_combined>SIZE(cavdatalist(i_big_atom,ibox)%cavity_locs,1,INT64)) THEN
+                                                        DEALLOCATE(cavdatalist(i_big_atom,ibox)%cavity_locs)
+                                                END IF
+                                        END IF
+                                        IF (ALLOCATED(cavdatalist(i_big_atom,ibox)%cavity_locs_int32)) THEN
+                                                IF (.NOT. box_list(ibox)%l_cavloc_int32 .OR. &
+                                                        ncavs_combined>SIZE(cavdatalist(i_big_atom,ibox)%cavity_locs_int32,1,INT64)) THEN
+                                                        DEALLOCATE(cavdatalist(i_big_atom,ibox)%cavity_locs_int32)
+                                                END IF
+                                        END IF
+                                        IF (box_list(ibox)%l_cavloc_int32) THEN
+                                                IF (.NOT. ALLOCATED(cavdatalist(i_big_atom,ibox)%cavity_locs_int32)) THEN
+                                                        ALLOCATE(cavdatalist(i_big_atom,ibox)%cavity_locs_int32(0:ncavs_combined-1))
+                                                END IF
                                         ELSE
-                                                n_occ_sectors = n_occ_sectors+1
-                                                secind = n_occ_sectors
-                                                sector_has_atoms(ci(1),ci(2),ci(3),ibox) = .TRUE.
-                                                sector_n_atoms(secind) = 1
-                                                sector_index_map(ci(1),ci(2),ci(3),ibox) = secind
+                                                IF (.NOT. ALLOCATED(cavdatalist(i_big_atom,ibox)%cavity_locs)) THEN
+                                                        ALLOCATE(cavdatalist(i_big_atom,ibox)%cavity_locs(0:ncavs_combined-1))
+                                                END IF
                                         END IF
-                                        IF (sector_n_atoms(secind) > max_sector_natoms) asflag = .FALSE.
-                                        IF (asflag) sector_atoms(sector_n_atoms(secind),secind,:) = sector_atom_ID
-                                END DO AtomLoop2
-                        END DO MoleculeLoop2
-                END DO SpeciesLoop
-          END DO BoxLoop
-          IF (asflag) THEN
-                EXIT AllocationSizeLoop
+                                        cavdatalist(i_big_atom,ibox)%ncavs = ncavs
+                                        cavdatalist(i_big_atom,ibox)%ncavs_dp = REAL(ncavs,DP)
+                                        cavdatalist(i_big_atom,ibox)%ln_cavfrac = LOG(REAL(ncavs,DP)/&
+                                                PRODUCT(REAL(box_list(ibox)%length_bitcells,DP)))
+                                        !$OMP END SINGLE
+                                        IF (box_list(ibox)%l_cavloc_int32) THEN
+                                                IF (l_compress) THEN
+                                                        !$OMP DO SCHEDULE(STATIC)
+                                                        DO zi2 = 0, int16ub_coarse(3)
+                                                                zi = SHIFTL(zi2,1)
+                                                                icav = zcavcount_coarse(zi2)
+                                                                locbase_int32 = ISHFT(zi,21)
+                                                                DO yi2 = 0, int16ub_coarse(2)
+                                                                        DO xi = 0, int16ub_coarse(1)
+                                                                                cavbits_int16 = coarse_voxel_array(xi,yi2,zi2)
+                                                                                IF (cavbits_int16 .EQ. 0_INT16) CYCLE
+                                                                                loc_int32 = IOR(locbase_int32,SHIFTL(xi,4))
+                                                                                IF (IAND(cavbits_int16,1_INT16) .NE. 0_INT16) THEN
+                                                                                        cavdatalist(i_big_atom,ibox)%cavity_locs_int32(icav)=loc_int32
+                                                                                        icav = icav + 1_INT64
+                                                                                END IF
+                                                                                DO j = 1, 7
+                                                                                        cavbits_int16 = SHIFTR(cavbits_int16,2)
+                                                                                        loc_int32 = loc_int32 + 2
+                                                                                        IF (IAND(cavbits_int16,1_INT16) .EQ. 0_INT16) CYCLE
+                                                                                        cavdatalist(i_big_atom,ibox)%cavity_locs_int32(icav)=loc_int32
+                                                                                        icav = icav + 1_INT64
+                                                                                END DO
+                                                                        END DO
+                                                                        locbase_int32 = locbase_int32 + ISHFT(2,11)
+                                                                END DO
+                                                        END DO
+                                                        !$OMP END DO NOWAIT
+                                                END IF
+                                                !$OMP DO SCHEDULE(STATIC)
+                                                DO zi = 0, zub
+                                                        icav = zcavcount(zi)
+                                                        locbase_int32 = ISHFT(zi,21)
+                                                        DO yi = 0, int16ub(2)
+                                                                DO xi = 0, int16ub(1)
+                                                                        cavbits_int16 = bitcell_int16_array(xi,yi,zi)
+                                                                        IF (cavbits_int16 .EQ. 0_INT16) CYCLE
+                                                                        loc_int32 = IOR(locbase_int32,SHIFTL(xi,4))
+                                                                        IF (IAND(cavbits_int16,1_INT16) .NE. 0_INT16) THEN
+                                                                                cavdatalist(i_big_atom,ibox)%cavity_locs_int32(icav)=loc_int32
+                                                                                icav = icav + 1_INT64
+                                                                        END IF
+                                                                        DO j = 1, 15
+                                                                                cavbits_int16 = SHIFTR(cavbits_int16,1)
+                                                                                loc_int32 = loc_int32 + 1
+                                                                                IF (IAND(cavbits_int16,1_INT16) .EQ. 0_INT16) CYCLE
+                                                                                cavdatalist(i_big_atom,ibox)%cavity_locs_int32(icav)=loc_int32
+                                                                                icav = icav + 1_INT64
+                                                                        END DO
+                                                                END DO
+                                                                locbase_int32 = locbase_int32 + ISHFT(1,11)
+                                                        END DO
+                                                END DO
+                                                !$OMP END DO
+                                        ELSE
+                                                IF (l_compress) THEN
+                                                        !$OMP DO SCHEDULE(STATIC)
+                                                        DO zi2 = 0, int16ub_coarse(3)
+                                                                zi = SHIFTL(zi2,1)
+                                                                icav = zcavcount_coarse(zi2)
+                                                                locbase = ISHFT(INT(zi,INT64),42)
+                                                                DO yi2 = 0, int16ub_coarse(2)
+                                                                        DO xi = 0, int16ub_coarse(1)
+                                                                                cavbits_int16 = coarse_voxel_array(xi,yi2,zi2)
+                                                                                IF (cavbits_int16 .EQ. 0_INT16) CYCLE
+                                                                                loc = IOR(locbase,SHIFTL(INT(xi,INT64),4))
+                                                                                IF (IAND(cavbits_int16,1_INT16) .NE. 0_INT16) THEN
+                                                                                        cavdatalist(i_big_atom,ibox)%cavity_locs(icav)=loc
+                                                                                        icav = icav + 1_INT64
+                                                                                END IF
+                                                                                DO j = 1, 7
+                                                                                        cavbits_int16 = SHIFTR(cavbits_int16,2)
+                                                                                        loc = loc + 2_INT64
+                                                                                        IF (IAND(cavbits_int16,1_INT16) .EQ. 0_INT16) CYCLE
+                                                                                        cavdatalist(i_big_atom,ibox)%cavity_locs(icav)=loc
+                                                                                        icav = icav + 1_INT64
+                                                                                END DO
+                                                                        END DO
+                                                                        locbase = locbase + ISHFT(2,21)
+                                                                END DO
+                                                        END DO
+                                                        !$OMP END DO NOWAIT
+                                                END IF
+                                                !$OMP DO SCHEDULE(STATIC)
+                                                DO zi = 0, zub
+                                                        icav = zcavcount(zi)
+                                                        locbase = ISHFT(INT(zi,INT64),42)
+                                                        DO yi = 0, int16ub(2)
+                                                                DO xi = 0, int16ub(1)
+                                                                        cavbits_int16 = bitcell_int16_array(xi,yi,zi)
+                                                                        IF (cavbits_int16 .EQ. 0_INT16) CYCLE
+                                                                        loc = IOR(locbase,SHIFTL(INT(xi,INT64),4))
+                                                                        IF (IAND(cavbits_int16,1_INT16) .NE. 0_INT16) THEN
+                                                                                cavdatalist(i_big_atom,ibox)%cavity_locs(icav)=loc
+                                                                                icav = icav + 1_INT64
+                                                                        END IF
+                                                                        DO j = 1, 15
+                                                                                cavbits_int16 = SHIFTR(cavbits_int16,1)
+                                                                                loc = loc + 1
+                                                                                IF (IAND(cavbits_int16,1_INT16) .EQ. 0_INT16) CYCLE
+                                                                                cavdatalist(i_big_atom,ibox)%cavity_locs(icav)=loc
+                                                                                icav = icav + 1_INT64
+                                                                        END DO
+                                                                END DO
+                                                                !!DIR$ VECTOR ALIGNED
+                                                                !!$OMP SIMD PRIVATE(oneshift_int16)
+                                                                !DO i = 0, 15
+                                                                !        oneshift_int16 = oneshift_int16_vec(i) !ISHFT(1_INT16,i)
+                                                                !        DO j = 0, int16ub(1)
+                                                                !                ncavlvec(j*16+i) = &
+                                                                !                        IAND(oneshift_int16,&
+                                                                !                        bitcell_int16_array(j,yi,zi)) .EQ. 0_INT16
+                                                                !        END DO
+                                                                !END DO
+                                                                !!$OMP END SIMD
+                                                                !DO xi_int64 = 0_INT64, xub_int64
+                                                                !        IF (ncavlvec(xi_int64)) CYCLE
+                                                                !        cavdatalist(i_big_atom,ibox)%cavity_locs(icav) = &
+                                                                !                IOR(locbase,xi_int64)
+                                                                !        icav = icav + 1_INT64
+                                                                !END DO
+                                                                locbase = locbase + ISHFT(1_INT64,21)
+                                                        END DO
+                                                END DO
+                                                !$OMP END DO
+                                        END IF
+                                END DO
+                        END IF
+                        !$OMP SINGLE
+                        IF (l_firstframe) THEN
+                                DO isolvent = 1, solvents_or_types_maxind
+                                      DO i_big_atom = 0, n_big_atoms
+                                              OPEN(2777,file=TRIM(TRIM(run_name) // &
+                                                      '.bitcell_int64.' // &
+                                                      TRIM(Int_To_String(isolvent)) // '.' // &
+                                                      TRIM(Int_To_String(i_big_atom))))
+                                              DO zi = -28, 28
+                                                      WRITE(2777,*) bitcell_int64(-28:28,zi,isolvent,i_big_atom,1)
+                                              END DO
+                                              CLOSE(2777)
+                                      END DO
+                                END DO
+                                IF (cavity_biasing_flag) THEN
+                                        DO i_big_atom = big_atom_start, n_big_atoms
+                                                OPEN(2777,file=TRIM(TRIM(run_name) //&
+                                                        '.cavity_locs.' // TRIM(Int_To_String(i_big_atom))))
+                                                IF (box_list(ibox)%l_cavloc_int32) THEN
+                                                        DO icav = 0, cavdatalist(i_big_atom,1)%ncavs_combined
+                                                                WRITE(2777,*) cavdatalist(i_big_atom,1)%cavity_locs_int32(icav)
+                                                        END DO
+                                                ELSE
+                                                        DO icav = 0, cavdatalist(i_big_atom,1)%ncavs_combined
+                                                                WRITE(2777,*) cavdatalist(i_big_atom,1)%cavity_locs(icav)
+                                                        END DO
+                                                END IF
+                                                CLOSE(2777)
+                                        END DO
+                                END IF
+                                OPEN(2777,file=TRIM(TRIM(run_name) // '.bitcell_info'))
+                                WRITE(2777,*) "length_bitcells"
+                                WRITE(2777,*) box_list(1)%length_bitcells
+                                WRITE(2777,*) "bitcell_face_distance"
+                                WRITE(2777,*) box_list(1)%bitcell_face_distance
+                                WRITE(2777,*) "setbit_extent"
+                                WRITE(2777,*) box_list(1)%setbit_extent
+                                WRITE(2777,*) "bitcell_xyzortho_bbox_length"
+                                WRITE(2777,*) bitcell_xyzortho_bbox_length
+                                WRITE(2777,*) "bitcell_H"
+                                WRITE(2777,*) bitcell_H
+                                WRITE(2777,*) "lbp32"
+                                WRITE(2777,*) lbp32
+                                WRITE(2777,*) "bcd"
+                                WRITE(2777,*) bcd
+                                WRITE(2777,*) "n_big_atoms"
+                                WRITE(2777,*) n_big_atoms
+                                WRITE(2777,*) "sbe_ti_mat"
+                                WRITE(2777,*) sbe_ti_mat
+                                WRITE(2777,*) "cavfrac"
+                                WRITE(2777,*) EXP(cavdatalist(:,1)%ln_cavfrac)
+                                WRITE(2777,*) "ncavs"
+                                WRITE(2777,*) cavdatalist(:,1)%ncavs
+                                WRITE(2777,*) "ncavs_fine"
+                                WRITE(2777,*) cavdatalist(:,1)%ncavs_fine
+                                WRITE(2777,*) "ncavs_coarse"
+                                WRITE(2777,*) cavdatalist(:,1)%ncavs_coarse
+                                CLOSE(2777)
+                                OPEN(2777,file=TRIM(TRIM(run_name) // '.bitcell_int32_vec'))
+                                DO i = 0, vlen-1
+                                        WRITE(2777,*) box_list(ibox)%bitcell_int32_vec(i)
+                                END DO
+                                CLOSE(2777)
+                        END IF
+                        DEALLOCATE(bitcell_int8_array, Stat=Allocatestatus)
+                        IF (Allocatestatus /= 0) THEN
+                          err_msg = ''
+                          err_msg(1) = 'Memory could not be deallocated from bitcell_int8_array'
+                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                        END IF
+                        DEALLOCATE(bitcell_int8_array_2, Stat=Allocatestatus)
+                        IF (Allocatestatus /= 0) THEN
+                          err_msg = ''
+                          err_msg(1) = 'Memory could not be deallocated from bitcell_int8_array_2'
+                          CALL Clean_Abort(err_msg, 'Sector_Setup')
+                        END IF
+                        !$OMP END SINGLE
+                END IF
+          END DO
+          IF (cbmc_cell_list_flag) THEN
+                  DO ibox = 1, nbr_boxes
+                          bt = box_list(ibox)%border_thickness
+                          zub = box_list(ibox)%sectorbound(3)
+                          yub = box_list(ibox)%sectorbound(2)
+                          xub = box_list(ibox)%sectorbound(1)
+                          zlb = -zub
+                          ylb = -yub
+                          xlb = -xub
+                          !$OMP DO COLLAPSE(3) REDUCTION(MAX:max_neighbors)
+                          DO zi = zlb, zub
+                                  DO yi = ylb, yub
+                                          DO xi = xlb, xub
+                                                  max_neighbors = MAX(SUM(IAND(n_cell_atoms( &
+                                                          xi-bt(1):xi+bt(1), &
+                                                          yi-bt(2):yi+bt(2), &
+                                                          zi-bt(3):zi+bt(3), &
+                                                          ibox), &
+                                                          box_list(ibox)%cbmc_cell_mask)), &
+                                                          max_neighbors)
+                                          END DO
+                                  END DO
+                          END DO
+                          !$OMP END DO
+                  END DO
+                  !$OMP SINGLE
+                  IF (ALLOCATED(cbmc_cell_n_interact)) THEN
+                          IF (ANY(adj_cellmaxbound > adj_cellmaxbound_old)) THEN
+                                  DEALLOCATE(cbmc_cell_n_interact, Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be deallocated from cbmc_cell_n_interact'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                          END IF
+                  END IF
+                  IF (.NOT. ALLOCATED(cbmc_cell_n_interact)) THEN
+                          ALLOCATE(cbmc_cell_n_interact( &
+                                  -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                  -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                  -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                  nbr_boxes), Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be allocated for cbmc_cell_n_interact'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                          !WRITE(*,*) "Allocated cbmc_cell_n_interact with:"
+                          !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_n_interact)
+                          !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_n_interact), " bytes"
+                          !WRITE(*,*) "on step ", i_mcstep
+                  END IF
+                  ALLOCATE(cell_l_inrange(max_neighbors,2, &
+                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                          nbr_boxes), Stat=AllocateStatus)
+                  cbmc_max_interact_old = cbmc_max_interact
+                  cbmc_max_interact = 0
+                  !$OMP END SINGLE
           ELSE
-                max_sector_natoms = MAXVAL(sector_n_atoms)
-                DEALLOCATE(sector_atoms)
-                ALLOCATE(sector_atoms(max_sector_natoms,max_occ_sectors,3))
-                asflag = .TRUE.
-                sector_n_atoms = 0
+                  DO ibox = 1, nbr_boxes
+                          zub = box_list(ibox)%sectorbound(3)
+                          yub = box_list(ibox)%sectorbound(2)
+                          xub = box_list(ibox)%sectorbound(1)
+                          zlb = -zub
+                          ylb = -yub
+                          xlb = -xub
+                          !$OMP DO COLLAPSE(3) REDUCTION(MAX:max_neighbors)
+                          DO zi = zlb, zub
+                                  DO yi = ylb, yub
+                                          DO xi = xlb, xub
+                                                  max_neighbors = MAX(SUM(n_cell_atoms( &
+                                                          xi-1:xi+1, &
+                                                          yi-1:yi+1, &
+                                                          zi-1:zi+1, &
+                                                          ibox)), &
+                                                          max_neighbors)
+                                          END DO
+                                  END DO
+                          END DO
+                          !$OMP END DO
+                  END DO
+                  !$OMP SINGLE
+                  ALLOCATE(cell_l_inrange(max_neighbors,1, &
+                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                          nbr_boxes))
+                  !$OMP END SINGLE
           END IF
-          END DO AllocationSizeLoop
-  END SUBROUTINE Sector_Setup
+          !$OMP WORKSHARE
+          cell_l_inrange = .FALSE.
+          !$OMP END WORKSHARE
 
-  LOGICAL FUNCTION check_overlap(ia, im, is)
-          !
-          INTEGER, INTENT(IN) :: ia, im, is
-          INTEGER :: this_species, this_molecule, this_atom, this_box
-          INTEGER :: this_locate, i_dim, secind
-          INTEGER :: xi, yi, zi, i, ia_cell
-          INTEGER, DIMENSION(:), POINTER :: sector_atom_ID
-          TYPE(Atom_Class), POINTER :: atom_ptr
-          INTEGER :: cell_coords(3)
-          INTEGER, DIMENSION(3,3) :: ci
-          REAL(DP) :: cp(3), dx, dy, dz, dxp, dyp, dzp, rminsq
-          LOGICAL :: need_wrapping(3)
-          INTEGER, PARAMETER, DIMENSION(3) :: delta = (/0,-1,1/)
-          REAL(DP), DIMENSION(:), POINTER :: rminsq_ptr
-          !
-          check_overlap = .TRUE.
-          IF (widom_active) THEN
-                  cp(1) = widom_atoms(ia)%rxp
-                  cp(2) = widom_atoms(ia)%ryp
-                  cp(3) = widom_atoms(ia)%rzp
-                  this_box = widom_molecule%which_box
+
+
+          DO ibox = 1, nbr_boxes
+                  l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                  rcutsq = REAL(rcut_cbmcsq(ibox),SP)
+                  hl = REAL(box_list(ibox)%cell_xyzortho_bbox_length*0.5_DP,SP)
+                  bt = box_list(ibox)%border_thickness
+                  IF (.NOT. l_ortho) THEN
+                          h11 = REAL(box_list(ibox)%length(1,1),SP)
+                          h12 = REAL(box_list(ibox)%length(1,2),SP)
+                          h22 = REAL(box_list(ibox)%length(2,2),SP)
+                          h13 = REAL(box_list(ibox)%length(1,3),SP)
+                          h23 = REAL(box_list(ibox)%length(2,3),SP)
+                          h33 = REAL(box_list(ibox)%length(3,3),SP)
+                          cell_H = box_list(ibox)%cell_H_sp
+                          zub = bt(3)+box_list(ibox)%sectorbound(3)
+                          yub = bt(2)+box_list(ibox)%sectorbound(2)
+                          xub = bt(1)+box_list(ibox)%sectorbound(1)
+                          zlb = -zub
+                          ylb = -yub
+                          xlb = -xub
+                          !$OMP DO COLLAPSE(3) SCHEDULE(STATIC)
+                          DO zi = zlb, zub
+                                  DO yi = ylb, yub
+                                          DO xi = xlb, xub
+                                                  DO i = 1, n_cell_atoms(xi,yi,zi,ibox)
+                                                          isp = this_cell_rsp(i,1,xi,yi,zi,ibox)
+                                                          rxp = h11*isp
+                                                          isp = this_cell_rsp(i,2,xi,yi,zi,ibox)
+                                                          rxp = rxp + h12*isp
+                                                          ryp = h22*isp
+                                                          isp = this_cell_rsp(i,3,xi,yi,zi,ibox)
+                                                          rxp = rxp + h13*isp
+                                                          ryp = ryp + h23*isp
+                                                          rzp = h33*isp
+                                                          this_cell_rsp(i,1,xi,yi,zi,ibox) = rxp
+                                                          this_cell_rsp(i,2,xi,yi,zi,ibox) = ryp
+                                                          this_cell_rsp(i,3,xi,yi,zi,ibox) = rzp
+                                                  END DO
+                                          END DO
+                                  END DO
+                          END DO
+                          !$OMP END DO
+                  END IF
+                  zub = box_list(ibox)%sectorbound(3)
+                  yub = box_list(ibox)%sectorbound(2)
+                  xub = box_list(ibox)%sectorbound(1)
+                  zlb = -zub
+                  ylb = -yub
+                  xlb = -xub
+
+                  !$OMP DO COLLAPSE(3) SCHEDULE(STATIC) REDUCTION(MAX:cbmc_max_interact,max_adj_cell_atoms)
+                  DO zi = zlb, zub
+                  DO yi = ylb, yub
+                  DO xi = xlb, xub
+                        istart = 1
+                        iend = 0
+                        DO dzi = -1, 1
+                                zi2 = zi+dzi
+                                DO dyi = -1, 1
+                                        yi2 = yi + dyi
+                                        DO dxi = -1, 1
+                                                xi2 = xi + dxi
+                                                vlen = n_cell_atoms(xi2,yi2,zi2,ibox)
+                                                IF (vlen < 1) CYCLE
+                                                iend = istart + vlen - 1
+                                                cbmc_cell_rsp_priv(istart:iend,1:3) = &
+                                                        this_cell_rsp(1:vlen,1:3,xi2,yi2,zi2,ibox)
+                                                IF (read_atompair_rminsq) THEN
+                                                        ti_priv(istart:iend) = &
+                                                                this_cell_ti(1:vlen,xi2,yi2,zi2,ibox)
+                                                ELSE IF (calc_rmin_flag) THEN
+                                                        ti_priv(istart:iend) = &
+                                                                this_cell_atomtypes(1:vlen,xi2,yi2,zi2,ibox)
+                                                END IF
+                                                istart = istart + vlen
+                                        END DO
+                                END DO
+                        END DO
+                        adj_iend = iend
+                        DO dzi = -bt(3), bt(3)
+                                IF (ABS(dzi)<2) CYCLE
+                                zi2 = zi+dzi
+                                DO dyi = -bt(2), bt(2)
+                                        IF (ABS(dyi)<2) CYCLE
+                                        yi2 = yi + dyi
+                                        DO dxi = -bt(1), bt(1)
+                                                IF (ABS(dxi)<2) CYCLE
+                                                xi2 = xi + dxi
+                                                vlen = IAND(n_cell_atoms(xi2,yi2,zi2,ibox), &
+                                                        box_list(ibox)%cbmc_cell_mask(dxi,dyi,dzi))
+                                                IF (vlen < 1) CYCLE
+                                                iend = istart + vlen - 1
+                                                cbmc_cell_rsp_priv(istart:iend,1:3) = &
+                                                        this_cell_rsp(1:vlen,1:3,xi2,yi2,zi2,ibox)
+                                                istart = istart + vlen
+                                        END DO
+                                END DO
+                        END DO
+                        xyzi_sp = REAL((/ xi, yi, zi /),SP)
+                        IF (l_ortho) THEN
+                                cell_rp = xyzi_sp*box_list(ibox)%cell_H_diag
+                        ELSE
+                                cell_rp = MATMUL(cell_H,xyzi_sp)
+                        END IF
+                        !DIR$ VECTOR ALIGNED
+                        DO i = 1, iend
+                                drxp = cbmc_cell_rsp_priv(i,1)-cell_rp(1)
+                                drxp = MAX(ABS(drxp)-hl(1),0.0)
+                                dryp = cbmc_cell_rsp_priv(i,2)-cell_rp(2)
+                                dryp = MAX(ABS(dryp)-hl(2),0.0)
+                                drzp = cbmc_cell_rsp_priv(i,3)-cell_rp(3)
+                                drzp = MAX(ABS(drzp)-hl(3),0.0)
+                                rsq_vec(i) = drxp*drxp + dryp*dryp + drzp*drzp
+                        END DO
+                        IF (read_atompair_rminsq) THEN
+                                cell_l_inrange(1:adj_iend,1,xi,yi,zi,ibox) = &
+                                        rsq_vec(1:adj_iend) < solvent_max_rminsq_sp(ti_priv(1:adj_iend),ibox)
+                        ELSE IF (calc_rmin_flag) THEN
+                                cell_l_inrange(1:adj_iend,1,xi,yi,zi,ibox) = &
+                                        rsq_vec(1:adj_iend) < atomtype_max_rminsq_sp(ti_priv(1:adj_iend))
+                        ELSE
+                                cell_l_inrange(1:adj_iend,1,xi,yi,zi,ibox) = &
+                                        rsq_vec(1:adj_iend) < sp_rcut_lowsq
+                        END IF
+                        IF (cbmc_cell_list_flag) THEN
+                                cell_l_inrange(1:iend,2,xi,yi,zi,ibox) = rsq_vec(1:iend) < rcutsq
+                                cbmc_max_interact = MAX(cbmc_max_interact,COUNT(cell_l_inrange(1:iend,2,xi,yi,zi,ibox)))
+                        END IF
+                        max_adj_cell_atoms = MAX(max_adj_cell_atoms,COUNT(cell_l_inrange(1:adj_iend,1,xi,yi,zi,ibox)))
+                  END DO
+                  END DO
+                  END DO
+                  !$OMP END DO
+          END DO
+          IF (cbmc_cell_list_flag) THEN
+                  !$OMP WORKSHARE
+                  cell_l_inrange(:,2,:,:,:,:) = cell_l_inrange(:,2,:,:,:,:) .AND. .NOT. cell_l_inrange(:,1,:,:,:,:)
+                  !$OMP END WORKSHARE
+          END IF
+          !$OMP SINGLE
+          IF (cbmc_cell_list_flag) THEN
+                  cbmc_max_interact = MAX(cbmc_max_interact,cbmc_max_interact_old)
+                  ! ensure 32-byte padding of first dimension
+                  cbmc_max_interact = IAND(cbmc_max_interact+padconst_4byte,padmask_4byte)
+                  IF (cbmc_max_interact > cbmc_max_interact_old .OR. ANY(adj_cellmaxbound > adj_cellmaxbound_old)) THEN
+                          IF (ALLOCATED(cbmc_cell_rsp)) THEN
+                                  !WRITE(*,*) "cbmc_cell_rsp shape was previously: "
+                                  !WRITE(*,*) SHAPE(cbmc_cell_rsp)
+                                  !WRITE(*,*) cbmc_max_interact, ">", cbmc_max_interact_old
+                                  !WRITE(*,*) adj_cellmaxbound
+                                  !WRITE(*,*) adj_cellmaxbound_old
+                                  DEALLOCATE(cbmc_cell_rsp, Stat=AllocateStatus)
+                          END IF
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be deallocated from cbmc_cell_rsp'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                          IF (ALLOCATED(cbmc_cell_atomtypes)) DEALLOCATE(cbmc_cell_atomtypes, Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be deallocated from cbmc_cell_atomtypes'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                          IF (ALLOCATED(cbmc_cell_ti)) DEALLOCATE(cbmc_cell_ti, Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be deallocated from cbmc_cell_ti'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                  END IF
+                  IF (ALLOCATED(cbmc_cell_rsp)) THEN
+                          IF (cbmc_max_interact > cbmc_max_interact_old .OR. ANY(adj_cellmaxbound > adj_cellmaxbound_old)) THEN
+                                  DEALLOCATE(cbmc_cell_rsp, Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be deallocated from cbmc_cell_rsp'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                          END IF
+                  END IF
+                  IF (.NOT. ALLOCATED(cbmc_cell_rsp)) THEN
+                          ALLOCATE(cbmc_cell_rsp(cbmc_max_interact,4, &
+                                  -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                  -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                  -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                  nbr_boxes), Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be allocated for cbmc_cell_rsp'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                          !WRITE(*,*) "Allocated cbmc_cell_rsp with:"
+                          !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_rsp)
+                          !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_rsp), " bytes"
+                          !WRITE(*,*) "on step ", i_mcstep
+                  END IF
+                  IF (need_atom_ti) THEN
+                          IF (.NOT. ALLOCATED(cbmc_cell_ti)) THEN
+                                  ALLOCATE(cbmc_cell_ti(cbmc_max_interact, &
+                                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                          nbr_boxes), Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be allocated for cbmc_cell_ti'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                                  !WRITE(*,*) "Allocated cbmc_cell_ti with:"
+                                  !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_ti)
+                                  !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_ti), " bytes"
+                                  !WRITE(*,*) "on step ", i_mcstep
+                          END IF
+                  END IF
+                  IF (need_atomtypes) THEN
+                          IF (.NOT. ALLOCATED(cbmc_cell_atomtypes)) THEN
+                                  ALLOCATE(cbmc_cell_atomtypes(cbmc_max_interact, &
+                                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                          nbr_boxes), Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be allocated for cbmc_cell_atomtypes'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                                  !WRITE(*,*) "Allocated cbmc_cell_atomtypes with:"
+                                  !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_atomtypes)
+                                  !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_atomtypes), " bytes"
+                                  !WRITE(*,*) "on step ", i_mcstep
+                          END IF
+                  END IF
           ELSE
-                  cp(1) = atom_list(ia,im,is)%rxp
-                  cp(2) = atom_list(ia,im,is)%ryp
-                  cp(3) = atom_list(ia,im,is)%rzp
-                  this_box = molecule_list(im,is)%which_box
+                  ! ensure 32-byte padding of first dimension
+                  max_adj_cell_atoms = IAND(max_adj_cell_atoms+padconst_4byte,padmask_4byte)
+                  IF (ALLOCATED(cbmc_cell_rsp)) THEN
+                          IF (SIZE(cbmc_cell_rsp,1) < max_adj_cell_atoms .OR. ANY(adj_cellmaxbound>adj_cellmaxbound_old)) THEN
+                                  DEALLOCATE(cbmc_cell_rsp, Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be deallocated from cbmc_cell_rsp'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                                  IF (need_atom_ti) THEN
+                                          DEALLOCATE(cbmc_cell_ti, Stat=AllocateStatus)
+                                          IF (Allocatestatus /= 0) THEN
+                                            err_msg = ''
+                                            err_msg(1) = 'Memory could not be deallocated from cbmc_cell_ti'
+                                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                          END IF
+                                  END IF
+                                  IF (need_atomtypes) THEN
+                                          DEALLOCATE(cbmc_cell_atomtypes, Stat=AllocateStatus)
+                                          IF (Allocatestatus /= 0) THEN
+                                            err_msg = ''
+                                            err_msg(1) = 'Memory could not be deallocated from cbmc_cell_atomtypes'
+                                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                          END IF
+                                  END IF
+                          END IF
+                  END IF
+                  IF (.NOT. ALLOCATED(cbmc_cell_rsp)) THEN
+                          ALLOCATE(cbmc_cell_rsp(max_adj_cell_atoms,4, &
+                                  -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                  -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                  -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                  nbr_boxes), Stat=AllocateStatus)
+                          IF (Allocatestatus /= 0) THEN
+                            err_msg = ''
+                            err_msg(1) = 'Memory could not be allocated for cbmc_cell_rsp'
+                            CALL Clean_Abort(err_msg, 'Sector_Setup')
+                          END IF
+                          !WRITE(*,*) "Allocated cbmc_cell_rsp with:"
+                          !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_rsp)
+                          !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_rsp), " bytes"
+                          !WRITE(*,*) "on step ", i_mcstep
+                          IF (need_atom_ti) THEN
+                                  ALLOCATE(cbmc_cell_ti(max_adj_cell_atoms, &
+                                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                          nbr_boxes), Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be allocated for cbmc_cell_ti'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                                  !WRITE(*,*) "Allocated cbmc_cell_ti with:"
+                                  !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_ti)
+                                  !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_ti), " bytes"
+                                  !WRITE(*,*) "on step ", i_mcstep
+                          END IF
+                          IF (need_atomtypes) THEN
+                                  ALLOCATE(cbmc_cell_atomtypes(max_adj_cell_atoms, &
+                                          -adj_cellmaxbound(1):adj_cellmaxbound(1), &
+                                          -adj_cellmaxbound(2):adj_cellmaxbound(2), &
+                                          -adj_cellmaxbound(3):adj_cellmaxbound(3), &
+                                          nbr_boxes), Stat=AllocateStatus)
+                                  IF (Allocatestatus /= 0) THEN
+                                    err_msg = ''
+                                    err_msg(1) = 'Memory could not be allocated for cbmc_cell_atomtypes'
+                                    CALL Clean_Abort(err_msg, 'Sector_Setup')
+                                  END IF
+                                  !WRITE(*,*) "Allocated cbmc_cell_atomtypes with:"
+                                  !WRITE(*,*) "SHAPE", SHAPE(cbmc_cell_atomtypes)
+                                  !WRITE(*,*) "Occupying ", SIZEOF(cbmc_cell_atomtypes), " bytes"
+                                  !WRITE(*,*) "on step ", i_mcstep
+                          END IF
+                  END IF
+          END IF
+          l_firstframe = .FALSE.
+          !$OMP END SINGLE
+          !$OMP WORKSHARE
+          cbmc_cell_rsp(:,1:3,:,:,:,:) = infinity_sp ! guaranteed to be out of range unless overwritten
+          cbmc_cell_rsp(:,4,:,:,:,:) = 0.0
+          !$OMP END WORKSHARE NOWAIT
+          IF (need_atom_ti) THEN
+                  !$OMP WORKSHARE
+                  cbmc_cell_ti = 1
+                  !$OMP END WORKSHARE NOWAIT
+          END IF
+          IF (need_atomtypes) THEN
+                  !$OMP WORKSHARE
+                  cbmc_cell_atomtypes = 0
+                  !$OMP END WORKSHARE NOWAIT
           END IF
-          cell_coords = IDNINT(cp*cell_length_inv(:,this_box))
-          need_wrapping = ABS(cell_coords) .GE. sectorbound(:,this_box)
-          DO i_dim = 1,3
-                ci(i_dim,:) = cell_coords(i_dim) + delta
-                IF (need_wrapping(i_dim)) THEN
-                        DO i = 1,3
-                                IF (ci(i_dim,i) > sectorbound(i_dim,this_box)) THEN
-                                        ci(i_dim,i) = ci(i_dim,i) - length_cells(i_dim,this_box)
-                                ELSE IF (ci(i_dim,i) < -sectorbound(i_dim,this_box)) THEN
-                                        ci(i_dim,i) = ci(i_dim,i) + length_cells(i_dim,this_box)
+          !$OMP BARRIER
+          DO ibox = 1, nbr_boxes
+                  zub = box_list(ibox)%sectorbound(3)
+                  yub = box_list(ibox)%sectorbound(2)
+                  xub = box_list(ibox)%sectorbound(1)
+                  zlb = -zub
+                  ylb = -yub
+                  xlb = -xub
+                  !$OMP DO COLLAPSE(3) SCHEDULE(STATIC)
+                  DO zi = zlb, zub
+                  DO yi = ylb, yub
+                  DO xi = xlb, xub
+                        istart = 1
+                        iend = 0
+                        DO dzi = -1, 1
+                                zi2 = zi+dzi
+                                DO dyi = -1, 1
+                                        yi2 = yi + dyi
+                                        DO dxi = -1, 1
+                                                xi2 = xi + dxi
+                                                vlen = n_cell_atoms(xi2,yi2,zi2,ibox)
+                                                IF (vlen < 1) CYCLE
+                                                iend = istart + vlen - 1
+                                                cbmc_cell_rsp_priv(istart:iend,1:4) = &
+                                                        this_cell_rsp(1:vlen,1:4,xi2,yi2,zi2,ibox)
+                                                IF (need_atom_ti) THEN
+                                                        ti_priv(istart:iend) = &
+                                                                this_cell_ti(1:vlen,xi2,yi2,zi2,ibox)
+                                                END IF
+                                                IF (need_atomtypes) THEN
+                                                        atomtype_priv(istart:iend) = &
+                                                                this_cell_atomtypes(1:vlen,xi2,yi2,zi2,ibox)
+                                                END IF
+                                                istart = istart + vlen
+                                        END DO
+                                END DO
+                        END DO
+                        n_interact = 0
+                        DO i = 1, iend
+                                IF (cell_l_inrange(i,1,xi,yi,zi,ibox)) THEN
+                                        n_interact = n_interact + 1
+                                        which_interact(n_interact) = i
                                 END IF
                         END DO
-                END IF
+                        n_adj_cell_atoms(xi,yi,zi,ibox) = IAND(n_interact+padconst_4byte,padmask_4byte)
+                        IF (cbmc_cell_list_flag) THEN
+                                DO dzi = -bt(3), bt(3)
+                                        IF (ABS(dzi)<2) CYCLE
+                                        zi2 = zi+dzi
+                                        DO dyi = -bt(2), bt(2)
+                                                IF (ABS(dyi)<2) CYCLE
+                                                yi2 = yi + dyi
+                                                DO dxi = -bt(1), bt(1)
+                                                        IF (ABS(dxi)<2) CYCLE
+                                                        xi2 = xi + dxi
+                                                        vlen = IAND(n_cell_atoms(xi2,yi2,zi2,ibox), &
+                                                                box_list(ibox)%cbmc_cell_mask(dxi,dyi,dzi))
+                                                        IF (vlen < 1) CYCLE
+                                                        iend = istart + vlen - 1
+                                                        cbmc_cell_rsp_priv(istart:iend,1:4) = &
+                                                                this_cell_rsp(1:vlen,1:4,xi2,yi2,zi2,ibox)
+                                                        IF (need_atom_ti) THEN
+                                                                ti_priv(istart:iend) = &
+                                                                        this_cell_ti(1:vlen,xi2,yi2,zi2,ibox)
+                                                        END IF
+                                                        IF (need_atomtypes) THEN
+                                                                atomtype_priv(istart:iend) = &
+                                                                        this_cell_atomtypes(1:vlen,xi2,yi2,zi2,ibox)
+                                                        END IF
+                                                        istart = istart + vlen
+                                                END DO
+                                        END DO
+                                END DO
+                                !n_interact = 0
+                                DO i = 1, iend
+                                        IF (cell_l_inrange(i,2,xi,yi,zi,ibox)) THEN
+                                                n_interact = n_interact + 1
+                                                which_interact(n_interact) = i
+                                        END IF
+                                END DO
+                                cbmc_cell_n_interact(xi,yi,zi,ibox) = IAND(n_interact+padconst_4byte,padmask_4byte)
+                        END IF
+                        !DIR$ VECTOR ALIGNED
+                        cbmc_cell_rsp(1:n_interact,1:4,xi,yi,zi,ibox) = &
+                                cbmc_cell_rsp_priv(which_interact(1:n_interact),:)
+                        IF (need_atom_ti) THEN
+                                !DIR$ VECTOR ALIGNED
+                                cbmc_cell_ti(1:n_interact,xi,yi,zi,ibox) = &
+                                        ti_priv(which_interact(1:n_interact))
+                        END IF
+                        IF (need_atomtypes) THEN
+                                !DIR$ VECTOR ALIGNED
+                                cbmc_cell_atomtypes(1:n_interact,xi,yi,zi,ibox) = &
+                                        atomtype_priv(which_interact(1:n_interact))
+                        END IF
+                  END DO
+                  END DO
+                  END DO
+                  !$OMP END DO
           END DO
+          !$OMP END PARALLEL
+
+  END SUBROUTINE Sector_Setup
+
+  LOGICAL FUNCTION check_overlap_ams(ia, im, is)
+          !
+          INTEGER, INTENT(IN) :: ia, im, is
+          INTEGER, DIMENSION(3) :: cp, sf
+          REAL(DP) :: dprp(3)
+          REAL(SP) :: irp(3), dxp, dyp, dzp, rijsq, rminsq, sprp
+          INTEGER :: this_box
+          INTEGER :: i
+          INTEGER :: vlen
+          INTEGER :: xi, yi, zi
+          INTEGER :: ti_solute, icp, sb, lc
+          LOGICAL :: overlap, this_overlap
+
+          this_box = widom_molecule%which_box
+          dprp(1) = widom_atoms(ia)%rp(1)
+          dprp(2) = widom_atoms(ia)%rp(2)
+          dprp(3) = widom_atoms(ia)%rp(3)
+          IF (box_list(this_box)%int_box_shape <= int_ortho) THEN
+                  DO i = 1,3
+                        sprp = REAL(dprp(i),SP)
+                        irp(i) = sprp
+                        icp = NINT(sprp*box_list(this_box)%cell_length_recip(i))
+                        cp(i) = icp
+                  END DO
+                  IF (ANY(ABS(cp)>box_list(this_box)%sectorbound)) THEN
+                          sf = cp/(box_list(this_box)%sectorbound+1)
+                          cp = cp - sf*box_list(this_box)%length_cells
+                          irp = irp - sf*box_list(this_box)%sp_diag_length
+                  END IF
+          ELSE
+                  cp = IDNINT(MATMUL(box_list(this_box)%cell_length_inv,dprp))
+                  IF (ANY(ABS(cp)>box_list(this_box)%sectorbound)) THEN
+                          sf = cp/(box_list(this_box)%sectorbound+1)
+                          cp = cp - sf*box_list(this_box)%length_cells
+                          CALL Minimum_Image_Separation(this_box, &
+                                  widom_atoms(ia)%rp(1), &
+                                  widom_atoms(ia)%rp(2), &
+                                  widom_atoms(ia)%rp(3), &
+                                  dprp(1), dprp(2), dprp(3))
+                  END IF
+                  irp = REAL(dprp,SP)
+          END IF
+          check_overlap_ams = check_overlap_coordinates(irp,cp,ia,is,this_box)
+  END FUNCTION check_overlap_ams
+
+  LOGICAL FUNCTION check_overlap_coordinates(irp,cp,ia,is,this_box)
+          !
+          INTEGER, INTENT(IN) :: ia, is, this_box, cp(3)
+          REAL(SP), DIMENSION(3), INTENT(IN) :: irp
+          !REAL(SP), DIMENSION(max_adj_cell_atoms) :: rminsq_list
+          REAL(SP) :: dxp, dyp, dzp, rijsq, rminsq, sprp
+          INTEGER :: i
+          INTEGER :: vlen
+          INTEGER :: xi, yi, zi
+          INTEGER :: ti_solute, icp, sb, lc
+          LOGICAL :: overlap, this_overlap
+
+          xi = cp(1)
+          yi = cp(2)
+          zi = cp(3)
+          vlen = n_adj_cell_atoms(xi,yi,zi,this_box)
+
           IF (read_atompair_rminsq) THEN
-                  rminsq_ptr => atompair_rminsq_table(:,species_list(is)%wsolute_base+ia,this_box)
+                  ti_solute = species_list(is)%wsolute_base+ia
+                  !rminsq_list(1:vlen) = sp_atompair_rminsq_table(adj_cell_ti(1:vlen,xi,yi,zi,this_box), &
+                  !        species_list(is)%wsolute_base+ia,this_box)
+          ELSE IF (calc_rmin_flag) THEN
+                  ti_solute = nonbond_list(ia,is)%atom_type_number
+                  !rminsq_list(1:vlen) = sp_rminsq_table(adj_cell_ti(1:vlen,xi,yi,zi,this_box), &
+                  !        nonbond_list(ia,is)%atom_type_number)
           ELSE
-                  rminsq_ptr => rminsq_table(0:,nonbond_list(ia,is)%atom_type_number)
+                  rminsq = sp_rcut_lowsq
           END IF
-          DO xi = 1,3
-                DO yi = 1,3
-                        DO zi = 1,3
-                                IF (.NOT. sector_has_atoms(ci(1,xi),ci(2,yi),ci(3,zi),this_box)) CYCLE
-                                secind = sector_index_map(ci(1,xi),ci(2,yi),ci(3,zi),this_box)
-                                DO ia_cell = 1, sector_n_atoms(secind)
-                                        sector_atom_ID => sector_atoms(ia_cell,secind,:)
-                                        atom_ptr => atom_list(sector_atom_ID(1),sector_atom_ID(2),sector_atom_ID(3))
-                                        dxp = atom_ptr%rxp - cp(1)
-                                        dyp = atom_ptr%ryp - cp(2)
-                                        dzp = atom_ptr%rzp - cp(3)
-                                        CALL Minimum_Image_Separation(this_box,dxp,dyp,dzp,dx,dy,dz)
-                                        IF (read_atompair_rminsq) THEN
-                                                rminsq = rminsq_ptr( &
-                                                        species_list(sector_atom_ID(3))%solvent_base + &
-                                                        sector_atom_ID(1))
-                                        ELSE
-                                                rminsq = rminsq_ptr( &
-                                                        nonbond_list(sector_atom_ID(1),sector_atom_ID(3))%atom_type_number+1)
-                                        END IF
-                                        IF (dx*dx+dy*dy+dz*dz < rminsq) RETURN
-                                END DO
-                        END DO
-                END DO
+          overlap = .FALSE.
+          !DIR$ ASSUME (MOD(vlen,dimpad_4byte) .EQ. 0)
+          !DIR$ LOOP COUNT = 8, 16, 24, 32, 40
+          !DIR$ VECTOR ALIGNED
+          DO i = 1, vlen
+                dxp = cbmc_cell_rsp(i,1,xi,yi,zi,this_box) - irp(1)
+                dyp = cbmc_cell_rsp(i,2,xi,yi,zi,this_box) - irp(2)
+                dzp = cbmc_cell_rsp(i,3,xi,yi,zi,this_box) - irp(3)
+                rijsq = dxp*dxp
+                rijsq = rijsq + dyp*dyp
+                rijsq = rijsq + dzp*dzp
+                IF (read_atompair_rminsq) THEN
+                        rminsq = sp_atompair_rminsq_table(cbmc_cell_ti(i,xi,yi,zi,this_box),ti_solute,this_box)
+                ELSE IF (calc_rmin_flag) THEN
+                        rminsq = sp_rminsq_table(cbmc_cell_atomtypes(i,xi,yi,zi,this_box),ti_solute)
+                END IF
+                this_overlap = rijsq < rminsq
+                overlap = overlap .OR. this_overlap
           END DO
-          check_overlap = .FALSE.
+          check_overlap_coordinates = overlap
           
-  END FUNCTION check_overlap
+  END FUNCTION check_overlap_coordinates
 
   SUBROUTINE CBMC_Cell_List_Setup
+          ! This subroutine shouldn't be used anymore
           INTEGER, DIMENSION(3) :: sectormaxbound_old !, map_bound
           INTEGER, DIMENSION(3,nbr_boxes) :: sectorbound_old
           INTEGER :: i_sector, ci(3), dx, dy, dz, xshift, yshift, zshift, nsec_old, nsec, secind
@@ -242,13 +1993,13 @@ SUBROUTINE CBMC_Cell_List_Setup
           TYPE(Atom_Class), POINTER :: atom_ptr
           REAL(DP) :: xp, yp, zp, cp(3)
 !          INTEGER, DIMENSION(3,nbr_boxes) :: cbmc_truth_cube_bound, cut_truth_cube_bound
-          INTEGER :: xi, yi, zi, cim(3)
+          INTEGER :: xi, yi, zi, cim(3), xyzi(3), i_dim
           INTEGER :: max_occ_sectors_old, total_atoms
-          INTEGER, DIMENSION(2) :: xslice, yslice, zslice
-          INTEGER, DIMENSION(:,:,:), POINTER :: box_sector_ptr
+          INTEGER, DIMENSION(2,3) :: tgt_slice, src_slice
           INTEGER, DIMENSION(:), ALLOCATABLE :: xi_pm, yi_pm, zi_pm
           INTEGER :: dummy
           LOGICAL :: asflag
+          REAL(DP), DIMENSION(:,:), POINTER :: cell_length_inv_ptr
 
           
           sectorbound_old = sectorbound_cbmc
@@ -256,15 +2007,15 @@ SUBROUTINE CBMC_Cell_List_Setup
           nsec_old = MAXVAL(PRODUCT(length_cells_cbmc,1))
           DO ibox = 1, nbr_boxes
                 DO i = 1, 3
-                          length_cells_cbmc(i,ibox) = INT(box_list(ibox)%length(i,i)/rcut_cbmc(ibox))
+                          length_cells_cbmc(i,ibox) = INT(box_list(ibox)%face_distance(i)/rcut_cbmc(ibox))
                           IF (MOD(length_cells_cbmc(i,ibox),2) .EQ. 0) length_cells_cbmc(i,ibox) = length_cells_cbmc(i,ibox) - 1
-                          cell_length_inv_cbmc(i,ibox) = REAL(length_cells_cbmc(i,ibox),DP) / box_list(ibox)%length(i,i)
+                          cell_length_inv_cbmc(i,:,ibox) = REAL(length_cells_cbmc(i,ibox),DP) * box_list(ibox)%length_inv(i,:)
                           sectorbound_cbmc(i,ibox) = length_cells_cbmc(i,ibox)/2
                 END DO
           END DO
           !cell_length_cbmc = 1.0_DP / cell_length_inv_cbmc
 
-          sectormaxbound_cbmc = 3*MAXVAL(sectorbound_cbmc, 2)+1
+          sectormaxbound_cbmc = MAXVAL(sectorbound_cbmc, 2)+1
           !map_bound = sectormaxbound_cbmc*3+1
           IF (.NOT. ALL( sectormaxbound_cbmc <= sectormaxbound_old)) THEN
                   IF (ALLOCATED(sector_index_map_cbmc)) DEALLOCATE(sector_index_map_cbmc)
@@ -291,6 +2042,7 @@ SUBROUTINE CBMC_Cell_List_Setup
           AllocationSizeLoop: DO
           ! place atoms in sectors
           BoxLoop:DO ibox = 1, nbr_boxes
+                cell_length_inv_ptr => cell_length_inv_cbmc(:,:,ibox)
                 SpeciesLoop:DO is = 1, nspecies
                         sector_atom_ID(3) = is
                         !$OMP PARALLEL DO DEFAULT(SHARED) &
@@ -301,10 +2053,10 @@ SUBROUTINE CBMC_Cell_List_Setup
                                 AtomLoop:DO ia = 1, natoms(is)
                                         atom_ptr => atom_list(ia,im,is)
                                         IF (.NOT. atom_ptr%exist) CYCLE AtomLoop
-                                        cp(1) = atom_ptr%rxp
-                                        cp(2) = atom_ptr%ryp
-                                        cp(3) = atom_ptr%rzp
-                                        atom_ptr%ci_cbmc = IDNINT(cp*cell_length_inv_cbmc(:,ibox))
+                                        cp(1) = atom_ptr%rp(1)
+                                        cp(2) = atom_ptr%rp(2)
+                                        cp(3) = atom_ptr%rp(3)
+                                        atom_ptr%ci_cbmc = IDNINT(MATMUL(cell_length_inv_ptr,cp))
                                         DO i = 1,3
                                                 IF (atom_ptr%ci_cbmc(i) > sectorbound_cbmc(i,ibox)) THEN
                                                         atom_ptr%ci_cbmc(i) = atom_ptr%ci_cbmc(i) - length_cells_cbmc(i,ibox)
@@ -337,29 +2089,37 @@ SUBROUTINE CBMC_Cell_List_Setup
                                 END DO AtomLoop2
                         END DO MoleculeLoop2
                 END DO SpeciesLoop
-                box_sector_ptr => sector_index_map_cbmc( &
-                        -sectorbound_cbmc(1,ibox):sectorbound_cbmc(1,ibox), &
-                        -sectorbound_cbmc(2,ibox):sectorbound_cbmc(2,ibox), &
-                        -sectorbound_cbmc(3,ibox):sectorbound_cbmc(3,ibox), &
-                        ibox)
+                !$OMP PARALLEL PRIVATE(xyzi, i_dim, tgt_slice, src_slice)
+                !$OMP DO SCHEDULE(STATIC) COLLAPSE(3)
                 DO xi = -1, 1
-                        xslice(1) = -sectorbound_cbmc(1,ibox)+xi*length_cells_cbmc(1,ibox)
-                        xslice(2) = sectorbound_cbmc(1,ibox)+xi*length_cells_cbmc(1,ibox)
                         DO yi = -1, 1
-                                yslice(1) = -sectorbound_cbmc(2,ibox)+yi*length_cells_cbmc(2,ibox)
-                                yslice(2) = sectorbound_cbmc(2,ibox)+yi*length_cells_cbmc(2,ibox)
                                 DO zi = -1, 1
                                         IF (xi == 0 .AND. yi == 0 .AND. zi == 0) CYCLE
-                                        zslice(1) = -sectorbound_cbmc(3,ibox)+zi*length_cells_cbmc(3,ibox)
-                                        zslice(2) = sectorbound_cbmc(3,ibox)+zi*length_cells_cbmc(3,ibox)
+                                        xyzi = (/ xi, yi, zi /)
+                                        DO i_dim = 1,3
+                                                IF (xyzi(i_dim) == 0) THEN
+                                                        tgt_slice(:,i_dim) = (/ -1, 1 /) * sectorbound_cbmc(i_dim,ibox)
+                                                        src_slice(:,i_dim) = tgt_slice(:,i_dim)
+                                                ELSE
+                                                        tgt_slice(:,i_dim) = sectorbound_cbmc(i_dim,ibox)*xyzi(i_dim) + xyzi(i_dim)
+                                                        src_slice(:,i_dim) = -sectorbound_cbmc(i_dim,ibox)*xyzi(i_dim)
+                                                END IF
+                                        END DO
                                         sector_index_map_cbmc(&
-                                                xslice(1):xslice(2), &
-                                                yslice(1):yslice(2), &
-                                                zslice(1):zslice(2), &
-                                                ibox) = box_sector_ptr
+                                                tgt_slice(1,1):tgt_slice(2,1), &
+                                                tgt_slice(1,2):tgt_slice(2,2), &
+                                                tgt_slice(1,3):tgt_slice(2,3), &
+                                                ibox) = &
+                                                sector_index_map_cbmc(&
+                                                src_slice(1,1):src_slice(2,1), &
+                                                src_slice(1,2):src_slice(2,2), &
+                                                src_slice(1,3):src_slice(2,3), &
+                                                ibox)
                                 END DO
                         END DO
                 END DO
+                !$OMP END DO
+                !$OMP END PARALLEL
           END DO BoxLoop
           IF (asflag) THEN
                 EXIT AllocationSizeLoop
@@ -375,6 +2135,7 @@ END SUBROUTINE CBMC_Cell_List_Setup
 
 
   SUBROUTINE Full_Cell_List_Setup
+          ! use of this subroutine should be removed
           INTEGER, DIMENSION(3) :: sectormaxbound_old !, map_bound
           INTEGER, DIMENSION(3,nbr_boxes) :: sectorbound_old
           INTEGER :: i_sector, ci(3), dx, dy, dz, xshift, yshift, zshift, nsec_old, nsec, secind
@@ -391,6 +2152,7 @@ SUBROUTINE Full_Cell_List_Setup
           INTEGER :: dummy
           REAL(DP) :: max_rcut
           LOGICAL :: asflag
+          REAL(DP), DIMENSION(:,:), POINTER :: cell_length_inv_ptr
 
           sectorbound_old = sectorbound_full
           sectormaxbound_old = sectormaxbound_full
@@ -398,9 +2160,9 @@ SUBROUTINE Full_Cell_List_Setup
           DO ibox = 1, nbr_boxes
                 max_rcut = MAX(rcut_vdw(ibox), rcut_coul(ibox))
                 DO i = 1, 3
-                          length_cells_full(i,ibox) = INT(box_list(ibox)%length(i,i)/max_rcut)
+                          length_cells_full(i,ibox) = INT(box_list(ibox)%face_distance(i)/max_rcut)
                           IF (MOD(length_cells_full(i,ibox),2) .EQ. 0) length_cells_full(i,ibox) = length_cells_full(i,ibox) - 1
-                          cell_length_inv_full(i,ibox) = REAL(length_cells_full(i,ibox),DP) / box_list(ibox)%length(i,i)
+                          cell_length_inv_full(i,:,ibox) = REAL(length_cells_full(i,ibox),DP) * box_list(ibox)%length_inv(i,:)
                           sectorbound_full(i,ibox) = length_cells_full(i,ibox)/2
                 END DO
           END DO
@@ -433,6 +2195,7 @@ SUBROUTINE Full_Cell_List_Setup
           AllocationSizeLoop: DO
           ! place atoms in sectors
           BoxLoop:DO ibox = 1, nbr_boxes
+                cell_length_inv_ptr => cell_length_inv_full(:,:,ibox)
                 SpeciesLoop:DO is = 1, nspecies
                         sector_atom_ID(3) = is
                         !$OMP PARALLEL DO DEFAULT(SHARED) &
@@ -443,10 +2206,10 @@ SUBROUTINE Full_Cell_List_Setup
                                 AtomLoop:DO ia = 1, natoms(is)
                                         atom_ptr => atom_list(ia,im,is)
                                         IF (.NOT. atom_ptr%exist) CYCLE AtomLoop
-                                        cp(1) = atom_ptr%rxp
-                                        cp(2) = atom_ptr%ryp
-                                        cp(3) = atom_ptr%rzp
-                                        atom_ptr%ci_full = IDNINT(cp*cell_length_inv_full(:,ibox))
+                                        cp(1) = atom_ptr%rp(1)
+                                        cp(2) = atom_ptr%rp(2)
+                                        cp(3) = atom_ptr%rp(3)
+                                        atom_ptr%ci_full = IDNINT(MATMUL(cell_length_inv_ptr,cp))
                                         DO i = 1,3
                                                 IF (atom_ptr%ci_full(i) > sectorbound_full(i,ibox)) THEN
                                                         atom_ptr%ci_full(i) = atom_ptr%ci_full(i) - length_cells_full(i,ibox)
@@ -515,6 +2278,4 @@ SUBROUTINE Full_Cell_List_Setup
           END DO AllocationSizeLoop
   END SUBROUTINE Full_Cell_List_Setup
 
-
-
 END MODULE Sector_Routines
diff --git a/Src/trajectory_reader_routines.f90 b/Src/trajectory_reader_routines.f90
new file mode 100644
index 00000000..d410ad46
--- /dev/null
+++ b/Src/trajectory_reader_routines.f90
@@ -0,0 +1,579 @@
+!*****************************************************************************************
+!
+!
+!*****************************************************************************************
+MODULE Trajectory_Reader_Routines
+        USE Global_Variables
+        USE File_Names
+        USE Simulation_Properties
+        USE IO_Utilities
+        USE Energy_Routines
+        USE Type_Definitions
+        USE XTC_Routines
+        USE Internal_Coordinate_Routines
+        !$ USE OMP_LIB
+        IMPLICIT NONE
+        REAL(DP), DIMENSION(:,:), ALLOCATABLE, SAVE, PRIVATE :: frame_xyz
+        REAL(DP), DIMENSION(3,3), SAVE, PRIVATE :: this_length
+        INTEGER, PRIVATE :: ibox, natr_p
+
+        CONTAINS
+        SUBROUTINE Load_Next_Frame
+
+                INTEGER :: is, im
+
+                early_end = .FALSE.
+
+                nmols = 0
+                locate = 0
+                molecule_list(:,:)%live = .FALSE.
+                atom_list(:,:,:)%exist = .FALSE.
+                molecule_list(:,:)%molecule_type = int_none
+                molecule_list(:,:)%which_box = 0
+
+
+                DO ibox = 1, nbr_boxes
+                        IF (has_Hfile(ibox)) THEN
+                                this_length = Read_H_Frame()
+                                IF (early_end) RETURN
+                        ELSEIF (.NOT. ALLOCATED(frame_xyz)) THEN
+                                natr_p = IAND(MAXVAL(natoms_to_read)+padconst_4byte,padmask_4byte)
+                                ALLOCATE(frame_xyz(natr_p,3))
+                                !DIR$ VECTOR ALIGNED
+                                frame_xyz = 0.0_DP
+                        END IF
+                        IF (has_xyz(ibox)) THEN
+                                frame_xyz(1:natoms_to_read(ibox),:) = Read_xyz_Frame()
+                                IF (early_end) RETURN
+                        ELSEIF (has_xtc(ibox)) THEN
+                                IF (Read_xtc_Frame(ibox)) THEN
+                                        early_end = .TRUE.
+                                        EXIT
+                                END IF
+                                this_length = Get_xtc_Box(ibox)
+                                CALL Get_xtc_Coords(ibox,frame_xyz)
+                                !frame_xyz(1:natoms_to_read(ibox),:) = Get_xtc_Coords(ibox)
+                        END IF
+                        CALL Set_Frame_Box
+                        IF (.NOT. early_end) CALL Set_Frame_Coords
+                END DO
+
+                DO is = 1, nspecies
+                        DO im = max_molecules(is), SUM(nmols(is,1:nbr_boxes)) + 1, -1
+                                nmols(is,0) = nmols(is,0) + 1
+                                locate(nmols(is,0),is,0) = im
+                        END DO
+                END DO
+        END SUBROUTINE Load_Next_Frame
+
+
+        SUBROUTINE Set_Frame_Box
+
+                !REAL(DP), DIMENSION(3,3), INTENT(IN) :: this_length
+                INTEGER :: nvecsmax_old
+                INTEGER :: AllocateStatus
+
+                LOGICAL :: l_size_change
+
+
+                REAL(DP) :: frame_volume
+
+                IF (early_end) RETURN
+
+                l_size_change = (.NOT. ALL(box_list(ibox)%orig_length .EQ. this_length))
+
+                IF (l_size_change) THEN
+                        box_list(ibox)%length = this_length
+                        CALL Compute_Cell_Dimensions(ibox)
+                END IF
+
+                IF (l_size_change .AND. l_half_len_cutoff(ibox)) THEN
+                        rcut_vdw(ibox) = 0.5 * MIN(box_list(ibox)%face_distance(1), &
+                                                   box_list(ibox)%face_distance(2), &
+                                                   box_list(ibox)%face_distance(3))
+                        rcut_vdwsq(ibox) = rcut_vdw(ibox) * rcut_vdw(ibox)
+                        IF (int_charge_sum_style(ibox) /= charge_none) THEN
+                                rcut_coul(ibox) = rcut_vdw(ibox)
+                                rcut_coulsq(ibox) = rcut_vdwsq(ibox)
+                        END IF
+
+                        rcut_vdw3(ibox) = rcut_vdwsq(ibox) * rcut_vdw(ibox)
+                        rcut_vdw6(ibox) = rcut_vdw3(ibox) * rcut_vdw3(ibox)
+                        rcut3(ibox) = rcut_vdw3(ibox)
+                        rcut9(ibox) = rcut3(ibox) * rcut_vdw6(ibox)
+
+                        rcut_max(ibox) = rcut_vdw(ibox)
+                        IF ( int_charge_sum_style(ibox) == charge_ewald) THEN
+                                ! alpha_ewald(ibox) = ewald_p_sqrt(ibox) / rcut_coul(ibox)
+                                h_ewald_cut(ibox) = 2.0_DP * ewald_p(ibox) / rcut_coul(ibox)
+                        END IF
+                END IF
+                IF (l_size_change .AND. int_charge_sum_style(ibox) == charge_ewald) THEN
+                        CALL Ewald_Reciprocal_Lattice_Vector_Setup(ibox)
+                END IF
+
+        END SUBROUTINE Set_Frame_Box
+
+        FUNCTION Read_H_frame()
+                REAL(DP), DIMENSION(3,3) :: Read_H_frame
+                INTEGER :: nspecies_thisframe
+                INTEGER :: is_H, is
+                INTEGER :: nmols_H
+                INTEGER :: i, io
+
+                READ(pregen_H_unit(ibox),*,IOSTAT=io)
+                IF (io < 0) THEN
+                        early_end = .TRUE.
+                        RETURN
+                END IF
+                READ(pregen_H_unit(ibox),*)Read_H_frame(1,1), &
+                        Read_H_frame(1,2), &
+                        Read_H_frame(1,3)
+                READ(pregen_H_unit(ibox),*)Read_H_frame(2,1), &
+                        Read_H_frame(2,2), &
+                        Read_H_frame(2,3)
+                READ(pregen_H_unit(ibox),*)Read_H_frame(3,1), &
+                        Read_H_frame(3,2), &
+                        Read_H_frame(3,3)
+
+                READ(pregen_H_unit(ibox),*)
+                READ(pregen_H_unit(ibox),*)nspecies_thisframe
+                nmols_to_read(:,ibox) = 0
+                DO i = 1,nspecies_thisframe 
+                        READ(pregen_H_unit(ibox),*)is_H, nmols_H
+                        nmols_to_read(is_H,ibox) = nmols_H
+                END DO
+                atom_ibounds(2,:,ibox) = natoms*nmols_to_read(:,ibox)
+                natoms_to_read(ibox) = SUM(atom_ibounds(2,:,ibox))
+                DO is = 2, nspecies
+                        atom_ibounds(2,is,ibox) = SUM(atom_ibounds(2,is-1:is,ibox))
+                END DO
+                atom_ibounds(1,1,ibox) = 1
+                IF (nspecies > 1) atom_ibounds(1,2:nspecies,ibox) = atom_ibounds(2,1:(nspecies-1),ibox)+1
+                natr_p = IAND(natoms_to_read(ibox)+padconst_4byte,padmask_4byte)
+                IF (ALLOCATED(frame_xyz)) THEN
+                        IF (SIZE(frame_xyz,1)<natr_p) DEALLOCATE(frame_xyz)
+                END IF
+                IF (.NOT. ALLOCATED(frame_xyz)) THEN
+                        ALLOCATE(frame_xyz(natr_p,3))
+                        frame_xyz = 0.0_DP
+                END IF
+
+        END FUNCTION Read_H_frame
+
+        SUBROUTINE Set_Frame_Coords
+
+                !REAL(DP), DIMENSION(natoms_to_read(ibox),3), INTENT(IN) :: frame_xyz
+                INTEGER :: is, ia, ja, imol, this_im, locate_base, im
+
+
+                REAL(DP) :: this_lambda, e_lrc
+                LOGICAL :: overlap
+
+                INTEGER :: sloc, eloc, aib(2)
+                !REAL(DP), DIMENSION(IAND(MAXVAL(nmols_to_read(:,ibox))+padconst_8byte,padmask_8byte),MAXVAL(natoms),3) :: &
+                !        frame_xyz_rs
+                !REAL(DP), DIMENSION(IAND(MAXVAL(nmols_to_read(:,ibox))+padconst_8byte,padmask_8byte),4) :: &
+                !        molwrapvec, rcom_arr
+                REAL(DP) :: h11,h12,h13,h21,h22,h23,h31,h32,h33
+                REAL(DP) :: inv_h11,inv_h12,inv_h13,inv_h21,inv_h22,inv_h23,inv_h31,inv_h32,inv_h33
+                INTEGER :: ntr, ibond, i_dim
+                !LOGICAL, DIMENSION(MAXVAL(natoms)) :: l_moved
+                REAL(DP) :: rxp,ryp,rzp,sxp,syp,szp,rcom,scom,rxcom,rycom,rzcom,sxcom,sycom,szcom
+                REAL(DP) :: dxcom, dycom, dzcom
+                REAL(DP) :: boxlen,hl,irp,jrp, drp, absdrp, inv_total_mass, massfrac
+                !REAL(DP), DIMENSION(MAXVAL(natoms)) :: massfrac_vec(MAXVAL(natoms))
+                REAL(DP) :: isp, xcom, ycom, zcom, dcomsq, max_dcomsq, inv_l
+                REAL(DP) :: sxmwv, symwv, szmwv, mwv
+                LOGICAL :: l_ortho
+                INTEGER :: chunksize, chunkstart, chunkend, ithread, nthreads, chunkshift
+                INTEGER :: kx, ky, kz, i
+
+                l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                !$OMP PARALLEL DEFAULT(SHARED) &
+                !$OMP PRIVATE(h11,h12,h13,h21,h22,h23,h31,h32,h33) &
+                !$OMP PRIVATE(inv_h11,inv_h12,inv_h13,inv_h21,inv_h22,inv_h23,inv_h31,inv_h32,inv_h33) &
+                !$OMP PRIVATE(locate_base,is, ia,ja, imol,im, this_im) &
+                !$OMP PRIVATE(rxp,ryp,rzp,sxp,syp,szp,rcom,scom,rxcom,rycom,rzcom,sxcom,sycom,szcom) &
+                !$OMP PRIVATE(this_lambda,ntr,ibond,boxlen,hl,irp,jrp,drp,absdrp,inv_total_mass) &
+                !$OMP PRIVATE(massfrac,isp,xcom, ycom, zcom, dcomsq, max_dcomsq, inv_l) &
+                !$OMP PRIVATE(sxmwv, symwv, szmwv, mwv,dxcom,dycom,dzcom,i_dim) &
+                !$OMP PRIVATE(chunksize,chunkstart,chunkend,ithread,nthreads,chunkshift)
+
+                !!$OMP PARALLEL DEFAULT(PRIVATE) &
+                !!$OMP SHARED(frame_xyz_rs,ibox,frame_xyz,rcom_arr,molwrapvec) & 
+                !!$OMP SHARED(box_list,atom_list,molecule_list,natoms,nspecies,nmols_to_read,nonbond_list) &
+                !!$OMP SHARED(bondpart_list,nmols,locate,atom_ibounds,nbr_boxes,natr_p)
+
+
+
+                IF (.NOT. l_ortho) THEN
+                        h11 = box_list(ibox)%length(1,1)
+                        h21 = box_list(ibox)%length(2,1)
+                        h31 = box_list(ibox)%length(3,1)
+                        h12 = box_list(ibox)%length(1,2)
+                        h22 = box_list(ibox)%length(2,2)
+                        h32 = box_list(ibox)%length(3,2)
+                        h13 = box_list(ibox)%length(1,3)
+                        h23 = box_list(ibox)%length(2,3)
+                        h33 = box_list(ibox)%length(3,3)
+                        IF (box_list(ibox)%basis_changed) THEN
+                                inv_h11 = box_list(ibox)%orig_length_inv(1,1)
+                                inv_h21 = box_list(ibox)%orig_length_inv(2,1)
+                                inv_h31 = box_list(ibox)%orig_length_inv(3,1)
+                                inv_h12 = box_list(ibox)%orig_length_inv(1,2)
+                                inv_h22 = box_list(ibox)%orig_length_inv(2,2)
+                                inv_h32 = box_list(ibox)%orig_length_inv(3,2)
+                                inv_h13 = box_list(ibox)%orig_length_inv(1,3)
+                                inv_h23 = box_list(ibox)%orig_length_inv(2,3)
+                                inv_h33 = box_list(ibox)%orig_length_inv(3,3)
+                                !DIR$ ASSUME (MOD(natr_p,dimpad_8byte) .EQ. 0)
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(rxp,ryp,rzp,sxp,syp,szp)
+                                DO ia = 1, natr_p
+                                        rxp = frame_xyz(ia,1)
+                                        ryp = frame_xyz(ia,2)
+                                        rzp = frame_xyz(ia,3)
+                                        sxp = rxp*inv_h11 + ryp*inv_h12 + rzp*inv_h13
+                                        syp = rxp*inv_h21 + ryp*inv_h22 + rzp*inv_h23
+                                        szp = rxp*inv_h31 + ryp*inv_h32 + rzp*inv_h33
+                                        frame_xyz(ia,1) = sxp
+                                        frame_xyz(ia,2) = syp
+                                        frame_xyz(ia,3) = szp
+                                END DO
+                                !$OMP END DO SIMD
+                                inv_h11 = box_list(ibox)%length_inv(1,1)
+                                inv_h21 = box_list(ibox)%length_inv(2,1)
+                                inv_h31 = box_list(ibox)%length_inv(3,1)
+                                inv_h12 = box_list(ibox)%length_inv(1,2)
+                                inv_h22 = box_list(ibox)%length_inv(2,2)
+                                inv_h32 = box_list(ibox)%length_inv(3,2)
+                                inv_h13 = box_list(ibox)%length_inv(1,3)
+                                inv_h23 = box_list(ibox)%length_inv(2,3)
+                                inv_h33 = box_list(ibox)%length_inv(3,3)
+                        ELSE
+                                inv_h11 = box_list(ibox)%length_inv(1,1)
+                                inv_h21 = box_list(ibox)%length_inv(2,1)
+                                inv_h31 = box_list(ibox)%length_inv(3,1)
+                                inv_h12 = box_list(ibox)%length_inv(1,2)
+                                inv_h22 = box_list(ibox)%length_inv(2,2)
+                                inv_h32 = box_list(ibox)%length_inv(3,2)
+                                inv_h13 = box_list(ibox)%length_inv(1,3)
+                                inv_h23 = box_list(ibox)%length_inv(2,3)
+                                inv_h33 = box_list(ibox)%length_inv(3,3)
+                                !DIR$ ASSUME (MOD(natr_p,dimpad_8byte) .EQ. 0)
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP DO SIMD SCHEDULE(SIMD:STATIC) PRIVATE(rxp,ryp,rzp,sxp,syp,szp)
+                                DO ia = 1, natr_p
+                                        rxp = frame_xyz(ia,1)
+                                        ryp = frame_xyz(ia,2)
+                                        rzp = frame_xyz(ia,3)
+                                        sxp = rxp*inv_h11 + ryp*inv_h12 + rzp*inv_h13
+                                        syp =               ryp*inv_h22 + rzp*inv_h23
+                                        szp =                             rzp*inv_h33
+                                        frame_xyz(ia,1) = sxp
+                                        frame_xyz(ia,2) = syp
+                                        frame_xyz(ia,3) = szp
+                                END DO
+                                !$OMP END DO SIMD
+                        END IF
+                END IF
+
+                !this_lambda = 1.0_DP
+                nthreads = 1
+                ithread = 0
+                !$ nthreads = OMP_GET_NUM_THREADS()
+                !$ ithread = OMP_GET_THREAD_NUM()
+                DO is = 1, nspecies
+                        IF (nmols_to_read(is,ibox) < 1) CYCLE
+                        ntr = IAND(nmols_to_read(is,ibox)+padconst_8byte,padmask_8byte)
+                        chunksize = ntr
+                        !$ chunksize = IAND((ntr+nthreads-1)/nthreads+padconst_8byte,padmask_8byte)
+                        !chunkstart = 1
+                        chunkshift = 0
+                        chunkend = ntr
+                        !$ chunkshift = ithread*chunksize
+                        !!$ chunkstart = chunkshift+1
+                        !$ chunkend = MIN((ithread+1)*chunksize,ntr)
+                        !$ chunksize = MAX(0,chunkend-chunkshift)
+                        IF (chunksize .EQ. 0) CYCLE
+                        CALL Set_Species_Frame_Coords(is,natoms(is),chunksize,chunkshift,l_ortho)
+
+                END DO
+                IF (ithread+1 .EQ. nthreads) THEN
+                        DO is = 1, nspecies
+                                locate_base = SUM(nmols(is,1:nbr_boxes))
+                                DO imol = 1, nmols_to_read(is,ibox)
+                                        locate(imol,is,ibox) = imol+locate_base
+                                END DO
+                        END DO
+                END IF
+                !$OMP BARRIER
+                !$OMP END PARALLEL
+                nmols(:,ibox) = nmols(:,ibox) + nmols_to_read(:,ibox)
+
+                IF (int_vdw_sum_style(ibox) == vdw_cut_tail) THEN 
+                        CALL Compute_Beads(ibox)
+                        CALL Compute_LR_Correction(ibox,e_lrc)
+                        energy(ibox)%lrc = e_lrc
+                ELSE
+                        energy(ibox)%lrc = 0.0_DP
+                END IF
+                
+                IF (int_charge_sum_style(ibox) == charge_ewald) THEN
+                        CALL Compute_System_Ewald_Reciprocal_Energy_Pregen(ibox)
+                ELSE
+                        energy(ibox)%reciprocal = 0.0_DP
+                END IF
+
+
+        END SUBROUTINE Set_Frame_Coords
+        SUBROUTINE Set_Species_Frame_Coords(is,na,chunksize,chunkshift,l_ortho)
+                INTEGER, INTENT(IN) :: is,na,chunksize,chunkshift
+                LOGICAL, INTENT(IN) :: l_ortho
+
+
+                REAL(DP), DIMENSION(chunksize,na,3) :: frame_xyz_rs
+                REAL(DP), DIMENSION(chunksize,4) :: rcom_arr
+                REAL(DP), DIMENSION(MERGE(chunksize,0,l_ortho),3) :: molwrapvec
+                LOGICAL, DIMENSION(na) :: l_moved
+                INTEGER :: aib(2), ia, ja, im, locate_base
+                REAL(DP) :: h11,h12,h13,h21,h22,h23,h31,h32,h33
+                REAL(DP) :: inv_h11,inv_h12,inv_h13,inv_h21,inv_h22,inv_h23,inv_h31,inv_h32,inv_h33
+                INTEGER :: ntr, ibond, i_dim
+                REAL(DP) :: rxp,ryp,rzp,sxp,syp,szp,rcom,scom,rxcom,rycom,rzcom,sxcom,sycom,szcom
+                REAL(DP) :: dxcom, dycom, dzcom
+                REAL(DP) :: boxlen,hl,irp,jrp, drp, absdrp, inv_total_mass, massfrac
+                REAL(DP), DIMENSION(na) :: massfrac_vec
+                REAL(DP) :: isp, xcom, ycom, zcom, dcomsq, max_dcomsq, inv_l
+                REAL(DP) :: sxmwv, symwv, szmwv, mwv
+                INTEGER :: loc, chunkend
+                !l_ortho = box_list(ibox)%int_box_shape <= int_ortho
+                aib(1) = atom_ibounds(1,is,ibox) + chunkshift*na
+                chunkend = MIN(chunksize,nmols_to_read(is,ibox)-chunkshift) ! unpadded chunksize
+                aib(2) = aib(1) + chunkend*na - 1
+                frame_xyz_rs(1:chunkend,:,:) = &
+                        RESHAPE(frame_xyz(aib(1):aib(2),:),&
+                        (/ chunkend,na,3 /),ORDER=(/2,1,3/))
+                IF (chunksize>chunkend) THEN
+                        frame_xyz_rs(chunkend+1:chunksize,:,:) = 0.0_DP
+                END IF
+                l_moved = .FALSE.
+                l_moved(1) = .TRUE.
+                !DIR$ ASSUME (MOD(chunksize,dimpad_8byte) .EQ. 0)
+                DO WHILE (.NOT. ALL(l_moved))
+                        DO ia = 1, na
+                                IF (.NOT. l_moved(ia)) CYCLE
+                                DO ibond = 1 , bondpart_list(ia,is)%nbonds
+                                        ja = bondpart_list(ia,is)%atom(ibond)
+                                        IF (l_moved(ja)) CYCLE
+                                        l_moved(ja) = .TRUE.
+                                        DO i_dim = 1, 3
+                                                IF (l_ortho) THEN
+                                                        boxlen = box_list(ibox)%length(i_dim,i_dim)
+                                                ELSE
+                                                        boxlen = 1.0_DP
+                                                END IF
+                                                hl = 0.5_DP * boxlen
+                                                !DIR$ VECTOR ALIGNED
+                                                !$OMP SIMD PRIVATE(irp,jrp,drp,absdrp)
+                                                DO im = 1, chunksize
+                                                        irp = frame_xyz_rs(im,ia,i_dim)
+                                                        jrp = frame_xyz_rs(im,ja,i_dim)
+                                                        drp = jrp - irp
+                                                        absdrp = ABS(drp)
+                                                        IF (absdrp > hl) jrp = jrp - SIGN(boxlen,drp)
+                                                        frame_xyz_rs(im,ja,i_dim) = jrp
+                                                END DO
+                                                !$OMP END SIMD
+                                        END DO
+                                END DO
+                        END DO
+                END DO
+                IF (.NOT. l_ortho) THEN
+                        h11 = box_list(ibox)%length(1,1)
+                        !h21 = box_list(ibox)%length(2,1)
+                        !h31 = box_list(ibox)%length(3,1)
+                        h12 = box_list(ibox)%length(1,2)
+                        h22 = box_list(ibox)%length(2,2)
+                        !h32 = box_list(ibox)%length(3,2)
+                        h13 = box_list(ibox)%length(1,3)
+                        h23 = box_list(ibox)%length(2,3)
+                        h33 = box_list(ibox)%length(3,3)
+                        DO ia = 1, natoms(is)
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP SIMD PRIVATE(isp,rxp,ryp,rzp)
+                                DO im = 1, chunksize
+                                        isp = frame_xyz_rs(im,ia,1)
+                                        rxp = h11*isp
+                                        isp = frame_xyz_rs(im,ia,2)
+                                        rxp = rxp + h12*isp
+                                        ryp = h22*isp
+                                        isp = frame_xyz_rs(im,ia,3)
+                                        rxp = rxp + h13*isp
+                                        ryp = ryp + h23*isp
+                                        rzp = h33*isp
+                                        frame_xyz_rs(im,ia,1) = rxp
+                                        frame_xyz_rs(im,ia,2) = ryp
+                                        frame_xyz_rs(im,ia,3) = rzp
+                                END DO
+                                !$OMP END SIMD
+                        END DO
+                END IF
+                inv_total_mass = 1.0_DP/SUM(nonbond_list(1:natoms(is),is)%mass)
+                massfrac_vec(1:natoms(is)) = nonbond_list(1:natoms(is),is)%mass*inv_total_mass
+                !DIR$ VECTOR ALIGNED
+                !$OMP SIMD &
+                !$OMP PRIVATE(rxp,ryp,rzp,xcom,ycom,zcom,dxcom,dycom,dzcom) &
+                !$OMP PRIVATE(max_dcomsq,dcomsq,massfrac)
+                DO im = 1, chunksize
+                        massfrac = massfrac_vec(1)
+                        rxp = frame_xyz_rs(im,1,1)
+                        ryp = frame_xyz_rs(im,1,2)
+                        rzp = frame_xyz_rs(im,1,3)
+                        xcom = massfrac*rxp
+                        ycom = massfrac*ryp
+                        zcom = massfrac*rzp
+                        DO ia = 2, na
+                                massfrac = massfrac_vec(ia)
+                                rxp = frame_xyz_rs(im,ia,1)
+                                ryp = frame_xyz_rs(im,ia,2)
+                                rzp = frame_xyz_rs(im,ia,3)
+                                xcom = xcom + massfrac*rxp
+                                ycom = ycom + massfrac*ryp
+                                zcom = zcom + massfrac*rzp
+                        END DO
+                        rcom_arr(im,1) = xcom
+                        rcom_arr(im,2) = ycom
+                        rcom_arr(im,3) = zcom
+                        rxp = frame_xyz_rs(im,1,1)
+                        ryp = frame_xyz_rs(im,1,2)
+                        rzp = frame_xyz_rs(im,1,3)
+                        dxcom = rxp - xcom
+                        dycom = ryp - ycom
+                        dzcom = rzp - zcom
+                        max_dcomsq = dxcom*dxcom + dycom*dycom + dzcom*dzcom
+                        DO ia = 2, na
+                                rxp = frame_xyz_rs(im,ia,1)
+                                ryp = frame_xyz_rs(im,ia,2)
+                                rzp = frame_xyz_rs(im,ia,3)
+                                dxcom = rxp - xcom
+                                dycom = ryp - ycom
+                                dzcom = rzp - zcom
+                                dcomsq = dxcom*dxcom + dycom*dycom + dzcom*dzcom
+                                max_dcomsq = MAX(max_dcomsq,dcomsq)
+                        END DO
+                        rcom_arr(im,4) = SQRT(max_dcomsq)
+                END DO
+                !$OMP END SIMD
+                IF (l_ortho) THEN
+                        DO i_dim = 1, 3
+                                boxlen = box_list(ibox)%length(i_dim,i_dim)
+                                inv_l = 1.0_DP/boxlen
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP SIMD PRIVATE(rcom,mwv,irp)
+                                DO im = 1, chunksize
+                                        rcom = rcom_arr(im,i_dim)
+                                        mwv = boxlen*ANINT(rcom*inv_l)
+                                        !molwrapvec(im,i_dim) = mwv
+                                        rcom_arr(im,i_dim) = rcom - mwv
+                                        DO ia = 1, natoms(is)
+                                                irp = frame_xyz_rs(im,ia,i_dim)
+                                                irp = irp - mwv
+                                                frame_xyz_rs(im,ia,i_dim) = irp
+                                        END DO
+                                END DO
+                                !$OMP END SIMD
+                        END DO
+                ELSE
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP SIMD PRIVATE(rxcom,rycom,rzcom)
+                        DO im = 1, chunksize
+                                rxcom = rcom_arr(im,1)
+                                rycom = rcom_arr(im,2)
+                                rzcom = rcom_arr(im,3)
+                                molwrapvec(im,1) = rxcom*inv_h11 + rycom*inv_h12 + rzcom*inv_h13
+                                molwrapvec(im,2) =                 rycom*inv_h22 + rzcom*inv_h23
+                                molwrapvec(im,3) =                                 rzcom*inv_h33
+                        END DO
+                        !$OMP END SIMD
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP SIMD PRIVATE(mwv)
+                        DO im = 1, chunksize
+                                mwv = molwrapvec(im,1)
+                                molwrapvec(im,1) = ANINT(mwv)
+                                mwv = molwrapvec(im,2)
+                                molwrapvec(im,2) = ANINT(mwv)
+                                mwv = molwrapvec(im,3)
+                                molwrapvec(im,3) = ANINT(mwv)
+                        END DO
+                        !$OMP END SIMD
+                        !molwrapvec(1:nmols_to_read(is,ibox),:) = ANINT(molwrapvec(1:nmols_to_read(is,ibox),:))
+                        !DIR$ VECTOR ALIGNED
+                        !$OMP SIMD PRIVATE(sxmwv,symwv,szmwv)
+                        DO im = 1, chunksize
+                                sxmwv = molwrapvec(im,1)
+                                symwv = molwrapvec(im,2)
+                                szmwv = molwrapvec(im,3)
+                                molwrapvec(im,1) = sxmwv*h11 + symwv*h12 + szmwv*h13
+                                molwrapvec(im,2) =             symwv*h22 + szmwv*h23
+                                molwrapvec(im,3) =                         szmwv*h33
+                        END DO
+                        !$OMP END SIMD
+                        DO i_dim = 1, 3
+                                !DIR$ VECTOR ALIGNED
+                                !$OMP SIMD PRIVATE(rcom,mwv,irp)
+                                DO im = 1, chunksize
+                                        rcom = rcom_arr(im,i_dim)
+                                        mwv = molwrapvec(im,i_dim)
+                                        rcom = rcom - mwv
+                                        rcom_arr(im,i_dim) = rcom
+                                        DO ia = 1, natoms(is)
+                                                irp = frame_xyz_rs(im,ia,i_dim)
+                                                irp = irp - mwv
+                                                frame_xyz_rs(im,ia,i_dim) = irp
+                                        END DO
+                                END DO
+                                !$OMP END SIMD
+                        END DO
+                END IF
+                locate_base = SUM(nmols(is,1:nbr_boxes)) + chunkshift
+                DO im = 1, chunkend
+                        loc = im + locate_base
+                        molecule_list(loc,is)%live = .TRUE.
+                        molecule_list(loc,is)%frac = 1.0_DP
+                        molecule_list(loc,is)%which_box = ibox
+                        molecule_list(loc,is)%rcom = rcom_arr(im,:)
+                        !atom_list(1:natoms(is),im,is)%exist = .TRUE.
+                        DO ia = 1, na
+                                atom_list(ia,loc,is)%exist = .TRUE.
+                                atom_list(ia,loc,is)%rp(1:3) = frame_xyz_rs(im,ia,1:3)
+                        END DO
+                END DO
+        END SUBROUTINE Set_Species_Frame_Coords
+
+        FUNCTION Read_xyz_frame()
+                REAL(DP), DIMENSION(natoms_to_read(ibox),3) :: Read_xyz_frame
+                INTEGER :: this_unit, io, i
+                CHARACTER(6) :: this_element
+
+                this_unit = pregen_xyz_unit(ibox)
+
+                READ(this_unit,*,IOSTAT=io)
+                IF (io < 0) THEN
+                        early_end = .TRUE.
+                        RETURN
+                END IF
+                READ(this_unit,*)
+                DO i = 1, natoms_to_read(ibox)
+                        READ(this_unit,*) this_element, &
+                                Read_xyz_frame(i,1), &
+                                Read_xyz_frame(i,2), &
+                                Read_xyz_frame(i,3)
+                END DO
+                
+        END FUNCTION Read_xyz_frame
+
+
+
+END MODULE Trajectory_Reader_Routines
diff --git a/Src/type_definitions.f90 b/Src/type_definitions.f90
index e4770d5b..dae67c6e 100755
--- a/Src/type_definitions.f90
+++ b/Src/type_definitions.f90
@@ -56,6 +56,12 @@ MODULE Type_Definitions
   INTEGER, PARAMETER :: SP = REAL32
   INTEGER, PARAMETER :: DP = REAL64
 
+  ! Define REAL type infinity using hexadecimal literal constants
+  REAL(REAL32), PARAMETER :: infinity_sp = REAL(Z'7F800000',REAL32)
+  REAL(REAL64), PARAMETER :: infinity_dp = REAL(Z'7FF0000000000000',REAL64)
+  !REAL(REAL32), PARAMETER :: infinity_sp = Z'7F800000'
+  !REAL(REAL64), PARAMETER :: infinity_dp = Z'7FF0000000000000'
+
   ! Specify the limits on the parameters for various intramolecular function classes
   INTEGER, PARAMETER :: max_bond_params = 5
   INTEGER, PARAMETER :: max_angle_params = 5
@@ -69,6 +75,61 @@ MODULE Type_Definitions
   ! Place a limit on the number of angle evaluations for probability calculations
   INTEGER, PARAMETER :: nregions = 1000
 
+
+  REAL(DP), PARAMETER :: PI=3.1415926536_DP
+  REAL(DP), PARAMETER :: twoPI = 6.2831853072_DP
+  REAL(DP), PARAMETER :: rootPI = 1.7724538509_DP
+  REAL(DP), PARAMETER :: halfPI = 0.5_DP*PI
+  REAL(DP), PARAMETER :: threehalfPI_DP = 3.0_DP*halfPI
+  REAL(SP), PARAMETER :: twoPI_SP = REAL(twoPI,SP)
+  REAL(SP), PARAMETER :: PI_SP = REAL(PI,SP)
+
+  LOGICAL, PARAMETER :: array8byte = INDEX(COMPILER_OPTIONS(),"array8byte") .NE. 0 .OR. &
+          INDEX(COMPILER_OPTIONS(),"-mmmx") .NE. 0
+  LOGICAL, PARAMETER :: array16byte = INDEX(COMPILER_OPTIONS(),"array16byte") .NE. 0 .OR. &
+          INDEX(COMPILER_OPTIONS(),"-msse") .NE. 0
+  LOGICAL, PARAMETER :: array32byte = INDEX(COMPILER_OPTIONS(),"array32byte") .NE. 0 .OR. &
+          INDEX(COMPILER_OPTIONS(),"-mavx") .NE. 0
+  LOGICAL, PARAMETER :: array64byte = INDEX(COMPILER_OPTIONS(),"array64byte") .NE. 0 .OR. &
+          INDEX(COMPILER_OPTIONS(),"-mavx512") .NE. 0
+  LOGICAL, PARAMETER :: array128byte = INDEX(COMPILER_OPTIONS(),"array128byte") .NE. 0
+  LOGICAL, PARAMETER :: array256byte = INDEX(COMPILER_OPTIONS(),"array256byte") .NE. 0
+  !INTEGER, DIMENSION(6), PARAMETER :: align_byte_options = (/ 8, 16, 32, 64, 128, 256 /)
+  INTEGER, DIMENSION(6), PARAMETER :: log2align_byte_options = (/ 3, 4, 5, 6, 7, 8 /)
+  !INTEGER, DIMENSION(6), PARAMETER :: align_byte_options = SHIFTL(1,log2align_byte_options)
+  LOGICAL, DIMENSION(6), PARAMETER :: array_n_byte_vec = (/ &
+          array8byte, array16byte, array32byte, array64byte, array128byte, array256byte /)
+  !INTEGER, PARAMETER :: array_align_bytes = MAXVAL(PACK(align_byte_options,array_n_byte_vec,(/1,1,1,1,1,1/)))
+  INTEGER, PARAMETER :: log2array_align_bytes = MAXVAL(MERGE(log2align_byte_options,(/0,0,0,0,0,0/),array_n_byte_vec))
+  INTEGER, PARAMETER :: array_align_bytes = SHIFTL(1,log2array_align_bytes)
+  INTEGER, PARAMETER :: log2dimpad_1byte = log2array_align_bytes
+  INTEGER, PARAMETER :: log2dimpad_2byte = MAX(0,log2array_align_bytes-1)
+  INTEGER, PARAMETER :: log2dimpad_4byte = MAX(0,log2array_align_bytes-2)
+  INTEGER, PARAMETER :: log2dimpad_8byte = MAX(0,log2array_align_bytes-3)
+  INTEGER, PARAMETER :: log2dimpad_16byte = MAX(0,log2array_align_bytes-4)
+  INTEGER, PARAMETER :: log2dimpad_32byte = MAX(0,log2array_align_bytes-5)
+
+  INTEGER, PARAMETER :: dimpad_1byte = array_align_bytes
+  INTEGER, PARAMETER :: dimpad_2byte = SHIFTL(1,log2dimpad_2byte)
+  INTEGER, PARAMETER :: dimpad_4byte = SHIFTL(1,log2dimpad_4byte)
+  INTEGER, PARAMETER :: dimpad_8byte = SHIFTL(1,log2dimpad_8byte)
+  INTEGER, PARAMETER :: dimpad_16byte = SHIFTL(1,log2dimpad_16byte)
+  INTEGER, PARAMETER :: dimpad_32byte = SHIFTL(1,log2dimpad_32byte)
+
+  INTEGER, PARAMETER :: padconst_1byte = dimpad_1byte-1
+  INTEGER, PARAMETER :: padconst_2byte = dimpad_2byte-1
+  INTEGER, PARAMETER :: padconst_4byte = dimpad_4byte-1
+  INTEGER, PARAMETER :: padconst_8byte = dimpad_8byte-1
+  INTEGER, PARAMETER :: padconst_16byte = dimpad_16byte-1
+  INTEGER, PARAMETER :: padconst_32byte = dimpad_32byte-1
+
+  INTEGER, PARAMETER :: padmask_1byte = NOT(padconst_1byte)
+  INTEGER, PARAMETER :: padmask_2byte = NOT(padconst_2byte)
+  INTEGER, PARAMETER :: padmask_4byte = NOT(padconst_4byte)
+  INTEGER, PARAMETER :: padmask_8byte = NOT(padconst_8byte)
+  INTEGER, PARAMETER :: padmask_16byte = NOT(padconst_16byte)
+  INTEGER, PARAMETER :: padmask_32byte = NOT(padconst_32byte)
+
   ! Define some classes to hold variables associated with different objects
   ! in the simulation. These will be converted to lists in global_variables for speed.
 
@@ -110,7 +171,7 @@ MODULE Type_Definitions
      LOGICAL :: linear
      ! NR: Adding to have an option not to include
      ! Coul interaction during biased growth
-     LOGICAL :: L_Coul_CBMC
+     LOGICAL :: L_Coul_CBMC = .TRUE.
      ! N.B. natoms, max_molecules, etc. are in a separate arrays
      ! NR: for insertion style
      LOGICAL :: lcom
@@ -121,13 +182,34 @@ MODULE Type_Definitions
      INTEGER (KIND=INT64), DIMENSION(:), ALLOCATABLE :: insertions_in_step, widom_interval
      REAL(DP), DIMENSION(:), ALLOCATABLE :: widom_sum
 
-     ! # of RB dihedrals, # of dihedrals with nonzero energy, # of dihedrals before stacked dihedrals were combined
-     INTEGER :: ndihedrals_rb, ndihedrals_energetic, ndihedrals_uncombined
      !! Is this a solute?  Is this a solvent species?
      LOGICAL :: l_solute, l_solvent, l_wsolute
+
      !!Atompair_nrg_table index bases
      INTEGER :: solute_base, solvent_base, wsolute_base
 
+     ! Pair energy array index base
+     INTEGER :: superlocate_base
+
+     ! # of RB dihedrals, # of dihedrals with nonzero energy, # of dihedrals before stacked dihedrals were combined
+     INTEGER :: ndihedrals_rb, ndihedrals_energetic, ndihedrals_uncombined
+
+     ! CBMC biasing info
+     INTEGER :: kappa_ins = 0, kappa_ins_pad8, kappa_ins_pad32
+     INTEGER :: kappa_dih = 0, kappa_dih_pad8, kappa_dih_pad32
+     INTEGER :: kappa_rot = 0
+     INTEGER :: nfragments
+     REAL(DP) :: theta_step, log_kappa_ins, log_kappa_rot, ln_pbias_dih_const
+     REAL(SP) :: theta_step_sp
+     LOGICAL :: need_kappa_ins = .FALSE., need_kappa_dih = .FALSE.
+
+     REAL(DP), DIMENSION(:,:), ALLOCATABLE :: sincos_lintheta_dp
+     REAL(SP), DIMENSION(:,:), ALLOCATABLE :: sincos_lintheta_sp
+
+     CONTAINS
+             PROCEDURE :: setup_CBMC_kappas => Setup_Species_kappas
+             PROCEDURE :: write_CBMC_kappas => Write_Species_kappas
+
   END TYPE Species_Class
   !****************************************************************************
 
@@ -139,6 +221,8 @@ MODULE Type_Definitions
 
      ! The molecule list will have dimensions (max_molecules,nspecies)
 
+     REAL(DP), DIMENSION(4) :: rcom, rcom_old
+
      ! What kind of molecule is this? normal, fractional, fixed, etc.
      ! Note that the following integers will be defined for the type
      ! int_normal = 0
@@ -148,22 +232,27 @@ MODULE Type_Definitions
      INTEGER :: rx_num
 
      ! for open system and multi-box simulations (GEMC, parallel tempering)
-     LOGICAL :: live
-     LOGICAL :: inside
      INTEGER :: which_box
+     LOGICAL :: live
+
+     REAL(DP) :: unused_dummy1, unused_dummy2 ! added for padding derived type
 
      ! also include com information for each of the molecules.
      ! com and euler angles refer to the x,y and z com coordinates
      ! and 1, 2 and 3 euler angles of the molecule. The suffix
      ! old denotes old coordinates.
      ! frac is the fractional scaling parameter for the molecule
-     REAL(DP)  :: xcom, ycom, zcom, euler1, euler2, euler3
-     REAL(DP)  :: xcom_old, ycom_old, zcom_old, euler1_old,euler2_old,euler3_old
+     ! xcom, ycom, and zcom are now rcom(1), rcom(2), and rcom(3), respectively
+     ! likewise for with the "_old" suffix
+     REAL(DP)  :: euler1, euler2, euler3
+     REAL(DP)  :: euler1_old,euler2_old,euler3_old
      REAL(DP)  :: frac
      ! This variable records the maximum distance of any psuedo atom from its
      ! COM. This is used to speed up energy calculations.
 
-     REAL(DP) :: max_dcom, max_dcom_old, min_dcom
+     ! max_dcom is now rcom(4) and max_dcom_old is now rcom_old(4)
+
+     REAL(DP) :: min_dcom
 
 
 
@@ -210,15 +299,26 @@ MODULE Type_Definitions
 
      ! atom_list has dimensions (natoms, max_molecules, nspecies)
 
-     REAL(DP) :: rxp, ryp, rzp
+     REAL(DP), DIMENSION(3) :: rp, rp_old
+
+     !REAL(DP) :: rxp, ryp, rzp
      REAL(DP) :: rxp_nls, ryp_nls, rzp_nls  ! The starting positions for the neighbor list
-     REAL(DP) :: rxp_old, ryp_old, rzp_old
+     !REAL(DP) :: rxp_old, ryp_old, rzp_old
      INTEGER :: ci(3), ci_cbmc(3), ci_full(3) ! the integer coordinates of the cell containing this atom
      LOGICAL :: exist
 
   END TYPE Atom_Class
   !****************************************************************************
 
+  TYPE Atom256
+          REAL(DP) :: rxp, ryp, rzp
+          LOGICAL(8) :: exist
+  END TYPE Atom256
+
+  TYPE VdW256
+          REAL(DP) :: p1, p2, p3, p4
+  END TYPE VdW256
+
 
 
   !****************************************************************************
@@ -293,10 +393,15 @@ MODULE Type_Definitions
      ! RB torsion series constants
      REAL(DP) :: rb_c(0:5)
      REAL(DP), DIMENSION(max_dihedral_params) :: dihedral_param
+     REAL(SP) :: rb_c_sp(0:5)
+     REAL(SP), DIMENSION(max_dihedral_params) :: dihedral_param_sp
      CHARACTER(20) :: dihedral_potential_type
      INTEGER :: int_dipot_type
      ! Flag to tell whether dihedral is formatted as RB torsion
      LOGICAL :: l_rb_formatted
+     CONTAINS
+             PROCEDURE :: SP_Convert => Convert_Dihedral_DP_to_SP
+             PROCEDURE :: Init => Initialize_Dihedral_Class
 
   END TYPE Dihedral_Class
   !****************************************************************************
@@ -432,16 +537,50 @@ MODULE Type_Definitions
     CHARACTER(20) :: box_shape
     INTEGER :: int_box_shape
     REAL(DP), DIMENSION(3,3) :: length, length_inv, max_delta, hlength
-    REAL(DP), DIMENSION(3) :: basis_length, cos_angle, angle, face_distance
+    REAL(DP), DIMENSION(3) :: basis_length, cos_angle, angle, face_distance, invT_face_distance
     REAL(DP) :: volume, dv_max
+    REAL(DP), DIMENSION(3,3) :: basis_converter, orig_length_inv, orig_length
+    LOGICAL :: basis_changed
+
+    REAL(SP), DIMENSION(3) :: sp_diag_length, cell_length_recip, real_length_cells
+    REAL(SP), DIMENSION(3) :: cell_H_diag
+    REAL(DP), DIMENSION(3) :: cell_face_distance, cell_xyzortho_bbox_length
+    REAL(DP), DIMENSION(3,3) :: cell_length_inv, cell_H_dp
+    REAL(SP), DIMENSION(3,3) :: cell_H_sp
+    INTEGER, DIMENSION(3) :: length_cells, sectorbound
 
   ! Inner shape is used to define a limited region into which molecules can be
   ! inserted
     INTEGER :: int_inner_shape
     REAL(DP) :: inner_volume, inner_radius, inner_radius2, inner_zmax, inner_zmin
 
+    INTEGER :: border_thickness(3)
+    INTEGER, DIMENSION(:,:,:), ALLOCATABLE :: cbmc_cell_mask
+
+    INTEGER, DIMENSION(3) :: length_bitcells, setbit_extent
+    REAL(SP), DIMENSION(3) :: bit_cell_length_recip, real_length_bitcells
+    LOGICAL :: l_cavloc_int32
+    INTEGER(4), DIMENSION(:), ALLOCATABLE :: bitcell_int32_vec
+    INTEGER, DIMENSION(2:3) :: bitcell_dimfactor
+    REAL(DP) :: ideal_bitcell_length, rcut_low_max
+    REAL(DP), DIMENSION(3) :: bitcell_face_distance, bitcell_face_distance_recip
+
+    ! Ewald reciprocal space (kspace) lattice vectors and related data
+    ! 5 columns: first 3 are hx,hy,hz; 4th is Cn, 5th is a factor used for pressure calculation
+    REAL(DP), DIMENSION(:,:), ALLOCATABLE :: kspace_vectors
+    REAL(DP), DIMENSION(:,:), ALLOCATABLE :: sincos_sum, sincos_sum_old
+    INTEGER, DIMENSION(:), ALLOCATABLE :: kspace_vector_ints
+    INTEGER :: kxyz_max(3), kxyz_maxmax
+
  END TYPE Box_Class
 
+ TYPE Cavity_Data_Class
+         INTEGER(INT64), DIMENSION(:), ALLOCATABLE :: cavity_locs
+         INTEGER(INT32), DIMENSION(:), ALLOCATABLE :: cavity_locs_int32
+         INTEGER(INT64) :: ncavs, ncavs_fine, ncavs_coarse, ncavs_combined
+         REAL(DP) :: ncavs_dp, ln_cavfrac
+ END TYPE Cavity_Data_Class
+
   !****************************************************************************
 
  TYPE Angle_Probability_Class
@@ -558,6 +697,7 @@ MODULE Type_Definitions
     LOGICAL:: ring
     REAL(DP)::rcut_vdwsq, rcut_coulsq,alpha_ewald
     REAL(DP) :: prob_ins, cum_prob_ins
+    INTEGER :: i_big_atom = 0, ia_frag_big_atom=1
 
  END TYPE Frag_Class
 !-------------------------------------------------------------------------------------------------
@@ -577,7 +717,7 @@ MODULE Type_Definitions
 !-------------------------------------------------------------------------------------------------
 
  TYPE Library_Coords_Class
-    REAL(DP) :: rxp, ryp, rzp
+    REAL(DP) :: rp(3)
  END TYPE Library_Coords_Class
 
 !-------------------------------------------------------------------------------------------------
@@ -623,4 +763,76 @@ MODULE Type_Definitions
      REAL(DP) :: exp_dE_ratio
   END TYPE Rotation_Class
 
+  PRIVATE Convert_Dihedral_DP_to_SP, Initialize_Dihedral_Class, Setup_Species_Kappas, Write_Species_Kappas
+
+  CONTAINS
+          ELEMENTAL SUBROUTINE Convert_Dihedral_DP_to_SP(this)
+                  CLASS(Dihedral_Class), INTENT(INOUT) :: this
+                  this%rb_c_sp = REAL(this%rb_c,SP)
+                  this%dihedral_param_sp = REAL(this%dihedral_param,SP)
+          END SUBROUTINE Convert_Dihedral_DP_to_SP
+          ELEMENTAL SUBROUTINE Initialize_Dihedral_Class(this)
+                  CLASS(Dihedral_Class), INTENT(INOUT) :: this
+                  this%atom = 0
+                  this%rb_c = 0.0_DP
+                  this%dihedral_param = 0.0_DP
+                  this%rb_c_sp = 0.0 ! Maybe this should be initalized to NaN, so there's a clear error if it wasn't converted first
+                  this%dihedral_param_sp = 0.0
+                  this%dihedral_potential_type = ""
+                  this%int_dipot_type = 0
+                  this%l_rb_formatted = .FALSE.
+          END SUBROUTINE Initialize_Dihedral_Class
+          ELEMENTAL SUBROUTINE Setup_Species_Kappas(this)
+                  CLASS(Species_Class), INTENT(INOUT) :: this
+                  INTEGER :: i
+                  REAL(DP) :: theta
+                  this%kappa_ins_pad8 = IAND(this%kappa_ins+padconst_4byte,padmask_4byte)
+                  this%kappa_ins_pad32 = IAND(this%kappa_ins+padconst_1byte,padmask_1byte)
+                  this%kappa_dih_pad8 = IAND(this%kappa_dih+padconst_4byte,padmask_4byte)
+                  this%kappa_dih_pad32 = IAND(this%kappa_dih+padconst_1byte,padmask_1byte)
+                  IF (this%need_kappa_ins) this%log_kappa_ins = LOG(REAL(this%kappa_ins,DP))
+                  IF (this%kappa_rot > 0) this%log_kappa_rot = LOG(REAL(this%kappa_rot,DP))
+                  IF (this%need_kappa_dih) THEN
+                          this%ln_pbias_dih_const = REAL(this%nfragments-1,DP)*LOG(REAL(this%kappa_dih,DP))
+                          IF (ALLOCATED(this%sincos_lintheta_dp)) DEALLOCATE(this%sincos_lintheta_dp)
+                          IF (ALLOCATED(this%sincos_lintheta_sp)) DEALLOCATE(this%sincos_lintheta_sp)
+                          ALLOCATE(this%sincos_lintheta_dp(this%kappa_dih_pad8,2))
+                          ALLOCATE(this%sincos_lintheta_sp(this%kappa_dih_pad8,2))
+                          this%theta_step = twoPI/this%kappa_dih
+                          this%theta_step_sp = REAL(this%theta_step,SP)
+                          !DIR$ VECTOR ALIGNED
+                          DO i = 0, this%kappa_dih_pad8-1
+                                theta = i*this%theta_step
+                                this%sincos_lintheta_dp(i+1,1) = SIN(theta)
+                                this%sincos_lintheta_dp(i+1,2) = COS(theta)
+                          END DO
+                          this%sincos_lintheta_sp = REAL(this%sincos_lintheta_dp,SP)
+                  END IF
+          END SUBROUTINE Setup_Species_Kappas
+          SUBROUTINE Write_Species_Kappas(this,outputunit)
+                  CLASS(Species_Class), INTENT(INOUT) :: this
+                  INTEGER, INTENT(IN) :: outputunit
+                  IF (this%need_kappa_ins) THEN
+                          WRITE(outputunit,'(X,A,T35,I12)') 'Kappa for first fragment insertion ', this%kappa_ins
+                  END IF
+                  IF (this%need_kappa_dih) THEN
+                          WRITE(outputunit,'(X,A,T35,I12)') 'Kappa for dihedral selection ', this%kappa_dih
+                  END IF
+          END SUBROUTINE Write_Species_Kappas
+
+
+        PURE FUNCTION Cross_Product(a,b) RESULT(c)
+                REAL(DP), DIMENSION(3) :: c
+                REAL(DP), DIMENSION(3), INTENT(IN) :: a, b
+                CALL Elemental_Cross_Product(a(1),a(2),a(3),b(1),b(2),b(3),c(1),c(2),c(3))
+        END FUNCTION Cross_Product
+
+        ELEMENTAL SUBROUTINE Elemental_Cross_Product(a1,a2,a3,b1,b2,b3,c1,c2,c3)
+                REAL(DP), INTENT(IN) :: a1,a2,a3,b1,b2,b3
+                REAL(DP), INTENT(OUT) :: c1,c2,c3
+                c1 = a2*b3 - a3*b2
+                c2 = a3*b1 - a1*b3
+                c3 = a1*b2 - a2*b1
+        END SUBROUTINE Elemental_Cross_Product
+
 END MODULE Type_Definitions
diff --git a/Src/volume.f90 b/Src/volume.f90
index c5cfbb01..4d2da91b 100755
--- a/Src/volume.f90
+++ b/Src/volume.f90
@@ -115,21 +115,23 @@ End SUBROUTINE Reset_Cartesian_Coordinates_Box
 
 !****************************************************************************
 
-  SUBROUTINE Scale_COM_Cartesian(this_box,box_list_old)
+  SUBROUTINE Scale_COM_Cartesian(this_box,length_inv_old)
 
     ! Scales the cartesian coordinates and COM of all the molecules
     ! in the input box such that intramolecular DOFs do not change
     ! This is achieved by keeping the fractional coordinates of the COM
-    ! the same before and after the move. The old cell basis vector is
-    ! used for this purpose
+    ! the same before and after the move.
 
     INTEGER, INTENT(IN) :: this_box
-    TYPE(Box_Class), INTENT(IN) :: box_list_old
+    REAL(DP), INTENT(IN) :: length_inv_old(3,3)
 
     ! Local variables
 
     INTEGER :: is, im, lm, i
     REAL(DP) :: s(3)
+    REAL(DP) :: scaling_matrix(3,3)
+
+    scaling_matrix = MATMUL(box_list(this_box)%length,length_inv_old)
 
 
 
@@ -143,41 +145,49 @@ SUBROUTINE Scale_COM_Cartesian(this_box,box_list_old)
              
              ! obtain the new coordinates of the COM for this molecule
              
-             ! first determine the fractional coordinate
-             
-             DO i = 1,3
-                
-                s(i) = box_list_old%length_inv(i,1) * molecule_list(lm,is)%xcom + &
-                     box_list_old%length_inv(i,2) * molecule_list(lm,is)%ycom + &
-                     box_list_old%length_inv(i,3) * molecule_list(lm,is)%zcom
-             END DO
+             !! first determine the fractional coordinate
+             !
+             !DO i = 1,3
+             !   
+             !   s(i) = box_list_old%length_inv(i,1) * molecule_list(lm,is)%xcom + &
+             !        box_list_old%length_inv(i,2) * molecule_list(lm,is)%ycom + &
+             !        box_list_old%length_inv(i,3) * molecule_list(lm,is)%zcom
+             !END DO
+             ! use scaling matrix to convert directly between real coordinates
+             s = MATMUL(scaling_matrix,molecule_list(lm,is)%rcom(1:3))
+             !DO i = 1,3
+             !   
+             !   s(i) = scaling_matrix(i,1) * molecule_list(lm,is)%rcom(1) + &
+             !        scaling_matrix(i,2) * molecule_list(lm,is)%rcom(2) + &
+             !        scaling_matrix(i,3) * molecule_list(lm,is)%rcom(3)
+             !END DO
              
              
              ! now obtain the new positions of COMs
              
              
-             molecule_list(lm,is)%xcom = box_list(this_box)%length(1,1) * s(1) &
-                  + box_list(this_box)%length(1,2) * s(2) + &
-                    box_list(this_box)%length(1,3) * s(3)
-             
-             molecule_list(lm,is)%ycom = box_list(this_box)%length(2,1) * s(1) &
-                  + box_list(this_box)%length(2,2) * s(2) + &
-                    box_list(this_box)%length(2,3) * s(3)
-             
-             molecule_list(lm,is)%zcom = box_list(this_box)%length(3,1) * s(1) &
-                  + box_list(this_box)%length(3,2) * s(2) + &
-                    box_list(this_box)%length(3,3) * s(3)
-             
+             !molecule_list(lm,is)%xcom = box_list(this_box)%length(1,1) * s(1) &
+             !     + box_list(this_box)%length(1,2) * s(2) + &
+             !       box_list(this_box)%length(1,3) * s(3)
+             !
+             !molecule_list(lm,is)%ycom = box_list(this_box)%length(2,1) * s(1) &
+             !     + box_list(this_box)%length(2,2) * s(2) + &
+             !       box_list(this_box)%length(2,3) * s(3)
+             !
+             !molecule_list(lm,is)%zcom = box_list(this_box)%length(3,1) * s(1) &
+             !     + box_list(this_box)%length(3,2) * s(2) + &
+             !       box_list(this_box)%length(3,3) * s(3)
+             molecule_list(lm,is)%rcom(1:3) = s
              ! Obtain the new positions of atoms in this molecule
              
-             atom_list(:,lm,is)%rxp = atom_list(:,lm,is)%rxp + &
-                  molecule_list(lm,is)%xcom - molecule_list(lm,is)%xcom_old
+             atom_list(:,lm,is)%rp(1) = atom_list(:,lm,is)%rp(1) + &
+                  molecule_list(lm,is)%rcom(1) - molecule_list(lm,is)%rcom_old(1)
              
-             atom_list(:,lm,is)%ryp = atom_list(:,lm,is)%ryp + &
-                  molecule_list(lm,is)%ycom - molecule_list(lm,is)%ycom_old
+             atom_list(:,lm,is)%rp(2) = atom_list(:,lm,is)%rp(2) + &
+                  molecule_list(lm,is)%rcom(2) - molecule_list(lm,is)%rcom_old(2)
              
-             atom_list(:,lm,is)%rzp = atom_list(:,lm,is)%rzp + &
-                  molecule_list(lm,is)%zcom - molecule_list(lm,is)%zcom_old
+             atom_list(:,lm,is)%rp(3) = atom_list(:,lm,is)%rp(3) + &
+                  molecule_list(lm,is)%rcom(3) - molecule_list(lm,is)%rcom_old(3)
              
           END IF
           
diff --git a/Src/widom_insert.f90 b/Src/widom_insert.f90
index b008e55a..ee1884a0 100755
--- a/Src/widom_insert.f90
+++ b/Src/widom_insert.f90
@@ -64,9 +64,9 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
 
   REAL(DP) :: dx, dy, dz
   REAL(DP) :: dE, dE_intra, dE_inter, dE_frag
-  REAL(DP) :: E_bond, E_angle, E_dihedral, E_improper
+  REAL(DP) :: E_bond, E_dihedral, E_improper
   REAL(DP) :: E_intra_vdw, E_intra_qq
-  REAL(DP) :: E_inter, E_periodic_qq
+  REAL(DP) :: E_inter, E_interfrag, E_periodic_qq, E_angle
   REAL(DP) :: E_reciprocal, E_self, E_lrc
   REAL(DP) :: E_ring_frag
   REAL(DP) :: ln_pacc, ln_pseq, ln_pbias, this_lambda
@@ -84,14 +84,14 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
   INTEGER(KIND=INT64), DIMENSION(rsqmin_res_d,solvent_maxind_d,natoms(is)), TARGET :: frame_rsqmin_atompair_freq
   REAL(DP), DIMENSION(:,:,:), POINTER :: rsqmin_atompair_w_max_ptr, rsqmin_atompair_w_sum_ptr
   INTEGER(KIND=INT64), DIMENSION(:,:,:), POINTER :: rsqmin_atompair_freq_ptr
-  INTEGER :: bsolute, rsq_ind, ia, ti_solvent
+  INTEGER :: bsolute, rsq_ind(solvent_maxind,natoms(is)), ia, ti_solvent
 
   REAL(DP) :: widom_prefactor, widom_var_exp, widom_sum
   REAL(DP) :: E_recip_in, lrc_diff, E_inter_constant
   REAL(DP) :: subinterval_sums(MAX(n_widom_subgroups(is,ibox),1))
   REAL(DP) :: t_cpu_e, t_cpu_s
   INTEGER :: n_subintervals
-!widom_timing  REAL(DP) :: noncbmc_time_e, noncbmc_time_s, noncbmc_time
+  REAL(DP) :: noncbmc_time_e, noncbmc_time_s, noncbmc_time
   LOGICAL :: write_wprp2
   LOGICAL :: omp_flag
 
@@ -119,7 +119,7 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
   !$ omp_flag = .TRUE.
   !$ nbr_threads = omp_get_max_threads()
 
-!widom_timing  noncbmc_time = 0.0_DP
+  noncbmc_time = 0.0_DP
 
 
   t_cpu = 0.0_DP
@@ -167,6 +167,7 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
 
 
 
+
   IF (n_widom_subgroups(is,ibox) > 0) THEN
           n_subintervals = n_widom_subgroups(is,ibox)
           write_wprp2 = .TRUE.
@@ -224,7 +225,7 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
 
      DO i = 1, natoms(is)
         i_type = nonbond_list(i,is)%atom_type_number
-        nint_beads(i_type,ibox) = nint_beads(i_type,ibox) + 1
+        IF (i_type > 0) nint_beads(i_type,ibox) = nint_beads(i_type,ibox) + 1
      END DO
 
      CALL Compute_LR_correction(ibox,E_lrc)
@@ -251,33 +252,31 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
   frame_rsqmin_atompair_w_sum = 0.0_DP
   frame_rsqmin_atompair_w_max = 0.0_DP
   frame_rsqmin_atompair_freq = 0_INT64
+  widom_sum = 0.0_DP
+  n_overlaps = 0_INT64
+  subinterval_sums = 0.0_DP
+  t_cpu = 0.0_DP
+  IF (est_emax) THEN
+          frame_w_max = 0.0_DP
+          frame_Eij_w_sum = 0.0_DP
+          Eij_freq = 0
+          thread_Eij_factor = Eij_factor_gcopy
+  END IF
 
   !$OMP PARALLEL DEFAULT(SHARED) &
   !$OMP PRIVATE(ln_pseq, ln_pbias, E_ring_frag, inter_overlap, cbmc_overlap, intra_overlap, i_interval) &
-  !$OMP PRIVATE(widom_var_exp, E_periodic_qq, E_intra_qq, E_intra_vdw, E_inter, dE_frag, dE) &
-  !$OMP PRIVATE(E_bond, E_angle, E_dihedral, E_improper, dE_intra, dE_inter, E_reciprocal, frag_order) &
+  !$OMP PRIVATE(widom_var_exp, E_interfrag, E_intra_qq, E_intra_vdw, E_inter, dE_frag, dE) &
+  !$OMP PRIVATE(E_bond, E_dihedral, E_improper, dE_intra, dE_inter, E_reciprocal, frag_order) &
   !$OMP PRIVATE(t_cpu_s, t_cpu_e, thread_changefactor, Eij_ind, rsq_ind, ia, ti_solvent) &
   !$OMP PRIVATE(frame_rsqmin_atompair_w_sum_ptr,frame_rsqmin_atompair_w_max_ptr,frame_rsqmin_atompair_freq_ptr) &
-  !$OMP REDUCTION(+:widom_sum,n_overlaps,subinterval_sums,t_cpu,Eij_freq,frame_Eij_w_sum) &
-  !$OMP REDUCTION(+:frame_rsqmin_atompair_w_sum,frame_rsqmin_atompair_freq) &
-  !$OMP REDUCTION(MAX:frame_w_max,thread_Eij_factor,frame_rsqmin_atompair_w_max)
-  frame_rsqmin_atompair_w_sum = 0.0_DP
-  frame_rsqmin_atompair_w_max = 0.0_DP
-  frame_rsqmin_atompair_freq = 0_INT64
+  !$OMP PRIVATE(noncbmc_time_e,noncbmc_time_s,E_periodic_qq,E_angle) &
+  !$OMP REDUCTION(+:Eij_freq,frame_Eij_w_sum) &
+  !$OMP REDUCTION(MAX:frame_w_max,thread_Eij_factor) &
+  !$OMP REDUCTION(+:t_cpu) &
+  !$OMP REDUCTION(+:noncbmc_time)
 
   ithread = 1
   !$ ithread = omp_get_thread_num() + 1
-  IF (est_atompair_rminsq) THEN
-          IF (l_heap) THEN
-                  frame_rsqmin_atompair_w_sum_ptr => frame_rsqmin_atompair_w_sum_tgt(:,:,:,ithread)
-                  frame_rsqmin_atompair_w_max_ptr => frame_rsqmin_atompair_w_max_tgt(:,:,:,ithread)
-                  frame_rsqmin_atompair_freq_ptr => frame_rsqmin_atompair_freq_tgt(:,:,:,ithread)
-          ELSE
-                  frame_rsqmin_atompair_w_sum_ptr => frame_rsqmin_atompair_w_sum
-                  frame_rsqmin_atompair_w_max_ptr => frame_rsqmin_atompair_w_max
-                  frame_rsqmin_atompair_freq_ptr => frame_rsqmin_atompair_freq
-          END IF
-  END IF
 
   IF (ALLOCATED(widom_atoms)) THEN
           DEALLOCATE(widom_atoms, STAT=DeallocateStatus)
@@ -305,10 +304,15 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
   END IF
   widom_molecule = molecule_list(widom_locate,is)
   widom_atoms = atom_list(1:natoms(is),widom_locate,is)
-  widom_sum = 0.0_DP
-  n_overlaps = 0_INT64
-  subinterval_sums = 0.0_DP
-  t_cpu = 0.0_DP
+  IF (est_atompair_rminsq .AND. l_heap) THEN
+          frame_rsqmin_atompair_w_sum_ptr => frame_rsqmin_atompair_w_sum_tgt(:,:,:,ithread)
+          frame_rsqmin_atompair_w_max_ptr => frame_rsqmin_atompair_w_max_tgt(:,:,:,ithread)
+          frame_rsqmin_atompair_freq_ptr => frame_rsqmin_atompair_freq_tgt(:,:,:,ithread)
+  END IF
+  !widom_sum = 0.0_DP
+  !n_overlaps = 0_INT64
+  !subinterval_sums = 0.0_DP
+  !t_cpu = 0.0_DP
   IF (est_emax) THEN
           frame_w_max = 0.0_DP
           frame_Eij_w_sum = 0.0_DP
@@ -317,10 +321,18 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
           thread_changefactor = changefactor
   END IF
 
-  !$OMP DO SCHEDULE(DYNAMIC)
+  !$OMP DO SCHEDULE(DYNAMIC) &
+  !$OMP REDUCTION(+:widom_sum,n_overlaps,subinterval_sums) &
+  !$OMP REDUCTION(+:frame_rsqmin_atompair_w_sum,frame_rsqmin_atompair_freq) &
+  !$OMP REDUCTION(MAX:frame_rsqmin_atompair_w_max)
   DO i_widom = 1, insertions_in_step
           IF (.NOT. omp_flag) CALL CPU_TIME(t_cpu_s)
           !$ t_cpu_s = omp_get_wtime()
+          IF (est_atompair_rminsq .AND. .NOT. l_heap) THEN
+                  frame_rsqmin_atompair_w_sum_ptr => frame_rsqmin_atompair_w_sum
+                  frame_rsqmin_atompair_w_max_ptr => frame_rsqmin_atompair_w_max
+                  frame_rsqmin_atompair_freq_ptr => frame_rsqmin_atompair_freq
+          END IF
           ! Initialize variables
           swi_atompair_rsqmin = 10000.0_DP
           ln_pseq = 0.0_DP
@@ -342,8 +354,17 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
           !
           ! Build_Molecule places the first fragment, then calls Fragment_Placement
           ! to place the additional fragments 
+          IF (widom_timing) THEN
+                  IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_s)
+                  !$ noncbmc_time_s = omp_get_wtime()
+          END IF
           CALL Build_Molecule(widom_locate,is,ibox,frag_order,this_lambda, &
-                  ln_pseq,ln_pbias,E_ring_frag,cbmc_overlap)
+                  ln_pseq,ln_pbias,E_ring_frag,cbmc_overlap,E_interfrag)
+          IF (widom_timing) THEN
+                  IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_e)
+                  !$ noncbmc_time_e = omp_get_wtime()
+                  total_cbmc_time = total_cbmc_time + (noncbmc_time_e - noncbmc_time_s)
+          END IF
 
           ! Turn the molecule on
           widom_molecule%live = .TRUE.
@@ -359,23 +380,25 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
           !   * the number of trial dihedrals, kappa_dih, for each dihedral.
 
           ln_pbias = ln_pbias + ln_pseq
-          ln_pbias = ln_pbias + DLOG(REAL(kappa_ins,DP))
+          ln_pbias = ln_pbias + species_list(is)%log_kappa_ins
 
-          IF (kappa_rot /= 0 ) THEN
-             ln_pbias = ln_pbias + DLOG(REAL(kappa_rot,DP))
+          IF (species_list(is)%kappa_rot > 0 ) THEN
+             ln_pbias = ln_pbias + species_list(is)%log_kappa_rot
           END IF
 
-          IF (kappa_dih /= 0 ) THEN
-             ln_pbias = ln_pbias + REAL(nfragments(is)-1,DP) * DLOG(REAL(kappa_dih,DP))
+          IF (species_list(is)%need_kappa_dih) THEN
+             ln_pbias = ln_pbias + species_list(is)%ln_pbias_dih_const
           END IF
 
           IF (.NOT. cbmc_overlap) THEN
-!widom_timing            IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_s)
-!widom_timing            !$ noncbmc_time_s = omp_get_wtime()
+            IF (widom_timing) THEN
+                    IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_s)
+                    !$ noncbmc_time_s = omp_get_wtime()
+            END IF
 
             ! Molecule COM may be outside the box boundary if grown via CBMC, so wrap
             ! the molecule coordinates back in the box (if needed)
-            IF (nfragments(is) > 1) CALL Fold_Molecule(widom_locate,is,ibox)
+            IF (species_list(is)%need_kappa_dih) CALL Fold_Molecule(widom_locate,is,ibox)
 
             ! Recompute the COM in case the molecule was wrapped
             !CALL Get_COM(widom_locate,is)
@@ -390,44 +413,63 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
                     E_inter,inter_overlap)
 
             ! Calculate the nonbonded energy interaction within the inserted molecule
-            IF (.NOT. inter_overlap) THEN
-                    CALL Compute_Molecule_Nonbond_Intra_Energy(widom_locate,is, &
-                            E_intra_vdw,E_intra_qq,E_periodic_qq,intra_overlap)
-                    E_inter = E_inter + E_periodic_qq
+            !IF (.NOT. inter_overlap) THEN
+            !        CALL Compute_Molecule_Nonbond_Intra_Energy(widom_locate,is, &
+            !                E_intra_vdw,E_intra_qq,E_periodic_qq,intra_overlap)
+            !        !E_inter = E_inter + E_periodic_qq
+            !END IF
+
+            IF (widom_timing) THEN
+                    IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_e)
+                    !$ noncbmc_time_e = omp_get_wtime()
+                    noncbmc_time = noncbmc_time + (noncbmc_time_e - noncbmc_time_s)
             END IF
-
-!widom_timing            IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_e)
-!widom_timing            !$ noncbmc_time_e = omp_get_wtime()
-!widom_timing            noncbmc_time = noncbmc_time + (noncbmc_time_e - noncbmc_time_s)
          
           END IF
 
           ! Leave widom_sum unchanged if there is any core overlap
-          IF (.NOT. (cbmc_overlap .OR. inter_overlap .OR. intra_overlap)) THEN
+          IF (.NOT. (cbmc_overlap .OR. inter_overlap)) THEN
 
                   ! There are no overlaps, so we can calculate the change in potential energy.
                   !
                   ! Already have the change in nonbonded energies
                   dE_inter = E_inter + E_inter_constant 
-                  dE_intra = E_intra_vdw + E_intra_qq
+                  dE_intra = E_interfrag + Excess_Molecule_Intrafragment_Energy(widom_locate,widom_species,ibox)
+                  !dE_intra = E_intra_vdw + E_intra_qq
 
                   ! Bonded intramolecular energies
                   ! If the molecule was grown via CBMC, we already have the intramolecular 
                   ! bond energies? Otherwise we need to compute them.
-                  CALL Compute_Molecule_Bond_Energy(widom_locate,is,E_bond)
-                  CALL Compute_Molecule_Angle_Energy(widom_locate,is,E_angle)
-                  CALL Compute_Molecule_Dihedral_Energy(widom_locate,is,E_dihedral)
-                  CALL Compute_Molecule_Improper_Energy(widom_locate,is,E_improper)
+                  ! Commenting out Bond energy because Cassanrdra only allows fixed bond lengths
+                  !CALL Compute_Molecule_Bond_Energy(widom_locate,is,E_bond)
+                  !CALL Compute_Molecule_Angle_Energy(widom_locate,is,E_angle)
+                  ! Removing intramolecular energy calculation only because only interfragment energy should be included in it
+                  !CALL Compute_Molecule_Dihedral_Energy(widom_locate,is,E_dihedral)
+                  !CALL Compute_Molecule_Improper_Energy(widom_locate,is,E_improper)
+
+                  ! Angle energy contribution is commented out because it would also be part of 
+                  !     dE_frag, which gets subtracted from dE in the Boltzmann exponent, so
+                  !     it would just cancel out.
 
-                  dE_intra = dE_intra + E_bond + E_angle + E_dihedral + E_improper
+                  !dE_intra = dE_intra + E_dihedral + E_improper ! + E_angle + E_bond
 
                   ! Ewald energies
                   IF (int_charge_style(ibox) == charge_coul) THEN
                         IF ( (int_charge_sum_style(ibox) == charge_ewald) .AND. &
                              has_charge(is) ) THEN
                        
+                            IF (widom_timing) THEN
+                                    IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_s)
+                                    !$ noncbmc_time_s = omp_get_wtime()
+                            END IF
                             CALL Update_System_Ewald_Reciprocal_Energy_Widom(widom_locate, &
                                    is,ibox,E_reciprocal)
+                            IF (widom_timing) THEN
+                                    IF (.NOT. omp_flag) CALL cpu_time(noncbmc_time_e)
+                                    !$ noncbmc_time_e = omp_get_wtime()
+                                    widom_ewald_recip_time = widom_ewald_recip_time + &
+                                            (noncbmc_time_e - noncbmc_time_s)
+                            END IF
 
                             dE_inter = dE_inter + E_reciprocal
                         END IF
@@ -438,28 +480,35 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
                   !        / (REAL(nmols(is,ibox),DP)*((species_list(is)%de_broglie(ibox))**3))
 
                   ! change in energy, less energy used to bias fragment selection
-                  dE = dE_intra + dE_inter
-                  dE_frag = E_angle + E_ring_frag
+                  ! RS: Changed such that instead of subtracting dE_frag, just leave it out of dE_intra
+                  dE = dE_inter + dE_intra
+                  !dE_frag = E_ring_frag ! + E_angle
 
                   ! mu' = -(1/beta)*ln(<widom_var>)
-                  widom_var_exp = DEXP(-beta(ibox) * (dE - dE_frag) - ln_pbias)
+                  widom_var_exp = DEXP(-beta(ibox) * dE - ln_pbias)
                   ! sum of all widom_var for this step; output argument
                   widom_sum = widom_sum + widom_var_exp
 
 
+                  ! used to subtract rsqmin_shifter but now subtract rcut_lowsq and add 1 to int in case
+                  ! floating point rounding somehow allowed proximities slightly closer than rcut_low
+                  rsq_ind = INT((swi_atompair_rsqmin-rcut_lowsq) / rsqmin_step) + 1
                   IF (est_atompair_rminsq) THEN
                           DO ia = 1, natoms(is)
                                 DO ti_solvent = 1, solvent_maxind
                                         IF (swi_atompair_rsqmin(ti_solvent,ia) >= maxrminsq) CYCLE
-                                        rsq_ind = INT((swi_atompair_rsqmin(ti_solvent,ia)-rsqmin_shifter) / rsqmin_step)
-                                        IF (widom_var_exp > frame_rsqmin_atompair_w_max_ptr(rsq_ind,ti_solvent,ia)) THEN
-                                                frame_rsqmin_atompair_w_max_ptr(rsq_ind,ti_solvent,ia) = widom_var_exp
-                                        END IF
-                                        frame_rsqmin_atompair_w_sum_ptr(rsq_ind,ti_solvent,ia) = &
-                                                frame_rsqmin_atompair_w_sum_ptr(rsq_ind,ti_solvent,ia) + &
+                                        !rsq_ind = INT((swi_atompair_rsqmin(ti_solvent,ia)-rsqmin_shifter) / rsqmin_step)
+                                        frame_rsqmin_atompair_w_max_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia) = &
+                                                MAX(widom_var_exp, &
+                                                frame_rsqmin_atompair_w_max_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia))
+                                        !IF (widom_var_exp > frame_rsqmin_atompair_w_max_ptr(rsq_ind,ti_solvent,ia)) THEN
+                                        !        frame_rsqmin_atompair_w_max_ptr(rsq_ind,ti_solvent,ia) = widom_var_exp
+                                        !END IF
+                                        frame_rsqmin_atompair_w_sum_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia) = &
+                                                frame_rsqmin_atompair_w_sum_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia) + &
                                                 widom_var_exp
-                                        frame_rsqmin_atompair_freq_ptr(rsq_ind,ti_solvent,ia) = &
-                                                frame_rsqmin_atompair_freq_ptr(rsq_ind,ti_solvent,ia) + 1
+                                        frame_rsqmin_atompair_freq_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia) = &
+                                                frame_rsqmin_atompair_freq_ptr(rsq_ind(ti_solvent,ia),ti_solvent,ia) + 1
                                 END DO
                           END DO
                   END IF
@@ -487,6 +536,43 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
           t_cpu = t_cpu + t_cpu_e - t_cpu_s
   END DO
   !$OMP END DO
+  IF (est_atompair_rminsq) THEN
+          IF (l_heap) THEN
+                  !$OMP WORKSHARE
+                  rsqmin_atompair_freq(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          rsqmin_atompair_freq(:,:,bsolute+1:bsolute+natoms(is),ibox) + SUM(frame_rsqmin_atompair_freq_tgt,4)
+                  rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          MAX(rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox),widom_prefactor*MAXVAL(frame_rsqmin_atompair_w_max_tgt,4))
+                  rsqmin_atompair_w_sum(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          rsqmin_atompair_w_sum(:,:,bsolute+1:bsolute+natoms(is),ibox) + widom_prefactor*SUM(frame_rsqmin_atompair_w_sum_tgt,4)
+                  !$OMP END WORKSHARE
+                  !!$OMP SECTIONS
+                  !!$OMP SECTION
+                  !rsqmin_atompair_freq_ptr = rsqmin_atompair_freq_ptr + SUM(frame_rsqmin_atompair_freq_tgt,4)
+                  !!$OMP SECTION
+                  !rsqmin_atompair_w_max_ptr = MAX(rsqmin_atompair_w_max_ptr, widom_prefactor*MAXVAL(frame_rsqmin_atompair_w_max_tgt,4))
+                  !!$OMP SECTION
+                  !rsqmin_atompair_w_sum_ptr = rsqmin_atompair_w_sum_ptr + widom_prefactor*SUM(frame_rsqmin_atompair_w_sum_tgt,4)
+                  !!$OMP END SECTIONS
+          ELSE
+                  !$OMP WORKSHARE
+                  rsqmin_atompair_freq(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          rsqmin_atompair_freq(:,:,bsolute+1:bsolute+natoms(is),ibox) + frame_rsqmin_atompair_freq
+                  rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          MAX(rsqmin_atompair_w_max(:,:,bsolute+1:bsolute+natoms(is),ibox),widom_prefactor*frame_rsqmin_atompair_w_max)
+                  rsqmin_atompair_w_sum(:,:,bsolute+1:bsolute+natoms(is),ibox) = &
+                          rsqmin_atompair_w_sum(:,:,bsolute+1:bsolute+natoms(is),ibox) + widom_prefactor*frame_rsqmin_atompair_w_sum
+                  !$OMP END WORKSHARE
+                  !!$OMP SECTIONS
+                  !!$OMP SECTION
+                  !rsqmin_atompair_freq_ptr = rsqmin_atompair_freq_ptr + frame_rsqmin_atompair_freq
+                  !!$OMP SECTION
+                  !rsqmin_atompair_w_sum_ptr = rsqmin_atompair_w_sum_ptr + widom_prefactor*frame_rsqmin_atompair_w_sum
+                  !!$OMP SECTION
+                  !rsqmin_atompair_w_max_ptr = MAX(rsqmin_atompair_w_max_ptr,widom_prefactor*frame_rsqmin_atompair_w_max)
+                  !!$OMP END SECTIONS
+          END IF
+  END IF
   IF (est_emax) THEN
           !$OMP CRITICAL
           changefactor = MAX(changefactor,thread_changefactor)
@@ -499,40 +585,12 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
           IF (changefactor>1) CALL coarsen_w_max(w_max_gcopy,Eij_w_sum_gcopy,Eij_freq_gcopy,Eij_factor_gcopy,changefactor)
           Eij_factor(is,ibox) = Eij_factor_gcopy
   END IF
-  !$OMP PARALLEL
   IF (est_emax) THEN
-          !$OMP SECTIONS
-          !$OMP SECTION
           w_max(:,is,ibox) = MAX(w_max_gcopy, widom_prefactor*frame_w_max)
-          !$OMP SECTION
           Eij_w_sum(:,is,ibox) = Eij_w_sum_gcopy + widom_prefactor*frame_Eij_w_sum
-          !$OMP SECTION
           Eij_freq_total(:,is,ibox) = Eij_freq_gcopy + Eij_freq
-          !$OMP END SECTIONS NOWAIT
   END IF
 
-  IF (est_atompair_rminsq) THEN
-          IF (l_heap) THEN
-                  !$OMP SECTIONS
-                  !$OMP SECTION
-                  rsqmin_atompair_freq_ptr = rsqmin_atompair_freq_ptr + SUM(frame_rsqmin_atompair_freq_tgt,4)
-                  !$OMP SECTION
-                  rsqmin_atompair_w_max_ptr = MAX(rsqmin_atompair_w_max_ptr, widom_prefactor*MAXVAL(frame_rsqmin_atompair_w_max_tgt,4))
-                  !$OMP SECTION
-                  rsqmin_atompair_w_sum_ptr = rsqmin_atompair_w_sum_ptr + widom_prefactor*SUM(frame_rsqmin_atompair_w_sum_tgt,4)
-                  !$OMP END SECTIONS
-          ELSE
-                  !$OMP SECTIONS
-                  !$OMP SECTION
-                  rsqmin_atompair_freq_ptr = rsqmin_atompair_freq_ptr + frame_rsqmin_atompair_freq
-                  !$OMP SECTION
-                  rsqmin_atompair_w_sum_ptr = rsqmin_atompair_w_sum_ptr + widom_prefactor*frame_rsqmin_atompair_w_sum
-                  !$OMP SECTION
-                  rsqmin_atompair_w_max_ptr = MAX(rsqmin_atompair_w_max_ptr,widom_prefactor*frame_rsqmin_atompair_w_max)
-                  !$OMP END SECTIONS
-          END IF
-  END IF
-  !$OMP END PARALLEL
 
   widom_active = .FALSE.
   widom_sum = widom_sum * widom_prefactor
@@ -565,6 +623,7 @@ SUBROUTINE Widom_Insert(is,ibox,widom_sum,t_cpu, n_overlaps)
 
 
   ntrials(is,ibox)%widom = ntrials(is,ibox)%widom + insertions_in_step
+  IF (widom_timing) noncbmc_time_total = noncbmc_time_total + noncbmc_time
 
   CONTAINS
           SUBROUTINE coarsen_w_max(wmax,wsum,Efreq,Efactor,cfactor)
@@ -588,8 +647,5 @@ SUBROUTINE coarsen_w_max(wmax,wsum,Efreq,Efactor,cfactor)
                   Efreq(ic:) = 0
           END SUBROUTINE coarsen_w_max
 
-
-!widom_timing  WRITE(*,*) noncbmc_time
-
 END SUBROUTINE Widom_Insert
 !*******************************************************************************
diff --git a/Src/widom_subdriver.f90 b/Src/widom_subdriver.f90
index d085d2c5..ce6d8a14 100755
--- a/Src/widom_subdriver.f90
+++ b/Src/widom_subdriver.f90
@@ -40,6 +40,7 @@ SUBROUTINE Widom_Subdriver
 
   USE Global_Variables
   USE Sector_Routines
+  USE Energy_Routines , ONLY: Field_Allocation, Livelist_Packing
   !$ USE OMP_LIB
 
   !*****************************************************************************
@@ -56,8 +57,9 @@ SUBROUTINE Widom_Subdriver
   REAL(DP) :: widom_sum, widom_avg!widom_timing, setup_time_s, setup_time_e, setup_time
   REAL(DP) :: t_wc_s, t_wc_e, t_cpu
 !widom_timing  REAL(DP) :: r_cell_list_time, r_normal_overlap_time, r_non_overlap_time, r_nrg_overlap_time
-  LOGICAL :: need_init, omp_flag
-  need_init = l_sectors
+  LOGICAL :: need_init, omp_flag, l_unitstride
+  l_unitstride = .NOT. open_mc_flag
+  need_init = .TRUE.
   omp_flag = .FALSE.
   !$ omp_flag = .TRUE.
   ! Loop over all species
@@ -72,8 +74,12 @@ SUBROUTINE Widom_Subdriver
                 IF (need_init) THEN
 !widom_timing                        IF (.NOT. omp_flag) CALL cpu_time(setup_time_s)
 !widom_timing                        !$ setup_time_s = omp_get_wtime()
-                        CALL Sector_Setup
-                        IF (cbmc_cell_list_flag) CALL CBMC_Cell_List_Setup
+                        IF (l_vectorized .OR. l_sectors) THEN
+                                CALL Field_Allocation
+                                CALL Livelist_Packing(.TRUE.,l_unitstride)
+                        END IF
+                        IF (l_sectors) CALL Sector_Setup
+                        !IF (cbmc_cell_list_flag) CALL CBMC_Cell_List_Setup
                         IF (full_cell_list_flag) CALL Full_Cell_List_Setup
 !widom_timing                        IF (.NOT. omp_flag) CALL cpu_time(setup_time_e)
 !widom_timing                        !$ setup_time_e = omp_get_wtime()
diff --git a/Src/write_properties.f90 b/Src/write_properties.f90
index 918bcd74..a1cc1a1a 100755
--- a/Src/write_properties.f90
+++ b/Src/write_properties.f90
@@ -512,9 +512,9 @@ SUBROUTINE Write_Coords_XYZ(this_box)
         IF(molecule_list(this_im,is)%live) THEN
            DO ia = 1, natoms(is)
               WRITE(M_XYZ_unit,*) nonbond_list(ia,is)%element, &
-                   atom_list(ia,this_im,is)%rxp, &
-                   atom_list(ia,this_im,is)%ryp, &
-                   atom_list(ia,this_im,is)%rzp
+                   atom_list(ia,this_im,is)%rp(1), &
+                   atom_list(ia,this_im,is)%rp(2), &
+                   atom_list(ia,this_im,is)%rp(3)
            END DO
         END IF
      END DO
@@ -605,9 +605,9 @@ SUBROUTINE Write_Coords_Custom
       WRITE(movie_custom_unit,'(I0,1X,I0)') this_im+mol_id_base, ibox
       DO ia = 1, natoms(is)
         WRITE(movie_custom_unit,'(A,2X,4F12.5)') nonbond_list(ia,is)%element, &
-                                        atom_list(ia,this_im,is)%rxp, &
-                                        atom_list(ia,this_im,is)%ryp, &
-                                        atom_list(ia,this_im,is)%rzp, &
+                                        atom_list(ia,this_im,is)%rp(1), &
+                                        atom_list(ia,this_im,is)%rp(2), &
+                                        atom_list(ia,this_im,is)%rp(3), &
                                         nonbond_list(ia,is)%charge
       END DO
     END DO
diff --git a/Src/write_widom_properties.f90 b/Src/write_widom_properties.f90
index bd66d5c6..246f526f 100755
--- a/Src/write_widom_properties.f90
+++ b/Src/write_widom_properties.f90
@@ -36,7 +36,7 @@ SUBROUTINE Write_Widom_Properties(is,this_box,widom_avg,t_cpu,n_overlaps)
   INTEGER(KIND=INT64), INTENT(IN) :: n_overlaps
 
   REAL(DP), INTENT(IN) :: t_cpu
-  REAL(DP) :: widom_avg
+  REAL(DP) :: widom_avg, cavfrac, cavbias
 
   INTEGER :: this_unit
 
@@ -46,6 +46,15 @@ SUBROUTINE Write_Widom_Properties(is,this_box,widom_avg,t_cpu,n_overlaps)
      
   !-- check to see whether the file is open or not
   this_unit = wprop_file_unit(is,this_box)
+  IF (cavity_biasing_flag) THEN
+          cavfrac = SUM(EXP(cavdatalist(frag_list(1:species_list(is)%nfragments,is)%i_big_atom,this_box)%ln_cavfrac)) / &
+                  species_list(is)%nfragments
+          cavbias = SUM(EXP(-cavdatalist(frag_list(1:species_list(is)%nfragments,is)%i_big_atom,this_box)%ln_cavfrac)) / &
+                  species_list(is)%nfragments
+  ELSE
+          cavfrac = 1.0_DP
+          cavbias = 1.0_DP
+  END IF
      
   IF (first_open_wprop(is,this_box)) THEN
      OPEN(unit=this_unit,file=wprop_filenames(is,this_box))
@@ -53,11 +62,11 @@ SUBROUTINE Write_Widom_Properties(is,this_box,widom_avg,t_cpu,n_overlaps)
      ! in the file
      !CALL Write_Widom_Header(i)
      IF (is_sweeps) THEN
-             WRITE(this_unit,'(A19,7X,A30,7X,A30,7X,A17)') 'Sweep_#', 'Average widom_var for sweep', 'Widom CPU time for sweep (s)', &
-                     'Number of overlaps'
+             WRITE(this_unit,'(A19,7X,A30,7X,A30,7X,A17,9X,A15,11X,A18)') 'Sweep_#', 'Average widom_var for sweep', 'Widom CPU time for sweep (s)', &
+                     'Number of overlaps', 'Cavity fraction', 'Cavity bias factor'
      ELSE
-             WRITE(this_unit,'(A19,7X,A30,7X,A30,7X,A17)') 'Step_#', 'Average widom_var for step', 'Widom CPU time for step (s)', &
-                     'Number of overlaps'
+             WRITE(this_unit,'(A19,7X,A30,7X,A30,7X,A17,9X,A15,11X,A18)') 'Step_#', 'Average widom_var for step', 'Widom CPU time for step (s)', &
+                     'Number of overlaps', 'Cavity fraction', 'Cavity bias factor'
      END IF
      first_open_wprop(is,this_box) = .FALSE.
   END IF
@@ -65,8 +74,10 @@ SUBROUTINE Write_Widom_Properties(is,this_box,widom_avg,t_cpu,n_overlaps)
   !widom_avg = widom_sum / species_list(is)%insertions_in_step(this_box)
   IF (widom_avg < 1.0e-99_DP) widom_avg = 0.0_DP
   IF (is_sweeps) THEN
-          WRITE(this_unit,'(I19,7X,E30.22,7X,E30.22,7X,I19)') i_mcstep/steps_per_sweep, widom_avg, t_cpu, n_overlaps
+          WRITE(this_unit,'(I19,7X,E30.22,7X,E30.22,7X,I19,E26.18,E26.18)') i_mcstep/steps_per_sweep, widom_avg, t_cpu, n_overlaps, &
+                  cavfrac, cavbias
   ELSE
-          WRITE(this_unit,'(I19,7X,E30.22,7X,E30.22,7X,I19)') i_mcstep, widom_avg, t_cpu, n_overlaps
+          WRITE(this_unit,'(I19,7X,E30.22,7X,E30.22,7X,I19,E26.18,E26.18)') i_mcstep, widom_avg, t_cpu, n_overlaps, &
+                  cavfrac, cavbias
   END IF
 END SUBROUTINE Write_Widom_Properties
diff --git a/Src/xtc_routines.f90 b/Src/xtc_routines.f90
index 3845f030..4c0459c8 100755
--- a/Src/xtc_routines.f90
+++ b/Src/xtc_routines.f90
@@ -28,6 +28,7 @@ MODULE XTC_Routines
   USE Simulation_Properties
   USE IO_Utilities
   USE GMXFORT_TRAJECTORY
+  !$ USE OMP_LIB
 
   IMPLICIT NONE
 
@@ -92,20 +93,63 @@ FUNCTION Read_XTC_Frame(ibox)
 
   END FUNCTION Read_XTC_Frame
 
-  FUNCTION Get_XTC_Coords(ibox)
-          INTEGER :: ibox
-          REAL(DP), DIMENSION(natoms_to_read(ibox),3) :: Get_XTC_Coords
-          INTEGER :: i, j
-
-          j = iframe(ibox)
-
-          !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC)
-          DO i = 1, natoms_to_read(ibox)
-                Get_XTC_Coords(i,:) = trj(ibox)%x(j,i) * 10.0_DP
-          END DO
-          !$OMP END PARALLEL DO
-
-  END FUNCTION Get_XTC_Coords
+  SUBROUTINE Get_XTC_Coords(ibox,xtc_coords_dp)
+          INTEGER, INTENT(IN) :: ibox
+          REAL(DP), DIMENSION(:,:), CONTIGUOUS, INTENT(OUT) :: xtc_coords_dp
+          INTEGER :: chunkstart, chunkend, ithread, nthreads, chunksize
+          !INTEGER :: i, j
+
+          !j = iframe(ibox)
+
+          !!$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC)
+          !DO i = 1, natoms_to_read(ibox)
+          !      Get_XTC_Coords(i,:) = trj(ibox)%x(j,i) * 10.0_DP
+          !END DO
+          !!$OMP END PARALLEL DO
+
+
+          !$OMP PARALLEL DEFAULT(SHARED) &
+          !$OMP PRIVATE(chunkstart, chunkend, chunksize, ithread, nthreads)
+          !$ nthreads = OMP_GET_NUM_THREADS()
+          chunkstart = 1
+          chunkend = IAND(natoms_to_read(ibox)+padconst_4byte,padmask_4byte)
+          !$ chunksize = IAND((chunkend+nthreads-1)/nthreads+padconst_4byte,padmask_4byte)
+          !$ ithread = OMP_GET_THREAD_NUM()
+          !$ chunkstart = ithread*chunksize+1
+          !$ chunkend = MIN((ithread+1)*chunksize,chunkend)
+          IF (chunkend>chunkstart) CALL Thread_XTC_Coords(chunkstart,chunkend)
+          !$OMP END PARALLEL
+
+
+          !DO i = 1, natoms_to_read(ibox)
+          !      XTC_Coords_sp(i,:) = trj(ibox)%x(j,i)
+          !END DO
+          !DO i = 1, natoms_to_read(ibox)
+          !      Get_XTC_Coords(i,1) = REAL(XTC_Coords_sp(i,1)*10.0,DP)
+          !      Get_XTC_Coords(i,2) = REAL(XTC_Coords_sp(i,2)*10.0,DP)
+          !      Get_XTC_Coords(i,3) = REAL(XTC_Coords_sp(i,3)*10.0,DP)
+          !END DO
+
+          CONTAINS
+                  SUBROUTINE Thread_XTC_Coords(chunkstart,chunkend)
+                          INTEGER, INTENT(IN) :: chunkstart,chunkend
+                          REAL(SP), DIMENSION(chunkstart:chunkend,3) :: xtc_coords_sp
+                          INTEGER :: i, j, i_dim
+                          j = iframe(ibox)
+                          DO i = chunkstart, MIN(chunkend,natoms_to_read(ibox))
+                                xtc_coords_sp(i,:) = trj(ibox)%x(j,i)
+                          END DO
+                          !DIR$ ASSUME (MOD(chunkstart,dimpad_4byte) .EQ. 1)
+                          !DIR$ ASSUME (MOD(chunkend,dimpad_4byte) .EQ. 0)
+                          DO i_dim = 1, 3
+                                  !DIR$ VECTOR ALIGNED
+                                  DO i = chunkstart, chunkend
+                                        xtc_coords_dp(i,i_dim) = REAL(xtc_coords_sp(i,i_dim)*10.0,DP)
+                                  END DO
+                          END DO
+                  END SUBROUTINE Thread_XTC_Coords
+
+  END SUBROUTINE Get_XTC_Coords
 
   FUNCTION Get_XTC_Box(ibox)
           INTEGER :: ibox
diff --git a/Src/xtc_routines.no_xtc.f90 b/Src/xtc_routines.no_xtc.f90
index c7e039eb..aeb46814 100755
--- a/Src/xtc_routines.no_xtc.f90
+++ b/Src/xtc_routines.no_xtc.f90
@@ -70,17 +70,17 @@ LOGICAL FUNCTION Read_XTC_Frame(ibox)
 
   END FUNCTION Read_XTC_Frame
 
-  FUNCTION Get_XTC_Coords(ibox)
-          INTEGER :: ibox
-          REAL(DP), DIMENSION(natoms_to_read(ibox),3) :: Get_XTC_Coords
+  SUBROUTINE Get_XTC_Coords(ibox,xtc_coords_dp)
+          INTEGER, INTENT(IN) :: ibox
+          REAL(DP), DIMENSION(:,:), INTENT(OUT) :: xtc_coords_dp
 
           err_msg = ''
           err_msg(1) = 'Cassandra must be compiled with libgmxfort to support xtc file operations.'
           CALL Clean_Abort(err_msg, 'Get_XTC_Coords')
 
-          Get_XTC_Coords = 0.0_DP
+          !xtc_coords_dp = 0.0_DP
 
-  END FUNCTION Get_XTC_Coords
+  END SUBROUTINE Get_XTC_Coords
 
   FUNCTION Get_XTC_Box(ibox)
           INTEGER :: ibox