From 5c8710fd86313788e55ec57f22772de2db85a465 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 10 Dec 2023 17:35:31 -0700
Subject: [PATCH 001/174] create data structure for super-ellipsoid inside
 bonus

---
 src/atom_vec_ellipsoid.h | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 666f9cbc42f..e8b66874cbe 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -29,6 +29,9 @@ class AtomVecEllipsoid : virtual public AtomVec {
   struct Bonus {
     double shape[3];
     double quat[4];
+    double block[3];
+    double radcirc;
+    bool flag_super;
     int ilocal;
   };
   struct Bonus *bonus;

From 66d00e65953caeaa9134ae46c16341eebb404e20 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 10 Dec 2023 21:27:54 -0700
Subject: [PATCH 002/174] add functions to compute the circumscribed radius of
 the super ellipsoid. add function to set blockiness and modify set_shape
 function to include circumscribed radius.

Also add function to compute volume of the super ellipsoid but this does not compile because
std::beta is not recognized. We need a way to compute the beta function, either from existing library
or by implementing it ourselves, e.g., in math_special, TODO
---
 src/atom_vec_ellipsoid.cpp | 108 +++++++++++++++++++++++++++++++++++++
 src/atom_vec_ellipsoid.h   |   3 ++
 2 files changed, 111 insertions(+)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 84af470409e..ade1324f758 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -27,10 +27,13 @@
 #include "modify.h"
 
 #include <cstring>
+#include <tr1/cmath>
 
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
+#define EPSBLOCK2 1.0e-3
+
 /* ---------------------------------------------------------------------- */
 
 AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
@@ -627,6 +630,8 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
+    double *block = bonus[nlocal_bonus].block;
+    bool flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
@@ -634,6 +639,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     quat[1] = 0.0;
     quat[2] = 0.0;
     quat[3] = 0.0;
+    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = i;
     ellipsoid[i] = nlocal_bonus++;
   } else if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) {
@@ -642,8 +648,110 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     ellipsoid[i] = -1;
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
+    double *block = bonus[ellipsoid[i]].block;
+    bool flag_super = bonus[ellipsoid[i]].flag_super;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
+    bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set block values in bonus data for particle I
+   oriented aligned with xyz axes
+   this may create entry in bonus data
+------------------------------------------------------------------------- */
+
+void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
+{
+  if (ellipsoid[i] < 0) {
+    if (nlocal_bonus == nmax_bonus) grow_bonus();
+    double *shape = bonus[nlocal_bonus].shape;
+    double *quat = bonus[nlocal_bonus].quat;
+    double *block = bonus[nlocal_bonus].block;
+    bool &flag_super = bonus[nlocal_bonus].flag_super;
+    block[0] = blockn1;
+    block[1] = blockn2;
+    block[2] = blockn1 / blockn2;
+    quat[0] = 1.0;
+    quat[1] = 0.0;
+    quat[2] = 0.0;
+    quat[3] = 0.0;
+    bonus[nlocal_bonus].ilocal = i;
+    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) && (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
+    ellipsoid[i] = nlocal_bonus++;
+  } else {
+    double *shape = bonus[ellipsoid[i]].shape;
+    double *block = bonus[ellipsoid[i]].block;
+    bool &flag_super = bonus[ellipsoid[i]].flag_super;
+    block[0] = blockn1;
+    block[1] = blockn2;
+    block[2] = blockn1 / blockn2;
+    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) && (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute the circumscribed radius to the ellipsoid
+------------------------------------------------------------------------- */
+
+double AtomVecEllipsoid::compute_radcirc(double *shape, double *block, bool flag_super)
+{
+  if (!flag_super) return std::max(std::max(shape[0], shape[1]), shape[2]);
+
+  // Super ellipsoid
+  double a = shape[0], b = shape[1], c = shape[2];
+  double n1 = block[0], n2 = block[1], n1divn2 = block[2];
+  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
+
+  // Cylinder approximation for n2=2
+
+  if (std::fabs(n2 - 2.0) < EPSBLOCK2) return sqrt(a * a + c * c);
+
+  // Ellipsoid approximation for n1=2
+
+  if (std::fabs(n1 - 2.0) < EPSBLOCK2) return std::max(c, sqrt(a * a + b * b));
+
+  // Bounding box approximation when n1>2 and n2>2
+
+  return sqrt(a * a + b * b + c * c);
+
+  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
+  // Not sure if exact solution worth it compared to boundig box diagonal
+  // If both blockiness exponents are greater than 2, the exact radius does not
+  // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
+  /*
+  double x, y, z, alpha, beta, gamma, xtilde;
+  double small = 0.1; // TO AVOID OVERFLOW IN POW
+
+  alpha = std::fabs(n2 - 2.0) > small ? std::pow(b / a, 2.0 / (n2 - 2.0)) : 0.0;
+  gamma = std::fabs(n1divn2 - 1.0) > small ? std::pow((1.0 + std::pow(alpha, n2)), n1divn2 - 1.0) : 1.0;
+  beta = std::pow(gamma * c * c / (a * a), 1.0 / std::max(n1 - 2.0, small));
+  xtilde = 1.0 / std::pow(std::pow(1.0 + std::pow(alpha, n2), n1divn2) + std::pow(beta, n1), 1.0 / n1);
+  x = a * xtilde;
+  y = alpha * b * xtilde;
+  z = beta * c * xtilde;
+  return sqrt(x * x + y * y + z * z);
+  */
+}
+
+/* ----------------------------------------------------------------------
+   compute the volume of the ellipsoid
+------------------------------------------------------------------------- */
+
+double AtomVecEllipsoid::compute_volume(double *shape, double *block, bool flag_super)
+{
+  double unitvol = 4.0 * MY_PI / 3.0;
+
+  // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
+
+  if (flag_super) {
+    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
+    unitvol = e1 * e2 * std::beta(0.5 * e1, 1.0 + e1) *
+                        std::beta(0.5 * e2, 0.5 * e2); // CAN'T GET std::beta to be recognized, need help
   }
+  return unitvol * shape[0] * shape[1] * shape[2];
 }
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index e8b66874cbe..37a08b09b29 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -69,6 +69,9 @@ class AtomVecEllipsoid : virtual public AtomVec {
   // unique to AtomVecEllipsoid
 
   void set_shape(int, double, double, double);
+  void set_block(int, double, double);
+  double compute_radcirc(double *, double *, bool);
+  double compute_volume(double *, double *, bool);
 
   int nlocal_bonus;
 

From 22b0045bff2e299e517493dafd656b04b31e3d2a Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 10 Dec 2023 22:19:22 -0700
Subject: [PATCH 003/174] add blockiness to pack/unpack methods

modify std::beta to tr1::std::beta (forgotten in previous commit)
---
 src/atom_vec_ellipsoid.cpp | 48 +++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index ade1324f758..e10001d06d7 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -44,9 +44,9 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   bonus_flag = 1;
 
   size_forward_bonus = 4;
-  size_border_bonus = 8;
-  size_restart_bonus_one = 8;
-  size_data_bonus = 8;
+  size_border_bonus = 10;
+  size_restart_bonus_one = 10;
+  size_data_bonus = 10;
 
   atom->ellipsoid_flag = 1;
   atom->rmass_flag = atom->angmom_flag = atom->torque_flag = 1;
@@ -197,6 +197,7 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
 {
   int i, j, m;
   double *shape, *quat;
+  double *block;
 
   m = 0;
   for (i = 0; i < n; i++) {
@@ -207,6 +208,7 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       buf[m++] = ubuf(1).d;
       shape = bonus[ellipsoid[j]].shape;
       quat = bonus[ellipsoid[j]].quat;
+      block = bonus[ellipsoid[j]].block;
       buf[m++] = shape[0];
       buf[m++] = shape[1];
       buf[m++] = shape[2];
@@ -214,6 +216,8 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       buf[m++] = quat[1];
       buf[m++] = quat[2];
       buf[m++] = quat[3];
+      buf[m++] = block[0];
+      buf[m++] = block[1];
     }
   }
 
@@ -226,6 +230,8 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
 {
   int i, j, m, last;
   double *shape, *quat;
+  double *block;
+  bool flag_super;
 
   m = 0;
   last = first + n;
@@ -237,6 +243,7 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       if (j == nmax_bonus) grow_bonus();
       shape = bonus[j].shape;
       quat = bonus[j].quat;
+      block = bonus[j].block;
       shape[0] = buf[m++];
       shape[1] = buf[m++];
       shape[2] = buf[m++];
@@ -244,7 +251,13 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
+      block[0] = buf[m++];
+      block[1] = buf[m++];
+      block[2] = block[0] / block[1];
+      flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+      bonus[j].flag_super = flag_super;
       bonus[j].ilocal = i;
+      bonus[j].radcirc = compute_radcirc(shape, block, flag_super);
       ellipsoid[i] = j;
       nghost_bonus++;
     }
@@ -269,6 +282,7 @@ int AtomVecEllipsoid::pack_exchange_bonus(int i, double *buf)
     int j = ellipsoid[i];
     double *shape = bonus[j].shape;
     double *quat = bonus[j].quat;
+    double *block = bonus[j].block;
     buf[m++] = shape[0];
     buf[m++] = shape[1];
     buf[m++] = shape[2];
@@ -276,6 +290,8 @@ int AtomVecEllipsoid::pack_exchange_bonus(int i, double *buf)
     buf[m++] = quat[1];
     buf[m++] = quat[2];
     buf[m++] = quat[3];
+    buf[m++] = block[0];
+    buf[m++] = block[1];
   }
 
   return m;
@@ -293,6 +309,8 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
+    double *block = bonus[nlocal_bonus].block;
+    bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
     shape[2] = buf[m++];
@@ -300,6 +318,11 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     quat[1] = buf[m++];
     quat[2] = buf[m++];
     quat[3] = buf[m++];
+    block[0] = buf[m++];
+    block[1] = buf[m++];
+    block[2] = block[0] / block[1];
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -350,6 +373,8 @@ int AtomVecEllipsoid::pack_restart_bonus(int i, double *buf)
     buf[m++] = bonus[j].quat[1];
     buf[m++] = bonus[j].quat[2];
     buf[m++] = bonus[j].quat[3];
+    buf[m++] = bonus[j].block[0];
+    buf[m++] = bonus[j].block[1];
   }
 
   return m;
@@ -370,6 +395,8 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
+    double *block = bonus[nlocal_bonus].block;
+    bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
     shape[2] = buf[m++];
@@ -377,6 +404,11 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     quat[1] = buf[m++];
     quat[2] = buf[m++];
     quat[3] = buf[m++];
+    block[0] = buf[m++];
+    block[1] = buf[m++];
+    block[2] = block[0] / block[1];
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -480,7 +512,9 @@ void AtomVecEllipsoid::pack_data_pre(int ilocal)
 
   if (ellipsoid_flag >= 0) {
     shape = bonus[ellipsoid_flag].shape;
-    rmass[ilocal] /= 4.0 * MY_PI / 3.0 * shape[0] * shape[1] * shape[2];
+    block = bonus[ellipsoid_flag].block;
+    flag_super = bonus[ellipsoid_flag].flag_super;
+    rmass[ilocal] /= compute_volume(shape, block, flag_super);
   }
 }
 
@@ -519,6 +553,8 @@ int AtomVecEllipsoid::pack_data_bonus(double *buf, int /*flag*/)
       buf[m++] = bonus[j].quat[1];
       buf[m++] = bonus[j].quat[2];
       buf[m++] = bonus[j].quat[3];
+      buf[m++] = bonus[j].block[0];
+      buf[m++] = bonus[j].block[1];
     } else
       m += size_data_bonus;
   }
@@ -750,8 +786,8 @@ double AtomVecEllipsoid::compute_volume(double *shape, double *block, bool flag_
 
   if (flag_super) {
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    unitvol = e1 * e2 * std::beta(0.5 * e1, 1.0 + e1) *
-                        std::beta(0.5 * e2, 0.5 * e2); // CAN'T GET std::beta to be recognized, need help
+    unitvol = e1 * e2 * std::tr1::beta(0.5 * e1, 1.0 + e1) *
+                        std::tr1::beta(0.5 * e2, 0.5 * e2); // CAN'T GET std::beta to be recognized, need help
   }
   return unitvol * shape[0] * shape[1] * shape[2];
 }

From 22653aaf9f791c96bca5e7c6f0fc86dda4317a45 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Wed, 13 Dec 2023 21:06:51 -0700
Subject: [PATCH 004/174] add omitted declarations

---
 src/atom_vec_ellipsoid.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index e10001d06d7..f36ae3c0afa 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -500,7 +500,8 @@ void AtomVecEllipsoid::data_atom_post(int ilocal)
 
 void AtomVecEllipsoid::pack_data_pre(int ilocal)
 {
-  double *shape;
+  double *shape, *block;
+  bool flag_super;
 
   ellipsoid_flag = atom->ellipsoid[ilocal];
   rmass_one = atom->rmass[ilocal];

From bbd9cd8631831252a35fe538f9d3fd0539b09362 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Wed, 13 Dec 2023 21:58:51 -0700
Subject: [PATCH 005/174] add blockiness parameter to read_data and write_data.

reading blockiness parameters is made optional for backward compatibility.
---
 doc/src/read_data.rst      | 11 +++++++++--
 src/atom_vec_ellipsoid.cpp | 23 ++++++++++++++++++++---
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/doc/src/read_data.rst b/doc/src/read_data.rst
index 53de3a2a5c1..358f676d6a5 100644
--- a/doc/src/read_data.rst
+++ b/doc/src/read_data.rst
@@ -1327,19 +1327,20 @@ and a general discussion of how type labels can be used.
 *Ellipsoids* section:
 
 * one line per ellipsoid
-* line syntax: atom-ID shapex shapey shapez quatw quati quatj quatk
+* line syntax: atom-ID shapex shapey shapez quatw quati quatj quatk block1 block2
 
   .. parsed-literal::
 
        atom-ID = ID of atom which is an ellipsoid
        shapex,shapey,shapez = 3 diameters of ellipsoid (distance units)
        quatw,quati,quatj,quatk = quaternion components for orientation of atom
+       block1,block2 = 2 blockiness parameters for super-ellipsoids
 
 * example:
 
   .. parsed-literal::
 
-       12 1 2 1 1 0 0 0
+       12 1 2 1 1 0 0 0 2 2
 
 The *Ellipsoids* section must appear if :doc:`atom_style ellipsoid
 <atom_style>` is used and any atoms are listed in the *Atoms* section
@@ -1367,6 +1368,12 @@ for each ellipsoid should be specified for its orientation relative to
 the standard x,y,z coordinate axes.  When the system is converted to a
 restricted triclinic box, the ellipsoid quaternions will be altered to
 reflect the new orientation of the ellipsoid.
+The blockiness values *block1*, *block2* generalize the geometry to a super
+ellipsoid for use in granualr simulations.  Sections through the center and
+parallel to the z-axis are superellipses with squareness *block1* and sections
+in the x-y plane are superellipses with squareness *block2*.  These parameters
+are optional and default to a value of 2, recovering ellipsoid geometry.
+When specified, both values must be greater than or equal to 2.
 
 The *Ellipsoids* section must appear after the *Atoms* section.
 
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index f36ae3c0afa..f62dcd67f8a 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -441,11 +441,27 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
   quat[3] = utils::numeric(FLERR, values[ivalue++], true, lmp);
   MathExtra::qnormalize(quat);
 
+  // Blockiness exponents can be given optionally for superellipsoids
+
+  double *block = bonus[nlocal_bonus].block;
+  bool &flag_super = bonus[nlocal_bonus].flag_super;
+  if (ivalue == values.size()) {
+    block[0] = block[1] = 2.0;
+    flag_super = false;
+  }
+  else {
+    block[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
+    block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+  }
+  block[2] = block[0] / block[1]; // ASSUMES EVEN NUMBERS ONLY?
+
   // reset ellipsoid mass
   // previously stored density in rmass
 
-  rmass[m] *= 4.0 * MY_PI / 3.0 * shape[0] * shape[1] * shape[2];
+  rmass[m] *= compute_volume(shape, block, flag_super);
 
+  bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
   bonus[nlocal_bonus].ilocal = m;
   ellipsoid[m] = nlocal_bonus++;
 }
@@ -571,8 +587,9 @@ void AtomVecEllipsoid::write_data_bonus(FILE *fp, int n, double *buf, int /*flag
 {
   int i = 0;
   while (i < n) {
-    utils::print(fp, "{} {} {} {} {} {} {} {}\n", ubuf(buf[i]).i, buf[i + 1], buf[i + 2], buf[i + 3],
-               buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+    utils::print(fp, "{} {} {} {} {} {} {} {} {} {}\n", ubuf(buf[i]).i, buf[i + 1], buf[i + 2], buf[i + 3],
+               buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7], buf[i + 8], buf[i + 9]);
+
     i += size_data_bonus;
   }
 }

From e2dce8e7ad53df3074646415223fcee864223b42 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sat, 16 Dec 2023 19:43:45 -0700
Subject: [PATCH 006/174] implement Beta function in MathSpecial based on
 cephes/scipy implementation.

This is necessary for C++11 compatibility since std::beta() is C++17

Implementation is modified/simplified assuming strictly positive arguments (always the case for super-ellipsoid calculations involving Beta)
---
 src/atom_vec_ellipsoid.cpp |   6 +-
 src/math_special.cpp       | 137 +++++++++++++++++++++++++++++++++++++
 src/math_special.h         |  23 ++++++-
 3 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index f62dcd67f8a..0754fd1fdd0 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -23,11 +23,11 @@
 #include "fix.h"
 #include "math_const.h"
 #include "math_extra.h"
+#include "math_special.h"
 #include "memory.h"
 #include "modify.h"
 
 #include <cstring>
-#include <tr1/cmath>
 
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
@@ -804,8 +804,8 @@ double AtomVecEllipsoid::compute_volume(double *shape, double *block, bool flag_
 
   if (flag_super) {
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    unitvol = e1 * e2 * std::tr1::beta(0.5 * e1, 1.0 + e1) *
-                        std::tr1::beta(0.5 * e2, 0.5 * e2); // CAN'T GET std::beta to be recognized, need help
+    unitvol = e1 * e2 * MathSpecial::beta(0.5 * e1, 1.0 + e1) *
+                        MathSpecial::beta(0.5 * e2, 0.5 * e2);
   }
   return unitvol * shape[0] * shape[1] * shape[2];
 }
diff --git a/src/math_special.cpp b/src/math_special.cpp
index c23cb23228c..e56b81e65ed 100644
--- a/src/math_special.cpp
+++ b/src/math_special.cpp
@@ -668,6 +668,143 @@ double MathSpecial::erfcx_y100(const double y100)
     return 1.0;
 } /* erfcx_y100 */
 
+
+/* Library cephes:
+ *    Some software in this archive may be from the book _Methods and
+ * Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+ * International, 1989) or from the Cephes Mathematical Library, a
+ * commercial product. In either event, it is copyrighted by the author.
+ * What you see here may be used freely but it comes with no support or
+ * guarantee.
+ *
+ *    The two known misprints in the book are repaired here in the
+ * source listings for the gamma function and the incomplete beta
+ * integral.
+ *
+ *    Stephen L. Moshier
+ *    moshier@na-net.ornl.gov
+ *
+ * File beta.c:
+ *   Beta function
+ *
+ *
+ *
+ *   SYNOPSIS:
+ *
+ *   double a, b, y, beta();
+ *
+ *   y = beta( a, b );
+ *
+ *
+ *
+ *   DESCRIPTION:
+ *
+ *                     -     -
+ *                    | (a) | (b)
+ *   beta( a, b )  =  -----------.
+ *                       -
+ *                      | (a+b)
+ *
+ *   For large arguments the logarithm of the function is
+ *   evaluated using lgam(), then exponentiated.
+ *
+ *
+ *
+ *   ACCURACY:
+ *
+ *                        Relative error:
+ *   arithmetic   domain     # trials      peak         rms
+ *      DEC        0,30        1700       7.7e-15     1.5e-15
+ *      IEEE       0,30       30000       8.1e-14     1.1e-14
+ *
+ *   ERROR MESSAGES:
+ *
+ *     message         condition          value returned
+ *     beta overflow    log(beta) > MAXLOG       0.0
+ *                      a or b <0 integer        0.0
+ *
+ * Copyright:
+ *   Cephes Math Library Release 2.0:  April, 1987
+ *   Copyright 1984, 1987 by Stephen L. Moshier
+ *   Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ *
+ * Licence:
+ *
+ * Authors:
+ *   Stephen L. Moshier, 1987, core author
+ *
+ * Website:
+ *   https://www.netlib.org/cephes/
+ *   https://github.com/scipy/scipy/blob/main/scipy/special/cephes/beta.c
+ *   (the Scipy implementation contains useful updates adopted here)
+ *
+ */
+
+static constexpr double MAXGAM = 171.624376956302725;
+static constexpr double ASYMP_FACTOR = 1e6;
+static constexpr double MAXLOG = 7.09782712893383996732E2;  /* log(DBL_MAX) */
+
+/* ----------------------------------------------------------------------
+   beta function
+------------------------------------------------------------------------- */
+
+double MathSpecial::beta(double a, double b)
+{
+  // Inputs assumed strictly positive, not checked
+  // Simplifies the original cephes code for our purposes
+  double y;
+
+  if (a < b) {
+    y = a; a = b; b = y;
+  }
+
+  /*
+   * Asymptotic expansion for  ln(|B(a, b)|) for a > ASYMP_FACTOR*max(|b|, 1).
+   */
+  auto lbeta_asymp = [](double aa, double bb) {
+    double r = std::lgamma(bb);
+    r -= bb * std::log(aa);
+
+    r += bb*(1-bb)/(2*aa);
+    r += bb*(1-bb)*(1-2*bb)/(12*aa*aa);
+    r += - bb*bb*(1-bb)*(1-bb)/(12*aa*aa*aa);
+
+    return r;
+  };
+
+  if (a > ASYMP_FACTOR * b && a > ASYMP_FACTOR) {
+    /* Avoid loss of precision in lgam(a + b) - lgam(a) */
+    y = lbeta_asymp(a, b);
+    return std::exp(y);
+  }
+
+  y = a + b;
+  if( y > MAXGAM || a > MAXGAM || b > MAXGAM ) {
+    y = std::lgamma(y);
+    y = std::lgamma(b) - y;
+    y = std::lgamma(a) + y;
+    if( y > MAXLOG ) {
+      // No check for overflow? lmp-error ?
+    }
+    return std::exp(y);
+  }
+
+  y = std::tgamma(y);
+  a = std::tgamma(a);
+  b = std::tgamma(b);
+
+  if (std::fabs(a - y) > std::fabs(b) - fabs(y)) {
+    y = b / y;
+    y *= a;
+  }
+  else {
+    y = a / y;
+    y *= b;
+  }
+
+   return y;
+}
+
 /* optimizer friendly implementation of exp2(x).
  *
  * strategy:
diff --git a/src/math_special.h b/src/math_special.h
index b5ac1c73fba..6b80c0eca8b 100644
--- a/src/math_special.h
+++ b/src/math_special.h
@@ -69,7 +69,25 @@ namespace LAMMPS_NS::MathSpecial {
 
   extern double erfcx_y100(const double y100);
 
-  /*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
+   /* Beta function
+   *
+   * This function computes the Beta function of two real, positive arguments.
+   * Necessary to compute the beta function for super-ellipsoid volume and
+   * moments of inertia. This function is used because std::beta() is C++17
+   * standard and core LAMMPS code has to be C++11 compatible.
+   *
+   * This implementation is restricted to positive arguments for 2 reasons:
+   * 1. Super-ellipsoid calculations only require positive arguments.
+   * 2. Negative arguments require tracking signs of the gamma function so that
+   * std::lgamma() and std::tgamma() functions cannot be used as is, this would
+   * require more implementation than desired for this function.
+   *
+   *  \param   x, y arguments
+   *  \return  value of beta(x) */
+
+  extern double beta(double x, double y);
+
+ /*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
    *
    *  This is a portable fast implementation of exp(x*x)*erfc(x) that can be used
    *  in coul/long pair styles as a replacement for the polynomial expansion that
@@ -183,7 +201,8 @@ namespace LAMMPS_NS::MathSpecial {
 
     return yy;
   }
-} // namespace LAMMPS_NS::MathSpecial
 
+}    // namespace MathSpecial
+}    // namespace LAMMPS_NS
 
 #endif

From 66b59fbf186e4ada900dfb457aacdf938ed2cccf Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sat, 16 Dec 2023 20:32:37 -0700
Subject: [PATCH 007/174] fix logic for super-ellipsoid flag

---
 src/atom_vec_ellipsoid.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 0754fd1fdd0..9f6f4b2de23 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -254,7 +254,7 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       block[0] = buf[m++];
       block[1] = buf[m++];
       block[2] = block[0] / block[1];
-      flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+      flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
       bonus[j].flag_super = flag_super;
       bonus[j].ilocal = i;
       bonus[j].radcirc = compute_radcirc(shape, block, flag_super);
@@ -321,7 +321,7 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     block[0] = buf[m++];
     block[1] = buf[m++];
     block[2] = block[0] / block[1];
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
@@ -407,7 +407,7 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     block[0] = buf[m++];
     block[1] = buf[m++];
     block[2] = block[0] / block[1];
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
@@ -452,7 +452,7 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
   else {
     block[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
     block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) && (std::fabs(block[1] - 2) > EPSBLOCK2));
+    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
   }
   block[2] = block[0] / block[1]; // ASSUMES EVEN NUMBERS ONLY?
 
@@ -733,7 +733,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     quat[2] = 0.0;
     quat[3] = 0.0;
     bonus[nlocal_bonus].ilocal = i;
-    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) && (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     ellipsoid[i] = nlocal_bonus++;
   } else {
@@ -743,7 +743,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     block[0] = blockn1;
     block[1] = blockn2;
     block[2] = blockn1 / blockn2;
-    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) && (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
     bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
   }
 }

From 4691cc50c9728f051dedfa6e41b7d149cb4684ae Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sat, 16 Dec 2023 21:07:33 -0700
Subject: [PATCH 008/174] block and flag_super must be initialized with
 set_shape when ellipsoid[i]<0

---
 src/atom_vec_ellipsoid.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 9f6f4b2de23..56f4fd8f94a 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -685,7 +685,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
-    bool flag_super = bonus[nlocal_bonus].flag_super;
+    bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
@@ -693,6 +693,9 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     quat[1] = 0.0;
     quat[2] = 0.0;
     quat[3] = 0.0;
+    block[0] = 2;
+    block[1] = 2;
+    flag_super = false;
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = i;
     ellipsoid[i] = nlocal_bonus++;

From 2e9bfca52892fa98e2584e753a22e59705b5d505 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sat, 16 Dec 2023 21:50:22 -0700
Subject: [PATCH 009/174] allow set command to set the ellipsoid block
 exponents.

---
 doc/src/set.rst | 17 ++++++++++++++-
 src/set.cpp     | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
 src/set.h       |  2 ++
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/doc/src/set.rst b/doc/src/set.rst
index 5a6995ee09b..b4f91c83f13 100644
--- a/doc/src/set.rst
+++ b/doc/src/set.rst
@@ -23,7 +23,7 @@ Syntax
 
 * one or more keyword/value pairs may be appended
 
-* keyword = *angle* or *angmom* or *apip/lambda* or *bond* or *cc* or *charge*
+* keyword = *angle* or *angmom* or *apip/lambda* or *block* or *bond* or *cc* or *charge*
   or *density* or *density/disc* or *diameter* or *dihedral* or *dipole*
   or *dipole/random* or *dpd/theta* or *edpd/cv* or *edpd/temp* or
   *epsilon* or *image* or *improper* or *length* or *mass* or *mol* or
@@ -45,6 +45,8 @@ Syntax
          fast = switching parameter of fast potential (1)
          precise = switching parameter of fast potential (0)
          float = constant float or atom-style variable (between 0 and 1)
+       *block* value = block1, block2
+         block1,block2 = 2 blockiness parameters for super-ellipsoids
        *bond* value = numeric bond type or bond type label, for all bonds between selected atoms
        *cc* values = index cc
          index = index of a chemical species (1 to Nspecies)
@@ -538,6 +540,19 @@ other. Note that the SPH smoothing kernel diameter used for computing
 long range, nonlocal interactions, is set using the *diameter*
 keyword.
 
+Keyword *block* sets the blockiness of the selected atoms.  The
+particles must be ellipsoids as defined by the :doc:`atom_style
+ellipsoid <atom_style>` command.  This command is used to define
+super-ellipsoid particle shapes for use in granular simulations.
+The *block1*, *block2* settings are the 2 exponents of the super-ellipsoid
+in the horizontal and vertical directions.  Vertical sections through the
+center are superellipses with squareness *block1* and horizontal sections
+are superellipses with squareness *block2*.  If both parameters are set to
+a value of 2 (the default), the atom is an ellipsoid. Note that this command does not
+adjust the particle mass, even if it was defined with a density,
+e.g. via the :doc:`read_data <read_data>` command.
+
+
 Keyword *smd/mass/density* sets the mass of all selected particles,
 but it is only applicable to the Smooth Mach Dynamics package MACHDYN.
 It assumes that the particle volume has already been correctly set and
diff --git a/src/set.cpp b/src/set.cpp
index 4a780e53e00..14d749e0142 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -44,7 +44,7 @@ using namespace MathConst;
 
 enum{ATOM_SELECT,MOL_SELECT,TYPE_SELECT,GROUP_SELECT,REGION_SELECT};
 
-enum{ANGLE,ANGMOM,APIP_LAMBDA,BOND,CC,CHARGE,DENSITY,DIAMETER,DIHEDRAL,DIPOLE,
+enum{ANGLE,ANGMOM,APIP_LAMBDA,BLOCK,BOND,CC,CHARGE,DENSITY,DIAMETER,DIHEDRAL,DIPOLE,
   DIPOLE_RANDOM,DPD_THETA,EDPD_CV,EDPD_TEMP,EPSILON,IMAGE,IMPROPER,LENGTH,
   MASS,MOLECULE,OMEGA,QUAT,QUAT_RANDOM,RADIUS_ELECTRON,RHEO_STATUS,SHAPE,
   SMD_CONTACT_RADIUS,SMD_MASS_DENSITY,SPH_CV,SPH_E,SPH_RHO,
@@ -212,6 +212,10 @@ void Set::process_args(int caller_flag, int narg, char **arg)
       action->keyword = APIP_LAMBDA;
       process_apip_lambda(iarg,narg,arg,action);
       invoke_choice[naction++] = &Set::invoke_apip_lambda;
+    } else if (strcmp(arg[iarg],"block") == 0) {
+      action->keyword = BLOCK;
+      process_block(iarg, narg, arg, action);
+      invoke_choice[naction++] = &Set::invoke_block;
     } else if (strcmp(arg[iarg],"bond") == 0) {
       action->keyword = BOND;
       process_bond(iarg,narg,arg,action);
@@ -1114,6 +1118,57 @@ void Set::invoke_apip_lambda(Action *action)
 
 /* ---------------------------------------------------------------------- */
 
+void Set::process_block(int &iarg, int narg, char **arg, Action *action)
+{
+  if (!atom->ellipsoid_flag)
+    error->all(FLERR,"Cannot set attribute {} for atom style {}", arg[iarg], atom->get_style());
+  if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "set block", error);
+  if (utils::strmatch(arg[iarg+1],"^v_")) varparse(arg[iarg+1],1,action);
+  else {
+    action->dvalue1 = utils::numeric(FLERR,arg[iarg+1],false,lmp);
+    if (action->dvalue1 < 2.0) error->one(FLERR,"Invalid block in set command");
+  }
+  if (utils::strmatch(arg[iarg+2],"^v_")) varparse(arg[iarg+2],2,action);
+  else {
+    action->dvalue2 = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+    if (action->dvalue2 < 2.0) error->one(FLERR,"Invalid block in set command");
+  }
+  iarg += 3;
+}
+
+
+void Set::invoke_block(Action *action)
+{
+  int nlocal = atom->nlocal;
+  auto *avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+
+  int varflag = action->varflag;
+  double block1 = 0.0, block2 = 0.0;
+  if (!action->varflag1) xvalue = action->dvalue1;
+  if (!action->varflag2) block2 = action->dvalue2;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (!select[i]) continue;
+
+    if (varflag) {
+      if (action->varflag1) block1 = vec1[i];
+      if (action->varflag2) block2 = vec2[i];
+      if (block1 < 2.0 || block2 < 2.0)
+        error->one(FLERR, Error::NOLASTLINE, "Invalid block in set command");
+    }
+
+    avec_ellipsoid->set_block(i, block1, block2);
+  }
+
+  // update global ellipsoid count
+  // TODO: Not sure if block should update the ellipsoid count
+  //       what happens if you call this twice in invike_shape and invoke_block ?
+  //   bigint nlocal_bonus = avec_ellipsoid->nlocal_bonus;
+  //   MPI_Allreduce(&nlocal_bonus,&atom->nellipsoids,1,MPI_LMP_BIGINT,MPI_SUM,world);
+}
+
+/* ---------------------------------------------------------------------- */
+
 void Set::process_bond(int &iarg, int narg, char **arg, Action *action)
 {
   if (atom->avec->bonds_allow == 0)
diff --git a/src/set.h b/src/set.h
index bcf06e6ba18..a44f6b9b8bf 100644
--- a/src/set.h
+++ b/src/set.h
@@ -93,6 +93,7 @@ class Set : public Command {
   void process_angle(int &, int, char **, Action *);
   void process_angmom(int &, int, char **, Action *);
   void process_apip_lambda(int &, int, char **, Action *);
+  void process_block(int &, int, char **, Action *);
   void process_bond(int &, int, char **, Action *);
   void process_cc(int &, int, char **, Action *);
   void process_charge(int &, int, char **, Action *);
@@ -147,6 +148,7 @@ class Set : public Command {
   void invoke_angle(Action *);
   void invoke_angmom(Action *);
   void invoke_apip_lambda(Action *);
+  void invoke_block(Action *);
   void invoke_bond(Action *);
   void invoke_cc(Action *);
   void invoke_charge(Action *);

From 6295d642e3ec96817841d0cb909ba3d78ff0968d Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 17 Dec 2023 13:47:04 -0700
Subject: [PATCH 010/174] allow shape and block to be set in any order by
 setting shape to 1 1 1 if block is called before shape.

Explain that default behavior in the doc. add example and line about atom-style variable for shape and block
---
 doc/src/set.rst            | 9 +++++++--
 src/atom_vec_ellipsoid.cpp | 3 +++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/doc/src/set.rst b/doc/src/set.rst
index b4f91c83f13..66c7939a8f9 100644
--- a/doc/src/set.rst
+++ b/doc/src/set.rst
@@ -184,6 +184,7 @@ Examples
    set atom * charge v_atomfile
    set atom 100*200 x 0.5 y 1.0
    set atom 100 vx 0.0 vy 0.0 vz -1.0
+   set atom 200 shape 1.5 2.0 4.0 block 2.0 4.0
    set atom 1492 type 3
    set atom 1492 type H
    set atom * i_myVal 5
@@ -545,10 +546,14 @@ particles must be ellipsoids as defined by the :doc:`atom_style
 ellipsoid <atom_style>` command.  This command is used to define
 super-ellipsoid particle shapes for use in granular simulations.
 The *block1*, *block2* settings are the 2 exponents of the super-ellipsoid
-in the horizontal and vertical directions.  Vertical sections through the
+in the vertical and horizontal directions.  Vertical sections through the
 center are superellipses with squareness *block1* and horizontal sections
 are superellipses with squareness *block2*.  If both parameters are set to
-a value of 2 (the default), the atom is an ellipsoid. Note that this command does not
+a value of 2 (the default), the atom is a regular ellipsoid.  The keyword
+*block* should be used together with the keyword *shape* to give the particle
+the desired shape.  If the keyword *block* is given alone, and the *shape* has
+not been defined, e.g., in a previous *set* command, the 3 diameters would be
+set to a value of 1 internally.  Note that this command does not
 adjust the particle mass, even if it was defined with a density,
 e.g. via the :doc:`read_data <read_data>` command.
 
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 56f4fd8f94a..5e5d7edf21e 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -728,6 +728,9 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
     bool &flag_super = bonus[nlocal_bonus].flag_super;
+    shape[0] = 0.5;
+    shape[1] = 0.5;
+    shape[2] = 0.5;
     block[0] = blockn1;
     block[1] = blockn2;
     block[2] = blockn1 / blockn2;

From 431f54ede825013dbaaf3ece4e6c43d1288a90c2 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 17 Dec 2023 17:15:28 -0700
Subject: [PATCH 011/174] add unit test for new bonus properties to
 test_atom_styles

---
 unittest/formats/test_atom_styles.cpp | 40 +++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index f3004951b34..2ced044c5dd 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1311,6 +1311,46 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[3].quat[1], sqrt(5.0 / 30.0), EPSILON);
     EXPECT_NEAR(bonus[3].quat[2], sqrt(5.0 / 30.0), EPSILON);
     EXPECT_NEAR(bonus[3].quat[3], sqrt(5.0 / 30.0), EPSILON);
+    EXPECT_NEAR(bonus[0].block[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[0].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[1].block[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[1].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[2].block[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[2].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[3].block[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[3].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[0].radcirc, 0.5, EPSILON);
+    EXPECT_NEAR(bonus[1].radcirc, 0.5, EPSILON);
+    EXPECT_NEAR(bonus[2].radcirc, 1.5, EPSILON);
+    EXPECT_NEAR(bonus[3].radcirc, 1.5, EPSILON);
+    ASSERT_FALSE(bonus[0].flag_super);
+    ASSERT_FALSE(bonus[1].flag_super);
+    ASSERT_FALSE(bonus[2].flag_super);
+    ASSERT_FALSE(bonus[3].flag_super);
+
+    BEGIN_HIDE_OUTPUT();
+    command("set atom 5 block 8.0 8.0");
+    command("set atom 6 shape 4.0 0.5 1.6 block 4.0 2.0");
+    END_HIDE_OUTPUT();
+    ASSERT_EQ(ellipsoid[GETIDX(5)], 4);
+    ASSERT_EQ(ellipsoid[GETIDX(6)], 5);
+    EXPECT_NEAR(bonus[4].shape[0], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[4].shape[1], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[4].shape[2], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[5].shape[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[5].shape[1], 0.25, EPSILON);
+    EXPECT_NEAR(bonus[5].shape[2], 0.8, EPSILON);
+    EXPECT_NEAR(bonus[4].block[0], 8.0, EPSILON);
+    EXPECT_NEAR(bonus[4].block[1], 8.0, EPSILON);
+    EXPECT_NEAR(bonus[5].block[0], 4.0, EPSILON);
+    EXPECT_NEAR(bonus[5].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[4].radcirc, 0.5*sqrt(3.0), EPSILON);
+    EXPECT_NEAR(bonus[5].radcirc, sqrt(4.64), EPSILON);
+    ASSERT_TRUE(bonus[4].flag_super);
+    ASSERT_TRUE(bonus[5].flag_super);
+    BEGIN_HIDE_OUTPUT();
+    command("set atom 5*6 shape 0.0 0.0 0.0");
+    END_HIDE_OUTPUT();
 
     BEGIN_HIDE_OUTPUT();
     command("group two id 2:4:2");

From c741ae68a68c331892f211214055225b99355b26 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 21 Jan 2024 13:01:27 -0700
Subject: [PATCH 012/174] implement moment of inertia for super ellipsoid. Make
 block and flag arguments optional to keep existing calls as is

---
 src/atom_vec_ellipsoid.cpp |  4 ++--
 src/math_extra.cpp         | 33 +++++++++++++++++++++++++++++----
 src/math_extra.h           |  5 +++--
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 5e5d7edf21e..5ce06481080 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -30,7 +30,7 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-using MathConst::MY_PI;
+using MathConst::MY_4PI3;
 
 #define EPSBLOCK2 1.0e-3
 
@@ -804,7 +804,7 @@ double AtomVecEllipsoid::compute_radcirc(double *shape, double *block, bool flag
 
 double AtomVecEllipsoid::compute_volume(double *shape, double *block, bool flag_super)
 {
-  double unitvol = 4.0 * MY_PI / 3.0;
+  double unitvol = MY_4PI3;
 
   // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
 
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index a36600d970a..9c81e29e9ed 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -17,9 +17,12 @@
 ------------------------------------------------------------------------- */
 
 #include "math_extra.h"
+#include "math_special.h"
 #include <cstdio>
 #include <cstring>
 
+using namespace LAMMPS_NS;
+
 namespace MathExtra {
 
 /* ----------------------------------------------------------------------
@@ -477,16 +480,38 @@ void quat_to_mat_trans(const double *quat, double mat[3][3])
 ------------------------------------------------------------------------- */
 
 void inertia_ellipsoid(double *radii, double *quat, double mass,
-                       double *inertia)
+                       double *inertia, double *block, bool flag_super)
 {
   double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
   double idiag[3];
+  double rsq0 = radii[0] * radii[0];
+  double rsq1 = radii[1] * radii[1];
+  double rsq2 = radii[2] * radii[2];
 
   quat_to_mat(quat,p);
   quat_to_mat_trans(quat,ptrans);
-  idiag[0] = 0.2*mass * (radii[1]*radii[1] + radii[2]*radii[2]);
-  idiag[1] = 0.2*mass * (radii[0]*radii[0] + radii[2]*radii[2]);
-  idiag[2] = 0.2*mass * (radii[0]*radii[0] + radii[1]*radii[1]);
+
+  if (flag_super) {
+    // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
+    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
+    double dens = mass / (MathSpecial::beta(0.5 * e1, 1.0 + e1) *
+                          MathSpecial::beta(0.5 * e2, 0.5 * e2));
+    double m0 = rsq0 * MathSpecial::beta(0.5 * e1, 1 + 2 * e1) *
+                       MathSpecial::beta(0.5 * e2, 1.5 * e2);
+    double m1 = rsq1 * MathSpecial::beta(0.5 * e1, 1 + 2 * e1) *
+                       MathSpecial::beta(1.5 * e2, 0.5 * e2);
+    double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) *
+                       MathSpecial::beta(0.5 * e2, 0.5 * e2);
+    idiag[0] = dens * (m1 + m2);
+    idiag[1] = dens * (m0 + m2);
+    idiag[2] = dens * (m0 + m1);
+  }
+  else {
+    idiag[0] = 0.2*mass * (rsq1 + rsq2);
+    idiag[1] = 0.2*mass * (rsq0 + rsq2);
+    idiag[2] = 0.2*mass * (rsq0 + rsq1);
+  }
+
   diag_times3(idiag,ptrans,itemp);
   times3(p,itemp,tensor);
   inertia[0] = tensor[0][0];
diff --git a/src/math_extra.h b/src/math_extra.h
index 52d1d838ffb..35eb464f321 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -117,12 +117,13 @@ void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
 
-void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia);
+void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia,
+                       double *block = nullptr, bool flag_super = false);
 void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
 
-// triclinic bounding box of a spher
+// triclinic bounding box of a sphere
 
 void tribbox(double *, double, double *);
 

From de30be958f8e00c183f0d4eb2de5c3f9be4f48ea Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Sun, 21 Jan 2024 14:51:47 -0700
Subject: [PATCH 013/174] Create principal inertia function: avoids code
 duplication and fixes mistakes.

Did not change INTEL and GPU functions yet because of different, special syntax.
---
 src/ASPHERE/compute_erotate_asphere.cpp |  5 +--
 src/ASPHERE/compute_temp_asphere.cpp    | 17 ++------
 src/ASPHERE/fix_nh_asphere.cpp          |  4 +-
 src/ASPHERE/fix_nve_asphere.cpp         |  6 +--
 src/ASPHERE/fix_nve_asphere_noforce.cpp |  4 +-
 src/CG-DNA/fix_nve_dot.cpp              |  6 +--
 src/CG-DNA/fix_nve_dotc_langevin.cpp    |  6 +--
 src/OPENMP/fix_nh_asphere_omp.cpp       |  6 +--
 src/OPENMP/fix_rigid_nh_omp.cpp         |  8 +---
 src/OPENMP/fix_rigid_omp.cpp            |  8 +---
 src/OPENMP/fix_rigid_small_omp.cpp      |  8 +---
 src/RIGID/fix_rigid.cpp                 |  8 +---
 src/RIGID/fix_rigid_small.cpp           |  8 +---
 src/RIGID/rigid_const.h                 |  2 -
 src/SRD/fix_srd.cpp                     |  9 +++--
 src/fix_langevin.cpp                    |  5 +--
 src/fix_move.cpp                        | 16 +-------
 src/math_extra.cpp                      | 54 ++++++++++++++++---------
 src/math_extra.h                        |  3 +-
 19 files changed, 66 insertions(+), 117 deletions(-)

diff --git a/src/ASPHERE/compute_erotate_asphere.cpp b/src/ASPHERE/compute_erotate_asphere.cpp
index 95a323b4e3f..91c6d654cfd 100644
--- a/src/ASPHERE/compute_erotate_asphere.cpp
+++ b/src/ASPHERE/compute_erotate_asphere.cpp
@@ -110,10 +110,7 @@ double ComputeERotateAsphere::compute_scalar()
         quat = ebonus[ellipsoid[i]].quat;
 
         // principal moments of inertia
-
-        inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
-        inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
-        inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
         // wbody = angular velocity in body frame
 
diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index d99d9f30c8c..ac4d3c5f53e 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -33,7 +33,6 @@
 using namespace LAMMPS_NS;
 
 enum { ROTATE, ALL };
-static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -222,9 +221,7 @@ double ComputeTempAsphere::compute_scalar()
         shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
 
-        inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-        inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-        inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
         // wbody = angular velocity in body frame
 
@@ -247,9 +244,7 @@ double ComputeTempAsphere::compute_scalar()
         shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
 
-        inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-        inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-        inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
         // wbody = angular velocity in body frame
 
@@ -321,9 +316,7 @@ void ComputeTempAsphere::compute_vector()
         shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
 
-        inertia[0] = INERTIA*massone * (shape[1]*shape[1]+shape[2]*shape[2]);
-        inertia[1] = INERTIA*massone * (shape[0]*shape[0]+shape[2]*shape[2]);
-        inertia[2] = INERTIA*massone * (shape[0]*shape[0]+shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, massone, inertia);
 
         // wbody = angular velocity in body frame
 
@@ -353,9 +346,7 @@ void ComputeTempAsphere::compute_vector()
         quat = bonus[ellipsoid[i]].quat;
         massone = rmass[i];
 
-        inertia[0] = INERTIA*massone * (shape[1]*shape[1]+shape[2]*shape[2]);
-        inertia[1] = INERTIA*massone * (shape[0]*shape[0]+shape[2]*shape[2]);
-        inertia[2] = INERTIA*massone * (shape[0]*shape[0]+shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, massone, inertia);
 
         // wbody = angular velocity in body frame
 
diff --git a/src/ASPHERE/fix_nh_asphere.cpp b/src/ASPHERE/fix_nh_asphere.cpp
index 35d0e404be5..3f62eb61d96 100644
--- a/src/ASPHERE/fix_nh_asphere.cpp
+++ b/src/ASPHERE/fix_nh_asphere.cpp
@@ -117,9 +117,7 @@ void FixNHAsphere::nve_x()
       shape = bonus[ellipsoid[i]].shape;
       quat = bonus[ellipsoid[i]].quat;
 
-      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
-      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
-      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index a5655b875cc..13e9ea620fd 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -26,8 +26,6 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
-
 /* ---------------------------------------------------------------------- */
 
 FixNVEAsphere::FixNVEAsphere(LAMMPS *lmp, int narg, char **arg) :
@@ -101,9 +99,7 @@ void FixNVEAsphere::initial_integrate(int /*vflag*/)
       shape = bonus[ellipsoid[i]].shape;
       quat = bonus[ellipsoid[i]].quat;
 
-      inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-      inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-      inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index aaa21d9550f..7d97eaad613 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -89,9 +89,7 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
       shape = bonus[ellipsoid[i]].shape;
       quat = bonus[ellipsoid[i]].quat;
 
-      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
-      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
-      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
diff --git a/src/CG-DNA/fix_nve_dot.cpp b/src/CG-DNA/fix_nve_dot.cpp
index 4fbf9bb9be4..55aaf5f21f6 100644
--- a/src/CG-DNA/fix_nve_dot.cpp
+++ b/src/CG-DNA/fix_nve_dot.cpp
@@ -26,8 +26,6 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
-
 /* ---------------------------------------------------------------------- */
 
 FixNVEDot::FixNVEDot(LAMMPS *lmp, int narg, char **arg) :
@@ -113,9 +111,7 @@ void FixNVEDot::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-      inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-      inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       // rotate quaternion and quaternion 4-momentum by full step
       no_squish_rotate(3,conjqm,quat,inertia,dthlf);
diff --git a/src/CG-DNA/fix_nve_dotc_langevin.cpp b/src/CG-DNA/fix_nve_dotc_langevin.cpp
index 8151cbe4fea..1003930538a 100644
--- a/src/CG-DNA/fix_nve_dotc_langevin.cpp
+++ b/src/CG-DNA/fix_nve_dotc_langevin.cpp
@@ -33,8 +33,6 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
-
 /* ---------------------------------------------------------------------- */
 
 FixNVEDotcLangevin::FixNVEDotcLangevin(LAMMPS *lmp, int narg, char **arg) :
@@ -188,9 +186,7 @@ void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-      inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-      inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       M = inertia[0]*inertia[1]*inertia[2];
       M /= inertia[1]*inertia[2]+inertia[0]*inertia[2]+inertia[0]*inertia[1];
diff --git a/src/OPENMP/fix_nh_asphere_omp.cpp b/src/OPENMP/fix_nh_asphere_omp.cpp
index 35a42d2119d..29d680cb074 100644
--- a/src/OPENMP/fix_nh_asphere_omp.cpp
+++ b/src/OPENMP/fix_nh_asphere_omp.cpp
@@ -133,12 +133,10 @@ void FixNHAsphereOMP::nve_x()
 
       // principal moments of inertia
 
-      const double * const shape = bonus[ellipsoid[i]].shape;
+      double * const shape = bonus[ellipsoid[i]].shape;
       double * const quat = bonus[ellipsoid[i]].quat;
 
-      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
-      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
-      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
 
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
diff --git a/src/OPENMP/fix_rigid_nh_omp.cpp b/src/OPENMP/fix_rigid_nh_omp.cpp
index c063a1db0c7..90b38ec3a15 100644
--- a/src/OPENMP/fix_rigid_nh_omp.cpp
+++ b/src/OPENMP/fix_rigid_nh_omp.cpp
@@ -802,9 +802,7 @@ void FixRigidNHOMP::set_xv_thr()
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
@@ -986,9 +984,7 @@ void FixRigidNHOMP::set_v_thr()
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
diff --git a/src/OPENMP/fix_rigid_omp.cpp b/src/OPENMP/fix_rigid_omp.cpp
index 5f548c4af9e..3bfe51ddc0c 100644
--- a/src/OPENMP/fix_rigid_omp.cpp
+++ b/src/OPENMP/fix_rigid_omp.cpp
@@ -533,9 +533,7 @@ void FixRigidOMP::set_xv_thr()
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
@@ -717,9 +715,7 @@ void FixRigidOMP::set_v_thr()
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
diff --git a/src/OPENMP/fix_rigid_small_omp.cpp b/src/OPENMP/fix_rigid_small_omp.cpp
index 98c411d11ae..2111e3baf56 100644
--- a/src/OPENMP/fix_rigid_small_omp.cpp
+++ b/src/OPENMP/fix_rigid_small_omp.cpp
@@ -465,9 +465,7 @@ void FixRigidSmallOMP::set_xv_thr()
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b.quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,angmom[i]);
       } else if (eflags[i] & LINE) {
@@ -645,9 +643,7 @@ void FixRigidSmallOMP::set_v_thr()
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,
                                    angmom[i]);
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 46d93ba8dfd..5ad85160e8a 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1432,9 +1432,7 @@ void FixRigid::set_xv()
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
@@ -1590,9 +1588,7 @@ void FixRigid::set_v()
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 441d7fb674c..0ed78fcc2db 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -1334,9 +1334,7 @@ void FixRigidSmall::set_xv()
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b->quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,angmom[i]);
       } else if (eflags[i] & LINE) {
@@ -1489,9 +1487,7 @@ void FixRigidSmall::set_v()
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
-        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
-        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
+        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,
                                    angmom[i]);
diff --git a/src/RIGID/rigid_const.h b/src/RIGID/rigid_const.h
index 69bed21cfac..1956f74687d 100644
--- a/src/RIGID/rigid_const.h
+++ b/src/RIGID/rigid_const.h
@@ -40,8 +40,6 @@
 
     // moment of inertia prefactor for sphere
     static constexpr double SINERTIA = 0.4;
-    // moment of inertia prefactor for ellipsoid
-    static constexpr double EINERTIA = 0.2;
     // moment of inertia prefactor for line segment
     static constexpr double LINERTIA = 1.0/12.0;
 
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index 5b6bd5988ce..8c52bf70826 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -52,7 +52,6 @@ enum { BIG_MOVE, SRD_MOVE, SRD_ROTATE };
 enum { CUBIC_ERROR, CUBIC_WARN };
 enum { SHIFT_NO, SHIFT_YES, SHIFT_POSSIBLE };
 
-static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 static constexpr int ATOMPERBIN = 30;
 static constexpr double BIG = 1.0e20;
@@ -60,6 +59,10 @@ static constexpr int VBINSIZE = 5;
 static constexpr double TOLERANCE = 0.00001;
 static constexpr int MAXITER = 20;
 
+
+
+
+
 static const char cite_fix_srd[] =
     "fix srd command: https://doi.org/10.1063/1.3419070\n\n"
     "@Article{Petersen10,\n"
@@ -2967,9 +2970,7 @@ void FixSRD::big_dynamic()
       quat = ebonus[ellipsoid[i]].quat;
       MathExtra::q_to_exyz(quat, biglist[k].ex, biglist[k].ey, biglist[k].ez);
       shape = ebonus[ellipsoid[i]].shape;
-      inertiaone[0] = EINERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
-      inertiaone[1] = EINERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
-      inertiaone[2] = EINERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertiaone);
       MathExtra::angmom_to_omega(angmom[i], biglist[k].ex, biglist[k].ey, biglist[k].ez, inertiaone,
                                  biglist[k].omega);
 
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 9b637853a53..f3736ceac2e 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -43,7 +43,6 @@ enum { NOBIAS, BIAS };
 enum { CONSTANT, EQUAL, ATOM };
 
 static constexpr double SINERTIA = 0.4;    // moment of inertia prefactor for sphere
-static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -618,9 +617,7 @@ void FixLangevin::angmom_thermostat()
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       shape = bonus[ellipsoid[i]].shape;
-      inertia[0] = EINERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
-      inertia[1] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
-      inertia[2] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
       quat = bonus[ellipsoid[i]].quat;
       MathExtra::mq_to_omega(angmom[i],quat,inertia,omega);
 
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index be170aea9e3..70f09f0472d 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -42,8 +42,6 @@ using namespace MathConst;
 enum { LINEAR, WIGGLE, ROTATE, VARIABLE, TRANSROT };
 enum { EQUAL, ATOM };
 
-static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
-
 /* ---------------------------------------------------------------------- */
 
 FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
@@ -781,12 +779,7 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              inertia_ellipsoid[0] =
-                  INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
-              inertia_ellipsoid[1] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
-              inertia_ellipsoid[2] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+              MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia_ellipsoid);
               inertia = inertia_ellipsoid;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
@@ -925,12 +918,7 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              inertia_ellipsoid[0] =
-                  INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
-              inertia_ellipsoid[1] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
-              inertia_ellipsoid[2] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+              MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia_ellipsoid);
               inertia = inertia_ellipsoid;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 9c81e29e9ed..583c8ada403 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -473,24 +473,19 @@ void quat_to_mat_trans(const double *quat, double mat[3][3])
 }
 
 /* ----------------------------------------------------------------------
-   compute space-frame inertia tensor of an ellipsoid
-   radii = 3 radii of ellipsoid
+   compute principal-frame inertia tensor of an ellipsoid
+   shape = 3 radii of ellipsoid
    quat = orientiation quaternion of ellipsoid
-   return symmetric inertia tensor as 6-vector in Voigt ordering
+   block = blockiness exponents of super-ellipsoid
+   return principal inertia tensor diagonal as 3-vector
 ------------------------------------------------------------------------- */
 
-void inertia_ellipsoid(double *radii, double *quat, double mass,
-                       double *inertia, double *block, bool flag_super)
+void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
+                                 double *block, bool flag_super)
 {
-  double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
-  double idiag[3];
-  double rsq0 = radii[0] * radii[0];
-  double rsq1 = radii[1] * radii[1];
-  double rsq2 = radii[2] * radii[2];
-
-  quat_to_mat(quat,p);
-  quat_to_mat_trans(quat,ptrans);
-
+  double rsq0 = shape[0] * shape[0];
+  double rsq1 = shape[1] * shape[1];
+  double rsq2 = shape[2] * shape[2];
   if (flag_super) {
     // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
@@ -502,16 +497,35 @@ void inertia_ellipsoid(double *radii, double *quat, double mass,
                        MathSpecial::beta(1.5 * e2, 0.5 * e2);
     double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) *
                        MathSpecial::beta(0.5 * e2, 0.5 * e2);
-    idiag[0] = dens * (m1 + m2);
-    idiag[1] = dens * (m0 + m2);
-    idiag[2] = dens * (m0 + m1);
+    inertia[0] = dens * (m1 + m2);
+    inertia[1] = dens * (m0 + m2);
+    inertia[2] = dens * (m0 + m1);
   }
   else {
-    idiag[0] = 0.2*mass * (rsq1 + rsq2);
-    idiag[1] = 0.2*mass * (rsq0 + rsq2);
-    idiag[2] = 0.2*mass * (rsq0 + rsq1);
+    double dens = 0.2 * mass;
+    inertia[0] = dens * (rsq1 + rsq2);
+    inertia[1] = dens * (rsq0 + rsq2);
+    inertia[2] = dens * (rsq0 + rsq1);
   }
+}
 
+/* ----------------------------------------------------------------------
+   compute space-frame inertia tensor of an ellipsoid
+   shape = 3 radii of ellipsoid
+   quat = orientiation quaternion of ellipsoid
+   block = blockiness exponents of super-ellipsoid
+   return symmetric inertia tensor as 6-vector in Voigt ordering
+------------------------------------------------------------------------- */
+
+void inertia_ellipsoid(double *shape, double *quat, double mass,
+                       double *inertia, double *block, bool flag_super)
+{
+  double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
+  double idiag[3];
+
+  quat_to_mat(quat,p);
+  quat_to_mat_trans(quat,ptrans);
+  inertia_ellipsoid_principal(shape, mass, idiag, block, flag_super);
   diag_times3(idiag,ptrans,itemp);
   times3(p,itemp,tensor);
   inertia[0] = tensor[0][0];
diff --git a/src/math_extra.h b/src/math_extra.h
index 35eb464f321..8f512e62925 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -116,7 +116,8 @@ void BuildRyMatrix(double R[3][3], const double angle);
 void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
-
+void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
+                                 double *block = nullptr, bool flag_super = false);
 void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia,
                        double *block = nullptr, bool flag_super = false);
 void inertia_line(double length, double theta, double mass, double *inertia);

From f179e37b1f8808e891bf6799e1cf4a2831c6fc73 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Mon, 22 Jan 2024 12:29:57 -0700
Subject: [PATCH 014/174] Add principal moments of inertia to bonus struct to
 avoid recomputing at every step.

Add moments of inertia calculation to set_shape, set_block and set_density for ellipsoids

Move volume calculation to MathExtra
---
 src/ASPHERE/compute_erotate_asphere.cpp | 10 ++--
 src/ASPHERE/compute_temp_asphere.cpp    | 26 ++++++----
 src/ASPHERE/fix_nh_asphere.cpp          |  7 +--
 src/ASPHERE/fix_nve_asphere.cpp         |  8 ++-
 src/ASPHERE/fix_nve_asphere_noforce.cpp |  9 ++--
 src/CG-DNA/fix_nve_dot.cpp              |  7 +--
 src/CG-DNA/fix_nve_dotc_langevin.cpp    |  7 +--
 src/GPU/fix_nve_asphere_gpu.cpp         | 16 +++---
 src/INTEL/fix_nve_asphere_intel.cpp     | 22 ++++----
 src/OPENMP/fix_nh_asphere_omp.cpp       |  7 +--
 src/OPENMP/fix_rigid_nh_omp.cpp         | 22 ++++----
 src/OPENMP/fix_rigid_omp.cpp            | 22 ++++----
 src/OPENMP/fix_rigid_small_omp.cpp      | 21 ++++----
 src/RIGID/fix_rigid.cpp                 | 34 ++++++-------
 src/RIGID/fix_rigid_small.cpp           | 33 ++++++------
 src/SRD/fix_srd.cpp                     | 12 ++---
 src/atom_vec_ellipsoid.cpp              | 67 +++++++++++++++----------
 src/atom_vec_ellipsoid.h                |  2 +-
 src/fix_langevin.cpp                    |  6 +--
 src/fix_move.cpp                        |  8 ++-
 src/math_extra.cpp                      | 65 ++++++++++++++++--------
 src/math_extra.h                        |  8 +--
 22 files changed, 220 insertions(+), 199 deletions(-)

diff --git a/src/ASPHERE/compute_erotate_asphere.cpp b/src/ASPHERE/compute_erotate_asphere.cpp
index 91c6d654cfd..16c294e0e4a 100644
--- a/src/ASPHERE/compute_erotate_asphere.cpp
+++ b/src/ASPHERE/compute_erotate_asphere.cpp
@@ -98,7 +98,6 @@ double ComputeERotateAsphere::compute_scalar()
   // no point particles since divide by inertia
 
   double length;
-  double *shape, *quat;
   double wbody[3], inertia[3];
   double rot[3][3];
   double erotate = 0.0;
@@ -106,15 +105,16 @@ double ComputeERotateAsphere::compute_scalar()
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
       if (ellipsoid && ebonus && (ellipsoid[i] >= 0)) {
-        shape = ebonus[ellipsoid[i]].shape;
-        quat = ebonus[ellipsoid[i]].quat;
 
         // principal moments of inertia
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
+
+        inertia[0] = ebonus[ellipsoid[i]].inertia[0];
+        inertia[1] = ebonus[ellipsoid[i]].inertia[1];
+        inertia[2] = ebonus[ellipsoid[i]].inertia[2];
 
         // wbody = angular velocity in body frame
 
-        MathExtra::quat_to_mat(quat,rot);
+        MathExtra::quat_to_mat(ebonus[ellipsoid[i]].quat,rot);
         MathExtra::transpose_matvec(rot,angmom[i],wbody);
         wbody[0] /= inertia[0];
         wbody[1] /= inertia[1];
diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index ac4d3c5f53e..ff4bb8178fe 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -202,7 +202,7 @@ double ComputeTempAsphere::compute_scalar()
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  double *shape,*quat;
+  double *quat;
   double wbody[3],inertia[3];
   double rot[3][3];
 
@@ -218,10 +218,11 @@ double ComputeTempAsphere::compute_scalar()
 
         // principal moments of inertia
 
-        shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
 
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
+        inertia[0] = bonus[ellipsoid[i]].inertia[0];
+        inertia[1] = bonus[ellipsoid[i]].inertia[1];
+        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
         // wbody = angular velocity in body frame
 
@@ -241,10 +242,11 @@ double ComputeTempAsphere::compute_scalar()
 
         // principal moments of inertia
 
-        shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
 
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
+        inertia[0] = bonus[ellipsoid[i]].inertia[0];
+        inertia[1] = bonus[ellipsoid[i]].inertia[1];
+        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
         // wbody = angular velocity in body frame
 
@@ -290,7 +292,7 @@ void ComputeTempAsphere::compute_vector()
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  double *shape,*quat;
+  double *quat;
   double wbody[3],inertia[3],t[6];
   double rot[3][3];
   double massone;
@@ -313,10 +315,11 @@ void ComputeTempAsphere::compute_vector()
 
         // principal moments of inertia
 
-        shape = bonus[ellipsoid[i]].shape;
-        quat = bonus[ellipsoid[i]].quat;
+        inertia[0] = bonus[ellipsoid[i]].inertia[0];
+        inertia[1] = bonus[ellipsoid[i]].inertia[1];
+        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
-        MathExtra::inertia_ellipsoid_principal(shape, massone, inertia);
+        quat = bonus[ellipsoid[i]].quat;
 
         // wbody = angular velocity in body frame
 
@@ -342,11 +345,12 @@ void ComputeTempAsphere::compute_vector()
 
         // principal moments of inertia
 
-        shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
         massone = rmass[i];
 
-        MathExtra::inertia_ellipsoid_principal(shape, massone, inertia);
+        inertia[0] = bonus[ellipsoid[i]].inertia[0];
+        inertia[1] = bonus[ellipsoid[i]].inertia[1];
+        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
         // wbody = angular velocity in body frame
 
diff --git a/src/ASPHERE/fix_nh_asphere.cpp b/src/ASPHERE/fix_nh_asphere.cpp
index 3f62eb61d96..d0a71d23ae8 100644
--- a/src/ASPHERE/fix_nh_asphere.cpp
+++ b/src/ASPHERE/fix_nh_asphere.cpp
@@ -106,19 +106,16 @@ void FixNHAsphere::nve_x()
   // returns new normalized quaternion
   // principal moments of inertia
 
-  double *shape,*quat;
-  double inertia[3];
+  double *inertia,*quat;
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
 
       // principal moments of inertia
 
-      shape = bonus[ellipsoid[i]].shape;
+      inertia = bonus[ellipsoid[i]].inertia;
       quat = bonus[ellipsoid[i]].quat;
 
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index 13e9ea620fd..a57608ca10c 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -59,8 +59,8 @@ void FixNVEAsphere::init()
 void FixNVEAsphere::initial_integrate(int /*vflag*/)
 {
   double dtfm;
-  double inertia[3],omega[3];
-  double *shape,*quat;
+  double omega[3];
+  double *inertia,*quat;
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   int *ellipsoid = atom->ellipsoid;
@@ -96,11 +96,9 @@ void FixNVEAsphere::initial_integrate(int /*vflag*/)
 
       // principal moments of inertia
 
-      shape = bonus[ellipsoid[i]].shape;
+      inertia = bonus[ellipsoid[i]].inertia;
       quat = bonus[ellipsoid[i]].quat;
 
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index 7d97eaad613..a09b5dc396e 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -66,14 +66,13 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
   double **x = atom->x;
   double **v = atom->v;
   double **angmom = atom->angmom;
-  double *rmass = atom->rmass;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
-  double *shape,*quat;
-  double inertia[3],omega[3];
+  double *inertia,*quat;
+  double omega[3];
 
   // update positions and quaternions for all particles
 
@@ -86,11 +85,9 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
 
       // principal moments of inertia
 
-      shape = bonus[ellipsoid[i]].shape;
+      inertia = bonus[ellipsoid[i]].inertia;
       quat = bonus[ellipsoid[i]].quat;
 
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/CG-DNA/fix_nve_dot.cpp b/src/CG-DNA/fix_nve_dot.cpp
index 55aaf5f21f6..4f25ecf1bf0 100644
--- a/src/CG-DNA/fix_nve_dot.cpp
+++ b/src/CG-DNA/fix_nve_dot.cpp
@@ -58,7 +58,7 @@ void FixNVEDot::init()
 
 void FixNVEDot::initial_integrate(int /*vflag*/)
 {
-  double *shape,*quat;
+  double *quat;
   double fquat[4],conjqm[4],inertia[3];
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
@@ -83,7 +83,6 @@ void FixNVEDot::initial_integrate(int /*vflag*/)
 
       dthlfm = dthlf / rmass[i];
       quat = bonus[ellipsoid[i]].quat;
-      shape = bonus[ellipsoid[i]].shape;
 
       // update momentum by 1/2 step
       v[i][0] += dthlfm * f[i][0];
@@ -111,7 +110,9 @@ void FixNVEDot::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia_ellipsoid_principal(shape, rmass[i], inertia);
+      inertia[0] = bonus[ellipsoid[i]].inertia[0];
+      inertia[1] = bonus[ellipsoid[i]].inertia[1];
+      inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
       // rotate quaternion and quaternion 4-momentum by full step
       no_squish_rotate(3,conjqm,quat,inertia,dthlf);
diff --git a/src/CG-DNA/fix_nve_dotc_langevin.cpp b/src/CG-DNA/fix_nve_dotc_langevin.cpp
index 1003930538a..7145af73885 100644
--- a/src/CG-DNA/fix_nve_dotc_langevin.cpp
+++ b/src/CG-DNA/fix_nve_dotc_langevin.cpp
@@ -127,7 +127,7 @@ void FixNVEDotcLangevin::compute_target()
 
 void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
 {
-  double *shape,*quat;
+  double *quat;
   double fquat[4],conjqm[4],inertia[3];
   double slq_conjqm[3];
 
@@ -158,7 +158,6 @@ void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
 
       dthlfm = dthlf / rmass[i];
       quat = bonus[ellipsoid[i]].quat;
-      shape = bonus[ellipsoid[i]].shape;
 
       // update momentum by 1/2 step
       v[i][0] += dthlfm * f[i][0];
@@ -186,7 +185,9 @@ void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia_ellipsoid_principal(shape, rmass[i], inertia);
+      inertia[0] = bonus[ellipsoid[i]].inertia[0];
+      inertia[1] = bonus[ellipsoid[i]].inertia[1];
+      inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
       M = inertia[0]*inertia[1]*inertia[2];
       M /= inertia[1]*inertia[2]+inertia[0]*inertia[2]+inertia[0]*inertia[1];
diff --git a/src/GPU/fix_nve_asphere_gpu.cpp b/src/GPU/fix_nve_asphere_gpu.cpp
index 9b75964c791..ecf8dbc8d25 100644
--- a/src/GPU/fix_nve_asphere_gpu.cpp
+++ b/src/GPU/fix_nve_asphere_gpu.cpp
@@ -35,8 +35,6 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
-
 #define ME_qnormalize(q)                                                \
 {                                                                       \
   double norm = 1.0 /                                                   \
@@ -384,14 +382,13 @@ double FixNVEAsphereGPU::reset_dt_omp(const int ifrom, const int ito,
       _dtfm[n++] = dtfir;
       _dtfm[n++] = dtfir;
       _dtfm[n++] = dtfir;
-      double *shape = bonus[ellipsoid[i]].shape;
-      double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+      double idot = bonus[ellipsoid[i]].inertia[0];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia0[i] = idot;
-      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+      idot = bonus[ellipsoid[i]].inertia[1];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia1[i] = idot;
-      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      idot = bonus[ellipsoid[i]].inertia[2];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia2[i] = idot;
     }
@@ -404,14 +401,13 @@ double FixNVEAsphereGPU::reset_dt_omp(const int ifrom, const int ito,
         _dtfm[n++] = dtfir;
         _dtfm[n++] = dtfir;
         _dtfm[n++] = dtfir;
-        double *shape = bonus[ellipsoid[i]].shape;
-        double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+        double idot = bonus[ellipsoid[i]].inertia[0];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia0[i] = idot;
-        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+        idot = bonus[ellipsoid[i]].inertia[1];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia1[i] = idot;
-        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+        idot = bonus[ellipsoid[i]].inertia[2];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia2[i] = idot;
       } else {
diff --git a/src/INTEL/fix_nve_asphere_intel.cpp b/src/INTEL/fix_nve_asphere_intel.cpp
index 848afa20cca..249ea0b6b43 100644
--- a/src/INTEL/fix_nve_asphere_intel.cpp
+++ b/src/INTEL/fix_nve_asphere_intel.cpp
@@ -30,8 +30,6 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
-
 /* ---------------------------------------------------------------------- */
 
 FixNVEAsphereIntel::FixNVEAsphereIntel(LAMMPS *lmp, int narg, char **arg) :
@@ -202,14 +200,19 @@ void FixNVEAsphereIntel::reset_dt() {
       _dtfm[n++] = dtf / rmass[i];
       _dtfm[n++] = dtf / rmass[i];
       _dtfm[n++] = dtf / rmass[i];
-      double *shape = bonus[ellipsoid[i]].shape;
-      double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+      if (inertia[0] != 0.0) inertia[0] = 1.0 / inertia[0];
+      _inertia0[i] = inertia[0];
+      if (inertia[1] != 0.0) inertia[1] = 1.0 / inertia[1];
+      _inertia1[i] = inertia[1];
+      if (inertia[2] != 0.0) inertia[2] = 1.0 / inertia[2];
+      _inertia2[i] = inertia[2];
+      double idot = bonus[ellipsoid[i]].inertia[0];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia0[i] = idot;
-      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+      idot = bonus[ellipsoid[i]].inertia[1];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia1[i] = idot;
-      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      idot = bonus[ellipsoid[i]].inertia[2];
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia2[i] = idot;
     }
@@ -221,14 +224,13 @@ void FixNVEAsphereIntel::reset_dt() {
         _dtfm[n++] = dtf / rmass[i];
         _dtfm[n++] = dtf / rmass[i];
         _dtfm[n++] = dtf / rmass[i];
-        double *shape = bonus[ellipsoid[i]].shape;
-        double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+        double idot = bonus[ellipsoid[i]].inertia[0];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia0[i] = idot;
-        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+        idot = bonus[ellipsoid[i]].inertia[1];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia1[i] = idot;
-        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+        idot = bonus[ellipsoid[i]].inertia[2];
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia2[i] = idot;
       } else {
diff --git a/src/OPENMP/fix_nh_asphere_omp.cpp b/src/OPENMP/fix_nh_asphere_omp.cpp
index 29d680cb074..ff123b0b292 100644
--- a/src/OPENMP/fix_nh_asphere_omp.cpp
+++ b/src/OPENMP/fix_nh_asphere_omp.cpp
@@ -105,7 +105,6 @@ void FixNHAsphereOMP::nve_x()
   auto * _noalias const x = (dbl3_t *) atom->x[0];
   const auto * _noalias const v = (dbl3_t *) atom->v[0];
   auto * _noalias const angmom = (dbl3_t *) atom->angmom[0];
-  const double * _noalias const rmass = atom->rmass;
   const int * _noalias const mask = atom->mask;
   AtomVecEllipsoid::Bonus * _noalias const bonus = avec->bonus;
   const int * _noalias const ellipsoid = atom->ellipsoid;
@@ -125,7 +124,7 @@ void FixNHAsphereOMP::nve_x()
 #endif
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
-      double omega[3], inertia[3];
+      double omega[3];
 
       x[i].x += dtv * v[i].x;
       x[i].y += dtv * v[i].y;
@@ -133,11 +132,9 @@ void FixNHAsphereOMP::nve_x()
 
       // principal moments of inertia
 
-      double * const shape = bonus[ellipsoid[i]].shape;
+      double * const inertia = bonus[ellipsoid[i]].inertia;
       double * const quat = bonus[ellipsoid[i]].quat;
 
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/OPENMP/fix_rigid_nh_omp.cpp b/src/OPENMP/fix_rigid_nh_omp.cpp
index 90b38ec3a15..dbf0b26a972 100644
--- a/src/OPENMP/fix_rigid_nh_omp.cpp
+++ b/src/OPENMP/fix_rigid_nh_omp.cpp
@@ -772,9 +772,9 @@ void FixRigidNHOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
     double theta_body,theta;
-    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
+    double exone[3],eyone[3],ezone[3],p[3][3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -798,14 +798,13 @@ void FixRigidNHOMP::set_xv_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -961,8 +960,8 @@ void FixRigidNHOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
-    double ione[3],exone[3],eyone[3],ezone[3];
+    double *quatatom,*inertiaatom;
+    double exone[3],eyone[3],ezone[3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -982,12 +981,11 @@ void FixRigidNHOMP::set_v_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
diff --git a/src/OPENMP/fix_rigid_omp.cpp b/src/OPENMP/fix_rigid_omp.cpp
index 3bfe51ddc0c..a510a1fb6fc 100644
--- a/src/OPENMP/fix_rigid_omp.cpp
+++ b/src/OPENMP/fix_rigid_omp.cpp
@@ -503,9 +503,9 @@ void FixRigidOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
     double theta_body,theta;
-    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
+    double exone[3],eyone[3],ezone[3],p[3][3];
 
     AtomVecEllipsoid::Bonus *ebonus = nullptr;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -529,14 +529,13 @@ void FixRigidOMP::set_xv_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -692,8 +691,8 @@ void FixRigidOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
-    double ione[3],exone[3],eyone[3],ezone[3];
+    double *quatatom,*inertiaatom;
+    double exone[3],eyone[3],ezone[3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -713,12 +712,11 @@ void FixRigidOMP::set_v_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
diff --git a/src/OPENMP/fix_rigid_small_omp.cpp b/src/OPENMP/fix_rigid_small_omp.cpp
index 2111e3baf56..d4f61af259e 100644
--- a/src/OPENMP/fix_rigid_small_omp.cpp
+++ b/src/OPENMP/fix_rigid_small_omp.cpp
@@ -435,9 +435,9 @@ void FixRigidSmallOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
+    double exone[3],eyone[3],ezone[3],p[3][3];
     double theta_body,theta;
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -461,13 +461,13 @@ void FixRigidSmallOMP::set_xv_thr()
         omega[i][1] = b.omega[1];
         omega[i][2] = b.omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b.quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,angmom[i]);
+        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,
+                                   inertiaatom,angmom[i]);
       } else if (eflags[i] & LINE) {
         if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]);
         else theta_body = -2.0*acos(b.quat[0]);
@@ -620,8 +620,8 @@ void FixRigidSmallOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double ione[3],exone[3],eyone[3],ezone[3];
-    double *shape,*quatatom,*inertiaatom;
+    double exone[3],eyone[3],ezone[3];
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -641,12 +641,11 @@ void FixRigidSmallOMP::set_v_thr()
         omega[i][1] = b.omega[1];
         omega[i][2] = b.omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,
-                                   angmom[i]);
+        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,
+                                   inertiaatom,angmom[i]);
       } else if (eflags[i] & LINE) {
         omega[i][0] = b.omega[0];
         omega[i][1] = b.omega[1];
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 5ad85160e8a..0028d337099 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1299,7 +1299,7 @@ void FixRigid::set_xv()
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
-  double ione[3],exone[3],eyone[3],ezone[3],vr[6],p[3][3];
+  double exone[3],eyone[3],ezone[3],vr[6],p[3][3];
 
   double **x = atom->x;
   double **v = atom->v;
@@ -1404,7 +1404,7 @@ void FixRigid::set_xv()
 
   if (extended) {
     double theta_body,theta;
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus = nullptr;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1428,14 +1428,13 @@ void FixRigid::set_xv()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -1480,7 +1479,7 @@ void FixRigid::set_v()
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
-  double ione[3],exone[3],eyone[3],ezone[3],delta[3],vr[6];
+  double exone[3],eyone[3],ezone[3],delta[3],vr[6];
 
   double **x = atom->x;
   double **v = atom->v;
@@ -1566,7 +1565,7 @@ void FixRigid::set_v()
   // set omega, angmom of each extended particle
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1586,12 +1585,11 @@ void FixRigid::set_v()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
-                                   angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
+                                   inertiaatom,angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
@@ -1843,7 +1841,7 @@ void FixRigid::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
     double length,theta;
 
     for (i = 0; i < nlocal; i++) {
@@ -1857,9 +1855,9 @@ void FixRigid::setup_bodies_static()
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid(shape,quatatom,massone,ivec);
+        MathExtra::inertia_ellipsoid(inertiaatom,quatatom,massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
@@ -2068,7 +2066,7 @@ void FixRigid::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *shape,*inertiaatom;
+    double *inertiaatom;
     double length;
 
     for (i = 0; i < nlocal; i++) {
@@ -2082,8 +2080,8 @@ void FixRigid::setup_bodies_static()
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
-        MathExtra::inertia_ellipsoid(shape,orient[i],massone,ivec);
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        MathExtra::inertia_ellipsoid(inertiaatom,orient[i],massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 0ed78fcc2db..4b087eb77f4 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -1192,7 +1192,7 @@ void FixRigidSmall::set_xv()
 {
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
-  double ione[3],exone[3],eyone[3],ezone[3],vr[6],p[3][3];
+  double exone[3],eyone[3],ezone[3],vr[6],p[3][3];
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
@@ -1306,7 +1306,7 @@ void FixRigidSmall::set_xv()
 
   if (extended) {
     double theta_body,theta;
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1330,13 +1330,13 @@ void FixRigidSmall::set_xv()
         omega[i][1] = b->omega[1];
         omega[i][2] = b->omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b->quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,angmom[i]);
+        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,
+                                   inertiaatom,angmom[i]);
       } else if (eflags[i] & LINE) {
         if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
         else theta_body = -2.0*acos(b->quat[0]);
@@ -1380,7 +1380,7 @@ void FixRigidSmall::set_v()
 {
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
-  double ione[3],exone[3],eyone[3],ezone[3],delta[3],vr[6];
+  double exone[3],eyone[3],ezone[3],delta[3],vr[6];
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
@@ -1465,7 +1465,7 @@ void FixRigidSmall::set_v()
   // set omega, angmom of each extended particle
 
   if (extended) {
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1485,12 +1485,11 @@ void FixRigidSmall::set_v()
         omega[i][1] = b->omega[1];
         omega[i][2] = b->omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid_principal(shape, rmass[i], ione);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,
-                                   angmom[i]);
+        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,
+                                   inertiaatom,angmom[i]);
       } else if (eflags[i] & LINE) {
         omega[i][0] = b->omega[0];
         omega[i][1] = b->omega[1];
@@ -1989,7 +1988,7 @@ void FixRigidSmall::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *shape,*quatatom,*inertiaatom;
+    double *quatatom,*inertiaatom;
     double length,theta;
 
     for (i = 0; i < nlocal; i++) {
@@ -2004,9 +2003,9 @@ void FixRigidSmall::setup_bodies_static()
         inertia[1] += SINERTIA*massone * radius[i]*radius[i];
         inertia[2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid(shape,quatatom,massone,ivec);
+        MathExtra::inertia_ellipsoid(inertiaatom,quatatom,massone,ivec);
         inertia[0] += ivec[0];
         inertia[1] += ivec[1];
         inertia[2] += ivec[2];
@@ -2226,7 +2225,7 @@ void FixRigidSmall::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *shape,*inertiaatom;
+    double *inertiaatom;
     double length;
 
     for (i = 0; i < nlocal; i++) {
@@ -2241,8 +2240,8 @@ void FixRigidSmall::setup_bodies_static()
         inertia[1] += SINERTIA*massone * radius[i]*radius[i];
         inertia[2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        shape = ebonus[ellipsoid[i]].shape;
-        MathExtra::inertia_ellipsoid(shape,orient[i],massone,ivec);
+        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        MathExtra::inertia_ellipsoid(inertiaatom,orient[i],massone,ivec);
         inertia[0] += ivec[0];
         inertia[1] += ivec[1];
         inertia[2] += ivec[2];
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index 8c52bf70826..24501adaa44 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -2653,10 +2653,10 @@ void FixSRD::parameterize()
       if (mask[i] & biggroupbit) {
         if (radius && radius[i] > 0.0) {
           double r = radfactor * radius[i];
-          volbig += 4.0 / 3.0 * MY_PI * r * r * r;
+          volbig += MY_4PI3 * r * r * r;
         } else if (ellipsoid && ellipsoid[i] >= 0) {
           double *shape = ebonus[ellipsoid[i]].shape;
-          volbig += 4.0 / 3.0 * MY_PI * shape[0] * shape[1] * shape[2] * radfactor * radfactor *
+          volbig += MathExtra::volume_ellipsoid(shape) * radfactor * radfactor *
               radfactor;
         } else if (tri && tri[i] >= 0) {
           double *c1 = tbonus[tri[i]].c1;
@@ -2935,8 +2935,7 @@ void FixSRD::big_static()
 void FixSRD::big_dynamic()
 {
   int i;
-  double *shape, *quat, *inertia;
-  double inertiaone[3];
+  double *quat, *inertia;
 
   AtomVecEllipsoid::Bonus *ebonus;
   if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -2969,9 +2968,8 @@ void FixSRD::big_dynamic()
     } else if (biglist[k].type == ELLIPSOID) {
       quat = ebonus[ellipsoid[i]].quat;
       MathExtra::q_to_exyz(quat, biglist[k].ex, biglist[k].ey, biglist[k].ez);
-      shape = ebonus[ellipsoid[i]].shape;
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertiaone);
-      MathExtra::angmom_to_omega(angmom[i], biglist[k].ex, biglist[k].ey, biglist[k].ez, inertiaone,
+      inertia = ebonus[ellipsoid[i]].inertia;
+      MathExtra::angmom_to_omega(angmom[i], biglist[k].ex, biglist[k].ey, biglist[k].ez, inertia,
                                  biglist[k].omega);
 
       // line
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 5ce06481080..3fae19c3f07 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -30,7 +30,6 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-using MathConst::MY_4PI3;
 
 #define EPSBLOCK2 1.0e-3
 
@@ -44,8 +43,8 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   bonus_flag = 1;
 
   size_forward_bonus = 4;
-  size_border_bonus = 10;
-  size_restart_bonus_one = 10;
+  size_border_bonus = 13;
+  size_restart_bonus_one = 13;
   size_data_bonus = 10;
 
   atom->ellipsoid_flag = 1;
@@ -196,8 +195,7 @@ void AtomVecEllipsoid::unpack_comm_bonus(int n, int first, double *buf)
 int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
 {
   int i, j, m;
-  double *shape, *quat;
-  double *block;
+  double *shape, *quat, *block, *inertia;
 
   m = 0;
   for (i = 0; i < n; i++) {
@@ -209,6 +207,7 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       shape = bonus[ellipsoid[j]].shape;
       quat = bonus[ellipsoid[j]].quat;
       block = bonus[ellipsoid[j]].block;
+      inertia = bonus[ellipsoid[j]].inertia;
       buf[m++] = shape[0];
       buf[m++] = shape[1];
       buf[m++] = shape[2];
@@ -218,6 +217,9 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       buf[m++] = quat[3];
       buf[m++] = block[0];
       buf[m++] = block[1];
+      buf[m++] = inertia[0];
+      buf[m++] = inertia[1];
+      buf[m++] = inertia[2];
     }
   }
 
@@ -229,8 +231,7 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
 int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
 {
   int i, j, m, last;
-  double *shape, *quat;
-  double *block;
+  double *shape, *quat, *block, *inertia;
   bool flag_super;
 
   m = 0;
@@ -254,6 +255,9 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       block[0] = buf[m++];
       block[1] = buf[m++];
       block[2] = block[0] / block[1];
+      inertia[0] = buf[m++];
+      inertia[1] = buf[m++];
+      inertia[2] = buf[m++];
       flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
       bonus[j].flag_super = flag_super;
       bonus[j].ilocal = i;
@@ -283,6 +287,7 @@ int AtomVecEllipsoid::pack_exchange_bonus(int i, double *buf)
     double *shape = bonus[j].shape;
     double *quat = bonus[j].quat;
     double *block = bonus[j].block;
+    double *inertia = bonus[j].inertia;
     buf[m++] = shape[0];
     buf[m++] = shape[1];
     buf[m++] = shape[2];
@@ -292,6 +297,9 @@ int AtomVecEllipsoid::pack_exchange_bonus(int i, double *buf)
     buf[m++] = quat[3];
     buf[m++] = block[0];
     buf[m++] = block[1];
+    buf[m++] = inertia[0];
+    buf[m++] = inertia[1];
+    buf[m++] = inertia[2];
   }
 
   return m;
@@ -310,6 +318,7 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
@@ -321,6 +330,9 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     block[0] = buf[m++];
     block[1] = buf[m++];
     block[2] = block[0] / block[1];
+    inertia[0] = buf[m++];
+    inertia[1] = buf[m++];
+    inertia[2] = buf[m++];
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
@@ -375,6 +387,9 @@ int AtomVecEllipsoid::pack_restart_bonus(int i, double *buf)
     buf[m++] = bonus[j].quat[3];
     buf[m++] = bonus[j].block[0];
     buf[m++] = bonus[j].block[1];
+    buf[m++] = bonus[j].inertia[0];
+    buf[m++] = bonus[j].inertia[1];
+    buf[m++] = bonus[j].inertia[2];
   }
 
   return m;
@@ -396,6 +411,7 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
@@ -407,6 +423,9 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     block[0] = buf[m++];
     block[1] = buf[m++];
     block[2] = block[0] / block[1];
+    inertia[0] = buf[m++];
+    inertia[1] = buf[m++];
+    inertia[2] = buf[m++];
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
@@ -454,12 +473,16 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
     block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
   }
-  block[2] = block[0] / block[1]; // ASSUMES EVEN NUMBERS ONLY?
+  block[2] = block[0] / block[1];
 
   // reset ellipsoid mass
   // previously stored density in rmass
 
-  rmass[m] *= compute_volume(shape, block, flag_super);
+  rmass[m] *= MathExtra::volume_ellipsoid(shape, block, flag_super);
+
+  // Principal moments of inertia
+
+  MathExtra::inertia_ellipsoid_principal(shape, rmass[m], bonus[nlocal_bonus].inertia, block, flag_super);
 
   bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
   bonus[nlocal_bonus].ilocal = m;
@@ -531,7 +554,7 @@ void AtomVecEllipsoid::pack_data_pre(int ilocal)
     shape = bonus[ellipsoid_flag].shape;
     block = bonus[ellipsoid_flag].block;
     flag_super = bonus[ellipsoid_flag].flag_super;
-    rmass[ilocal] /= compute_volume(shape, block, flag_super);
+    rmass[ilocal] /= MathExtra::volume_ellipsoid(shape, block, flag_super);
   }
 }
 
@@ -685,6 +708,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = shapex;
     shape[1] = shapey;
@@ -696,6 +720,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     block[0] = 2;
     block[1] = 2;
     flag_super = false;
+    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = i;
     ellipsoid[i] = nlocal_bonus++;
@@ -706,10 +731,12 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool flag_super = bonus[ellipsoid[i]].flag_super;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
+    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
     bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
   }
 }
@@ -727,6 +754,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     double *shape = bonus[nlocal_bonus].shape;
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool &flag_super = bonus[nlocal_bonus].flag_super;
     shape[0] = 0.5;
     shape[1] = 0.5;
@@ -740,16 +768,19 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     quat[3] = 0.0;
     bonus[nlocal_bonus].ilocal = i;
     flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
     bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     ellipsoid[i] = nlocal_bonus++;
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
+    double *inertia = bonus[nlocal_bonus].inertia;
     bool &flag_super = bonus[ellipsoid[i]].flag_super;
     block[0] = blockn1;
     block[1] = blockn2;
     block[2] = blockn1 / blockn2;
     flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
+    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
     bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
   }
 }
@@ -798,20 +829,4 @@ double AtomVecEllipsoid::compute_radcirc(double *shape, double *block, bool flag
   */
 }
 
-/* ----------------------------------------------------------------------
-   compute the volume of the ellipsoid
-------------------------------------------------------------------------- */
 
-double AtomVecEllipsoid::compute_volume(double *shape, double *block, bool flag_super)
-{
-  double unitvol = MY_4PI3;
-
-  // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
-
-  if (flag_super) {
-    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    unitvol = e1 * e2 * MathSpecial::beta(0.5 * e1, 1.0 + e1) *
-                        MathSpecial::beta(0.5 * e2, 0.5 * e2);
-  }
-  return unitvol * shape[0] * shape[1] * shape[2];
-}
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 37a08b09b29..6265c976a61 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -30,6 +30,7 @@ class AtomVecEllipsoid : virtual public AtomVec {
     double shape[3];
     double quat[4];
     double block[3];
+    double inertia[3];
     double radcirc;
     bool flag_super;
     int ilocal;
@@ -71,7 +72,6 @@ class AtomVecEllipsoid : virtual public AtomVec {
   void set_shape(int, double, double, double);
   void set_block(int, double, double);
   double compute_radcirc(double *, double *, bool);
-  double compute_volume(double *, double *, bool);
 
   int nlocal_bonus;
 
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index f3736ceac2e..8c9c25dcfa8 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -611,13 +611,13 @@ void FixLangevin::angmom_thermostat()
   // gives correct rotational diffusivity behavior if (nearly) spherical
   // any value will be incorrect for rotational diffusivity if aspherical
 
-  double inertia[3],omega[3],tran[3];
-  double *shape,*quat;
+  double omega[3],tran[3];
+  double *shape,*quat,*inertia;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       shape = bonus[ellipsoid[i]].shape;
-      MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
+      inertia = bonus[ellipsoid[i]].inertia;
       quat = bonus[ellipsoid[i]].quat;
       MathExtra::mq_to_omega(angmom[i],quat,inertia,omega);
 
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index 70f09f0472d..df9810bc165 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -564,7 +564,7 @@ void FixMove::initial_integrate(int /*vflag*/)
   double ddotr, dx, dy, dz;
   double dtfm, theta_new;
   double xold[3], a[3], b[3], c[3], d[3], disp[3], w[3], ex[3], ey[3], ez[3];
-  double inertia_ellipsoid[3], qrotate[4];
+  double qrotate[4];
   double *quat, *inertia, *shape;
 
   double delta = (update->ntimestep - time_origin) * dt;
@@ -779,8 +779,7 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia_ellipsoid);
-              inertia = inertia_ellipsoid;
+              inertia = avec_ellipsoid->bonus[ellipsoid[i]].inertia;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
               inertia = avec_tri->bonus[tri[i]].inertia;
@@ -918,8 +917,7 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia_ellipsoid);
-              inertia = inertia_ellipsoid;
+              inertia = avec_ellipsoid->bonus[ellipsoid[i]].inertia;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
               inertia = avec_tri->bonus[tri[i]].inertia;
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 583c8ada403..28e22cdfd9b 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -18,10 +18,12 @@
 
 #include "math_extra.h"
 #include "math_special.h"
+#include "math_const.h"
 #include <cstdio>
 #include <cstring>
 
 using namespace LAMMPS_NS;
+using MathConst::MY_4PI3;
 
 namespace MathExtra {
 
@@ -473,14 +475,14 @@ void quat_to_mat_trans(const double *quat, double mat[3][3])
 }
 
 /* ----------------------------------------------------------------------
-   compute principal-frame inertia tensor of an ellipsoid
+   compute principal moments of inertia of an ellipsoid
    shape = 3 radii of ellipsoid
    quat = orientiation quaternion of ellipsoid
    block = blockiness exponents of super-ellipsoid
-   return principal inertia tensor diagonal as 3-vector
+   return principal moments of inertia as 3-vector
 ------------------------------------------------------------------------- */
 
-void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
+void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
                                  double *block, bool flag_super)
 {
   double rsq0 = shape[0] * shape[0];
@@ -489,23 +491,22 @@ void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
   if (flag_super) {
     // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    double dens = mass / (MathSpecial::beta(0.5 * e1, 1.0 + e1) *
-                          MathSpecial::beta(0.5 * e2, 0.5 * e2));
-    double m0 = rsq0 * MathSpecial::beta(0.5 * e1, 1 + 2 * e1) *
-                       MathSpecial::beta(0.5 * e2, 1.5 * e2);
-    double m1 = rsq1 * MathSpecial::beta(0.5 * e1, 1 + 2 * e1) *
-                       MathSpecial::beta(1.5 * e2, 0.5 * e2);
-    double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) *
-                       MathSpecial::beta(0.5 * e2, 0.5 * e2);
-    inertia[0] = dens * (m1 + m2);
-    inertia[1] = dens * (m0 + m2);
-    inertia[2] = dens * (m0 + m1);
+    double beta_tmp1 = MathSpecial::beta(0.5 * e1, 1 + 2 * e1);
+    double beta_tmp2 = MathSpecial::beta(0.5 * e2, 0.5 * e2);
+    double beta_tmp3 = MathSpecial::beta(0.5 * e2, 1.5 * e2);
+    double dens = mass / (MathSpecial::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
+    double m0 = rsq0 * beta_tmp1 * beta_tmp3;
+    double m1 = rsq1 * beta_tmp1 * beta_tmp3;
+    double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) * beta_tmp2;
+    idiag[0] = dens * (m1 + m2);
+    idiag[1] = dens * (m0 + m2);
+    idiag[2] = dens * (m0 + m1);
   }
   else {
     double dens = 0.2 * mass;
-    inertia[0] = dens * (rsq1 + rsq2);
-    inertia[1] = dens * (rsq0 + rsq2);
-    inertia[2] = dens * (rsq0 + rsq1);
+    idiag[0] = dens * (rsq1 + rsq2);
+    idiag[1] = dens * (rsq0 + rsq2);
+    idiag[2] = dens * (rsq0 + rsq1);
   }
 }
 
@@ -515,17 +516,18 @@ void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
    quat = orientiation quaternion of ellipsoid
    block = blockiness exponents of super-ellipsoid
    return symmetric inertia tensor as 6-vector in Voigt ordering
+
+   THIS IS EXACTLY THE SAME FUNCTION AS INERTIA_TRIANGLE
+   TAKES DIAG PRINCIPA INERTIA AND ROTATES IT. SHOULD WE CONSOLIDATE ???
 ------------------------------------------------------------------------- */
 
-void inertia_ellipsoid(double *shape, double *quat, double mass,
-                       double *inertia, double *block, bool flag_super)
+void inertia_ellipsoid(double *idiag, double *quat, double /*mass*/,
+                       double *inertia)
 {
   double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
-  double idiag[3];
 
   quat_to_mat(quat,p);
   quat_to_mat_trans(quat,ptrans);
-  inertia_ellipsoid_principal(shape, mass, idiag, block, flag_super);
   diag_times3(idiag,ptrans,itemp);
   times3(p,itemp,tensor);
   inertia[0] = tensor[0][0];
@@ -639,6 +641,27 @@ void inertia_triangle(double *idiag, double *quat, double /*mass*/,
   inertia[5] = tensor[0][1];
 }
 
+/* ----------------------------------------------------------------------
+   compute the volume of the ellipsoid
+   shape = 3 radii of ellipsoid
+   block = blockiness exponents of super-ellipsoid
+   return volume of the ellipsoid
+------------------------------------------------------------------------- */
+
+double volume_ellipsoid(double *shape, double *block, bool flag_super)
+{
+  double unitvol = MY_4PI3;
+
+  // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
+
+  if (flag_super) {
+    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
+    unitvol = e1 * e2 * MathSpecial::beta(0.5 * e1, 1.0 + e1) *
+                        MathSpecial::beta(0.5 * e2, 0.5 * e2);
+  }
+  return unitvol * shape[0] * shape[1] * shape[2];
+}
+
 /* ----------------------------------------------------------------------
    build rotation matrix for a small angle rotation around the X axis
 ------------------------------------------------------------------------- */
diff --git a/src/math_extra.h b/src/math_extra.h
index 8f512e62925..ad9800842e2 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -116,14 +116,16 @@ void BuildRyMatrix(double R[3][3], const double angle);
 void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
-void inertia_ellipsoid_principal(double *shape, double mass, double *inertia,
+void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
                                  double *block = nullptr, bool flag_super = false);
-void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia,
-                       double *block = nullptr, bool flag_super = false);
+void inertia_ellipsoid(double *idiag, double *quat, double mass, double *inertia);
 void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
 
+// volumes
+double volume_ellipsoid(double *shape, double *block = nullptr, bool flag_super = false);
+
 // triclinic bounding box of a sphere
 
 void tribbox(double *, double, double *);

From 6248e81731a36835dc254c09f9583df54658ca2e Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Mon, 22 Jan 2024 13:32:10 -0700
Subject: [PATCH 015/174] correct bug in inertia, forgotten 0.5 factor

---
 src/math_extra.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 28e22cdfd9b..85619c5b249 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -495,8 +495,8 @@ void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
     double beta_tmp2 = MathSpecial::beta(0.5 * e2, 0.5 * e2);
     double beta_tmp3 = MathSpecial::beta(0.5 * e2, 1.5 * e2);
     double dens = mass / (MathSpecial::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
-    double m0 = rsq0 * beta_tmp1 * beta_tmp3;
-    double m1 = rsq1 * beta_tmp1 * beta_tmp3;
+    double m0 = 0.5 * rsq0 * beta_tmp1 * beta_tmp3;
+    double m1 = 0.5 * rsq1 * beta_tmp1 * beta_tmp3;
     double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) * beta_tmp2;
     idiag[0] = dens * (m1 + m2);
     idiag[1] = dens * (m0 + m2);

From 8ee761634eaf20ba475c13e806dd4114e2c34cf9 Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Mon, 22 Jan 2024 17:08:44 -0700
Subject: [PATCH 016/174] fix omitted pointer initialization

---
 src/atom_vec_ellipsoid.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 3fae19c3f07..9a9ea541668 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -245,6 +245,7 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       shape = bonus[j].shape;
       quat = bonus[j].quat;
       block = bonus[j].block;
+      inertia = bonus[j].inertia;
       shape[0] = buf[m++];
       shape[1] = buf[m++];
       shape[2] = buf[m++];

From 7b91db73e4ed927ccd4a379fc730b2ae6dbd1a2b Mon Sep 17 00:00:00 2001
From: jbcouli <jbcouli@sandia.gov>
Date: Mon, 29 Jan 2024 22:24:34 -0700
Subject: [PATCH 017/174] use radius pointer to store circumscribed radius of
 ellipsoid. Move radius calculation to MathExtra

---
 src/atom_vec_ellipsoid.cpp            | 79 ++++++---------------------
 src/atom_vec_ellipsoid.h              |  3 +-
 src/atom_vec_sphere.cpp               |  6 +-
 src/math_extra.cpp                    | 49 +++++++++++++++++
 src/math_extra.h                      |  3 +-
 unittest/formats/test_atom_styles.cpp | 16 ++++--
 6 files changed, 83 insertions(+), 73 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 9a9ea541668..bfcf1c3f50a 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -50,6 +50,9 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   atom->ellipsoid_flag = 1;
   atom->rmass_flag = atom->angmom_flag = atom->torque_flag = 1;
 
+  // Circumscribed radius, not physical radius
+  atom->radius_flag = 1;
+
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
 
   // strings with peratom variables to include in each AtomVec method
@@ -57,15 +60,15 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   // order of fields in a string does not matter
   // except: fields_data_atom & fields_data_vel must match data file
 
-  fields_grow = {"rmass", "angmom", "torque", "ellipsoid"};
-  fields_copy = {"rmass", "angmom"};
+  fields_grow = {"radius", "rmass", "angmom", "torque", "ellipsoid"};
+  fields_copy = {"radius", "rmass", "angmom"};
   fields_comm_vel = {"angmom"};
   fields_reverse = {"torque"};
-  fields_border = {"rmass"};
-  fields_border_vel = {"rmass", "angmom"};
-  fields_exchange = {"rmass", "angmom"};
-  fields_restart = {"rmass", "angmom"};
-  fields_create = {"rmass", "angmom", "ellipsoid"};
+  fields_border = {"radius", "rmass"};
+  fields_border_vel = {"radius", "rmass", "angmom"};
+  fields_exchange = {"radius", "rmass", "angmom"};
+  fields_restart = {"radius", "rmass", "angmom"};
+  fields_create = {"radius", "rmass", "angmom", "ellipsoid"};
   fields_data_atom = {"id", "type", "ellipsoid", "rmass", "x"};
   fields_data_vel = {"id", "v", "angmom"};
 
@@ -87,6 +90,7 @@ AtomVecEllipsoid::~AtomVecEllipsoid()
 void AtomVecEllipsoid::grow_pointers()
 {
   ellipsoid = atom->ellipsoid;
+  radius = atom->radius;
   rmass = atom->rmass;
   angmom = atom->angmom;
 }
@@ -262,7 +266,6 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
       bonus[j].flag_super = flag_super;
       bonus[j].ilocal = i;
-      bonus[j].radcirc = compute_radcirc(shape, block, flag_super);
       ellipsoid[i] = j;
       nghost_bonus++;
     }
@@ -335,7 +338,6 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
-    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -428,7 +430,6 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
-    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -485,7 +486,7 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
 
   MathExtra::inertia_ellipsoid_principal(shape, rmass[m], bonus[nlocal_bonus].inertia, block, flag_super);
 
-  bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
+  radius[m] = MathExtra::radius_ellipsoid(shape, block, flag_super);
   bonus[nlocal_bonus].ilocal = m;
   ellipsoid[m] = nlocal_bonus++;
 }
@@ -508,6 +509,7 @@ double AtomVecEllipsoid::memory_usage_bonus()
 void AtomVecEllipsoid::create_atom_post(int ilocal)
 {
   rmass[ilocal] = 1.0;
+  radius[ilocal] = 0.0;
   ellipsoid[ilocal] = -1;
 }
 
@@ -722,13 +724,14 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     block[1] = 2;
     flag_super = false;
     MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
+    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
     bonus[nlocal_bonus].ilocal = i;
     ellipsoid[i] = nlocal_bonus++;
   } else if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) {
     copy_bonus_all(nlocal_bonus - 1, ellipsoid[i]);
     nlocal_bonus--;
     ellipsoid[i] = -1;
+    radius[i] = 0.0;
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
@@ -738,7 +741,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     shape[1] = shapey;
     shape[2] = shapez;
     MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
+    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
   }
 }
 
@@ -770,7 +773,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     bonus[nlocal_bonus].ilocal = i;
     flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
     MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    bonus[nlocal_bonus].radcirc = compute_radcirc(shape, block, flag_super);
+    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
     ellipsoid[i] = nlocal_bonus++;
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
@@ -782,52 +785,6 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     block[2] = blockn1 / blockn2;
     flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
     MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    bonus[ellipsoid[i]].radcirc = compute_radcirc(shape, block, flag_super);
+    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
   }
 }
-
-/* ----------------------------------------------------------------------
-   compute the circumscribed radius to the ellipsoid
-------------------------------------------------------------------------- */
-
-double AtomVecEllipsoid::compute_radcirc(double *shape, double *block, bool flag_super)
-{
-  if (!flag_super) return std::max(std::max(shape[0], shape[1]), shape[2]);
-
-  // Super ellipsoid
-  double a = shape[0], b = shape[1], c = shape[2];
-  double n1 = block[0], n2 = block[1], n1divn2 = block[2];
-  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
-
-  // Cylinder approximation for n2=2
-
-  if (std::fabs(n2 - 2.0) < EPSBLOCK2) return sqrt(a * a + c * c);
-
-  // Ellipsoid approximation for n1=2
-
-  if (std::fabs(n1 - 2.0) < EPSBLOCK2) return std::max(c, sqrt(a * a + b * b));
-
-  // Bounding box approximation when n1>2 and n2>2
-
-  return sqrt(a * a + b * b + c * c);
-
-  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
-  // Not sure if exact solution worth it compared to boundig box diagonal
-  // If both blockiness exponents are greater than 2, the exact radius does not
-  // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
-  /*
-  double x, y, z, alpha, beta, gamma, xtilde;
-  double small = 0.1; // TO AVOID OVERFLOW IN POW
-
-  alpha = std::fabs(n2 - 2.0) > small ? std::pow(b / a, 2.0 / (n2 - 2.0)) : 0.0;
-  gamma = std::fabs(n1divn2 - 1.0) > small ? std::pow((1.0 + std::pow(alpha, n2)), n1divn2 - 1.0) : 1.0;
-  beta = std::pow(gamma * c * c / (a * a), 1.0 / std::max(n1 - 2.0, small));
-  xtilde = 1.0 / std::pow(std::pow(1.0 + std::pow(alpha, n2), n1divn2) + std::pow(beta, n1), 1.0 / n1);
-  x = a * xtilde;
-  y = alpha * b * xtilde;
-  z = beta * c * xtilde;
-  return sqrt(x * x + y * y + z * z);
-  */
-}
-
-
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 6265c976a61..f8fa1342cbc 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -31,7 +31,6 @@ class AtomVecEllipsoid : virtual public AtomVec {
     double quat[4];
     double block[3];
     double inertia[3];
-    double radcirc;
     bool flag_super;
     int ilocal;
   };
@@ -77,7 +76,7 @@ class AtomVecEllipsoid : virtual public AtomVec {
 
  protected:
   int *ellipsoid;
-  double *rmass;
+  double *radius, *rmass;
   double **angmom;
   double **quat_hold;
 
diff --git a/src/atom_vec_sphere.cpp b/src/atom_vec_sphere.cpp
index 3c7be5d3ee4..6f670911100 100644
--- a/src/atom_vec_sphere.cpp
+++ b/src/atom_vec_sphere.cpp
@@ -109,7 +109,7 @@ void AtomVecSphere::grow_pointers()
 void AtomVecSphere::create_atom_post(int ilocal)
 {
   radius[ilocal] = 0.5;
-  rmass[ilocal] = 4.0 * MY_PI / 3.0 * 0.5 * 0.5 * 0.5;
+  rmass[ilocal] = MY_4PI3 * 0.5 * 0.5 * 0.5;
 }
 
 /* ----------------------------------------------------------------------
@@ -121,7 +121,7 @@ void AtomVecSphere::data_atom_post(int ilocal)
 {
   radius_one = 0.5 * atom->radius[ilocal];
   radius[ilocal] = radius_one;
-  if (radius_one > 0.0) rmass[ilocal] *= 4.0 * MY_PI / 3.0 * radius_one * radius_one * radius_one;
+  if (radius_one > 0.0) rmass[ilocal] *= MY_4PI3 * radius_one * radius_one * radius_one;
 
   if (rmass[ilocal] <= 0.0) error->one(FLERR, "Invalid density in Atoms section of data file");
 
@@ -141,7 +141,7 @@ void AtomVecSphere::pack_data_pre(int ilocal)
 
   radius[ilocal] *= 2.0;
   if (radius_one != 0.0)
-    rmass[ilocal] = rmass_one / (4.0 * MY_PI / 3.0 * radius_one * radius_one * radius_one);
+    rmass[ilocal] = rmass_one / (MY_4PI3 * radius_one * radius_one * radius_one);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 85619c5b249..34fb5d74d60 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -19,6 +19,7 @@
 #include "math_extra.h"
 #include "math_special.h"
 #include "math_const.h"
+#include <algorithm>
 #include <cstdio>
 #include <cstring>
 
@@ -662,6 +663,54 @@ double volume_ellipsoid(double *shape, double *block, bool flag_super)
   return unitvol * shape[0] * shape[1] * shape[2];
 }
 
+
+/* ----------------------------------------------------------------------
+   compute the circumscribed radius to the ellipsoid
+   shape = 3 radii of ellipsoid
+   block = blockiness exponents of super-ellipsoid
+   return circumscribed radius of the ellipsoid
+------------------------------------------------------------------------- */
+
+double radius_ellipsoid(double *shape, double *block, bool flag_super)
+{
+  if (!flag_super) return std::max(std::max(shape[0], shape[1]), shape[2]);
+
+  // Super ellipsoid
+  double a = shape[0], b = shape[1], c = shape[2];
+  double n1 = block[0], n2 = block[1], n1divn2 = block[2];
+  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
+
+  // Cylinder approximation for n2=2
+
+  if (n2 < 2.01) return sqrt(a * a + c * c);
+
+  // Ellipsoid approximation for n1=2
+
+  if (n1 < 2.01) return std::max(c, sqrt(a * a + b * b));
+
+  // Bounding box approximation when n1>2 and n2>2
+
+  return sqrt(a * a + b * b + c * c);
+
+  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
+  // Not sure if exact solution worth it compared to boundig box diagonal
+  // If both blockiness exponents are greater than 2, the exact radius does not
+  // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
+  /*
+  double x, y, z, alpha, beta, gamma, xtilde;
+  double small = 0.1; // TO AVOID OVERFLOW IN POW
+
+  alpha = std::fabs(n2 - 2.0) > small ? std::pow(b / a, 2.0 / (n2 - 2.0)) : 0.0;
+  gamma = std::fabs(n1divn2 - 1.0) > small ? std::pow((1.0 + std::pow(alpha, n2)), n1divn2 - 1.0) : 1.0;
+  beta = std::pow(gamma * c * c / (a * a), 1.0 / std::max(n1 - 2.0, small));
+  xtilde = 1.0 / std::pow(std::pow(1.0 + std::pow(alpha, n2), n1divn2) + std::pow(beta, n1), 1.0 / n1);
+  x = a * xtilde;
+  y = alpha * b * xtilde;
+  z = beta * c * xtilde;
+  return sqrt(x * x + y * y + z * z);
+  */
+}
+
 /* ----------------------------------------------------------------------
    build rotation matrix for a small angle rotation around the X axis
 ------------------------------------------------------------------------- */
diff --git a/src/math_extra.h b/src/math_extra.h
index ad9800842e2..90e21fbf0ac 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -123,8 +123,9 @@ void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
 
-// volumes
+// volumes and circumscribed radius
 double volume_ellipsoid(double *shape, double *block = nullptr, bool flag_super = false);
+double radius_ellipsoid(double *shape, double *block, bool flag_super);
 
 // triclinic bounding box of a sphere
 
diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index 2ced044c5dd..3bd68dbc797 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1135,6 +1135,7 @@ TEST_F(AtomStyleTest, ellipsoid)
     expected.molecular      = Atom::ATOMIC;
     expected.tag_enable     = 1;
     expected.ellipsoid_flag = 1;
+    expected.radius_flag    = 1;
     expected.rmass_flag     = 1;
     expected.angmom_flag    = 1;
     expected.torque_flag    = 1;
@@ -1226,6 +1227,7 @@ TEST_F(AtomStyleTest, ellipsoid)
     auto *type      = lmp->atom->type;
     auto *ellipsoid = lmp->atom->ellipsoid;
     auto *rmass     = lmp->atom->rmass;
+    auto *radius    = lmp->atom->radius;
     auto *avec      = dynamic_cast<AtomVecEllipsoid *>(lmp->atom->avec);
     auto *bonus     = avec->bonus;
     EXPECT_NEAR(x[GETIDX(1)][0], -2.0, EPSILON);
@@ -1319,10 +1321,10 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[2].block[1], 2.0, EPSILON);
     EXPECT_NEAR(bonus[3].block[0], 2.0, EPSILON);
     EXPECT_NEAR(bonus[3].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[0].radcirc, 0.5, EPSILON);
-    EXPECT_NEAR(bonus[1].radcirc, 0.5, EPSILON);
-    EXPECT_NEAR(bonus[2].radcirc, 1.5, EPSILON);
-    EXPECT_NEAR(bonus[3].radcirc, 1.5, EPSILON);
+    EXPECT_NEAR(radius[GETIDX(1)], 0.5, EPSILON);
+    EXPECT_NEAR(radius[GETIDX(2)], 0.5, EPSILON);
+    EXPECT_NEAR(radius[GETIDX(3)], 1.5, EPSILON);
+    EXPECT_NEAR(radius[GETIDX(4)], 1.5, EPSILON);
     ASSERT_FALSE(bonus[0].flag_super);
     ASSERT_FALSE(bonus[1].flag_super);
     ASSERT_FALSE(bonus[2].flag_super);
@@ -1344,8 +1346,8 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[4].block[1], 8.0, EPSILON);
     EXPECT_NEAR(bonus[5].block[0], 4.0, EPSILON);
     EXPECT_NEAR(bonus[5].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[4].radcirc, 0.5*sqrt(3.0), EPSILON);
-    EXPECT_NEAR(bonus[5].radcirc, sqrt(4.64), EPSILON);
+    EXPECT_NEAR(radius[GETIDX(5)], 0.5*sqrt(3.0), EPSILON);
+    EXPECT_NEAR(radius[GETIDX(6)], sqrt(4.64), EPSILON);
     ASSERT_TRUE(bonus[4].flag_super);
     ASSERT_TRUE(bonus[5].flag_super);
     BEGIN_HIDE_OUTPUT();
@@ -4286,6 +4288,7 @@ TEST_F(AtomStyleTest, full_ellipsoid)
     expected.tag_enable     = 1;
     expected.molecule_flag  = 1;
     expected.ellipsoid_flag = 1;
+    expected.radius_flag    = 1;
     expected.q_flag         = 1;
     expected.rmass_flag     = 1;
     expected.torque_flag    = 1;
@@ -4941,6 +4944,7 @@ TEST_F(AtomStyleTest, oxdna)
     expected.tag_enable     = 1;
     expected.molecule_flag  = 1;
     expected.ellipsoid_flag = 1;
+    expected.radius_flag    = 1;
     expected.rmass_flag     = 1;
     expected.torque_flag    = 1;
     expected.angmom_flag    = 1;

From 03693728df8ea3d0f1d57b6419763a13e9bcc1c1 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Mon, 21 Jul 2025 10:59:10 -0600
Subject: [PATCH 018/174] fix typo after rebase

---
 src/set.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/set.cpp b/src/set.cpp
index 14d749e0142..3949a013c2b 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -1144,7 +1144,7 @@ void Set::invoke_block(Action *action)
 
   int varflag = action->varflag;
   double block1 = 0.0, block2 = 0.0;
-  if (!action->varflag1) xvalue = action->dvalue1;
+  if (!action->varflag1) block1 = action->dvalue1;
   if (!action->varflag2) block2 = action->dvalue2;
 
   for (int i = 0; i < nlocal; i++) {

From 75b8424c5a480baf3913f2cca86aebc8c30bc927 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 21 Jul 2025 19:38:16 +0200
Subject: [PATCH 019/174] Created placeholder for granular hooke pair style
 with ellipsoids

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 829 ++++++++++++++++++
 .../pair_gran_hooke_history_ellipsoid.h       |  77 ++
 2 files changed, 906 insertions(+)
 create mode 100644 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
 create mode 100644 src/GRANULAR/pair_gran_hooke_history_ellipsoid.h

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
new file mode 100644
index 00000000000..b80d283b626
--- /dev/null
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -0,0 +1,829 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hooke_history.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
+{
+  single_enable = 1;
+  no_virial_fdotr_compute = 1;
+  centroidstressflag = CENTROID_NOTAVAIL;
+  finitecutflag = 1;
+  history = 1;
+  size_history = 3;
+
+  single_extra = 10;
+  svector = new double[10];
+
+  neighprev = 0;
+
+  nmax = 0;
+  mass_rigid = nullptr;
+
+  // set comm size needed by this Pair if used with fix rigid
+
+  comm_forward = 1;
+
+  // keep default behavior of history[i][j] = -history[j][i]
+
+  nondefault_history_transfer = 0;
+
+  // create dummy fix as placeholder for FixNeighHistory
+  // this is so final order of Modify:fix will conform to input script
+
+  fix_history = nullptr;
+  fix_dummy = dynamic_cast<FixDummy *>(
+      modify->add_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistory::~PairGranHookeHistory()
+{
+  if (copymode) return;
+
+  delete[] svector;
+
+  if (!fix_history)
+    modify->delete_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me));
+  else
+    modify->delete_fix("NEIGH_HISTORY_HH" + std::to_string(instance_me));
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+
+    delete[] onerad_dynamic;
+    delete[] onerad_frozen;
+    delete[] maxrad_dynamic;
+    delete[] maxrad_frozen;
+  }
+
+  memory->destroy(mass_rigid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistory::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double wr1, wr2, wr3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *allshear, **firstshear;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **omega = atom->omega;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firstshear = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    touch = firsttouch[i];
+    allshear = firstshear[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq >= radsum * radsum) {
+
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        shear = &allshear[3 * jj];
+        shear[0] = 0.0;
+        shear[1] = 0.0;
+        shear[2] = 0.0;
+
+      } else {
+        r = sqrt(rsq);
+        rinv = 1.0 / r;
+        rsqinv = 1.0 / rsq;
+
+        // relative translational velocity
+
+        vr1 = v[i][0] - v[j][0];
+        vr2 = v[i][1] - v[j][1];
+        vr3 = v[i][2] - v[j][2];
+
+        // normal component
+
+        vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
+        vn1 = delx * vnnr * rsqinv;
+        vn2 = dely * vnnr * rsqinv;
+        vn3 = delz * vnnr * rsqinv;
+
+        // tangential component
+
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // relative rotational velocity
+
+        wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
+        wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
+        wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i];
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hookian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr * rsqinv;
+        ccel = kn * (radsum - r) * rinv - damp;
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // relative velocities
+
+        vtr1 = vt1 - (delz * wr2 - dely * wr3);
+        vtr2 = vt2 - (delx * wr3 - delz * wr1);
+        vtr3 = vt3 - (dely * wr1 - delx * wr2);
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allshear[3 * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * delx + shear[1] * dely + shear[2] * delz;
+          rsht *= rsqinv;
+          shear[0] -= rsht * delx;
+          shear[1] -= rsht * dely;
+          shear[2] -= rsht * delz;
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel * r);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = delx * ccel + fs1;
+        fy = dely * ccel + fs2;
+        fz = delz * ccel + fs3;
+        fx *= factor_lj;
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+
+        tor1 = rinv * (dely * fs3 - delz * fs2);
+        tor2 = rinv * (delz * fs1 - delx * fs3);
+        tor3 = rinv * (delx * fs2 - dely * fs1);
+        tor1 *= factor_lj;
+        tor2 *= factor_lj;
+        tor3 *= factor_lj;
+        torque[i][0] -= radi * tor1;
+        torque[i][1] -= radi * tor2;
+        torque[i][2] -= radi * tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+          torque[j][0] -= radj * tor1;
+          torque[j][1] -= radj * tor2;
+          torque[j][2] -= radj * tor3;
+        }
+
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag, n + 1, n + 1, "pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++) setflag[i][j] = 0;
+
+  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
+
+  onerad_dynamic = new double[n + 1];
+  onerad_frozen = new double[n + 1];
+  maxrad_dynamic = new double[n + 1];
+  maxrad_frozen = new double[n + 1];
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::settings(int narg, char **arg)
+{
+  if (narg != 6 && narg != 7) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  if (narg == 7) {
+    if (strcmp(arg[6], "limit_damping") == 0)
+      limit_damping = 1;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::coeff(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+  if (!allocated) allocate();
+
+  int ilo, ihi, jlo, jhi;
+  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::init_style()
+{
+  int i;
+
+  // error and warning checks
+
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag)
+    error->all(FLERR, "Pair gran/h* requires atom attributes radius, rmass, omega");
+  if (comm->ghost_velocity == 0)
+    error->all(FLERR, "Pair gran/h* requires ghost atoms store velocity");
+
+  // need a granular neighbor list
+
+  if (history)
+    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
+  else
+    neighbor->add_request(this, NeighConst::REQ_SIZE);
+
+  dt = update->dt;
+
+  // if history is stored and first init, create Fix to store history
+  // it replaces FixDummy, created in the constructor
+  // this is so its order in the fix list is preserved
+
+  if (history && (fix_history == nullptr)) {
+    auto cmd = fmt::format("NEIGH_HISTORY_HH{} all NEIGH_HISTORY {}", instance_me, size_history);
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->replace_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me), cmd, 1));
+    fix_history->pair = this;
+  }
+
+  // check for FixFreeze and set freeze_group_bit
+
+  auto fixlist = modify->get_fix_by_style("^freeze");
+  if (fixlist.size() == 0)
+    freeze_group_bit = 0;
+  else if (fixlist.size() > 1)
+    error->all(FLERR, "Only one fix freeze command at a time allowed");
+  else
+    freeze_group_bit = fixlist.front()->groupbit;
+
+  // check for FixRigid so can extract rigid body masses
+
+  fix_rigid = nullptr;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix->rigid_flag) {
+      if (fix_rigid)
+        error->all(FLERR, "Only one fix rigid command at a time allowed");
+      else
+        fix_rigid = ifix;
+    }
+  }
+
+  // check for FixPour and FixDeposit so can extract particle radii
+
+  auto pours = modify->get_fix_by_style("^pour");
+  auto deps = modify->get_fix_by_style("^deposit");
+
+  // set maxrad_dynamic and maxrad_frozen for each type
+  // include future FixPour and FixDeposit particles as dynamic
+
+  int itype;
+  for (i = 1; i <= atom->ntypes; i++) {
+    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
+    for (auto &ipour : pours) {
+      itype = i;
+      double maxrad = *((double *) ipour->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+    for (auto &idep : deps) {
+      itype = i;
+      double maxrad = *((double *) idep->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+  }
+
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    if (mask[i] & freeze_group_bit)
+      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
+    else
+      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
+  }
+
+  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+
+  // set fix which stores history info
+
+  if (history) {
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->get_fix_by_id("NEIGH_HISTORY_HH" + std::to_string(instance_me)));
+    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistory::init_one(int i, int j)
+{
+  if (!allocated) allocate();
+
+  // cutoff = sum of max I,J radii for
+  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
+
+  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
+  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
+  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
+  return cutoff;
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i, j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  int i, j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
+      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
+    }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::write_restart_settings(FILE *fp)
+{
+  fwrite(&kn, sizeof(double), 1, fp);
+  fwrite(&kt, sizeof(double), 1, fp);
+  fwrite(&gamman, sizeof(double), 1, fp);
+  fwrite(&gammat, sizeof(double), 1, fp);
+  fwrite(&xmu, sizeof(double), 1, fp);
+  fwrite(&dampflag, sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistory::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistory::reset_dt()
+{
+  dt = update->dt;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHookeHistory::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                    double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+{
+  double radi, radj, radsum;
+  double r, rinv, rsqinv, delx, dely, delz;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3, wr1, wr2, wr3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+
+  r = sqrt(rsq);
+  rinv = 1.0 / r;
+  rsqinv = 1.0 / rsq;
+
+  // relative translational velocity
+
+  double **v = atom->v;
+  vr1 = v[i][0] - v[j][0];
+  vr2 = v[i][1] - v[j][1];
+  vr3 = v[i][2] - v[j][2];
+
+  // normal component
+
+  double **x = atom->x;
+  delx = x[i][0] - x[j][0];
+  dely = x[i][1] - x[j][1];
+  delz = x[i][2] - x[j][2];
+
+  vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
+  vn1 = delx * vnnr * rsqinv;
+  vn2 = dely * vnnr * rsqinv;
+  vn3 = delz * vnnr * rsqinv;
+
+  // tangential component
+
+  vt1 = vr1 - vn1;
+  vt2 = vr2 - vn2;
+  vt3 = vr3 - vn3;
+
+  // relative rotational velocity
+
+  double **omega = atom->omega;
+  wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
+  wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
+  wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    // NOTE: ensure mass_rigid is current for owned+ghost atoms?
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr * rsqinv;
+  ccel = kn * (radsum - r) * rinv - damp;
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  // relative velocities
+
+  vtr1 = vt1 - (delz * wr2 - dely * wr3);
+  vtr2 = vt2 - (delx * wr3 - delz * wr1);
+  vtr3 = vt3 - (dely * wr1 - delx * wr2);
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // shear history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  double *allshear = fix_history->firstvalue[i];
+
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  double *shear = &allshear[3 * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel * r);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = fs = 0.0;
+  }
+
+  // set force and return no energy
+
+  fforce = ccel;
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vt1;
+  svector[8] = vt2;
+  svector[9] = vt3;
+
+  return 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairGranHookeHistory::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
+                                            int * /*pbc*/)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = mass_rigid[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistory::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistory::memory_usage()
+{
+  double bytes = (double) nmax * sizeof(double);
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   self-interaction range of particle
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistory::atom2cut(int i)
+{
+  double cut = atom->radius[i] * 2;
+  return cut;
+}
+
+/* ----------------------------------------------------------------------
+   maximum interaction range for two finite particles
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistory::radii2cut(double r1, double r2)
+{
+  double cut = r1 + r2;
+  return cut;
+}
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
new file mode 100644
index 00000000000..54491147e85
--- /dev/null
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -0,0 +1,77 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hooke/history,PairGranHookeHistory);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHookeHistory : public Pair {
+ public:
+  PairGranHookeHistory(class LAMMPS *);
+  ~PairGranHookeHistory() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_restart_settings(FILE *) override;
+  void read_restart_settings(FILE *) override;
+  void reset_dt() override;
+  double single(int, int, int, int, double, double, double, double &) override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  double memory_usage() override;
+  double atom2cut(int) override;
+  double radii2cut(double, double) override;
+
+ protected:
+  double kn, kt, gamman, gammat, xmu;
+  int dampflag;
+  double dt;
+  int freeze_group_bit;
+  int history;
+  int limit_damping;
+
+  int neighprev;
+  double *onerad_dynamic, *onerad_frozen;
+  double *maxrad_dynamic, *maxrad_frozen;
+
+  int size_history;
+
+  class FixDummy *fix_dummy;
+  class FixNeighHistory *fix_history;
+
+  // storage of rigid body masses for use in granular interactions
+
+  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
+  double *mass_rigid;      // rigid mass for owned+ghost atoms
+  int nmax;                // allocated size of mass_rigid
+
+  void allocate();
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From f6f9be1cb56420d3a48e769c456b0aa41b8ac2f3 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 8 Aug 2025 16:03:17 +0200
Subject: [PATCH 020/174] Added shape function to ellipsoid granular pair style

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 146 +++++++++++++++---
 .../pair_gran_hooke_history_ellipsoid.h       |  24 ++-
 src/math_extra.h                              |   2 +-
 3 files changed, 141 insertions(+), 31 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index b80d283b626..2e425e91aca 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -15,7 +15,7 @@
    Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
 ------------------------------------------------------------------------- */
 
-#include "pair_gran_hooke_history.h"
+#include "pair_gran_hooke_history_ellipsoid.h"
 
 #include "atom.h"
 #include "comm.h"
@@ -29,6 +29,7 @@
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "update.h"
+#include "math_extra.h" // probably needed for some computations
 
 #include <cmath>
 #include <cstring>
@@ -37,14 +38,14 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
+PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 1;
   no_virial_fdotr_compute = 1;
   centroidstressflag = CENTROID_NOTAVAIL;
   finitecutflag = 1;
   history = 1;
-  size_history = 3;
+  size_history = 7;  // shear[3], prevevious_cp[3], pair_was_in_contact_flag
 
   single_extra = 10;
   svector = new double[10];
@@ -72,7 +73,7 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
 
 /* ---------------------------------------------------------------------- */
 
-PairGranHookeHistory::~PairGranHookeHistory()
+PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
 {
   if (copymode) return;
 
@@ -98,7 +99,7 @@ PairGranHookeHistory::~PairGranHookeHistory()
 
 /* ---------------------------------------------------------------------- */
 
-void PairGranHookeHistory::compute(int eflag, int vflag)
+void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 {
   int i, j, ii, jj, inum, jnum;
   double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
@@ -347,7 +348,7 @@ void PairGranHookeHistory::compute(int eflag, int vflag)
    allocate all arrays
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::allocate()
+void PairGranHookeHistoryEllipsoid::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
@@ -368,7 +369,7 @@ void PairGranHookeHistory::allocate()
    global settings
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::settings(int narg, char **arg)
+void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
 {
   if (narg != 6 && narg != 7) error->all(FLERR, "Illegal pair_style command");
 
@@ -405,7 +406,7 @@ void PairGranHookeHistory::settings(int narg, char **arg)
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::coeff(int narg, char **arg)
+void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
 {
   if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
   if (!allocated) allocate();
@@ -429,16 +430,16 @@ void PairGranHookeHistory::coeff(int narg, char **arg)
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::init_style()
+void PairGranHookeHistoryEllipsoid::init_style()
 {
   int i;
 
   // error and warning checks
 
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag)
-    error->all(FLERR, "Pair gran/h* requires atom attributes radius, rmass, omega");
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag || !atom->ellipsoid_flag)
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, omega and ellipdoid flag");
   if (comm->ghost_velocity == 0)
-    error->all(FLERR, "Pair gran/h* requires ghost atoms store velocity");
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 
   // need a granular neighbor list
 
@@ -505,6 +506,8 @@ void PairGranHookeHistory::init_style()
     }
   }
 
+  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
+
   double *radius = atom->radius;
   int *mask = atom->mask;
   int *type = atom->type;
@@ -533,7 +536,7 @@ void PairGranHookeHistory::init_style()
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
-double PairGranHookeHistory::init_one(int i, int j)
+double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
 {
   if (!allocated) allocate();
 
@@ -550,7 +553,7 @@ double PairGranHookeHistory::init_one(int i, int j)
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::write_restart(FILE *fp)
+void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
@@ -563,7 +566,7 @@ void PairGranHookeHistory::write_restart(FILE *fp)
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::read_restart(FILE *fp)
+void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
@@ -581,7 +584,7 @@ void PairGranHookeHistory::read_restart(FILE *fp)
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::write_restart_settings(FILE *fp)
+void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
 {
   fwrite(&kn, sizeof(double), 1, fp);
   fwrite(&kt, sizeof(double), 1, fp);
@@ -595,7 +598,7 @@ void PairGranHookeHistory::write_restart_settings(FILE *fp)
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
-void PairGranHookeHistory::read_restart_settings(FILE *fp)
+void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
@@ -615,14 +618,14 @@ void PairGranHookeHistory::read_restart_settings(FILE *fp)
 
 /* ---------------------------------------------------------------------- */
 
-void PairGranHookeHistory::reset_dt()
+void PairGranHookeHistoryEllipsoid::reset_dt()
 {
   dt = update->dt;
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairGranHookeHistory::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
                                     double /*factor_coul*/, double /*factor_lj*/, double &fforce)
 {
   double radi, radj, radsum;
@@ -774,7 +777,7 @@ double PairGranHookeHistory::single(int i, int j, int /*itype*/, int /*jtype*/,
 
 /* ---------------------------------------------------------------------- */
 
-int PairGranHookeHistory::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
+int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
                                             int * /*pbc*/)
 {
   int i, j, m;
@@ -789,7 +792,7 @@ int PairGranHookeHistory::pack_forward_comm(int n, int *list, double *buf, int /
 
 /* ---------------------------------------------------------------------- */
 
-void PairGranHookeHistory::unpack_forward_comm(int n, int first, double *buf)
+void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
 {
   int i, m, last;
 
@@ -802,7 +805,7 @@ void PairGranHookeHistory::unpack_forward_comm(int n, int first, double *buf)
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
-double PairGranHookeHistory::memory_usage()
+double PairGranHookeHistoryEllipsoid::memory_usage()
 {
   double bytes = (double) nmax * sizeof(double);
   return bytes;
@@ -812,7 +815,7 @@ double PairGranHookeHistory::memory_usage()
    self-interaction range of particle
 ------------------------------------------------------------------------- */
 
-double PairGranHookeHistory::atom2cut(int i)
+double PairGranHookeHistoryEllipsoid::atom2cut(int i)
 {
   double cut = atom->radius[i] * 2;
   return cut;
@@ -822,8 +825,103 @@ double PairGranHookeHistory::atom2cut(int i)
    maximum interaction range for two finite particles
 ------------------------------------------------------------------------- */
 
-double PairGranHookeHistory::radii2cut(double r1, double r2)
+double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
 {
   double cut = r1 + r2;
   return cut;
 }
+
+
+/* ----------------------------------------------------------------------
+   express local (particle level) to global (system level) coordinates
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::local2global_vector(const double v[3], const double *quat, double global_v[3]){
+
+   MathExtra::quatrotvec(const_cast<double*>(quat) , const_cast<double*>(v), global_v);
+};
+
+void PairGranHookeHistoryEllipsoid::local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]){
+    double rot[3][3],  temp[3][3];
+    MathExtra::quat_to_mat(const_cast<double*>(quat), rot);
+    MathExtra::times3(rot, m, temp);
+    MathExtra::transpose_times3(rot, temp, global_m);
+};
+
+  
+/* ----------------------------------------------------------------------
+   express global (system level) to local (particle level) coordinates
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::global2local_vector(const double *v, const double *quat, double *local_v){
+
+    double qc[4];
+    MathExtra::qconjugate(const_cast<double*>(quat), qc);
+    MathExtra::quatrotvec(qc, const_cast<double*>(v), local_v);
+
+};
+
+
+void PairGranHookeHistoryEllipsoid::global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]){
+    double rot[3][3], temp[3][3];
+    MathExtra::quat_to_mat(quat, rot);
+    MathExtra::transpose_times3(rot, m, temp);
+    MathExtra::times3(temp, rot, local_m);
+}
+
+/* ----------------------------------------------------------------------
+   shape function computations for superellipsoids
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f){
+  const double n1 = block[0], n2 = block[1];
+  
+  local_f = pow( pow(abs(point[0]/shape[0]), n2) + pow(abs(point[1]/shape[1]), n2) , n1/ n2) + pow(abs(point[2]/shape[2]), n1)  - 1.0;
+};
+
+void PairGranHookeHistoryEllipsoid::shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f){
+  double local_point[3];
+  global2local_vector(const_cast<double*>(point), const_cast<double*>(quat), local_point);
+  shape_function_local(shape, block, quat, local_point, global_f);
+};
+
+void PairGranHookeHistoryEllipsoid::shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad){
+  const double n1 = block[0], n2 = block[1];
+  const double ainv = 1.0 / shape[0];
+  const double binv = 1.0 / shape[1];
+  const double cinv = 1.0 / shape[2];
+
+  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
+  const double nu_12 = pow(nu, n1 / n2 - 1.0);
+
+  local_grad[0] = n1*ainv * pow(abs(point[0] * ainv), n2 - 1.0) * nu_12 * copysign(1.0, point[0]);
+  local_grad[1] = n1*binv * pow(abs(point[1] * binv), n2 - 1.0) * nu_12 * copysign(1.0, point[1]);
+  local_grad[2] = n1*cinv * pow(abs(point[2] * cinv), n1 - 1.0) * copysign(1.0, point[2]);
+
+};
+
+void PairGranHookeHistoryEllipsoid::shape_function_local_hessian(
+  const double *shape, const double *block, const double *quat, const double *point, double local_hess[3][3]) {
+  const double n1 = block[0], n2 = block[1];
+  const double ainv = 1.0 / shape[0];
+  const double binv = 1.0 / shape[1];
+  const double cinv = 1.0 / shape[2];
+
+  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
+  const double nu_12_1 = pow(nu, n1 / n2 - 1.0);
+  const double nu_12_2 = pow(nu, n1 / n2 - 2.0);
+
+  local_hess[0][2] = local_hess[2][0] = local_hess[1][2] = local_hess[2][1] =0;
+
+  local_hess[0][0] = n1 * (n2 - 1) * ainv * ainv * pow(abs(point[0] * ainv), n2 - 2.0)* nu_12_1 +
+                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[0] * ainv), 2*n2 - 2.0)* nu_12_2;
+
+  local_hess[1][1] = n1 * (n2 - 1) * binv * binv * pow(abs(point[1] * binv), n2 - 2.0)* nu_12_1 +
+                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[1] * binv), 2*n2 - 2.0)* nu_12_2;
+
+  local_hess[2][2] = n1 * (n1 - 1) * cinv * cinv * pow(abs(point[2] * cinv), n1-2);
+
+  local_hess[0][1] = n1 * (n1 - n2) * ainv * binv * pow(abs(point[0]*ainv), n2 - 1) *
+                     pow(abs(point[1]*binv), n2 -1) * pow(nu, n1 / n2 - 2) * copysign(1.0, shape[0] * shape[1]); 
+                
+  }
\ No newline at end of file
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 54491147e85..4f3b72b1088 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -13,21 +13,21 @@
 
 #ifdef PAIR_CLASS
 // clang-format off
-PairStyle(gran/hooke/history,PairGranHookeHistory);
+PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistory);
 // clang-format on
 #else
 
-#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_H
-#define LMP_PAIR_GRAN_HOOKE_HISTORY_H
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
-class PairGranHookeHistory : public Pair {
+class PairGranHookeHistoryEllipsoid : public Pair {
  public:
-  PairGranHookeHistory(class LAMMPS *);
-  ~PairGranHookeHistory() override;
+  PairGranHookeHistoryEllipsoid(class LAMMPS *);
+  ~PairGranHookeHistoryEllipsoid() override;
   void compute(int, int) override;
   void settings(int, char **) override;
   void coeff(int, char **) override;
@@ -45,6 +45,18 @@ class PairGranHookeHistory : public Pair {
   double atom2cut(int) override;
   double radii2cut(double, double) override;
 
+  // needed for shape functions grad and matrix 
+  void local2global_vector(const double v[3], const double *quat, double global_v[3]);
+  void global2local_vector(const double v[3], const double *quat, double local_v[3]);
+  void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]);
+  void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]);
+
+  // shape function computations
+  void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f);
+  void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f);
+  void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad);
+  void shape_function_local_hessian(const double *shape, const double *block, const double *quat, const double *point, double local_hessian[3][3]);
+
  protected:
   double kn, kt, gamman, gammat, xmu;
   int dampflag;
diff --git a/src/math_extra.h b/src/math_extra.h
index 90e21fbf0ac..a945f814c21 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -88,7 +88,7 @@ inline void multiply_shape_shape(const double *one, const double *two, double *a
 // quaternion operations
 
 inline void qnormalize(double *q);
-inline void qconjugate(double *q, double *qc);
+inline void qconjugate(double *q, double *qc); // would it be better to have q passed as const double?
 inline void vecquat(double *a, double *b, double *c);
 inline void quatvec(double *a, double *b, double *c);
 inline void quatquat(double *a, double *b, double *c);

From 43ea8824c379518b015263cbb3c4a9631dc4412c Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 22 Aug 2025 17:31:03 -0600
Subject: [PATCH 021/174] Correct wrong Class name in style creator. Make fix
 neigh history name for ellipsoids different from hooke/history

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 12 ++++++------
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.h   |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 2e425e91aca..1a220f61a6e 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -68,7 +68,7 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
 
   fix_history = nullptr;
   fix_dummy = dynamic_cast<FixDummy *>(
-      modify->add_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
 }
 
 /* ---------------------------------------------------------------------- */
@@ -80,9 +80,9 @@ PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
   delete[] svector;
 
   if (!fix_history)
-    modify->delete_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me));
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
   else
-    modify->delete_fix("NEIGH_HISTORY_HH" + std::to_string(instance_me));
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
 
   if (allocated) {
     memory->destroy(setflag);
@@ -455,9 +455,9 @@ void PairGranHookeHistoryEllipsoid::init_style()
   // this is so its order in the fix list is preserved
 
   if (history && (fix_history == nullptr)) {
-    auto cmd = fmt::format("NEIGH_HISTORY_HH{} all NEIGH_HISTORY {}", instance_me, size_history);
+    auto cmd = fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
     fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->replace_fix("NEIGH_HISTORY_HH_DUMMY" + std::to_string(instance_me), cmd, 1));
+        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
     fix_history->pair = this;
   }
 
@@ -527,7 +527,7 @@ void PairGranHookeHistoryEllipsoid::init_style()
 
   if (history) {
     fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->get_fix_by_id("NEIGH_HISTORY_HH" + std::to_string(instance_me)));
+        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
     if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
   }
 }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 4f3b72b1088..e4dff486311 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -13,7 +13,7 @@
 
 #ifdef PAIR_CLASS
 // clang-format off
-PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistory);
+PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
 // clang-format on
 #else
 

From 134ace13411d7016137917b7dbe85ebb03484e73 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 22 Aug 2025 18:50:23 -0600
Subject: [PATCH 022/174] build LAPACK/linalg for ASPHERE + GRANULAR for
 contact detection between ellipsoids

---
 cmake/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index af12ad24fc0..d19f9192d2c 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -520,7 +520,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA
 endif()
 
 option(USE_INTERNAL_LINALG "Prefer internal library with BLAS/LAPACK subset over system BLAS/LAPACK" OFF)
-if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS)
+if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR (PKG_ASPHERE AND PKG_GRANULAR) OR BUILD_TOOLS)
   if(NOT USE_INTERNAL_LINALG)
     find_package(LAPACK)
     find_package(BLAS)

From 22930a78caed28f10f5ec91511fc96087a57d371 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 22 Aug 2025 18:51:15 -0600
Subject: [PATCH 023/174] TEMP TEST COMMIT TO BE DELETED IN THE FUTURE: test
 case for LAPACK in constructor

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 60 ++++++++++++++++++-
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1a220f61a6e..6d4a78468ab 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -11,9 +11,6 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
-------------------------------------------------------------------------- */
 
 #include "pair_gran_hooke_history_ellipsoid.h"
 
@@ -36,6 +33,26 @@
 
 using namespace LAMMPS_NS;
 
+// TODO: This is temporary to check if it LAPACK / linalg works
+//       Pick the ones we end up using and clean that up
+//    LAPACK doc: https://netlib.org/lapack/lug/node38.html
+// WARNING: FORTRAN uses pass by reference semantics so must use pointer arguments in C++
+
+extern "C" { // General Matrices
+    void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
+    void dgetrs_(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorzation)
+}
+
+extern "C" { // Symmetric positive definite (regular storage, i.e., not packed)
+    void dpotrf_(const char *uplo, const int *n, double *a, const int *lda, int *info); // Factorize
+    void dpotrs_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, int *info); // Solve (using factorization)
+}
+
+extern "C" { // Symmetric indefinite (regular storage, i.e., not packed)
+    void dsytrf_(const char *uplo, const int *n, double *a, const int *lda, int *ipiv, double *work, const int *lwork, int *info); // Factorize
+    void dsytrs_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorization)
+}
+
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
@@ -69,6 +86,43 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   fix_history = nullptr;
   fix_dummy = dynamic_cast<FixDummy *>(
       modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+
+  // TEMP TEST HERE IN THE CONSTRUCTOR FOR AVAILABILITY AND FUNCTIONALITY OF LAPACK FUNCTIONS
+  // WARNING: 1D column-major matrix for LAPACK compatibility
+  static constexpr int n = 4;
+  // General: (solution = {-1, 1, 2, 0})
+  double A[n][n] = {{2 , -9, 9 , -1},
+                    {-4, -8, -8, -5},
+                    {6 , -2, -1, -2},
+                    {8 , -6, -2, -2}};
+  double rhs[n] = {7, -20, -10, -18};
+  double A_LAPACK[n * n];
+  for (int i = 0 ; i < n ; i++){
+    for (int j = 0 ; j < n ; j++){
+      A_LAPACK[i + j*n] = A[i][j];
+    }
+  }
+  int lapack_error;
+  int ipiv[n*n];
+  const char trans = 'N';
+  const int nrhs = 1;
+
+  dgetrf_(&n, &n, A_LAPACK, &n, ipiv, &lapack_error); // Factorize
+  if (lapack_error) {
+    error->all(FLERR, "LAPACK factorization error in ellipsoid code, info = {} ", lapack_error);
+  }
+  utils::logmesg(lmp," rhs before solve = ({}, {}, {}, {})\n", rhs[0], rhs[1], rhs[2], rhs[3]);
+  dgetrs_(&trans, &n, &nrhs, A_LAPACK, &n, ipiv, rhs, &n, &lapack_error); // Solve (using factorzation)
+  if (lapack_error) {
+    error->all(FLERR, "LAPACK solve error in ellipsoid code, info = {} ", lapack_error);
+  }
+  // Output results
+  utils::logmesg(lmp," LAPACK RESULTS: \n");
+  utils::logmesg(lmp," Expected vector = (-1, 1, 2, 0)\n");
+  utils::logmesg(lmp," rhs after solve = ({}, {}, {}, {})\n", rhs[0], rhs[1], rhs[2], rhs[3]);
+
+
+  
 }
 
 /* ---------------------------------------------------------------------- */

From 541359f8d4f3b01af3ec3a92889860d8086f36fd Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 14 Nov 2025 16:38:14 +0100
Subject: [PATCH 024/174] Added extra math for superellipsoids for curvature
 calculation

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 246 ++++++++++++++++++
 src/ASPHERE/math_extra_superellipsoids.h      |  89 +++++++
 .../pair_gran_hooke_history_ellipsoid.cpp     | 128 +++------
 .../pair_gran_hooke_history_ellipsoid.h       |  12 -
 4 files changed, 367 insertions(+), 108 deletions(-)
 create mode 100644 src/ASPHERE/math_extra_superellipsoids.cpp
 create mode 100644 src/ASPHERE/math_extra_superellipsoids.h

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
new file mode 100644
index 00000000000..2a7b83f375c
--- /dev/null
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -0,0 +1,246 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril Coulibaly (??)
+------------------------------------------------------------------------- */
+
+#include "math_extra_superellipsoids.h"
+#include "math_extra.h"
+#include <cmath>
+
+// #include "math_special.h"
+// #include "math_const.h"
+
+// #include <algorithm>
+// #include <cstdio>
+// #include <cstring>
+
+namespace MathExtraSuperellipsoids {
+
+/* ----------------------------------------------------------------------
+   beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
+------------------------------------------------------------------------- */
+double beta_func(double a, double b) {
+    return exp(lgamma(a) + lgamma(b) - lgamma(a + b));
+}
+
+/* ----------------------------------------------------------------------
+   Volume of superellipsoid
+   source https://cse.buffalo.edu/~jryde/cse673/files/superquadrics.pdf
+------------------------------------------------------------------------- */
+
+void volume_superellipsoid(const double *blockiness, const double *shape, double volume)
+{
+  const double eps1 = 2.0 / blockiness[0]; // shape exponent in latitude direction
+  const double eps2 = 2.0 / blockiness[1]; // shape exponent in longitude direction
+  volume = 2.0*shape[0]*shape[1]*shape[2]*eps1*eps2*
+      beta_func(0.5*eps1, eps1 + 1.0)*
+      beta_func(0.5*eps2, 0.5*eps2 + 1.0);
+}
+
+/* ----------------------------------------------------------------------
+   inertia tensor of superellipsoid
+   source https://cse.buffalo.edu/~jryde/cse673/files/superquadrics.pdf
+------------------------------------------------------------------------- */
+void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia)
+
+{
+
+  const double eps1 = 2.0 / blockiness[0]; // shape exponent in latitude direction
+  const double eps2 = 2.0 / blockiness[1]; // shape exponent in longitude direction
+
+  const double a1 = shape[0];
+  const double a2 = shape[1];
+  const double a3 = shape[2];
+  const double I_xx = 0.5*a1*a2*a3*eps1*eps2*(a2*a2*beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0)+
+      4.0*a3*a3*beta_func(0.5*eps2, 0.5*eps2+1.0)*beta_func(1.5*eps1, eps1+1.0)) * density;
+  const double I_yy = 0.5*a1*a2*a3*eps1*eps2*(a1*a1*beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0)+
+      4.0*a3*a3*beta_func(0.5*eps2, 0.5*eps2+1.0)*beta_func(1.5*eps1, eps1+1.0)) * density;
+  const double I_zz = 0.5*a1*a2*a3*eps1*eps2*(a1*a1 + a2*a2)*
+      beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0) * density;
+
+  inertia[0] = I_xx;
+  inertia[1] = I_yy;
+  inertia[2] = I_zz;
+}
+
+
+/* ----------------------------------------------------------------------
+   curvature of superellipsoid
+   source https://en.wikipedia.org/wiki/Mean_curvature
+------------------------------------------------------------------------- */
+
+void mean_curvature_superellipsoid(const double *shape, const double *blockiness, const double* quat, const double *global_point, double curvature)
+{
+  // this code computes the mean curvature on the superellipsoid surface
+  // for the given global point
+  double local_point[3],hessian[3][3], nablaF[3], f, normal[3];
+  global2local_vector(global_point, quat, local_point); 
+  shape_function_local(shape, blockiness, quat, local_point, f);
+  double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
+  double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
+  for(int i = 0; i < 3; i++)
+    local_point[i] *= alpha;
+  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);
+  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);
+  MathExtra::normalize3(nablaF, normal);
+  double temp[3];
+  MathExtra::matvec(hessian, normal, temp);
+  double F_mag = sqrt(MathExtra::dot3(nablaF, nablaF));
+  curvature = fabs(MathExtra::dot3(normal, temp) - (hessian[0][0] + hessian[1][1] + hessian[2][2])) / fabs(2.0 * F_mag);
+}
+
+void gaussian_curvature_superellipsoid(const double *shape, const double *blockiness, const double* quat, const double *global_point, double curvature)
+{
+  // this code computes the gaussian curvature coefficient
+  // for the given global point
+  double local_point[3],hessian[3][3], nablaF[3], f, normal[3];
+  global2local_vector(global_point, quat, local_point); 
+  shape_function_local(shape, blockiness, quat, local_point, f);
+  double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
+  double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
+  for(int i = 0; i < 3; i++)
+    local_point[i] *= alpha;
+  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);
+  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);
+  MathExtra::normalize3(nablaF, normal);
+  double temp[3];
+  MathExtra::matvec(hessian, normal, temp);
+  double F_mag = sqrt(MathExtra::dot3(nablaF, nablaF));
+
+  double fx = nablaF[0];
+  double fy = nablaF[1];
+  double fz = nablaF[2];
+
+  double fxx = hessian[0][0];
+  double fxy = hessian[0][1];
+  double fxz = hessian[0][2];
+
+  double fyy = hessian[1][1];
+  double fyz = hessian[1][2];
+
+  double fzz = hessian[2][2];
+
+  double mat[4][4] = {
+    {fxx, fxy, fxz, fx},
+    {fxy, fyy, fyz, fy},
+    {fxz, fyz, fzz, fz},
+    {fx,  fy,  fz, 0.0} 
+  };
+
+    double K = -det4_M44_zero(mat) / (F_mag*F_mag*F_mag*F_mag);
+    curvature =  sqrt(fabs(K));
+}
+
+
+/* ----------------------------------------------------------------------
+   express local (particle level) to global (system level) coordinates
+------------------------------------------------------------------------- */
+
+void local2global_vector(const double v[3], const double *quat, double global_v[3]){
+
+   MathExtra::quatrotvec(const_cast<double*>(quat) , const_cast<double*>(v), global_v);
+};
+
+void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]){
+    double rot[3][3],  temp[3][3];
+    MathExtra::quat_to_mat(const_cast<double*>(quat), rot);
+    MathExtra::times3(rot, m, temp);
+    MathExtra::transpose_times3(rot, temp, global_m);
+};
+
+  
+/* ----------------------------------------------------------------------
+   express global (system level) to local (particle level) coordinates
+------------------------------------------------------------------------- */
+
+void global2local_vector(const double *v, const double *quat, double *local_v){
+
+    double qc[4];
+    MathExtra::qconjugate(const_cast<double*>(quat), qc);
+    MathExtra::quatrotvec(qc, const_cast<double*>(v), local_v);
+
+};
+
+
+void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]){
+    double rot[3][3], temp[3][3];
+    MathExtra::quat_to_mat(quat, rot);
+    MathExtra::transpose_times3(rot, m, temp);
+    MathExtra::times3(temp, rot, local_m);
+}
+
+/* ----------------------------------------------------------------------
+   shape function computations for superellipsoids
+------------------------------------------------------------------------- */
+
+void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f){
+  const double n1 = block[0], n2 = block[1];
+  
+  local_f = pow( pow(abs(point[0]/shape[0]), n2) + pow(abs(point[1]/shape[1]), n2) , n1/ n2) + pow(abs(point[2]/shape[2]), n1)  - 1.0;
+};
+
+void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f){
+  double local_point[3];
+  global2local_vector(const_cast<double*>(point), const_cast<double*>(quat), local_point);
+  shape_function_local(shape, block, quat, local_point, global_f);
+};
+
+void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad){
+  // point is in local coordinates
+  const double n1 = block[0], n2 = block[1];
+  const double ainv = 1.0 / shape[0];
+  const double binv = 1.0 / shape[1];
+  const double cinv = 1.0 / shape[2];
+
+  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
+  const double nu_12 = pow(nu, n1 / n2 - 1.0);
+
+  local_grad[0] = n1*ainv * pow(abs(point[0] * ainv), n2 - 1.0) * nu_12 * copysign(1.0, point[0]);
+  local_grad[1] = n1*binv * pow(abs(point[1] * binv), n2 - 1.0) * nu_12 * copysign(1.0, point[1]);
+  local_grad[2] = n1*cinv * pow(abs(point[2] * cinv), n1 - 1.0) * copysign(1.0, point[2]);
+
+};
+
+void shape_function_local_hessian(
+  const double *shape, const double *block, const double *quat, const double *point, double local_hess[3][3]) {
+  const double n1 = block[0], n2 = block[1];
+  const double ainv = 1.0 / shape[0];
+  const double binv = 1.0 / shape[1];
+  const double cinv = 1.0 / shape[2];
+
+  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
+  const double nu_12_1 = pow(nu, n1 / n2 - 1.0);
+  const double nu_12_2 = pow(nu, n1 / n2 - 2.0);
+
+  local_hess[0][2] = local_hess[2][0] = local_hess[1][2] = local_hess[2][1] =0;
+
+  local_hess[0][0] = n1 * (n2 - 1) * ainv * ainv * pow(abs(point[0] * ainv), n2 - 2.0)* nu_12_1 +
+                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[0] * ainv), 2*n2 - 2.0)* nu_12_2;
+
+  local_hess[1][1] = n1 * (n2 - 1) * binv * binv * pow(abs(point[1] * binv), n2 - 2.0)* nu_12_1 +
+                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[1] * binv), 2*n2 - 2.0)* nu_12_2;
+
+  local_hess[2][2] = n1 * (n1 - 1) * cinv * cinv * pow(abs(point[2] * cinv), n1-2);
+
+  local_hess[0][1] = n1 * (n1 - n2) * ainv * binv * pow(abs(point[0]*ainv), n2 - 1) *
+                     pow(abs(point[1]*binv), n2 -1) * pow(nu, n1 / n2 - 2) * copysign(1.0, shape[0] * shape[1]); 
+                
+  }
+
+
+}
+
+
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
new file mode 100644
index 00000000000..c1d28604cd7
--- /dev/null
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -0,0 +1,89 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+    Contributing author: Jacopo Bilotto (EPFL), Jibril Coulibaly (??)
+------------------------------------------------------------------------- */
+
+#ifndef LMP_MATH_EXTRA_SUPERELLIPOIDS_H
+#define LMP_MATH_EXTRA_SUPERELLIPOIDS_H
+
+#include "math_extra.h"
+
+namespace MathExtraSuperellipsoids {
+  double beta_func(double a, double b);
+  void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
+  void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
+
+  // needed for shape functions grad and matrix 
+  void local2global_vector(const double v[3], const double *quat, double global_v[3]);
+  void global2local_vector(const double v[3], const double *quat, double local_v[3]);
+  void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]);
+  void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]);
+
+  // shape function computations
+  void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f);
+  void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f);
+  void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad);
+  void shape_function_local_hessian(const double *shape, const double *block, const double *quat, const double *point, double local_hessian[3][3]);
+
+  inline double det4_M44_zero(const double m[4][4]);
+
+
+  // ADD CONTACT DETECTION HERE
+
+};
+
+
+/* ----------------------------------------------------------------------
+   determinant of a 4x4 matrix M with M[3][3] assumed to be zero
+------------------------------------------------------------------------- */
+inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
+{
+    // Define the 3x3 submatrices (M_41, M_42, M_43)
+
+    // Submatrix M_41 
+    double m41[3][3] = {
+        {m[0][1], m[0][2], m[0][3]},
+        {m[1][1], m[1][2], m[1][3]},
+        {m[2][1], m[2][2], m[2][3]}
+    };
+
+    // Submatrix M_42 
+    double m42[3][3] = {
+        {m[0][0], m[0][2], m[0][3]},
+        {m[1][0], m[1][2], m[1][3]},
+        {m[2][0], m[2][2], m[2][3]}
+    };
+
+    // Submatrix M_43
+    double m43[3][3] = {
+        {m[0][0], m[0][1], m[0][3]},
+        {m[1][0], m[1][1], m[1][3]},
+        {m[2][0], m[2][1], m[2][3]}
+    };
+    
+    // Calculate the determinant using the simplified Laplace expansion (M_44=0)
+    // det(M) = -M[3][0]*det(M_41) + M[3][1]*det(M_42) - M[3][2]*det(M_43)
+    
+    double ans = -m[3][0] * MathExtra::det3(m41) 
+                 + m[3][1] * MathExtra::det3(m42) 
+                 - m[3][2] * MathExtra::det3(m43);
+                 
+    return ans;
+}
+
+
+
+
+#endif
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6d4a78468ab..87379a51869 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -15,6 +15,7 @@
 #include "pair_gran_hooke_history_ellipsoid.h"
 
 #include "atom.h"
+#include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "error.h"
 #include "fix.h"
@@ -27,6 +28,7 @@
 #include "neighbor.h"
 #include "update.h"
 #include "math_extra.h" // probably needed for some computations
+#include "math_extra_superellipsoids.h"
 
 #include <cmath>
 #include <cstring>
@@ -62,7 +64,7 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   centroidstressflag = CENTROID_NOTAVAIL;
   finitecutflag = 1;
   history = 1;
-  size_history = 7;  // shear[3], prevevious_cp[3], pair_was_in_contact_flag
+  size_history = 6;  // shear[3], previous_cp[3]
 
   single_extra = 10;
   svector = new double[10];
@@ -166,7 +168,11 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double shrmag, rsht;
   int *ilist, *jlist, *numneigh, **firstneigh;
   int *touch, **firsttouch;
-  double *shear, *allshear, **firstshear;
+  double *shear, *allshear, **firstshear, *prev_cp; // added previous contact point placeholder
+
+  double shapex, shapey, shapez; // ellipsoid shape params
+  double quat1, quat2, quat3, quat4;
+  double block1, block2;
 
   ev_init(eflag, vflag);
 
@@ -202,6 +208,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double **torque = atom->torque;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
+
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
@@ -222,6 +229,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
     ytmp = x[i][1];
     ztmp = x[i][2];
     radi = radius[i];
+
+
     touch = firsttouch[i];
     allshear = firstshear[i];
     jlist = firstneigh[i];
@@ -234,6 +243,27 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
       if (factor_lj == 0) continue;
 
+      // if intersected at previous point in time, no need to check bounding sphere
+      if (touch[jj] == 1) continue;
+      else {
+        // check intersection of bounding spheres (radius stores bounding sphere for ellipsoids)
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        radj = radius[j];
+        radsum = radi + radj; 
+        if (rsq >= radsum * radsum) 
+        {
+          touch[jj] = 0;
+          shear = &allshear[3 * jj];
+          shear[0] = 0.0;
+          shear[1] = 0.0;
+          shear[2] = 0.0;
+        }
+        else touch[jj] = 1; 
+      }
+
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
@@ -885,97 +915,3 @@ double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
   return cut;
 }
 
-
-/* ----------------------------------------------------------------------
-   express local (particle level) to global (system level) coordinates
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::local2global_vector(const double v[3], const double *quat, double global_v[3]){
-
-   MathExtra::quatrotvec(const_cast<double*>(quat) , const_cast<double*>(v), global_v);
-};
-
-void PairGranHookeHistoryEllipsoid::local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]){
-    double rot[3][3],  temp[3][3];
-    MathExtra::quat_to_mat(const_cast<double*>(quat), rot);
-    MathExtra::times3(rot, m, temp);
-    MathExtra::transpose_times3(rot, temp, global_m);
-};
-
-  
-/* ----------------------------------------------------------------------
-   express global (system level) to local (particle level) coordinates
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::global2local_vector(const double *v, const double *quat, double *local_v){
-
-    double qc[4];
-    MathExtra::qconjugate(const_cast<double*>(quat), qc);
-    MathExtra::quatrotvec(qc, const_cast<double*>(v), local_v);
-
-};
-
-
-void PairGranHookeHistoryEllipsoid::global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]){
-    double rot[3][3], temp[3][3];
-    MathExtra::quat_to_mat(quat, rot);
-    MathExtra::transpose_times3(rot, m, temp);
-    MathExtra::times3(temp, rot, local_m);
-}
-
-/* ----------------------------------------------------------------------
-   shape function computations for superellipsoids
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f){
-  const double n1 = block[0], n2 = block[1];
-  
-  local_f = pow( pow(abs(point[0]/shape[0]), n2) + pow(abs(point[1]/shape[1]), n2) , n1/ n2) + pow(abs(point[2]/shape[2]), n1)  - 1.0;
-};
-
-void PairGranHookeHistoryEllipsoid::shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f){
-  double local_point[3];
-  global2local_vector(const_cast<double*>(point), const_cast<double*>(quat), local_point);
-  shape_function_local(shape, block, quat, local_point, global_f);
-};
-
-void PairGranHookeHistoryEllipsoid::shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad){
-  const double n1 = block[0], n2 = block[1];
-  const double ainv = 1.0 / shape[0];
-  const double binv = 1.0 / shape[1];
-  const double cinv = 1.0 / shape[2];
-
-  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
-  const double nu_12 = pow(nu, n1 / n2 - 1.0);
-
-  local_grad[0] = n1*ainv * pow(abs(point[0] * ainv), n2 - 1.0) * nu_12 * copysign(1.0, point[0]);
-  local_grad[1] = n1*binv * pow(abs(point[1] * binv), n2 - 1.0) * nu_12 * copysign(1.0, point[1]);
-  local_grad[2] = n1*cinv * pow(abs(point[2] * cinv), n1 - 1.0) * copysign(1.0, point[2]);
-
-};
-
-void PairGranHookeHistoryEllipsoid::shape_function_local_hessian(
-  const double *shape, const double *block, const double *quat, const double *point, double local_hess[3][3]) {
-  const double n1 = block[0], n2 = block[1];
-  const double ainv = 1.0 / shape[0];
-  const double binv = 1.0 / shape[1];
-  const double cinv = 1.0 / shape[2];
-
-  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
-  const double nu_12_1 = pow(nu, n1 / n2 - 1.0);
-  const double nu_12_2 = pow(nu, n1 / n2 - 2.0);
-
-  local_hess[0][2] = local_hess[2][0] = local_hess[1][2] = local_hess[2][1] =0;
-
-  local_hess[0][0] = n1 * (n2 - 1) * ainv * ainv * pow(abs(point[0] * ainv), n2 - 2.0)* nu_12_1 +
-                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[0] * ainv), 2*n2 - 2.0)* nu_12_2;
-
-  local_hess[1][1] = n1 * (n2 - 1) * binv * binv * pow(abs(point[1] * binv), n2 - 2.0)* nu_12_1 +
-                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[1] * binv), 2*n2 - 2.0)* nu_12_2;
-
-  local_hess[2][2] = n1 * (n1 - 1) * cinv * cinv * pow(abs(point[2] * cinv), n1-2);
-
-  local_hess[0][1] = n1 * (n1 - n2) * ainv * binv * pow(abs(point[0]*ainv), n2 - 1) *
-                     pow(abs(point[1]*binv), n2 -1) * pow(nu, n1 / n2 - 2) * copysign(1.0, shape[0] * shape[1]); 
-                
-  }
\ No newline at end of file
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index e4dff486311..b6a79cf7ee8 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -45,18 +45,6 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   double atom2cut(int) override;
   double radii2cut(double, double) override;
 
-  // needed for shape functions grad and matrix 
-  void local2global_vector(const double v[3], const double *quat, double global_v[3]);
-  void global2local_vector(const double v[3], const double *quat, double local_v[3]);
-  void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]);
-  void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]);
-
-  // shape function computations
-  void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f);
-  void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f);
-  void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad);
-  void shape_function_local_hessian(const double *shape, const double *block, const double *quat, const double *point, double local_hessian[3][3]);
-
  protected:
   double kn, kt, gamman, gammat, xmu;
   int dampflag;

From e5f8baeaa8929cdbe1b94ad236ab5c0101984a7e Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 15:32:21 -0600
Subject: [PATCH 025/174] fix flag from omega to angmom. remove test LAPACK
 code

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 54 +------------------
 1 file changed, 2 insertions(+), 52 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 87379a51869..84cd83d069a 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -35,25 +35,12 @@
 
 using namespace LAMMPS_NS;
 
-// TODO: This is temporary to check if it LAPACK / linalg works
-//       Pick the ones we end up using and clean that up
-//    LAPACK doc: https://netlib.org/lapack/lug/node38.html
-// WARNING: FORTRAN uses pass by reference semantics so must use pointer arguments in C++
 
 extern "C" { // General Matrices
     void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
     void dgetrs_(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorzation)
 }
 
-extern "C" { // Symmetric positive definite (regular storage, i.e., not packed)
-    void dpotrf_(const char *uplo, const int *n, double *a, const int *lda, int *info); // Factorize
-    void dpotrs_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, int *info); // Solve (using factorization)
-}
-
-extern "C" { // Symmetric indefinite (regular storage, i.e., not packed)
-    void dsytrf_(const char *uplo, const int *n, double *a, const int *lda, int *ipiv, double *work, const int *lwork, int *info); // Factorize
-    void dsytrs_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorization)
-}
 
 /* ---------------------------------------------------------------------- */
 
@@ -88,43 +75,6 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   fix_history = nullptr;
   fix_dummy = dynamic_cast<FixDummy *>(
       modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
-
-  // TEMP TEST HERE IN THE CONSTRUCTOR FOR AVAILABILITY AND FUNCTIONALITY OF LAPACK FUNCTIONS
-  // WARNING: 1D column-major matrix for LAPACK compatibility
-  static constexpr int n = 4;
-  // General: (solution = {-1, 1, 2, 0})
-  double A[n][n] = {{2 , -9, 9 , -1},
-                    {-4, -8, -8, -5},
-                    {6 , -2, -1, -2},
-                    {8 , -6, -2, -2}};
-  double rhs[n] = {7, -20, -10, -18};
-  double A_LAPACK[n * n];
-  for (int i = 0 ; i < n ; i++){
-    for (int j = 0 ; j < n ; j++){
-      A_LAPACK[i + j*n] = A[i][j];
-    }
-  }
-  int lapack_error;
-  int ipiv[n*n];
-  const char trans = 'N';
-  const int nrhs = 1;
-
-  dgetrf_(&n, &n, A_LAPACK, &n, ipiv, &lapack_error); // Factorize
-  if (lapack_error) {
-    error->all(FLERR, "LAPACK factorization error in ellipsoid code, info = {} ", lapack_error);
-  }
-  utils::logmesg(lmp," rhs before solve = ({}, {}, {}, {})\n", rhs[0], rhs[1], rhs[2], rhs[3]);
-  dgetrs_(&trans, &n, &nrhs, A_LAPACK, &n, ipiv, rhs, &n, &lapack_error); // Solve (using factorzation)
-  if (lapack_error) {
-    error->all(FLERR, "LAPACK solve error in ellipsoid code, info = {} ", lapack_error);
-  }
-  // Output results
-  utils::logmesg(lmp," LAPACK RESULTS: \n");
-  utils::logmesg(lmp," Expected vector = (-1, 1, 2, 0)\n");
-  utils::logmesg(lmp," rhs after solve = ({}, {}, {}, {})\n", rhs[0], rhs[1], rhs[2], rhs[3]);
-
-
-  
 }
 
 /* ---------------------------------------------------------------------- */
@@ -520,8 +470,8 @@ void PairGranHookeHistoryEllipsoid::init_style()
 
   // error and warning checks
 
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag || !atom->ellipsoid_flag)
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, omega and ellipdoid flag");
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->ellipsoid_flag)
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and ellipdoid flag");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 

From 835d6f88432eafbe0b67cfc35a08cb2e6e206599 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 15:52:42 -0600
Subject: [PATCH 026/174] WIP: implement basics of Newton method for contact
 point calculation

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 281 ++++++++++++++++--
 .../pair_gran_hooke_history_ellipsoid.h       |   9 +
 2 files changed, 268 insertions(+), 22 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 84cd83d069a..fcde82aca86 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -35,6 +35,12 @@
 
 using namespace LAMMPS_NS;
 
+static constexpr int ITERMAX_NEWTON = 100;
+static constexpr double CONVERGENCE_NEWTON = 1e-6;
+static constexpr int ITERMAX_LINESEARCH = 10;
+static constexpr double PARAMETER_LINESEARCH = 1e-4;
+static constexpr double CUTBACK_LINESEARCH = 0.5;
+static constexpr int NUMSTEP_INITIAL_GUESS = 8;
 
 extern "C" { // General Matrices
     void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
@@ -124,6 +130,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double quat1, quat2, quat3, quat4;
   double block1, block2;
 
+  double X0[4], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+
   ev_init(eflag, vflag);
 
   int shearupdate = 1;
@@ -163,6 +171,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
   double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
 
   inum = list->inum;
   ilist = list->ilist;
@@ -193,27 +204,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
       if (factor_lj == 0) continue;
 
-      // if intersected at previous point in time, no need to check bounding sphere
-      if (touch[jj] == 1) continue;
-      else {
-        // check intersection of bounding spheres (radius stores bounding sphere for ellipsoids)
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-        radj = radius[j];
-        radsum = radi + radj; 
-        if (rsq >= radsum * radsum) 
-        {
-          touch[jj] = 0;
-          shear = &allshear[3 * jj];
-          shear[0] = 0.0;
-          shear[1] = 0.0;
-          shear[2] = 0.0;
-        }
-        else touch[jj] = 1; 
-      }
-
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
@@ -221,8 +211,59 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
       radj = radius[j];
       radsum = radi + radj;
 
+      bool touching;
       if (rsq >= radsum * radsum) {
+        touching = false;
+      // TODO: consider implementing a bounding-box check for hierchical detection
+      //       Could be useful for high aspect ratio grain.
+      //       Maybe make it an option, since it could be slower for low aspect ratio grains
+      } else {
+        // Super-ellipsoid contact detection between atoms i and j
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        // TODO: Not sure if j is accessible if ghost, radius is, so bonus props must have been communicated on ghost atoms I think
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+
+        if (touch[jj] == 1) {
+          // Continued contact: use grain true shape and last contact point
+          MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+          MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+          MathExtra::copy3(bonus[ellipsoid[j]].block, blocki);
+          MathExtra::copy3(bonus[ellipsoid[j]].block, shapej);
+          // TODO: implement neigh history!
+          // TODO: move contact point with rigid body motion of the pair ?
+          //       not sure if enough information to do that
+          MathExtra::copy3(prev_cp, X0);
+          int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
+          if (status == 0)
+            touching = true;
+          else if(status == 5)
+            touching = false;
+          else
+            error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+        } else {
+          // New contact: Build initial guess incrementally
+          MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
+          for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+            double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+            double shapei[3] = {1.0, 1.0, 1.0};
+            double shapej[3] = {1.0, 1.0, 1.0};
+            MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+            MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+            double blocki[2] = {2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0), 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0)};
+            double blockj[2] = {2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0), 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0)};
+            int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
+            if (status == 0)
+              touching = true;
+            else if(status == 5)
+              touching = false;
+            else
+              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+          }
+        }
+      }
 
+      if (!touching) {
         // unset non-touching neighbors
 
         touch[jj] = 0;
@@ -230,8 +271,12 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         shear[0] = 0.0;
         shear[1] = 0.0;
         shear[2] = 0.0;
-
       } else {
+        // TODO: Compute the force between the 2 superquadrics
+        MathExtra::copy3(X0, prev_cp);
+
+        // TODO: Everything below must be changed
+
         r = sqrt(rsq);
         rinv = 1.0 / r;
         rsqinv = 1.0 / rsq;
@@ -865,3 +910,195 @@ double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
   return cut;
 }
 
+
+// High performance versions
+// TODO: this creates a fair bit of code duplication
+//       but avoids recomputing some of the expensive pow(), etc that would come with creating 3 functions:
+//       compute_shape(), compute_gradient, compute_jacobian.
+//       not sure how to best do this without creating many small help functions
+//       Pushing that logic, the calculation of a_inv, etc is not necessary. could define shapeinv
+void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = xlocal[0] * a_inv;
+  double y_b = xlocal[1] * b_inv;
+  double z_c = xlocal[2] * c_inv;
+  double n1 = block[0];
+  double n2 = block[1];
+  // Consider simplifying with flag_super
+  double x_a_pow_n2_m2 = std::pow(std::abs(x_a), n2 - 2.0);
+  double x_a_pow_n2_m1 = x_a_pow_n2_m2 * x_a;
+  double y_b_pow_n2_m2 = std::pow(std::abs(y_b), n2 - 2.0);
+  double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
+
+  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
+  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
+  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
+
+  double z_c_pow_n1_m2 = std::pow(std::abs(z_c), n1 -2.0);
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
+  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
+  grad[2] = n1 * c_inv * (z_c_pow_n1_m2 * z_c) * signz;
+
+  // Equation (15)
+  double signxy = signx * signy;
+  hess[0][0] = a_inv * a_inv * (n1 * (n2 - 1.0) * x_a_pow_n2_m2 * nu_pow_n1_n2_m1 +
+                                (n1 - n2) * n1 * (x_a_pow_n2_m1 * x_a_pow_n2_m1) * nu_pow_n1_n2_m2);
+  hess[1][1] = b_inv * b_inv * (n1 * (n2 - 1.0) * y_b_pow_n2_m2 * nu_pow_n1_n2_m1 +
+                                (n1 - n2) * n1 * (y_b_pow_n2_m1 * y_b_pow_n2_m1) * nu_pow_n1_n2_m2);
+  hess[0][1] = hess[1][0] = a_inv * b_inv * (n1 - n2) * n1 * x_a_pow_n2_m1 * y_b_pow_n2_m1 * nu_pow_n1_n2_m2 * signxy;
+  hess[2][2] = c_inv * c_inv * n1 * (n1 - 1.0) * z_c_pow_n1_m2;
+  hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+}
+
+// High performance version
+double PairGranHookeHistoryEllipsoid::shape_and_gradient_local(const double* xlocal, const double* shape, const double* block, double* grad) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = xlocal[0] * a_inv;
+  double y_b = xlocal[1] * b_inv;
+  double z_c = xlocal[2] * c_inv;
+  double n1 = block[0];
+  double n2 = block[1];
+  // Consider simplifying with flag_super
+  double x_a_pow_n2_m1 = std::pow(std::abs(x_a), n2 - 1.0);
+  double y_b_pow_n2_m1 = std::pow(std::abs(y_b), n2 - 1.0);
+
+  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
+  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0);
+
+  double z_c_pow_n1_m1 = std::pow(std::abs(z_c), n1 - 1.0);
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
+  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
+  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
+
+  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
+}
+
+double PairGranHookeHistoryEllipsoid::compute_residual(double* xci, double Ri[3][3], double* shapei, double* blocki,
+                                                       double* xcj, double Rj[3][3], double* shapej, double* blockj,
+                                                       double* X, double* shapefunc, double* residual) {
+  double tmp[3];
+  double xi_local[3], xj_local[3];
+  double gradi[3], gradj[3];
+
+  MathExtra::sub3(X, xci, tmp);
+  MathExtra::transpose_matvec(Ri, tmp, xi_local);
+  shapefunc[0] = shape_and_gradient_local(xi_local, shapei, blocki, tmp);
+  MathExtra::matvec(Ri, tmp, gradi);
+
+  MathExtra::sub3(X, xcj, tmp);
+  MathExtra::transpose_matvec(Rj, tmp, xj_local);
+  shapefunc[1] = shape_and_gradient_local(xj_local, shapej, blockj, tmp);
+  MathExtra::matvec(Rj, tmp, gradj);
+
+  // Equation (23)
+  MathExtra::scaleadd3(X[3], gradj, gradi, residual);
+  residual[3] = shapefunc[0] - shapefunc[1];
+  return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
+}
+
+void PairGranHookeHistoryEllipsoid::compute_jacobian(double* xci, double Ri[3][3], double* shapei, double* blocki,
+                                                     double* xcj, double Rj[3][3], double* shapej, double* blockj,
+                                                     double* X, double* jacobian) {
+  double tmp_v[3], tmp_m[3][3];
+  double xi_local[3], xj_local[3];
+  double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
+
+  MathExtra::sub3(X, xci, tmp_v);
+  MathExtra::transpose_matvec(Ri, tmp_v, xi_local);
+  derivatives_local(xi_local, shapei, blocki, tmp_v, hessi);
+  MathExtra::matvec(Ri, tmp_v, gradi);
+  MathExtra::times3_transpose(hessi, Ri, tmp_m);
+  MathExtra::times3(Ri, tmp_m, hessi);
+
+  MathExtra::sub3(X, xcj, tmp_v);
+  MathExtra::transpose_matvec(Rj, tmp_v, xj_local);
+  derivatives_local(xj_local, shapej, blockj, tmp_v, hessj);
+  MathExtra::matvec(Rj, tmp_v, gradj);
+  MathExtra::times3_transpose(hessj, Rj, tmp_m);
+  MathExtra::times3(Rj, tmp_m, hessj);
+
+  // Jacobian (derivative of residual)
+  // 1D column-major matrix for LAPACK/linalg compatibility
+  for (int row = 0 ; row < 3 ; row++) {
+    for (int col = 0 ; col < 3 ; col++) {
+      jacobian[row + col*4] = hessi[row][col] + X[3] * hessj[row][col];
+    }
+    jacobian[row + 3*4] = gradj[row];
+  }
+  for (int col = 0 ; col < 3 ; col++) {
+    jacobian[3 + col*4] = gradi[col] - gradj[col];
+  }
+  jacobian[15] = 0.0;
+}
+
+
+int PairGranHookeHistoryEllipsoid::determine_contact_point(double* xci, double Ri[3][3], double* shapei, double* blocki,
+                                                            double* xcj, double Rj[3][3], double* shapej, double* blockj,
+                                                            double* X0) {
+  double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
+  bool converged(false);
+  norm = compute_residual(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X0, shapefunc, residual);
+  for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
+    norm_ini = norm;
+    compute_jacobian(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X0, jacobian);
+
+    // Solve Newton step
+    int lapack_error, ipiv[16];
+    const int n = 4;
+    const char trans = 'N';
+    const int nrhs = 1;
+    double rhs[4] = {-residual[0], -residual[1], -residual[2], -residual[3]};
+    dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
+    if (lapack_error)
+      return lapack_error;
+    dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
+    if (lapack_error)
+      return lapack_error;
+
+    // Backtracking line search
+    double a(1.0), X_line[4];
+    for (int iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
+      X_line[0] = X0[0] + a * rhs[0];
+      X_line[1] = X0[1] + a * rhs[1];
+      X_line[2] = X0[2] + a * rhs[2];
+      X_line[3] = X0[3] + a * rhs[3];
+
+      norm = compute_residual(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X_line, shapefunc, residual);
+      if (norm < norm_ini - PARAMETER_LINESEARCH * a * norm_ini)
+        break; // Armijo - Goldstein condition
+      else
+        a *= CUTBACK_LINESEARCH;
+    }
+    X0[0] = X_line[0];
+    X0[1] = X_line[1];
+    X0[2] = X_line[2];
+    X0[3] = X_line[3];
+
+    if (norm < CONVERGENCE_NEWTON) {
+      converged = true;
+      break;
+    }
+  }
+
+  // LAPACK error are within [-4, 4], use 5 non-touching, -5 for non-converging
+  if (!converged)
+    return -5;
+  if (shapefunc[0] <= 0.0 && shapefunc[1] <= 0.0)
+    return 5;
+
+  return 0;
+}
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index b6a79cf7ee8..73cba82c1a2 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -69,6 +69,15 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int nmax;                // allocated size of mass_rigid
 
   void allocate();
+
+ private:
+  static void derivatives_local(const double*, const double*, const double*, double*, double[3][3]); // High performance version
+  static double shape_and_gradient_local(const double*, const double*, const double*, double*); // High performance version
+  static double compute_residual(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*, double*, double*);
+  static void compute_jacobian(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*, double*);
+  static int determine_contact_point(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*);
+
+
 };
 
 }    // namespace LAMMPS_NS

From 3b3775cf68b851ece79fdabfc65595b2f2eb9aec Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 15:58:14 -0600
Subject: [PATCH 027/174] fix copy paste bugs

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index fcde82aca86..6c123cf46cd 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -227,8 +227,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           // Continued contact: use grain true shape and last contact point
           MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
           MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-          MathExtra::copy3(bonus[ellipsoid[j]].block, blocki);
-          MathExtra::copy3(bonus[ellipsoid[j]].block, shapej);
+          MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+          MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
           // TODO: implement neigh history!
           // TODO: move contact point with rigid body motion of the pair ?
           //       not sure if enough information to do that

From a8a65680ef889e13802eccca56a3f8ea3a819277 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 16:13:07 -0600
Subject: [PATCH 028/174] fix more small bugs: initialize Lagrange multiplier,
 and inverse logic for detection

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6c123cf46cd..1efe945d953 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -233,6 +233,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           // TODO: move contact point with rigid body motion of the pair ?
           //       not sure if enough information to do that
           MathExtra::copy3(prev_cp, X0);
+          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
           int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
           if (status == 0)
             touching = true;
@@ -243,8 +244,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         } else {
           // New contact: Build initial guess incrementally
           MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
           for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+            X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
             double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
             double shapei[3] = {1.0, 1.0, 1.0};
             double shapej[3] = {1.0, 1.0, 1.0};
@@ -1094,10 +1095,10 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(double* xci, double R
     }
   }
 
-  // LAPACK error are within [-4, 4], use 5 non-touching, -5 for non-converging
+  // LAPACK error are within [-4, 4], use 5 non-touching, -5 non-converging
   if (!converged)
     return -5;
-  if (shapefunc[0] <= 0.0 && shapefunc[1] <= 0.0)
+  if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
     return 5;
 
   return 0;

From 2cc6c1f09c7ece43d7f47ea0a2f0c4e72aa329d7 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 18:12:03 -0600
Subject: [PATCH 029/174] remove redefinition of shape and block to avoid
 masking. Only update the block info if it is not an ellipsoid

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1efe945d953..1237e8b56e7 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -247,12 +247,18 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
             X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
             double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-            double shapei[3] = {1.0, 1.0, 1.0};
-            double shapej[3] = {1.0, 1.0, 1.0};
+            shapei[0] = shapei[1] = shapei[2] = 1.0;
+            shapej[0] = shapej[1] = shapej[2] = 1.0;
             MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
             MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-            double blocki[2] = {2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0), 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0)};
-            double blockj[2] = {2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0), 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0)};
+            if (bonus[ellipsoid[i]].flag_super) { // not a big time save
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].blocki[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].blocki[1] - 2.0);
+            }
+            if (bonus[ellipsoid[j]].flag_super) {
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].blocki[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].blocki[1] - 2.0);
+            }
             int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
             if (status == 0)
               touching = true;

From aa0fecfc2ccaf9f7f079e2b49f95af1fd3cb614d Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 14 Nov 2025 18:17:51 -0600
Subject: [PATCH 030/174] fix non absolute values and abs vs fabs bugs in
 gradient and hessian

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 31 ++++++++++---------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1237e8b56e7..095ccf15fd5 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -928,22 +928,21 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
-  double x_a = xlocal[0] * a_inv;
-  double y_b = xlocal[1] * b_inv;
-  double z_c = xlocal[2] * c_inv;
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
   double n1 = block[0];
   double n2 = block[1];
-  // Consider simplifying with flag_super
-  double x_a_pow_n2_m2 = std::pow(std::abs(x_a), n2 - 2.0);
+  double x_a_pow_n2_m2 = std::pow(x_a, n2 - 2.0);
   double x_a_pow_n2_m1 = x_a_pow_n2_m2 * x_a;
-  double y_b_pow_n2_m2 = std::pow(std::abs(y_b), n2 - 2.0);
+  double y_b_pow_n2_m2 = std::pow(y_b, n2 - 2.0);
   double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
 
   double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
+  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0); // TODO: if n1=n2, this should be zero, not 1/nu. Guard against this by making multiple cases
   double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
 
-  double z_c_pow_n1_m2 = std::pow(std::abs(z_c), n1 -2.0);
+  double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
@@ -964,24 +963,26 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
 }
 
+
+
 // High performance version
 double PairGranHookeHistoryEllipsoid::shape_and_gradient_local(const double* xlocal, const double* shape, const double* block, double* grad) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
-  double x_a = xlocal[0] * a_inv;
-  double y_b = xlocal[1] * b_inv;
-  double z_c = xlocal[2] * c_inv;
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
   double n1 = block[0];
   double n2 = block[1];
   // Consider simplifying with flag_super
-  double x_a_pow_n2_m1 = std::pow(std::abs(x_a), n2 - 1.0);
-  double y_b_pow_n2_m1 = std::pow(std::abs(y_b), n2 - 1.0);
+  double x_a_pow_n2_m1 = std::pow(x_a, n2 - 1.0);
+  double y_b_pow_n2_m1 = std::pow(y_b, n2 - 1.0);
 
   double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0);
+  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0); // TODO: guard against n1 = n2
 
-  double z_c_pow_n1_m1 = std::pow(std::abs(z_c), n1 - 1.0);
+  double z_c_pow_n1_m1 = std::pow(z_c, n1 - 1.0);
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;

From b3e3b827835888c58141cbe3c3489ec36094cdb7 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sat, 15 Nov 2025 16:52:53 -0600
Subject: [PATCH 031/174] fix typo

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 095ccf15fd5..62187f03356 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -252,12 +252,12 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
             MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
             if (bonus[ellipsoid[i]].flag_super) { // not a big time save
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].blocki[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].blocki[1] - 2.0);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
             }
             if (bonus[ellipsoid[j]].flag_super) {
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].blocki[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].blocki[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
             }
             int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
             if (status == 0)

From db12cddb1ce5d405d08a64ce65aa0405df78bcbf Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sat, 15 Nov 2025 16:56:32 -0600
Subject: [PATCH 032/174] create special case for shape function, gradients and
 hessian of ellipsoids, and equal exponent grains, for robustness and
 performance

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 201 ++++++++++++++----
 .../pair_gran_hooke_history_ellipsoid.h       |  19 +-
 2 files changed, 171 insertions(+), 49 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 62187f03356..ef494e8f071 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -131,6 +131,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double block1, block2;
 
   double X0[4], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  // TODO: Maybe we can make flag_super of the grain an int instead, to cimplify when n1 = n2 ?
+  int flagi, flagj; // 0 : ellipsoid, 1 : equal exponents n1=n2, 2: general super-ellipsoid n1 >2, n2>2, n1!=n2
 
   ev_init(eflag, vflag);
 
@@ -920,10 +922,8 @@ double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
 
 // High performance versions
 // TODO: this creates a fair bit of code duplication
-//       but avoids recomputing some of the expensive pow(), etc that would come with creating 3 functions:
-//       compute_shape(), compute_gradient, compute_jacobian.
 //       not sure how to best do this without creating many small help functions
-//       Pushing that logic, the calculation of a_inv, etc is not necessary. could define shapeinv
+//       Pushing that logic, the calculation of a_inv, etc is not necessary. could define and store shapeinv
 void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
@@ -939,7 +939,7 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
 
   double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0); // TODO: if n1=n2, this should be zero, not 1/nu. Guard against this by making multiple cases
+  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
   double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
 
   double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
@@ -963,6 +963,72 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
 }
 
+// Special case for n2 = n2 = n > 2
+void PairGranHookeHistoryEllipsoid::derivatives_local_equaln(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+  double x_a_pow_n_m2 = std::pow(x_a, n - 2.0);
+  double y_b_pow_n_m2 = std::pow(y_b, n - 2.0);
+  double z_c_pow_n_m2 = std::pow(z_c, n - 2.0);
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n * a_inv * (x_a_pow_n_m2 * x_a) * signx;
+  grad[1] = n * b_inv * (y_b_pow_n_m2 * y_b) * signy;
+  grad[2] = n * c_inv * (z_c_pow_n_m2 * z_c) * signz;
+
+  // Equation (15)
+  double signxy = signx * signy;
+  hess[0][0] = a_inv * a_inv * n * (n - 1.0) * x_a_pow_n_m2;
+  hess[1][1] = b_inv * b_inv * n * (n - 1.0) * y_b_pow_n_m2;
+  hess[2][2] = c_inv * c_inv * n * (n - 1.0) * z_c_pow_n_m2;
+  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+}
+
+
+// Special case for n1 = n2 = 2
+void PairGranHookeHistoryEllipsoid::derivatives_local_ellips(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
+  double a = 2.0 / (shape[0] * shape[0]);
+  double b = 2.0 / (shape[1] * shape[1]);
+  double c = 2.0 / (shape[2] * shape[2]);
+  
+  // Equation (14) simplified for n1 = n2 = 2
+  grad[0] = a * xlocal[0];
+  grad[1] = b * xlocal[1];
+  grad[2] = c * xlocal[2];
+
+  // Equation (15)
+  hess[0][0] = a;
+  hess[1][1] = b;
+  hess[2][2] = c;
+  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+}
+
+void PairGranHookeHistoryEllipsoid::derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
+  double xlocal[3], tmp_v[3], tmp_m[3][3];
+  MathExtra::sub3(X0, xc, tmp_v);
+  MathExtra::transpose_matvec(R, tmp_v, xlocal);
+  switch (flag) {
+    case 0:
+      derivatives_local_ellips(xlocal, shape, tmp_v, hess);
+      break;
+    case 1:
+      derivatives_local_equaln(xlocal, shape, block[0], tmp_v, hess);
+      break;
+    case 2:
+      derivatives_local(xlocal, shape, block, tmp_v, hess);
+      break;
+  }
+  MathExtra::matvec(R, tmp_v, grad);
+  MathExtra::times3_transpose(hess, R, tmp_m);
+  MathExtra::times3(R, tmp_m, hess);
+}
 
 
 // High performance version
@@ -975,12 +1041,12 @@ double PairGranHookeHistoryEllipsoid::shape_and_gradient_local(const double* xlo
   double z_c = std::fabs(xlocal[2] * c_inv);
   double n1 = block[0];
   double n2 = block[1];
-  // Consider simplifying with flag_super
+
   double x_a_pow_n2_m1 = std::pow(x_a, n2 - 1.0);
   double y_b_pow_n2_m1 = std::pow(y_b, n2 - 1.0);
 
   double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0); // TODO: guard against n1 = n2
+  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0);
 
   double z_c_pow_n1_m1 = std::pow(z_c, n1 - 1.0);
 
@@ -995,22 +1061,69 @@ double PairGranHookeHistoryEllipsoid::shape_and_gradient_local(const double* xlo
   return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
 }
 
-double PairGranHookeHistoryEllipsoid::compute_residual(double* xci, double Ri[3][3], double* shapei, double* blocki,
-                                                       double* xcj, double Rj[3][3], double* shapej, double* blockj,
-                                                       double* X, double* shapefunc, double* residual) {
-  double tmp[3];
-  double xi_local[3], xj_local[3];
-  double gradi[3], gradj[3];
+// Special case for n2 = n2 = n > 2
+double PairGranHookeHistoryEllipsoid::shape_and_gradient_local_equaln(const double* xlocal, const double* shape, const double n, double* grad) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+
+  double x_a_pow_n_m1 = std::pow(x_a, n - 1.0);
+  double y_b_pow_n_m1 = std::pow(y_b, n - 1.0);
+  double z_c_pow_n_m1 = std::pow(z_c, n - 1.0);
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
+  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
+  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
+
+  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
+}
 
-  MathExtra::sub3(X, xci, tmp);
-  MathExtra::transpose_matvec(Ri, tmp, xi_local);
-  shapefunc[0] = shape_and_gradient_local(xi_local, shapei, blocki, tmp);
-  MathExtra::matvec(Ri, tmp, gradi);
+// Special case for n1 = n2 = 2
+double PairGranHookeHistoryEllipsoid::shape_and_gradient_local_ellips(const double* xlocal, const double* shape, double* grad) {
+  double a = 2.0 / (shape[0] * shape[0]);
+  double b = 2.0 / (shape[1] * shape[1]);
+  double c = 2.0 / (shape[2] * shape[2]);
 
-  MathExtra::sub3(X, xcj, tmp);
-  MathExtra::transpose_matvec(Rj, tmp, xj_local);
-  shapefunc[1] = shape_and_gradient_local(xj_local, shapej, blockj, tmp);
-  MathExtra::matvec(Rj, tmp, gradj);
+  // Equation (14) simplified for n1 = n2 = 2
+  grad[0] = a * xlocal[0];
+  grad[1] = b * xlocal[1];
+  grad[2] = c * xlocal[2];
+
+  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
+}
+
+double PairGranHookeHistoryEllipsoid::shape_and_gradient_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad) {
+  double shapefunc, tmp[3], xlocal[3];
+  MathExtra::sub3(X0, xc, tmp);
+  MathExtra::transpose_matvec(R, tmp, xlocal);
+  switch (flag) {
+    case 0:
+      shapefunc = shape_and_gradient_local_ellips(xlocal, shape, tmp);
+      break;
+    case 1:
+      shapefunc = shape_and_gradient_local_equaln(xlocal, shape, block[0], tmp);
+      break;
+    case 2:
+      shapefunc = shape_and_gradient_local(xlocal, shape, block, tmp);
+      break;
+  }
+  MathExtra::matvec(R, tmp, grad);
+  return shapefunc;
+}
+
+double PairGranHookeHistoryEllipsoid::compute_residual(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                                       const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
+                                                       const double* X, double* shapefunc, double* residual) {
+  double gradi[3], gradj[3];
+  shapefunc[0] = shape_and_gradient_global(xci, Ri, shapei, blocki, flagi, X, gradi);
+  shapefunc[1] = shape_and_gradient_global(xcj, Rj, shapej, blockj, flagj, X, gradj);
 
   // Equation (23)
   MathExtra::scaleadd3(X[3], gradj, gradi, residual);
@@ -1018,26 +1131,13 @@ double PairGranHookeHistoryEllipsoid::compute_residual(double* xci, double Ri[3]
   return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
 }
 
-void PairGranHookeHistoryEllipsoid::compute_jacobian(double* xci, double Ri[3][3], double* shapei, double* blocki,
-                                                     double* xcj, double Rj[3][3], double* shapej, double* blockj,
-                                                     double* X, double* jacobian) {
-  double tmp_v[3], tmp_m[3][3];
-  double xi_local[3], xj_local[3];
+void PairGranHookeHistoryEllipsoid::compute_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                                     const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
+                                                     const double* X, double* jacobian) {
   double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
 
-  MathExtra::sub3(X, xci, tmp_v);
-  MathExtra::transpose_matvec(Ri, tmp_v, xi_local);
-  derivatives_local(xi_local, shapei, blocki, tmp_v, hessi);
-  MathExtra::matvec(Ri, tmp_v, gradi);
-  MathExtra::times3_transpose(hessi, Ri, tmp_m);
-  MathExtra::times3(Ri, tmp_m, hessi);
-
-  MathExtra::sub3(X, xcj, tmp_v);
-  MathExtra::transpose_matvec(Rj, tmp_v, xj_local);
-  derivatives_local(xj_local, shapej, blockj, tmp_v, hessj);
-  MathExtra::matvec(Rj, tmp_v, gradj);
-  MathExtra::times3_transpose(hessj, Rj, tmp_m);
-  MathExtra::times3(Rj, tmp_m, hessj);
+  derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
+  derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
 
   // Jacobian (derivative of residual)
   // 1D column-major matrix for LAPACK/linalg compatibility
@@ -1054,15 +1154,18 @@ void PairGranHookeHistoryEllipsoid::compute_jacobian(double* xci, double Ri[3][3
 }
 
 
-int PairGranHookeHistoryEllipsoid::determine_contact_point(double* xci, double Ri[3][3], double* shapei, double* blocki,
-                                                            double* xcj, double Rj[3][3], double* shapej, double* blockj,
-                                                            double* X0) {
+int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
+                                                           const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
+                                                           double* X0) {
   double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
   bool converged(false);
-  norm = compute_residual(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X0, shapefunc, residual);
+  int flagi = determine_flag(blocki);
+  int flagj = determine_flag(blockj);
+
+  norm = compute_residual(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual);
   for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
     norm_ini = norm;
-    compute_jacobian(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X0, jacobian);
+    compute_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, jacobian);
 
     // Solve Newton step
     int lapack_error, ipiv[16];
@@ -1085,7 +1188,7 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(double* xci, double R
       X_line[2] = X0[2] + a * rhs[2];
       X_line[3] = X0[3] + a * rhs[3];
 
-      norm = compute_residual(xci, Ri, shapei, blocki, xcj, Rj, shapej, blockj, X_line, shapefunc, residual);
+      norm = compute_residual(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X_line, shapefunc, residual);
       if (norm < norm_ini - PARAMETER_LINESEARCH * a * norm_ini)
         break; // Armijo - Goldstein condition
       else
@@ -1110,3 +1213,13 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(double* xci, double R
 
   return 0;
 }
+
+int PairGranHookeHistoryEllipsoid::determine_flag(const double* block) {
+  const double EPSBLOCK(1e-3);
+  int flag(2);
+  if ((std::fabs(block[0] - 2) <= EPSBLOCK) || (std::fabs(block[1] - 2) <= EPSBLOCK))
+    flag = 0;
+  else if (std::fabs(block[0] - block[1]) <= EPSBLOCK)
+    flag = 1;
+  return flag;
+}
\ No newline at end of file
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 73cba82c1a2..221d967b38d 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -71,11 +71,20 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   void allocate();
 
  private:
-  static void derivatives_local(const double*, const double*, const double*, double*, double[3][3]); // High performance version
-  static double shape_and_gradient_local(const double*, const double*, const double*, double*); // High performance version
-  static double compute_residual(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*, double*, double*);
-  static void compute_jacobian(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*, double*);
-  static int determine_contact_point(double*, double[3][3], double*, double*, double*, double[3][3], double*, double*, double*);
+  static void derivatives_local(const double*, const double*, const double*, double*, double[3][3]);
+  static void derivatives_local_equaln(const double*, const double*, const double, double*, double[3][3]);
+  static void derivatives_local_ellips(const double*, const double*, double*, double[3][3]);
+  static void derivatives_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double[3][3]);
+
+  static double shape_and_gradient_local(const double*, const double*, const double*, double*);
+  static double shape_and_gradient_local_equaln(const double*, const double*, const double, double*);
+  static double shape_and_gradient_local_ellips(const double*, const double*, double*);
+  static double shape_and_gradient_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*);
+
+  static double compute_residual(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double*);
+  static void compute_jacobian(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*);
+  static int determine_contact_point(const double*, const double[3][3], const double*, const double*, const double*, const double[3][3], const double*, const double*, double*);
+  static int determine_flag(const double*);
 
 
 };

From 999afa29b79d8a84a59210746d4391915f466b57 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sat, 15 Nov 2025 17:29:33 -0600
Subject: [PATCH 033/174] fix logic bug in flagging type of ellipsoid

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index ef494e8f071..1a0edc146cd 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -997,7 +997,7 @@ void PairGranHookeHistoryEllipsoid::derivatives_local_ellips(const double* xloca
   double a = 2.0 / (shape[0] * shape[0]);
   double b = 2.0 / (shape[1] * shape[1]);
   double c = 2.0 / (shape[2] * shape[2]);
-  
+
   // Equation (14) simplified for n1 = n2 = 2
   grad[0] = a * xlocal[0];
   grad[1] = b * xlocal[1];
@@ -1217,7 +1217,7 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, co
 int PairGranHookeHistoryEllipsoid::determine_flag(const double* block) {
   const double EPSBLOCK(1e-3);
   int flag(2);
-  if ((std::fabs(block[0] - 2) <= EPSBLOCK) || (std::fabs(block[1] - 2) <= EPSBLOCK))
+  if ((std::fabs(block[0] - 2) <= EPSBLOCK) && (std::fabs(block[1] - 2) <= EPSBLOCK))
     flag = 0;
   else if (std::fabs(block[0] - block[1]) <= EPSBLOCK)
     flag = 1;

From 28a8369e0c368f7d928176e8a171ad65212dc81c Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 16 Nov 2025 15:46:46 -0600
Subject: [PATCH 034/174] clean up shape and derivatives functions. Refactor
 Newton's method to avoid re-computing many expensive variables.

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 276 +++++++++---------
 .../pair_gran_hooke_history_ellipsoid.h       |  25 +-
 2 files changed, 143 insertions(+), 158 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1a0edc146cd..ee0f84d8c99 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -919,12 +919,27 @@ double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
   return cut;
 }
 
+double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
+  double shapefunc;
+  switch (flag) {
+    case 0: {
+      shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
+      break;
+    }
+    case 1: {
+      shapefunc = shape_and_derivatives_local_n1equaln2(xlocal, shape, block[0], grad, hess);
+      break;
+    }
+    case 2: {
+      shapefunc = shape_and_derivatives_local_superquad(xlocal, shape, block, grad, hess);
+      break;
+    }
+  }
+  return shapefunc;
+}
 
-// High performance versions
-// TODO: this creates a fair bit of code duplication
-//       not sure how to best do this without creating many small help functions
-//       Pushing that logic, the calculation of a_inv, etc is not necessary. could define and store shapeinv
-void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
+// General case for n1 != n2 > 2
+double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -943,6 +958,7 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
 
   double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
+  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
@@ -950,7 +966,7 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
   grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
   grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
-  grad[2] = n1 * c_inv * (z_c_pow_n1_m2 * z_c) * signz;
+  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
 
   // Equation (15)
   double signxy = signx * signy;
@@ -961,10 +977,12 @@ void PairGranHookeHistoryEllipsoid::derivatives_local(const double* xlocal, cons
   hess[0][1] = hess[1][0] = a_inv * b_inv * (n1 - n2) * n1 * x_a_pow_n2_m1 * y_b_pow_n2_m1 * nu_pow_n1_n2_m2 * signxy;
   hess[2][2] = c_inv * c_inv * n1 * (n1 - 1.0) * z_c_pow_n1_m2;
   hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
 }
 
 // Special case for n2 = n2 = n > 2
-void PairGranHookeHistoryEllipsoid::derivatives_local_equaln(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
+double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -972,16 +990,19 @@ void PairGranHookeHistoryEllipsoid::derivatives_local_equaln(const double* xloca
   double y_b = std::fabs(xlocal[1] * b_inv);
   double z_c = std::fabs(xlocal[2] * c_inv);
   double x_a_pow_n_m2 = std::pow(x_a, n - 2.0);
+  double x_a_pow_n_m1 = x_a_pow_n_m2 * x_a;
   double y_b_pow_n_m2 = std::pow(y_b, n - 2.0);
+  double y_b_pow_n_m1 = y_b_pow_n_m2 * y_b;
   double z_c_pow_n_m2 = std::pow(z_c, n - 2.0);
+  double z_c_pow_n_m1 = z_c_pow_n_m2 * z_c;
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
   double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
   double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
-  grad[0] = n * a_inv * (x_a_pow_n_m2 * x_a) * signx;
-  grad[1] = n * b_inv * (y_b_pow_n_m2 * y_b) * signy;
-  grad[2] = n * c_inv * (z_c_pow_n_m2 * z_c) * signz;
+  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
+  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
+  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
 
   // Equation (15)
   double signxy = signx * signy;
@@ -989,11 +1010,13 @@ void PairGranHookeHistoryEllipsoid::derivatives_local_equaln(const double* xloca
   hess[1][1] = b_inv * b_inv * n * (n - 1.0) * y_b_pow_n_m2;
   hess[2][2] = c_inv * c_inv * n * (n - 1.0) * z_c_pow_n_m2;
   hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
 }
 
 
 // Special case for n1 = n2 = 2
-void PairGranHookeHistoryEllipsoid::derivatives_local_ellips(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
+double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
   double a = 2.0 / (shape[0] * shape[0]);
   double b = 2.0 / (shape[1] * shape[1]);
   double c = 2.0 / (shape[2] * shape[2]);
@@ -1008,151 +1031,54 @@ void PairGranHookeHistoryEllipsoid::derivatives_local_ellips(const double* xloca
   hess[1][1] = b;
   hess[2][2] = c;
   hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
 }
 
-void PairGranHookeHistoryEllipsoid::derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
-  double xlocal[3], tmp_v[3], tmp_m[3][3];
+
+double PairGranHookeHistoryEllipsoid::shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
+  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v);
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  switch (flag) {
-    case 0:
-      derivatives_local_ellips(xlocal, shape, tmp_v, hess);
-      break;
-    case 1:
-      derivatives_local_equaln(xlocal, shape, block[0], tmp_v, hess);
-      break;
-    case 2:
-      derivatives_local(xlocal, shape, block, tmp_v, hess);
-      break;
-  }
+  shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);
-}
-
-
-// High performance version
-double PairGranHookeHistoryEllipsoid::shape_and_gradient_local(const double* xlocal, const double* shape, const double* block, double* grad) {
-  double a_inv = 1.0 / shape[0];
-  double b_inv = 1.0 / shape[1];
-  double c_inv = 1.0 / shape[2];
-  double x_a = std::fabs(xlocal[0] * a_inv);
-  double y_b = std::fabs(xlocal[1] * b_inv);
-  double z_c = std::fabs(xlocal[2] * c_inv);
-  double n1 = block[0];
-  double n2 = block[1];
-
-  double x_a_pow_n2_m1 = std::pow(x_a, n2 - 1.0);
-  double y_b_pow_n2_m1 = std::pow(y_b, n2 - 1.0);
-
-  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m1 = std::pow(nu, n1/n2 - 1.0);
-
-  double z_c_pow_n1_m1 = std::pow(z_c, n1 - 1.0);
-
-  // Equation (14)
-  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
-  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
-  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
-  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
-  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
-  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
-
-  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
-}
-
-// Special case for n2 = n2 = n > 2
-double PairGranHookeHistoryEllipsoid::shape_and_gradient_local_equaln(const double* xlocal, const double* shape, const double n, double* grad) {
-  double a_inv = 1.0 / shape[0];
-  double b_inv = 1.0 / shape[1];
-  double c_inv = 1.0 / shape[2];
-  double x_a = std::fabs(xlocal[0] * a_inv);
-  double y_b = std::fabs(xlocal[1] * b_inv);
-  double z_c = std::fabs(xlocal[2] * c_inv);
-
-  double x_a_pow_n_m1 = std::pow(x_a, n - 1.0);
-  double y_b_pow_n_m1 = std::pow(y_b, n - 1.0);
-  double z_c_pow_n_m1 = std::pow(z_c, n - 1.0);
-
-  // Equation (14)
-  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
-  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
-  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
-  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
-  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
-  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
-
-  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
-}
-
-// Special case for n1 = n2 = 2
-double PairGranHookeHistoryEllipsoid::shape_and_gradient_local_ellips(const double* xlocal, const double* shape, double* grad) {
-  double a = 2.0 / (shape[0] * shape[0]);
-  double b = 2.0 / (shape[1] * shape[1]);
-  double c = 2.0 / (shape[2] * shape[2]);
-
-  // Equation (14) simplified for n1 = n2 = 2
-  grad[0] = a * xlocal[0];
-  grad[1] = b * xlocal[1];
-  grad[2] = c * xlocal[2];
-
-  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
-}
-
-double PairGranHookeHistoryEllipsoid::shape_and_gradient_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad) {
-  double shapefunc, tmp[3], xlocal[3];
-  MathExtra::sub3(X0, xc, tmp);
-  MathExtra::transpose_matvec(R, tmp, xlocal);
-  switch (flag) {
-    case 0:
-      shapefunc = shape_and_gradient_local_ellips(xlocal, shape, tmp);
-      break;
-    case 1:
-      shapefunc = shape_and_gradient_local_equaln(xlocal, shape, block[0], tmp);
-      break;
-    case 2:
-      shapefunc = shape_and_gradient_local(xlocal, shape, block, tmp);
-      break;
-  }
-  MathExtra::matvec(R, tmp, grad);
   return shapefunc;
 }
 
-double PairGranHookeHistoryEllipsoid::compute_residual(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                                       const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                                       const double* X, double* shapefunc, double* residual) {
-  double gradi[3], gradj[3];
-  shapefunc[0] = shape_and_gradient_global(xci, Ri, shapei, blocki, flagi, X, gradi);
-  shapefunc[1] = shape_and_gradient_global(xcj, Rj, shapej, blockj, flagj, X, gradj);
-
+double PairGranHookeHistoryEllipsoid::compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual) {
   // Equation (23)
-  MathExtra::scaleadd3(X[3], gradj, gradi, residual);
-  residual[3] = shapefunc[0] - shapefunc[1];
+  MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
+  residual[3] = shapefunci - shapefuncj;
   return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
 }
 
-void PairGranHookeHistoryEllipsoid::compute_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                                     const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                                     const double* X, double* jacobian) {
-  double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
-
-  derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
-  derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
-
+void PairGranHookeHistoryEllipsoid::compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
   // Jacobian (derivative of residual)
   // 1D column-major matrix for LAPACK/linalg compatibility
   for (int row = 0 ; row < 3 ; row++) {
     for (int col = 0 ; col < 3 ; col++) {
-      jacobian[row + col*4] = hessi[row][col] + X[3] * hessj[row][col];
+      jacobian[row + col*4] = hessi_global[row][col] + mu2 * hessj_global[row][col];
     }
-    jacobian[row + 3*4] = gradj[row];
+    jacobian[row + 3*4] = gradj_global[row];
   }
   for (int col = 0 ; col < 3 ; col++) {
-    jacobian[3 + col*4] = gradi[col] - gradj[col];
+    jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
   }
   jacobian[15] = 0.0;
 }
 
+double PairGranHookeHistoryEllipsoid::compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                                                    const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
+                                                                    const double* X, double* shapefunc, double* residual, double* jacobian) {
+  double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
+  shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
+  shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
+  compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
+  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual);
+}
+
 
 int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
                                                            const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
@@ -1162,10 +1088,9 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, co
   int flagi = determine_flag(blocki);
   int flagj = determine_flag(blockj);
 
-  norm = compute_residual(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual);
+  norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
   for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
     norm_ini = norm;
-    compute_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, jacobian);
 
     // Solve Newton step
     int lapack_error, ipiv[16];
@@ -1182,27 +1107,88 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, co
 
     // Backtracking line search
     double a(1.0), X_line[4];
-    for (int iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
+    int iter_ls;
+    for (iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
       X_line[0] = X0[0] + a * rhs[0];
       X_line[1] = X0[1] + a * rhs[1];
       X_line[2] = X0[2] + a * rhs[2];
       X_line[3] = X0[3] + a * rhs[3];
 
-      norm = compute_residual(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X_line, shapefunc, residual);
-      if (norm < norm_ini - PARAMETER_LINESEARCH * a * norm_ini)
-        break; // Armijo - Goldstein condition
-      else
+      // Line search iterates not selected for the next Newton iteration
+      // do not need to compute the expensive Jacobian, only the residual.
+      // We want to avoid calling `compute_residual_and_jacobian()` for each
+      // line search iterate.
+      // However, many intermediate variables that are costly to compute
+      // are shared by the local gradient and local hessian calculations.
+      // We want to avoid calling `compute_residual()` followed by `compute_jacobian()`
+      // for the iterates that satisfy the descent condition.
+      // To do so, we duplicate `compute_residual_and_jacobian()`, but only
+      // build the global hessians if the descent condition is satisfied and
+      // the iterate will be used in the next Newton step.
+      // This leads to some code duplication, and still computes
+      // the local hessians even when they are not necessary.
+      // This seems to be an acceptable in-between of performance and clean code.
+      // As most of the cost in the Hessian is in the 2 matrix products to
+      // Compute the global matrix from the local one
+
+      // One alternative would be to store the intermediate variables from
+      // the local gradient calculation when calling `shape_and_gradient_local()`,
+      // and re-use them during the local hessian calculation (function that 
+      // calculates only the Hessian from these intermediate values would need
+      // to be implemented).
+      // This seems a bit clunky just to save the few multiplications of the
+      // local hessian calculation, that is why I did not do it. I am open to
+      // other ideas and solutions.
+      // Even then, we would have some code duplication with `compute_residual_and_jacobian()`
+      // So maybe I am overthinking this...
+
+      double xilocal[3], gradi[3], hessi[3][3], xjlocal[3], gradj[3], hessj[3][3], tmp_v[3];
+
+      MathExtra::sub3(X_line, xci, tmp_v);
+      MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
+      shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
+      MathExtra::matvec(Ri, tmp_v, gradi);
+
+      MathExtra::sub3(X_line, xcj, tmp_v);
+      MathExtra::transpose_matvec(Rj, tmp_v, xjlocal);
+      shapefunc[1] = shape_and_derivatives_local(xjlocal, shapej, blockj, flagj, tmp_v, hessj);
+      MathExtra::matvec(Rj, tmp_v, gradj);
+
+      norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
+
+      if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
         a *= CUTBACK_LINESEARCH;
+      } else {
+        X0[0] = X_line[0];
+        X0[1] = X_line[1];
+        X0[2] = X_line[2];
+        X0[3] = X_line[3];
+        // Only compute the jacobian if there is another Newton iteration to come
+        if (norm > CONVERGENCE_NEWTON) {
+          double tmp_m[3][3];
+          MathExtra::times3_transpose(hessi, Ri, tmp_m);
+          MathExtra::times3(Ri, tmp_m, hessi);
+          MathExtra::times3_transpose(hessj, Rj, tmp_m);
+          MathExtra::times3(Rj, tmp_m, hessj);
+          compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
+        } else {
+          converged = true;
+        }
+        break;
+      }
     }
-    X0[0] = X_line[0];
-    X0[1] = X_line[1];
-    X0[2] = X_line[2];
-    X0[3] = X_line[3];
 
-    if (norm < CONVERGENCE_NEWTON) {
-      converged = true;
-      break;
+    // If no descent with line search, take full step, try to escape bad region
+    if (iter_ls == ITERMAX_LINESEARCH) {
+      X0[0] += rhs[0];
+      X0[1] += rhs[1];
+      X0[2] += rhs[2];
+      X0[3] += rhs[3];
+      norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
     }
+
+    if (converged)
+      break;
   }
 
   // LAPACK error are within [-4, 4], use 5 non-touching, -5 non-converging
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 221d967b38d..738d9546624 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -71,22 +71,21 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   void allocate();
 
  private:
-  static void derivatives_local(const double*, const double*, const double*, double*, double[3][3]);
-  static void derivatives_local_equaln(const double*, const double*, const double, double*, double[3][3]);
-  static void derivatives_local_ellips(const double*, const double*, double*, double[3][3]);
-  static void derivatives_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double[3][3]);
-
-  static double shape_and_gradient_local(const double*, const double*, const double*, double*);
-  static double shape_and_gradient_local_equaln(const double*, const double*, const double, double*);
-  static double shape_and_gradient_local_ellips(const double*, const double*, double*);
-  static double shape_and_gradient_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*);
-
-  static double compute_residual(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double*);
-  static void compute_jacobian(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*);
+  static double shape_and_derivatives_local(const double*, const double*, const double*, const int, double*, double[3][3]);
+  static double shape_and_derivatives_local_superquad(const double*, const double*, const double*, double*, double[3][3]);
+  static double shape_and_derivatives_local_n1equaln2(const double*, const double*, const double, double*, double[3][3]);
+  static double shape_and_derivatives_local_ellipsoid(const double*, const double*, double*, double[3][3]);
+  static double shape_and_derivatives_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double[3][3]);
+
+  static double compute_residual(const double, const double*, const double, const double*, const double, double*);
+  static void compute_jacobian(const double*, const double[3][3], const double*, const double[3][3], const double, double* jacobian);
+  static double compute_residual_and_jacobian(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double*, double*);
   static int determine_contact_point(const double*, const double[3][3], const double*, const double*, const double*, const double[3][3], const double*, const double*, double*);
   static int determine_flag(const double*);
 
-
+  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
+  static double shape_and_gradient_local(const double*, const double*, const double*, double*); // would return a vector of temporary variables
+  static double hessian_local(const double*, const double*, const double*, double*); // would use the above vector of temporary variables to compute local hessian
 };
 
 }    // namespace LAMMPS_NS

From da81a173e5f6571f59a03d80e2bf0a7345462fed Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 19 Nov 2025 16:19:25 -0600
Subject: [PATCH 035/174] fix bug in initial shape evolution for first contact
 detection

---
 .../pair_gran_hooke_history_ellipsoid.cpp         | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index ee0f84d8c99..1e27035095e 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -245,12 +245,21 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
         } else {
           // New contact: Build initial guess incrementally
-          MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+          // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
+          //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
+          //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
+          //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
+          //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
+          //       This makes me think using radi and radj could be fine! To be investigated
+          double reqi = std::cbrt(bonus[ellipsoid[i]].shape[0] * bonus[ellipsoid[i]].shape[1] * bonus[ellipsoid[i]].shape[2]);
+          double reqj = std::cbrt(bonus[ellipsoid[j]].shape[0] * bonus[ellipsoid[j]].shape[1] * bonus[ellipsoid[j]].shape[2]);
+          MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+          //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
           for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
             X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
             double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-            shapei[0] = shapei[1] = shapei[2] = 1.0;
-            shapej[0] = shapej[1] = shapej[2] = 1.0;
+            shapei[0] = shapei[1] = shapei[2] = reqi;
+            shapej[0] = shapej[1] = shapej[2] = reqj;
             MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
             MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
             if (bonus[ellipsoid[i]].flag_super) { // not a big time save

From 86d9aefa2b9ed87922b087dc443ecd85d765b058 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 19 Nov 2025 16:53:26 -0600
Subject: [PATCH 036/174] return the contact normal from
 determine_contact_point for overlap and force calculation

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 13 +++++++++----
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.h   |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1e27035095e..6d60b518c3d 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -130,7 +130,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double quat1, quat2, quat3, quat4;
   double block1, block2;
 
-  double X0[4], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
   // TODO: Maybe we can make flag_super of the grain an int instead, to cimplify when n1 = n2 ?
   int flagi, flagj; // 0 : ellipsoid, 1 : equal exponents n1=n2, 2: general super-ellipsoid n1 >2, n2>2, n1!=n2
 
@@ -236,7 +236,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           //       not sure if enough information to do that
           MathExtra::copy3(prev_cp, X0);
           X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
-          int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
+          int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
           if (status == 0)
             touching = true;
           else if(status == 5)
@@ -270,7 +270,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
               blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
             }
-            int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0);
+            int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
             if (status == 0)
               touching = true;
             else if(status == 5)
@@ -1091,7 +1091,7 @@ double PairGranHookeHistoryEllipsoid::compute_residual_and_jacobian(const double
 
 int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
                                                            const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
-                                                           double* X0) {
+                                                           double* X0, double* nij) {
   double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
   bool converged(false);
   int flagi = determine_flag(blocki);
@@ -1182,6 +1182,11 @@ int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, co
           compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
         } else {
           converged = true;
+          // TODO: consider testing picking the normal with the least error
+          //       i.e., likely the grain with the smallest curvature (Hessian norm)
+          //       or some other measure like average gradients.
+          //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
+          MathExtra::normalize3(gradi, nij);
         }
         break;
       }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 738d9546624..800c2dcc2a8 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -80,7 +80,7 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   static double compute_residual(const double, const double*, const double, const double*, const double, double*);
   static void compute_jacobian(const double*, const double[3][3], const double*, const double[3][3], const double, double* jacobian);
   static double compute_residual_and_jacobian(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double*, double*);
-  static int determine_contact_point(const double*, const double[3][3], const double*, const double*, const double*, const double[3][3], const double*, const double*, double*);
+  static int determine_contact_point(const double*, const double[3][3], const double*, const double*, const double*, const double[3][3], const double*, const double*, double*, double*);
   static int determine_flag(const double*);
 
   // Below not implemented. Placeholder if we decide not to compute local hessian in line search

From 0f990d384941c7dc1481b9992aa3a6ecb731ad3a Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 20 Nov 2025 16:46:20 +0100
Subject: [PATCH 037/174] Added handwritten gauss elimination solvers for 4*4
 system

---
 src/ASPHERE/math_extra_superellipsoids.h | 208 +++++++++++++++++++++++
 1 file changed, 208 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index c1d28604cd7..95b269aa54e 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -18,6 +18,7 @@
 #ifndef LMP_MATH_EXTRA_SUPERELLIPOIDS_H
 #define LMP_MATH_EXTRA_SUPERELLIPOIDS_H
 
+#include <cmath>
 #include "math_extra.h"
 
 namespace MathExtraSuperellipsoids {
@@ -39,6 +40,10 @@ namespace MathExtraSuperellipsoids {
 
   inline double det4_M44_zero(const double m[4][4]);
 
+  // 4 by 4 sytems solvers, they all overwrite b with the solution
+  inline bool solve_4x4_manual(double A[16], double b[4]);  
+  inline bool solve_4x4_robust(double A[16], double b[4]);
+  inline bool solve_4x4_robust_unrolled(double A[16], double b[4]); 
 
   // ADD CONTACT DETECTION HERE
 
@@ -83,6 +88,209 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
     return ans;
 }
 
+inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
+    // 1. Pivot 0 
+    double inv0 = 1.0 / A[0];
+    double m1 = A[4] * inv0;
+    double m2 = A[8] * inv0;
+    double m3 = A[12] * inv0;
+
+    A[5] -= m1 * A[1]; A[6] -= m1 * A[2]; A[7] -= m1 * A[3]; b[1] -= m1 * b[0];
+    A[9] -= m2 * A[1]; A[10] -= m2 * A[2]; A[11] -= m2 * A[3]; b[2] -= m2 * b[0];
+    A[13] -= m3 * A[1]; A[14] -= m3 * A[2]; A[15] -= m3 * A[3]; b[3] -= m3 * b[0];
+
+    // 2. Pivot 1 
+    double inv1 = 1.0 / A[5];
+    double m4 = A[9] * inv1;
+    double m5 = A[13] * inv1;
+
+    A[10] -= m4 * A[6]; A[11] -= m4 * A[7]; b[2] -= m4 * b[1];
+    A[14] -= m5 * A[6]; A[15] -= m5 * A[7]; b[3] -= m5 * b[1];
+
+    // 3. Pivot 2
+    double inv2 = 1.0 / A[10];
+    double m6 = A[14] * inv2;
+
+    A[15] -= m6 * A[11]; b[3] -= m6 * b[2];
+
+    // 4. Backward Substitution
+    b[3] = b[3] / A[15];
+    b[2] = (b[2] - A[11] * b[3]) * inv2;
+    b[1] = (b[1] - A[7] * b[3] - A[6] * b[2]) * inv1;
+    b[0] = (b[0] - A[3] * b[3] - A[2] * b[2] - A[1] * b[1]) * inv0;
+
+    return true;
+}
+
+inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]) {
+    // Helper lambda to access A[row, col]
+    auto at = [&](int r, int c) -> double& { return A[r * 4 + c]; };
+
+    // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
+    
+    for (int i = 0; i < 3; ++i) { // Loop over columns 0, 1, 2
+        // 1. Find the Pivot (Max absolute value in this column)
+        int pivot_row = i;
+        double max_val = std::abs(at(i, i));
+
+        for (int k = i + 1; k < 4; ++k) {
+            double val = std::abs(at(k, i));
+            if (val > max_val) {
+                max_val = val;
+                pivot_row = k;
+            }
+        }
+
+        // 2. Singularity Check (The "Flat Particle" Guard)
+        if (max_val < 1e-14) return false;
+
+        // 3. Swap Rows if needed (Swap A rows AND b elements)
+        if (pivot_row != i) {
+            std::swap(b[i], b[pivot_row]);
+            for (int k = i; k < 4; ++k) { // Only need to swap from column 'i' onwards
+                std::swap(at(i, k), at(pivot_row, k));
+            }
+        }
+
+        // 4. Eliminate
+        double inv_pivot = 1.0 / at(i, i);
+        for (int k = i + 1; k < 4; ++k) {
+            double factor = at(k, i) * inv_pivot;
+            // A[k, i] becomes 0, no need to compute it.
+            // Update the rest of the row:
+            for (int j = i + 1; j < 4; ++j) {
+                at(k, j) -= factor * at(i, j);
+            }
+            // Update RHS
+            b[k] -= factor * b[i];
+        }
+    }
+
+    // Final Pivot Check for the last element
+    if (std::abs(at(3, 3)) < 1e-14) return false;
+
+    // --- BACKWARD SUBSTITUTION ---
+    b[3] /= at(3, 3);
+    b[2] = (b[2] - at(2, 3) * b[3]) / at(2, 2);
+    b[1] = (b[1] - at(1, 2) * b[2] - at(1, 3) * b[3]) / at(1, 1);
+    b[0] = (b[0] - at(0, 1) * b[1] - at(0, 2) * b[2] - at(0, 3) * b[3]) / at(0, 0);
+
+    return true;
+
+}
+
+inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4]) {
+     // --- COLUMN 0 ---
+    // 1. Find Pivot in Col 0
+    int p = 0; 
+    double max_val = std::abs(A[0]);
+    double val;
+
+    val = std::abs(A[4]); 
+    if (val > max_val) { max_val = val; p = 1; }
+    val = std::abs(A[8]); 
+    if (val > max_val) { max_val = val; p = 2; }
+    val = std::abs(A[12]);
+    if (val > max_val) { max_val = val; p = 3; }
+
+    if (max_val < 1e-14) return false;
+    // 2. Swap Row 0 with Row p
+    if (p != 0) {
+        int row_offset = p * 4;
+        std::swap(b[0], b[p]);
+        std::swap(A[0], A[row_offset]);     std::swap(A[1], A[row_offset + 1]);
+        std::swap(A[2], A[row_offset + 2]); std::swap(A[3], A[row_offset + 3]);
+    }
+
+    // 3. Eliminate Col 0
+    {
+        double inv = 1.0 / A[0];
+        // Row 1
+        double f1 = A[4] * inv;
+        A[5] -= f1 * A[1]; A[6] -= f1 * A[2]; A[7] -= f1 * A[3]; b[1] -= f1 * b[0];
+        // Row 2
+        double f2 = A[8] * inv;
+        A[9] -= f2 * A[1]; A[10] -= f2 * A[2]; A[11] -= f2 * A[3]; b[2] -= f2 * b[0];
+        // Row 3
+        double f3 = A[12] * inv;
+        A[13] -= f3 * A[1]; A[14] -= f3 * A[2]; A[15] -= f3 * A[3]; b[3] -= f3 * b[0];
+    }
+
+    // --- COLUMN 1 ---
+    // 1. Find Pivot in Col 1 (starting from row 1)
+    p = 1;
+    max_val = std::abs(A[5]);
+    
+    val = std::abs(A[9]);  if (val > max_val) { max_val = val; p = 2; }
+    val = std::abs(A[13]); if (val > max_val) { max_val = val; p = 3; }
+
+    if (max_val < 1e-14) return false;
+
+    // 2. Swap Row 1 with Row p
+    if (p != 1) {
+        int row_offset = p * 4;
+        std::swap(b[1], b[p]);
+        // Optimization: Col 0 is already 0, so we only swap cols 1,2,3
+        std::swap(A[5], A[row_offset + 1]);
+        std::swap(A[6], A[row_offset + 2]);
+        std::swap(A[7], A[row_offset + 3]);
+    }
+
+    // 3. Eliminate Col 1
+    {
+        double inv = 1.0 / A[5];
+        // Row 2
+        double f2 = A[9] * inv;
+        A[10] -= f2 * A[6]; A[11] -= f2 * A[7]; b[2] -= f2 * b[1];
+        // Row 3
+        double f3 = A[13] * inv;
+        A[14] -= f3 * A[6]; A[15] -= f3 * A[7]; b[3] -= f3 * b[1];
+    }
+
+    // --- COLUMN 2 ---
+    // 1. Find Pivot in Col 2 (starting from row 2)
+    p = 2;
+    max_val = std::abs(A[10]);
+
+    val = std::abs(A[14]); if (val > max_val) { max_val = val; p = 3; }
+
+    if (max_val < 1e-14) return false;
+
+    // 2. Swap Row 2 with Row p
+    if (p != 2) {
+        std::swap(b[2], b[3]);
+        // Optimization: Only swap cols 2,3
+        std::swap(A[10], A[14]);
+        std::swap(A[11], A[15]);
+    }
+
+    // 3. Eliminate Col 2
+    {
+        double inv = 1.0 / A[10];
+        // Row 3
+        double f3 = A[14] * inv;
+        A[15] -= f3 * A[11]; b[3] -= f3 * b[2];
+    }
+
+    // --- BACKWARD SUBSTITUTION ---
+    // Check last pivot
+    if (std::abs(A[15]) < 1e-14) return false;
+
+    double inv3 = 1.0 / A[15];
+    b[3] *= inv3;
+
+    double inv2 = 1.0 / A[10];
+    b[2] = (b[2] - A[11] * b[3]) * inv2;
+
+    double inv1 = 1.0 / A[5];
+    b[1] = (b[1] - A[6] * b[2] - A[7] * b[3]) * inv1;
+
+    double inv0 = 1.0 / A[0];
+    b[0] = (b[0] - A[1] * b[1] - A[2] * b[2] - A[3] * b[3]) * inv0;
+
+    return true;
+
+}
 
 
 

From 1ab64761d7525a100272a28ce984b7d7f5bb6dbf Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 21 Nov 2025 14:33:42 +0100
Subject: [PATCH 038/174] Implemented oriented bounding box separating axis
 check

---
 src/ASPHERE/math_extra_superellipsoids.h | 159 +++++++++++++++++++++++
 1 file changed, 159 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 95b269aa54e..4044c76546f 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -46,6 +46,13 @@ namespace MathExtraSuperellipsoids {
   inline bool solve_4x4_robust_unrolled(double A[16], double b[4]); 
 
   // ADD CONTACT DETECTION HERE
+  inline bool check_oriented_bounding_boxes(const double* xc1, const double R1[3][3], const double* shape1,
+                                        const double* xc2, const double R2[3][3], const double* shape2, 
+                                        int &cached_axis);
+
+  inline bool check_intersection_axis(const int axis_id, const double C[3][3], const double AbsC[3][3], 
+                                      const double* center_distance_box1, const double* center_distance_box2,
+                                      const double* a, const double* b);
 
 };
 
@@ -293,5 +300,157 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
 }
 
 
+// algorithm from https://www.geometrictools.com/Documentation/DynamicCollisionDetection.pdf
+/* * Oriented Bounding Box intersection test.
+ * Logic and optimization strategies adapted from LIGGGHTS (CFDEMproject).
+ * See: src/math_extra_liggghts_nonspherical.cpp in LIGGGHTS distribution.
+ * * This implementation uses the "cached separating axis" optimization 
+ * for temporal coherence.
+ */
+inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+    const double* xc1, const double R1[3][3], const double* shape1,
+    const double* xc2, const double R2[3][3], const double* shape2, 
+    int &cached_axis
+){
+    // cache axis is the axis that separated the boxes last time
+    // due to temporal coherence we check it first
+
+    bool separated = false;
+
+    // for orientated bounding boxes we check the 15 separating axes
+    double C[3][3], AbsC[3][3];
+    MathExtra::transpose_times3(R1, R2, C); // C = R1^T * R2
+    for (unsigned int i=0; i<3; i++){
+        for (unsigned int j=0; j<3; j++){
+            AbsC[i][j] = std::fabs(C[i][j]); // we only need the absolute values
+        }
+    }
+
+    double center_distance[3];
+    for (unsigned int i=0; i<3; i++){
+        center_distance[i] = xc2[i] - xc1[i];
+    } 
+
+    // rotate center distance into box 1 frame
+    double center_distance_box1[3];
+    MathExtra::transpose_matvec(R1, center_distance,  center_distance_box1);
+
+    //rotate center distance into box 2 frame
+    double center_distance_box2[3];
+    MathExtra::transpose_matvec(R2, center_distance,  center_distance_box2);
+
+    // first check the cached axis
+    separated = check_intersection_axis(cached_axis, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
+
+    if (separated) return true;
+    // then check all the other axes
+    for (int axis_id = 0; axis_id < 15; axis_id++){
+        if (axis_id == cached_axis) continue; // already checked
+        separated = check_intersection_axis(axis_id, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
+        if (separated) {
+            cached_axis = axis_id; // update cached axis
+            return true;
+        }
+    }
+    return false; // no separation found
+}
+
+inline bool MathExtraSuperellipsoids::check_intersection_axis(
+    const int axis_id, const double C[3][3], const double AbsC[3][3], 
+    const double* center_distance_box1, const double* center_distance_box2,
+    const double* a, const double* b
+){
+    // here axis_id goes from 0 to 14
+    // a and b are the half-sizes of the boxes along their local axes
+    // returns true if there is a separation along this axis
+    // changes the cached axis if separation found
+    double R1, R2, R;
+
+    switch(axis_id){
+        case 0: // A0
+            R1 = a[0];
+            R2 = b[0] * AbsC[0][0] + b[1] * AbsC[0][1] + b[2] * AbsC[0][2];
+            R = std::fabs(center_distance_box1[0]);
+            break;
+        case 1: // A1
+            R1 = a[1];
+            R2 = b[0] * AbsC[1][0] + b[1] * AbsC[1][1] + b[2] * AbsC[1][2];
+            R = std::fabs(center_distance_box1[1]);
+            break;
+        case 2: // A2
+            R1 = a[2];
+            R2 = b[0] * AbsC[2][0] + b[1] * AbsC[2][1] + b[2] * AbsC[2][2];
+            R = std::fabs(center_distance_box1[2]);
+            break;
+        case 3: // B0
+            R1 = a[0] * AbsC[0][0] + a[1] * AbsC[1][0] + a[2] * AbsC[2][0];
+            R2 = b[0];
+            R = std::fabs(center_distance_box2[0]);
+            break;
+        case 4: // B1
+            R1 = a[0] * AbsC[0][1] + a[1] * AbsC[1][1] + a[2] * AbsC[2][1];
+            R2 = b[1];
+            R = std::fabs(center_distance_box2[1]);
+            break;
+        case 5: // B2
+            R1 = a[0] * AbsC[0][2] + a[1] * AbsC[1][2] + a[2] * AbsC[2][2];
+            R2 = b[2];
+            R = std::fabs(center_distance_box2[2]);
+            break;
+        case 6: // A0 x B0
+            R1 = a[1] * AbsC[2][0] + a[2] * AbsC[1][0];
+            R2 = b[1] * AbsC[0][2] + b[2] * AbsC[0][1];
+            R = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
+            break;
+        case 7: // A0 x B1
+            R1 = a[1] * AbsC[2][1] + a[2] * AbsC[1][1];
+            R2 = b[0] * AbsC[0][2] + b[2] * AbsC[0][0];
+            R = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
+            break;
+        case 8: // A0 x B2
+            R1 = a[1] * AbsC[2][2] + a[2] * AbsC[1][2];
+            R2 = b[0] * AbsC[0][1] + b[1] * AbsC[0][0];
+            R = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
+            break;
+        case 9: // A1 x B0
+            R1 = a[0] * AbsC[2][0] + a[2] * AbsC[0][0];
+            R2 = b[1] * AbsC[1][2] + b[2] * AbsC[1][1];
+            R = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
+            break;
+        case 10: // A1 x B1
+            R1 = a[0] * AbsC[2][1] + a[2] * AbsC[0][1];
+            R2 = b[0] * AbsC[1][2] + b[2] * AbsC[1][0];
+            R = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
+            break;
+        case 11: // A1 x B2
+            R1 = a[0] * AbsC[2][2] + a[2] * AbsC[0][2];
+            R2 = b[0] * AbsC[1][1] + b[1] * AbsC[1][0];
+            R = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
+            break;
+        case 12: // A2 x B0
+            R1 = a[0] * AbsC[1][0] + a[1] * AbsC[0][0];
+            R2 = b[1] * AbsC[2][2] + b[2] * AbsC[2][1];
+            R = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
+            break;
+        case 13: // A2 x B1
+            R1 = a[0] * AbsC[1][1] + a[1] * AbsC[0][1];
+            R2 = b[0] * AbsC[2][2] + b[2] * AbsC[2][0];
+            R = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
+            break;
+        case 14: // A2 x B2
+            R1 = a[0] * AbsC[1][2] + a[1] * AbsC[0][2];
+            R2 = b[0] * AbsC[2][1] + b[1] * AbsC[2][0];
+            R = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
+            break;
+    }
+
+    if (R > R1 + R2){
+        return true; // separation found
+    } else {
+        return false; // no separation
+    }
+}
+
+
 
 #endif

From 5a785152226b27bb373d15d050a86e69c094c9f0 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 21 Nov 2025 12:25:02 -0600
Subject: [PATCH 039/174] Move derivatives and contact detection functions into
 MathExtraSuperellipsoids utilities

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 309 ++++++++++++++++-
 src/ASPHERE/math_extra_superellipsoids.h      |  16 +
 .../pair_gran_hooke_history_ellipsoid.cpp     | 312 +-----------------
 .../pair_gran_hooke_history_ellipsoid.h       |  11 -
 4 files changed, 327 insertions(+), 321 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 2a7b83f375c..435f56a18d2 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -27,15 +27,28 @@
 // #include <cstdio>
 // #include <cstring>
 
+extern "C" { // General Matrices
+    void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
+    void dgetrs_(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorzation)
+}
+
 namespace MathExtraSuperellipsoids {
 
+static constexpr int ITERMAX_NEWTON = 100;
+static constexpr double CONVERGENCE_NEWTON = 1e-6;
+static constexpr int ITERMAX_LINESEARCH = 10;
+static constexpr double PARAMETER_LINESEARCH = 1e-4;
+static constexpr double CUTBACK_LINESEARCH = 0.5;
+
 /* ----------------------------------------------------------------------
    beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
 ------------------------------------------------------------------------- */
 double beta_func(double a, double b) {
     return exp(lgamma(a) + lgamma(b) - lgamma(a + b));
 }
-
+// TODO: the reason why I had codded the beta function from scratch is because LAMMPS must be guaranteed to work with some older standard of C++
+//       I don't remember which one exactly (C++14 I think) but this standard does not have gamma() of beta() in the <cmath> implementation
+//       TBD if the code above will be accepted or if we need to fall back to the implementation of beta I copied from Cephes
 /* ----------------------------------------------------------------------
    Volume of superellipsoid
    source https://cse.buffalo.edu/~jryde/cse673/files/superquadrics.pdf
@@ -241,6 +254,300 @@ void shape_function_local_hessian(
   }
 
 
+double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
+  double shapefunc;
+  switch (flag) {
+    case 0: {
+      shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
+      break;
+    }
+    case 1: {
+      shapefunc = shape_and_derivatives_local_n1equaln2(xlocal, shape, block[0], grad, hess);
+      break;
+    }
+    case 2: {
+      shapefunc = shape_and_derivatives_local_superquad(xlocal, shape, block, grad, hess);
+      break;
+    }
+  }
+  return shapefunc;
+}
+
+// General case for n1 != n2 > 2
+double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+  double n1 = block[0];
+  double n2 = block[1];
+  double x_a_pow_n2_m2 = std::pow(x_a, n2 - 2.0);
+  double x_a_pow_n2_m1 = x_a_pow_n2_m2 * x_a;
+  double y_b_pow_n2_m2 = std::pow(y_b, n2 - 2.0);
+  double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
+
+  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
+  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
+  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
+
+  double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
+  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
+  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
+  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
+
+  // Equation (15)
+  double signxy = signx * signy;
+  hess[0][0] = a_inv * a_inv * (n1 * (n2 - 1.0) * x_a_pow_n2_m2 * nu_pow_n1_n2_m1 +
+                                (n1 - n2) * n1 * (x_a_pow_n2_m1 * x_a_pow_n2_m1) * nu_pow_n1_n2_m2);
+  hess[1][1] = b_inv * b_inv * (n1 * (n2 - 1.0) * y_b_pow_n2_m2 * nu_pow_n1_n2_m1 +
+                                (n1 - n2) * n1 * (y_b_pow_n2_m1 * y_b_pow_n2_m1) * nu_pow_n1_n2_m2);
+  hess[0][1] = hess[1][0] = a_inv * b_inv * (n1 - n2) * n1 * x_a_pow_n2_m1 * y_b_pow_n2_m1 * nu_pow_n1_n2_m2 * signxy;
+  hess[2][2] = c_inv * c_inv * n1 * (n1 - 1.0) * z_c_pow_n1_m2;
+  hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
+}
+
+// Special case for n2 = n2 = n > 2
+double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+  double x_a_pow_n_m2 = std::pow(x_a, n - 2.0);
+  double x_a_pow_n_m1 = x_a_pow_n_m2 * x_a;
+  double y_b_pow_n_m2 = std::pow(y_b, n - 2.0);
+  double y_b_pow_n_m1 = y_b_pow_n_m2 * y_b;
+  double z_c_pow_n_m2 = std::pow(z_c, n - 2.0);
+  double z_c_pow_n_m1 = z_c_pow_n_m2 * z_c;
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
+  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
+  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
+
+  // Equation (15)
+  double signxy = signx * signy;
+  hess[0][0] = a_inv * a_inv * n * (n - 1.0) * x_a_pow_n_m2;
+  hess[1][1] = b_inv * b_inv * n * (n - 1.0) * y_b_pow_n_m2;
+  hess[2][2] = c_inv * c_inv * n * (n - 1.0) * z_c_pow_n_m2;
+  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
+}
+
+
+// Special case for n1 = n2 = 2
+double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
+  double a = 2.0 / (shape[0] * shape[0]);
+  double b = 2.0 / (shape[1] * shape[1]);
+  double c = 2.0 / (shape[2] * shape[2]);
+
+  // Equation (14) simplified for n1 = n2 = 2
+  grad[0] = a * xlocal[0];
+  grad[1] = b * xlocal[1];
+  grad[2] = c * xlocal[2];
+
+  // Equation (15)
+  hess[0][0] = a;
+  hess[1][1] = b;
+  hess[2][2] = c;
+  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
+
+  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
+}
+
+double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
+  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
+  MathExtra::sub3(X0, xc, tmp_v);
+  MathExtra::transpose_matvec(R, tmp_v, xlocal);
+  shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  MathExtra::matvec(R, tmp_v, grad);
+  MathExtra::times3_transpose(hess, R, tmp_m);
+  MathExtra::times3(R, tmp_m, hess);
+  return shapefunc;
 }
 
+double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual) {
+  // Equation (23)
+  MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
+  residual[3] = shapefunci - shapefuncj;
+  return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
+}
+
+void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
+  // Jacobian (derivative of residual)
+  // 1D column-major matrix for LAPACK/linalg compatibility
+  for (int row = 0 ; row < 3 ; row++) {
+    for (int col = 0 ; col < 3 ; col++) {
+      jacobian[row + col*4] = hessi_global[row][col] + mu2 * hessj_global[row][col];
+    }
+    jacobian[row + 3*4] = gradj_global[row];
+  }
+  for (int col = 0 ; col < 3 ; col++) {
+    jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
+  }
+  jacobian[15] = 0.0;
+}
+
+double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                     const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
+                                     const double* X, double* shapefunc, double* residual, double* jacobian) {
+  double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
+  shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
+  shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
+  compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
+  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual);
+}
+
+
+int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
+                            const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
+                            double* X0, double* nij) {
+  double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
+  bool converged(false);
+  int flagi = determine_flag(blocki);
+  int flagj = determine_flag(blockj);
+
+  norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+  for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
+    norm_ini = norm;
+
+    // Solve Newton step
+    int lapack_error, ipiv[16];
+    const int n = 4;
+    const char trans = 'N';
+    const int nrhs = 1;
+    double rhs[4] = {-residual[0], -residual[1], -residual[2], -residual[3]};
+    dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
+    if (lapack_error)
+      return lapack_error;
+    dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
+    if (lapack_error)
+      return lapack_error;
+
+    // Backtracking line search
+    double a(1.0), X_line[4];
+    int iter_ls;
+    for (iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
+      X_line[0] = X0[0] + a * rhs[0];
+      X_line[1] = X0[1] + a * rhs[1];
+      X_line[2] = X0[2] + a * rhs[2];
+      X_line[3] = X0[3] + a * rhs[3];
+
+      // Line search iterates not selected for the next Newton iteration
+      // do not need to compute the expensive Jacobian, only the residual.
+      // We want to avoid calling `compute_residual_and_jacobian()` for each
+      // line search iterate.
+      // However, many intermediate variables that are costly to compute
+      // are shared by the local gradient and local hessian calculations.
+      // We want to avoid calling `compute_residual()` followed by `compute_jacobian()`
+      // for the iterates that satisfy the descent condition.
+      // To do so, we duplicate `compute_residual_and_jacobian()`, but only
+      // build the global hessians if the descent condition is satisfied and
+      // the iterate will be used in the next Newton step.
+      // This leads to some code duplication, and still computes
+      // the local hessians even when they are not necessary.
+      // This seems to be an acceptable in-between of performance and clean code.
+      // As most of the cost in the Hessian is in the 2 matrix products to
+      // Compute the global matrix from the local one
+
+      // One alternative would be to store the intermediate variables from
+      // the local gradient calculation when calling `shape_and_gradient_local()`,
+      // and re-use them during the local hessian calculation (function that 
+      // calculates only the Hessian from these intermediate values would need
+      // to be implemented).
+      // This seems a bit clunky just to save the few multiplications of the
+      // local hessian calculation, that is why I did not do it. I am open to
+      // other ideas and solutions.
+      // Even then, we would have some code duplication with `compute_residual_and_jacobian()`
+      // So maybe I am overthinking this...
+
+      double xilocal[3], gradi[3], hessi[3][3], xjlocal[3], gradj[3], hessj[3][3], tmp_v[3];
+
+      MathExtra::sub3(X_line, xci, tmp_v);
+      MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
+      shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
+      MathExtra::matvec(Ri, tmp_v, gradi);
+
+      MathExtra::sub3(X_line, xcj, tmp_v);
+      MathExtra::transpose_matvec(Rj, tmp_v, xjlocal);
+      shapefunc[1] = shape_and_derivatives_local(xjlocal, shapej, blockj, flagj, tmp_v, hessj);
+      MathExtra::matvec(Rj, tmp_v, gradj);
+
+      norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
+
+      if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
+        a *= CUTBACK_LINESEARCH;
+      } else {
+        X0[0] = X_line[0];
+        X0[1] = X_line[1];
+        X0[2] = X_line[2];
+        X0[3] = X_line[3];
+        // Only compute the jacobian if there is another Newton iteration to come
+        if (norm > CONVERGENCE_NEWTON) {
+          double tmp_m[3][3];
+          MathExtra::times3_transpose(hessi, Ri, tmp_m);
+          MathExtra::times3(Ri, tmp_m, hessi);
+          MathExtra::times3_transpose(hessj, Rj, tmp_m);
+          MathExtra::times3(Rj, tmp_m, hessj);
+          compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
+        } else {
+          converged = true;
+          // TODO: consider testing picking the normal with the least error
+          //       i.e., likely the grain with the smallest curvature (Hessian norm)
+          //       or some other measure like average gradients.
+          //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
+          MathExtra::normalize3(gradi, nij);
+        }
+        break;
+      }
+    }
+
+    // If no descent with line search, take full step, try to escape bad region
+    if (iter_ls == ITERMAX_LINESEARCH) {
+      X0[0] += rhs[0];
+      X0[1] += rhs[1];
+      X0[2] += rhs[2];
+      X0[3] += rhs[3];
+      norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+    }
+
+    if (converged)
+      break;
+  }
+
+  // LAPACK error are within [-4, 4], use 5 non-touching, -5 non-converging
+  if (!converged)
+    return -5;
+  if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
+    return 5;
+
+  return 0;
+}
+
+
+int determine_flag(const double* block) {
+  const double EPSBLOCK(1e-3);
+  int flag(2);
+  if ((std::fabs(block[0] - 2) <= EPSBLOCK) && (std::fabs(block[1] - 2) <= EPSBLOCK))
+    flag = 0;
+  else if (std::fabs(block[0] - block[1]) <= EPSBLOCK)
+    flag = 1;
+  return flag;
+}
 
+} // namespace MathExtraSuperellipsoids
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 4044c76546f..de5604d5e68 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -54,6 +54,22 @@ namespace MathExtraSuperellipsoids {
                                       const double* center_distance_box1, const double* center_distance_box2,
                                       const double* a, const double* b);
 
+  // Jibril's versions of the functions for contact detection
+  double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
+  double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
+  double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
+  double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
+  double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
+  double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual);
+  void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
+  double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                       const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
+                                       const double* X, double* shapefunc, double* residual, double* jacobian);
+  int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
+                              const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
+                              double* X0, double* nij);
+  int determine_flag(const double* block);
+
 };
 
 
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6d60b518c3d..341ee0ffd26 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -35,18 +35,8 @@
 
 using namespace LAMMPS_NS;
 
-static constexpr int ITERMAX_NEWTON = 100;
-static constexpr double CONVERGENCE_NEWTON = 1e-6;
-static constexpr int ITERMAX_LINESEARCH = 10;
-static constexpr double PARAMETER_LINESEARCH = 1e-4;
-static constexpr double CUTBACK_LINESEARCH = 0.5;
-static constexpr int NUMSTEP_INITIAL_GUESS = 8;
-
-extern "C" { // General Matrices
-    void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
-    void dgetrs_(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorzation)
-}
 
+static constexpr int NUMSTEP_INITIAL_GUESS = 8;
 
 /* ---------------------------------------------------------------------- */
 
@@ -236,7 +226,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           //       not sure if enough information to do that
           MathExtra::copy3(prev_cp, X0);
           X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
-          int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
+          int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
           if (status == 0)
             touching = true;
           else if(status == 5)
@@ -270,7 +260,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
               blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
             }
-            int status = determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
+            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
             if (status == 0)
               touching = true;
             else if(status == 5)
@@ -927,299 +917,3 @@ double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
   double cut = r1 + r2;
   return cut;
 }
-
-double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
-  double shapefunc;
-  switch (flag) {
-    case 0: {
-      shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
-      break;
-    }
-    case 1: {
-      shapefunc = shape_and_derivatives_local_n1equaln2(xlocal, shape, block[0], grad, hess);
-      break;
-    }
-    case 2: {
-      shapefunc = shape_and_derivatives_local_superquad(xlocal, shape, block, grad, hess);
-      break;
-    }
-  }
-  return shapefunc;
-}
-
-// General case for n1 != n2 > 2
-double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
-  double a_inv = 1.0 / shape[0];
-  double b_inv = 1.0 / shape[1];
-  double c_inv = 1.0 / shape[2];
-  double x_a = std::fabs(xlocal[0] * a_inv);
-  double y_b = std::fabs(xlocal[1] * b_inv);
-  double z_c = std::fabs(xlocal[2] * c_inv);
-  double n1 = block[0];
-  double n2 = block[1];
-  double x_a_pow_n2_m2 = std::pow(x_a, n2 - 2.0);
-  double x_a_pow_n2_m1 = x_a_pow_n2_m2 * x_a;
-  double y_b_pow_n2_m2 = std::pow(y_b, n2 - 2.0);
-  double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
-
-  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
-  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
-  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
-
-  double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
-  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
-
-  // Equation (14)
-  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
-  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
-  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
-  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
-  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
-  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
-
-  // Equation (15)
-  double signxy = signx * signy;
-  hess[0][0] = a_inv * a_inv * (n1 * (n2 - 1.0) * x_a_pow_n2_m2 * nu_pow_n1_n2_m1 +
-                                (n1 - n2) * n1 * (x_a_pow_n2_m1 * x_a_pow_n2_m1) * nu_pow_n1_n2_m2);
-  hess[1][1] = b_inv * b_inv * (n1 * (n2 - 1.0) * y_b_pow_n2_m2 * nu_pow_n1_n2_m1 +
-                                (n1 - n2) * n1 * (y_b_pow_n2_m1 * y_b_pow_n2_m1) * nu_pow_n1_n2_m2);
-  hess[0][1] = hess[1][0] = a_inv * b_inv * (n1 - n2) * n1 * x_a_pow_n2_m1 * y_b_pow_n2_m1 * nu_pow_n1_n2_m2 * signxy;
-  hess[2][2] = c_inv * c_inv * n1 * (n1 - 1.0) * z_c_pow_n1_m2;
-  hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
-
-  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
-}
-
-// Special case for n2 = n2 = n > 2
-double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
-  double a_inv = 1.0 / shape[0];
-  double b_inv = 1.0 / shape[1];
-  double c_inv = 1.0 / shape[2];
-  double x_a = std::fabs(xlocal[0] * a_inv);
-  double y_b = std::fabs(xlocal[1] * b_inv);
-  double z_c = std::fabs(xlocal[2] * c_inv);
-  double x_a_pow_n_m2 = std::pow(x_a, n - 2.0);
-  double x_a_pow_n_m1 = x_a_pow_n_m2 * x_a;
-  double y_b_pow_n_m2 = std::pow(y_b, n - 2.0);
-  double y_b_pow_n_m1 = y_b_pow_n_m2 * y_b;
-  double z_c_pow_n_m2 = std::pow(z_c, n - 2.0);
-  double z_c_pow_n_m1 = z_c_pow_n_m2 * z_c;
-
-  // Equation (14)
-  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
-  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
-  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
-  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
-  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
-  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
-
-  // Equation (15)
-  double signxy = signx * signy;
-  hess[0][0] = a_inv * a_inv * n * (n - 1.0) * x_a_pow_n_m2;
-  hess[1][1] = b_inv * b_inv * n * (n - 1.0) * y_b_pow_n_m2;
-  hess[2][2] = c_inv * c_inv * n * (n - 1.0) * z_c_pow_n_m2;
-  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
-
-  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
-}
-
-
-// Special case for n1 = n2 = 2
-double PairGranHookeHistoryEllipsoid::shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
-  double a = 2.0 / (shape[0] * shape[0]);
-  double b = 2.0 / (shape[1] * shape[1]);
-  double c = 2.0 / (shape[2] * shape[2]);
-
-  // Equation (14) simplified for n1 = n2 = 2
-  grad[0] = a * xlocal[0];
-  grad[1] = b * xlocal[1];
-  grad[2] = c * xlocal[2];
-
-  // Equation (15)
-  hess[0][0] = a;
-  hess[1][1] = b;
-  hess[2][2] = c;
-  hess[0][1] = hess[1][0] = hess[0][2] = hess[2][0] = hess[1][2] = hess[2][1] = 0.0;
-
-  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
-}
-
-
-double PairGranHookeHistoryEllipsoid::shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
-  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
-  MathExtra::sub3(X0, xc, tmp_v);
-  MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
-  MathExtra::matvec(R, tmp_v, grad);
-  MathExtra::times3_transpose(hess, R, tmp_m);
-  MathExtra::times3(R, tmp_m, hess);
-  return shapefunc;
-}
-
-double PairGranHookeHistoryEllipsoid::compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual) {
-  // Equation (23)
-  MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
-  residual[3] = shapefunci - shapefuncj;
-  return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
-}
-
-void PairGranHookeHistoryEllipsoid::compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
-  // Jacobian (derivative of residual)
-  // 1D column-major matrix for LAPACK/linalg compatibility
-  for (int row = 0 ; row < 3 ; row++) {
-    for (int col = 0 ; col < 3 ; col++) {
-      jacobian[row + col*4] = hessi_global[row][col] + mu2 * hessj_global[row][col];
-    }
-    jacobian[row + 3*4] = gradj_global[row];
-  }
-  for (int col = 0 ; col < 3 ; col++) {
-    jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
-  }
-  jacobian[15] = 0.0;
-}
-
-double PairGranHookeHistoryEllipsoid::compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                                                    const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                                                    const double* X, double* shapefunc, double* residual, double* jacobian) {
-  double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
-  shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
-  shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
-  compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
-  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual);
-}
-
-
-int PairGranHookeHistoryEllipsoid::determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
-                                                           const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
-                                                           double* X0, double* nij) {
-  double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
-  bool converged(false);
-  int flagi = determine_flag(blocki);
-  int flagj = determine_flag(blockj);
-
-  norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
-  for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
-    norm_ini = norm;
-
-    // Solve Newton step
-    int lapack_error, ipiv[16];
-    const int n = 4;
-    const char trans = 'N';
-    const int nrhs = 1;
-    double rhs[4] = {-residual[0], -residual[1], -residual[2], -residual[3]};
-    dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
-    if (lapack_error)
-      return lapack_error;
-    dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
-    if (lapack_error)
-      return lapack_error;
-
-    // Backtracking line search
-    double a(1.0), X_line[4];
-    int iter_ls;
-    for (iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
-      X_line[0] = X0[0] + a * rhs[0];
-      X_line[1] = X0[1] + a * rhs[1];
-      X_line[2] = X0[2] + a * rhs[2];
-      X_line[3] = X0[3] + a * rhs[3];
-
-      // Line search iterates not selected for the next Newton iteration
-      // do not need to compute the expensive Jacobian, only the residual.
-      // We want to avoid calling `compute_residual_and_jacobian()` for each
-      // line search iterate.
-      // However, many intermediate variables that are costly to compute
-      // are shared by the local gradient and local hessian calculations.
-      // We want to avoid calling `compute_residual()` followed by `compute_jacobian()`
-      // for the iterates that satisfy the descent condition.
-      // To do so, we duplicate `compute_residual_and_jacobian()`, but only
-      // build the global hessians if the descent condition is satisfied and
-      // the iterate will be used in the next Newton step.
-      // This leads to some code duplication, and still computes
-      // the local hessians even when they are not necessary.
-      // This seems to be an acceptable in-between of performance and clean code.
-      // As most of the cost in the Hessian is in the 2 matrix products to
-      // Compute the global matrix from the local one
-
-      // One alternative would be to store the intermediate variables from
-      // the local gradient calculation when calling `shape_and_gradient_local()`,
-      // and re-use them during the local hessian calculation (function that 
-      // calculates only the Hessian from these intermediate values would need
-      // to be implemented).
-      // This seems a bit clunky just to save the few multiplications of the
-      // local hessian calculation, that is why I did not do it. I am open to
-      // other ideas and solutions.
-      // Even then, we would have some code duplication with `compute_residual_and_jacobian()`
-      // So maybe I am overthinking this...
-
-      double xilocal[3], gradi[3], hessi[3][3], xjlocal[3], gradj[3], hessj[3][3], tmp_v[3];
-
-      MathExtra::sub3(X_line, xci, tmp_v);
-      MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
-      shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
-      MathExtra::matvec(Ri, tmp_v, gradi);
-
-      MathExtra::sub3(X_line, xcj, tmp_v);
-      MathExtra::transpose_matvec(Rj, tmp_v, xjlocal);
-      shapefunc[1] = shape_and_derivatives_local(xjlocal, shapej, blockj, flagj, tmp_v, hessj);
-      MathExtra::matvec(Rj, tmp_v, gradj);
-
-      norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
-
-      if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
-        a *= CUTBACK_LINESEARCH;
-      } else {
-        X0[0] = X_line[0];
-        X0[1] = X_line[1];
-        X0[2] = X_line[2];
-        X0[3] = X_line[3];
-        // Only compute the jacobian if there is another Newton iteration to come
-        if (norm > CONVERGENCE_NEWTON) {
-          double tmp_m[3][3];
-          MathExtra::times3_transpose(hessi, Ri, tmp_m);
-          MathExtra::times3(Ri, tmp_m, hessi);
-          MathExtra::times3_transpose(hessj, Rj, tmp_m);
-          MathExtra::times3(Rj, tmp_m, hessj);
-          compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
-        } else {
-          converged = true;
-          // TODO: consider testing picking the normal with the least error
-          //       i.e., likely the grain with the smallest curvature (Hessian norm)
-          //       or some other measure like average gradients.
-          //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
-          MathExtra::normalize3(gradi, nij);
-        }
-        break;
-      }
-    }
-
-    // If no descent with line search, take full step, try to escape bad region
-    if (iter_ls == ITERMAX_LINESEARCH) {
-      X0[0] += rhs[0];
-      X0[1] += rhs[1];
-      X0[2] += rhs[2];
-      X0[3] += rhs[3];
-      norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
-    }
-
-    if (converged)
-      break;
-  }
-
-  // LAPACK error are within [-4, 4], use 5 non-touching, -5 non-converging
-  if (!converged)
-    return -5;
-  if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
-    return 5;
-
-  return 0;
-}
-
-int PairGranHookeHistoryEllipsoid::determine_flag(const double* block) {
-  const double EPSBLOCK(1e-3);
-  int flag(2);
-  if ((std::fabs(block[0] - 2) <= EPSBLOCK) && (std::fabs(block[1] - 2) <= EPSBLOCK))
-    flag = 0;
-  else if (std::fabs(block[0] - block[1]) <= EPSBLOCK)
-    flag = 1;
-  return flag;
-}
\ No newline at end of file
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 800c2dcc2a8..33764e67d68 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -71,17 +71,6 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   void allocate();
 
  private:
-  static double shape_and_derivatives_local(const double*, const double*, const double*, const int, double*, double[3][3]);
-  static double shape_and_derivatives_local_superquad(const double*, const double*, const double*, double*, double[3][3]);
-  static double shape_and_derivatives_local_n1equaln2(const double*, const double*, const double, double*, double[3][3]);
-  static double shape_and_derivatives_local_ellipsoid(const double*, const double*, double*, double[3][3]);
-  static double shape_and_derivatives_global(const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double[3][3]);
-
-  static double compute_residual(const double, const double*, const double, const double*, const double, double*);
-  static void compute_jacobian(const double*, const double[3][3], const double*, const double[3][3], const double, double* jacobian);
-  static double compute_residual_and_jacobian(const double*, const double[3][3], const double*, const double*, const int, const double*, const double[3][3], const double*, const double*, const int, const double*, double*, double*, double*);
-  static int determine_contact_point(const double*, const double[3][3], const double*, const double*, const double*, const double[3][3], const double*, const double*, double*, double*);
-  static int determine_flag(const double*);
 
   // Below not implemented. Placeholder if we decide not to compute local hessian in line search
   static double shape_and_gradient_local(const double*, const double*, const double*, double*); // would return a vector of temporary variables

From ad66804b909acfb02a3473afbb4ad240b31edf9e Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sat, 22 Nov 2025 17:33:36 +0100
Subject: [PATCH 040/174] Implemented nonspehrical contact force logic for
 hooke law

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 115 ++++++++++--------
 1 file changed, 67 insertions(+), 48 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 341ee0ffd26..0c5560363db 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -120,7 +120,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double quat1, quat2, quat3, quat4;
   double block1, block2;
 
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2;
   // TODO: Maybe we can make flag_super of the grain an int instead, to cimplify when n1 = n2 ?
   int flagi, flagj; // 0 : ellipsoid, 1 : equal exponents n1=n2, 2: general super-ellipsoid n1 >2, n2>2, n1!=n2
 
@@ -225,7 +225,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           // TODO: move contact point with rigid body motion of the pair ?
           //       not sure if enough information to do that
           MathExtra::copy3(prev_cp, X0);
-          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
+          X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
           int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
           if (status == 0)
             touching = true;
@@ -289,36 +289,56 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         rinv = 1.0 / r;
         rsqinv = 1.0 / rsq;
 
-        // relative translational velocity
+        // branch vectors 
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
 
-        vr1 = v[i][0] - v[j][0];
-        vr2 = v[i][1] - v[j][1];
-        vr3 = v[i][2] - v[j][2];
+        // we need to take the cross product of omega
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omega[i], cr1, omega_cross_r1);
+        MathExtra::cross3(omega[j], cr2, omega_cross_r2);
+
+        // relative translational velocity 
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
 
         // normal component
 
-        vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
-        vn1 = delx * vnnr * rsqinv;
-        vn2 = dely * vnnr * rsqinv;
-        vn3 = delz * vnnr * rsqinv;
+        vn1 = nij[0] * vr1; // dot product 
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
 
-        // tangential component
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
 
-        vt1 = vr1 - vn1;
-        vt2 = vr2 - vn2;
-        vt3 = vr3 - vn3;
+        // tangential component
 
-        // relative rotational velocity
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
 
-        wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
-        wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
-        wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
 
         // meff = effective mass of pair of particles
         // if I or J part of rigid body, use body mass
         // if I or J is frozen, meff is other particle
 
-        mi = rmass[i];
+        mi = rmass[i]; // JB I assume this is the mass of particle i, need to check
         mj = rmass[j];
         if (fix_rigid) {
           if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
@@ -331,18 +351,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         // normal forces = Hookian contact + normal velocity damping
 
-        damp = meff * gamman * vnnr * rsqinv;
-        ccel = kn * (radsum - r) * rinv - damp;
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) - damp; // assuming we get the overlap depth
         if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
-        // relative velocities
-
-        vtr1 = vt1 - (delz * wr2 - dely * wr3);
-        vtr2 = vt2 - (delx * wr3 - delz * wr1);
-        vtr3 = vt3 - (dely * wr1 - delx * wr2);
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
         // shear history effects
 
         touch[jj] = 1;
@@ -359,11 +371,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
           // rotate shear displacements
 
-          rsht = shear[0] * delx + shear[1] * dely + shear[2] * delz;
-          rsht *= rsqinv;
-          shear[0] -= rsht * delx;
-          shear[1] -= rsht * dely;
-          shear[2] -= rsht * delz;
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
         }
 
         // tangential forces = shear + tangential velocity damping
@@ -375,7 +386,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // rescale frictional displacements and forces if needed
 
         fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel * r);
+        fn = xmu * fabs(ccel);
 
         if (fs > fn) {
           if (shrmag != 0.0) {
@@ -394,33 +405,41 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         // forces & torques
 
-        fx = delx * ccel + fs1;
-        fy = dely * ccel + fs2;
-        fz = delz * ccel + fs3;
-        fx *= factor_lj;
+        fx = nij[0] * ccel + fs1;
+        fy = nij[1] * ccel + fs2;
+        fz = nij[2] * ccel + fs3;
+        fx *= factor_lj; // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
 
-        tor1 = rinv * (dely * fs3 - delz * fs2);
-        tor2 = rinv * (delz * fs1 - delx * fs3);
-        tor3 = rinv * (delx * fs2 - dely * fs1);
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx; 
+
         tor1 *= factor_lj;
         tor2 *= factor_lj;
         tor3 *= factor_lj;
-        torque[i][0] -= radi * tor1;
-        torque[i][1] -= radi * tor2;
-        torque[i][2] -= radi * tor3;
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
-          torque[j][0] -= radj * tor1;
-          torque[j][1] -= radj * tor2;
-          torque[j][2] -= radj * tor3;
+          
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx; 
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
         }
 
         if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz);

From 5f36ceab8419a636db81c77c5c53da0afbb93359 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sat, 22 Nov 2025 19:37:34 +0100
Subject: [PATCH 041/174] Implemented scaled gradient for newton rapson surface
 contact point

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 137 ++++++++++++++++++++-
 src/ASPHERE/math_extra_superellipsoids.h   |   5 +
 2 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 435f56a18d2..af88608db10 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -290,7 +290,7 @@ double shape_and_derivatives_local_superquad(const double* xlocal, const double*
 
   double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
   double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
-  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m1 * nu;
+  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m2 * nu;
 
   double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
   double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
@@ -550,4 +550,139 @@ int determine_flag(const double* block) {
   return flag;
 }
 
+// Functions to compute shape function and gradient only when called for newton method
+// to avoid computing hessian when not needed and having smoother landscape for the line search
+// General case for n1 != n2 > 2
+double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+  double n1 = block[0];
+  double n2 = block[1];
+  double x_a_pow_n2_m2 = std::pow(x_a, n2 - 2.0);
+  double x_a_pow_n2_m1 = x_a_pow_n2_m2 * x_a;
+  double y_b_pow_n2_m2 = std::pow(y_b, n2 - 2.0);
+  double y_b_pow_n2_m1 = y_b_pow_n2_m2 * y_b;
+
+  double nu = (x_a_pow_n2_m1 * x_a) + (y_b_pow_n2_m1 * y_b);
+  double nu_pow_n1_n2_m2 = std::pow(nu, n1/n2 - 2.0);
+  double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m2 * nu;
+
+  double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
+  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n1 * a_inv * x_a_pow_n2_m1 * nu_pow_n1_n2_m1 * signx;
+  grad[1] = n1 * b_inv * y_b_pow_n2_m1 * nu_pow_n1_n2_m1 * signy;
+  grad[2] = n1 * c_inv * z_c_pow_n1_m1 * signz;
+
+  double F = (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c);
+
+  double scale_factor = std::pow(F, 1.0/n1 -1.0) / n1;
+
+  grad[0] *= scale_factor;
+  grad[1] *= scale_factor;
+  grad[2] *= scale_factor;
+
+  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
+}
+
+// Special case for n2 = n2 = n > 2
+double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad) {
+  double a_inv = 1.0 / shape[0];
+  double b_inv = 1.0 / shape[1];
+  double c_inv = 1.0 / shape[2];
+  double x_a = std::fabs(xlocal[0] * a_inv);
+  double y_b = std::fabs(xlocal[1] * b_inv);
+  double z_c = std::fabs(xlocal[2] * c_inv);
+  double x_a_pow_n_m2 = std::pow(x_a, n - 2.0);
+  double x_a_pow_n_m1 = x_a_pow_n_m2 * x_a;
+  double y_b_pow_n_m2 = std::pow(y_b, n - 2.0);
+  double y_b_pow_n_m1 = y_b_pow_n_m2 * y_b;
+  double z_c_pow_n_m2 = std::pow(z_c, n - 2.0);
+  double z_c_pow_n_m1 = z_c_pow_n_m2 * z_c;
+
+  // Equation (14)
+  double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
+  double signy = xlocal[1] > 0.0 ? 1.0 : -1.0;
+  double signz = xlocal[2] > 0.0 ? 1.0 : -1.0;
+  grad[0] = n * a_inv * x_a_pow_n_m1 * signx;
+  grad[1] = n * b_inv * y_b_pow_n_m1 * signy;
+  grad[2] = n * c_inv * z_c_pow_n_m1 * signz;
+
+  double F = (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c);
+  double scale_factor = std::pow(F, 1.0/n -1.0) / n;
+
+  grad[0] *= scale_factor;
+  grad[1] *= scale_factor;
+  grad[2] *= scale_factor;
+
+  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
+}
+
+
+// Special case for n1 = n2 = 2
+double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const double* shape, double* grad) {
+  double a = 2.0 / (shape[0] * shape[0]);
+  double b = 2.0 / (shape[1] * shape[1]);
+  double c = 2.0 / (shape[2] * shape[2]);
+
+  // Equation (14) simplified for n1 = n2 = 2
+  grad[0] = a * xlocal[0];
+  grad[1] = b * xlocal[1];
+  grad[2] = c * xlocal[2];
+
+  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
+}
+
+// Newton Rapson method to find the surface point from the contact point given the normal
+// TODO : implement this function
+// void find_surface_point(
+//   const double* shape, const double* blockiness, const double* quat,
+//   const double* global_point, const double* global_normal) {
+//   double local_point[3], local_normal[3];
+//   global2local_vector(global_point, quat, local_point);
+//   global2local_vector(global_normal, quat, local_normal);
+//   double overlap = 0.0;
+//   double tol = 1e-8;
+//   unsigned int max_iter = 100;
+//   double local_f;
+//   double local_grad[3];
+    
+
+//   for (unsigned int iter = 0; iter < max_iter; iter++) {
+    
+//     if (blockiness[0] == 2.0 && blockiness[1] == 2.0) {
+//       local_f = stable_shape_and_gradients_local_ellipsoid(local_point, shape, local_grad);
+//     } else if (std::fabs(blockiness[0] - blockiness[1]) < 1e-3) {
+//       local_f = stable_shape_and_gradient_local_n1equaln2(local_point, shape, blockiness[0], local_grad);
+//     } else {
+//       local_f = stable_shape_and_gradient_local_superquad(local_point, shape, blockiness,  local_grad);
+//     }
+
+//     if (std::fabs(local_f) < tol) {
+//       break;
+//     }
+
+
+    
+//     double denom = MathExtra::dot3(local_grad, local_normal);
+//     if (std::fabs(denom) < 1e-12) {
+//       // Avoid division by zero
+//       break;
+//     }
+//     double delta = local_f / denom;
+//     for (int i = 0; i < 3; i++) {
+//       local_point[i] -= delta * local_normal[i];
+//     }
+//   }
+
+
+
 } // namespace MathExtraSuperellipsoids
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index de5604d5e68..c5f9af27325 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -69,6 +69,11 @@ namespace MathExtraSuperellipsoids {
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
                               double* X0, double* nij);
   int determine_flag(const double* block);
+ 
+  // functions to compute shape function and gradient only when called for newton method
+  double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
+  double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
+  double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
 
 };
 

From dc857992271f03d3a9ce3434f43a6dbbcb39d322 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sat, 22 Nov 2025 21:05:24 +0100
Subject: [PATCH 042/174] Added newton-rapson for surface point detection.
 Analytical solution for ellipsoids

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 112 ++++++++++++++-------
 src/ASPHERE/math_extra_superellipsoids.h   |   1 +
 2 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index af88608db10..474e9c4d0a8 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -641,48 +641,88 @@ double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const do
   return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
 }
 
-// Newton Rapson method to find the surface point from the contact point given the normal
-// TODO : implement this function
-// void find_surface_point(
-//   const double* shape, const double* blockiness, const double* quat,
-//   const double* global_point, const double* global_normal) {
-//   double local_point[3], local_normal[3];
-//   global2local_vector(global_point, quat, local_point);
-//   global2local_vector(global_normal, quat, local_normal);
-//   double overlap = 0.0;
-//   double tol = 1e-8;
-//   unsigned int max_iter = 100;
-//   double local_f;
-//   double local_grad[3];
-    
+// Newton Rapson method to find the overlap distance from the contact point given the normal
+void find_overlap_distance(
+  const double* shape, const double* block, const double* quat,
+  const double* global_point, const double* global_normal, double& overlap)
+   {
+  double local_point[3], local_normal[3];
+  global2local_vector(global_point, quat, local_point);
+  global2local_vector(global_normal, quat, local_normal);
+  double local_f;
+  double local_grad[3];
+  
+  // elliposid analytical solution, might need to double check the math 
+  // there is an easy way to find this by parametrizing the straight line as
+  // X0 + t * n anf then substituting in the ellipsoid equation  for x, y, z
+  // this results in a quadratic equation and we take the positive solution since
+  // we are taking the outward facing normal for each grain
 
-//   for (unsigned int iter = 0; iter < max_iter; iter++) {
-    
-//     if (blockiness[0] == 2.0 && blockiness[1] == 2.0) {
-//       local_f = stable_shape_and_gradients_local_ellipsoid(local_point, shape, local_grad);
-//     } else if (std::fabs(blockiness[0] - blockiness[1]) < 1e-3) {
-//       local_f = stable_shape_and_gradient_local_n1equaln2(local_point, shape, blockiness[0], local_grad);
-//     } else {
-//       local_f = stable_shape_and_gradient_local_superquad(local_point, shape, blockiness,  local_grad);
-//     }
+  if (block[0] == 2.0 && block[1] == 2.0){
+
+    double a_inv2 = 1.0 / (shape[0] * shape[0]);
+    double b_inv2 = 1.0 / (shape[1] * shape[1]);
+    double c_inv2 = 1.0 / (shape[2] * shape[2]);
+
+    // Coefficients for At^2 + Bt + C = 0
+    double A = (local_normal[0] * local_normal[0] * a_inv2) +
+               (local_normal[1] * local_normal[1] * b_inv2) +
+               (local_normal[2] * local_normal[2] * c_inv2);
+
+    double B = 2.0 * ( (local_point[0] * local_normal[0] * a_inv2) +
+                     (local_point[1] * local_normal[1] * b_inv2) +
+                     (local_point[2] * local_normal[2] * c_inv2) );
+
+    double C = (local_point[0] * local_point[0] * a_inv2) +
+               (local_point[1] * local_point[1] * b_inv2) +
+               (local_point[2] * local_point[2] * c_inv2) - 1.0;
+
+    // Discriminant
+    double delta = B*B - 4.0*A*C;
 
-//     if (std::fabs(local_f) < tol) {
-//       break;
-//     }
+    // Clamp delta to zero just in case numerical noise makes it negative
+    if (delta < 0.0) delta = 0.0; 
+    double t = (-B + std::sqrt(delta)) / (2.0 * A);
 
 
+    } else {
+      // --- Superquadric Case (Newton-Raphson on Distance Estimator) ---
     
-//     double denom = MathExtra::dot3(local_grad, local_normal);
-//     if (std::fabs(denom) < 1e-12) {
-//       // Avoid division by zero
-//       break;
-//     }
-//     double delta = local_f / denom;
-//     for (int i = 0; i < 3; i++) {
-//       local_point[i] -= delta * local_normal[i];
-//     }
-//   }
+    double t = 0.0; // Distance along the normal
+    double current_p[3];
+    double val;
+    double tol = 1e-8;
+    unsigned int max_iter = 20;
+    
+    for (unsigned int iter = 0; iter < max_iter; iter++) {
+        // Update current search position: P = Start + t * Normal
+        current_p[0] = local_point[0] + t * local_normal[0];
+        current_p[1] = local_point[1] + t * local_normal[1];
+        current_p[2] = local_point[2] + t * local_normal[2];
+
+        // Calculate Distance Estimator value and Gradient
+        if (std::fabs(block[0] - block[1]) < 1e-6) {
+            val = stable_shape_and_gradient_local_n1equaln2(current_p, shape, block[0], local_grad);
+        } else {
+            val = stable_shape_and_gradient_local_superquad(current_p, shape, block, local_grad);
+        }
+
+        // Convergence Check
+        if (std::fabs(val) < tol) break;
 
+        // Newton Step
+        double slope = local_grad[0] * local_normal[0] + 
+                       local_grad[1] * local_normal[1] + 
+                       local_grad[2] * local_normal[2];
 
+        // Safety check to prevent divide-by-zero if ray grazes surface
+        if (std::fabs(slope) < 1e-12) break;
+
+        t -= val / slope;
+    }
+    
+    overlap = t;
+  }
+} 
 
 } // namespace MathExtraSuperellipsoids
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index c5f9af27325..28ef1b72d9c 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -74,6 +74,7 @@ namespace MathExtraSuperellipsoids {
   double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
   double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
   double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
+  void find_overlap_distance(const double* shape, const double* block, const double* quat, const double* global_point, const double* global_normal, double& overlap);
 
 };
 

From 2191b43e33dfd487f285c2c93673c33742eb45b6 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sat, 22 Nov 2025 22:28:03 +0100
Subject: [PATCH 043/174] Corrected contact detection loop, added check for obb
 if aspect ratio is high

---
 src/ASPHERE/math_extra_superellipsoids.cpp    |  14 +-
 src/ASPHERE/math_extra_superellipsoids.h      |   2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     | 147 ++++++++++--------
 3 files changed, 96 insertions(+), 67 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 474e9c4d0a8..4e5d1ca2307 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -642,13 +642,17 @@ double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const do
 }
 
 // Newton Rapson method to find the overlap distance from the contact point given the normal
-void find_overlap_distance(
-  const double* shape, const double* block, const double* quat,
-  const double* global_point, const double* global_normal, double& overlap)
+void compute_overlap_distance(
+  const double* shape, const double* block, const double Rot[3][3],
+  const double* global_point, const double* global_normal,
+  const double* center, double& overlap)
    {
   double local_point[3], local_normal[3];
-  global2local_vector(global_point, quat, local_point);
-  global2local_vector(global_normal, quat, local_normal);
+  double del[3];
+  MathExtra::sub3(global_point, center, del);  // bring origin to 0.0
+  MathExtra::transpose_matvec(Rot, del, local_point); 
+  MathExtra::transpose_matvec(Rot, global_normal, local_normal);
+  
   double local_f;
   double local_grad[3];
   
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 28ef1b72d9c..612a4e7b69e 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -74,7 +74,7 @@ namespace MathExtraSuperellipsoids {
   double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
   double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
   double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
-  void find_overlap_distance(const double* shape, const double* block, const double* quat, const double* global_point, const double* global_normal, double& overlap);
+  void compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const double* global_point, const double* global_normal, const double* center, double& overlap);
 
 };
 
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 0c5560363db..9ec5c200952 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -113,7 +113,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double fn, fs, fs1, fs2, fs3;
   double shrmag, rsht;
   int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
+  int *touch, **firsttouch, *cached_axis_index; // added cached axis index for bounding box check
   double *shear, *allshear, **firstshear, *prev_cp; // added previous contact point placeholder
 
   double shapex, shapey, shapez; // ellipsoid shape params
@@ -203,29 +203,87 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
       radj = radius[j];
       radsum = radi + radj;
 
-      bool touching;
+      bool touching = true;
       if (rsq >= radsum * radsum) {
         touching = false;
-      // TODO: consider implementing a bounding-box check for hierchical detection
-      //       Could be useful for high aspect ratio grain.
-      //       Maybe make it an option, since it could be slower for low aspect ratio grains
+      }
+      else {    
+         // compute aspect ratios, if they are not that different from zero skip
+         // to the newton rapson, else do the bounding box
+         MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+         MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+         MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+         MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+
+         double min_dim_i = std::fmin(shapei[0], std::fmin(shapei[1], shapei[2]));
+         double min_dim_j = std::fmin(shapej[0], std::fmin(shapej[1], shapej[2]));
+         double max_dim_i = std::fmax(shapei[0], std::fmax(shapei[1], shapei[2]));
+         double max_dim_j = std::fmax(shapej[0], std::fmax(shapej[1], shapej[2]));
+
+         double ar_i = max_dim_i / min_dim_i; 
+         double ar_j = max_dim_j / min_dim_j;
+
+         // I put an arbitrary value for when to skip the bounding boxes
+         // this might need testing
+         bool high_aspect_ratio = (ar_i > 1.5 || ar_j > 1.5); 
+         MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+         // TODO: Not sure if j is accessible if ghost, radius is, so bonus props must have been communicated on ghost atoms I think
+         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+          
+         if (high_aspect_ratio){
+          // check the bounding box
+          bool obb_separate = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+            x[i], Ri, shapei, x[j], Rj, shapej, *cached_axis_index);
+          
+          if (obb_separate) {
+            touching = false;
+          }
+        }
+      }
+
+      // Super-ellipsoid contact detection between atoms i and j
+      if (touch[jj] == 1  && touching) {
+        // Continued contact: use grain true shape and last contact point
+        // TODO: implement neigh history!
+        // TODO: move contact point with rigid body motion of the pair ?
+        //       not sure if enough information to do that
+        MathExtra::copy3(prev_cp, X0);
+        X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
+        int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
+        if (status == 0)
+          touching = true;
+        else if(status == 5)
+          touching = false;
+        else
+          error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
       } else {
-        // Super-ellipsoid contact detection between atoms i and j
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        // TODO: Not sure if j is accessible if ghost, radius is, so bonus props must have been communicated on ghost atoms I think
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-
-        if (touch[jj] == 1) {
-          // Continued contact: use grain true shape and last contact point
-          MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-          MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-          MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-          MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-          // TODO: implement neigh history!
-          // TODO: move contact point with rigid body motion of the pair ?
-          //       not sure if enough information to do that
-          MathExtra::copy3(prev_cp, X0);
-          X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
+        if (touching){
+        // New contact: Build initial guess incrementally
+        // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
+        //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
+        //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
+        //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
+        //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
+        //       This makes me think using radi and radj could be fine! To be investigated
+        double reqi = std::cbrt(bonus[ellipsoid[i]].shape[0] * bonus[ellipsoid[i]].shape[1] * bonus[ellipsoid[i]].shape[2]);
+        double reqj = std::cbrt(bonus[ellipsoid[j]].shape[0] * bonus[ellipsoid[j]].shape[1] * bonus[ellipsoid[j]].shape[2]);
+        MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+        //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+        for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
+          double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+          shapei[0] = shapei[1] = shapei[2] = reqi;
+          shapej[0] = shapej[1] = shapej[2] = reqj;
+          MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+          MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+          if (bonus[ellipsoid[i]].flag_super) { // not a big time save
+            blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+            blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+          }
+          if (bonus[ellipsoid[j]].flag_super) {
+            blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+            blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+          }
           int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
           if (status == 0)
             touching = true;
@@ -233,40 +291,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             touching = false;
           else
             error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
-        } else {
-          // New contact: Build initial guess incrementally
-          // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
-          //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
-          //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
-          //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
-          //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
-          //       This makes me think using radi and radj could be fine! To be investigated
-          double reqi = std::cbrt(bonus[ellipsoid[i]].shape[0] * bonus[ellipsoid[i]].shape[1] * bonus[ellipsoid[i]].shape[2]);
-          double reqj = std::cbrt(bonus[ellipsoid[j]].shape[0] * bonus[ellipsoid[j]].shape[1] * bonus[ellipsoid[j]].shape[2]);
-          MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-          //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-          for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
-            X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
-            double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-            shapei[0] = shapei[1] = shapei[2] = reqi;
-            shapej[0] = shapej[1] = shapej[2] = reqj;
-            MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-            MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-            if (bonus[ellipsoid[i]].flag_super) { // not a big time save
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-            }
-            if (bonus[ellipsoid[j]].flag_super) {
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-            }
-            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
-            if (status == 0)
-              touching = true;
-            else if(status == 5)
-              touching = false;
-            else
-              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
           }
         }
       }
@@ -283,11 +307,12 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // TODO: Compute the force between the 2 superquadrics
         MathExtra::copy3(X0, prev_cp);
 
-        // TODO: Everything below must be changed
-
-        r = sqrt(rsq);
-        rinv = 1.0 / r;
-        rsqinv = 1.0 / rsq;
+        double nji[3] = { -nij[0], -nij[1], -nij[2] };
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = 0.0, overlap2 = 0.0;
+        MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, X0, nij, x[i], overlap1);
+        MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, X0, nji, x[j], overlap2);
 
         // branch vectors 
         double cr1[3], cr2[3];
@@ -442,7 +467,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz);
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // need to check this, it is for virial stress, but needs some tweaking for non-spherical particles
       }
     }
   }

From e94b09dd2b3acbfd6d851a1757182c6bf060c354 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 23 Nov 2025 19:06:57 -0600
Subject: [PATCH 044/174] small cleanup on overlap calculation

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 69 +++++++++----------
 src/ASPHERE/math_extra_superellipsoids.h      |  2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     |  7 +-
 3 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 4e5d1ca2307..f509e902f76 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -39,6 +39,9 @@ static constexpr double CONVERGENCE_NEWTON = 1e-6;
 static constexpr int ITERMAX_LINESEARCH = 10;
 static constexpr double PARAMETER_LINESEARCH = 1e-4;
 static constexpr double CUTBACK_LINESEARCH = 0.5;
+static constexpr double CONVERGENCE_OVERLAP = 1e-8;
+static constexpr unsigned int ITERMAX_OVERLAP = 20;
+static constexpr double MINSLOPE_OVERLAP = 1e-12;
 
 /* ----------------------------------------------------------------------
    beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
@@ -642,13 +645,13 @@ double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const do
 }
 
 // Newton Rapson method to find the overlap distance from the contact point given the normal
-void compute_overlap_distance(
-  const double* shape, const double* block, const double Rot[3][3],
+double compute_overlap_distance(
+  const double* shape, const double* block, const double Rot[3][3], const int flag,
   const double* global_point, const double* global_normal,
-  const double* center, double& overlap)
-   {
+  const double* center) {
   double local_point[3], local_normal[3];
   double del[3];
+  double overlap;
   MathExtra::sub3(global_point, center, del);  // bring origin to 0.0
   MathExtra::transpose_matvec(Rot, del, local_point); 
   MathExtra::transpose_matvec(Rot, global_normal, local_normal);
@@ -662,7 +665,7 @@ void compute_overlap_distance(
   // this results in a quadratic equation and we take the positive solution since
   // we are taking the outward facing normal for each grain
 
-  if (block[0] == 2.0 && block[1] == 2.0){
+  if (flag == 0){
 
     double a_inv2 = 1.0 / (shape[0] * shape[0]);
     double b_inv2 = 1.0 / (shape[1] * shape[1]);
@@ -686,47 +689,41 @@ void compute_overlap_distance(
 
     // Clamp delta to zero just in case numerical noise makes it negative
     if (delta < 0.0) delta = 0.0; 
-    double t = (-B + std::sqrt(delta)) / (2.0 * A);
-
-
-    } else {
+    overlap = (-B + std::sqrt(delta)) / (2.0 * A);
+  } else {
       // --- Superquadric Case (Newton-Raphson on Distance Estimator) ---
     
-    double t = 0.0; // Distance along the normal
+    overlap = 0.0; // Distance along the normal
     double current_p[3];
     double val;
-    double tol = 1e-8;
-    unsigned int max_iter = 20;
-    
-    for (unsigned int iter = 0; iter < max_iter; iter++) {
-        // Update current search position: P = Start + t * Normal
-        current_p[0] = local_point[0] + t * local_normal[0];
-        current_p[1] = local_point[1] + t * local_normal[1];
-        current_p[2] = local_point[2] + t * local_normal[2];
-
-        // Calculate Distance Estimator value and Gradient
-        if (std::fabs(block[0] - block[1]) < 1e-6) {
-            val = stable_shape_and_gradient_local_n1equaln2(current_p, shape, block[0], local_grad);
-        } else {
-            val = stable_shape_and_gradient_local_superquad(current_p, shape, block, local_grad);
-        }
+    for (unsigned int iter = 0; iter < ITERMAX_OVERLAP; iter++) {
+      // Update current search position: P = Start + t * Normal
+      current_p[0] = local_point[0] + overlap * local_normal[0];
+      current_p[1] = local_point[1] + overlap * local_normal[1];
+      current_p[2] = local_point[2] + overlap * local_normal[2];
+
+      // Calculate Distance Estimator value and Gradient
+      if (flag == 1) {
+        val = stable_shape_and_gradient_local_n1equaln2(current_p, shape, block[0], local_grad);
+      } else {
+        val = stable_shape_and_gradient_local_superquad(current_p, shape, block, local_grad);
+      }
 
-        // Convergence Check
-        if (std::fabs(val) < tol) break;
+      // Convergence Check
+      if (std::fabs(val) < CONVERGENCE_OVERLAP) break;
 
-        // Newton Step
-        double slope = local_grad[0] * local_normal[0] + 
-                       local_grad[1] * local_normal[1] + 
-                       local_grad[2] * local_normal[2];
+      // Newton Step
+      double slope = local_grad[0] * local_normal[0] +
+                     local_grad[1] * local_normal[1] +
+                     local_grad[2] * local_normal[2];
 
-        // Safety check to prevent divide-by-zero if ray grazes surface
-        if (std::fabs(slope) < 1e-12) break;
+      // Safety check to prevent divide-by-zero if ray grazes surface
+      if (std::fabs(slope) < MINSLOPE_OVERLAP) break;
 
-        t -= val / slope;
+      overlap -= val / slope;
     }
-    
-    overlap = t;
   }
+  return overlap;
 } 
 
 } // namespace MathExtraSuperellipsoids
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 612a4e7b69e..b1632736500 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -74,7 +74,7 @@ namespace MathExtraSuperellipsoids {
   double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
   double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
   double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
-  void compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const double* global_point, const double* global_normal, const double* center, double& overlap);
+  double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
 
 };
 
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 9ec5c200952..8539f52a887 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -241,6 +241,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         }
       }
 
+      flagi = MathExtraSuperellipsoids::determine_flag(blocki); // TODO: actually pass those and use them in contact point instead of recomputing
+      flagj = MathExtraSuperellipsoids::determine_flag(blockj); // TODO: actually pass those and use them in contact point instead of recomputing
       // Super-ellipsoid contact detection between atoms i and j
       if (touch[jj] == 1  && touching) {
         // Continued contact: use grain true shape and last contact point
@@ -310,9 +312,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
         // compute overlap depth along normal direction for each grain
         // overlap is positive for both grains
-        overlap1 = 0.0, overlap2 = 0.0;
-        MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, X0, nij, x[i], overlap1);
-        MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, X0, nji, x[j], overlap2);
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]); // TODO: Jibril: I wonder if we'd get the correct, but negative overlap if we picked nji, which might be cheaper than computing nji
 
         // branch vectors 
         double cr1[3], cr2[3];

From 83a493ef337f11ca4965a2ade66a9895b7ca9c43 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 23 Nov 2025 20:33:56 -0600
Subject: [PATCH 045/174] fix more small bugs

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index f509e902f76..ab8018f1804 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -296,7 +296,7 @@ double shape_and_derivatives_local_superquad(const double* xlocal, const double*
   double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m2 * nu;
 
   double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
-  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
+  double z_c_pow_n1_m1 = z_c_pow_n1_m2 * z_c;
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;
@@ -575,7 +575,7 @@ double stable_shape_and_gradient_local_superquad(const double* xlocal, const dou
   double nu_pow_n1_n2_m1 = nu_pow_n1_n2_m2 * nu;
 
   double z_c_pow_n1_m2 = std::pow(z_c, n1 -2.0);
-  double z_c_pow_n1_m1 = z_c_pow_n1_m2 / z_c;
+  double z_c_pow_n1_m1 = z_c_pow_n1_m2 * z_c;
 
   // Equation (14)
   double signx = xlocal[0] > 0.0 ? 1.0 : -1.0;

From 8bff2261cd6d2bcadc424af29c82a482b6f80faf Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 23 Nov 2025 20:38:12 -0600
Subject: [PATCH 046/174] pass flags for grain type to
 determine_contact_point()

---
 src/ASPHERE/math_extra_superellipsoids.cpp         | 6 ++----
 src/ASPHERE/math_extra_superellipsoids.h           | 4 ++--
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 8 ++++----
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index ab8018f1804..40b0b7d67b6 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -417,13 +417,11 @@ double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], c
 }
 
 
-int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
-                            const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
+int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                            const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                             double* X0, double* nij) {
   double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
   bool converged(false);
-  int flagi = determine_flag(blocki);
-  int flagj = determine_flag(blockj);
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
   for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index b1632736500..f8aa481c845 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -65,8 +65,8 @@ namespace MathExtraSuperellipsoids {
   double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                        const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                                        const double* X, double* shapefunc, double* residual, double* jacobian);
-  int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki,
-                              const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj,
+  int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                              const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                               double* X0, double* nij);
   int determine_flag(const double* block);
  
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 8539f52a887..e23484d8c65 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -241,8 +241,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         }
       }
 
-      flagi = MathExtraSuperellipsoids::determine_flag(blocki); // TODO: actually pass those and use them in contact point instead of recomputing
-      flagj = MathExtraSuperellipsoids::determine_flag(blockj); // TODO: actually pass those and use them in contact point instead of recomputing
+      flagi = MathExtraSuperellipsoids::determine_flag(blocki);
+      flagj = MathExtraSuperellipsoids::determine_flag(blockj);
       // Super-ellipsoid contact detection between atoms i and j
       if (touch[jj] == 1  && touching) {
         // Continued contact: use grain true shape and last contact point
@@ -251,7 +251,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         //       not sure if enough information to do that
         MathExtra::copy3(prev_cp, X0);
         X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
-        int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
+        int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
         if (status == 0)
           touching = true;
         else if(status == 5)
@@ -286,7 +286,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
             blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
           }
-          int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, x[j], Rj, shapej, blockj, X0, nij);
+          int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
           if (status == 0)
             touching = true;
           else if(status == 5)

From 08f2ff00a66cf47ff8f8981e8014f94c41937c56 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 23 Nov 2025 21:02:08 -0600
Subject: [PATCH 047/174] compue omega from angmom. avec_ellipsoid does not
 define omega

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index e23484d8c65..a293ae773b4 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -120,7 +120,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double quat1, quat2, quat3, quat4;
   double block1, block2;
 
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2;
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
   // TODO: Maybe we can make flag_super of the grain an int instead, to cimplify when n1 = n2 ?
   int flagi, flagj; // 0 : ellipsoid, 1 : equal exponents n1=n2, 2: general super-ellipsoid n1 >2, n2>2, n1!=n2
 
@@ -154,7 +154,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
-  double **omega = atom->omega;
+  double **angmom = atom->angmom;
   double **torque = atom->torque;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
@@ -227,9 +227,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
          // this might need testing
          bool high_aspect_ratio = (ar_i > 1.5 || ar_j > 1.5); 
          MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-         // TODO: Not sure if j is accessible if ghost, radius is, so bonus props must have been communicated on ghost atoms I think
          MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-          
+
          if (high_aspect_ratio){
           // check the bounding box
           bool obb_separate = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
@@ -321,9 +320,18 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::sub3(X0, x[j], cr2);
 
         // we need to take the cross product of omega
+
+        double ex_space[3],ey_space[3],ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
+        MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
+                                   bonus[ellipsoid[i]].inertia,omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
+        MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
+                                   bonus[ellipsoid[j]].inertia,omegaj);
+
         double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omega[i], cr1, omega_cross_r1);
-        MathExtra::cross3(omega[j], cr2, omega_cross_r2);
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
 
         // relative translational velocity 
         // compute directly the sum of relative translational velocity at contact point
@@ -757,6 +765,7 @@ void PairGranHookeHistoryEllipsoid::reset_dt()
 
 /* ---------------------------------------------------------------------- */
 
+// TODO: implement the single() function for the super-ellipsoid
 double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
                                     double /*factor_coul*/, double /*factor_lj*/, double &fforce)
 {

From aab8afe852a478d0af42a3d4539fd7436b8b07f0 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 23 Nov 2025 22:11:23 -0600
Subject: [PATCH 048/174] implement neigh history variables

---
 src/ASPHERE/math_extra_superellipsoids.h      | 11 ++---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 45 ++++++++++---------
 .../pair_gran_hooke_history_ellipsoid.h       |  2 +-
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index f8aa481c845..6b836640133 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -48,7 +48,7 @@ namespace MathExtraSuperellipsoids {
   // ADD CONTACT DETECTION HERE
   inline bool check_oriented_bounding_boxes(const double* xc1, const double R1[3][3], const double* shape1,
                                         const double* xc2, const double R2[3][3], const double* shape2, 
-                                        int &cached_axis);
+                                        double* cached_axis);
 
   inline bool check_intersection_axis(const int axis_id, const double C[3][3], const double AbsC[3][3], 
                                       const double* center_distance_box1, const double* center_distance_box2,
@@ -332,7 +332,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
 inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
     const double* xc1, const double R1[3][3], const double* shape1,
     const double* xc2, const double R2[3][3], const double* shape2, 
-    int &cached_axis
+    double* cached_axis
 ){
     // cache axis is the axis that separated the boxes last time
     // due to temporal coherence we check it first
@@ -362,15 +362,16 @@ inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
     MathExtra::transpose_matvec(R2, center_distance,  center_distance_box2);
 
     // first check the cached axis
-    separated = check_intersection_axis(cached_axis, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
+    const int axis = *cached_axis;
+    separated = check_intersection_axis(axis, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
 
     if (separated) return true;
     // then check all the other axes
     for (int axis_id = 0; axis_id < 15; axis_id++){
-        if (axis_id == cached_axis) continue; // already checked
+        if (axis_id == axis) continue; // already checked
         separated = check_intersection_axis(axis_id, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
         if (separated) {
-            cached_axis = axis_id; // update cached axis
+            *cached_axis = axis_id; // update cached axis
             return true;
         }
     }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a293ae773b4..f536a419d0b 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -46,8 +46,8 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   no_virial_fdotr_compute = 1;
   centroidstressflag = CENTROID_NOTAVAIL;
   finitecutflag = 1;
-  history = 1;
-  size_history = 6;  // shear[3], previous_cp[3]
+  use_history = 1;
+  size_history = 8;  // shear[3], contact_point_and_Lagrange_multiplier[4], separating_axis_index
 
   single_extra = 10;
   svector = new double[10];
@@ -113,8 +113,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double fn, fs, fs1, fs2, fs3;
   double shrmag, rsht;
   int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch, *cached_axis_index; // added cached axis index for bounding box check
-  double *shear, *allshear, **firstshear, *prev_cp; // added previous contact point placeholder
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
 
   double shapex, shapey, shapez; // ellipsoid shape params
   double quat1, quat2, quat3, quat4;
@@ -172,7 +172,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   firsttouch = fix_history->firstflag;
-  firstshear = fix_history->firstvalue;
+  firsthistory = fix_history->firstvalue;
 
   // loop over neighbors of my atoms
 
@@ -185,7 +185,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
 
     touch = firsttouch[i];
-    allshear = firstshear[i];
+    allhistory = firsthistory[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
@@ -203,11 +203,14 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
       radj = radius[j];
       radsum = radi + radj;
 
+      X0_prev = &allhistory[3 + size_history * jj];
+
       bool touching = true;
       if (rsq >= radsum * radsum) {
         touching = false;
       }
       else {    
+        separating_axis = &allhistory[7 + size_history * jj];
          // compute aspect ratios, if they are not that different from zero skip
          // to the newton rapson, else do the bounding box
          MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
@@ -232,7 +235,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
          if (high_aspect_ratio){
           // check the bounding box
           bool obb_separate = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-            x[i], Ri, shapei, x[j], Rj, shapej, *cached_axis_index);
+            x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
           
           if (obb_separate) {
             touching = false;
@@ -248,8 +251,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // TODO: implement neigh history!
         // TODO: move contact point with rigid body motion of the pair ?
         //       not sure if enough information to do that
-        MathExtra::copy3(prev_cp, X0);
-        X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
+        X0[0] = X0_prev[0];
+        X0[1] = X0_prev[1];
+        X0[2] = X0_prev[2];
+        X0[3] = X0_prev[3];
         int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
         if (status == 0)
           touching = true;
@@ -271,7 +276,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
         //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
         for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
-          X0[3] = 0.0; // Lagrange multiplier mu^2 initially zero
+          X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
           double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
           shapei[0] = shapei[1] = shapei[2] = reqi;
           shapej[0] = shapej[1] = shapej[2] = reqj;
@@ -300,13 +305,13 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // unset non-touching neighbors
 
         touch[jj] = 0;
-        shear = &allshear[3 * jj];
-        shear[0] = 0.0;
-        shear[1] = 0.0;
-        shear[2] = 0.0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) history[k] = 0.0;
       } else {
-        // TODO: Compute the force between the 2 superquadrics
-        MathExtra::copy3(X0, prev_cp);
+        X0_prev[0] = X0[0];
+        X0_prev[1] = X0[1];
+        X0_prev[2] = X0[2];
+        X0_prev[3] = X0[3];
 
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
         // compute overlap depth along normal direction for each grain
@@ -392,7 +397,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // shear history effects
 
         touch[jj] = 1;
-        shear = &allshear[3 * jj];
+        shear = &allhistory[size_history * jj];
 
         if (shearupdate) {
           shear[0] += vtr1 * dt;
@@ -583,7 +588,7 @@ void PairGranHookeHistoryEllipsoid::init_style()
 
   // need a granular neighbor list
 
-  if (history)
+  if (use_history)
     neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
   else
     neighbor->add_request(this, NeighConst::REQ_SIZE);
@@ -594,7 +599,7 @@ void PairGranHookeHistoryEllipsoid::init_style()
   // it replaces FixDummy, created in the constructor
   // this is so its order in the fix list is preserved
 
-  if (history && (fix_history == nullptr)) {
+  if (use_history && (fix_history == nullptr)) {
     auto cmd = fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
     fix_history = dynamic_cast<FixNeighHistory *>(
         modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
@@ -665,7 +670,7 @@ void PairGranHookeHistoryEllipsoid::init_style()
 
   // set fix which stores history info
 
-  if (history) {
+  if (use_history) {
     fix_history = dynamic_cast<FixNeighHistory *>(
         modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
     if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 33764e67d68..da2940d617b 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -50,7 +50,7 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int dampflag;
   double dt;
   int freeze_group_bit;
-  int history;
+  int use_history;
   int limit_damping;
 
   int neighprev;

From 288c411acd3c40a51abe6d6f7d9df06305f9cc72 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 24 Nov 2025 14:26:54 +0100
Subject: [PATCH 049/174] Implemented starting seed guess from OBB

---
 src/ASPHERE/math_extra_superellipsoids.h | 294 ++++++++++++++++++++++-
 1 file changed, 287 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 6b836640133..bba66aca359 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -54,6 +54,11 @@ namespace MathExtraSuperellipsoids {
                                       const double* center_distance_box1, const double* center_distance_box2,
                                       const double* a, const double* b);
 
+  inline bool check_collision_and_get_seed(const double* xc1, const double R1[3][3], const double* shape1,
+                                         const double* xc2, const double R2[3][3], const double* shape2,
+                                        double* cached_axis, double* contact_point);
+
+
   // Jibril's versions of the functions for contact detection
   double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
@@ -344,7 +349,7 @@ inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
     MathExtra::transpose_times3(R1, R2, C); // C = R1^T * R2
     for (unsigned int i=0; i<3; i++){
         for (unsigned int j=0; j<3; j++){
-            AbsC[i][j] = std::fabs(C[i][j]); // we only need the absolute values
+            AbsC[i][j] = std::fabs(C[i][j]); // for when absolute values are needed
         }
     }
 
@@ -353,16 +358,13 @@ inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
         center_distance[i] = xc2[i] - xc1[i];
     } 
 
-    // rotate center distance into box 1 frame
-    double center_distance_box1[3];
+    // Project center distance into both local frames
+    double center_distance_box1[3], center_distance_box2[3];
     MathExtra::transpose_matvec(R1, center_distance,  center_distance_box1);
-
-    //rotate center distance into box 2 frame
-    double center_distance_box2[3];
     MathExtra::transpose_matvec(R2, center_distance,  center_distance_box2);
 
     // first check the cached axis
-    const int axis = *cached_axis;
+    const int axis = (int) (*cached_axis);
     separated = check_intersection_axis(axis, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
 
     if (separated) return true;
@@ -475,5 +477,283 @@ inline bool MathExtraSuperellipsoids::check_intersection_axis(
 }
 
 
+inline bool MathExtraSuperellipsoids::check_collision_and_get_seed(
+    const double* xc1, const double R1[3][3], const double* shape1,
+    const double* xc2, const double R2[3][3], const double* shape2, 
+    double* cached_axis, double* contact_point
+){  
+    // cache axis is the axis that separated the boxes last time
+    // due to temporal coherence we check it first
+
+    double C[3][3], AbsC[3][3];
+    MathExtra::transpose_times3(R1, R2, C); // C = R1^T * R2
+    
+    // for orientated bounding boxes we check the 15 separating axes
+    const double eps = 1e-20;
+    for (unsigned int i=0; i<3; i++){
+        for (unsigned int j=0; j<3; j++){
+            AbsC[i][j] = std::fabs(C[i][j]) + eps; // Add epsilon to prevent division by zero in edge cases
+        }
+    }
+
+    double center_distance[3]; // Center distance in Global Frame
+        for (unsigned int i=0; i<3; i++){
+        center_distance[i] = xc2[i] - xc1[i];
+    } 
+
+    // Project center distance into both local frames
+    double center_distance_box1[3], center_distance_box2[3];
+    MathExtra::transpose_matvec(R1, center_distance, center_distance_box1);
+    MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
+
+    int best_axis = -1;
+    double min_overlap = 0.0;
+    const double edge_bias = 1.05; // Prefer face contacts over edge contacts
+
+    // Lambda to test an axis. Returns TRUE if SEPARATED.
+    // I was reading that lambdas can be optimized away by the compiler.
+    // and have less overhead than function calls.
+    auto test_axis_separated = [&](int i) -> bool {
+        double R1_rad, R2_rad, dist, overlap;
+
+        // Switch is efficient here; compiler generates a jump table.
+        switch(i){
+            case 0: // A0
+                R1_rad = shape1[0];
+                R2_rad = shape2[0] * AbsC[0][0] + shape2[1] * AbsC[0][1] + shape2[2] * AbsC[0][2];
+                dist = std::fabs(center_distance_box1[0]);
+                break;
+            case 1: // A1
+                R1_rad = shape1[1];
+                R2_rad = shape2[0] * AbsC[1][0] + shape2[1] * AbsC[1][1] + shape2[2] * AbsC[1][2];
+                dist = std::fabs(center_distance_box1[1]);
+                break;
+            case 2: // A2
+                R1_rad = shape1[2];
+                R2_rad = shape2[0] * AbsC[2][0] + shape2[1] * AbsC[2][1] + shape2[2] * AbsC[2][2];
+                dist = std::fabs(center_distance_box1[2]);
+                break;
+            case 3: // B0
+                R1_rad = shape1[0] * AbsC[0][0] + shape1[1] * AbsC[1][0] + shape1[2] * AbsC[2][0];
+                R2_rad = shape2[0];
+                dist = std::fabs(center_distance_box2[0]);
+                break;
+            case 4: // B1
+                R1_rad = shape1[0] * AbsC[0][1] + shape1[1] * AbsC[1][1] + shape1[2] * AbsC[2][1];
+                R2_rad = shape2[1];
+                dist = std::fabs(center_distance_box2[1]);
+                break;
+            case 5: // B2
+                R1_rad = shape1[0] * AbsC[0][2] + shape1[1] * AbsC[1][2] + shape1[2] * AbsC[2][2];
+                R2_rad = shape2[2];
+                dist = std::fabs(center_distance_box2[2]);
+                break;
+            case 6: // A0 x B0
+                R1_rad = shape1[1] * AbsC[2][0] + shape1[2] * AbsC[1][0];
+                R2_rad = shape2[1] * AbsC[0][2] + shape2[2] * AbsC[0][1];
+                dist = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
+                break;
+            case 7: // A0 x B1
+                R1_rad = shape1[1] * AbsC[2][1] + shape1[2] * AbsC[1][1];
+                R2_rad = shape2[0] * AbsC[0][2] + shape2[2] * AbsC[0][0];
+                dist = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
+                break;
+            case 8: // A0 x B2
+                R1_rad = shape1[1] * AbsC[2][2] + shape1[2] * AbsC[1][2];
+                R2_rad = shape2[0] * AbsC[0][1] + shape2[1] * AbsC[0][0];
+                dist = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
+                break;
+            case 9: // A1 x B0
+                R1_rad = shape1[0] * AbsC[2][0] + shape1[2] * AbsC[0][0];
+                R2_rad = shape2[1] * AbsC[1][2] + shape2[2] * AbsC[1][1];
+                dist = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
+                break;
+            case 10: // A1 x B1
+                R1_rad = shape1[0] * AbsC[2][1] + shape1[2] * AbsC[0][1];
+                R2_rad = shape2[0] * AbsC[1][2] + shape2[2] * AbsC[1][0];
+                dist = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
+                break;
+            case 11: // A1 x B2
+                R1_rad = shape1[0] * AbsC[2][2] + shape1[2] * AbsC[0][2];
+                R2_rad = shape2[0] * AbsC[1][1] + shape2[1] * AbsC[1][0];
+                dist = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
+                break;
+            case 12: // A2 x B0
+                R1_rad = shape1[0] * AbsC[1][0] + shape1[1] * AbsC[0][0];
+                R2_rad = shape2[1] * AbsC[2][2] + shape2[2] * AbsC[2][1];
+                dist = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
+                break;
+            case 13: // A2 x B1
+                R1_rad = shape1[0] * AbsC[1][1] + shape1[1] * AbsC[0][1];
+                R2_rad = shape2[0] * AbsC[2][2] + shape2[2] * AbsC[2][0];
+                dist = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
+                break;
+            case 14: // A2 x B2
+                R1_rad = shape1[0] * AbsC[1][2] + shape1[1] * AbsC[0][2];
+                R2_rad = shape2[0] * AbsC[2][1] + shape2[1] * AbsC[2][0];
+                dist = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
+                break;
+            default: return false;
+        }
+
+        if (dist > R1_rad + R2_rad) return true; // Separated!
+
+        // If not separated, track the overlap depth
+        overlap = (R1_rad + R2_rad) - dist;
+        
+        // Bias: Penalize edge axes slightly to prefer stable face contacts
+        if (i >= 6) overlap *= edge_bias;
+
+        if (overlap < min_overlap) {
+            min_overlap = overlap;
+            best_axis = i;
+        }
+        return false; // Not separated
+    };
+
+    // Check Cached Axis First (Temporal Coherence)
+    int c_axis = (int)(*cached_axis);
+    if (test_axis_separated(c_axis)) return false; 
+
+    // Check remaining axes
+    for (int i = 0; i < 15; i++){
+        if (i == c_axis) continue;
+        if (test_axis_separated(i)) {
+            *cached_axis = (double)i;
+            return false;
+        }
+    }
+   
+    // If we reached here, 'best_axis' holds the axis index where the overlap is minimal
+    if (best_axis < 6) {
+        // Face-to-Face contact logic: Project "Incident" box onto "Reference" face, clip to find overlap center.
+        // Pointers to define who is Reference (the face) and who is Incident
+        const double* posRef = xc1;
+        const double* posInc = xc2;
+        const double (*RRef)[3] = R1;
+        const double (*RInc)[3] = R2;
+        const double* shapeRef = shape1;
+        const double* shapeInc = shape2;
+        double* D_local_Ref = center_distance_box1; // Center dist in Ref frame
+
+        int axis = best_axis; 
+
+        // Swap if Reference is Box 2 (Indices 3, 4, 5)
+        if (best_axis >= 3) {
+            posRef = xc2;
+            posInc = xc1;
+            RRef = R2;
+            RInc = R1;
+            shapeRef = shape2;
+            shapeInc = shape1;
+            D_local_Ref = center_distance_box2;
+            axis -= 3;
+        }
+
+        double seed_local[3];
+
+        //Normal Component: Midway through the penetration depth
+        // Calculate projected radius of Incident block onto this axis
+        
+        double dir = (D_local_Ref[axis] > 0) ? 1.0 : -1.0;
+        double radInc_proj = 0.0;
+        for(int k=0; k<3; k++) {
+            // If swapped (Box 2 is Ref), we need AbsC^T, so we swap AbsC indices
+            double val = (best_axis < 3) ? AbsC[axis][k] : AbsC[k][axis];
+            radInc_proj += shapeInc[k] * val;
+        }
+
+        double surfRef = dir * shapeRef[axis];
+        double surfInc = D_local_Ref[axis] - (dir * radInc_proj);
+        seed_local[axis] = 0.5 * (surfRef + surfInc);
+
+        // Lateral Components: 1D Interval Overlap
+        for(int k=0; k<3; k++) {
+            if (k == axis) continue; // Skip the normal axis
+
+            double minRef = -shapeRef[k];
+            double maxRef =  shapeRef[k];
+
+            double radInc = 0.0;
+            for(int j=0; j<3; j++) {
+                double val = (best_axis < 3) ? AbsC[k][j] : AbsC[j][k]; 
+                radInc += shapeInc[j] * val;
+            }
+            double centerInc = D_local_Ref[k];
+            
+            double minInc = centerInc - radInc;
+            double maxInc = centerInc + radInc;
+
+            // Find intersection of intervals [minRef, maxRef] and [minInc, maxInc]
+            double start = (minRef > minInc) ? minRef : minInc; 
+            double end   = (maxRef < maxInc) ? maxRef : maxInc; 
+            seed_local[k] = 0.5 * (start + end); // Midpoint of overlap
+
+        }
+
+        // Transform Local Seed -> World Space
+        MathExtra::matvec(RRef, seed_local, contact_point);
+        for(int k=0; k<3; k++) contact_point[k] += posRef[k];
+    } 
+    else {
+        // Edge-to-edge contact logic: Midpoint of the closest points on the two skew edge lines.
+        // The logic is that index 6 corresponds to A_0 x B_0, 7 to A_0 x B_1, ..., 14 to A_2 x B_2
+        int edgeA_idx = (best_axis - 6) / 3;
+        int edgeB_idx = (best_axis - 6) % 3;
+
+        // Get World directions of the edges
+        double u[3] = { R1[0][edgeA_idx], R1[1][edgeA_idx], R1[2][edgeA_idx] };
+        double v[3] = { R2[0][edgeB_idx], R2[1][edgeB_idx], R2[2][edgeB_idx] };
+
+        // Identify the specific edges by checking the normal direction
+        // The normal N is roughly the distance vector center_distance for the closest edges
+        double N_loc1[3], N_loc2[3];
+        MathExtra::transpose_matvec(R1, center_distance, N_loc1);
+        MathExtra::transpose_matvec(R2, center_distance, N_loc2);
+
+        // Find Center of Edge A in World Space
+        double midA[3]; for(int k=0; k<3; k++) midA[k] = xc1[k];
+        for(int k=0; k<3; k++){
+            if(k == edgeA_idx) continue;
+            // Move to the face pointing towards B
+            double sign = (N_loc1[k] > 0) ? 1.0 : -1.0;
+            double offset = sign * shape1[k];
+            midA[0] += R1[0][k]*offset; midA[1] += R1[1][k]*offset; midA[2] += R1[2][k]*offset;
+        }
+
+        // Find Center of Edge B in World Space
+        double midB[3]; for(int k=0; k<3; k++) midB[k] = xc2[k];
+        for(int k=0; k<3; k++){
+            if(k == edgeB_idx) continue;
+            // Move to the face pointing away from A (Since center_distance is A->B, we check -N_loc2)
+            double sign = (N_loc2[k] < 0) ? 1.0 : -1.0; 
+            double offset = sign * shape2[k];
+            midB[0] += R2[0][k]*offset; midB[1] += R2[1][k]*offset; midB[2] += R2[2][k]*offset;
+        }
+
+        // Closest Points on Two Skew Lines 
+        // Line1 parameterized by s: P_A = midA + s*u
+        // Line2 parameterized by t: P_B = midB + t*v
+        double r[3] = { midB[0]-midA[0], midB[1]-midA[1], midB[2]-midA[2] };
+        double u_dot_v = u[0]*v[0]+u[1]*v[1]+u[2]*v[2];
+        double u_dot_r = u[0]*r[0]+u[1]*r[1]+u[2]*r[2];
+        double v_dot_r = v[0]*r[0]+v[1]*r[1]+v[2]*r[2];
+        
+        // Denom is 1 - (u.v)^2 because u and v are unit vectors
+        double denom = 1.0 - u_dot_v*u_dot_v + eps; 
+        double s = (u_dot_r - u_dot_v * v_dot_r) / denom;
+        double t = (u_dot_v * u_dot_r - v_dot_r) / denom; // Note: simplified derivation
+
+        // Compute World Points
+        double PA[3] = { midA[0]+s*u[0], midA[1]+s*u[1], midA[2]+s*u[2] };
+        double PB[3] = { midB[0]+t*v[0], midB[1]+t*v[1], midB[2]+t*v[2] };
+
+        // Seed is the midpoint
+        for(int k=0; k<3; k++) contact_point[k] = 0.5 * (PA[k] + PB[k]);
+    }
+
+    return true; // Collision confirmed
+}
+
 
 #endif

From 7878aeb71d102af9e88c6807a4782401c3e2e7b2 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 24 Nov 2025 14:33:52 +0100
Subject: [PATCH 050/174] Fixed inconsistency with signed distance in NS and
 coresect the stabl gradient for ellipsoids

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 40b0b7d67b6..3228409f33e 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -591,7 +591,7 @@ double stable_shape_and_gradient_local_superquad(const double* xlocal, const dou
   grad[1] *= scale_factor;
   grad[2] *= scale_factor;
 
-  return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
+  return std::pow(F, 1.0/n1) - 1.0;
 }
 
 // Special case for n2 = n2 = n > 2
@@ -624,7 +624,7 @@ double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const dou
   grad[1] *= scale_factor;
   grad[2] *= scale_factor;
 
-  return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
+  return std::pow(F, 1.0/n) - 1.0;
 }
 
 
@@ -639,7 +639,14 @@ double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const do
   grad[1] = b * xlocal[1];
   grad[2] = c * xlocal[2];
 
-  return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
+  double F = 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]);
+  double scale_factor = std::sqrt(F) / 2.0;
+  
+  grad[0] *= scale_factor;
+  grad[1] *= scale_factor;
+  grad[2] *= scale_factor;
+
+  return std::sqrt(F) - 1.0;
 }
 
 // Newton Rapson method to find the overlap distance from the contact point given the normal

From fa62b21538f0d16b114d389104ee131d5174c37f Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 24 Nov 2025 15:20:53 +0100
Subject: [PATCH 051/174] Added blockiness to compute_property_atom to easily
 dump it in output

---
 src/compute_property_atom.cpp | 41 +++++++++++++++++++++++++++++++++++
 src/compute_property_atom.h   |  2 ++
 2 files changed, 43 insertions(+)

diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index a2f7e4025d0..0abe1b8de7a 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -239,6 +239,14 @@ ComputePropertyAtom::ComputePropertyAtom(LAMMPS *lmp, int narg, char **arg) :
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_shapez;
 
+    } else if (strcmp(arg[iarg],"block1") == 0) {
+      if (!avec_ellipsoid) 
+        error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_block1;
+    } else if (strcmp(arg[iarg],"block2") == 0) {
+      if (!avec_ellipsoid) 
+        error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_block2;
     } else if (strcmp(arg[iarg],"quatw") == 0) {
       if (!avec_ellipsoid && !avec_body && !atom->quat_flag)
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
@@ -1366,6 +1374,39 @@ void ComputePropertyAtom::pack_shapez(int n)
 
 /* ---------------------------------------------------------------------- */
 
+void ComputePropertyAtom::pack_block1(int n)
+{
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+      buf[n] = bonus[ellipsoid[i]].block[0];
+    else buf[n] = 1.0;
+    n += nvalues;
+  }
+}
+/* ---------------------------------------------------------------------- */
+
+void ComputePropertyAtom::pack_block2(int n)
+{
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+      buf[n] = bonus[ellipsoid[i]].block[1];
+    else buf[n] = 1.0;
+    n += nvalues;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ComputePropertyAtom::pack_quatw(int n)
 {
   if (avec_ellipsoid) {
diff --git a/src/compute_property_atom.h b/src/compute_property_atom.h
index c6f4b2fd652..fc55acb54c2 100644
--- a/src/compute_property_atom.h
+++ b/src/compute_property_atom.h
@@ -104,6 +104,8 @@ class ComputePropertyAtom : public Compute {
   void pack_shapex(int);
   void pack_shapey(int);
   void pack_shapez(int);
+  void pack_block1(int);
+  void pack_block2(int);
   void pack_quatw(int);
   void pack_quati(int);
   void pack_quatj(int);

From 0df189c3703e15187ca20fc6fdab25a901d5085f Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 25 Nov 2025 17:08:41 +0100
Subject: [PATCH 052/174] Added regularization to gauss elimination solver

---
 src/ASPHERE/math_extra_superellipsoids.h | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index bba66aca359..7a78cbcc2f4 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -123,6 +123,15 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 }
 
 inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
+    
+    // 0. Regularization to avoid singularities
+    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
+    const double lambda = 1e-8; 
+    A[0]  += lambda;
+    A[5]  += lambda;
+    A[10] += lambda;
+    A[15] += lambda;
+
     // 1. Pivot 0 
     double inv0 = 1.0 / A[0];
     double m1 = A[4] * inv0;
@@ -160,6 +169,14 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
     // Helper lambda to access A[row, col]
     auto at = [&](int r, int c) -> double& { return A[r * 4 + c]; };
 
+    // 0. Regularization to avoid singularities
+    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
+    const double lambda = 1e-8; 
+    A[0]  += lambda;
+    A[5]  += lambda;
+    A[10] += lambda;
+    A[15] += lambda;
+
     // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
     
     for (int i = 0; i < 3; ++i) { // Loop over columns 0, 1, 2
@@ -214,6 +231,15 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
 }
 
 inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4]) {
+    
+    // 0. Regularization to avoid singularities
+    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
+    const double lambda = 1e-8; 
+    A[0]  += lambda;
+    A[5]  += lambda;
+    A[10] += lambda;
+    A[15] += lambda;
+    
      // --- COLUMN 0 ---
     // 1. Find Pivot in Col 0
     int p = 0; 

From 75d0627f5bd118cc1ac63a77d55a24bace291d6c Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Mon, 24 Nov 2025 17:23:11 -0600
Subject: [PATCH 053/174] positive force on atom i goes from j to i, not from i
 to j

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index f536a419d0b..8fe0d77f920 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -444,9 +444,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         // forces & torques
 
-        fx = nij[0] * ccel + fs1;
-        fy = nij[1] * ccel + fs2;
-        fz = nij[2] * ccel + fs3;
+        fx = -nij[0] * ccel + fs1;
+        fy = -nij[1] * ccel + fs2;
+        fz = -nij[2] * ccel + fs3;
         fx *= factor_lj; // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;

From 31cff6de331369f47e6f0a7b206d4a647d501681 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 25 Nov 2025 08:22:41 -0600
Subject: [PATCH 054/174] add superellipsoid granular example

---
 .../superellipsoid_gran/in.ellipsoid_gran     | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
new file mode 100644
index 00000000000..6b6bb3f189f
--- /dev/null
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -0,0 +1,45 @@
+# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
+
+units           si
+atom_style      ellipsoid
+dimension       3
+boundary p p p
+comm_modify vel yes
+newton off
+# create big ellipsoidal particles
+
+region          box block 0 10 0 10 0 10
+create_box      2 box
+create_atoms 1 single 5 5 4.5
+create_atoms 1 single 5 5 6
+group bot id 1
+group top id 2
+#create_atoms    1 region box
+
+set             type 1 mass 1.0
+set             type 1 shape 2.0 1.0 1.0
+# TODO: Line below currently fails in contact detection with status 1
+# set             type 1 block 4.0 4.0
+
+pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
+pair_coeff      * *
+
+compute diameter all property/atom shapex shapey shapez
+compute orient all property/atom quatw quati quatj quatk
+compute block all property/atom block1 block2
+# Ovito uses the reciprocal exponents for the blockiness
+# https://docs.ovito.org/advanced_topics/aspherical_particles.html#howto-aspherical-particles-superquadrics
+# Define atom variables from block
+variable phi atom "2/c_block[1]"
+variable theta atom "2/c_block[2]"
+
+dump mydump all custom 10 dump.lammpstrj id x y z fx fy fz c_diameter[*] c_orient[*] v_phi v_theta
+
+
+fix 1 bot freeze
+fix 2 top gravity 9.81 vector 0 0 -1
+fix 3 all nve/asphere
+
+thermo 10
+timestep 0.01
+run             1000

From 0e62d4e91882abe1292ce4360d57fb211bb6cdd5 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 25 Nov 2025 11:38:49 -0600
Subject: [PATCH 055/174] add Tikhonov regularization to Jacobian

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 3228409f33e..0bf8f57dc2d 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -42,6 +42,7 @@ static constexpr double CUTBACK_LINESEARCH = 0.5;
 static constexpr double CONVERGENCE_OVERLAP = 1e-8;
 static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
+static constexpr double TIKHONOV_SCALE = 1e-8;
 
 /* ----------------------------------------------------------------------
    beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
@@ -404,6 +405,14 @@ void compute_jacobian(const double* gradi_global, const double hessi_global[3][3
     jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
   }
   jacobian[15] = 0.0;
+
+  // Tikhonov regularization
+  // High blockiness grains can have zero curvature / singular Hessian
+  // along principal local axes (x=0, y=0, z=0)
+  double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
+  jacobian[0]  += diag_weight;
+  jacobian[5]  += diag_weight;
+  jacobian[10] += diag_weight;
 }
 
 double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,

From 210e933b4c1b1352d21b8c2fc2b091d8577cd653 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 25 Nov 2025 20:24:12 +0100
Subject: [PATCH 056/174] Corrected scale of regularization for linear system
 solve

---
 src/ASPHERE/math_extra_superellipsoids.cpp |  1 -
 src/ASPHERE/math_extra_superellipsoids.h   | 46 ++++++++++++----------
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 0bf8f57dc2d..fc7f9f7a780 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -42,7 +42,6 @@ static constexpr double CUTBACK_LINESEARCH = 0.5;
 static constexpr double CONVERGENCE_OVERLAP = 1e-8;
 static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
-static constexpr double TIKHONOV_SCALE = 1e-8;
 
 /* ----------------------------------------------------------------------
    beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 7a78cbcc2f4..4580c7c5769 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -22,6 +22,7 @@
 #include "math_extra.h"
 
 namespace MathExtraSuperellipsoids {
+  inline constexpr double TIKHONOV_SCALE = 1e-8;
   double beta_func(double a, double b);
   void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
   void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
@@ -124,13 +125,14 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 
 inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
     
-    // 0. Regularization to avoid singularities
-    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
-    const double lambda = 1e-8; 
-    A[0]  += lambda;
-    A[5]  += lambda;
-    A[10] += lambda;
-    A[15] += lambda;
+    // Tikhonov regularization
+    // High blockiness grains can have zero curvature / singular Hessian
+    // along principal local axes (x=0, y=0, z=0)
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    A[0]  += diag_weight;
+    A[5]  += diag_weight;
+    A[10] += diag_weight;
+    A[15] += diag_weight;
 
     // 1. Pivot 0 
     double inv0 = 1.0 / A[0];
@@ -169,13 +171,14 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
     // Helper lambda to access A[row, col]
     auto at = [&](int r, int c) -> double& { return A[r * 4 + c]; };
 
-    // 0. Regularization to avoid singularities
-    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
-    const double lambda = 1e-8; 
-    A[0]  += lambda;
-    A[5]  += lambda;
-    A[10] += lambda;
-    A[15] += lambda;
+    // Tikhonov regularization
+    // High blockiness grains can have zero curvature / singular Hessian
+    // along principal local axes (x=0, y=0, z=0)
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    A[0]  += diag_weight;
+    A[5]  += diag_weight;
+    A[10] += diag_weight;
+    A[15] += diag_weight;
 
     // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
     
@@ -232,13 +235,14 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
 
 inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4]) {
     
-    // 0. Regularization to avoid singularities
-    // Add small epsilon to diagonal to handle singular cases (e.g. flat contact)
-    const double lambda = 1e-8; 
-    A[0]  += lambda;
-    A[5]  += lambda;
-    A[10] += lambda;
-    A[15] += lambda;
+    // Tikhonov regularization
+    // High blockiness grains can have zero curvature / singular Hessian
+    // along principal local axes (x=0, y=0, z=0)
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    A[0]  += diag_weight;
+    A[5]  += diag_weight;
+    A[10] += diag_weight;
+    A[15] += diag_weight;
     
      // --- COLUMN 0 ---
     // 1. Find Pivot in Col 0

From c80694767de1f7f9acbb1f97669338624175c8f4 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 25 Nov 2025 21:21:48 +0100
Subject: [PATCH 057/174] Added regularization to shape function to make
 gardient order one

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 53 ++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index fc7f9f7a780..aecbff5bb1e 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -255,7 +255,60 @@ void shape_function_local_hessian(
                      pow(abs(point[1]*binv), n2 -1) * pow(nu, n1 / n2 - 2) * copysign(1.0, shape[0] * shape[1]); 
                 
   }
+/* ----------------------------------------------------------------------
+   Possible regularization for the shape functions
+   Instead of F(x,y,z) - 1 = 0 we use (F(x,y,z))^(1/n1) -1 = G(x,y,z) = 0
+   The gradient is simply nabla G = (1/n1) * (F)^(1/n1 - 1) * nabla F
+   The hessian is H(G) = (1/n1) * (F)^(1/n1 - 1) * H(F) + (1/n1) * (1/n1 - 1) * (F)^(1/n1 - 2) * nabla F (nabla F)^T
+------------------------------------------------------------------------- */
+void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]){
+  // value is F - 1
+  double F = *value + 1.0; // should be fine as long as one does not start from the center (otherwise we could guard against it)
+  double inv_n1 = 1.0 / n1;
+  double F_pow_1_n1_m1 = pow(F, inv_n1 - 1.0);
+
+  // scale factor for grainet and first term in the hessian
+  double scale_grad_hess1 = inv_n1 * F_pow_1_n1_m1;
+
+  // B = (1/n) * (1/n - 1) * F^(1/n - 2) simplifies to scale_grad * (inv_n1 - 1.0) / F
+  double scale_hess_add = scale_grad_hess1 * (inv_n1 - 1.0) / F;
+
+  *value = pow(F, inv_n1) - 1.0; 
+
+  // hessian update
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      double grad_gratT = grad[i] * grad[j];
+      hess[i][j] = (hess[i][j] * scale_grad_hess1) + (scale_hess_add * grad_gratT);
+    }
+  }
+
+  for (int i = 0; i < 3; i++) {
+    grad[i] *= scale_grad_hess1;
+  }
+};
+
+double regularized_shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
+  double shapefunc;
+  double n1 = block[0];
+  switch (flag) {
+    case 0: {
+      shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
+      break;
+    }
+    case 1: {
+      shapefunc = shape_and_derivatives_local_n1equaln2(xlocal, shape, block[0], grad, hess);
+      break;
+    }
+    case 2: {
+      shapefunc = shape_and_derivatives_local_superquad(xlocal, shape, block, grad, hess);
+      break;
+    }
+  }
 
+  apply_regularization_shape_function(n1, &shapefunc, grad, hess);
+  return shapefunc;
+}
 
 double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
   double shapefunc;

From 8b4fb3dbf84d65a17a675496274ddb1837761248 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 25 Nov 2025 13:49:51 -0600
Subject: [PATCH 058/174] test convergence before line search descent: tiny
 residuals can fail Armijo -Goldstein condition, yet pass convergence test

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 35 ++++++++++++----------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index aecbff5bb1e..43bc7ca12aa 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -485,6 +485,8 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   bool converged(false);
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+  // TODO: would it be wise or crazy to test for convergence before even attempting Newton's method?
+  //       the initial guess is the old X0, so with temporal coherence, it might still pass deformation is slow!
   for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
     norm_ini = norm;
 
@@ -552,7 +554,17 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
       norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
 
-      if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
+      if (norm <= CONVERGENCE_NEWTON) {
+        converged = true;
+        // TODO: consider testing picking the normal with the least error
+        //       i.e., likely the grain with the smallest curvature (Hessian norm)
+        //       or some other measure like average gradients.
+        //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
+        MathExtra::normalize3(gradi, nij);
+        break;
+      } else if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
+        // Tested after convergence check because tiny values of norm and norm_ini < CONVERGENCE_NEWTON
+        // Can still fail the Armijo - Goldstein condition`
         a *= CUTBACK_LINESEARCH;
       } else {
         X0[0] = X_line[0];
@@ -560,21 +572,12 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         X0[2] = X_line[2];
         X0[3] = X_line[3];
         // Only compute the jacobian if there is another Newton iteration to come
-        if (norm > CONVERGENCE_NEWTON) {
-          double tmp_m[3][3];
-          MathExtra::times3_transpose(hessi, Ri, tmp_m);
-          MathExtra::times3(Ri, tmp_m, hessi);
-          MathExtra::times3_transpose(hessj, Rj, tmp_m);
-          MathExtra::times3(Rj, tmp_m, hessj);
-          compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
-        } else {
-          converged = true;
-          // TODO: consider testing picking the normal with the least error
-          //       i.e., likely the grain with the smallest curvature (Hessian norm)
-          //       or some other measure like average gradients.
-          //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
-          MathExtra::normalize3(gradi, nij);
-        }
+        double tmp_m[3][3];
+        MathExtra::times3_transpose(hessi, Ri, tmp_m);
+        MathExtra::times3(Ri, tmp_m, hessi);
+        MathExtra::times3_transpose(hessj, Rj, tmp_m);
+        MathExtra::times3(Rj, tmp_m, hessj);
+        compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
         break;
       }
     }

From d40a03bfc1e535f925912ff0d1937ba2d56c904d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 25 Nov 2025 22:12:17 +0100
Subject: [PATCH 059/174] Fixed Tikhonov regularization for handwritten solver,
 added utility for regularized shapefunc

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 11 +++++++++++
 src/ASPHERE/math_extra_superellipsoids.h   | 17 ++++++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 43bc7ca12aa..ac949b9a07a 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -426,6 +426,17 @@ double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double*
   return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
 }
 
+double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
+  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
+  MathExtra::sub3(X0, xc, tmp_v);
+  MathExtra::transpose_matvec(R, tmp_v, xlocal);
+  regularized_shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  MathExtra::matvec(R, tmp_v, grad);
+  MathExtra::times3_transpose(hess, R, tmp_m);
+  MathExtra::times3(R, tmp_m, hess);
+  return shapefunc;
+}
+
 double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
   double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v);
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 4580c7c5769..5902633d594 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -66,6 +66,10 @@ namespace MathExtraSuperellipsoids {
   double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
   double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
+  
+  double regularized_shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
+  double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
+
   double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual);
   void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
   double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
@@ -82,6 +86,8 @@ namespace MathExtraSuperellipsoids {
   double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
   double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
 
+  void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]);
+  
 };
 
 
@@ -128,12 +134,11 @@ inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]
     // Tikhonov regularization
     // High blockiness grains can have zero curvature / singular Hessian
     // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10]);
     A[0]  += diag_weight;
     A[5]  += diag_weight;
     A[10] += diag_weight;
-    A[15] += diag_weight;
-
+    
     // 1. Pivot 0 
     double inv0 = 1.0 / A[0];
     double m1 = A[4] * inv0;
@@ -174,11 +179,10 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
     // Tikhonov regularization
     // High blockiness grains can have zero curvature / singular Hessian
     // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10]);
     A[0]  += diag_weight;
     A[5]  += diag_weight;
     A[10] += diag_weight;
-    A[15] += diag_weight;
 
     // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
     
@@ -238,11 +242,10 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     // Tikhonov regularization
     // High blockiness grains can have zero curvature / singular Hessian
     // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] + A[15]);
+    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] );
     A[0]  += diag_weight;
     A[5]  += diag_weight;
     A[10] += diag_weight;
-    A[15] += diag_weight;
     
      // --- COLUMN 0 ---
     // 1. Find Pivot in Col 0

From 5c11207b7678d29b208047ac9130c27689d27bc2 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 25 Nov 2025 22:32:53 +0100
Subject: [PATCH 060/174] Fixed direction of damping force

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 8fe0d77f920..29cf1bae548 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -391,7 +391,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // normal forces = Hookian contact + normal velocity damping
 
         damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) - damp; // assuming we get the overlap depth
+        ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
         if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
         // shear history effects

From be6070970d725babf71039703a76c89729d3d6c7 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 26 Nov 2025 10:14:18 +0100
Subject: [PATCH 061/174] Implmented particle contact with flat wall logic

---
 src/ASPHERE/math_extra_superellipsoids.h | 102 +++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 5902633d594..8ac91808439 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -59,6 +59,8 @@ namespace MathExtraSuperellipsoids {
                                          const double* xc2, const double R2[3][3], const double* shape2,
                                         double* cached_axis, double* contact_point);
 
+  inline int determine_contact_point_wall(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                        const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap);
 
   // Jibril's versions of the functions for contact detection
   double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
@@ -788,5 +790,105 @@ inline bool MathExtraSuperellipsoids::check_collision_and_get_seed(
     return true; // Collision confirmed
 }
 
+inline int MathExtraSuperellipsoids::determine_contact_point_wall(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
+                                        const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap){
+    //x_wall is a point on the wall TODO: is this actually stored somewhere?
+    // n_wall is the wall normal pointing from wall to particle in the global frame
+    // We might hav to change the fix wall gran files to achieve contact with the wall.
+    // I implemented the function but we might not use it.
+    // Unlike for particle-particle contacts, here we get directly the overlap value.
+
+    double n_local[3];
+    // Transform wall normal into local frame
+    // If n_wall points from Wall->Particle, we want surface normal -n_wall.
+    double n_search[3] = {-n_wall[0], -n_wall[1], -n_wall[2]};
+    MathExtra::transpose_matvec(Ri, n_search, n_local);
+    
+    double nx = n_local[0], ny = n_local[1], nz = n_local[2];
+    double a = shapei[0], b = shapei[1], c = shapei[2];
+    double X0_local[3];
+
+    // Calculate Deepest Point
+    if (flagi == 0){ 
+        // Ellipsoid
+        double norm = std::sqrt(a*a*nx*nx + b*b*ny*ny + c*c*nz*nz);
+        double inv_norm = (norm > 1e-14) ? 1.0/norm : 0.0;
+
+        X0_local[0] = a*a * nx * inv_norm;
+        X0_local[1] = b*b * ny * inv_norm;
+        X0_local[2] = c*c * nz * inv_norm;
+    }
+    else{ 
+        // General Superellipsoid
+        double nx_abs = std::fabs(nx);
+        double ny_abs = std::fabs(ny);
+        double nz_abs = std::fabs(nz);
+        double n1 = blocki[0];
+        double n2 = blocki[1];
+        
+        double x, y, z;
+
+        if (nx_abs < 1e-14 && ny_abs < 1e-14) {
+            x = 0.0; y = 0.0; 
+            z = c * ((nz > 0) ? 1.0 : -1.0);
+        } 
+        else {
+            double p2 = 1.0 / (n2 - 1.0);
+            double p1 = 1.0 / (n1 - 1.0);
+            
+            if (nx_abs > ny_abs) {
+                double alpha = std::pow((b * ny_abs) / (a * nx_abs), p2);
+                double gamma = std::pow(1.0 + std::pow(alpha, n2), n1/n2 - 1.0);
+                double beta = std::pow((c * nz_abs) / (a * nx_abs) * gamma, p1);
+                
+                double den = std::pow(std::pow(1.0 + std::pow(alpha, n2), n1/n2) + std::pow(beta, n1), 1.0/n1);
+                x = 1.0 / den;
+                y = alpha * x;
+                z = beta * x;
+            } else {
+                double alpha = std::pow((a * nx_abs) / (b * ny_abs), p2);
+                double gamma = std::pow(1.0 + std::pow(alpha, n2), n1/n2 - 1.0);
+                double beta = std::pow((c * nz_abs) / (b * ny_abs) * gamma, p1);
+                
+                double den = std::pow(std::pow(1.0 + std::pow(alpha, n2), n1/n2) + std::pow(beta, n1), 1.0/n1);
+                y = 1.0 / den;
+                x = alpha * y;
+                z = beta * y;
+            }
+            
+            x *= a; y *= b; z *= c;
+            
+            if (n_local[0] < 0) x = -x;
+            if (n_local[1] < 0) y = -y;
+            if (n_local[2] < 0) z = -z;
+        }
+        X0_local[0] = x; X0_local[1] = y; X0_local[2] = z;
+    }
+
+    // Transform to Global Frame
+    MathExtra::matvec(Ri, X0_local, X0);
+    for(int k=0; k<3; k++) X0[k] += xci[k]; // Translate to Global Position
+
+    // Set Contact Normal (Always wall normal for plane contacts)
+    nij[0] = n_wall[0];
+    nij[1] = n_wall[1];
+    nij[2] = n_wall[2];
+
+    // Check Overlap
+    double dx = X0[0] - x_wall[0];
+    double dy = X0[1] - x_wall[1];
+    double dz = X0[2] - x_wall[2];
+
+    // Project onto Wall Normal, if dist < 0, the point is "behind" the wall face.
+    double dist = dx*n_wall[0] + dy*n_wall[1] + dz*n_wall[2];
+
+    if (dist < 0.0) {
+        *overlap = -dist; // Store positive overlap value
+        return 0; // contact
+    }
+
+    *overlap = 0.0;
+    return 1; // no contact
+}
 
 #endif

From 088426781bafaf640950e3b871c4a1728707dd46 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 25 Nov 2025 16:51:04 -0600
Subject: [PATCH 062/174] fix bug I introduced in refactoring Newton: contact
 point must be updated

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index ac949b9a07a..ad324851696 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -578,28 +578,29 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         // Can still fail the Armijo - Goldstein condition`
         a *= CUTBACK_LINESEARCH;
       } else {
-        X0[0] = X_line[0];
-        X0[1] = X_line[1];
-        X0[2] = X_line[2];
-        X0[3] = X_line[3];
         // Only compute the jacobian if there is another Newton iteration to come
         double tmp_m[3][3];
         MathExtra::times3_transpose(hessi, Ri, tmp_m);
         MathExtra::times3(Ri, tmp_m, hessi);
         MathExtra::times3_transpose(hessj, Rj, tmp_m);
         MathExtra::times3(Rj, tmp_m, hessj);
-        compute_jacobian(gradi, hessi, gradj, hessj, X0[3], jacobian);
+        compute_jacobian(gradi, hessi, gradj, hessj, X_line[3], jacobian);
         break;
       }
     }
-
-    // If no descent with line search, take full step, try to escape bad region
+    // Take full step if no descent at the end of line search
+    // Try to escape bad region
     if (iter_ls == ITERMAX_LINESEARCH) {
       X0[0] += rhs[0];
       X0[1] += rhs[1];
       X0[2] += rhs[2];
       X0[3] += rhs[3];
       norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+    } else {
+      X0[0] = X_line[0];
+      X0[1] = X_line[1];
+      X0[2] = X_line[2];
+      X0[3] = X_line[3];
     }
 
     if (converged)

From d95a50407fc1cf92119f4e295ef93a06096924c3 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 25 Nov 2025 17:26:18 -0600
Subject: [PATCH 063/174] fix bad bug: shapefunctions computed but never
 assigned! My previous commit on Newton method may be useless. Think of
 reverting to the original, which was cleaner

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index ad324851696..2d1913fb61c 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -430,7 +430,7 @@ double regularized_shape_and_derivatives_global(const double* xc, const double R
   double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v);
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  regularized_shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  shapefunc = regularized_shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);
@@ -441,7 +441,7 @@ double shape_and_derivatives_global(const double* xc, const double R[3][3], cons
   double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v);
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);

From cd4e2e243e6236706d0130345d4af89714c9f12c Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 26 Nov 2025 13:55:23 +0100
Subject: [PATCH 064/174] Added example script for box compression to test
 multiple contacts

---
 .../in.ellipsoid_box_compress                 | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
new file mode 100644
index 00000000000..ba0da37bba5
--- /dev/null
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
@@ -0,0 +1,52 @@
+# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
+
+units           lj
+atom_style      ellipsoid
+dimension       3
+boundary        p p p       
+comm_modify     vel yes
+newton          off         
+
+# Create a sparse box first to avoid initial overlaps
+region          box block 0 15 0 15 0 15
+create_box      1 box
+
+# Create particles on a lattice (10x10x10 = 1000 particles)
+lattice         sc 1.2      # Spacing > 1.0 to ensure no initial overlap
+create_atoms    1 box
+
+set             group all mass 1.0
+set             group all shape 0.5 0.4 0.4 
+set             group all block 4.0 4.0
+
+# Rotation: Randomize orientation so we get Face-Face, Edge-Edge, and Corner-Corner
+set             group all quat/random 12345
+
+pair_style      gran/hooke/history/ellipsoid 1000.0 0.0 1.0 0.0 0.5 0
+pair_coeff      * *
+
+# Neighbor list, need to check
+neighbor        0.2 bin
+neigh_modify    delay 0 every 1 check yes
+
+# 5. Computes for Visualization
+compute shape all property/atom shapex shapey shapez
+compute orient all property/atom quatw quati quatj quatk
+compute block all property/atom block1 block2
+variable phi atom "2/c_block[1]"
+variable theta atom "2/c_block[2]"
+dump 1 all custom 100 compress.lammpstrj id x y z vx vy vz fx fy fz c_shape[*] c_orient[*] v_phi v_theta
+
+fix             1 all nve/asphere
+
+timestep        0.001
+thermo_style    custom step atoms ke vol cpu
+thermo          100
+
+# Settling particles
+run             1000
+
+# run compression
+# Compress the periodic box by 1% every unit of time, forces the particles to jam together
+fix             2 all deform 1 x erate -0.01 y erate -0.01 z erate -0.01 remap v
+run             5000
\ No newline at end of file

From 8e38bc661eea5bd50b4972aadd2c379f3d728b90 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 26 Nov 2025 09:06:35 -0600
Subject: [PATCH 065/174] rename quaternion components in dump for explicit and
 automatic detection by Ovito

---
 .../ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress    | 3 +++
 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran       | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
index ba0da37bba5..a9f2105583c 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
@@ -36,6 +36,9 @@ compute block all property/atom block1 block2
 variable phi atom "2/c_block[1]"
 variable theta atom "2/c_block[2]"
 dump 1 all custom 100 compress.lammpstrj id x y z vx vy vz fx fy fz c_shape[*] c_orient[*] v_phi v_theta
+# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
+dump_modify 1 colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+
 
 fix             1 all nve/asphere
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 6b6bb3f189f..c19f173e833 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -33,8 +33,9 @@ compute block all property/atom block1 block2
 variable phi atom "2/c_block[1]"
 variable theta atom "2/c_block[2]"
 
-dump mydump all custom 10 dump.lammpstrj id x y z fx fy fz c_diameter[*] c_orient[*] v_phi v_theta
-
+dump mydump all custom 1 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
+dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
 fix 1 bot freeze
 fix 2 top gravity 9.81 vector 0 0 -1

From 6e43c32c663a6e83b6a88f458d54b6fb3d39358f Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 26 Nov 2025 12:29:18 -0600
Subject: [PATCH 066/174] modify output to map to Ovito: n1 corresponds to
 theta, n2 corresponds to phi. Consider changing this in the future because
 the Podlozhnyuk paper seems different from the rest of the literature

---
 .../ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress     | 4 ++--
 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
index a9f2105583c..00f66737488 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
@@ -33,8 +33,8 @@ neigh_modify    delay 0 every 1 check yes
 compute shape all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
 compute block all property/atom block1 block2
-variable phi atom "2/c_block[1]"
-variable theta atom "2/c_block[2]"
+variable phi atom "2/c_block[2]"
+variable theta atom "2/c_block[1]"
 dump 1 all custom 100 compress.lammpstrj id x y z vx vy vz fx fy fz c_shape[*] c_orient[*] v_phi v_theta
 # Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
 dump_modify 1 colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index c19f173e833..551d304bde7 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -30,8 +30,8 @@ compute block all property/atom block1 block2
 # Ovito uses the reciprocal exponents for the blockiness
 # https://docs.ovito.org/advanced_topics/aspherical_particles.html#howto-aspherical-particles-superquadrics
 # Define atom variables from block
-variable phi atom "2/c_block[1]"
-variable theta atom "2/c_block[2]"
+variable phi atom "2/c_block[2]"
+variable theta atom "2/c_block[1]"
 
 dump mydump all custom 1 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly

From 86b4c76235c78b0affe686eeb1bee1502898a986 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 26 Nov 2025 12:45:45 -0600
Subject: [PATCH 067/174] add bolwing example

---
 .../ASPHERE/superellipsoid_gran/in.bowling    | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 examples/ASPHERE/superellipsoid_gran/in.bowling

diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
new file mode 100644
index 00000000000..4cbb88512cc
--- /dev/null
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -0,0 +1,62 @@
+# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
+
+units           si
+atom_style      ellipsoid
+dimension       3
+boundary p p p
+comm_modify vel yes
+newton off
+# create big ellipsoidal particles
+
+region          box block 0 10 0 10 0 10
+create_box      2 box
+
+# Pins
+create_atoms 1 single 5 5 5
+
+create_atoms 1 single 4 6 5
+create_atoms 1 single 6 6 5
+
+create_atoms 1 single 3 7 5
+create_atoms 1 single 5 7 5
+create_atoms 1 single 7 7 5
+
+create_atoms 1 single 2 8 5
+create_atoms 1 single 4 8 5
+create_atoms 1 single 6 8 5
+create_atoms 1 single 8 8 5
+
+set             type 1 shape 1.0 1.0 4.0
+set             type 1 block 8.0 2.0
+set             type 1 mass 1.0
+
+# Bowling ellipsoids
+create_atoms 2 single 5.5 1.5 6
+set             type 2 shape 2.0 2.0 1.0
+set             type 2 block 2.0 2.0
+set             type 2 mass 10.0
+group ball type 2
+
+pair_style      gran/hooke/history/ellipsoid 1e3 5e2 0.0 0.0 0.5 0
+pair_coeff      * *
+
+compute diameter all property/atom shapex shapey shapez
+compute orient all property/atom quatw quati quatj quatk
+compute block all property/atom block1 block2
+# Ovito uses the reciprocal exponents for the blockiness
+# https://docs.ovito.org/advanced_topics/aspherical_particles.html#howto-aspherical-particles-superquadrics
+# Define atom variables from block
+variable phi atom "2/c_block[2]"
+variable theta atom "2/c_block[1]"
+
+dump mydump all custom 1 shapes.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
+dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+
+set group ball angmom 1.0 0.0 0.5 vy 7.0
+
+fix 3 all nve/asphere
+
+thermo 10
+timestep 0.01
+run             100

From d3014ca897e0e602c5e10c910ef6b6b4a3c7626d Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 9 Dec 2025 13:15:03 -0600
Subject: [PATCH 068/174] add description for block1 and block2 in doc of
 compute property/atom

---
 doc/src/compute_property_atom.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index 996ef2092ed..332c64c3feb 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -26,6 +26,7 @@ Syntax
                              temperature, heatflow,
                              angmomx, angmomy, angmomz,
                              shapex, shapey, shapez,
+                             block1, block2,
                              quatw, quati, quatj, quatk, tqx, tqy, tqz,
                              end1x, end1y, end1z, end2x, end2y, end2z,
                              corner1x, corner1y, corner1z,
@@ -64,6 +65,7 @@ Syntax
            *heatflow* = internal heat flow of spherical particle
            *angmomx,angmomy,angmomz* = angular momentum of aspherical particle
            *shapex,shapey,shapez* = 3 diameters of aspherical particle
+           *block1,block2* = 2 blockiness exponents of aspherical particle
            *quatw,quati,quatj,quatk* = quaternion components for aspherical or body particles
            *tqx,tqy,tqz* = torque on finite-size particles
            *end12x, end12y, end12z* = end points of line segment
@@ -163,6 +165,11 @@ If :doc:`newton bond off <newton>` is set, it will be tallied with both atom
 The quantities *shapex*, *shapey*, and *shapez* are defined for ellipsoidal
 particles and define the 3d shape of each particle.
 
+The quantities *block1*, and *block2*, are defined for ellipsoidal
+particles and define the blockiness of each super-ellipsoid particle.
+See the :doc:`set <set>` command for an explanation of the blockiness.
+
+
 The quantities *quatw*, *quati*, *quatj*, and *quatk* are defined for
 ellipsoidal particles and body particles and store the 4-vector quaternion
 representing the orientation of each particle.  See the :doc:`set <set>`

From ba35169a0a69608b2efc53f6265d49ec7a2093d5 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 9 Dec 2025 14:56:38 -0600
Subject: [PATCH 069/174] start unit testing superellipsoid contact detection
 and overlap calculation functions

---
 src/ASPHERE/math_extra_superellipsoids.h      |  2 +-
 unittest/utils/CMakeLists.txt                 |  7 ++
 .../utils/test_math_extra_superellipsoids.cpp | 69 +++++++++++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 unittest/utils/test_math_extra_superellipsoids.cpp

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 8ac91808439..6363c2ff4ff 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -22,7 +22,7 @@
 #include "math_extra.h"
 
 namespace MathExtraSuperellipsoids {
-  inline constexpr double TIKHONOV_SCALE = 1e-8;
+  inline constexpr double TIKHONOV_SCALE = 1e-8; // TODO: I don't think we should declare those in the header or they will appear whenever the header is included. that means we should move the definition of the manual solver into the .cpp file
   double beta_func(double a, double b);
   void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
   void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 5d86bc592b1..812d71e4169 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -154,3 +154,10 @@ endif()
 # =============================================================================
 # End of FFT Testing Infrastructure
 # =============================================================================
+
+# Extra math tests for superellipsoids
+if(PKG_ASPHERE AND PKG_GRANULAR)
+  add_executable(test_math_extra_superellipsoids test_math_extra_superellipsoids.cpp)
+  target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain)
+  add_test(NAME MathExtraSuperellipsoids COMMAND test_math_extra_superellipsoids)
+endif()
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
new file mode 100644
index 00000000000..6042555dbad
--- /dev/null
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -0,0 +1,69 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "../../src/ASPHERE/math_extra_superellipsoids.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <cmath>
+
+// TODO: consider making a fixture with several setup functions?
+
+static constexpr double EPSILON = 1e-4;
+
+TEST(ContactPointAndNormal, sphere)
+{
+  // First grain
+  double xci[3] = {1.0, 5.246, 3.123};
+  double ri = 2.5;
+  double shapei[3] = {ri, ri, ri};
+  double Ri[3][3] = {{1.0, 0.0, 0.0},
+                     {0.0, 1.0, 0.0},
+                     {0.0, 0.0, 1.0}};
+  double blocki[2] = {2.0, 2.0};
+  int flagi = 0;
+
+  // Second grains
+  double xcj[3] = {2.0, -1.562, 4.607};
+  double rj = 1.25;
+  double shapej[3] = {rj, rj, rj};
+  double Rj[3][3] = {{1.0, 0.0, 0.0},
+                     {0.0, 1.0, 0.0},
+                     {0.0, 0.0, 1.0}};
+  double blockj[2] = {2.0, 2.0};
+  int flagj = 0;
+
+  // Contact detection
+  double X0[4] = {0.0, 0.0, 0.0, 0.0}, nij[3];
+  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
+                                                    xcj, Rj, shapej, blockj, flagj,
+                                                    X0, nij);
+  // Analytical solution
+  double X0_analytical[4] = {rj * xci[0] / (ri+rj) + ri * xcj[0] / (ri+rj),
+                             rj * xci[1] / (ri+rj) + ri * xcj[1] / (ri+rj),
+                             rj * xci[2] / (ri+rj) + ri * xcj[2] / (ri+rj),
+                             rj / ri};
+  double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
+  double norm = std::sqrt(nij_analytical[0]*nij_analytical[0] + nij_analytical[1]*nij_analytical[1] + nij_analytical[2]*nij_analytical[2]);
+  nij_analytical[0] /= norm;
+  nij_analytical[1] /= norm;
+  nij_analytical[2] /= norm;
+
+  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+}

From a134a83370db664fc7a764bab74dbfa06a3d3c76 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 10 Dec 2025 22:00:54 -0600
Subject: [PATCH 070/174] use some MathExtra to simplify superellipsoids unit
 tests setup. Add rotational invariance test for spheres

---
 .../utils/test_math_extra_superellipsoids.cpp | 41 +++++++++++++++++--
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 6042555dbad..9f2428534b1 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "../../src/ASPHERE/math_extra_superellipsoids.h"
+#include "../../src/math_extra.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <cmath>
@@ -53,10 +54,7 @@ TEST(ContactPointAndNormal, sphere)
                              rj * xci[2] / (ri+rj) + ri * xcj[2] / (ri+rj),
                              rj / ri};
   double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
-  double norm = std::sqrt(nij_analytical[0]*nij_analytical[0] + nij_analytical[1]*nij_analytical[1] + nij_analytical[2]*nij_analytical[2]);
-  nij_analytical[0] /= norm;
-  nij_analytical[1] /= norm;
-  nij_analytical[2] /= norm;
+  MathExtra::norm3(nij_analytical);
 
   ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
   ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
@@ -66,4 +64,39 @@ TEST(ContactPointAndNormal, sphere)
   ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
   ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
   ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+
+  // Rotational invariance
+  double anglei = 0.456;
+  double axisi[3] = {1,2,3};
+  MathExtra::norm3(axisi);
+  double quati[4] = {std::cos(anglei),
+                     std::sin(anglei)*axisi[0],
+                     std::sin(anglei)*axisi[1],
+                     std::sin(anglei)*axisi[2]};
+  MathExtra::quat_to_mat(quati, Ri);
+
+  double anglej = 0.123;
+  double axisj[3] = {-1,2,1};
+  MathExtra::norm3(axisj);
+  double quatj[4] = {std::cos(anglej),
+                     std::sin(anglej)*axisj[0],
+                     std::sin(anglej)*axisj[1],
+                     std::sin(anglej)*axisj[2]};
+  MathExtra::quat_to_mat(quatj, Rj);
+
+  X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
+  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
+                                                    xcj, Rj, shapej, blockj, flagj,
+                                                    X0, nij);
+
+  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+
+ 
 }

From 24929f99e65251bbca7586f55789261fc7ebcf16 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Thu, 11 Dec 2025 01:42:04 -0600
Subject: [PATCH 071/174] Improve initial guess and residual scaling. Implement
 unit test for high blockiness contact: difficulties converging on flat
 surfaces

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 12 ++++-
 .../pair_gran_hooke_history_ellipsoid.cpp     |  2 +-
 .../utils/test_math_extra_superellipsoids.cpp | 45 +++++++++++++++++++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 2d1913fb61c..dbdc2c09f9f 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -35,7 +35,7 @@ extern "C" { // General Matrices
 namespace MathExtraSuperellipsoids {
 
 static constexpr int ITERMAX_NEWTON = 100;
-static constexpr double CONVERGENCE_NEWTON = 1e-6;
+static constexpr double CONVERGENCE_NEWTON = 1e-10 * 1e-10;
 static constexpr int ITERMAX_LINESEARCH = 10;
 static constexpr double PARAMETER_LINESEARCH = 1e-4;
 static constexpr double CUTBACK_LINESEARCH = 0.5;
@@ -452,7 +452,15 @@ double compute_residual(const double shapefunci, const double* gradi_global, con
   // Equation (23)
   MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
   residual[3] = shapefunci - shapefuncj;
-  return residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2] + residual[3]*residual[3];
+  // Normalize residual Equation (23)
+  // shape functions and gradients dimensions are not homogeneous
+  // Gradient equality F1' + mu2 * F2' evaluated relative to magnitude of gradient ||F1'|| = ||mu2 * F2'||
+  // Shape function equality F1 - F2 evaluated relative to magnitude of shape function + 1
+  //    the shift f = polynomial - 1 is not necessary and cancels out in F1 - F2
+  // TODO: based on line above, consider removing the -1 in definition of shape function, and compare inside outside to 1 instead of 0.
+  // Last component homogeneous to shape function
+  return MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global) +
+         residual[3] * residual[3] / ((shapefunci + 1) * (shapefunci + 1));
 }
 
 void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 29cf1bae548..497af42c615 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -276,7 +276,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
         //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
         for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
-          X0[3] = 1.0; // Lagrange multiplier mu^2 initially one (makes the Newton more stable in continued contact)
+          X0[3] = reqj / reqi; // Lagrange multiplier mu^2
           double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
           shapei[0] = shapei[1] = shapei[2] = reqi;
           shapej[0] = shapej[1] = shapej[2] = reqj;
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 9f2428534b1..b520e561e32 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -16,6 +16,7 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <cmath>
+#include <vector>
 
 // TODO: consider making a fixture with several setup functions?
 
@@ -100,3 +101,47 @@ TEST(ContactPointAndNormal, sphere)
 
  
 }
+
+TEST(ContactPointAndNormal, supersphere_mono)
+{
+  double r = 3.456;
+  double xci[3] = {-2*r, 0.0, 0.0};
+  double xcj[3] = {2*r, 0.0, 0.0};
+  double shape[3] = {r, r, r};
+  double R[3][3] = {{1.0, 0.0, 0.0},
+                    {0.0, 1.0, 0.0},
+                    {0.0, 0.0, 1.0}};
+
+  std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
+  for (auto n : blocks) {
+    double block[2] = {n, n};
+    int flag = MathExtraSuperellipsoids::determine_flag(block);
+
+    // Contact detection
+    // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
+    double X0[4] = {r, -r, 2*r, 0.0}, nij[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shape, block, flag,
+                                                                   xcj, R, shape, block, flag,
+                                                                   X0, nij);
+    // Analytical solution
+    double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
+    double nij_analytical[3] = {1.0, 0.0, 0.0};
+    // TODO / WIP:
+    // Gradients can be smaller in different directions, hard to naviguate canyon on high blockiness
+    // Little progress made along the flat faces. Maybe use Levenberg-Marquardt or Newton with momentum (previous step memory) or other methods.
+    // I think this might be an intrinsic problem with the solution having a Hessian of zero, leading to slow convergence + bad conditioning in Newton's method.
+
+    std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+  }
+}
+
+
+// for polydisperse solution should be at the radii ratio
\ No newline at end of file

From 9750f06e0002e02e243c7cbd512f901ff9705cac Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 12 Dec 2025 13:18:17 -0600
Subject: [PATCH 072/174] make tikhonov regularization only when matrix found
 to be truly singular by dgetrs. add convergence criterion on the change in
 contact point position. Add acceleration for roots with multiplicity

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 83 ++++++++++++-------
 src/ASPHERE/math_extra_superellipsoids.h      |  2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     | 10 +--
 3 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index dbdc2c09f9f..64a9378c87c 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -34,12 +34,15 @@ extern "C" { // General Matrices
 
 namespace MathExtraSuperellipsoids {
 
-static constexpr int ITERMAX_NEWTON = 100;
-static constexpr double CONVERGENCE_NEWTON = 1e-10 * 1e-10;
-static constexpr int ITERMAX_LINESEARCH = 10;
-static constexpr double PARAMETER_LINESEARCH = 1e-4;
-static constexpr double CUTBACK_LINESEARCH = 0.5;
-static constexpr double CONVERGENCE_OVERLAP = 1e-8;
+static constexpr int ITERMAX_NR = 100;
+static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
+static constexpr double TOL_NR_POS = 1e-6 * 1e-6;
+
+static constexpr int ITERMAX_LS = 10;
+static constexpr double PARAMETER_LS = 1e-4;
+static constexpr double CUTBACK_LS = 0.5;
+
+static constexpr double TOL_OVERLAP = 1e-8;
 static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
 
@@ -476,14 +479,6 @@ void compute_jacobian(const double* gradi_global, const double hessi_global[3][3
     jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
   }
   jacobian[15] = 0.0;
-
-  // Tikhonov regularization
-  // High blockiness grains can have zero curvature / singular Hessian
-  // along principal local axes (x=0, y=0, z=0)
-  double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
-  jacobian[0]  += diag_weight;
-  jacobian[5]  += diag_weight;
-  jacobian[10] += diag_weight;
 }
 
 double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
@@ -500,14 +495,26 @@ double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], c
 int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                             const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                             double* X0, double* nij) {
-  double norm, norm_ini, shapefunc[2], residual[4], jacobian[16];
+  double norm, norm_old, shapefunc[2], residual[4], jacobian[16];
+  double lsq = MathExtra::distsq3(xci, xcj);
   bool converged(false);
 
+  // Accelerate convergence rate for high blockiness / flat faces
+  // with high root multiplicity N
+  // e.g.: f(x) = x^N , Newton's iterate: x_k+1 = x_k - x_k / N
+  // Estimate N from |x_k+1 - x_k| / |x_k - x_k-1| = 1 - 1/N
+  // within bounds 1 < N < max(block)-1
+  // then multiply Newton's step size by N to recover quadratic convergence
+  double multiplicity(1.0);
+  double rhs_old[3];
+  double blockmax = std::fmax(std::fmax(blocki[0],blocki[1]), std::fmax(blockj[0], blockj[1]));
+
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
   // TODO: would it be wise or crazy to test for convergence before even attempting Newton's method?
   //       the initial guess is the old X0, so with temporal coherence, it might still pass deformation is slow!
-  for (int iter = 0 ; iter < ITERMAX_NEWTON ; iter++) {
-    norm_ini = norm;
+
+  for (int iter = 0 ; iter < ITERMAX_NR ; iter++) {
+    norm_old = norm;
 
     // Solve Newton step
     int lapack_error, ipiv[16];
@@ -516,16 +523,29 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     const int nrhs = 1;
     double rhs[4] = {-residual[0], -residual[1], -residual[2], -residual[3]};
     dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
-    if (lapack_error)
+    if (lapack_error < 0)
       return lapack_error;
+    else if (lapack_error > 0) { // Singular matrix: Tikhonov regularization
+      // High blockiness grains can have zero curvature / singular Hessian
+      // along principal local axes (x=0, y=0, z=0)
+      double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
+      jacobian[0]  += diag_weight;
+      jacobian[5]  += diag_weight;
+      jacobian[10] += diag_weight;
+    }
     dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
     if (lapack_error)
       return lapack_error;
 
+    if (iter > 0)
+      multiplicity = std::fmin(std::fmax(1.0, 1.0 / (1.0 - std::sqrt(MathExtra::lensq3(rhs)/MathExtra::lensq3(rhs_old)))), blockmax - 1.0);
+    MathExtra::copy3(rhs, rhs_old);
+
     // Backtracking line search
-    double a(1.0), X_line[4];
+    double a(multiplicity), X_line[4];
     int iter_ls;
-    for (iter_ls = 0 ; iter_ls < ITERMAX_LINESEARCH ; iter_ls++) {
+
+    for (iter_ls = 0 ; iter_ls < ITERMAX_LS ; iter_ls++) {
       X_line[0] = X0[0] + a * rhs[0];
       X_line[1] = X0[1] + a * rhs[1];
       X_line[2] = X0[2] + a * rhs[2];
@@ -573,7 +593,8 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
       norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
 
-      if (norm <= CONVERGENCE_NEWTON) {
+      if ((norm <= TOL_NR_RES) &&
+          (MathExtra::lensq3(rhs) * a * a <= TOL_NR_POS * lsq)) {
         converged = true;
         // TODO: consider testing picking the normal with the least error
         //       i.e., likely the grain with the smallest curvature (Hessian norm)
@@ -581,10 +602,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
         MathExtra::normalize3(gradi, nij);
         break;
-      } else if (norm > norm_ini - PARAMETER_LINESEARCH * a * norm_ini) { // Armijo - Goldstein condition not met
-        // Tested after convergence check because tiny values of norm and norm_ini < CONVERGENCE_NEWTON
+      } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
+        // Tested after convergence check because tiny values of norm and norm_old < TOL_NR
         // Can still fail the Armijo - Goldstein condition`
-        a *= CUTBACK_LINESEARCH;
+        a *= CUTBACK_LS;
       } else {
         // Only compute the jacobian if there is another Newton iteration to come
         double tmp_m[3][3];
@@ -598,7 +619,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     }
     // Take full step if no descent at the end of line search
     // Try to escape bad region
-    if (iter_ls == ITERMAX_LINESEARCH) {
+    if (iter_ls == ITERMAX_LS) {
       X0[0] += rhs[0];
       X0[1] += rhs[1];
       X0[2] += rhs[2];
@@ -615,12 +636,14 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       break;
   }
 
-  // LAPACK error are within [-4, 4], use 5 non-touching, -5 non-converging
+  // LAPACK dgetrs() error values are negative, return values:
+  // 2 = failed convergence
+  // 1 = converged but grains not touching
+  // 0 = converged and grains touching
   if (!converged)
-    return -5;
+    return 2;
   if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
-    return 5;
-
+    return 1;
   return 0;
 }
 
@@ -799,7 +822,7 @@ double compute_overlap_distance(
       }
 
       // Convergence Check
-      if (std::fabs(val) < CONVERGENCE_OVERLAP) break;
+      if (std::fabs(val) < TOL_OVERLAP) break;
 
       // Newton Step
       double slope = local_grad[0] * local_normal[0] +
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 6363c2ff4ff..038a7770de7 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -22,7 +22,7 @@
 #include "math_extra.h"
 
 namespace MathExtraSuperellipsoids {
-  inline constexpr double TIKHONOV_SCALE = 1e-8; // TODO: I don't think we should declare those in the header or they will appear whenever the header is included. that means we should move the definition of the manual solver into the .cpp file
+  inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: I don't think we should declare those in the header or they will appear whenever the header is included. that means we should move the definition of the manual solver into the .cpp file
   double beta_func(double a, double b);
   void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
   void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 497af42c615..2efabc421a7 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -258,9 +258,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
         if (status == 0)
           touching = true;
-        else if(status == 5)
+        else if(status == 1)
           touching = false;
-        else
+        else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged vs fail the run ?
           error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
       } else {
         if (touching){
@@ -293,11 +293,11 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
           if (status == 0)
             touching = true;
-          else if(status == 5)
+          else if(status == 1)
             touching = false;
-          else
+          else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged vs fail the run ?
             error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
-          }
+        }
         }
       }
 

From 74da3c1b06cf130e0a1c110c3f1b702412adb104 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Tue, 30 Dec 2025 14:42:27 -0600
Subject: [PATCH 073/174] WIP: write documentation for pair_style
 gran/ellipsoid/ TODO: Hertz

---
 doc/src/Commands_pair.rst       |   2 +
 doc/src/Howto_spherical.rst     |  26 ++-
 doc/src/pair_gran_ellipsoid.rst | 353 ++++++++++++++++++++++++++++++++
 doc/src/pair_style.rst          |   6 +-
 4 files changed, 375 insertions(+), 12 deletions(-)
 create mode 100644 doc/src/pair_gran_ellipsoid.rst

diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 4f65dca6eb0..5672e54adad 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -109,8 +109,10 @@ OPT.
    * :doc:`gauss/cut (o) <pair_gauss>`
    * :doc:`gayberne (gio) <pair_gayberne>`
    * :doc:`gran/hertz/history (o) <pair_gran>`
+   * :doc:`gran/hertz/history/ellipsoid <pair_gran_ellipsoid>`
    * :doc:`gran/hooke (o) <pair_gran>`
    * :doc:`gran/hooke/history (ko) <pair_gran>`
+   * :doc:`gran/hooke/history/ellipsoid <pair_gran_ellipsoid>`
    * :doc:`granular <pair_granular>`
    * :doc:`gw <pair_gw>`
    * :doc:`gw/zbl <pair_gw>`
diff --git a/doc/src/Howto_spherical.rst b/doc/src/Howto_spherical.rst
index d86ea12b4bf..583e7235e22 100644
--- a/doc/src/Howto_spherical.rst
+++ b/doc/src/Howto_spherical.rst
@@ -50,13 +50,14 @@ individual particles, after then are created.
 
 The ellipsoid style defines particles that are ellipsoids and thus can
 be aspherical.  Each particle has a shape, specified by 3 diameters,
-and mass (or density).  These particles store an angular momentum and
-their orientation (quaternion), and can be acted upon by torque.  They
-do not store an angular velocity (omega), which can be in a different
-direction than angular momentum, rather they compute it as needed.
-The "set" command can be used to modify the diameter, orientation, and
-mass of individual particles, after then are created.  It also has a
-brief explanation of what quaternions are.
+and mass (or density).  Superellipsoid particles can be defined by
+specifying 2 blockiness exponents (block).  These particles store an angular
+momentum and their orientation (quaternion), and can be acted upon by
+torque.  They do not store an angular velocity (omega), which can be
+in a different direction than angular momentum, rather they compute it
+as needed.  The "set" command can be used to modify the diameter,
+blockiness, orientation, and mass of individual particles, after they
+are created.  It also has a brief explanation of what quaternions are.
 
 The line style defines line segment particles with two end points and
 a mass (or density).  They can be used in 2d simulations, and they can
@@ -113,9 +114,12 @@ When a system with finite-size particles is defined, the particles
 will only rotate and experience torque if the force field computes
 such interactions.  These are the various :doc:`pair styles <pair_style>` that generate torque:
 
-* :doc:`pair_style gran/history <pair_gran>`
-* :doc:`pair_style gran/hertz <pair_gran>`
-* :doc:`pair_style gran/no_history <pair_gran>`
+* :doc:`pair_style granular <pair_granular>`
+* :doc:`pair_style gran/hooke <pair_gran>`
+* :doc:`pair_style gran/hooke/history <pair_gran>`
+* :doc:`pair_style gran/hertz/history <pair_gran>`
+* :doc:`pair_style gran/hooke/history/ellipsoid <pair_gran_ellipsoid>`
+* :doc:`pair_style gran/hertz/history/ellipsoid <pair_gran_ellipsoid>`
 * :doc:`pair_style dipole/cut <pair_dipole>`
 * :doc:`pair_style gayberne <pair_gayberne>`
 * :doc:`pair_style resquared <pair_resquared>`
@@ -126,6 +130,8 @@ such interactions.  These are the various :doc:`pair styles <pair_style>` that g
 * :doc:`pair_style body/nparticle <pair_body_nparticle>`
 
 The granular pair styles are used with spherical particles.  The
+*gran/ellipsoid* granular pair styles are used with
+ellipsoidal and superellipsoid particles.  The
 dipole pair style is used with the dipole atom style, which could be
 applied to spherical or ellipsoidal particles.  The GayBerne and
 REsquared potentials require ellipsoidal particles, though they will
diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
new file mode 100644
index 00000000000..7b7a292e880
--- /dev/null
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -0,0 +1,353 @@
+.. index:: pair_style gran/hooke/history/ellipsoid
+.. index:: pair_style gran/hertz/history/ellipsoid
+
+pair_style gran/hooke/history/ellipsoid command
+===============================================
+
+pair_style gran/hertz/history/ellipsoid command
+===============================================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   pair_style style Kn Kt gamma_n gamma_t xmu dampflag keyword
+
+* style = *gran/hooke* or *gran/hooke/history* or *gran/hertz/history*
+* Kn = elastic constant for normal particle repulsion (force/distance units or pressure units - see discussion below)
+* Kt = elastic constant for tangential contact (force/distance units or pressure units - see discussion below)
+* gamma_n = damping coefficient for collisions in normal direction (1/time units or 1/time-distance units - see discussion below)
+* gamma_t = damping coefficient for collisions in tangential direction (1/time units or 1/time-distance units - see discussion below)
+* xmu = static yield criterion (unitless value between 0.0 and 1.0e4)
+* dampflag = 0 or 1 if tangential damping force is excluded or included
+
+* keyword = *limit_damping*, *bounding_box*
+
+  .. parsed-literal::
+
+      *limit_damping* value = none
+         limit damping to prevent attractive interaction
+      *bounding_box* value = none
+         use oriented bounding box in contact detection
+
+.. note::
+
+   Versions of LAMMPS before 9Jan09 had different style names for
+   granular force fields.  This is to emphasize the fact that the
+   Hertzian equation has changed to model polydispersity more accurately.
+   A side effect of the change is that the Kn, Kt, gamma_n, and gamma_t
+   coefficients in the pair_style command must be specified with
+   different values in order to reproduce calculations made with earlier
+   versions of LAMMPS, even for monodisperse systems.  See the NOTE below
+   for details.
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   pair_style gran/hooke/history/ellipsoid 200000.0 NULL 50.0 NULL 0.5 1 bounding_box
+   pair_style gran/hooke/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0
+   pair_style gran/hooke/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0 limit_damping
+
+
+Description
+"""""""""""
+
+The *gran/history/ellipsoid* styles allow granular contact
+between two superellipsoid particles whose surface is implicitly defined as:
+
+.. math::
+
+    f(\mathbf{x}) = \left(
+    \left|\frac{x}{a}\right|^{n_2} + \left|\frac{y}{b}\right|^{n_2}
+    \right)^{n_1 / n_2}
+    + \left|\frac{z}{c}\right|^{n_1} - 1 = 0
+
+for a point :math:`\mathbf{x} = (x, y, z)` in *canonical* coordinates.
+The half-diameters :math:`a`, :math:`b`, and :math:`c` correspond to the *shape*
+property, and the exponents :math:`n_1` and :math:`n_2` to the *block* property
+of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
+more details.
+
+Contact detection for these aspherical particles uses the ''midway''
+minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
+particles with shape functions :math:`F_i` and :math:`F_j`,
+the midway point :math:`\mathbf{X}_0` is obtained as:
+
+.. math::
+
+    \mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}}
+                   \ F_i(\mathbf{X}) + F_j(\mathbf{X})
+                   \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X}) 
+
+where the shape function is given by
+:math:`F_i(\mathbf{X}) = f(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
+where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
+and rotation matrix of the particle.
+The constrained minimization problem is solved using Lagrang multipliers and
+Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
+
+.. note::
+
+    This formulation leads to a 4x4 system of non-linear equations.
+    Tikhonov regularization and correction for high root multiplicity is used
+    to ensure robustness of the direct solver and high convergence rate,
+    even for blocky particles with near flat faces.
+
+The particles overlap if both shape functions are negative at the midway point.
+The contact normal is obtained as: :math:`\mathbf{n}_{ij} = \nabla F_i(\mathbf{X}_0) / \| \nabla F_i(\mathbf{X}_0)\|`
+and the overlap :math:`\delta = \|\mathbf{X}_j^{\mathrm{surf}} - \mathbf{X}_i^{\mathrm{surf}}\|`
+is computed as the distance between the points on the
+particles surfaces that are closest to the midway point in the
+direction of the contact normal: :math:`F_i(\mathbf{X}_i^{\mathrm{surf}} = \mathbf{X}_0 + \lambda \mathbf{n}_{ij}) = 0`.
+One-dimensional Newton's method is used to solve this equation for
+:math:`\lambda` and find the surface points.
+
+.. note::
+    TODO: Jacopo: a modified representation of the particle surface is defined
+    to make the function more linear and accelerate convergence.
+
+A hierarchical approach is used to limit the cost of contact detection.
+First, intersection of the bounding spheres of the two particles of bounding
+radii :math:`r_i` and :math:`r_j` is checked. If the distance :math:`d`
+between the particles center is more than the sum of the radii
+:math:`d > r_i + r_j`, the particles do not intersect.
+Then, if the bounding spheres intersect, intersection of the oriented
+bounding box is checked. This is done following the equations of
+:ref:`(Eberly) <GeometricTools>`.
+This check is only performed if the *bounding_box* keyword is used.
+This can be advantageous for grain with high aspect ratio, where the
+bounding sphere encompasses a large empty volume.
+However, for grains with aspect ratio near 1:1,
+the additional bounding box check may be detrimental to performance.
+
+The force calculation is similar to the :doc:`gran/hooke/history <pair_gran>`
+and :doc:`gran/hertz/history <pair_gran>` styles for spherical particles,
+and use the following formulas for the frictional force
+between two granular particles, as described in
+:ref:`(Brilliantov) <Brilliantov>`, :ref:`(Silbert) <Silbert>`, and
+:ref:`(Zhang) <Zhang3>`, 
+
+The Hookean style use this formula:
+
+.. math::
+
+   F_{hk} = (k_n \delta \mathbf{n}_{ij} -
+   m_{eff} \gamma_n\mathbf{ v}_n) -
+   (k_t \boldsymbol{\Delta} \mathbf{s}_t +
+   m_{eff} \gamma_t \mathbf{v}_t)
+
+The Hertzian style uses this formula:
+
+.. note::
+    TODO: Jacopo, Hertz force calculation using the mean curvature
+
+In both equations the first parenthesized term is the normal force
+between the two particles and the second parenthesized term is the
+tangential force.  The normal force has 2 terms, a contact force and a
+damping force.  The tangential force also has 2 terms: a shear force
+and a damping force.  The shear force is a "history" effect that
+accounts for the tangential displacement between the particles for the
+duration of the time they are in contact.  The tangential damping force
+term is included in all three pair styles if *dampflag* is set to 1;
+it is not included if *dampflag* is set to 0.
+
+The other quantities in the equations are as follows:
+
+* :math:`\delta` = overlap distance of 2 particles
+* :math:`K_n` = elastic constant for normal contact
+* :math:`K_t` = elastic constant for tangential contact
+* :math:`\gamma_n` = viscoelastic damping constant for normal contact
+* :math:`\gamma_t` = viscoelastic damping constant for tangential contact
+* :math:`m_{eff} = M_i M_j / (M_i + M_j) =` effective mass of 2 particles of mass M_i and M_j
+* :math:`\boldsymbol{\Delta} \mathbf{s}_t =` tangential displacement vector between 2 particles which is truncated to satisfy a frictional yield criterion
+* :math:`n_{ij} =` unit vector along the line connecting the centers of the 2 particles
+* :math:`V_n =` normal component of the relative velocity of the 2 particles
+* :math:`V_t =` tangential component of the relative velocity of the 2 particles
+
+The :math:`K_n`, :math:`K_t`, :math:`\gamma_n`, and :math:`\gamma_t`
+coefficients are specified as parameters to the pair_style command.  If
+a NULL is used for :math:`K_t`, then a default value is used where
+:math:`K_t = 2/7 K_n`.  If a NULL is used for :math:`\gamma_t`, then a
+default value is used where :math:`\gamma_t = 1/2 \gamma_n`.
+
+The interpretation and units for these 4 coefficients are different in
+the Hookean versus Hertzian equations.
+
+The Hookean model is one where the normal push-back force for two
+overlapping particles is a linear function of the overlap distance.
+Thus the specified :math:`K_n` is in units of (force/distance).  Note
+that this push-back force is independent of absolute particle size (in
+the monodisperse case) and of the relative sizes of the two particles
+(in the polydisperse case).  This model also applies to the other terms
+in the force equation so that the specified :math:`\gamma_n` is in units
+of (1/time), :math:`K_t` is in units of (force/distance), and
+:math:`\gamma_t` is in units of (1/time).
+
+.. note::
+    TODO: Jacopo, modify below.
+
+The Hertzian model is one where the normal push-back force for two
+overlapping particles is proportional to the area of overlap of the
+two particles, and is thus a non-linear function of overlap distance.
+Thus Kn has units of force per area and is thus specified in units of
+(pressure).  The effects of absolute particle size (monodispersity)
+and relative size (polydispersity) are captured in the radii-dependent
+prefactors.  When these prefactors are carried through to the other
+terms in the force equation it means that the specified :math:`\gamma_n` is in
+units of (1/(time\*distance)), :math:`K_t` is in units of (pressure), and
+:math:`\gamma_t` is in units of (1/(time\*distance)).
+
+Note that in the Hookean case, :math:`K_n` can be thought of as a linear
+spring constant with units of force/distance.  In the Hertzian case,
+:math:`K_n` is like a non-linear spring constant with units of
+force/area or pressure, and as shown in the :ref:`(Zhang) <Zhang3>`
+paper, :math:`K_n = 4G / (3(1-\nu))` where :math:`\nu =` the Poisson ratio,
+G = shear modulus = :math:`E / (2(1+\nu))`, and E = Young's modulus.  Similarly,
+:math:`K_t = 4G / (2-\nu)`.  (NOTE: in an earlier version of the manual, we incorrectly
+stated that :math:`K_t = 8G / (2-\nu)`.)
+
+Thus in the Hertzian case :math:`K_n` and :math:`K_t` can be set to
+values that corresponds to properties of the material being modeled.
+This is also true in the Hookean case, except that a spring constant
+must be chosen that is appropriate for the absolute size of particles in
+the model.  Since relative particle sizes are not accounted for, the
+Hookean styles may not be a suitable model for polydisperse systems.
+
+.. note::
+
+   In versions of LAMMPS before 9Jan09, the equation for Hertzian
+   interactions did not include the :math:`\sqrt{r_i r_j / (r_i + r_j)}`
+   term and thus was not as accurate for polydisperse systems.  For
+   monodisperse systems, :math:`\sqrt{ r_i r_j /(r_i+r_j)}` is a
+   constant factor that effectively scales all 4 coefficients:
+   :math:`K_n, K_t, \gamma_n, \gamma_t`.  Thus you can set the values of
+   these 4 coefficients appropriately in the current code to reproduce
+   the results of a previous Hertzian monodisperse calculation.  For
+   example, for the common case of a monodisperse system with particles
+   of diameter 1, all 4 of these coefficients should now be set 2x
+   larger than they were previously.
+
+Xmu is also specified in the pair_style command and is the upper limit
+of the tangential force through the Coulomb criterion Ft = xmu\*Fn,
+where Ft and Fn are the total tangential and normal force components
+in the formulas above.  Thus in the Hookean case, the tangential force
+between 2 particles grows according to a tangential spring and
+dash-pot model until Ft/Fn = xmu and is then held at Ft = Fn\*xmu until
+the particles lose contact.  In the Hertzian case, a similar analogy
+holds, though the spring is no longer linear.
+
+.. note::
+
+   Normally, xmu should be specified as a fractional value between
+   0.0 and 1.0, however LAMMPS allows large values (up to 1.0e4) to allow
+   for modeling of systems which can sustain very large tangential
+   forces.
+
+The effective mass *m_eff* is given by the formula above for two
+isolated particles.  If either particle is part of a rigid body, its
+mass is replaced by the mass of the rigid body in the formula above.
+This is determined by searching for a :doc:`fix rigid <fix_rigid>`
+command (or its variants).
+
+For granular styles there are no additional coefficients to set for
+each pair of atom types via the :doc:`pair_coeff <pair_coeff>` command.
+All settings are global and are made via the pair_style command.
+However you must still use the :doc:`pair_coeff <pair_coeff>` for all
+pairs of granular atom types.  For example the command
+
+.. code-block:: LAMMPS
+
+   pair_coeff * *
+
+should be used if all atoms in the simulation interact via a granular
+potential (i.e. one of the pair styles above is used).  If a granular
+potential is used as a sub-style of :doc:`pair_style hybrid <pair_hybrid>`, then specific atom types can be used in the
+pair_coeff command to determine which atoms interact via a granular
+potential.
+
+If two particles are moving away from each other while in contact, there
+is a possibility that the particles could experience an effective attractive
+force due to damping. If the *limit_damping* keyword is used, this option
+will zero out the normal component of the force if there is an effective
+attractive force.
+
+
+Mixing, shift, table, tail correction, restart, rRESPA info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The :doc:`pair_modify <pair_modify>` mix, shift, table, and tail options
+are not relevant for granular pair styles.
+
+These pair styles write their information to :doc:`binary restart files <restart>`, so a pair_style command does not need to be
+specified in an input script that reads a restart file.
+
+These pair styles can only be used via the *pair* keyword of the
+:doc:`run_style respa <run_style>` command.  They do not support the
+*inner*, *middle*, *outer* keywords.
+
+The single() function of these pair styles returns 0.0 for the energy
+of a pairwise interaction, since energy is not conserved in these
+dissipative potentials.  It also returns only the normal component of
+the pairwise interaction force.  However, the single() function also
+calculates 10 extra pairwise quantities.  The first 3 are the
+components of the tangential force between particles I and J, acting
+on particle I.  The fourth is the magnitude of this tangential force.
+The next 3 (5-7) are the components of the relative velocity in the
+normal direction (along the contact normal :math:`\mathbf{n}_{ij}`).  The
+last 3 (8-10) the components of the relative velocity in the
+tangential direction.
+
+These extra quantities can be accessed by the :doc:`compute pair/local <compute_pair_local>` command, as *p1*, *p2*, ...,
+*p10*\ .
+
+----------
+
+Restrictions
+""""""""""""
+
+All the granular pair styles are part of the GRANULAR package.  It is
+only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+
+These pair styles require the *ellipsoid* atom style, which is part of the ASPHERE package. It is
+only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+
+This pair style requires you to use the :doc:`comm_modify vel yes <comm_modify>` command so that velocities are stored by ghost
+atoms.
+
+These pair styles will not restart exactly when using the
+:doc:`read_restart <read_restart>` command, though they should provide
+statistically similar results.  This is because the forces they
+compute depend on atom velocities.  See the
+:doc:`read_restart <read_restart>` command for more details.
+
+Accumulated values for individual contacts are saved to to restart
+files but are not saved to data files. Therefore, forces may
+differ significantly when a system is reloaded using A
+:doc:`read_data <read_data>` command.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_coeff <pair_coeff>`
+
+Default
+"""""""
+
+none
+
+----------
+
+.. _Podlozhnyuk:
+
+**(Podlozhnyuk)** Podlozhnyuk, Pirker, Kloss, Comp. Part. Mech., 4:101-118 (2017).
+
+.. _Houlsby:
+
+**(Houlsby)** Houlsby, Computers and Geotechnics, 36, 953-959 (2009).
+
+.. _GeometricTools:
+
+**(Eberly)** Eberly, Geometric Tools: Dynamic Collision Detection Using Oriented Bounding Boxes (2008).
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index 4cbe1c7d965..aaccc64dcd0 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -202,8 +202,10 @@ accelerated styles exist.
 * :doc:`gayberne <pair_gayberne>` - Gay-Berne ellipsoidal potential
 * :doc:`granular <pair_granular>` - Generalized granular potential
 * :doc:`gran/hertz/history <pair_gran>` - granular potential with Hertzian interactions
-* :doc:`gran/hooke <pair_gran>` - granular potential with history effects
-* :doc:`gran/hooke/history <pair_gran>` - granular potential without history effects
+* :doc:`gran/hertz/history/ellipsoid <pair_gran_ellipsoid>` - granular potential with Hertzian interactions for superellipsoids
+* :doc:`gran/hooke <pair_gran>` - granular potential without history effects
+* :doc:`gran/hooke/history <pair_gran>` - granular potential with history effects
+* :doc:`gran/hooke/history/ellipsoid <pair_gran_ellipsoid>` - granular potential for superellipsoids with history effects
 * :doc:`gw <pair_gw>` - Gao-Weber potential
 * :doc:`gw/zbl <pair_gw>` - Gao-Weber potential with a repulsive ZBL core
 * :doc:`harmonic/cut <pair_harmonic_cut>` - repulsive-only harmonic potential

From 7f31acaa7d26b8f063788ff2e775e6f8636db25f Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 31 Dec 2025 11:21:12 -0600
Subject: [PATCH 074/174] remove atom2radii and atom2cut method following
 dccbfd46a1856f756fe36fe5ee74fc923177a843

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 20 -------------------
 .../pair_gran_hooke_history_ellipsoid.h       |  2 --
 2 files changed, 22 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 2efabc421a7..a4c438ba586 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -956,23 +956,3 @@ double PairGranHookeHistoryEllipsoid::memory_usage()
   double bytes = (double) nmax * sizeof(double);
   return bytes;
 }
-
-/* ----------------------------------------------------------------------
-   self-interaction range of particle
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::atom2cut(int i)
-{
-  double cut = atom->radius[i] * 2;
-  return cut;
-}
-
-/* ----------------------------------------------------------------------
-   maximum interaction range for two finite particles
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::radii2cut(double r1, double r2)
-{
-  double cut = r1 + r2;
-  return cut;
-}
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index da2940d617b..22e36d6c593 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -42,8 +42,6 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int pack_forward_comm(int, int *, double *, int, int *) override;
   void unpack_forward_comm(int, int, double *) override;
   double memory_usage() override;
-  double atom2cut(int) override;
-  double radii2cut(double, double) override;
 
  protected:
   double kn, kt, gamman, gammat, xmu;

From 0942d11792c55d14c6d3ee0fc12498286d96d00c Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Wed, 31 Dec 2025 14:20:06 -0600
Subject: [PATCH 075/174] make bounding box check optional. refactor contact
 detection accordingly

---
 doc/src/pair_gran_ellipsoid.rst               |  47 +++--
 .../ASPHERE/superellipsoid_gran/in.bowling    |   2 +-
 src/ASPHERE/math_extra_superellipsoids.cpp    |   2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     | 180 +++++++++---------
 .../pair_gran_hooke_history_ellipsoid.h       |   1 +
 5 files changed, 122 insertions(+), 110 deletions(-)

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index 7b7a292e880..57979874ac1 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -14,7 +14,7 @@ Syntax
 
    pair_style style Kn Kt gamma_n gamma_t xmu dampflag keyword
 
-* style = *gran/hooke* or *gran/hooke/history* or *gran/hertz/history*
+* style = *gran/hooke/history/ellipsoid* or *gran/hertz/history/ellipsoid*
 * Kn = elastic constant for normal particle repulsion (force/distance units or pressure units - see discussion below)
 * Kt = elastic constant for tangential contact (force/distance units or pressure units - see discussion below)
 * gamma_n = damping coefficient for collisions in normal direction (1/time units or 1/time-distance units - see discussion below)
@@ -65,16 +65,17 @@ between two superellipsoid particles whose surface is implicitly defined as:
     \right)^{n_1 / n_2}
     + \left|\frac{z}{c}\right|^{n_1} - 1 = 0
 
-for a point :math:`\mathbf{x} = (x, y, z)` in *canonical* coordinates.
+for a point :math:`\mathbf{x} = (x, y, z)` where the coordinates are given
+in the principal directions of inertia of the particle.
 The half-diameters :math:`a`, :math:`b`, and :math:`c` correspond to the *shape*
 property, and the exponents :math:`n_1` and :math:`n_2` to the *block* property
 of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
 more details.
 
-Contact detection for these aspherical particles uses the ''midway''
+Contact detection for these aspherical particles uses the so-called ''midway''
 minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
 particles with shape functions :math:`F_i` and :math:`F_j`,
-the midway point :math:`\mathbf{X}_0` is obtained as:
+the contact point :math:`\mathbf{X}_0` is obtained as:
 
 .. math::
 
@@ -84,11 +85,20 @@ the midway point :math:`\mathbf{X}_0` is obtained as:
 
 where the shape function is given by
 :math:`F_i(\mathbf{X}) = f(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
-where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
-and rotation matrix of the particle.
+and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
+and rotation matrix of the particle, respectively.
 The constrained minimization problem is solved using Lagrang multipliers and
 Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
 
+.. note::
+
+    The shape function :math:`F` is not a signed distance function and
+    does not have unit gradient :math:`\|\nabla F \| \neq 1` so that the
+    so-called ''midway'' point is not actually located at an equal distance from the
+    surface of both particles.
+    For contact between non-identical particles, the contact point tends to
+    be closer to the surface of the smaller and blockier particle.
+
 .. note::
 
     This formulation leads to a 4x4 system of non-linear equations.
@@ -96,14 +106,16 @@ Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhn
     to ensure robustness of the direct solver and high convergence rate,
     even for blocky particles with near flat faces.
 
-The particles overlap if both shape functions are negative at the midway point.
-The contact normal is obtained as: :math:`\mathbf{n}_{ij} = \nabla F_i(\mathbf{X}_0) / \| \nabla F_i(\mathbf{X}_0)\|`
+The particles overlap if both shape functions are negative at the contact point.
+The contact normal is obtained as: :math:`\mathbf{n}_{ij} = \nabla F_i(\mathbf{X}_0) / \| \nabla F_i(\mathbf{X}_0)\| = - \nabla F_j(\mathbf{X}_0) / \| \nabla F_j(\mathbf{X}_0)\|`
 and the overlap :math:`\delta = \|\mathbf{X}_j^{\mathrm{surf}} - \mathbf{X}_i^{\mathrm{surf}}\|`
 is computed as the distance between the points on the
-particles surfaces that are closest to the midway point in the
-direction of the contact normal: :math:`F_i(\mathbf{X}_i^{\mathrm{surf}} = \mathbf{X}_0 + \lambda \mathbf{n}_{ij}) = 0`.
-One-dimensional Newton's method is used to solve this equation for
-:math:`\lambda` and find the surface points.
+particles surfaces that are closest to the contact point in the
+direction of the contact normal: :math:`F_i(\mathbf{X}_i^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_i \mathbf{n}_{ij}) = 0`
+and :math:`F_j(\mathbf{X}_j^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_j \mathbf{n}_{ij}) = 0`.
+Newton's method is used to solve this equation for the scalars
+:math:`\lambda_i` and :math:`\lambda_j` and find the surface points
+:math:`\mathbf{X}_i^{\mathrm{surf}}` and :math:`\mathbf{X}_j^{\mathrm{surf}}`.
 
 .. note::
     TODO: Jacopo: a modified representation of the particle surface is defined
@@ -111,15 +123,18 @@ One-dimensional Newton's method is used to solve this equation for
 
 A hierarchical approach is used to limit the cost of contact detection.
 First, intersection of the bounding spheres of the two particles of bounding
-radii :math:`r_i` and :math:`r_j` is checked. If the distance :math:`d`
+radii :math:`r_i` and :math:`r_j` is checked. If the distance
 between the particles center is more than the sum of the radii
-:math:`d > r_i + r_j`, the particles do not intersect.
+:math:`\|\mathbf{X}_j - \mathbf{X}_j\| > r_i + r_j`, the particles do not intersect.
 Then, if the bounding spheres intersect, intersection of the oriented
 bounding box is checked. This is done following the equations of
 :ref:`(Eberly) <GeometricTools>`.
 This check is only performed if the *bounding_box* keyword is used.
-This can be advantageous for grain with high aspect ratio, where the
-bounding sphere encompasses a large empty volume.
+This can be advantageous for particles with high aspect ratio, where the
+bounding sphere encompasses a large empty volume, and minimization
+would otherwise be performed for particles that might be separated by
+large distances, and using a poor initial guess that might cause
+poor convergence of Newton's method.
 However, for grains with aspect ratio near 1:1,
 the additional bounding box check may be detrimental to performance.
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index 4cbb88512cc..516858147ed 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -37,7 +37,7 @@ set             type 2 block 2.0 2.0
 set             type 2 mass 10.0
 group ball type 2
 
-pair_style      gran/hooke/history/ellipsoid 1e3 5e2 0.0 0.0 0.5 0
+pair_style      gran/hooke/history/ellipsoid 1e3 5e2 0.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
 compute diameter all property/atom shapex shapey shapez
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 64a9378c87c..1a6755cc480 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -605,7 +605,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
         // Tested after convergence check because tiny values of norm and norm_old < TOL_NR
         // Can still fail the Armijo - Goldstein condition`
-        a *= CUTBACK_LS;
+        a *= CUTBACK_LS; // TODO: Golden-section search? Simple cutback strategy is crude and might miss low residual loci along the line search
       } else {
         // Only compute the jacobian if there is another Newton iteration to come
         double tmp_m[3][3];
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a4c438ba586..4521cf8566b 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -47,7 +47,7 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   centroidstressflag = CENTROID_NOTAVAIL;
   finitecutflag = 1;
   use_history = 1;
-  size_history = 8;  // shear[3], contact_point_and_Lagrange_multiplier[4], separating_axis_index
+  size_history = 8;  // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
 
   single_extra = 10;
   svector = new double[10];
@@ -205,102 +205,93 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
       X0_prev = &allhistory[3 + size_history * jj];
 
-      bool touching = true;
+      // TODO: Below could be a `touch()` function
+      bool touching;
       if (rsq >= radsum * radsum) {
         touching = false;
-      }
-      else {    
-        separating_axis = &allhistory[7 + size_history * jj];
-         // compute aspect ratios, if they are not that different from zero skip
-         // to the newton rapson, else do the bounding box
-         MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-         MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-         MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-         MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-
-         double min_dim_i = std::fmin(shapei[0], std::fmin(shapei[1], shapei[2]));
-         double min_dim_j = std::fmin(shapej[0], std::fmin(shapej[1], shapej[2]));
-         double max_dim_i = std::fmax(shapei[0], std::fmax(shapei[1], shapei[2]));
-         double max_dim_j = std::fmax(shapej[0], std::fmax(shapej[1], shapej[2]));
-
-         double ar_i = max_dim_i / min_dim_i; 
-         double ar_j = max_dim_j / min_dim_j;
-
-         // I put an arbitrary value for when to skip the bounding boxes
-         // this might need testing
-         bool high_aspect_ratio = (ar_i > 1.5 || ar_j > 1.5); 
-         MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-
-         if (high_aspect_ratio){
-          // check the bounding box
-          bool obb_separate = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-            x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
-          
-          if (obb_separate) {
-            touching = false;
-          }
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if(bounding_box) {
+          separating_axis = &allhistory[7 + size_history * jj];
+          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+                                       x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
         }
-      }
-
-      flagi = MathExtraSuperellipsoids::determine_flag(blocki);
-      flagj = MathExtraSuperellipsoids::determine_flag(blockj);
-      // Super-ellipsoid contact detection between atoms i and j
-      if (touch[jj] == 1  && touching) {
-        // Continued contact: use grain true shape and last contact point
-        // TODO: implement neigh history!
-        // TODO: move contact point with rigid body motion of the pair ?
-        //       not sure if enough information to do that
-        X0[0] = X0_prev[0];
-        X0[1] = X0_prev[1];
-        X0[2] = X0_prev[2];
-        X0[3] = X0_prev[3];
-        int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
-        if (status == 0)
-          touching = true;
-        else if(status == 1)
+        if (skip_contact_detection)
           touching = false;
-        else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged vs fail the run ?
-          error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
-      } else {
-        if (touching){
-        // New contact: Build initial guess incrementally
-        // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
-        //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
-        //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
-        //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
-        //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
-        //       This makes me think using radi and radj could be fine! To be investigated
-        double reqi = std::cbrt(bonus[ellipsoid[i]].shape[0] * bonus[ellipsoid[i]].shape[1] * bonus[ellipsoid[i]].shape[2]);
-        double reqj = std::cbrt(bonus[ellipsoid[j]].shape[0] * bonus[ellipsoid[j]].shape[1] * bonus[ellipsoid[j]].shape[2]);
-        MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-        //   MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-        for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
-          X0[3] = reqj / reqi; // Lagrange multiplier mu^2
-          double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-          shapei[0] = shapei[1] = shapei[2] = reqi;
-          shapej[0] = shapej[1] = shapej[2] = reqj;
-          MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-          MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-          if (bonus[ellipsoid[i]].flag_super) { // not a big time save
-            blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-            blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-          }
-          if (bonus[ellipsoid[j]].flag_super) {
-            blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-            blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+        else {
+          // Super-ellipsoid contact detection between atoms i and j
+          flagi = MathExtraSuperellipsoids::determine_flag(blocki);
+          flagj = MathExtraSuperellipsoids::determine_flag(blockj);
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point
+            // TODO: implement neigh history!
+            // TODO: move contact point with rigid body motion of the pair ?
+            //       not sure if enough information to do that
+            X0[0] = X0_prev[0];
+            X0[1] = X0_prev[1];
+            X0[2] = X0_prev[2];
+            X0[3] = X0_prev[3];
+            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                           x[j], Rj, shapej, blockj, flagj,
+                                                                           X0, nij);
+            if (status == 0)
+              touching = true;
+            else if(status == 1)
+              touching = false;
+            else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
+              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
+            //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
+            //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
+            //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
+            //       This makes me think using radi and radj could be fine! To be investigated
+            //       MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi; // Lagrange multiplier mu^2
+            for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              if (bonus[ellipsoid[i]].flag_super) { // not a big time save
+                blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+                blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              }
+              if (bonus[ellipsoid[j]].flag_super) {
+                blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+                blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+              }
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 -1 in derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? 0 : flagi,
+                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? 0 : flagj,
+                                                                             X0, nij);
+              if (status == 0)
+                touching = true;
+              else if(status == 1)
+                touching = false;
+              else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
+                error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+            }
           }
-          int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij);
-          if (status == 0)
-            touching = true;
-          else if(status == 1)
-            touching = false;
-          else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged vs fail the run ?
-            error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
-        }
         }
       }
 
+
       if (!touching) {
         // unset non-touching neighbors
 
@@ -516,7 +507,7 @@ void PairGranHookeHistoryEllipsoid::allocate()
 
 void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
 {
-  if (narg != 6 && narg != 7) error->all(FLERR, "Illegal pair_style command");
+  if (narg != 6 && narg != 7 && narg != 8) error->all(FLERR, "Illegal pair_style command");
 
   kn = utils::numeric(FLERR, arg[0], false, lmp);
   if (strcmp(arg[1], "NULL") == 0)
@@ -535,13 +526,18 @@ void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
   if (dampflag == 0) gammat = 0.0;
 
   limit_damping = 0;
-  if (narg == 7) {
-    if (strcmp(arg[6], "limit_damping") == 0)
+  bounding_box = 0;
+  for (int iarg = 6 ; iarg < narg ; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
       limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
     else
       error->all(FLERR, "Illegal pair_style command");
   }
 
+  if (bounding_box == 0) size_history--;
+
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
       dampflag < 0 || dampflag > 1)
     error->all(FLERR, "Illegal pair_style command");
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 22e36d6c593..d7aa150374b 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -50,6 +50,7 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int freeze_group_bit;
   int use_history;
   int limit_damping;
+  int bounding_box;
 
   int neighprev;
   double *onerad_dynamic, *onerad_frozen;

From 32dea6d858f2e6fd1fdbffb7b87ffe15a4f54dfc Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 2 Jan 2026 12:04:52 -0600
Subject: [PATCH 076/174] don't store n1/n2 in block

---
 src/atom_vec_ellipsoid.cpp | 6 ------
 src/atom_vec_ellipsoid.h   | 2 +-
 src/math_extra.cpp         | 2 +-
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index bfcf1c3f50a..33ad1295d95 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -259,7 +259,6 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       quat[3] = buf[m++];
       block[0] = buf[m++];
       block[1] = buf[m++];
-      block[2] = block[0] / block[1];
       inertia[0] = buf[m++];
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
@@ -333,7 +332,6 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     quat[3] = buf[m++];
     block[0] = buf[m++];
     block[1] = buf[m++];
-    block[2] = block[0] / block[1];
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
@@ -425,7 +423,6 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     quat[3] = buf[m++];
     block[0] = buf[m++];
     block[1] = buf[m++];
-    block[2] = block[0] / block[1];
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
@@ -475,7 +472,6 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
     block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
     flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
   }
-  block[2] = block[0] / block[1];
 
   // reset ellipsoid mass
   // previously stored density in rmass
@@ -765,7 +761,6 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     shape[2] = 0.5;
     block[0] = blockn1;
     block[1] = blockn2;
-    block[2] = blockn1 / blockn2;
     quat[0] = 1.0;
     quat[1] = 0.0;
     quat[2] = 0.0;
@@ -782,7 +777,6 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     bool &flag_super = bonus[ellipsoid[i]].flag_super;
     block[0] = blockn1;
     block[1] = blockn2;
-    block[2] = blockn1 / blockn2;
     flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
     MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
     radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index f8fa1342cbc..96f8c8d2412 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -29,7 +29,7 @@ class AtomVecEllipsoid : virtual public AtomVec {
   struct Bonus {
     double shape[3];
     double quat[4];
-    double block[3];
+    double block[2];
     double inertia[3];
     bool flag_super;
     int ilocal;
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 34fb5d74d60..629da49b0bf 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -677,7 +677,7 @@ double radius_ellipsoid(double *shape, double *block, bool flag_super)
 
   // Super ellipsoid
   double a = shape[0], b = shape[1], c = shape[2];
-  double n1 = block[0], n2 = block[1], n1divn2 = block[2];
+  double n1 = block[0], n2 = block[1];
   if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
 
   // Cylinder approximation for n2=2

From 040c1ea91990d0e6117321ab5927e8de1600c346 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 2 Jan 2026 13:40:05 -0600
Subject: [PATCH 077/174] remove Cephes library version of beta function from
 MathSpecial and use std::beta since LAMMPS compatible with C++17 since
 10Sep2025

---
 src/ASPHERE/math_extra_superellipsoids.cpp |  27 ++--
 src/ASPHERE/math_extra_superellipsoids.h   |   3 +-
 src/atom_vec_ellipsoid.cpp                 |   1 -
 src/math_extra.cpp                         |  14 +--
 src/math_special.cpp                       | 136 ---------------------
 src/math_special.h                         |  17 ---
 6 files changed, 15 insertions(+), 183 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 1a6755cc480..92017f1d036 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -46,27 +46,14 @@ static constexpr double TOL_OVERLAP = 1e-8;
 static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
 
-/* ----------------------------------------------------------------------
-   beta function B(x,y) = Gamma(x) * Gamma(y) / Gamma(x+y)
-------------------------------------------------------------------------- */
-double beta_func(double a, double b) {
-    return exp(lgamma(a) + lgamma(b) - lgamma(a + b));
-}
-// TODO: the reason why I had codded the beta function from scratch is because LAMMPS must be guaranteed to work with some older standard of C++
-//       I don't remember which one exactly (C++14 I think) but this standard does not have gamma() of beta() in the <cmath> implementation
-//       TBD if the code above will be accepted or if we need to fall back to the implementation of beta I copied from Cephes
-/* ----------------------------------------------------------------------
-   Volume of superellipsoid
-   source https://cse.buffalo.edu/~jryde/cse673/files/superquadrics.pdf
-------------------------------------------------------------------------- */
 
 void volume_superellipsoid(const double *blockiness, const double *shape, double volume)
 {
   const double eps1 = 2.0 / blockiness[0]; // shape exponent in latitude direction
   const double eps2 = 2.0 / blockiness[1]; // shape exponent in longitude direction
   volume = 2.0*shape[0]*shape[1]*shape[2]*eps1*eps2*
-      beta_func(0.5*eps1, eps1 + 1.0)*
-      beta_func(0.5*eps2, 0.5*eps2 + 1.0);
+      std::beta(0.5*eps1, eps1 + 1.0)*
+      std::beta(0.5*eps2, 0.5*eps2 + 1.0);
 }
 
 /* ----------------------------------------------------------------------
@@ -83,12 +70,12 @@ void inertia_superellipsoid(const double *shape, const double *blockiness, doubl
   const double a1 = shape[0];
   const double a2 = shape[1];
   const double a3 = shape[2];
-  const double I_xx = 0.5*a1*a2*a3*eps1*eps2*(a2*a2*beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0)+
-      4.0*a3*a3*beta_func(0.5*eps2, 0.5*eps2+1.0)*beta_func(1.5*eps1, eps1+1.0)) * density;
-  const double I_yy = 0.5*a1*a2*a3*eps1*eps2*(a1*a1*beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0)+
-      4.0*a3*a3*beta_func(0.5*eps2, 0.5*eps2+1.0)*beta_func(1.5*eps1, eps1+1.0)) * density;
+  const double I_xx = 0.5*a1*a2*a3*eps1*eps2*(a2*a2*std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0)+
+      4.0*a3*a3*std::beta(0.5*eps2, 0.5*eps2+1.0)*std::beta(1.5*eps1, eps1+1.0)) * density;
+  const double I_yy = 0.5*a1*a2*a3*eps1*eps2*(a1*a1*std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0)+
+      4.0*a3*a3*std::beta(0.5*eps2, 0.5*eps2+1.0)*std::beta(1.5*eps1, eps1+1.0)) * density;
   const double I_zz = 0.5*a1*a2*a3*eps1*eps2*(a1*a1 + a2*a2)*
-      beta_func(1.5*eps2, 0.5*eps2)*beta_func(0.5*eps1, 2.0*eps1+1.0) * density;
+      std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0) * density;
 
   inertia[0] = I_xx;
   inertia[1] = I_yy;
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 038a7770de7..640398031f6 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -22,8 +22,7 @@
 #include "math_extra.h"
 
 namespace MathExtraSuperellipsoids {
-  inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: I don't think we should declare those in the header or they will appear whenever the header is included. that means we should move the definition of the manual solver into the .cpp file
-  double beta_func(double a, double b);
+  inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
   void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
   void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
 
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 33ad1295d95..2eedaa88cf1 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -23,7 +23,6 @@
 #include "fix.h"
 #include "math_const.h"
 #include "math_extra.h"
-#include "math_special.h"
 #include "memory.h"
 #include "modify.h"
 
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 629da49b0bf..8cbde1337aa 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -492,13 +492,13 @@ void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
   if (flag_super) {
     // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    double beta_tmp1 = MathSpecial::beta(0.5 * e1, 1 + 2 * e1);
-    double beta_tmp2 = MathSpecial::beta(0.5 * e2, 0.5 * e2);
-    double beta_tmp3 = MathSpecial::beta(0.5 * e2, 1.5 * e2);
-    double dens = mass / (MathSpecial::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
+    double beta_tmp1 = std::beta(0.5 * e1, 1 + 2 * e1);
+    double beta_tmp2 = std::beta(0.5 * e2, 0.5 * e2);
+    double beta_tmp3 = std::beta(0.5 * e2, 1.5 * e2);
+    double dens = mass / (std::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
     double m0 = 0.5 * rsq0 * beta_tmp1 * beta_tmp3;
     double m1 = 0.5 * rsq1 * beta_tmp1 * beta_tmp3;
-    double m2 = rsq2 * MathSpecial::beta(1.5 * e1, 1 + e1) * beta_tmp2;
+    double m2 = rsq2 * std::beta(1.5 * e1, 1 + e1) * beta_tmp2;
     idiag[0] = dens * (m1 + m2);
     idiag[1] = dens * (m0 + m2);
     idiag[2] = dens * (m0 + m1);
@@ -657,8 +657,8 @@ double volume_ellipsoid(double *shape, double *block, bool flag_super)
 
   if (flag_super) {
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    unitvol = e1 * e2 * MathSpecial::beta(0.5 * e1, 1.0 + e1) *
-                        MathSpecial::beta(0.5 * e2, 0.5 * e2);
+    unitvol = e1 * e2 * std::beta(0.5 * e1, 1.0 + e1) *
+                        std::beta(0.5 * e2, 0.5 * e2);
   }
   return unitvol * shape[0] * shape[1] * shape[2];
 }
diff --git a/src/math_special.cpp b/src/math_special.cpp
index e56b81e65ed..3bc903a742d 100644
--- a/src/math_special.cpp
+++ b/src/math_special.cpp
@@ -669,142 +669,6 @@ double MathSpecial::erfcx_y100(const double y100)
 } /* erfcx_y100 */
 
 
-/* Library cephes:
- *    Some software in this archive may be from the book _Methods and
- * Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
- * International, 1989) or from the Cephes Mathematical Library, a
- * commercial product. In either event, it is copyrighted by the author.
- * What you see here may be used freely but it comes with no support or
- * guarantee.
- *
- *    The two known misprints in the book are repaired here in the
- * source listings for the gamma function and the incomplete beta
- * integral.
- *
- *    Stephen L. Moshier
- *    moshier@na-net.ornl.gov
- *
- * File beta.c:
- *   Beta function
- *
- *
- *
- *   SYNOPSIS:
- *
- *   double a, b, y, beta();
- *
- *   y = beta( a, b );
- *
- *
- *
- *   DESCRIPTION:
- *
- *                     -     -
- *                    | (a) | (b)
- *   beta( a, b )  =  -----------.
- *                       -
- *                      | (a+b)
- *
- *   For large arguments the logarithm of the function is
- *   evaluated using lgam(), then exponentiated.
- *
- *
- *
- *   ACCURACY:
- *
- *                        Relative error:
- *   arithmetic   domain     # trials      peak         rms
- *      DEC        0,30        1700       7.7e-15     1.5e-15
- *      IEEE       0,30       30000       8.1e-14     1.1e-14
- *
- *   ERROR MESSAGES:
- *
- *     message         condition          value returned
- *     beta overflow    log(beta) > MAXLOG       0.0
- *                      a or b <0 integer        0.0
- *
- * Copyright:
- *   Cephes Math Library Release 2.0:  April, 1987
- *   Copyright 1984, 1987 by Stephen L. Moshier
- *   Direct inquiries to 30 Frost Street, Cambridge, MA 02140
- *
- * Licence:
- *
- * Authors:
- *   Stephen L. Moshier, 1987, core author
- *
- * Website:
- *   https://www.netlib.org/cephes/
- *   https://github.com/scipy/scipy/blob/main/scipy/special/cephes/beta.c
- *   (the Scipy implementation contains useful updates adopted here)
- *
- */
-
-static constexpr double MAXGAM = 171.624376956302725;
-static constexpr double ASYMP_FACTOR = 1e6;
-static constexpr double MAXLOG = 7.09782712893383996732E2;  /* log(DBL_MAX) */
-
-/* ----------------------------------------------------------------------
-   beta function
-------------------------------------------------------------------------- */
-
-double MathSpecial::beta(double a, double b)
-{
-  // Inputs assumed strictly positive, not checked
-  // Simplifies the original cephes code for our purposes
-  double y;
-
-  if (a < b) {
-    y = a; a = b; b = y;
-  }
-
-  /*
-   * Asymptotic expansion for  ln(|B(a, b)|) for a > ASYMP_FACTOR*max(|b|, 1).
-   */
-  auto lbeta_asymp = [](double aa, double bb) {
-    double r = std::lgamma(bb);
-    r -= bb * std::log(aa);
-
-    r += bb*(1-bb)/(2*aa);
-    r += bb*(1-bb)*(1-2*bb)/(12*aa*aa);
-    r += - bb*bb*(1-bb)*(1-bb)/(12*aa*aa*aa);
-
-    return r;
-  };
-
-  if (a > ASYMP_FACTOR * b && a > ASYMP_FACTOR) {
-    /* Avoid loss of precision in lgam(a + b) - lgam(a) */
-    y = lbeta_asymp(a, b);
-    return std::exp(y);
-  }
-
-  y = a + b;
-  if( y > MAXGAM || a > MAXGAM || b > MAXGAM ) {
-    y = std::lgamma(y);
-    y = std::lgamma(b) - y;
-    y = std::lgamma(a) + y;
-    if( y > MAXLOG ) {
-      // No check for overflow? lmp-error ?
-    }
-    return std::exp(y);
-  }
-
-  y = std::tgamma(y);
-  a = std::tgamma(a);
-  b = std::tgamma(b);
-
-  if (std::fabs(a - y) > std::fabs(b) - fabs(y)) {
-    y = b / y;
-    y *= a;
-  }
-  else {
-    y = a / y;
-    y *= b;
-  }
-
-   return y;
-}
-
 /* optimizer friendly implementation of exp2(x).
  *
  * strategy:
diff --git a/src/math_special.h b/src/math_special.h
index 6b80c0eca8b..c1b5c355410 100644
--- a/src/math_special.h
+++ b/src/math_special.h
@@ -69,23 +69,6 @@ namespace LAMMPS_NS::MathSpecial {
 
   extern double erfcx_y100(const double y100);
 
-   /* Beta function
-   *
-   * This function computes the Beta function of two real, positive arguments.
-   * Necessary to compute the beta function for super-ellipsoid volume and
-   * moments of inertia. This function is used because std::beta() is C++17
-   * standard and core LAMMPS code has to be C++11 compatible.
-   *
-   * This implementation is restricted to positive arguments for 2 reasons:
-   * 1. Super-ellipsoid calculations only require positive arguments.
-   * 2. Negative arguments require tracking signs of the gamma function so that
-   * std::lgamma() and std::tgamma() functions cannot be used as is, this would
-   * require more implementation than desired for this function.
-   *
-   *  \param   x, y arguments
-   *  \return  value of beta(x) */
-
-  extern double beta(double x, double y);
 
  /*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
    *

From fe13c5c56b50adde7bbe5b600c870e111a6ef15e Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Fri, 2 Jan 2026 18:58:48 -0600
Subject: [PATCH 078/174] Change flag_super from bool to enum to distinguish
 more cases. Refactor accordingly. Move radius and inertia calculation from
 MathExra to private static functions in atom_vec_ellipsoid class.

Questions and TODO remain on how to best manage that enum across multiple files.
---
 src/ASPHERE/math_extra_superellipsoids.cpp    |  21 +--
 src/ASPHERE/math_extra_superellipsoids.h      |   2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     |  20 +--
 src/atom_vec_ellipsoid.cpp                    | 142 ++++++++++++++----
 src/atom_vec_ellipsoid.h                      |  13 +-
 src/math_extra.cpp                            |  83 +---------
 src/math_extra.h                              |   7 +-
 unittest/formats/test_atom_styles.cpp         |  12 +-
 .../utils/test_math_extra_superellipsoids.cpp |   2 +-
 9 files changed, 151 insertions(+), 151 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 92017f1d036..3c0eb9e0b4a 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -302,7 +302,16 @@ double regularized_shape_and_derivatives_local(const double* xlocal, const doubl
 
 double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
   double shapefunc;
-  switch (flag) {
+  // TODO: Not sure how to make flag values more clear
+  // Cannot forward declare the enum AtomVecEllipsoid::BlockType
+  // Could use scoped (enum class) but no implicit conversion:
+  //    must pass type `LAMMPS_NS::AtomVecEllipsoid::BlockType` instead of int,
+  //    and/or static_cast the enum class to int, which is similar to current
+  // Could define the enum in a dedicated header
+  //    seems overkill just for one enum
+  // I think the comment below making reference to the BlockType should be enough
+  // Feel free to change to a better design
+  switch (flag) { // LAMMPS_NS::AtomVecEllipsoid::BlockType
     case 0: {
       shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
       break;
@@ -635,15 +644,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 }
 
 
-int determine_flag(const double* block) {
-  const double EPSBLOCK(1e-3);
-  int flag(2);
-  if ((std::fabs(block[0] - 2) <= EPSBLOCK) && (std::fabs(block[1] - 2) <= EPSBLOCK))
-    flag = 0;
-  else if (std::fabs(block[0] - block[1]) <= EPSBLOCK)
-    flag = 1;
-  return flag;
-}
+
 
 // Functions to compute shape function and gradient only when called for newton method
 // to avoid computing hessian when not needed and having smoother landscape for the line search
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 640398031f6..93e4693615c 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -21,6 +21,7 @@
 #include <cmath>
 #include "math_extra.h"
 
+
 namespace MathExtraSuperellipsoids {
   inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
   void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
@@ -79,7 +80,6 @@ namespace MathExtraSuperellipsoids {
   int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                               double* X0, double* nij);
-  int determine_flag(const double* block);
  
   // functions to compute shape function and gradient only when called for newton method
   double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 4521cf8566b..6503d3465cc 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -121,8 +121,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double block1, block2;
 
   double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
-  // TODO: Maybe we can make flag_super of the grain an int instead, to cimplify when n1 = n2 ?
-  int flagi, flagj; // 0 : ellipsoid, 1 : equal exponents n1=n2, 2: general super-ellipsoid n1 >2, n2>2, n1!=n2
+  AtomVecEllipsoid::BlockType flagi, flagj;
 
   ev_init(eflag, vflag);
 
@@ -226,8 +225,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           touching = false;
         else {
           // Super-ellipsoid contact detection between atoms i and j
-          flagi = MathExtraSuperellipsoids::determine_flag(blocki);
-          flagj = MathExtraSuperellipsoids::determine_flag(blockj);
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
             // Continued contact: use grain true shape and last contact point
             // TODO: implement neigh history!
@@ -267,14 +266,11 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               shapej[0] = shapej[1] = shapej[2] = reqj;
               MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
               MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              if (bonus[ellipsoid[i]].flag_super) { // not a big time save
-                blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-                blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              }
-              if (bonus[ellipsoid[j]].flag_super) {
-                blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-                blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-              }
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
               // force ellipsoid flag for first initial guess iteration.
               // Avoid incorrect values of n1/n2 -1 in derivatives.
               int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? 0 : flagi,
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 2eedaa88cf1..3131c0f99f6 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -30,7 +30,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSBLOCK2 1.0e-3
+static constexpr double EPSILON_BLOCK = 1.0e-3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -235,7 +235,6 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
 {
   int i, j, m, last;
   double *shape, *quat, *block, *inertia;
-  bool flag_super;
 
   m = 0;
   last = first + n;
@@ -261,8 +260,10 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       inertia[0] = buf[m++];
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
-      flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
-      bonus[j].flag_super = flag_super;
+      // Particle type inferred from block to reduce comm
+      // TODO: is this a good idea or is that not saving much compared to
+      //       passing the flag in the buffer?
+      bonus[j].type = determine_type(block);
       bonus[j].ilocal = i;
       ellipsoid[i] = j;
       nghost_bonus++;
@@ -321,7 +322,7 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool &flag_super = bonus[nlocal_bonus].flag_super;
+    BlockType &type = bonus[nlocal_bonus].type;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
     shape[2] = buf[m++];
@@ -334,7 +335,7 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
+    type = determine_type(block);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -412,7 +413,7 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool &flag_super = bonus[nlocal_bonus].flag_super;
+    BlockType &type = bonus[nlocal_bonus].type;
     shape[0] = buf[m++];
     shape[1] = buf[m++];
     shape[2] = buf[m++];
@@ -425,7 +426,7 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
+    type = determine_type(block);
     bonus[nlocal_bonus].ilocal = ilocal;
     ellipsoid[ilocal] = nlocal_bonus++;
   }
@@ -461,27 +462,27 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
   // Blockiness exponents can be given optionally for superellipsoids
 
   double *block = bonus[nlocal_bonus].block;
-  bool &flag_super = bonus[nlocal_bonus].flag_super;
+  BlockType &type = bonus[nlocal_bonus].type;
   if (ivalue == values.size()) {
     block[0] = block[1] = 2.0;
-    flag_super = false;
+    type = BlockType::ELLIPSOID;
   }
   else {
     block[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
     block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
-    flag_super = ((std::fabs(block[0] - 2) > EPSBLOCK2) || (std::fabs(block[1] - 2) > EPSBLOCK2));
+    type = determine_type(block);
   }
 
   // reset ellipsoid mass
   // previously stored density in rmass
 
-  rmass[m] *= MathExtra::volume_ellipsoid(shape, block, flag_super);
+  rmass[m] *= MathExtra::volume_ellipsoid(shape, block, type);
 
   // Principal moments of inertia
 
-  MathExtra::inertia_ellipsoid_principal(shape, rmass[m], bonus[nlocal_bonus].inertia, block, flag_super);
+  inertia_ellipsoid_principal(shape, rmass[m], bonus[nlocal_bonus].inertia, block, type);
 
-  radius[m] = MathExtra::radius_ellipsoid(shape, block, flag_super);
+  radius[m] = radius_ellipsoid(shape, block, type);
   bonus[nlocal_bonus].ilocal = m;
   ellipsoid[m] = nlocal_bonus++;
 }
@@ -538,7 +539,7 @@ void AtomVecEllipsoid::data_atom_post(int ilocal)
 void AtomVecEllipsoid::pack_data_pre(int ilocal)
 {
   double *shape, *block;
-  bool flag_super;
+  BlockType type;
 
   ellipsoid_flag = atom->ellipsoid[ilocal];
   rmass_one = atom->rmass[ilocal];
@@ -551,8 +552,8 @@ void AtomVecEllipsoid::pack_data_pre(int ilocal)
   if (ellipsoid_flag >= 0) {
     shape = bonus[ellipsoid_flag].shape;
     block = bonus[ellipsoid_flag].block;
-    flag_super = bonus[ellipsoid_flag].flag_super;
-    rmass[ilocal] /= MathExtra::volume_ellipsoid(shape, block, flag_super);
+    type = bonus[ellipsoid_flag].type;
+    rmass[ilocal] /= MathExtra::volume_ellipsoid(shape, block, type);
   }
 }
 
@@ -707,7 +708,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool &flag_super = bonus[nlocal_bonus].flag_super;
+    BlockType &type = bonus[nlocal_bonus].type;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
@@ -717,9 +718,9 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     quat[3] = 0.0;
     block[0] = 2;
     block[1] = 2;
-    flag_super = false;
-    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia);
-    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
+    type = BlockType::ELLIPSOID;
+    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+    radius[i] = radius_ellipsoid(shape, block, type);
     bonus[nlocal_bonus].ilocal = i;
     ellipsoid[i] = nlocal_bonus++;
   } else if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) {
@@ -731,12 +732,12 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool flag_super = bonus[ellipsoid[i]].flag_super;
+    BlockType type = bonus[ellipsoid[i]].type;
     shape[0] = shapex;
     shape[1] = shapey;
     shape[2] = shapez;
-    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
+    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+    radius[i] = radius_ellipsoid(shape, block, type);
   }
 }
 
@@ -754,7 +755,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     double *quat = bonus[nlocal_bonus].quat;
     double *block = bonus[nlocal_bonus].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool &flag_super = bonus[nlocal_bonus].flag_super;
+    BlockType &type = bonus[nlocal_bonus].type;
     shape[0] = 0.5;
     shape[1] = 0.5;
     shape[2] = 0.5;
@@ -765,19 +766,96 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     quat[2] = 0.0;
     quat[3] = 0.0;
     bonus[nlocal_bonus].ilocal = i;
-    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
-    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
+    type = determine_type(block);
+    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+    radius[i] = radius_ellipsoid(shape, block, type);
     ellipsoid[i] = nlocal_bonus++;
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
     double *inertia = bonus[nlocal_bonus].inertia;
-    bool &flag_super = bonus[ellipsoid[i]].flag_super;
+    BlockType &type = bonus[ellipsoid[i]].type;
     block[0] = blockn1;
     block[1] = blockn2;
-    flag_super = ((std::fabs(blockn1 - 2) > EPSBLOCK2) || (std::fabs(blockn2 - 2) > EPSBLOCK2));
-    MathExtra::inertia_ellipsoid_principal(shape, rmass[i], inertia, block, flag_super);
-    radius[i] = MathExtra::radius_ellipsoid(shape, block, flag_super);
+    type = determine_type(block);
+    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+    radius[i] = radius_ellipsoid(shape, block, type);
+  }
+}
+
+AtomVecEllipsoid::BlockType AtomVecEllipsoid::determine_type(double* block) {
+ BlockType flag(BlockType::GENERAL);
+  if ((std::fabs(block[0] - 2) <= EPSILON_BLOCK) && (std::fabs(block[1] - 2) <= EPSILON_BLOCK))
+    flag = BlockType::ELLIPSOID;
+  else if (std::fabs(block[0] - block[1]) <= EPSILON_BLOCK)
+    flag = BlockType::N1_EQUAL_N2;
+  return flag;
+}
+
+double AtomVecEllipsoid::radius_ellipsoid(double *shape, double *block, BlockType flag_type)
+{
+  if (flag_type == BlockType::ELLIPSOID)
+    return std::max(std::max(shape[0], shape[1]), shape[2]);
+
+  // Super ellipsoid
+  double a = shape[0], b = shape[1], c = shape[2];
+  double n1 = block[0], n2 = block[1];
+  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
+
+  // Cylinder approximation for n2=2
+
+  if (n2 < 2.0 + EPSILON_BLOCK) return sqrt(a * a + c * c);
+
+  // Ellipsoid approximation for n1=2
+
+  if (n1 < 2.0 + EPSILON_BLOCK) return std::max(c, sqrt(a * a + b * b));
+
+  // Bounding box approximation when n1>2 and n2>2
+
+  return sqrt(a * a + b * b + c * c);
+
+  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
+  // Not sure if exact solution worth it compared to boundig box diagonal
+  // If both blockiness exponents are greater than 2, the exact radius does not
+  // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
+  /*
+  double x, y, z, alpha, beta, gamma, xtilde;
+  double small = 0.1; // TO AVOID OVERFLOW IN POW
+
+  alpha = std::fabs(n2 - 2.0) > small ? std::pow(b / a, 2.0 / (n2 - 2.0)) : 0.0;
+  gamma = std::fabs(n1divn2 - 1.0) > small ? std::pow((1.0 + std::pow(alpha, n2)), n1divn2 - 1.0) : 1.0;
+  beta = std::pow(gamma * c * c / (a * a), 1.0 / std::max(n1 - 2.0, small));
+  xtilde = 1.0 / std::pow(std::pow(1.0 + std::pow(alpha, n2), n1divn2) + std::pow(beta, n1), 1.0 / n1);
+  x = a * xtilde;
+  y = alpha * b * xtilde;
+  z = beta * c * xtilde;
+  return sqrt(x * x + y * y + z * z);
+  */
+}
+
+void AtomVecEllipsoid::inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
+                                                   double *block, BlockType flag_type)
+{
+  double rsq0 = shape[0] * shape[0];
+  double rsq1 = shape[1] * shape[1];
+  double rsq2 = shape[2] * shape[2];
+  if (flag_type == BlockType::ELLIPSOID) {
+    double dens = 0.2 * mass;
+    idiag[0] = dens * (rsq1 + rsq2);
+    idiag[1] = dens * (rsq0 + rsq2);
+    idiag[2] = dens * (rsq0 + rsq1);
+  } else {
+    // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
+    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
+    double beta_tmp1 = std::beta(0.5 * e1, 1 + 2 * e1);
+    double beta_tmp2 = std::beta(0.5 * e2, 0.5 * e2);
+    double beta_tmp3 = std::beta(0.5 * e2, 1.5 * e2);
+    double dens = mass / (std::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
+    double m0 = 0.5 * rsq0 * beta_tmp1 * beta_tmp3;
+    double m1 = 0.5 * rsq1 * beta_tmp1 * beta_tmp3;
+    double m2 = rsq2 * std::beta(1.5 * e1, 1 + e1) * beta_tmp2;
+    idiag[0] = dens * (m1 + m2);
+    idiag[1] = dens * (m0 + m2);
+    idiag[2] = dens * (m0 + m1);
   }
 }
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 96f8c8d2412..16d52ad4ad8 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -26,12 +26,17 @@ namespace LAMMPS_NS {
 
 class AtomVecEllipsoid : virtual public AtomVec {
  public:
+  enum BlockType {
+    ELLIPSOID = 0, // n1 = n2 = 2
+    N1_EQUAL_N2 = 1, // n1 = n2 > 2
+    GENERAL = 2, // n2 != n2 > 2
+  };
   struct Bonus {
     double shape[3];
     double quat[4];
     double block[2];
     double inertia[3];
-    bool flag_super;
+    BlockType type;
     int ilocal;
   };
   struct Bonus *bonus;
@@ -70,7 +75,6 @@ class AtomVecEllipsoid : virtual public AtomVec {
 
   void set_shape(int, double, double, double);
   void set_block(int, double, double);
-  double compute_radcirc(double *, double *, bool);
 
   int nlocal_bonus;
 
@@ -86,6 +90,11 @@ class AtomVecEllipsoid : virtual public AtomVec {
 
   virtual void grow_bonus();
   void copy_bonus_all(int, int);
+
+  static BlockType determine_type(double *);
+  static double radius_ellipsoid(double *, double *, BlockType);
+  static void inertia_ellipsoid_principal(double *, double, double *,
+                                   double *block, BlockType);
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 8cbde1337aa..33dc1c26c3f 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -475,41 +475,6 @@ void quat_to_mat_trans(const double *quat, double mat[3][3])
   mat[2][2] = w2-i2-j2+k2;
 }
 
-/* ----------------------------------------------------------------------
-   compute principal moments of inertia of an ellipsoid
-   shape = 3 radii of ellipsoid
-   quat = orientiation quaternion of ellipsoid
-   block = blockiness exponents of super-ellipsoid
-   return principal moments of inertia as 3-vector
-------------------------------------------------------------------------- */
-
-void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
-                                 double *block, bool flag_super)
-{
-  double rsq0 = shape[0] * shape[0];
-  double rsq1 = shape[1] * shape[1];
-  double rsq2 = shape[2] * shape[2];
-  if (flag_super) {
-    // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
-    double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    double beta_tmp1 = std::beta(0.5 * e1, 1 + 2 * e1);
-    double beta_tmp2 = std::beta(0.5 * e2, 0.5 * e2);
-    double beta_tmp3 = std::beta(0.5 * e2, 1.5 * e2);
-    double dens = mass / (std::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
-    double m0 = 0.5 * rsq0 * beta_tmp1 * beta_tmp3;
-    double m1 = 0.5 * rsq1 * beta_tmp1 * beta_tmp3;
-    double m2 = rsq2 * std::beta(1.5 * e1, 1 + e1) * beta_tmp2;
-    idiag[0] = dens * (m1 + m2);
-    idiag[1] = dens * (m0 + m2);
-    idiag[2] = dens * (m0 + m1);
-  }
-  else {
-    double dens = 0.2 * mass;
-    idiag[0] = dens * (rsq1 + rsq2);
-    idiag[1] = dens * (rsq0 + rsq2);
-    idiag[2] = dens * (rsq0 + rsq1);
-  }
-}
 
 /* ----------------------------------------------------------------------
    compute space-frame inertia tensor of an ellipsoid
@@ -649,7 +614,7 @@ void inertia_triangle(double *idiag, double *quat, double /*mass*/,
    return volume of the ellipsoid
 ------------------------------------------------------------------------- */
 
-double volume_ellipsoid(double *shape, double *block, bool flag_super)
+double volume_ellipsoid(double *shape, double *block, int flag_super)
 {
   double unitvol = MY_4PI3;
 
@@ -664,52 +629,6 @@ double volume_ellipsoid(double *shape, double *block, bool flag_super)
 }
 
 
-/* ----------------------------------------------------------------------
-   compute the circumscribed radius to the ellipsoid
-   shape = 3 radii of ellipsoid
-   block = blockiness exponents of super-ellipsoid
-   return circumscribed radius of the ellipsoid
-------------------------------------------------------------------------- */
-
-double radius_ellipsoid(double *shape, double *block, bool flag_super)
-{
-  if (!flag_super) return std::max(std::max(shape[0], shape[1]), shape[2]);
-
-  // Super ellipsoid
-  double a = shape[0], b = shape[1], c = shape[2];
-  double n1 = block[0], n2 = block[1];
-  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
-
-  // Cylinder approximation for n2=2
-
-  if (n2 < 2.01) return sqrt(a * a + c * c);
-
-  // Ellipsoid approximation for n1=2
-
-  if (n1 < 2.01) return std::max(c, sqrt(a * a + b * b));
-
-  // Bounding box approximation when n1>2 and n2>2
-
-  return sqrt(a * a + b * b + c * c);
-
-  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
-  // Not sure if exact solution worth it compared to boundig box diagonal
-  // If both blockiness exponents are greater than 2, the exact radius does not
-  // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
-  /*
-  double x, y, z, alpha, beta, gamma, xtilde;
-  double small = 0.1; // TO AVOID OVERFLOW IN POW
-
-  alpha = std::fabs(n2 - 2.0) > small ? std::pow(b / a, 2.0 / (n2 - 2.0)) : 0.0;
-  gamma = std::fabs(n1divn2 - 1.0) > small ? std::pow((1.0 + std::pow(alpha, n2)), n1divn2 - 1.0) : 1.0;
-  beta = std::pow(gamma * c * c / (a * a), 1.0 / std::max(n1 - 2.0, small));
-  xtilde = 1.0 / std::pow(std::pow(1.0 + std::pow(alpha, n2), n1divn2) + std::pow(beta, n1), 1.0 / n1);
-  x = a * xtilde;
-  y = alpha * b * xtilde;
-  z = beta * c * xtilde;
-  return sqrt(x * x + y * y + z * z);
-  */
-}
 
 /* ----------------------------------------------------------------------
    build rotation matrix for a small angle rotation around the X axis
diff --git a/src/math_extra.h b/src/math_extra.h
index a945f814c21..e6da14c08a7 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -116,16 +116,13 @@ void BuildRyMatrix(double R[3][3], const double angle);
 void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
-void inertia_ellipsoid_principal(double *shape, double mass, double *idiag,
-                                 double *block = nullptr, bool flag_super = false);
 void inertia_ellipsoid(double *idiag, double *quat, double mass, double *inertia);
 void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
 
-// volumes and circumscribed radius
-double volume_ellipsoid(double *shape, double *block = nullptr, bool flag_super = false);
-double radius_ellipsoid(double *shape, double *block, bool flag_super);
+// volume of ellipsoid
+double volume_ellipsoid(double *shape, double *block = nullptr, int flag_super = 0);
 
 // triclinic bounding box of a sphere
 
diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index 3bd68dbc797..2bc9533ff52 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1325,10 +1325,10 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(radius[GETIDX(2)], 0.5, EPSILON);
     EXPECT_NEAR(radius[GETIDX(3)], 1.5, EPSILON);
     EXPECT_NEAR(radius[GETIDX(4)], 1.5, EPSILON);
-    ASSERT_FALSE(bonus[0].flag_super);
-    ASSERT_FALSE(bonus[1].flag_super);
-    ASSERT_FALSE(bonus[2].flag_super);
-    ASSERT_FALSE(bonus[3].flag_super);
+    ASSERT_FALSE(bonus[0].type);
+    ASSERT_FALSE(bonus[1].type);
+    ASSERT_FALSE(bonus[2].type);
+    ASSERT_FALSE(bonus[3].type);
 
     BEGIN_HIDE_OUTPUT();
     command("set atom 5 block 8.0 8.0");
@@ -1348,8 +1348,8 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[5].block[1], 2.0, EPSILON);
     EXPECT_NEAR(radius[GETIDX(5)], 0.5*sqrt(3.0), EPSILON);
     EXPECT_NEAR(radius[GETIDX(6)], sqrt(4.64), EPSILON);
-    ASSERT_TRUE(bonus[4].flag_super);
-    ASSERT_TRUE(bonus[5].flag_super);
+    ASSERT_TRUE(bonus[4].type);
+    ASSERT_TRUE(bonus[5].type);
     BEGIN_HIDE_OUTPUT();
     command("set atom 5*6 shape 0.0 0.0 0.0");
     END_HIDE_OUTPUT();
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index b520e561e32..10ffc361366 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -115,7 +115,7 @@ TEST(ContactPointAndNormal, supersphere_mono)
   std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
   for (auto n : blocks) {
     double block[2] = {n, n};
-    int flag = MathExtraSuperellipsoids::determine_flag(block);
+    int flag =  (n < 2.01) ? 0 : 1;
 
     // Contact detection
     // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding

From bd2282b0b0a737f2f91c25134fa7fb51d1690c40 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sat, 3 Jan 2026 23:38:11 -0600
Subject: [PATCH 079/174] WIP: cleanup math_extra_super_ellipsoid. Outstanding
 points to be discussed with Jacopo.

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 125 +++---------------
 src/ASPHERE/math_extra_superellipsoids.h      |  24 ++--
 .../pair_gran_hooke_history_ellipsoid.cpp     |   1 -
 .../utils/test_math_extra_superellipsoids.cpp |   8 +-
 4 files changed, 35 insertions(+), 123 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 3c0eb9e0b4a..6d8e8f4c777 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril Coulibaly (??)
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
 ------------------------------------------------------------------------- */
 
 #include "math_extra_superellipsoids.h"
@@ -47,60 +47,26 @@ static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
 
 
-void volume_superellipsoid(const double *blockiness, const double *shape, double volume)
-{
-  const double eps1 = 2.0 / blockiness[0]; // shape exponent in latitude direction
-  const double eps2 = 2.0 / blockiness[1]; // shape exponent in longitude direction
-  volume = 2.0*shape[0]*shape[1]*shape[2]*eps1*eps2*
-      std::beta(0.5*eps1, eps1 + 1.0)*
-      std::beta(0.5*eps2, 0.5*eps2 + 1.0);
-}
-
-/* ----------------------------------------------------------------------
-   inertia tensor of superellipsoid
-   source https://cse.buffalo.edu/~jryde/cse673/files/superquadrics.pdf
-------------------------------------------------------------------------- */
-void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia)
-
-{
-
-  const double eps1 = 2.0 / blockiness[0]; // shape exponent in latitude direction
-  const double eps2 = 2.0 / blockiness[1]; // shape exponent in longitude direction
-
-  const double a1 = shape[0];
-  const double a2 = shape[1];
-  const double a3 = shape[2];
-  const double I_xx = 0.5*a1*a2*a3*eps1*eps2*(a2*a2*std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0)+
-      4.0*a3*a3*std::beta(0.5*eps2, 0.5*eps2+1.0)*std::beta(1.5*eps1, eps1+1.0)) * density;
-  const double I_yy = 0.5*a1*a2*a3*eps1*eps2*(a1*a1*std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0)+
-      4.0*a3*a3*std::beta(0.5*eps2, 0.5*eps2+1.0)*std::beta(1.5*eps1, eps1+1.0)) * density;
-  const double I_zz = 0.5*a1*a2*a3*eps1*eps2*(a1*a1 + a2*a2)*
-      std::beta(1.5*eps2, 0.5*eps2)*std::beta(0.5*eps1, 2.0*eps1+1.0) * density;
-
-  inertia[0] = I_xx;
-  inertia[1] = I_yy;
-  inertia[2] = I_zz;
-}
-
-
 /* ----------------------------------------------------------------------
    curvature of superellipsoid
    source https://en.wikipedia.org/wiki/Mean_curvature
 ------------------------------------------------------------------------- */
-
+// TODO Jacopo: please refactor using the high-performance functions.
+//              This recomputes a lot of expensive things twice or more
 void mean_curvature_superellipsoid(const double *shape, const double *blockiness, const double* quat, const double *global_point, double curvature)
 {
   // this code computes the mean curvature on the superellipsoid surface
   // for the given global point
   double local_point[3],hessian[3][3], nablaF[3], f, normal[3];
-  global2local_vector(global_point, quat, local_point); 
-  shape_function_local(shape, blockiness, quat, local_point, f);
+  global2local_vector(global_point, quat, local_point);
+  shape_function_local(shape, blockiness, quat, local_point, f);  
   double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
   double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
   for(int i = 0; i < 3; i++)
-    local_point[i] *= alpha;
-  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);
-  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);
+    local_point[i] *= alpha; // TODO: why is the local point moved after the shape function is computed? This does not seem to appear in Eq (39) of Podlozhnyuk
+                             //       If not, we may directly use the function that computes shape func, grad and hess for cheaper
+  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);  
+  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);  
   MathExtra::normalize3(nablaF, normal);
   double temp[3];
   MathExtra::matvec(hessian, normal, temp);
@@ -118,7 +84,8 @@ void gaussian_curvature_superellipsoid(const double *shape, const double *blocki
   double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
   double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
   for(int i = 0; i < 3; i++)
-    local_point[i] *= alpha;
+    local_point[i] *= alpha; // TODO: why is the local point moved after the shape function is computed? This does not seem to appear in Eq (39) of Podlozhnyuk
+                             //       If not, we may directly use the function that computes shape func, grad and hess for cheaper
   shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);
   shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);
   MathExtra::normalize3(nablaF, normal);
@@ -150,23 +117,6 @@ void gaussian_curvature_superellipsoid(const double *shape, const double *blocki
     curvature =  sqrt(fabs(K));
 }
 
-
-/* ----------------------------------------------------------------------
-   express local (particle level) to global (system level) coordinates
-------------------------------------------------------------------------- */
-
-void local2global_vector(const double v[3], const double *quat, double global_v[3]){
-
-   MathExtra::quatrotvec(const_cast<double*>(quat) , const_cast<double*>(v), global_v);
-};
-
-void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]){
-    double rot[3][3],  temp[3][3];
-    MathExtra::quat_to_mat(const_cast<double*>(quat), rot);
-    MathExtra::times3(rot, m, temp);
-    MathExtra::transpose_times3(rot, temp, global_m);
-};
-
   
 /* ----------------------------------------------------------------------
    express global (system level) to local (particle level) coordinates
@@ -181,29 +131,18 @@ void global2local_vector(const double *v, const double *quat, double *local_v){
 };
 
 
-void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]){
-    double rot[3][3], temp[3][3];
-    MathExtra::quat_to_mat(quat, rot);
-    MathExtra::transpose_times3(rot, m, temp);
-    MathExtra::times3(temp, rot, local_m);
-}
-
 /* ----------------------------------------------------------------------
    shape function computations for superellipsoids
 ------------------------------------------------------------------------- */
-
+// TODO Jacopo: this function does nothing (f is passed by value), return double instead
+//              Please refactor using ideas from the high-performance functions and distinguish between cases
+//              Also, this function only seems to be used in curvature calculation. After we discuss why the local_point is moved, we may not even need a function that only computes the shape function without cumputing its derivatives
 void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f){
   const double n1 = block[0], n2 = block[1];
   
   local_f = pow( pow(abs(point[0]/shape[0]), n2) + pow(abs(point[1]/shape[1]), n2) , n1/ n2) + pow(abs(point[2]/shape[2]), n1)  - 1.0;
 };
 
-void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f){
-  double local_point[3];
-  global2local_vector(const_cast<double*>(point), const_cast<double*>(quat), local_point);
-  shape_function_local(shape, block, quat, local_point, global_f);
-};
-
 void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad){
   // point is in local coordinates
   const double n1 = block[0], n2 = block[1];
@@ -278,27 +217,6 @@ void apply_regularization_shape_function(double n1, double *value, double *grad,
   }
 };
 
-double regularized_shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
-  double shapefunc;
-  double n1 = block[0];
-  switch (flag) {
-    case 0: {
-      shapefunc = shape_and_derivatives_local_ellipsoid(xlocal, shape, grad, hess);
-      break;
-    }
-    case 1: {
-      shapefunc = shape_and_derivatives_local_n1equaln2(xlocal, shape, block[0], grad, hess);
-      break;
-    }
-    case 2: {
-      shapefunc = shape_and_derivatives_local_superquad(xlocal, shape, block, grad, hess);
-      break;
-    }
-  }
-
-  apply_regularization_shape_function(n1, &shapefunc, grad, hess);
-  return shapefunc;
-}
 
 double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
   double shapefunc;
@@ -429,7 +347,8 @@ double regularized_shape_and_derivatives_global(const double* xc, const double R
   double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v);
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shapefunc = regularized_shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  apply_regularization_shape_function(block[0], &shapefunc, tmp_v, hess);
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);
@@ -456,7 +375,6 @@ double compute_residual(const double shapefunci, const double* gradi_global, con
   // Gradient equality F1' + mu2 * F2' evaluated relative to magnitude of gradient ||F1'|| = ||mu2 * F2'||
   // Shape function equality F1 - F2 evaluated relative to magnitude of shape function + 1
   //    the shift f = polynomial - 1 is not necessary and cancels out in F1 - F2
-  // TODO: based on line above, consider removing the -1 in definition of shape function, and compare inside outside to 1 instead of 0.
   // Last component homogeneous to shape function
   return MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global) +
          residual[3] * residual[3] / ((shapefunci + 1) * (shapefunci + 1));
@@ -506,8 +424,8 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   double blockmax = std::fmax(std::fmax(blocki[0],blocki[1]), std::fmax(blockj[0], blockj[1]));
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
-  // TODO: would it be wise or crazy to test for convergence before even attempting Newton's method?
-  //       the initial guess is the old X0, so with temporal coherence, it might still pass deformation is slow!
+  // TODO: consider testing for convergence before attempting Newton's method.
+  //       the initial guess is the old X0, so with temporal coherence, it might still pass tolerance if deformation is slow!
 
   for (int iter = 0 ; iter < ITERMAX_NR ; iter++) {
     norm_old = norm;
@@ -593,9 +511,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
           (MathExtra::lensq3(rhs) * a * a <= TOL_NR_POS * lsq)) {
         converged = true;
         // TODO: consider testing picking the normal with the least error
-        //       i.e., likely the grain with the smallest curvature (Hessian norm)
+        //       i.e., likely the grain with the smallest curvature (Hessian norm?)
+        //       or with the largest gradient?
         //       or some other measure like average gradients.
-        //       right now we use the gradient on grain i for simplicity and performance. When testing, we could see if using  is just as good
+        //       right now we use the gradient on grain i for simplicity and performance
         MathExtra::normalize3(gradi, nij);
         break;
       } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
@@ -637,7 +556,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // 1 = converged but grains not touching
   // 0 = converged and grains touching
   if (!converged)
-    return 2;
+    return 2; // TODO: consider not failing if not converged but shapefuncs positive (i.e., no contact)
   if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
     return 1;
   return 0;
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 93e4693615c..1d6fca884e3 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -12,7 +12,7 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-    Contributing author: Jacopo Bilotto (EPFL), Jibril Coulibaly (??)
+    Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_MATH_EXTRA_SUPERELLIPOIDS_H
@@ -24,18 +24,13 @@
 
 namespace MathExtraSuperellipsoids {
   inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
-  void volume_superellipsoid(const double *blockiness, const double *shape, double volume); // duplicated from math_extra might remove
-  void inertia_superellipsoid(const double *shape, const double *blockiness, double density, double *inertia); // duplicated from math_extra might remove
 
   // needed for shape functions grad and matrix 
-  void local2global_vector(const double v[3], const double *quat, double global_v[3]);
-  void global2local_vector(const double v[3], const double *quat, double local_v[3]);
-  void local2global_matrix(const double m[3][3], const double *quat, double global_m[3][3]);
-  void global2local_matrix(const double m[3][3], const double *quat, double local_m[3][3]);
+  void global2local_vector(const double v[3], const double *quat, double local_v[3]); // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
 
   // shape function computations
+  // TODO Jacopo: Let's make sure we only have one set of such functions and that they are optimized. Deleted some but cannot do all without messing with your code.
   void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f);
-  void shape_function_global(const double *shape, const double *block, const double *quat, const double *point, double global_f);
   void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad);
   void shape_function_local_hessian(const double *shape, const double *block, const double *quat, const double *point, double local_hessian[3][3]);
 
@@ -63,14 +58,12 @@ namespace MathExtraSuperellipsoids {
                                         const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap);
 
   // Jibril's versions of the functions for contact detection
+  // TODO @Jacopo: you might need to add a function that only computed the shape, or shape+grad if still needed in overlap / curvature calculation after answering the other TODOs I left
   double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
   double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
-  
-  double regularized_shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
-  double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
 
   double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual);
   void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
@@ -80,14 +73,17 @@ namespace MathExtraSuperellipsoids {
   int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                               double* X0, double* nij);
- 
+
+  // TODO: Jacopo the global function is never used. Can we delete? Is this duplicating the `stable_shape_and_gradient` methods?
+  double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
+  void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]);
   // functions to compute shape function and gradient only when called for newton method
+  // TODO: rename those. I don't think `stable` is a good terminolgy here. Maybe "..._local_superquad_surfacesearch", or "modified_shape_..."" TBD
   double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
   double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
   double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
-  double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
 
-  void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]);
+  double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
   
 };
 
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6503d3465cc..b84ec91f77d 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -229,7 +229,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
             // Continued contact: use grain true shape and last contact point
-            // TODO: implement neigh history!
             // TODO: move contact point with rigid body motion of the pair ?
             //       not sure if enough information to do that
             X0[0] = X0_prev[0];
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 10ffc361366..b726127edb5 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -126,10 +126,6 @@ TEST(ContactPointAndNormal, supersphere_mono)
     // Analytical solution
     double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
     double nij_analytical[3] = {1.0, 0.0, 0.0};
-    // TODO / WIP:
-    // Gradients can be smaller in different directions, hard to naviguate canyon on high blockiness
-    // Little progress made along the flat faces. Maybe use Levenberg-Marquardt or Newton with momentum (previous step memory) or other methods.
-    // I think this might be an intrinsic problem with the solution having a Hessian of zero, leading to slow convergence + bad conditioning in Newton's method.
 
     std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
     ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
@@ -143,5 +139,7 @@ TEST(ContactPointAndNormal, supersphere_mono)
   }
 }
 
-
+// TODO: supersphere_mono with grains overlapping
+// TODO: supersphere_poly with grains overlapping
+// TODO: more
 // for polydisperse solution should be at the radii ratio
\ No newline at end of file

From 8c260380a71b84bb34470e7db501cad452778352 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sun, 4 Jan 2026 11:20:39 +0100
Subject: [PATCH 080/174] Cleaned superquadrics helper by using HPC version of
 shape function calculation

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 174 +++++-------------
 src/ASPHERE/math_extra_superellipsoids.h      |  27 +--
 .../pair_gran_hooke_history_ellipsoid.cpp     |   4 +
 unittest/utils/CMakeLists.txt                 |   2 +-
 4 files changed, 59 insertions(+), 148 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 6d8e8f4c777..7cc12ab06e7 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -51,60 +51,49 @@ static constexpr double MINSLOPE_OVERLAP = 1e-12;
    curvature of superellipsoid
    source https://en.wikipedia.org/wiki/Mean_curvature
 ------------------------------------------------------------------------- */
-// TODO Jacopo: please refactor using the high-performance functions.
-//              This recomputes a lot of expensive things twice or more
-void mean_curvature_superellipsoid(const double *shape, const double *blockiness, const double* quat, const double *global_point, double curvature)
+double mean_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc)
 {
   // this code computes the mean curvature on the superellipsoid surface
   // for the given global point
-  double local_point[3],hessian[3][3], nablaF[3], f, normal[3];
-  global2local_vector(global_point, quat, local_point);
-  shape_function_local(shape, blockiness, quat, local_point, f);  
-  double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
-  double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
-  for(int i = 0; i < 3; i++)
-    local_point[i] *= alpha; // TODO: why is the local point moved after the shape function is computed? This does not seem to appear in Eq (39) of Podlozhnyuk
-                             //       If not, we may directly use the function that computes shape func, grad and hess for cheaper
-  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);  
-  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);  
-  MathExtra::normalize3(nablaF, normal);
-  double temp[3];
-  MathExtra::matvec(hessian, normal, temp);
-  double F_mag = sqrt(MathExtra::dot3(nablaF, nablaF));
-  curvature = fabs(MathExtra::dot3(normal, temp) - (hessian[0][0] + hessian[1][1] + hessian[2][2])) / fabs(2.0 * F_mag);
+  double hess[3][3], grad[3], normal[3];
+  double shapefunc, xlocal[3], tmp_v[3];
+  MathExtra::sub3(surf_global_point, xc, tmp_v); // here tmp_v is the vector from center to surface point
+  MathExtra::transpose_matvec(R, tmp_v, xlocal);
+  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, grad, hess); // computation of curvature is independent of local or global frame
+  MathExtra::normalize3(grad, normal);
+  MathExtra::matvec(hess, normal, tmp_v); // here tmp_v is intermediate product
+  double F_mag = sqrt(MathExtra::dot3(grad, grad));
+  double curvature = fabs(MathExtra::dot3(normal, tmp_v) - (hess[0][0] + hess[1][1] + hess[2][2])) / (2.0 * F_mag);
+  return curvature;
 }
 
-void gaussian_curvature_superellipsoid(const double *shape, const double *blockiness, const double* quat, const double *global_point, double curvature)
+double gaussian_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc)
 {
   // this code computes the gaussian curvature coefficient
   // for the given global point
-  double local_point[3],hessian[3][3], nablaF[3], f, normal[3];
-  global2local_vector(global_point, quat, local_point); 
-  shape_function_local(shape, blockiness, quat, local_point, f);
-  double koef = pow(fabs(0.5), std::max(blockiness[0], blockiness[1])-2.0);
-  double alpha = 1.0 / pow(fabs(f/koef + 1.0), 1.0/blockiness[0]);
-  for(int i = 0; i < 3; i++)
-    local_point[i] *= alpha; // TODO: why is the local point moved after the shape function is computed? This does not seem to appear in Eq (39) of Podlozhnyuk
-                             //       If not, we may directly use the function that computes shape func, grad and hess for cheaper
-  shape_function_local_grad(shape, blockiness, quat, local_point, nablaF);
-  shape_function_local_hessian(shape, blockiness, quat, local_point, hessian);
-  MathExtra::normalize3(nablaF, normal);
+  double hess[3][3], grad[3], normal[3];
+  double shapefunc, xlocal[3], tmp_v[3];
+  MathExtra::sub3(surf_global_point, xc, tmp_v); // here tmp_v is the vector from center to surface point
+  MathExtra::transpose_matvec(R, tmp_v, xlocal);
+  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, grad, hess); // computation of curvature is independent of local or global frame
+  MathExtra::normalize3(grad, normal);
+
   double temp[3];
-  MathExtra::matvec(hessian, normal, temp);
-  double F_mag = sqrt(MathExtra::dot3(nablaF, nablaF));
+  MathExtra::matvec(hess, normal, temp);
+  double F_mag = sqrt(MathExtra::dot3(grad, grad));
 
-  double fx = nablaF[0];
-  double fy = nablaF[1];
-  double fz = nablaF[2];
+  double fx = grad[0];
+  double fy = grad[1];
+  double fz = grad[2];
 
-  double fxx = hessian[0][0];
-  double fxy = hessian[0][1];
-  double fxz = hessian[0][2];
+  double fxx = hess[0][0];
+  double fxy = hess[0][1];
+  double fxz = hess[0][2];
 
-  double fyy = hessian[1][1];
-  double fyz = hessian[1][2];
+  double fyy = hess[1][1];
+  double fyz = hess[1][2];
 
-  double fzz = hessian[2][2];
+  double fzz = hess[2][2];
 
   double mat[4][4] = {
     {fxx, fxy, fxz, fx},
@@ -113,8 +102,9 @@ void gaussian_curvature_superellipsoid(const double *shape, const double *blocki
     {fx,  fy,  fz, 0.0} 
   };
 
-    double K = -det4_M44_zero(mat) / (F_mag*F_mag*F_mag*F_mag);
-    curvature =  sqrt(fabs(K));
+  double K = -det4_M44_zero(mat) / (F_mag*F_mag*F_mag*F_mag);
+  double curvature =  sqrt(fabs(K));
+  return curvature;
 }
 
   
@@ -130,62 +120,8 @@ void global2local_vector(const double *v, const double *quat, double *local_v){
 
 };
 
-
 /* ----------------------------------------------------------------------
-   shape function computations for superellipsoids
-------------------------------------------------------------------------- */
-// TODO Jacopo: this function does nothing (f is passed by value), return double instead
-//              Please refactor using ideas from the high-performance functions and distinguish between cases
-//              Also, this function only seems to be used in curvature calculation. After we discuss why the local_point is moved, we may not even need a function that only computes the shape function without cumputing its derivatives
-void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f){
-  const double n1 = block[0], n2 = block[1];
-  
-  local_f = pow( pow(abs(point[0]/shape[0]), n2) + pow(abs(point[1]/shape[1]), n2) , n1/ n2) + pow(abs(point[2]/shape[2]), n1)  - 1.0;
-};
-
-void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad){
-  // point is in local coordinates
-  const double n1 = block[0], n2 = block[1];
-  const double ainv = 1.0 / shape[0];
-  const double binv = 1.0 / shape[1];
-  const double cinv = 1.0 / shape[2];
-
-  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
-  const double nu_12 = pow(nu, n1 / n2 - 1.0);
-
-  local_grad[0] = n1*ainv * pow(abs(point[0] * ainv), n2 - 1.0) * nu_12 * copysign(1.0, point[0]);
-  local_grad[1] = n1*binv * pow(abs(point[1] * binv), n2 - 1.0) * nu_12 * copysign(1.0, point[1]);
-  local_grad[2] = n1*cinv * pow(abs(point[2] * cinv), n1 - 1.0) * copysign(1.0, point[2]);
-
-};
-
-void shape_function_local_hessian(
-  const double *shape, const double *block, const double *quat, const double *point, double local_hess[3][3]) {
-  const double n1 = block[0], n2 = block[1];
-  const double ainv = 1.0 / shape[0];
-  const double binv = 1.0 / shape[1];
-  const double cinv = 1.0 / shape[2];
-
-  const double nu = pow(abs(point[0] * ainv), n2) + pow(abs(point[1] * binv), n2);
-  const double nu_12_1 = pow(nu, n1 / n2 - 1.0);
-  const double nu_12_2 = pow(nu, n1 / n2 - 2.0);
-
-  local_hess[0][2] = local_hess[2][0] = local_hess[1][2] = local_hess[2][1] =0;
-
-  local_hess[0][0] = n1 * (n2 - 1) * ainv * ainv * pow(abs(point[0] * ainv), n2 - 2.0)* nu_12_1 +
-                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[0] * ainv), 2*n2 - 2.0)* nu_12_2;
-
-  local_hess[1][1] = n1 * (n2 - 1) * binv * binv * pow(abs(point[1] * binv), n2 - 2.0)* nu_12_1 +
-                     n1 * (n1 - n2) * ainv * ainv * pow(abs(point[1] * binv), 2*n2 - 2.0)* nu_12_2;
-
-  local_hess[2][2] = n1 * (n1 - 1) * cinv * cinv * pow(abs(point[2] * cinv), n1-2);
-
-  local_hess[0][1] = n1 * (n1 - n2) * ainv * binv * pow(abs(point[0]*ainv), n2 - 1) *
-                     pow(abs(point[1]*binv), n2 -1) * pow(nu, n1 / n2 - 2) * copysign(1.0, shape[0] * shape[1]); 
-                
-  }
-/* ----------------------------------------------------------------------
-   Possible regularization for the shape functions
+   Possible regularization for the shape functions (WIP)
    Instead of F(x,y,z) - 1 = 0 we use (F(x,y,z))^(1/n1) -1 = G(x,y,z) = 0
    The gradient is simply nabla G = (1/n1) * (F)^(1/n1 - 1) * nabla F
    The hessian is H(G) = (1/n1) * (F)^(1/n1 - 1) * H(F) + (1/n1) * (1/n1 - 1) * (F)^(1/n1 - 2) * nabla F (nabla F)^T
@@ -218,6 +154,9 @@ void apply_regularization_shape_function(double n1, double *value, double *grad,
 };
 
 
+/* ----------------------------------------------------------------------
+   shape function computations for superellipsoids
+------------------------------------------------------------------------- */
 double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
   double shapefunc;
   // TODO: Not sure how to make flag values more clear
@@ -357,9 +296,9 @@ double regularized_shape_and_derivatives_global(const double* xc, const double R
 
 double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
   double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
-  MathExtra::sub3(X0, xc, tmp_v);
+  MathExtra::sub3(X0, xc, tmp_v); // here temp_v is X0 - xc
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess); // here temp_v is grad in local
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);
@@ -495,7 +434,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
       double xilocal[3], gradi[3], hessi[3][3], xjlocal[3], gradj[3], hessj[3][3], tmp_v[3];
 
-      MathExtra::sub3(X_line, xci, tmp_v);
+      MathExtra::sub3(X_line, xci, tmp_v); 
       MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
       shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
       MathExtra::matvec(Ri, tmp_v, gradi);
@@ -562,13 +501,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   return 0;
 }
 
-
-
-
 // Functions to compute shape function and gradient only when called for newton method
 // to avoid computing hessian when not needed and having smoother landscape for the line search
 // General case for n1 != n2 > 2
-double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad) {
+double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -609,7 +545,7 @@ double stable_shape_and_gradient_local_superquad(const double* xlocal, const dou
 }
 
 // Special case for n2 = n2 = n > 2
-double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad) {
+double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad) {
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -641,28 +577,6 @@ double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const dou
   return std::pow(F, 1.0/n) - 1.0;
 }
 
-
-// Special case for n1 = n2 = 2
-double stable_shape_and_gradients_local_ellipsoid(const double* xlocal, const double* shape, double* grad) {
-  double a = 2.0 / (shape[0] * shape[0]);
-  double b = 2.0 / (shape[1] * shape[1]);
-  double c = 2.0 / (shape[2] * shape[2]);
-
-  // Equation (14) simplified for n1 = n2 = 2
-  grad[0] = a * xlocal[0];
-  grad[1] = b * xlocal[1];
-  grad[2] = c * xlocal[2];
-
-  double F = 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]);
-  double scale_factor = std::sqrt(F) / 2.0;
-  
-  grad[0] *= scale_factor;
-  grad[1] *= scale_factor;
-  grad[2] *= scale_factor;
-
-  return std::sqrt(F) - 1.0;
-}
-
 // Newton Rapson method to find the overlap distance from the contact point given the normal
 double compute_overlap_distance(
   const double* shape, const double* block, const double Rot[3][3], const int flag,
@@ -723,9 +637,9 @@ double compute_overlap_distance(
 
       // Calculate Distance Estimator value and Gradient
       if (flag == 1) {
-        val = stable_shape_and_gradient_local_n1equaln2(current_p, shape, block[0], local_grad);
+        val = shape_and_gradient_local_n1equaln2_surfacesearch(current_p, shape, block[0], local_grad);
       } else {
-        val = stable_shape_and_gradient_local_superquad(current_p, shape, block, local_grad);
+        val = shape_and_gradient_local_superquad_surfacesearch(current_p, shape, block, local_grad);
       }
 
       // Convergence Check
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 1d6fca884e3..52ce5bb44dc 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -28,12 +28,6 @@ namespace MathExtraSuperellipsoids {
   // needed for shape functions grad and matrix 
   void global2local_vector(const double v[3], const double *quat, double local_v[3]); // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
 
-  // shape function computations
-  // TODO Jacopo: Let's make sure we only have one set of such functions and that they are optimized. Deleted some but cannot do all without messing with your code.
-  void shape_function_local(const double *shape, const double *block, const double *quat, const double *point, double local_f);
-  void shape_function_local_grad(const double *shape, const double *block, const double *quat, const double *point, double *local_grad);
-  void shape_function_local_hessian(const double *shape, const double *block, const double *quat, const double *point, double local_hessian[3][3]);
-
   inline double det4_M44_zero(const double m[4][4]);
 
   // 4 by 4 sytems solvers, they all overwrite b with the solution
@@ -57,8 +51,8 @@ namespace MathExtraSuperellipsoids {
   inline int determine_contact_point_wall(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                         const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap);
 
-  // Jibril's versions of the functions for contact detection
-  // TODO @Jacopo: you might need to add a function that only computed the shape, or shape+grad if still needed in overlap / curvature calculation after answering the other TODOs I left
+  
+  // shape function computations, using flag to optimize for special cases (ellipsoid, superquadric with n1=n2)
   double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
@@ -74,17 +68,16 @@ namespace MathExtraSuperellipsoids {
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
                               double* X0, double* nij);
 
-  // TODO: Jacopo the global function is never used. Can we delete? Is this duplicating the `stable_shape_and_gradient` methods?
-  double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
   void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]);
-  // functions to compute shape function and gradient only when called for newton method
-  // TODO: rename those. I don't think `stable` is a good terminolgy here. Maybe "..._local_superquad_surfacesearch", or "modified_shape_..."" TBD
-  double stable_shape_and_gradient_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad);
-  double stable_shape_and_gradient_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad);
-  double stable_shape_and_gradient_local_ellipsoid(const double* xlocal, const double* shape, double* grad);
-
+  // functions to compute shape function and gradient only when called for surface point calculation given contact point
+  double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad);
+  double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad);
+  
   double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
   
+  double mean_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc);
+  double gaussian_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc);
+
 };
 
 
@@ -128,7 +121,7 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 
 inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
     
-    // Tikhonov regularization
+    // Tikhonov regularization (avoiding the constraint on the last row)
     // High blockiness grains can have zero curvature / singular Hessian
     // along principal local axes (x=0, y=0, z=0)
     const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10]);
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index b84ec91f77d..6a99461f3fe 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -305,6 +305,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
         overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]); // TODO: Jibril: I wonder if we'd get the correct, but negative overlap if we picked nji, which might be cheaper than computing nji
 
+        // TODO: for the hertzian contact pass the surface points directly to the 
+        // curvature calculations. Need to add the normal scaled by the overlap to the contact point
+
+
         // branch vectors 
         double cr1[3], cr2[3];
         MathExtra::sub3(X0, x[i], cr1);
diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 812d71e4169..5d7ca35e964 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -158,6 +158,6 @@ endif()
 # Extra math tests for superellipsoids
 if(PKG_ASPHERE AND PKG_GRANULAR)
   add_executable(test_math_extra_superellipsoids test_math_extra_superellipsoids.cpp)
-  target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain)
+  target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain ${LAPACK_LIBRARIES})
   add_test(NAME MathExtraSuperellipsoids COMMAND test_math_extra_superellipsoids)
 endif()

From b8ed27ee430d0cc70a2951af896e89d9c8885f05 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sun, 4 Jan 2026 19:50:22 +0100
Subject: [PATCH 081/174] Added hertz contact implementation in old pair gran
 style. Picked mean curvature as an approximation, but gaussian curvature is
 also an option

---
 .../superellipsoid_gran/in.ellipsoid_gran     |   1 +
 .../pair_gran_hertz_history_ellipsoid.cpp     | 629 ++++++++++++++++++
 .../pair_gran_hertz_history_ellipsoid.h       |  38 ++
 3 files changed, 668 insertions(+)
 create mode 100644 src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
 create mode 100644 src/GRANULAR/pair_gran_hertz_history_ellipsoid.h

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 551d304bde7..87713b106aa 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -22,6 +22,7 @@ set             type 1 shape 2.0 1.0 1.0
 # set             type 1 block 4.0 4.0
 
 pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
+# pair_style      gran/hertz/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
 pair_coeff      * *
 
 compute diameter all property/atom shapex shapey shapez
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
new file mode 100644
index 00000000000..633c46efbf7
--- /dev/null
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -0,0 +1,629 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+
+#include "pair_gran_hertz_history_ellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+#include "math_extra.h" // probably needed for some computations
+#include "math_extra_superellipsoids.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 8;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) : 
+PairGranHookeHistoryEllipsoid(lmp) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double wr1, wr2, wr3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht, polyhertz;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
+
+  double shapex, shapey, shapez; // ellipsoid shape params
+  double quat1, quat2, quat3, quat4;
+  double block1, block2;
+
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      X0_prev = &allhistory[3 + size_history * jj];
+
+      // TODO: Below could be a `touch()` function
+      bool touching;
+      if (rsq >= radsum * radsum) {
+        touching = false;
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if(bounding_box) {
+          separating_axis = &allhistory[7 + size_history * jj];
+          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+                                       x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+        }
+        if (skip_contact_detection)
+          touching = false;
+        else {
+          // Super-ellipsoid contact detection between atoms i and j
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point
+            // TODO: move contact point with rigid body motion of the pair ?
+            //       not sure if enough information to do that
+            X0[0] = X0_prev[0];
+            X0[1] = X0_prev[1];
+            X0[2] = X0_prev[2];
+            X0[3] = X0_prev[3];
+            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                           x[j], Rj, shapej, blockj, flagj,
+                                                                           X0, nij);
+            if (status == 0)
+              touching = true;
+            else if(status == 1)
+              touching = false;
+            else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
+              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
+            //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
+            //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
+            //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
+            //       This makes me think using radi and radj could be fine! To be investigated
+            //       MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi; // Lagrange multiplier mu^2
+            for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 -1 in derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? 0 : flagi,
+                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? 0 : flagj,
+                                                                             X0, nij);
+              if (status == 0)
+                touching = true;
+              else if(status == 1)
+                touching = false;
+              else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
+                error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+            }
+          }
+        }
+      }
+
+
+      if (!touching) {
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) history[k] = 0.0;
+      } else {
+        X0_prev[0] = X0[0];
+        X0_prev[1] = X0[1];
+        X0_prev[2] = X0[2];
+        X0_prev[3] = X0[3];
+
+        double nji[3] = { -nij[0], -nij[1], -nij[2] };
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]); // TODO: Jibril: I wonder if we'd get the correct, but negative overlap if we picked nji, which might be cheaper than computing nji
+
+        // TODO: for the hertzian contact pass the surface points directly to the 
+        // curvature calculations. Need to add the normal scaled by the overlap to the contact point
+        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+        
+        for (int dim = 0; dim < 3; dim++) {
+          surf_point_i[dim] = X0[dim] + nij[dim] * overlap1;
+          surf_point_j[dim] = X0[dim] - nij[dim] * overlap2;
+        }
+
+        curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+        curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
+        
+        // branch vectors 
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
+
+        // we need to take the cross product of omega
+
+        double ex_space[3],ey_space[3],ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
+        MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
+                                   bonus[ellipsoid[i]].inertia,omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
+        MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
+                                   bonus[ellipsoid[j]].inertia,omegaj);
+
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+        // relative translational velocity 
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
+
+        // normal component
+
+        vn1 = nij[0] * vr1; // dot product 
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
+
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
+
+        // tangential component
+
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
+
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i]; // JB I assume this is the mass of particle i, need to check
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hookian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+        ccel *= polyhertz;
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allhistory[size_history * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = -nij[0] * ccel + fs1;
+        fy = -nij[1] * ccel + fs2;
+        fz = -nij[2] * ccel + fs3;
+        fx *= factor_lj; // I think factor lj is just 1 except for special bonds
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx; 
+
+        tor1 *= factor_lj;
+        tor2 *= factor_lj;
+        tor3 *= factor_lj;
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+          
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx; 
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
+        }
+
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // need to check this, it is for virial stress, but needs some tweaking for non-spherical particles
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
+{
+  if (narg != 6 && narg != 7 && narg != 8) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  bounding_box = 0;
+  for (int iarg = 6 ; iarg < narg ; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
+      limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  if (bounding_box == 0) size_history--;
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+}
+
+/* ---------------------------------------------------------------------- */
+
+// TODO: implement the single() function for the super-ellipsoid
+double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                    double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+{
+  double radi, radj, radsum;
+  double r, rinv, rsqinv, delx, dely, delz;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3, wr1, wr2, wr3;
+  double mi, mj, meff, damp, ccel, polyhertz;
+  double vtr1, vtr2, vtr3, vrel, shrmag;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+
+  r = sqrt(rsq);
+  rinv = 1.0 / r;
+  rsqinv = 1.0 / rsq;
+
+  // relative translational velocity
+
+  double **v = atom->v;
+  vr1 = v[i][0] - v[j][0];
+  vr2 = v[i][1] - v[j][1];
+  vr3 = v[i][2] - v[j][2];
+
+  // normal component
+
+  double **x = atom->x;
+  delx = x[i][0] - x[j][0];
+  dely = x[i][1] - x[j][1];
+  delz = x[i][2] - x[j][2];
+
+  vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
+  vn1 = delx * vnnr * rsqinv;
+  vn2 = dely * vnnr * rsqinv;
+  vn3 = delz * vnnr * rsqinv;
+
+  // tangential component
+
+  vt1 = vr1 - vn1;
+  vt2 = vr2 - vn2;
+  vt3 = vr3 - vn3;
+
+  // relative rotational velocity
+
+  double **omega = atom->omega;
+  wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
+  wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
+  wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    // NOTE: ensure mass_rigid is current for owned+ghost atoms?
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr * rsqinv;
+  ccel = kn * (radsum - r) * rinv - damp;
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  // relative velocities
+
+  vtr1 = vt1 - (delz * wr2 - dely * wr3);
+  vtr2 = vt2 - (delx * wr3 - delz * wr1);
+  vtr3 = vt3 - (dely * wr1 - delx * wr2);
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // shear history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  double *allshear = fix_history->firstvalue[i];
+
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  double *shear = &allshear[3 * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel * r);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = fs = 0.0;
+  }
+
+  // set force and return no energy
+
+  fforce = ccel;
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vt1;
+  svector[8] = vt2;
+  svector[9] = vt3;
+
+  return 0.0;
+}
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
new file mode 100644
index 00000000000..f644ac515b6
--- /dev/null
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
@@ -0,0 +1,38 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+
+#include "pair_gran_hooke_history_ellipsoid.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
+ public:
+  PairGranHertzHistoryEllipsoid(class LAMMPS *);
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  double single(int, int, int, int, double, double, double, double &) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From 1bf5c2e6ce6d25ffcaaff033188d1894007a3533 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 4 Jan 2026 19:47:27 -0600
Subject: [PATCH 082/174] cleanup pair hertz + minor edits

---
 .../pair_gran_hertz_history_ellipsoid.cpp     | 32 +++++++++----------
 .../pair_gran_hooke_history_ellipsoid.cpp     | 20 +++++-------
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 633c46efbf7..c09b5dd215d 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -215,9 +215,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
               blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
 
               // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 -1 in derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? 0 : flagi,
-                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? 0 : flagj,
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
                                                                              X0, nij);
               if (status == 0)
                 touching = true;
@@ -247,16 +247,11 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         // compute overlap depth along normal direction for each grain
         // overlap is positive for both grains
         overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]); // TODO: Jibril: I wonder if we'd get the correct, but negative overlap if we picked nji, which might be cheaper than computing nji
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
 
-        // TODO: for the hertzian contact pass the surface points directly to the 
-        // curvature calculations. Need to add the normal scaled by the overlap to the contact point
         double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-        
-        for (int dim = 0; dim < 3; dim++) {
-          surf_point_i[dim] = X0[dim] + nij[dim] * overlap1;
-          surf_point_j[dim] = X0[dim] - nij[dim] * overlap2;
-        }
+        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
         curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri, surf_point_i, x[i]);
         curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj, surf_point_j, x[j]);
@@ -320,7 +315,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         // if I or J part of rigid body, use body mass
         // if I or J is frozen, meff is other particle
 
-        mi = rmass[i]; // JB I assume this is the mass of particle i, need to check
+        mi = rmass[i];
         mj = rmass[j];
         if (fix_rigid) {
           if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
@@ -331,7 +326,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         if (mask[i] & freeze_group_bit) meff = mj;
         if (mask[j] & freeze_group_bit) meff = mi;
 
-        // normal forces = Hookian contact + normal velocity damping
+        // normal forces = Hertzian contact + normal velocity damping
 
         damp = meff * gamman * vnnr;
         ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
@@ -388,9 +383,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
         // forces & torques
 
-        fx = -nij[0] * ccel + fs1;
-        fy = -nij[1] * ccel + fs2;
-        fz = -nij[2] * ccel + fs3;
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
         fx *= factor_lj; // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;
@@ -473,6 +468,11 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
       dampflag < 0 || dampflag > 1)
     error->all(FLERR, "Illegal pair_style command");
+
+  // convert Kn and Kt from pressure units to force/distance^2
+
+  kn /= force->nktv2p;
+  kt /= force->nktv2p;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6a99461f3fe..26c1fd9f95e 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -271,9 +271,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
 
               // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 -1 in derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? 0 : flagi,
-                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? 0 : flagj,
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
                                                                              X0, nij);
               if (status == 0)
                 touching = true;
@@ -303,11 +303,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // compute overlap depth along normal direction for each grain
         // overlap is positive for both grains
         overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]); // TODO: Jibril: I wonder if we'd get the correct, but negative overlap if we picked nji, which might be cheaper than computing nji
-
-        // TODO: for the hertzian contact pass the surface points directly to the 
-        // curvature calculations. Need to add the normal scaled by the overlap to the contact point
-
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
 
         // branch vectors 
         double cr1[3], cr2[3];
@@ -367,7 +363,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // if I or J part of rigid body, use body mass
         // if I or J is frozen, meff is other particle
 
-        mi = rmass[i]; // JB I assume this is the mass of particle i, need to check
+        mi = rmass[i];
         mj = rmass[j];
         if (fix_rigid) {
           if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
@@ -434,9 +430,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         // forces & torques
 
-        fx = -nij[0] * ccel + fs1;
-        fy = -nij[1] * ccel + fs2;
-        fz = -nij[2] * ccel + fs3;
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
         fx *= factor_lj; // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;

From 8c0a47cf2ba93ec0090d7e2b2c4cd5ab2d6f3f0b Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 4 Jan 2026 22:27:08 -0600
Subject: [PATCH 083/174] Possible bug in the legacy pair style single function
 for relative tangent velocity written as TODO

---
 src/GRANULAR/pair_gran_hertz_history.cpp | 5 +++++
 src/GRANULAR/pair_gran_hooke_history.cpp | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/src/GRANULAR/pair_gran_hertz_history.cpp b/src/GRANULAR/pair_gran_hertz_history.cpp
index 1cd7b9444e0..8037bb52a8a 100644
--- a/src/GRANULAR/pair_gran_hertz_history.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history.cpp
@@ -475,6 +475,11 @@ double PairGranHertzHistory::single(int i, int j, int /*itype*/, int /*jtype*/,
   svector[7] = vt1;
   svector[8] = vt2;
   svector[9] = vt3;
+  // TODO to LAMMPS:
+  // doc says The last 3 (8-10) the components of the relative velocity in the tangential direction
+  // `vt` is the relative translational velocity only, i.e., it ignores the angular velocity.
+  // the total relative tangent velocity should be `vtr`.
+  // Should that be corrected? That would break backward compatibility, and this is "legacy code" anyway
 
   return 0.0;
 }
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index 60e6edf4d9d..0679be9173c 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -768,6 +768,11 @@ double PairGranHookeHistory::single(int i, int j, int /*itype*/, int /*jtype*/,
   svector[7] = vt1;
   svector[8] = vt2;
   svector[9] = vt3;
+  // TODO to LAMMPS:
+  // doc says The last 3 (8-10) the components of the relative velocity in the tangential direction
+  // `vt` is the relative translational velocity only, i.e., it ignores the angular velocity.
+  // the total relative tangent velocity should be `vtr`.
+  // Should that be corrected? That would break backward compatibility, and this is "legacy code" anyway
 
   return 0.0;
 }

From 032da470d3cd9ab5d2018c6160060718556165bf Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 4 Jan 2026 22:29:00 -0600
Subject: [PATCH 084/174] minor form fixes and clarified TODOs

---
 .../pair_gran_hertz_history_ellipsoid.cpp         |  6 +++---
 .../pair_gran_hooke_history_ellipsoid.cpp         | 15 +++++----------
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index c09b5dd215d..f6cc5a15987 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -184,7 +184,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
                                                                            X0, nij);
             if (status == 0)
               touching = true;
-            else if(status == 1)
+            else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
               error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
@@ -221,7 +221,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
                                                                              X0, nij);
               if (status == 0)
                 touching = true;
-              else if(status == 1)
+              else if (status == 1)
                 touching = false;
               else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
                 error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
@@ -420,7 +420,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // need to check this, it is for virial stress, but needs some tweaking for non-spherical particles
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // TODO: is this correct for non-spherical particles? I don't think so, how to correct it?
       }
     }
   }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 26c1fd9f95e..a2a15b6a8ef 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -106,8 +106,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   int i, j, ii, jj, inum, jnum;
   double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
   double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double wr1, wr2, wr3;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
   double vtr1, vtr2, vtr3, vrel;
   double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
   double fn, fs, fs1, fs2, fs3;
@@ -116,10 +115,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   int *touch, **firsttouch;
   double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
 
-  double shapex, shapey, shapez; // ellipsoid shape params
-  double quat1, quat2, quat3, quat4;
-  double block1, block2;
-
   double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
   AtomVecEllipsoid::BlockType flagi, flagj;
 
@@ -216,7 +211,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
-        if(bounding_box) {
+        if (bounding_box) {
           separating_axis = &allhistory[7 + size_history * jj];
           skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
                                        x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
@@ -240,7 +235,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
                                                                            X0, nij);
             if (status == 0)
               touching = true;
-            else if(status == 1)
+            else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
               error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
@@ -277,7 +272,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
                                                                              X0, nij);
               if (status == 0)
                 touching = true;
-              else if(status == 1)
+              else if (status == 1)
                 touching = false;
               else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
                 error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
@@ -467,7 +462,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // need to check this, it is for virial stress, but needs some tweaking for non-spherical particles
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // TODO: is this correct for non-spherical particles? I don't think so, how to correct it?
       }
     }
   }

From 5549e9ee78277fee5d1c3ab39cd9f1c857776a49 Mon Sep 17 00:00:00 2001
From: jibril-b-coulibaly <jibril.coulibaly@gmail.com>
Date: Sun, 4 Jan 2026 22:30:29 -0600
Subject: [PATCH 085/174] implement single() function for pair hooke ellipsoid

---
 .../pair_gran_hooke_history_ellipsoid.cpp     | 208 +++++++++++++-----
 1 file changed, 147 insertions(+), 61 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a2a15b6a8ef..f87bd8fb086 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -758,11 +758,10 @@ void PairGranHookeHistoryEllipsoid::reset_dt()
 
 // TODO: implement the single() function for the super-ellipsoid
 double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                    double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+                                             double /*factor_coul*/, double /*factor_lj*/, double &fforce)
 {
   double radi, radj, radsum;
-  double r, rinv, rsqinv, delx, dely, delz;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3, wr1, wr2, wr3;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
   double mi, mj, meff, damp, ccel;
   double vtr1, vtr2, vtr3, vrel, shrmag;
   double fs1, fs2, fs3, fs, fn;
@@ -772,59 +771,169 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   radj = radius[j];
   radsum = radi + radj;
 
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
   if (rsq >= radsum * radsum) {
     fforce = 0.0;
     for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
     return 0.0;
   }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  if (bounding_box) {
+    double separating_axis = allhistory[7 + size_history * neighprev]; // Copy: no update of history in single
+    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+                                      x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+    if (no_bouding_box_contact) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // Super-ellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double* X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0];
+    X0[1] = X0_prev[1];
+    X0[2] = X0_prev[2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij);
+    if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+    }
+    if (status != 0)
+        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi; // Lagrange multiplier mu^2
+    for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+                                                                     x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+                                                                     X0, nij);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = { -nij[0], -nij[1], -nij[2] };
+  overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3],ey_space[3],ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
+  MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
+                             bonus[ellipsoid[i]].inertia,omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
+  MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
+                             bonus[ellipsoid[j]].inertia,omegaj);
 
-  r = sqrt(rsq);
-  rinv = 1.0 / r;
-  rsqinv = 1.0 / rsq;
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
 
   // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
 
   double **v = atom->v;
-  vr1 = v[i][0] - v[j][0];
-  vr2 = v[i][1] - v[j][1];
-  vr3 = v[i][2] - v[j][2];
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
 
   // normal component
 
-  double **x = atom->x;
-  delx = x[i][0] - x[j][0];
-  dely = x[i][1] - x[j][1];
-  delz = x[i][2] - x[j][2];
+  vn1 = nij[0] * vr1; // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
 
-  vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
-  vn1 = delx * vnnr * rsqinv;
-  vn2 = dely * vnnr * rsqinv;
-  vn3 = delz * vnnr * rsqinv;
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
 
   // tangential component
 
-  vt1 = vr1 - vn1;
-  vt2 = vr2 - vn2;
-  vt3 = vr3 - vn3;
-
-  // relative rotational velocity
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
 
-  double **omega = atom->omega;
-  wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
-  wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
-  wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
 
   // meff = effective mass of pair of particles
   // if I or J part of rigid body, use body mass
   // if I or J is frozen, meff is other particle
-
   double *rmass = atom->rmass;
   int *mask = atom->mask;
 
   mi = rmass[i];
   mj = rmass[j];
   if (fix_rigid) {
-    // NOTE: ensure mass_rigid is current for owned+ghost atoms?
     if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
     if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
   }
@@ -835,35 +944,12 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
   // normal forces = Hookian contact + normal velocity damping
 
-  damp = meff * gamman * vnnr * rsqinv;
-  ccel = kn * (radsum - r) * rinv - damp;
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
   if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
-  // relative velocities
-
-  vtr1 = vt1 - (delz * wr2 - dely * wr3);
-  vtr2 = vt2 - (delx * wr3 - delz * wr1);
-  vtr3 = vt3 - (dely * wr1 - delx * wr2);
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // shear history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  double *allshear = fix_history->firstvalue[i];
-
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
 
-  double *shear = &allshear[3 * neighprev];
+  double *shear = &allhistory[size_history * neighprev];
   shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
 
   // tangential forces = shear + tangential velocity damping
@@ -875,7 +961,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   // rescale frictional displacements and forces if needed
 
   fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel * r);
+  fn = xmu * fabs(ccel);
 
   if (fs > fn) {
     if (shrmag != 0.0) {
@@ -884,12 +970,12 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       fs3 *= fn / fs;
       fs *= fn / fs;
     } else
-      fs1 = fs2 = fs3 = fs = 0.0;
+      fs1 = fs2 = fs3 = 0.0;
   }
 
-  // set force and return no energy
+  // set force (normalized by r) and return no energy
 
-  fforce = ccel;
+  fforce = ccel / sqrt(rsq);
 
   // set single_extra quantities
 
@@ -900,9 +986,9 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   svector[4] = vn1;
   svector[5] = vn2;
   svector[6] = vn3;
-  svector[7] = vt1;
-  svector[8] = vt2;
-  svector[9] = vt3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
 
   return 0.0;
 }

From 2a0c8fe2e144f7e09a9eb8865c0ccc9b19cb8b1d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 7 Jan 2026 11:25:27 +0100
Subject: [PATCH 086/174] implement single() function for pair hertz ellipsoid.
 Not sure it is very useful since it does not compute the torque

---
 src/ASPHERE/math_extra_superellipsoids.cpp    |   2 +-
 .../pair_gran_hertz_history_ellipsoid.cpp     | 233 ++++++++++++------
 .../pair_gran_hooke_history_ellipsoid.cpp     |   3 +-
 3 files changed, 165 insertions(+), 73 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 7cc12ab06e7..f38d9142087 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -138,7 +138,7 @@ void apply_regularization_shape_function(double n1, double *value, double *grad,
   // B = (1/n) * (1/n - 1) * F^(1/n - 2) simplifies to scale_grad * (inv_n1 - 1.0) / F
   double scale_hess_add = scale_grad_hess1 * (inv_n1 - 1.0) / F;
 
-  *value = pow(F, inv_n1) - 1.0; 
+  *value = (F * F_pow_1_n1_m1) - 1.0; // avoid computing pow twice
 
   // hessian update
   for (int i = 0; i < 3; i++) {
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index f6cc5a15987..b6354709cc4 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -300,7 +300,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         vn2 = nij[1] * vr2;
         vn3 = nij[2] * vr3;
 
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitude
 
         // tangential component
 
@@ -420,7 +420,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // TODO: is this correct for non-spherical particles? I don't think so, how to correct it?
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // Correct even for non-spherical particles
       }
     }
   }
@@ -477,15 +477,13 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 
 /* ---------------------------------------------------------------------- */
 
-// TODO: implement the single() function for the super-ellipsoid
 double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                    double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+                                             double /*factor_coul*/, double /*factor_lj*/, double &fforce)
 {
   double radi, radj, radsum;
-  double r, rinv, rsqinv, delx, dely, delz;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3, wr1, wr2, wr3;
-  double mi, mj, meff, damp, ccel, polyhertz;
-  double vtr1, vtr2, vtr3, vrel, shrmag;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
   double fs1, fs2, fs3, fs, fn;
 
   double *radius = atom->radius;
@@ -493,59 +491,177 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   radj = radius[j];
   radsum = radi + radj;
 
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
   if (rsq >= radsum * radsum) {
     fforce = 0.0;
     for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
     return 0.0;
   }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  if (bounding_box) {
+    double separating_axis = allhistory[7 + size_history * neighprev]; // Copy: no update of history in single
+    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+                                      x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+    if (no_bouding_box_contact) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // Super-ellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double* X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0];
+    X0[1] = X0_prev[1];
+    X0[2] = X0_prev[2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij);
+    if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+    }
+    if (status != 0)
+        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi; // Lagrange multiplier mu^2
+    for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+                                                                     x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+                                                                     X0, nij);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = { -nij[0], -nij[1], -nij[2] };
+  overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+  curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+  curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
+  
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3],ey_space[3],ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
+  MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
+                             bonus[ellipsoid[i]].inertia,omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
+  MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
+                             bonus[ellipsoid[j]].inertia,omegaj);
 
-  r = sqrt(rsq);
-  rinv = 1.0 / r;
-  rsqinv = 1.0 / rsq;
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
 
   // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
 
   double **v = atom->v;
-  vr1 = v[i][0] - v[j][0];
-  vr2 = v[i][1] - v[j][1];
-  vr3 = v[i][2] - v[j][2];
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
 
   // normal component
 
-  double **x = atom->x;
-  delx = x[i][0] - x[j][0];
-  dely = x[i][1] - x[j][1];
-  delz = x[i][2] - x[j][2];
+  vn1 = nij[0] * vr1; // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
 
-  vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
-  vn1 = delx * vnnr * rsqinv;
-  vn2 = dely * vnnr * rsqinv;
-  vn3 = delz * vnnr * rsqinv;
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitude
 
   // tangential component
 
-  vt1 = vr1 - vn1;
-  vt2 = vr2 - vn2;
-  vt3 = vr3 - vn3;
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
 
-  // relative rotational velocity
-
-  double **omega = atom->omega;
-  wr1 = (radi * omega[i][0] + radj * omega[j][0]) * rinv;
-  wr2 = (radi * omega[i][1] + radj * omega[j][1]) * rinv;
-  wr3 = (radi * omega[i][2] + radj * omega[j][2]) * rinv;
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
 
   // meff = effective mass of pair of particles
   // if I or J part of rigid body, use body mass
   // if I or J is frozen, meff is other particle
-
   double *rmass = atom->rmass;
   int *mask = atom->mask;
 
   mi = rmass[i];
   mj = rmass[j];
   if (fix_rigid) {
-    // NOTE: ensure mass_rigid is current for owned+ghost atoms?
     if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
     if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
   }
@@ -556,47 +672,24 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
   // normal forces = Hookian contact + normal velocity damping
 
-  damp = meff * gamman * vnnr * rsqinv;
-  ccel = kn * (radsum - r) * rinv - damp;
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+  ccel *= polyhertz;
   if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
-  // relative velocities
-
-  vtr1 = vt1 - (delz * wr2 - dely * wr3);
-  vtr2 = vt2 - (delx * wr3 - delz * wr1);
-  vtr3 = vt3 - (dely * wr1 - delx * wr2);
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // shear history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  double *allshear = fix_history->firstvalue[i];
-
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  double *shear = &allshear[3 * neighprev];
+  double *shear = &allhistory[size_history * neighprev];
   shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
 
   // tangential forces = shear + tangential velocity damping
 
-  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
 
   // rescale frictional displacements and forces if needed
 
   fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel * r);
+  fn = xmu * fabs(ccel);
 
   if (fs > fn) {
     if (shrmag != 0.0) {
@@ -605,12 +698,12 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       fs3 *= fn / fs;
       fs *= fn / fs;
     } else
-      fs1 = fs2 = fs3 = fs = 0.0;
+      fs1 = fs2 = fs3 = 0.0;
   }
 
-  // set force and return no energy
+  // set force (normalized by r) and return no energy
 
-  fforce = ccel;
+  fforce = ccel / sqrt(rsq);
 
   // set single_extra quantities
 
@@ -621,9 +714,9 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   svector[4] = vn1;
   svector[5] = vn2;
   svector[6] = vn3;
-  svector[7] = vt1;
-  svector[8] = vt2;
-  svector[9] = vt3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
 
   return 0.0;
 }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index f87bd8fb086..01bc236ea7a 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -462,7 +462,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // TODO: is this correct for non-spherical particles? I don't think so, how to correct it?
+        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // Correct even for non-spherical particles
       }
     }
   }
@@ -756,7 +756,6 @@ void PairGranHookeHistoryEllipsoid::reset_dt()
 
 /* ---------------------------------------------------------------------- */
 
-// TODO: implement the single() function for the super-ellipsoid
 double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
                                              double /*factor_coul*/, double /*factor_lj*/, double &fforce)
 {

From f3fa0c89814d583f5b171312eacc4ac8eec4de92 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 7 Jan 2026 12:12:05 +0100
Subject: [PATCH 087/174] Skip newton-rapson if the initial guess is within
 tolerance (temporal coherence)

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 29 ++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index f38d9142087..94f27cfd625 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -363,8 +363,31 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   double blockmax = std::fmax(std::fmax(blocki[0],blocki[1]), std::fmax(blockj[0], blockj[1]));
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
-  // TODO: consider testing for convergence before attempting Newton's method.
-  //       the initial guess is the old X0, so with temporal coherence, it might still pass tolerance if deformation is slow!
+  // testing for convergence before attempting Newton's method.
+  // the initial guess is the old X0, so with temporal coherence, it might still pass tolerance if deformation is slow!
+  if (norm < TOL_NR_RES) {
+    
+    //  must compute the normal vector nij before returning since the Newton loop normally handles this upon convergence.
+    double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
+
+    // Transform global X0 to local frame of particle I
+    MathExtra::sub3(X0, xci, tmp_v);
+    MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
+
+    // Compute local gradient (we could ignore the Hessian here)
+    shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
+
+    // Rotate gradient back to global frame to get normal
+    MathExtra::matvec(Ri, tmp_v, gradi);
+    MathExtra::normalize3(gradi, nij);
+
+    // Return status
+    if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) 
+      return 1; // Converged, but no contact (separated)
+    
+    return 0; // Converged and Contacting
+  }
+
 
   for (int iter = 0 ; iter < ITERMAX_NR ; iter++) {
     norm_old = norm;
@@ -496,6 +519,8 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // 0 = converged and grains touching
   if (!converged)
     return 2; // TODO: consider not failing if not converged but shapefuncs positive (i.e., no contact)
+              // JB: might be risky to assume no contact if not converged, NR might have gone to a far away point
+              // but no guarantee there is no contact
   if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
     return 1;
   return 0;

From 07da0d8a67d79d2889c0d940b56f8b5da1d4b755 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 7 Jan 2026 15:02:15 +0100
Subject: [PATCH 088/174] Made gauss elimination default, fallback on LAPACK
 solver in case of failure

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 72 ++++++++++++++++------
 src/ASPHERE/math_extra_superellipsoids.h   | 30 ++-------
 2 files changed, 58 insertions(+), 44 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 94f27cfd625..fd1efc7386a 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -19,7 +19,6 @@
 #include "math_extra_superellipsoids.h"
 #include "math_extra.h"
 #include <cmath>
-
 // #include "math_special.h"
 // #include "math_const.h"
 
@@ -392,26 +391,59 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   for (int iter = 0 ; iter < ITERMAX_NR ; iter++) {
     norm_old = norm;
 
-    // Solve Newton step
-    int lapack_error, ipiv[16];
-    const int n = 4;
-    const char trans = 'N';
-    const int nrhs = 1;
-    double rhs[4] = {-residual[0], -residual[1], -residual[2], -residual[3]};
-    dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
-    if (lapack_error < 0)
-      return lapack_error;
-    else if (lapack_error > 0) { // Singular matrix: Tikhonov regularization
-      // High blockiness grains can have zero curvature / singular Hessian
-      // along principal local axes (x=0, y=0, z=0)
-      double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
-      jacobian[0]  += diag_weight;
-      jacobian[5]  += diag_weight;
-      jacobian[10] += diag_weight;
+    double rhs[4];
+    bool gauss_elim_solved = false;
+    double A_fast[16];
+    double b_fast[4];
+
+    for(int r=0; r<4; ++r) {
+        for(int c=0; c<4; ++c) {
+            A_fast[r*4 + c] = jacobian[c*4 + r];
+        }
+    }
+
+    b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
+    b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
+
+    // 2. Try Fast Solver
+    if (MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast)) {
+        rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
+        rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
+        gauss_elim_solved = true;
+    }
+
+    // Fallback to LAPACK
+    if (!gauss_elim_solved) {
+
+        rhs[0] = -residual[0]; rhs[1] = -residual[1]; 
+        rhs[2] = -residual[2]; rhs[3] = -residual[3];
+
+        int lapack_error = 0;
+        int ipiv[16];
+        const int n = 4;
+        const char trans = 'N'; 
+        const int nrhs = 1;
+        
+        dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
+        
+        if (lapack_error < 0) {
+            return lapack_error;
+        } else if (lapack_error > 0) { 
+            // Singular: Apply Tikhonov "Patch" to the LU FACTORS
+            // This is the "Dirty Hack" that makes the aligned test pass.
+            // It modifies the pivot U_ii, not the original matrix diagonal.
+            double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
+            jacobian[0]  += diag_weight;
+            jacobian[5]  += diag_weight;
+            jacobian[10] += diag_weight;
+          
+        }
+
+        // Solve using the (patched) factors
+        dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
+        
+        if (lapack_error) return lapack_error;
     }
-    dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
-    if (lapack_error)
-      return lapack_error;
 
     if (iter > 0)
       multiplicity = std::fmin(std::fmax(1.0, 1.0 / (1.0 - std::sqrt(MathExtra::lensq3(rhs)/MathExtra::lensq3(rhs_old)))), blockmax - 1.0);
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 52ce5bb44dc..35ac03043a0 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -121,14 +121,6 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 
 inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
     
-    // Tikhonov regularization (avoiding the constraint on the last row)
-    // High blockiness grains can have zero curvature / singular Hessian
-    // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10]);
-    A[0]  += diag_weight;
-    A[5]  += diag_weight;
-    A[10] += diag_weight;
-    
     // 1. Pivot 0 
     double inv0 = 1.0 / A[0];
     double m1 = A[4] * inv0;
@@ -166,14 +158,6 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
     // Helper lambda to access A[row, col]
     auto at = [&](int r, int c) -> double& { return A[r * 4 + c]; };
 
-    // Tikhonov regularization
-    // High blockiness grains can have zero curvature / singular Hessian
-    // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10]);
-    A[0]  += diag_weight;
-    A[5]  += diag_weight;
-    A[10] += diag_weight;
-
     // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
     
     for (int i = 0; i < 3; ++i) { // Loop over columns 0, 1, 2
@@ -229,14 +213,12 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]
 
 inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4]) {
     
-    // Tikhonov regularization
-    // High blockiness grains can have zero curvature / singular Hessian
-    // along principal local axes (x=0, y=0, z=0)
-    const double diag_weight = TIKHONOV_SCALE * (A[0] + A[5] + A[10] );
-    A[0]  += diag_weight;
-    A[5]  += diag_weight;
-    A[10] += diag_weight;
-    
+    // // Tikhonov regularization could be applied here
+    // double trace = A[0] + A[5] + A[10];
+    // A[0]  += TIKHONOV_SCALE * trace;
+    // A[5]  += TIKHONOV_SCALE * trace;
+    // A[10] += TIKHONOV_SCALE * trace;
+
      // --- COLUMN 0 ---
     // 1. Find Pivot in Col 0
     int p = 0; 

From 0acded90d89623117e1f8d1db84741a4ac95c87f Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 7 Jan 2026 17:25:20 +0100
Subject: [PATCH 089/174] Made choice between mean and gaussian curvature in
 hertz contact optional

---
 .../in.ellipsoid_box_compress                 |  2 +-
 src/ASPHERE/math_extra_superellipsoids.h      |  4 +--
 .../pair_gran_hertz_history_ellipsoid.cpp     | 32 ++++++++++++++++---
 .../pair_gran_hertz_history_ellipsoid.h       |  4 +++
 .../pair_gran_hooke_history_ellipsoid.cpp     |  2 --
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
index 00f66737488..26267fcfe9f 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
@@ -22,7 +22,7 @@ set             group all block 4.0 4.0
 # Rotation: Randomize orientation so we get Face-Face, Edge-Edge, and Corner-Corner
 set             group all quat/random 12345
 
-pair_style      gran/hooke/history/ellipsoid 1000.0 0.0 1.0 0.0 0.5 0
+pair_style      gran/hooke/history/ellipsoid 1000.0 0.0 1.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
 # Neighbor list, need to check
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 35ac03043a0..78fc7b5b8ec 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -44,7 +44,7 @@ namespace MathExtraSuperellipsoids {
                                       const double* center_distance_box1, const double* center_distance_box2,
                                       const double* a, const double* b);
 
-  inline bool check_collision_and_get_seed(const double* xc1, const double R1[3][3], const double* shape1,
+  inline bool check_intersection_axis_and_get_seed(const double* xc1, const double R1[3][3], const double* shape1,
                                          const double* xc2, const double R2[3][3], const double* shape2,
                                         double* cached_axis, double* contact_point);
 
@@ -482,7 +482,7 @@ inline bool MathExtraSuperellipsoids::check_intersection_axis(
 }
 
 
-inline bool MathExtraSuperellipsoids::check_collision_and_get_seed(
+inline bool MathExtraSuperellipsoids::check_intersection_axis_and_get_seed(
     const double* xc1, const double R1[3][3], const double* shape1,
     const double* xc2, const double R2[3][3], const double* shape2, 
     double* cached_axis, double* contact_point
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index b6354709cc4..7ee213ca03b 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -253,8 +253,18 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
         MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
-        curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-        curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        if (curvature_model == CURV_MEAN) {
+            curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+                            shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+            curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+                            shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        } else {
+            curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+                            shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+            curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+                            shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        }
+        
         polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
         
         // branch vectors 
@@ -454,11 +464,15 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 
   limit_damping = 0;
   bounding_box = 0;
+  curvature_model = CURV_MEAN; // Default to Mean curvature
+
   for (int iarg = 6 ; iarg < narg ; iarg++) {
     if (strcmp(arg[iarg], "limit_damping") == 0)
       limit_damping = 1;
     else if (strcmp(arg[iarg], "bounding_box") == 0)
       bounding_box = 1;
+    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
+      curvature_model = CURV_GAUSSIAN;
     else
       error->all(FLERR, "Illegal pair_style command");
   }
@@ -594,8 +608,18 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
   MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
-  curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-  curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  if (curvature_model == CURV_MEAN) {
+    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+                    shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+                    shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  } else {
+    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+                    shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+                    shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  }
+
   polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
   
   double cr1[3], cr2[3];
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
index f644ac515b6..b594cffe1d7 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
@@ -30,6 +30,10 @@ class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
   void compute(int, int) override;
   void settings(int, char **) override;
   double single(int, int, int, int, double, double, double, double &) override;
+
+ protected:
+  int curvature_model; 
+  enum { CURV_MEAN, CURV_GAUSSIAN };
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 01bc236ea7a..6c4c13ff851 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -224,8 +224,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
             // Continued contact: use grain true shape and last contact point
-            // TODO: move contact point with rigid body motion of the pair ?
-            //       not sure if enough information to do that
             X0[0] = X0_prev[0];
             X0[1] = X0_prev[1];
             X0[2] = X0_prev[2];

From b717eea4de7a69ed1a8c2b6c6ae60037430c3649 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 13 Jan 2026 16:05:33 +0100
Subject: [PATCH 090/174] Added option for Geometric contact point formulation
 (radial distance function reformulation)

---
 doc/src/pair_gran_ellipsoid.rst               |   1 +
 .../superellipsoid_gran/in.ellipsoid_gran     |   2 +-
 src/ASPHERE/math_extra_superellipsoids.cpp    | 119 ++++++----
 src/ASPHERE/math_extra_superellipsoids.h      |  16 +-
 .../pair_gran_hertz_history_ellipsoid.cpp     |  12 +-
 .../pair_gran_hertz_history_ellipsoid.h       |   1 -
 .../pair_gran_hooke_history_ellipsoid.cpp     |   7 +-
 .../pair_gran_hooke_history_ellipsoid.h       |   2 +
 .../utils/test_math_extra_superellipsoids.cpp | 220 ++++++++++++++++--
 9 files changed, 306 insertions(+), 74 deletions(-)

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index 57979874ac1..155f2e6516c 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -119,6 +119,7 @@ Newton's method is used to solve this equation for the scalars
 
 .. note::
     TODO: Jacopo: a modified representation of the particle surface is defined
+    :math:`G(\mathbf{X}) = (F(\mathbf{X}))^{1/n_1}-1`
     to make the function more linear and accelerate convergence.
 
 A hierarchical approach is used to limit the cost of contact detection.
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 87713b106aa..91d64e0f5df 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -21,7 +21,7 @@ set             type 1 shape 2.0 1.0 1.0
 # TODO: Line below currently fails in contact detection with status 1
 # set             type 1 block 4.0 4.0
 
-pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
+pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0 geometric bounding_box
 # pair_style      gran/hertz/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
 pair_coeff      * *
 
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index fd1efc7386a..60d05052b36 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -120,43 +120,47 @@ void global2local_vector(const double *v, const double *quat, double *local_v){
 };
 
 /* ----------------------------------------------------------------------
-   Possible regularization for the shape functions (WIP)
-   Instead of F(x,y,z) - 1 = 0 we use (F(x,y,z))^(1/n1) -1 = G(x,y,z) = 0
-   The gradient is simply nabla G = (1/n1) * (F)^(1/n1 - 1) * nabla F
-   The hessian is H(G) = (1/n1) * (F)^(1/n1 - 1) * H(F) + (1/n1) * (1/n1 - 1) * (F)^(1/n1 - 2) * nabla F (nabla F)^T
+   Possible regularization for the shape functions 
+   Instead of F(x,y,z) = 0 we use (F(x,y,z)+1)^(1/n1) -1 = G(x,y,z) = 0
+   We also scale G by the average radius to have better "midway" points
 ------------------------------------------------------------------------- */
-void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]){
+void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]){
   // value is F - 1
-  double F = *value + 1.0; // should be fine as long as one does not start from the center (otherwise we could guard against it)
-  double inv_n1 = 1.0 / n1;
-  double F_pow_1_n1_m1 = pow(F, inv_n1 - 1.0);
-
-  // scale factor for grainet and first term in the hessian
-  double scale_grad_hess1 = inv_n1 * F_pow_1_n1_m1;
+  double base = *value + 1.0; // should be fine as long as one does not start from the center (otherwise we could guard against it)
+  const double inv_F = 1.0 / base;
+  const double inv_n1 = 1.0 / n1;
+  
+  // P = base^(1/n)
+  const double F_pow_inv_n1 = std::pow(base, inv_n1);
 
-  // B = (1/n) * (1/n - 1) * F^(1/n - 2) simplifies to scale_grad * (inv_n1 - 1.0) / F
-  double scale_hess_add = scale_grad_hess1 * (inv_n1 - 1.0) / F;
+  // Scale for Gradient: S1 = R * (1/n) * base^(1/n - 1)
+  const double scale_grad = avg_radius * inv_n1 * F_pow_inv_n1 * inv_F;
 
-  *value = (F * F_pow_1_n1_m1) - 1.0; // avoid computing pow twice
+  // Scale for Hessian addition: S2 = S1 * (1/n - 1) * base^-1
+  const double scale_hess_add = scale_grad * (inv_n1 - 1.0) * inv_F;
 
-  // hessian update
+  // H_new = scale_grad * H_old + scale_hess_add * (grad_old x grad_old^T)
   for (int i = 0; i < 3; i++) {
-    for (int j = 0; j < 3; j++) {
-      double grad_gratT = grad[i] * grad[j];
-      hess[i][j] = (hess[i][j] * scale_grad_hess1) + (scale_hess_add * grad_gratT);
-    }
+      for (int j = 0; j < 3; j++) {
+          double grad_outer_prod = grad[i] * grad[j];
+          hess[i][j] = (hess[i][j] * scale_grad) + (scale_hess_add * grad_outer_prod);
+      }
   }
 
+  // grad_new = scale_grad * grad_old
   for (int i = 0; i < 3; i++) {
-    grad[i] *= scale_grad_hess1;
+      grad[i] *= scale_grad;
   }
+
+  // G = R * (base^(1/n) - 1)
+  *value = avg_radius * (F_pow_inv_n1 - 1.0);
 };
 
 
 /* ----------------------------------------------------------------------
    shape function computations for superellipsoids
 ------------------------------------------------------------------------- */
-double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]) {
+double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]){
   double shapefunc;
   // TODO: Not sure how to make flag values more clear
   // Cannot forward declare the enum AtomVecEllipsoid::BlockType
@@ -181,6 +185,7 @@ double shape_and_derivatives_local(const double* xlocal, const double* shape, co
       break;
     }
   }
+
   return shapefunc;
 }
 
@@ -281,26 +286,23 @@ double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double*
   return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
 }
 
-double regularized_shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
-  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
-  MathExtra::sub3(X0, xc, tmp_v);
-  MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
-  apply_regularization_shape_function(block[0], &shapefunc, tmp_v, hess);
-  MathExtra::matvec(R, tmp_v, grad);
-  MathExtra::times3_transpose(hess, R, tmp_m);
-  MathExtra::times3(R, tmp_m, hess);
-  return shapefunc;
-}
 
-double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]) {
-  double shapefunc, xlocal[3], tmp_v[3], tmp_m[3][3];
-  MathExtra::sub3(X0, xc, tmp_v); // here temp_v is X0 - xc
+double shape_and_derivatives_global(const double* xc, const double R[3][3], 
+    const double* shape, const double* block, const int flag, 
+    const double* X0, double* grad, double hess[3][3],
+    int formulation, double avg_radius) 
+{
+  double xlocal[3], tmp_v[3], tmp_m[3][3];
+  MathExtra::sub3(X0, xc, tmp_v); 
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
-  shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess); // here temp_v is grad in local
+  double shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
+  if (formulation == FORMULATION_GEOMETRIC) {
+      apply_regularization_shape_function(block[0], avg_radius, &shapefunc, tmp_v, hess);
+  }
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
   MathExtra::times3(R, tmp_m, hess);
+
   return shapefunc;
 }
 
@@ -335,10 +337,11 @@ void compute_jacobian(const double* gradi_global, const double hessi_global[3][3
 
 double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                      const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                     const double* X, double* shapefunc, double* residual, double* jacobian) {
+                                     const double* X, double* shapefunc, double* residual, double* jacobian, 
+                                     const int formulation, const double avg_radius_i, const double avg_radius_j) {
   double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
-  shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi);
-  shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj);
+  shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi, formulation, avg_radius_i);
+  shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj, formulation, avg_radius_j);
   compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
   return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual);
 }
@@ -346,7 +349,7 @@ double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], c
 
 int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                             const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                            double* X0, double* nij) {
+                            double* X0, double* nij, int formulation) {
   double norm, norm_old, shapefunc[2], residual[4], jacobian[16];
   double lsq = MathExtra::distsq3(xci, xcj);
   bool converged(false);
@@ -361,7 +364,17 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   double rhs_old[3];
   double blockmax = std::fmax(std::fmax(blocki[0],blocki[1]), std::fmax(blockj[0], blockj[1]));
 
-  norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+  // avg radii for regularization if GEOMETRIC formulation
+  double avg_radius_i = 1;
+  double avg_radius_j = 1;
+  double max_step;
+  if (formulation == FORMULATION_GEOMETRIC) {
+    avg_radius_i = (shapei[0] + shapei[1] + shapei[2]) / 3.0;
+    avg_radius_j = (shapej[0] + shapej[1] + shapej[2]) / 3.0;
+    max_step = std::sqrt(lsq) / 3.0;
+  }
+
+  norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian, formulation, avg_radius_i, avg_radius_j);
   // testing for convergence before attempting Newton's method.
   // the initial guess is the old X0, so with temporal coherence, it might still pass tolerance if deformation is slow!
   if (norm < TOL_NR_RES) {
@@ -374,6 +387,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
 
     // Compute local gradient (we could ignore the Hessian here)
+    // Algebraic gradient is fine for direction even if we used Geometric for solving
     shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
 
     // Rotate gradient back to global frame to get normal
@@ -459,6 +473,19 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       X_line[2] = X0[2] + a * rhs[2];
       X_line[3] = X0[3] + a * rhs[3];
 
+      if (formulation == FORMULATION_GEOMETRIC) {
+          // Limit the max step size to avoid jumping too far
+          // normalize residual vector if step was limited
+          double spatial_residual_norm = std::sqrt(residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2]);
+          a = 1; // reset a to 1 for proper step size in geometric formulation
+          if (spatial_residual_norm > max_step) {
+              double scale = max_step / spatial_residual_norm;
+              residual[0] *= scale;
+              residual[1] *= scale;
+              residual[2] *= scale;
+          }
+      }
+
       // Line search iterates not selected for the next Newton iteration
       // do not need to compute the expensive Jacobian, only the residual.
       // We want to avoid calling `compute_residual_and_jacobian()` for each
@@ -492,13 +519,21 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       MathExtra::sub3(X_line, xci, tmp_v); 
       MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
       shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
+      if (formulation == FORMULATION_GEOMETRIC) {
+          apply_regularization_shape_function(blocki[0], avg_radius_i, &shapefunc[0], tmp_v, hessi);
+      } 
       MathExtra::matvec(Ri, tmp_v, gradi);
 
       MathExtra::sub3(X_line, xcj, tmp_v);
       MathExtra::transpose_matvec(Rj, tmp_v, xjlocal);
       shapefunc[1] = shape_and_derivatives_local(xjlocal, shapej, blockj, flagj, tmp_v, hessj);
+      if (formulation == FORMULATION_GEOMETRIC) {
+          apply_regularization_shape_function(blockj[0], avg_radius_j, &shapefunc[1], tmp_v, hessj);
+      }
       MathExtra::matvec(Rj, tmp_v, gradj);
 
+
+
       norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
 
       if ((norm <= TOL_NR_RES) &&
@@ -533,7 +568,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       X0[1] += rhs[1];
       X0[2] += rhs[2];
       X0[3] += rhs[3];
-      norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian);
+      norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian, formulation, avg_radius_i, avg_radius_j);
     } else {
       X0[0] = X_line[0];
       X0[1] = X_line[1];
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 78fc7b5b8ec..f1ed6831730 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -25,6 +25,16 @@
 namespace MathExtraSuperellipsoids {
   inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
 
+  enum ContactFormulation {
+    FORMULATION_ALGEBRAIC = 0,
+    FORMULATION_GEOMETRIC = 1
+    };
+
+  enum CurvatureModel {
+    CURV_MEAN = 0,
+    CURV_GAUSSIAN = 1
+    };
+
   // needed for shape functions grad and matrix 
   void global2local_vector(const double v[3], const double *quat, double local_v[3]); // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
 
@@ -63,12 +73,12 @@ namespace MathExtraSuperellipsoids {
   void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
   double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                        const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                       const double* X, double* shapefunc, double* residual, double* jacobian);
+                                       const double* X, double* shapefunc, double* residual, double* jacobian, const int formulation, const double avg_radius_i, const double avg_radius_j);
   int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                              double* X0, double* nij);
+                              double* X0, double* nij, int formulation = FORMULATION_ALGEBRAIC);
 
-  void apply_regularization_shape_function(double n1, double *value, double *grad, double hess[3][3]);
+  void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]); 
   // functions to compute shape function and gradient only when called for surface point calculation given contact point
   double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad);
   double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad);
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 7ee213ca03b..f6bc7a17a74 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -253,7 +253,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
         MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
-        if (curvature_model == CURV_MEAN) {
+        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
             curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
                             shapei, blocki, flagi, Ri, surf_point_i, x[i]);
             curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
@@ -444,7 +444,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
 void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 {
-  if (narg != 6 && narg != 7 && narg != 8) error->all(FLERR, "Illegal pair_style command");
+  if (narg <6) error->all(FLERR, "Illegal pair_style command");
 
   kn = utils::numeric(FLERR, arg[0], false, lmp);
   if (strcmp(arg[1], "NULL") == 0)
@@ -464,15 +464,17 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 
   limit_damping = 0;
   bounding_box = 0;
-  curvature_model = CURV_MEAN; // Default to Mean curvature
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN; // Default to Mean curvature
 
   for (int iarg = 6 ; iarg < narg ; iarg++) {
     if (strcmp(arg[iarg], "limit_damping") == 0)
       limit_damping = 1;
     else if (strcmp(arg[iarg], "bounding_box") == 0)
       bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
     else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
-      curvature_model = CURV_GAUSSIAN;
+      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
     else
       error->all(FLERR, "Illegal pair_style command");
   }
@@ -608,7 +610,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
   MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
-  if (curvature_model == CURV_MEAN) {
+  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
     curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
                     shapei, blocki, flagi, Ri, surf_point_i, x[i]);
     curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
index b594cffe1d7..ab2ab26dffe 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
@@ -33,7 +33,6 @@ class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
 
  protected:
   int curvature_model; 
-  enum { CURV_MEAN, CURV_GAUSSIAN };
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6c4c13ff851..b657313c617 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -71,6 +71,9 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   fix_history = nullptr;
   fix_dummy = dynamic_cast<FixDummy *>(
       modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+
+  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+
 }
 
 /* ---------------------------------------------------------------------- */
@@ -495,7 +498,7 @@ void PairGranHookeHistoryEllipsoid::allocate()
 
 void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
 {
-  if (narg != 6 && narg != 7 && narg != 8) error->all(FLERR, "Illegal pair_style command");
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
 
   kn = utils::numeric(FLERR, arg[0], false, lmp);
   if (strcmp(arg[1], "NULL") == 0)
@@ -520,6 +523,8 @@ void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
       limit_damping = 1;
     else if (strcmp(arg[iarg], "bounding_box") == 0)
       bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
     else
       error->all(FLERR, "Illegal pair_style command");
   }
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index d7aa150374b..7f21331cf8c 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -67,6 +67,8 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   double *mass_rigid;      // rigid mass for owned+ghost atoms
   int nmax;                // allocated size of mass_rigid
 
+  int contact_formulation;
+  
   void allocate();
 
  private:
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index b726127edb5..04e8e6ce9d6 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -44,12 +44,7 @@ TEST(ContactPointAndNormal, sphere)
   double blockj[2] = {2.0, 2.0};
   int flagj = 0;
 
-  // Contact detection
-  double X0[4] = {0.0, 0.0, 0.0, 0.0}, nij[3];
-  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
-                                                    xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij);
-  // Analytical solution
+   // Analytical solution
   double X0_analytical[4] = {rj * xci[0] / (ri+rj) + ri * xcj[0] / (ri+rj),
                              rj * xci[1] / (ri+rj) + ri * xcj[1] / (ri+rj),
                              rj * xci[2] / (ri+rj) + ri * xcj[2] / (ri+rj),
@@ -57,6 +52,14 @@ TEST(ContactPointAndNormal, sphere)
   double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
   MathExtra::norm3(nij_analytical);
 
+  int method = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+  
+     // Contact detection
+  double X0[4] = {0.0, 0.0, 0.0, 0.0}, nij[3];
+  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
+                                                    xcj, Rj, shapej, blockj, flagj,
+                                                    X0, nij, method);
+
   ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
   ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
   ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
@@ -88,18 +91,17 @@ TEST(ContactPointAndNormal, sphere)
   X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
   MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
                                                     xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij);
+                                                    X0, nij, method);
 
-  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
-  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
-  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
-  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
 
   ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
   ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
   ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
 
- 
 }
 
 TEST(ContactPointAndNormal, supersphere_mono)
@@ -113,6 +115,12 @@ TEST(ContactPointAndNormal, supersphere_mono)
                     {0.0, 0.0, 1.0}};
 
   std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
+  int method = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+
+  // Analytical solution
+    double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
+    double nij_analytical[3] = {1.0, 0.0, 0.0};
+
   for (auto n : blocks) {
     double block[2] = {n, n};
     int flag =  (n < 2.01) ? 0 : 1;
@@ -120,18 +128,16 @@ TEST(ContactPointAndNormal, supersphere_mono)
     // Contact detection
     // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
     double X0[4] = {r, -r, 2*r, 0.0}, nij[3];
+
     int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shape, block, flag,
-                                                                   xcj, R, shape, block, flag,
-                                                                   X0, nij);
-    // Analytical solution
-    double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
-    double nij_analytical[3] = {1.0, 0.0, 0.0};
+                                                                  xcj, R, shape, block, flag,
+                                                                  X0, nij, method);
 
     std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
-    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
-    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
-    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
-    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
 
     ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
     ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
@@ -139,6 +145,178 @@ TEST(ContactPointAndNormal, supersphere_mono)
   }
 }
 
+TEST(ContactPointAndNormal, sphere_geometric)
+{
+  // First grain
+  double ri = 2.5;
+  double rj = 1.25;
+  double overlap = -0.5;
+  double xci[3] = {-(ri - overlap/2.0), 0.0, 0.0};
+  double shapei[3] = {ri, ri, ri};
+  double Ri[3][3] = {{1.0, 0.0, 0.0},
+                     {0.0, 1.0, 0.0},
+                     {0.0, 0.0, 1.0}};
+  double blocki[2] = {2.0, 2.0};
+  int flagi = 0;
+
+  // Second grains
+  double xcj[3] = {rj - overlap/2.0, 0.0, 0.0};
+  
+  double shapej[3] = {rj, rj, rj};
+  double Rj[3][3] = {{1.0, 0.0, 0.0},
+                     {0.0, 1.0, 0.0},
+                     {0.0, 0.0, 1.0}};
+  double blockj[2] = {2.0, 2.0};
+  int flagj = 0;
+
+   // Analytical solution
+  double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
+  double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
+  MathExtra::norm3(nij_analytical);
+
+  int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+  
+  // Contact detection
+  double X0[4] = {.1, .1, .1, 1.0}, nij[3];
+  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
+                                                    xcj, Rj, shapej, blockj, flagj,
+                                                    X0, nij, method);
+
+  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+
+  // Rotational invariance
+  double anglei = 0.456;
+  double axisi[3] = {1,2,3};
+  MathExtra::norm3(axisi);
+  double quati[4] = {std::cos(anglei),
+                     std::sin(anglei)*axisi[0],
+                     std::sin(anglei)*axisi[1],
+                     std::sin(anglei)*axisi[2]};
+  MathExtra::quat_to_mat(quati, Ri);
+
+  double anglej = 0.123;
+  double axisj[3] = {-1,2,1};
+  MathExtra::norm3(axisj);
+  double quatj[4] = {std::cos(anglej),
+                     std::sin(anglej)*axisj[0],
+                     std::sin(anglej)*axisj[1],
+                     std::sin(anglej)*axisj[2]};
+  MathExtra::quat_to_mat(quatj, Rj);
+
+  X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
+  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
+                                                    xcj, Rj, shapej, blockj, flagj,
+                                                    X0, nij, method);
+
+  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
+
+  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+
+}
+
+// TEST(ContactPointAndNormal, supersphere_mono_geometric)
+// {
+//   double r = 3.456;
+//   double overlap = -r / 5.0;
+//   double xci[3] = {-(r - overlap/2.0), 0.0, 0.0};
+//   double xcj[3] = {r - overlap/2.0, 0.0, 0.0};
+//   double shape[3] = {r, r, r};
+//   double R[3][3] = {{1.0, 0.0, 0.0},
+//                     {0.0, 1.0, 0.0},
+//                     {0.0, 0.0, 1.0}};
+
+//   std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
+//   int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+  
+//   // Analytical solution
+//   double nij_analytical[3] = {1.0, 0.0, 0.0};
+//   double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};    
+  
+//   for (auto n : blocks) {
+//     double block[2] = {n, n};
+//     int flag =  (n < 2.01) ? 0 : 1;
+
+//     // Contact detection
+//     // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
+//     double X0[4] = {overlap, overlap, 2*overlap, 1.0}, nij[3];
+
+//     int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shape, block, flag,
+//                                                                   xcj, R, shape, block, flag,
+//                                                                   X0, nij, method);
+
+//     std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
+//     ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+//     ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+//     ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+//     ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
+
+//     ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+//     ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+//     ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+//   }
+// }
+
+TEST(ContactPointAndNormal, supersphere_poly_geometric)
+{
+  double r1 = 3.456;
+  double r2 = 3.0 * r1; // Polydisperse: radius 2 = 3 * radius 1
+  double overlap = r1 / 5.0; 
+  double xci[3] = {-(r1 - overlap/2.0), 0.0, 0.0};
+  double xcj[3] = {  r2 - overlap/2.0 , 0.0, 0.0}; 
+  
+  double shapei[3] = {r1, r1, r1};
+  double shapej[3] = {r2, r2, r2};
+  
+  // Identity Rotation
+  double R[3][3] = {{1.0, 0.0, 0.0},
+                    {0.0, 1.0, 0.0},
+                    {0.0, 0.0, 1.0}};
+
+  std::vector<double> blocks = {2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // test would no converge for higher n if not starting along the line connecting the centers
+  int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+  
+  double nij_analytical[3] = {1.0, 0.0, 0.0};
+  double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};    
+  
+  for (auto n : blocks) {
+    double block[2] = {n, n};
+    int flag =  (n < 2.01) ? 0 : 1;
+
+    // Initial Guess: Offset from 0 to test convergence
+    double X0[4] = {overlap, overlap, overlap, 1.0}, nij[3]; 
+
+    int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shapei, block, flag,
+                                                                  xcj, R, shapej, block, flag,
+                                                                  X0, nij, method);
+
+    std::cout << "n=" << n << " Status=" << status << " Res: " 
+              << X0[0] << " " << X0[1] << " " << X0[2] << " mu=" << X0[3] << std::endl;
+
+    ASSERT_EQ(status, 0) << "Failed to converge/detect contact for n=" << n;
+    
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Position X failed for n=" << n;
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Position Y failed for n=" << n;
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Position Z failed for n=" << n;
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Lagrange Multiplier failed for n=" << n;
+
+    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON) << "Normal X failed for n=" << n;
+    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON) << "Normal Y failed for n=" << n;
+    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON) << "Normal Z failed for n=" << n;
+  }
+}
+
 // TODO: supersphere_mono with grains overlapping
 // TODO: supersphere_poly with grains overlapping
 // TODO: more

From 0726e241a55aa840a24ee8e743d3365449775442 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 14 Jan 2026 11:50:57 +0100
Subject: [PATCH 091/174] Fixed bug for old contact point when moving through
 periodic boundaries

---
 .../pair_gran_hertz_history_ellipsoid.cpp     | 19 ++++++-------
 .../pair_gran_hooke_history_ellipsoid.cpp     | 27 ++++++++++---------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index f6bc7a17a74..3589b474046 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -172,12 +172,10 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           flagi = bonus[ellipsoid[i]].type;
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point
-            // TODO: move contact point with rigid body motion of the pair ?
-            //       not sure if enough information to do that
-            X0[0] = X0_prev[0];
-            X0[1] = X0_prev[1];
-            X0[2] = X0_prev[2];
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[i][0] + X0_prev[0];
+            X0[1] = x[i][1] + X0_prev[1];
+            X0[2] = x[i][2] + X0_prev[2];
             X0[3] = X0_prev[3];
             int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                            x[j], Rj, shapej, blockj, flagj,
@@ -238,9 +236,12 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         history = &allhistory[size_history * jj];
         for (int k = 0; k < size_history; k++) history[k] = 0.0;
       } else {
-        X0_prev[0] = X0[0];
-        X0_prev[1] = X0[1];
-        X0_prev[2] = X0[2];
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[i][0];
+        X0_prev[1] = X0[1] - x[i][1];
+        X0_prev[2] = X0[2] - x[i][2];
         X0_prev[3] = X0[3];
 
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index b657313c617..11081b108ae 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -226,10 +226,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           flagi = bonus[ellipsoid[i]].type;
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point
-            X0[0] = X0_prev[0];
-            X0[1] = X0_prev[1];
-            X0[2] = X0_prev[2];
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[i][0] + X0_prev[0];
+            X0[1] = x[i][1] + X0_prev[1];
+            X0[2] = x[i][2] + X0_prev[2];
             X0[3] = X0_prev[3];
             int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                            x[j], Rj, shapej, blockj, flagj,
@@ -239,7 +239,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, i, j);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -276,7 +276,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               else if (status == 1)
                 touching = false;
               else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-                error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+                error->all(FLERR, "Ellipsoid contact detection (new contact) failed with status {} ", status);
             }
           }
         }
@@ -290,9 +290,12 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         history = &allhistory[size_history * jj];
         for (int k = 0; k < size_history; k++) history[k] = 0.0;
       } else {
-        X0_prev[0] = X0[0];
-        X0_prev[1] = X0[1];
-        X0_prev[2] = X0[2];
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[i][0];
+        X0_prev[1] = X0[1] - x[i][1];
+        X0_prev[2] = X0[2] - x[i][2];
         X0_prev[3] = X0[3];
 
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
@@ -823,9 +826,9 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   double* X0_prev = &allhistory[3 + size_history * neighprev];
   if (touch[neighprev] == 1) {
     // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0];
-    X0[1] = X0_prev[1];
-    X0[2] = X0_prev[2];
+    X0[0] = X0_prev[0] + x[i][0];
+    X0[1] = X0_prev[1] + x[i][1];
+    X0[2] = X0_prev[2] + x[i][2];
     X0[3] = X0_prev[3];
     int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                    x[j], Rj, shapej, blockj, flagj,

From 583339dfc90af7901c853dbbcf31141e18d6ed9d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 14 Jan 2026 15:24:46 +0100
Subject: [PATCH 092/174] Updated documentation for the hertz formulation.
 Added example script for particles drop test

---
 doc/src/pair_gran_ellipsoid.rst               | 45 +++++++----
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 81 +++++++++++++++++++
 .../in.ellipsoid_box_compress                 | 55 -------------
 3 files changed, 112 insertions(+), 69 deletions(-)
 create mode 100644 examples/ASPHERE/superellipsoid_gran/in.drop_test
 delete mode 100644 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index 155f2e6516c..d8159165fe8 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -22,7 +22,7 @@ Syntax
 * xmu = static yield criterion (unitless value between 0.0 and 1.0e4)
 * dampflag = 0 or 1 if tangential damping force is excluded or included
 
-* keyword = *limit_damping*, *bounding_box*
+* keyword = *limit_damping*, *bounding_box*, *geometric*, *curvature_gaussian*
 
   .. parsed-literal::
 
@@ -30,6 +30,10 @@ Syntax
          limit damping to prevent attractive interaction
       *bounding_box* value = none
          use oriented bounding box in contact detection
+      *geometric* value = none
+         use geometric radial distance formulation for the contact formulation
+      *curvature_gaussian* value = none
+         use Gaussian curvature formulation for the contact detection (Hertz only)
 
 .. note::
 
@@ -48,9 +52,8 @@ Examples
 .. code-block:: LAMMPS
 
    pair_style gran/hooke/history/ellipsoid 200000.0 NULL 50.0 NULL 0.5 1 bounding_box
-   pair_style gran/hooke/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0
    pair_style gran/hooke/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0 limit_damping
-
+   pair_style gran/hertz/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0 geometric curvature_gaussian
 
 Description
 """""""""""
@@ -74,8 +77,8 @@ more details.
 
 Contact detection for these aspherical particles uses the so-called ''midway''
 minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
-particles with shape functions :math:`F_i` and :math:`F_j`,
-the contact point :math:`\mathbf{X}_0` is obtained as:
+particles with shape functions,  :math:`F_i` and :math:`F_j`,
+the contact point :math:`\mathbf{X}_0` in the global frame is obtained as:
 
 .. math::
 
@@ -87,7 +90,7 @@ where the shape function is given by
 :math:`F_i(\mathbf{X}) = f(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
 and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
 and rotation matrix of the particle, respectively.
-The constrained minimization problem is solved using Lagrang multipliers and
+The constrained minimization problem is solved using Lagrange multipliers and
 Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
 
 .. note::
@@ -118,9 +121,14 @@ Newton's method is used to solve this equation for the scalars
 :math:`\mathbf{X}_i^{\mathrm{surf}}` and :math:`\mathbf{X}_j^{\mathrm{surf}}`.
 
 .. note::
-    TODO: Jacopo: a modified representation of the particle surface is defined
-    :math:`G(\mathbf{X}) = (F(\mathbf{X}))^{1/n_1}-1`
-    to make the function more linear and accelerate convergence.
+    A modified representation of the particle surface is defined as
+    :math:`G(\mathbf{X}) = (F(\mathbf{X})+1)^{1/n_1}-1` which is a radial distance function formulation.
+    This formulation is used to compute the surface points once the midway contact point is found.
+    This formulation is also used when the *geometric* keyword is specified in the pair_style command and the following optimization problem is solved instead for the contact point:
+    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} r_i \ G_i(\mathbf{X}) + r_j G_j(\mathbf{X})  \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
+    where :math:`r_i` and :math:`r_j` are the average radii of the two particles.
+    The geometric formulation thus yields a better approximation of the contact point
+    for particles with different sizes, and it can be slightly more robust for particles with high *block* exponents.    
 
 A hierarchical approach is used to limit the cost of contact detection.
 First, intersection of the bounding spheres of the two particles of bounding
@@ -157,8 +165,14 @@ The Hookean style use this formula:
 
 The Hertzian style uses this formula:
 
-.. note::
-    TODO: Jacopo, Hertz force calculation using the mean curvature
+.. math::
+
+   F_{hz} = \sqrt{\delta} \sqrt{\frac{R_i R_j}{R_i + R_j}} F_{hk} =
+     \sqrt{\delta} \sqrt{\frac{R_i R_j}{R_i + R_j}}
+     \Big[ (k_n \delta \mathbf{n}_{ij} -
+       m_{eff} \: \gamma_n \mathbf{ v}_n) -
+       (k_t \boldsymbol{\Delta} \mathbf{s}_t +
+       m_{eff} \: \gamma_t \mathbf{v}_t) \Big]
 
 In both equations the first parenthesized term is the normal force
 between the two particles and the second parenthesized term is the
@@ -182,6 +196,7 @@ The other quantities in the equations are as follows:
 * :math:`n_{ij} =` unit vector along the line connecting the centers of the 2 particles
 * :math:`V_n =` normal component of the relative velocity of the 2 particles
 * :math:`V_t =` tangential component of the relative velocity of the 2 particles
+* :math:`R_i, R_j =` approximated radii of the curvature of the two particles at the contact point
 
 The :math:`K_n`, :math:`K_t`, :math:`\gamma_n`, and :math:`\gamma_t`
 coefficients are specified as parameters to the pair_style command.  If
@@ -202,12 +217,14 @@ in the force equation so that the specified :math:`\gamma_n` is in units
 of (1/time), :math:`K_t` is in units of (force/distance), and
 :math:`\gamma_t` is in units of (1/time).
 
-.. note::
-    TODO: Jacopo, modify below.
-
 The Hertzian model is one where the normal push-back force for two
 overlapping particles is proportional to the area of overlap of the
 two particles, and is thus a non-linear function of overlap distance.
+At each point on the surface of superellipsoids there are two
+principal radii of curvature. For simplicity, the Hertzian model
+approximates the contact radius of each particle, :math:`R`, as either 
+the inverse of the mean curvature or as the gaussian curvature coefficient
+if the *curvature_gaussian* keyword is used.
 Thus Kn has units of force per area and is thus specified in units of
 (pressure).  The effects of absolute particle size (monodispersity)
 and relative size (polydispersity) are captured in the radii-dependent
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
new file mode 100644
index 00000000000..c3c2240f24b
--- /dev/null
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -0,0 +1,81 @@
+# Test demo for LAPACK functions in ASPHERE package
+# Lattice wall drop test
+
+units           si
+atom_style      ellipsoid
+dimension       3
+boundary        p p p
+comm_modify     vel yes
+newton          off
+
+# 1. Setup Simulation Box
+# Expanded box to accommodate the floor and falling height
+region          box block 0 15 0 15 0 20
+create_box      2 box
+
+# 3. Create Lattice Wall (Type 1)
+# Use a dense lattice (spacing 1.0) to ensure no gaps in the Y-direction (size 1.0).
+# Note: Overlap in X (size 2.0) is ignored because this group is frozen.
+lattice         sc 1.0
+region          floor_reg block 0 15 0 15 0 0.5
+create_atoms    1 region floor_reg
+
+# 4. Create Falling Particles (Type 2)
+# Create 50 random particles in the air
+region          drop_zone block 2 13 2 13 5 15
+create_atoms    2 random 100 12345 drop_zone overlap 4.0 maxtry 10000
+
+# 2. Material Properties
+# Type 1: Wall particles
+set             type 1 mass 1.0
+set             type 1 shape 1.8 1.0 1.0
+
+# Type 2: Falling particles
+set             type 2 mass 1.0
+set             type 2 shape 2.0 1.0 1.0
+
+# Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
+# Necessary to prevent division-by-zero in the dump variables below
+set             group all block 2.0 2.0 
+
+
+# 5. Define Groups
+group           wall type 1
+group           mobile type 2
+
+# Prevent the frozen wall particles (type 1) from interacting with each other
+neigh_modify exclude type 1 1
+
+# 6. Interaction / Pair Style
+# Kept exactly as requested
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 1.0 0 bounding_box
+pair_coeff      * *
+
+# 7. Computes and Output
+compute         diameter all property/atom shapex shapey shapez
+compute         orient all property/atom quatw quati quatj quatk
+compute         block all property/atom block1 block2
+
+# Define atom variables from block
+variable        phi atom "2/c_block[2]"
+variable        theta atom "2/c_block[1]"
+
+dump            mydump all custom 10000 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# Explicit mapping for Ovito
+dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+
+# 8. Dynamics
+# Freeze the wall (Type 1) so it acts as a static floor
+fix             1 wall freeze
+
+# Apply gravity only to the mobile particles (Type 2)
+fix             2 mobile gravity 9.81 vector 0 0 -1
+
+# Integrate Equations of Motion (NVE/Asphere)
+# Applied to all, but fix freeze overrides integration for the wall
+fix             3 all nve/asphere
+
+# 9. Run
+thermo          1000
+timestep        0.00001
+run             2000000
\ No newline at end of file
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
deleted file mode 100644
index 26267fcfe9f..00000000000
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_box_compress
+++ /dev/null
@@ -1,55 +0,0 @@
-# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
-
-units           lj
-atom_style      ellipsoid
-dimension       3
-boundary        p p p       
-comm_modify     vel yes
-newton          off         
-
-# Create a sparse box first to avoid initial overlaps
-region          box block 0 15 0 15 0 15
-create_box      1 box
-
-# Create particles on a lattice (10x10x10 = 1000 particles)
-lattice         sc 1.2      # Spacing > 1.0 to ensure no initial overlap
-create_atoms    1 box
-
-set             group all mass 1.0
-set             group all shape 0.5 0.4 0.4 
-set             group all block 4.0 4.0
-
-# Rotation: Randomize orientation so we get Face-Face, Edge-Edge, and Corner-Corner
-set             group all quat/random 12345
-
-pair_style      gran/hooke/history/ellipsoid 1000.0 0.0 1.0 0.0 0.5 0 bounding_box
-pair_coeff      * *
-
-# Neighbor list, need to check
-neighbor        0.2 bin
-neigh_modify    delay 0 every 1 check yes
-
-# 5. Computes for Visualization
-compute shape all property/atom shapex shapey shapez
-compute orient all property/atom quatw quati quatj quatk
-compute block all property/atom block1 block2
-variable phi atom "2/c_block[2]"
-variable theta atom "2/c_block[1]"
-dump 1 all custom 100 compress.lammpstrj id x y z vx vy vz fx fy fz c_shape[*] c_orient[*] v_phi v_theta
-# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
-dump_modify 1 colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
-
-
-fix             1 all nve/asphere
-
-timestep        0.001
-thermo_style    custom step atoms ke vol cpu
-thermo          100
-
-# Settling particles
-run             1000
-
-# run compression
-# Compress the periodic box by 1% every unit of time, forces the particles to jam together
-fix             2 all deform 1 x erate -0.01 y erate -0.01 z erate -0.01 remap v
-run             5000
\ No newline at end of file

From 89fcf42b4baa9fc78fa05a7f5461c7006cf99e22 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 16 Jan 2026 15:20:20 +0100
Subject: [PATCH 093/174] Improved previous contact point carry
 over.implemented transfer_history

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 40 +++++++++----
 src/ASPHERE/math_extra_superellipsoids.cpp    | 60 +++++++++++++++----
 src/ASPHERE/math_extra_superellipsoids.h      |  5 +-
 .../pair_gran_hertz_history_ellipsoid.cpp     | 32 +++++-----
 src/GRANULAR/pair_gran_hooke_history.cpp      |  2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     | 49 +++++++++------
 .../pair_gran_hooke_history_ellipsoid.h       |  1 +
 7 files changed, 130 insertions(+), 59 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index c3c2240f24b..332af7a5650 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -4,10 +4,13 @@
 units           si
 atom_style      ellipsoid
 dimension       3
-boundary        p p p
+boundary        p p p 
 comm_modify     vel yes
 newton          off
 
+neighbor 0.5 bin
+neigh_modify   delay 0 every 1 check yes
+
 # 1. Setup Simulation Box
 # Expanded box to accommodate the floor and falling height
 region          box block 0 15 0 15 0 20
@@ -17,18 +20,26 @@ create_box      2 box
 # Use a dense lattice (spacing 1.0) to ensure no gaps in the Y-direction (size 1.0).
 # Note: Overlap in X (size 2.0) is ignored because this group is frozen.
 lattice         sc 1.0
-region          floor_reg block 0 15 0 15 0 0.5
+region          floor_reg block 0 15 0 15 0 1
+region          side1_reg block 0 1 0 15 0 20
+region          side2_reg block 14 15 0 15 0 20
+region          side3_reg block 0 15 0 1 0 20
+region          side4_reg block 0 15 14 15 0 20
+
+# make a union of regions for the walls
+region          wall_reg union 5 floor_reg side1_reg side2_reg side3_reg side4_reg
+
 create_atoms    1 region floor_reg
 
 # 4. Create Falling Particles (Type 2)
 # Create 50 random particles in the air
 region          drop_zone block 2 13 2 13 5 15
-create_atoms    2 random 100 12345 drop_zone overlap 4.0 maxtry 10000
+create_atoms    2 random 100 12345 drop_zone overlap 2.0 maxtry 1000 
 
 # 2. Material Properties
 # Type 1: Wall particles
 set             type 1 mass 1.0
-set             type 1 shape 1.8 1.0 1.0
+set             type 1 shape 1.0 1.0 1.0
 
 # Type 2: Falling particles
 set             type 2 mass 1.0
@@ -36,19 +47,18 @@ set             type 2 shape 2.0 1.0 1.0
 
 # Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
 # Necessary to prevent division-by-zero in the dump variables below
-set             group all block 2.0 2.0 
+set             type 1 block 8.0 8.0 
+set             type 2 block 8.0 8.0 
 
 
 # 5. Define Groups
 group           wall type 1
 group           mobile type 2
 
-# Prevent the frozen wall particles (type 1) from interacting with each other
-neigh_modify exclude type 1 1
 
 # 6. Interaction / Pair Style
 # Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 1.0 0 bounding_box
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 1.0 0 bounding_box geometric
 pair_coeff      * *
 
 # 7. Computes and Output
@@ -60,22 +70,26 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 10000 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 1000 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
 # 8. Dynamics
 # Freeze the wall (Type 1) so it acts as a static floor
-fix             1 wall freeze
+# fix             1 wall freeze
+
+# Prevent the frozen wall particles (type 1) from interacting with each other
+neigh_modify exclude group wall wall
+
 
 # Apply gravity only to the mobile particles (Type 2)
 fix             2 mobile gravity 9.81 vector 0 0 -1
 
 # Integrate Equations of Motion (NVE/Asphere)
 # Applied to all, but fix freeze overrides integration for the wall
-fix             3 all nve/asphere
+fix             3 mobile nve/asphere
 
 # 9. Run
 thermo          1000
-timestep        0.00001
-run             2000000
\ No newline at end of file
+timestep        0.0001
+run             30000
\ No newline at end of file
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 60d05052b36..c9ee9f28eee 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -23,7 +23,6 @@
 // #include "math_const.h"
 
 // #include <algorithm>
-// #include <cstdio>
 // #include <cstring>
 
 extern "C" { // General Matrices
@@ -34,7 +33,7 @@ extern "C" { // General Matrices
 namespace MathExtraSuperellipsoids {
 
 static constexpr int ITERMAX_NR = 100;
-static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
+static constexpr double TOL_NR_RES = 1e-5 * 1e-5;
 static constexpr double TOL_NR_POS = 1e-6 * 1e-6;
 
 static constexpr int ITERMAX_LS = 10;
@@ -290,14 +289,14 @@ double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double*
 double shape_and_derivatives_global(const double* xc, const double R[3][3], 
     const double* shape, const double* block, const int flag, 
     const double* X0, double* grad, double hess[3][3],
-    int formulation, double avg_radius) 
+    const int formulation, const double avg_radius) 
 {
   double xlocal[3], tmp_v[3], tmp_m[3][3];
   MathExtra::sub3(X0, xc, tmp_v); 
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
   double shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
   if (formulation == FORMULATION_GEOMETRIC) {
-      apply_regularization_shape_function(block[0], avg_radius, &shapefunc, tmp_v, hess);
+     apply_regularization_shape_function(block[0], avg_radius, &shapefunc, tmp_v, hess);
   }
   MathExtra::matvec(R, tmp_v, grad);
   MathExtra::times3_transpose(hess, R, tmp_m);
@@ -480,9 +479,9 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
           a = 1; // reset a to 1 for proper step size in geometric formulation
           if (spatial_residual_norm > max_step) {
               double scale = max_step / spatial_residual_norm;
-              residual[0] *= scale;
-              residual[1] *= scale;
-              residual[2] *= scale;
+              rhs[0] *= scale;
+              rhs[1] *= scale;
+              rhs[2] *= scale;
           }
       }
 
@@ -569,6 +568,23 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       X0[2] += rhs[2];
       X0[3] += rhs[3];
       norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian, formulation, avg_radius_i, avg_radius_j);
+      if (norm < TOL_NR_RES) {
+        converged = true;
+        // must re-compute the normal 'nij' for this final point
+        double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
+        MathExtra::sub3(X0, xci, tmp_v);
+        MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
+        
+        // We only need the gradient for the normal
+        shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
+        if (formulation == FORMULATION_GEOMETRIC) {
+            // If you use regularization, apply it here too for consistency
+            apply_regularization_shape_function(blocki[0], avg_radius_i, &shapefunc[0], tmp_v, hess_dummy);
+        }
+        MathExtra::matvec(Ri, tmp_v, gradi);
+        MathExtra::normalize3(gradi, nij);
+      }
+
     } else {
       X0[0] = X_line[0];
       X0[1] = X_line[1];
@@ -580,16 +596,36 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       break;
   }
 
+  // If we ran out of iterations, check if the residual is acceptable.
+  // We ignore the "step size" check here because sliding on flat faces (N=6,8)
+  // often keeps moving while maintaining a perfect residual.
+  if (!converged && norm < TOL_NR_RES) {
+       converged = true;
+       
+       // Re-compute the normal 'nij' for this final point
+       // because the loop broke without updating it for the final X0.
+       double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
+       MathExtra::sub3(X0, xci, tmp_v);
+       MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
+       
+       shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
+       if (formulation == FORMULATION_GEOMETRIC) {
+           apply_regularization_shape_function(blocki[0], avg_radius_i, &shapefunc[0], tmp_v, hess_dummy);
+       }
+       MathExtra::matvec(Ri, tmp_v, gradi);
+       MathExtra::normalize3(gradi, nij);
+  }
+
   // LAPACK dgetrs() error values are negative, return values:
   // 2 = failed convergence
   // 1 = converged but grains not touching
   // 0 = converged and grains touching
-  if (!converged)
-    return 2; // TODO: consider not failing if not converged but shapefuncs positive (i.e., no contact)
-              // JB: might be risky to assume no contact if not converged, NR might have gone to a far away point
+  if (!converged){
+    if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
+    return 2;} // not failing if not converged but shapefuncs positive (i.e., no contact)
+              // might be risky to assume no contact if not converged, NR might have gone to a far away point
               // but no guarantee there is no contact
-  if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
-    return 1;
+  if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
   return 0;
 }
 
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index f1ed6831730..2b9c664a668 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -20,6 +20,7 @@
 
 #include <cmath>
 #include "math_extra.h"
+#include <iostream>
 
 
 namespace MathExtraSuperellipsoids {
@@ -67,7 +68,7 @@ namespace MathExtraSuperellipsoids {
   double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
   double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
-  double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3]);
+  double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3], const int formulation, const double avg_radius);
 
   double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual);
   void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
@@ -76,7 +77,7 @@ namespace MathExtraSuperellipsoids {
                                        const double* X, double* shapefunc, double* residual, double* jacobian, const int formulation, const double avg_radius_i, const double avg_radius_j);
   int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                               const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                              double* X0, double* nij, int formulation = FORMULATION_ALGEBRAIC);
+                              double* X0, double* nij, const int formulation);
 
   void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]); 
   // functions to compute shape function and gradient only when called for surface point calculation given contact point
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 3589b474046..52eab6076df 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -147,6 +147,8 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
       radsum = radi + radj;
 
       X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+
 
       // TODO: Below could be a `touch()` function
       bool touching;
@@ -173,19 +175,19 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
             // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[i][0] + X0_prev[0];
-            X0[1] = x[i][1] + X0_prev[1];
-            X0[2] = x[i][2] + X0_prev[2];
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
             X0[3] = X0_prev[3];
             int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                            x[j], Rj, shapej, blockj, flagj,
-                                                                           X0, nij);
+                                                                           X0, nij, contact_formulation);
             if (status == 0)
               touching = true;
             else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -216,7 +218,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
               // Avoid incorrect values of n1/n2 - 2 in second derivatives.
               int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
                                                                              x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                             X0, nij);
+                                                                             X0, nij, contact_formulation);
               if (status == 0)
                 touching = true;
               else if (status == 1)
@@ -239,9 +241,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         // Store contact point with respect to grain i for next time step
         // This is crucial for periodic BCs when grains can move by large amount in one time step
         // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[i][0];
-        X0_prev[1] = X0[1] - x[i][1];
-        X0_prev[2] = X0[2] - x[i][2];
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
         X0_prev[3] = X0[3];
 
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
@@ -480,6 +482,7 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
       error->all(FLERR, "Illegal pair_style command");
   }
 
+  size_history = 8; // reset to default size
   if (bounding_box == 0) size_history--;
 
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
@@ -557,14 +560,15 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   flagj = bonus[ellipsoid[j]].type;
   double* X0_prev = &allhistory[3 + size_history * neighprev];
   if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
     // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0];
-    X0[1] = X0_prev[1];
-    X0[2] = X0_prev[2];
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
     X0[3] = X0_prev[3];
     int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                    x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij);
+                                                                   X0, nij, contact_formulation);
     if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -592,7 +596,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       // Avoid incorrect values of n1/n2 - 2 in second derivatives.
       int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
                                                                      x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                     X0, nij);
+                                                                     X0, nij, contact_formulation);
       if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index 0679be9173c..c2662476d0a 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -811,4 +811,4 @@ double PairGranHookeHistory::memory_usage()
 {
   double bytes = (double) nmax * sizeof(double);
   return bytes;
-}
+}
\ No newline at end of file
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 11081b108ae..a14e2afb2be 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -29,6 +29,7 @@
 #include "update.h"
 #include "math_extra.h" // probably needed for some computations
 #include "math_extra_superellipsoids.h"
+#include <iostream>
 
 #include <cmath>
 #include <cstring>
@@ -36,7 +37,7 @@
 using namespace LAMMPS_NS;
 
 
-static constexpr int NUMSTEP_INITIAL_GUESS = 8;
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 
 /* ---------------------------------------------------------------------- */
 
@@ -63,7 +64,7 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
 
   // keep default behavior of history[i][j] = -history[j][i]
 
-  nondefault_history_transfer = 0;
+  nondefault_history_transfer = 1;
 
   // create dummy fix as placeholder for FixNeighHistory
   // this is so final order of Modify:fix will conform to input script
@@ -201,6 +202,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
       radsum = radi + radj;
 
       X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
 
       // TODO: Below could be a `touch()` function
       bool touching;
@@ -227,19 +229,21 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
             // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[i][0] + X0_prev[0];
-            X0[1] = x[i][1] + X0_prev[1];
-            X0[2] = x[i][2] + X0_prev[2];
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
             X0[3] = X0_prev[3];
+            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
+            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
             int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                            x[j], Rj, shapej, blockj, flagj,
-                                                                           X0, nij);
+                                                                           X0, nij, contact_formulation);
             if (status == 0)
               touching = true;
             else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, i, j);
+              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -270,13 +274,13 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               // Avoid incorrect values of n1/n2 - 2 in second derivatives.
               int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
                                                                              x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                             X0, nij);
+                                                                             X0, nij, contact_formulation);
               if (status == 0)
                 touching = true;
               else if (status == 1)
                 touching = false;
               else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-                error->all(FLERR, "Ellipsoid contact detection (new contact) failed with status {} ", status);
+                error->all(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {} at iteration morph {}", status, atom->tag[i], atom->tag[j], iter_ig);
             }
           }
         }
@@ -293,9 +297,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // Store contact point with respect to grain i for next time step
         // This is crucial for periodic BCs when grains can move by large amount in one time step
         // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[i][0];
-        X0_prev[1] = X0[1] - x[i][1];
-        X0_prev[2] = X0[2] - x[i][2];
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
         X0_prev[3] = X0[3];
 
         double nji[3] = { -nij[0], -nij[1], -nij[2] };
@@ -532,6 +536,7 @@ void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
       error->all(FLERR, "Illegal pair_style command");
   }
 
+  size_history = 8; // reset to default for safety
   if (bounding_box == 0) size_history--;
 
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
@@ -825,14 +830,15 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   flagj = bonus[ellipsoid[j]].type;
   double* X0_prev = &allhistory[3 + size_history * neighprev];
   if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
     // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[i][0];
-    X0[1] = X0_prev[1] + x[i][1];
-    X0[2] = X0_prev[2] + x[i][2];
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
     X0[3] = X0_prev[3];
     int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
                                                                    x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij);
+                                                                   X0, nij, contact_formulation);
     if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -860,7 +866,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       // Avoid incorrect values of n1/n2 - 2 in second derivatives.
       int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
                                                                      x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                     X0, nij);
+                                                                     X0, nij, contact_formulation);
       if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -1033,3 +1039,12 @@ double PairGranHookeHistoryEllipsoid::memory_usage()
   double bytes = (double) nmax * sizeof(double);
   return bytes;
 }
+
+void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/, int /*jtype*/)
+{
+  // Simple direct copy of all history variables (shear, contact point, axis)
+  for (int i = 0; i < size_history; i++) {
+    // if (i < 3) target[i] = -source[i]; //shear
+    target[i] = source[i];
+  }
+}
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 7f21331cf8c..707b2d26f2b 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -42,6 +42,7 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int pack_forward_comm(int, int *, double *, int, int *) override;
   void unpack_forward_comm(int, int, double *) override;
   double memory_usage() override;
+  void transfer_history(double *, double *, int, int) override;
 
  protected:
   double kn, kt, gamman, gammat, xmu;

From 06bc8a97f38637a9714efca2a398b4fadfa75b71 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 20 Jan 2026 17:20:32 +0100
Subject: [PATCH 094/174] Rescale residuals for both formualtions. Switched to
 warnings for failed tolerance in NR

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 15 +++--
 src/ASPHERE/math_extra_superellipsoids.cpp    | 59 +++++++++++++++----
 src/ASPHERE/math_extra_superellipsoids.h      |  2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     | 12 ++--
 .../utils/test_math_extra_superellipsoids.cpp | 47 +--------------
 5 files changed, 65 insertions(+), 70 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 332af7a5650..818847c2a1c 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -20,7 +20,7 @@ create_box      2 box
 # Use a dense lattice (spacing 1.0) to ensure no gaps in the Y-direction (size 1.0).
 # Note: Overlap in X (size 2.0) is ignored because this group is frozen.
 lattice         sc 1.0
-region          floor_reg block 0 15 0 15 0 1
+region          floor_reg block 0 15 0 15 0 0.5
 region          side1_reg block 0 1 0 15 0 20
 region          side2_reg block 14 15 0 15 0 20
 region          side3_reg block 0 15 0 1 0 20
@@ -47,9 +47,8 @@ set             type 2 shape 2.0 1.0 1.0
 
 # Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
 # Necessary to prevent division-by-zero in the dump variables below
-set             type 1 block 8.0 8.0 
-set             type 2 block 8.0 8.0 
-
+set             type 1 block 4.0 4.0 
+set             type 2 block 4.0 4.0 
 
 # 5. Define Groups
 group           wall type 1
@@ -58,7 +57,7 @@ group           mobile type 2
 
 # 6. Interaction / Pair Style
 # Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 1.0 0 bounding_box geometric
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
 # 7. Computes and Output
@@ -70,7 +69,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 1000 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 100 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
@@ -91,5 +90,5 @@ fix             3 mobile nve/asphere
 
 # 9. Run
 thermo          1000
-timestep        0.0001
-run             30000
\ No newline at end of file
+timestep        0.001
+run             300000
\ No newline at end of file
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index c9ee9f28eee..9708581e9da 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -305,18 +305,49 @@ double shape_and_derivatives_global(const double* xc, const double R[3][3],
   return shapefunc;
 }
 
-double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual) {
-  // Equation (23)
+// double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual) {
+//   // Equation (23)
+//   MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
+//   residual[3] = shapefunci - shapefuncj;
+//   // Normalize residual Equation (23)
+//   // shape functions and gradients dimensions are not homogeneous
+//   // Gradient equality F1' + mu2 * F2' evaluated relative to magnitude of gradient ||F1'|| = ||mu2 * F2'||
+//   // Shape function equality F1 - F2 evaluated relative to magnitude of shape function + 1
+//   //    the shift f = polynomial - 1 is not necessary and cancels out in F1 - F2
+//   // Last component homogeneous to shape function
+//   return MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global) +
+//          residual[3] * residual[3] / ((shapefunci + 1) * (shapefunci + 1));
+// }
+
+double compute_residual(const double shapefunci, const double* gradi_global, 
+                        const double shapefuncj, const double* gradj_global, 
+                        const double mu2, double* residual, 
+                        const int formulation, const double radius_scale) {
+
+  // Equation (23): Spatial residual (Gradient match)
   MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
   residual[3] = shapefunci - shapefuncj;
-  // Normalize residual Equation (23)
-  // shape functions and gradients dimensions are not homogeneous
-  // Gradient equality F1' + mu2 * F2' evaluated relative to magnitude of gradient ||F1'|| = ||mu2 * F2'||
-  // Shape function equality F1 - F2 evaluated relative to magnitude of shape function + 1
-  //    the shift f = polynomial - 1 is not necessary and cancels out in F1 - F2
-  // Last component homogeneous to shape function
-  return MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global) +
-         residual[3] * residual[3] / ((shapefunci + 1) * (shapefunci + 1));
+
+  // --- Spatial Normalization ---
+  // Algebraic: Gradients are ~1/R. Dividing by lensq3 normalizes this.
+  // Geometric: Gradients are unit vectors. lensq3 is 1.0. This works for both.
+  double spatial_norm = MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global);
+
+  // --- Scalar Normalization ---
+  double scalar_denom;
+
+  if (formulation == FORMULATION_GEOMETRIC) {
+      // GEOMETRIC: F is a distance (Length).
+      scalar_denom = radius_scale; 
+  } else {
+      // ALGEBRAIC: F is dimensionless (approx 0 at surface).
+      scalar_denom = shapefunci + 1.0;
+  }
+  
+  // Prevent division by zero in weird edge cases (e.g. very negative shape function)
+  if (fabs(scalar_denom) < 1e-12) scalar_denom = 1.0;
+
+  return spatial_norm + (residual[3] * residual[3]) / (scalar_denom * scalar_denom);
 }
 
 void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
@@ -342,7 +373,7 @@ double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], c
   shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi, formulation, avg_radius_i);
   shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj, formulation, avg_radius_j);
   compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
-  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual);
+  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual, formulation, (avg_radius_i + avg_radius_j)/2.0);
 }
 
 
@@ -533,7 +564,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
 
 
-      norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual);
+      norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual, formulation, (avg_radius_i + avg_radius_j)/2.0);
 
       if ((norm <= TOL_NR_RES) &&
           (MathExtra::lensq3(rhs) * a * a <= TOL_NR_POS * lsq)) {
@@ -622,6 +653,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // 0 = converged and grains touching
   if (!converged){
     if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
+    std::cout << "Current residual norm: " << norm << std::endl;
+    std::cout << "Shape functions: " << shapefunc[0] << ", " << shapefunc[1] << std::endl;
+    std::cout << "Positions X0: " << X0[0] << ", " << X0[1] << ", " << X0[2] << ", mu2: " << X0[3] << std::endl;
+    std::cout << "Normal nij: " << nij[0] << ", " << nij[1] << ", " << nij[2] << std::endl;
     return 2;} // not failing if not converged but shapefuncs positive (i.e., no contact)
               // might be risky to assume no contact if not converged, NR might have gone to a far away point
               // but no guarantee there is no contact
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 2b9c664a668..1e458ea4875 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -70,7 +70,7 @@ namespace MathExtraSuperellipsoids {
   double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
   double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3], const int formulation, const double avg_radius);
 
-  double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual);
+  double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual, const int formulation, const double radius_scale);
   void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
   double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                        const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a14e2afb2be..0cb35ffee92 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -243,7 +243,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             else if (status == 1)
               touching = false;
             else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
+              error->one(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -279,8 +279,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
                 touching = true;
               else if (status == 1)
                 touching = false;
-              else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-                error->all(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {} at iteration morph {}", status, atom->tag[i], atom->tag[j], iter_ig);
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS){
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {}", status, atom->tag[i], atom->tag[j]);
+              }
             }
           }
         }
@@ -845,7 +847,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
         return 0.0;
     }
     if (status != 0)
-        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+        error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
   } else {
     double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
     double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
@@ -873,7 +875,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
         return 0.0;
       }
       if (status != 0)
-        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+        error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
     }
   }
   double overlap1, overlap2, omegai[3], omegaj[3];
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 04e8e6ce9d6..51faa10c43d 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -226,52 +226,10 @@ TEST(ContactPointAndNormal, sphere_geometric)
 
 }
 
-// TEST(ContactPointAndNormal, supersphere_mono_geometric)
-// {
-//   double r = 3.456;
-//   double overlap = -r / 5.0;
-//   double xci[3] = {-(r - overlap/2.0), 0.0, 0.0};
-//   double xcj[3] = {r - overlap/2.0, 0.0, 0.0};
-//   double shape[3] = {r, r, r};
-//   double R[3][3] = {{1.0, 0.0, 0.0},
-//                     {0.0, 1.0, 0.0},
-//                     {0.0, 0.0, 1.0}};
-
-//   std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
-//   int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-  
-//   // Analytical solution
-//   double nij_analytical[3] = {1.0, 0.0, 0.0};
-//   double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};    
-  
-//   for (auto n : blocks) {
-//     double block[2] = {n, n};
-//     int flag =  (n < 2.01) ? 0 : 1;
-
-//     // Contact detection
-//     // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
-//     double X0[4] = {overlap, overlap, 2*overlap, 1.0}, nij[3];
-
-//     int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shape, block, flag,
-//                                                                   xcj, R, shape, block, flag,
-//                                                                   X0, nij, method);
-
-//     std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
-//     ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
-//     ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
-//     ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
-//     ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
-
-//     ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
-//     ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
-//     ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
-//   }
-// }
-
 TEST(ContactPointAndNormal, supersphere_poly_geometric)
 {
   double r1 = 3.456;
-  double r2 = 3.0 * r1; // Polydisperse: radius 2 = 3 * radius 1
+  double r2 = 3.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
   double overlap = r1 / 5.0; 
   double xci[3] = {-(r1 - overlap/2.0), 0.0, 0.0};
   double xcj[3] = {  r2 - overlap/2.0 , 0.0, 0.0}; 
@@ -295,7 +253,7 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
     int flag =  (n < 2.01) ? 0 : 1;
 
     // Initial Guess: Offset from 0 to test convergence
-    double X0[4] = {overlap, overlap, overlap, 1.0}, nij[3]; 
+    double X0[4] = {overlap/80, overlap/80, overlap/80, 1.0}, nij[3]; 
 
     int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shapei, block, flag,
                                                                   xcj, R, shapej, block, flag,
@@ -303,6 +261,7 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
 
     std::cout << "n=" << n << " Status=" << status << " Res: " 
               << X0[0] << " " << X0[1] << " " << X0[2] << " mu=" << X0[3] << std::endl;
+    
 
     ASSERT_EQ(status, 0) << "Failed to converge/detect contact for n=" << n;
     

From 80b0db324adc15ad658ca8acb8eaadc0f0dfbe82 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 20 Jan 2026 17:49:21 +0100
Subject: [PATCH 095/174] Formatted with clang

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  |    6 +-
 src/ASPHERE/math_extra_superellipsoids.cpp    |    2 -
 src/ASPHERE/math_extra_superellipsoids.h      | 1651 +++++++++--------
 .../pair_gran_hertz_history_ellipsoid.cpp     |   12 +-
 .../pair_gran_hertz_history_ellipsoid.h       |    2 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     |  178 +-
 .../pair_gran_hooke_history_ellipsoid.h       |   11 +-
 .../utils/test_math_extra_superellipsoids.cpp |  426 ++---
 8 files changed, 1180 insertions(+), 1108 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 818847c2a1c..5dff3952976 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -47,8 +47,8 @@ set             type 2 shape 2.0 1.0 1.0
 
 # Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
 # Necessary to prevent division-by-zero in the dump variables below
-set             type 1 block 4.0 4.0 
-set             type 2 block 4.0 4.0 
+set             type 1 block 5.0 5.0 
+set             type 2 block 5.0 5.0 
 
 # 5. Define Groups
 group           wall type 1
@@ -57,7 +57,7 @@ group           mobile type 2
 
 # 6. Interaction / Pair Style
 # Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box geometric
 pair_coeff      * *
 
 # 7. Computes and Output
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 9708581e9da..2510b57077a 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -562,8 +562,6 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       }
       MathExtra::matvec(Rj, tmp_v, gradj);
 
-
-
       norm = compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X_line[3], residual, formulation, (avg_radius_i + avg_radius_j)/2.0);
 
       if ((norm <= TOL_NR_RES) &&
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 1e458ea4875..d6f3fe77616 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -18,331 +18,397 @@
 #ifndef LMP_MATH_EXTRA_SUPERELLIPOIDS_H
 #define LMP_MATH_EXTRA_SUPERELLIPOIDS_H
 
-#include <cmath>
 #include "math_extra.h"
+#include <cmath>
 #include <iostream>
 
-
 namespace MathExtraSuperellipsoids {
-  inline constexpr double TIKHONOV_SCALE = 1e-14; // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
-
-  enum ContactFormulation {
-    FORMULATION_ALGEBRAIC = 0,
-    FORMULATION_GEOMETRIC = 1
-    };
-
-  enum CurvatureModel {
-    CURV_MEAN = 0,
-    CURV_GAUSSIAN = 1
-    };
-
-  // needed for shape functions grad and matrix 
-  void global2local_vector(const double v[3], const double *quat, double local_v[3]); // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
-
-  inline double det4_M44_zero(const double m[4][4]);
-
-  // 4 by 4 sytems solvers, they all overwrite b with the solution
-  inline bool solve_4x4_manual(double A[16], double b[4]);  
-  inline bool solve_4x4_robust(double A[16], double b[4]);
-  inline bool solve_4x4_robust_unrolled(double A[16], double b[4]); 
-
-  // ADD CONTACT DETECTION HERE
-  inline bool check_oriented_bounding_boxes(const double* xc1, const double R1[3][3], const double* shape1,
-                                        const double* xc2, const double R2[3][3], const double* shape2, 
-                                        double* cached_axis);
-
-  inline bool check_intersection_axis(const int axis_id, const double C[3][3], const double AbsC[3][3], 
-                                      const double* center_distance_box1, const double* center_distance_box2,
-                                      const double* a, const double* b);
-
-  inline bool check_intersection_axis_and_get_seed(const double* xc1, const double R1[3][3], const double* shape1,
-                                         const double* xc2, const double R2[3][3], const double* shape2,
-                                        double* cached_axis, double* contact_point);
-
-  inline int determine_contact_point_wall(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                        const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap);
-
-  
-  // shape function computations, using flag to optimize for special cases (ellipsoid, superquadric with n1=n2)
-  double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]);
-  double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]);
-  double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]);
-  double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]);
-  double shape_and_derivatives_global(const double* xc, const double R[3][3], const double* shape, const double* block, const int flag, const double* X0, double* grad, double hess[3][3], const int formulation, const double avg_radius);
-
-  double compute_residual(const double shapefunci, const double* gradi_global, const double shapefuncj, const double* gradj_global, const double mu2, double* residual, const int formulation, const double radius_scale);
-  void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian);
-  double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                       const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                       const double* X, double* shapefunc, double* residual, double* jacobian, const int formulation, const double avg_radius_i, const double avg_radius_j);
-  int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                              const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                              double* X0, double* nij, const int formulation);
-
-  void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]); 
-  // functions to compute shape function and gradient only when called for surface point calculation given contact point
-  double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad);
-  double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad);
-  
-  double compute_overlap_distance(const double* shape, const double* block, const double Rot[3][3], const int flag, const double* global_point, const double* global_normal, const double* center);
-  
-  double mean_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc);
-  double gaussian_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc);
-
-};
-
+inline constexpr double TIKHONOV_SCALE =
+    1e-14;    // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
+
+enum ContactFormulation { FORMULATION_ALGEBRAIC = 0, FORMULATION_GEOMETRIC = 1 };
+
+enum CurvatureModel { CURV_MEAN = 0, CURV_GAUSSIAN = 1 };
+
+// needed for shape functions grad and matrix
+void global2local_vector(
+    const double v[3], const double *quat,
+    double local_v
+        [3]);    // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
+
+inline double det4_M44_zero(const double m[4][4]);
+
+// 4 by 4 sytems solvers, they all overwrite b with the solution
+inline bool solve_4x4_manual(double A[16], double b[4]);
+inline bool solve_4x4_robust(double A[16], double b[4]);
+inline bool solve_4x4_robust_unrolled(double A[16], double b[4]);
+
+// ADD CONTACT DETECTION HERE
+inline bool check_oriented_bounding_boxes(const double *xc1, const double R1[3][3],
+                                          const double *shape1, const double *xc2,
+                                          const double R2[3][3], const double *shape2,
+                                          double *cached_axis);
+
+inline bool check_intersection_axis(const int axis_id, const double C[3][3],
+                                    const double AbsC[3][3], const double *center_distance_box1,
+                                    const double *center_distance_box2, const double *a,
+                                    const double *b);
+
+inline bool check_intersection_axis_and_get_seed(const double *xc1, const double R1[3][3],
+                                                 const double *shape1, const double *xc2,
+                                                 const double R2[3][3], const double *shape2,
+                                                 double *cached_axis, double *contact_point);
+
+inline int determine_contact_point_wall(const double *xci, const double Ri[3][3],
+                                        const double *shapei, const double *blocki, const int flagi,
+                                        const double *x_wall, const double *n_wall, double *X0,
+                                        double *nij, double *overlap);
+
+// shape function computations, using flag to optimize for special cases (ellipsoid, superquadric with n1=n2)
+double shape_and_derivatives_local(const double *xlocal, const double *shape, const double *block,
+                                   const int flag, double *grad, double hess[3][3]);
+double shape_and_derivatives_local_superquad(const double *xlocal, const double *shape,
+                                             const double *block, double *grad, double hess[3][3]);
+double shape_and_derivatives_local_n1equaln2(const double *xlocal, const double *shape,
+                                             const double n, double *grad, double hess[3][3]);
+double shape_and_derivatives_local_ellipsoid(const double *xlocal, const double *shape,
+                                             double *grad, double hess[3][3]);
+double shape_and_derivatives_global(const double *xc, const double R[3][3], const double *shape,
+                                    const double *block, const int flag, const double *X0,
+                                    double *grad, double hess[3][3], const int formulation,
+                                    const double avg_radius);
+
+double compute_residual(const double shapefunci, const double *gradi_global,
+                        const double shapefuncj, const double *gradj_global, const double mu2,
+                        double *residual, const int formulation, const double radius_scale);
+void compute_jacobian(const double *gradi_global, const double hessi_global[3][3],
+                      const double *gradj_global, const double hessj_global[3][3], const double mu2,
+                      double *jacobian);
+double compute_residual_and_jacobian(const double *xci, const double Ri[3][3], const double *shapei,
+                                     const double *blocki, const int flagi, const double *xcj,
+                                     const double Rj[3][3], const double *shapej,
+                                     const double *blockj, const int flagj, const double *X,
+                                     double *shapefunc, double *residual, double *jacobian,
+                                     const int formulation, const double avg_radius_i,
+                                     const double avg_radius_j);
+int determine_contact_point(const double *xci, const double Ri[3][3], const double *shapei,
+                            const double *blocki, const int flagi, const double *xcj,
+                            const double Rj[3][3], const double *shapej, const double *blockj,
+                            const int flagj, double *X0, double *nij, const int formulation);
+
+void apply_regularization_shape_function(double n1, const double avg_radius, double *value,
+                                         double *grad, double hess[3][3]);
+// functions to compute shape function and gradient only when called for surface point calculation given contact point
+double shape_and_gradient_local_superquad_surfacesearch(const double *xlocal, const double *shape,
+                                                        const double *block, double *grad);
+double shape_and_gradient_local_n1equaln2_surfacesearch(const double *xlocal, const double *shape,
+                                                        const double n, double *grad);
+
+double compute_overlap_distance(const double *shape, const double *block, const double Rot[3][3],
+                                const int flag, const double *global_point,
+                                const double *global_normal, const double *center);
+
+double mean_curvature_superellipsoid(const double *shape, const double *block, const int flag,
+                                     const double R[3][3], const double *surf_global_point,
+                                     const double *xc);
+double gaussian_curvature_superellipsoid(const double *shape, const double *block, const int flag,
+                                         const double R[3][3], const double *surf_global_point,
+                                         const double *xc);
+
+};    // namespace MathExtraSuperellipsoids
 
 /* ----------------------------------------------------------------------
    determinant of a 4x4 matrix M with M[3][3] assumed to be zero
 ------------------------------------------------------------------------- */
 inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 {
-    // Define the 3x3 submatrices (M_41, M_42, M_43)
-
-    // Submatrix M_41 
-    double m41[3][3] = {
-        {m[0][1], m[0][2], m[0][3]},
-        {m[1][1], m[1][2], m[1][3]},
-        {m[2][1], m[2][2], m[2][3]}
-    };
-
-    // Submatrix M_42 
-    double m42[3][3] = {
-        {m[0][0], m[0][2], m[0][3]},
-        {m[1][0], m[1][2], m[1][3]},
-        {m[2][0], m[2][2], m[2][3]}
-    };
-
-    // Submatrix M_43
-    double m43[3][3] = {
-        {m[0][0], m[0][1], m[0][3]},
-        {m[1][0], m[1][1], m[1][3]},
-        {m[2][0], m[2][1], m[2][3]}
-    };
-    
-    // Calculate the determinant using the simplified Laplace expansion (M_44=0)
-    // det(M) = -M[3][0]*det(M_41) + M[3][1]*det(M_42) - M[3][2]*det(M_43)
-    
-    double ans = -m[3][0] * MathExtra::det3(m41) 
-                 + m[3][1] * MathExtra::det3(m42) 
-                 - m[3][2] * MathExtra::det3(m43);
-                 
-    return ans;
-}
+  // Define the 3x3 submatrices (M_41, M_42, M_43)
 
-inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4]) {
-    
-    // 1. Pivot 0 
-    double inv0 = 1.0 / A[0];
-    double m1 = A[4] * inv0;
-    double m2 = A[8] * inv0;
-    double m3 = A[12] * inv0;
+  // Submatrix M_41
+  double m41[3][3] = {
+      {m[0][1], m[0][2], m[0][3]}, {m[1][1], m[1][2], m[1][3]}, {m[2][1], m[2][2], m[2][3]}};
 
-    A[5] -= m1 * A[1]; A[6] -= m1 * A[2]; A[7] -= m1 * A[3]; b[1] -= m1 * b[0];
-    A[9] -= m2 * A[1]; A[10] -= m2 * A[2]; A[11] -= m2 * A[3]; b[2] -= m2 * b[0];
-    A[13] -= m3 * A[1]; A[14] -= m3 * A[2]; A[15] -= m3 * A[3]; b[3] -= m3 * b[0];
+  // Submatrix M_42
+  double m42[3][3] = {
+      {m[0][0], m[0][2], m[0][3]}, {m[1][0], m[1][2], m[1][3]}, {m[2][0], m[2][2], m[2][3]}};
 
-    // 2. Pivot 1 
-    double inv1 = 1.0 / A[5];
-    double m4 = A[9] * inv1;
-    double m5 = A[13] * inv1;
+  // Submatrix M_43
+  double m43[3][3] = {
+      {m[0][0], m[0][1], m[0][3]}, {m[1][0], m[1][1], m[1][3]}, {m[2][0], m[2][1], m[2][3]}};
 
-    A[10] -= m4 * A[6]; A[11] -= m4 * A[7]; b[2] -= m4 * b[1];
-    A[14] -= m5 * A[6]; A[15] -= m5 * A[7]; b[3] -= m5 * b[1];
+  // Calculate the determinant using the simplified Laplace expansion (M_44=0)
+  // det(M) = -M[3][0]*det(M_41) + M[3][1]*det(M_42) - M[3][2]*det(M_43)
 
-    // 3. Pivot 2
-    double inv2 = 1.0 / A[10];
-    double m6 = A[14] * inv2;
+  double ans = -m[3][0] * MathExtra::det3(m41) + m[3][1] * MathExtra::det3(m42) -
+      m[3][2] * MathExtra::det3(m43);
 
-    A[15] -= m6 * A[11]; b[3] -= m6 * b[2];
-
-    // 4. Backward Substitution
-    b[3] = b[3] / A[15];
-    b[2] = (b[2] - A[11] * b[3]) * inv2;
-    b[1] = (b[1] - A[7] * b[3] - A[6] * b[2]) * inv1;
-    b[0] = (b[0] - A[3] * b[3] - A[2] * b[2] - A[1] * b[1]) * inv0;
-
-    return true;
+  return ans;
 }
 
-inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4]) {
-    // Helper lambda to access A[row, col]
-    auto at = [&](int r, int c) -> double& { return A[r * 4 + c]; };
-
-    // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
-    
-    for (int i = 0; i < 3; ++i) { // Loop over columns 0, 1, 2
-        // 1. Find the Pivot (Max absolute value in this column)
-        int pivot_row = i;
-        double max_val = std::abs(at(i, i));
-
-        for (int k = i + 1; k < 4; ++k) {
-            double val = std::abs(at(k, i));
-            if (val > max_val) {
-                max_val = val;
-                pivot_row = k;
-            }
-        }
-
-        // 2. Singularity Check (The "Flat Particle" Guard)
-        if (max_val < 1e-14) return false;
-
-        // 3. Swap Rows if needed (Swap A rows AND b elements)
-        if (pivot_row != i) {
-            std::swap(b[i], b[pivot_row]);
-            for (int k = i; k < 4; ++k) { // Only need to swap from column 'i' onwards
-                std::swap(at(i, k), at(pivot_row, k));
-            }
-        }
-
-        // 4. Eliminate
-        double inv_pivot = 1.0 / at(i, i);
-        for (int k = i + 1; k < 4; ++k) {
-            double factor = at(k, i) * inv_pivot;
-            // A[k, i] becomes 0, no need to compute it.
-            // Update the rest of the row:
-            for (int j = i + 1; j < 4; ++j) {
-                at(k, j) -= factor * at(i, j);
-            }
-            // Update RHS
-            b[k] -= factor * b[i];
-        }
-    }
-
-    // Final Pivot Check for the last element
-    if (std::abs(at(3, 3)) < 1e-14) return false;
-
-    // --- BACKWARD SUBSTITUTION ---
-    b[3] /= at(3, 3);
-    b[2] = (b[2] - at(2, 3) * b[3]) / at(2, 2);
-    b[1] = (b[1] - at(1, 2) * b[2] - at(1, 3) * b[3]) / at(1, 1);
-    b[0] = (b[0] - at(0, 1) * b[1] - at(0, 2) * b[2] - at(0, 3) * b[3]) / at(0, 0);
-
-    return true;
+inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4])
+{
 
+  // 1. Pivot 0
+  double inv0 = 1.0 / A[0];
+  double m1 = A[4] * inv0;
+  double m2 = A[8] * inv0;
+  double m3 = A[12] * inv0;
+
+  A[5] -= m1 * A[1];
+  A[6] -= m1 * A[2];
+  A[7] -= m1 * A[3];
+  b[1] -= m1 * b[0];
+  A[9] -= m2 * A[1];
+  A[10] -= m2 * A[2];
+  A[11] -= m2 * A[3];
+  b[2] -= m2 * b[0];
+  A[13] -= m3 * A[1];
+  A[14] -= m3 * A[2];
+  A[15] -= m3 * A[3];
+  b[3] -= m3 * b[0];
+
+  // 2. Pivot 1
+  double inv1 = 1.0 / A[5];
+  double m4 = A[9] * inv1;
+  double m5 = A[13] * inv1;
+
+  A[10] -= m4 * A[6];
+  A[11] -= m4 * A[7];
+  b[2] -= m4 * b[1];
+  A[14] -= m5 * A[6];
+  A[15] -= m5 * A[7];
+  b[3] -= m5 * b[1];
+
+  // 3. Pivot 2
+  double inv2 = 1.0 / A[10];
+  double m6 = A[14] * inv2;
+
+  A[15] -= m6 * A[11];
+  b[3] -= m6 * b[2];
+
+  // 4. Backward Substitution
+  b[3] = b[3] / A[15];
+  b[2] = (b[2] - A[11] * b[3]) * inv2;
+  b[1] = (b[1] - A[7] * b[3] - A[6] * b[2]) * inv1;
+  b[0] = (b[0] - A[3] * b[3] - A[2] * b[2] - A[1] * b[1]) * inv0;
+
+  return true;
 }
 
-inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4]) {
-    
-    // // Tikhonov regularization could be applied here
-    // double trace = A[0] + A[5] + A[10];
-    // A[0]  += TIKHONOV_SCALE * trace;
-    // A[5]  += TIKHONOV_SCALE * trace;
-    // A[10] += TIKHONOV_SCALE * trace;
-
-     // --- COLUMN 0 ---
-    // 1. Find Pivot in Col 0
-    int p = 0; 
-    double max_val = std::abs(A[0]);
-    double val;
-
-    val = std::abs(A[4]); 
-    if (val > max_val) { max_val = val; p = 1; }
-    val = std::abs(A[8]); 
-    if (val > max_val) { max_val = val; p = 2; }
-    val = std::abs(A[12]);
-    if (val > max_val) { max_val = val; p = 3; }
-
-    if (max_val < 1e-14) return false;
-    // 2. Swap Row 0 with Row p
-    if (p != 0) {
-        int row_offset = p * 4;
-        std::swap(b[0], b[p]);
-        std::swap(A[0], A[row_offset]);     std::swap(A[1], A[row_offset + 1]);
-        std::swap(A[2], A[row_offset + 2]); std::swap(A[3], A[row_offset + 3]);
-    }
-
-    // 3. Eliminate Col 0
-    {
-        double inv = 1.0 / A[0];
-        // Row 1
-        double f1 = A[4] * inv;
-        A[5] -= f1 * A[1]; A[6] -= f1 * A[2]; A[7] -= f1 * A[3]; b[1] -= f1 * b[0];
-        // Row 2
-        double f2 = A[8] * inv;
-        A[9] -= f2 * A[1]; A[10] -= f2 * A[2]; A[11] -= f2 * A[3]; b[2] -= f2 * b[0];
-        // Row 3
-        double f3 = A[12] * inv;
-        A[13] -= f3 * A[1]; A[14] -= f3 * A[2]; A[15] -= f3 * A[3]; b[3] -= f3 * b[0];
-    }
-
-    // --- COLUMN 1 ---
-    // 1. Find Pivot in Col 1 (starting from row 1)
-    p = 1;
-    max_val = std::abs(A[5]);
-    
-    val = std::abs(A[9]);  if (val > max_val) { max_val = val; p = 2; }
-    val = std::abs(A[13]); if (val > max_val) { max_val = val; p = 3; }
-
-    if (max_val < 1e-14) return false;
-
-    // 2. Swap Row 1 with Row p
-    if (p != 1) {
-        int row_offset = p * 4;
-        std::swap(b[1], b[p]);
-        // Optimization: Col 0 is already 0, so we only swap cols 1,2,3
-        std::swap(A[5], A[row_offset + 1]);
-        std::swap(A[6], A[row_offset + 2]);
-        std::swap(A[7], A[row_offset + 3]);
-    }
-
-    // 3. Eliminate Col 1
-    {
-        double inv = 1.0 / A[5];
-        // Row 2
-        double f2 = A[9] * inv;
-        A[10] -= f2 * A[6]; A[11] -= f2 * A[7]; b[2] -= f2 * b[1];
-        // Row 3
-        double f3 = A[13] * inv;
-        A[14] -= f3 * A[6]; A[15] -= f3 * A[7]; b[3] -= f3 * b[1];
+inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4])
+{
+  // Helper lambda to access A[row, col]
+  auto at = [&](int r, int c) -> double & {
+    return A[r * 4 + c];
+  };
+
+  // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
+
+  for (int i = 0; i < 3; ++i) {    // Loop over columns 0, 1, 2
+    // 1. Find the Pivot (Max absolute value in this column)
+    int pivot_row = i;
+    double max_val = std::abs(at(i, i));
+
+    for (int k = i + 1; k < 4; ++k) {
+      double val = std::abs(at(k, i));
+      if (val > max_val) {
+        max_val = val;
+        pivot_row = k;
+      }
     }
 
-    // --- COLUMN 2 ---
-    // 1. Find Pivot in Col 2 (starting from row 2)
-    p = 2;
-    max_val = std::abs(A[10]);
-
-    val = std::abs(A[14]); if (val > max_val) { max_val = val; p = 3; }
-
+    // 2. Singularity Check (The "Flat Particle" Guard)
     if (max_val < 1e-14) return false;
 
-    // 2. Swap Row 2 with Row p
-    if (p != 2) {
-        std::swap(b[2], b[3]);
-        // Optimization: Only swap cols 2,3
-        std::swap(A[10], A[14]);
-        std::swap(A[11], A[15]);
+    // 3. Swap Rows if needed (Swap A rows AND b elements)
+    if (pivot_row != i) {
+      std::swap(b[i], b[pivot_row]);
+      for (int k = i; k < 4; ++k) {    // Only need to swap from column 'i' onwards
+        std::swap(at(i, k), at(pivot_row, k));
+      }
     }
 
-    // 3. Eliminate Col 2
-    {
-        double inv = 1.0 / A[10];
-        // Row 3
-        double f3 = A[14] * inv;
-        A[15] -= f3 * A[11]; b[3] -= f3 * b[2];
+    // 4. Eliminate
+    double inv_pivot = 1.0 / at(i, i);
+    for (int k = i + 1; k < 4; ++k) {
+      double factor = at(k, i) * inv_pivot;
+      // A[k, i] becomes 0, no need to compute it.
+      // Update the rest of the row:
+      for (int j = i + 1; j < 4; ++j) { at(k, j) -= factor * at(i, j); }
+      // Update RHS
+      b[k] -= factor * b[i];
     }
+  }
 
-    // --- BACKWARD SUBSTITUTION ---
-    // Check last pivot
-    if (std::abs(A[15]) < 1e-14) return false;
-
-    double inv3 = 1.0 / A[15];
-    b[3] *= inv3;
-
-    double inv2 = 1.0 / A[10];
-    b[2] = (b[2] - A[11] * b[3]) * inv2;
+  // Final Pivot Check for the last element
+  if (std::abs(at(3, 3)) < 1e-14) return false;
 
-    double inv1 = 1.0 / A[5];
-    b[1] = (b[1] - A[6] * b[2] - A[7] * b[3]) * inv1;
+  // --- BACKWARD SUBSTITUTION ---
+  b[3] /= at(3, 3);
+  b[2] = (b[2] - at(2, 3) * b[3]) / at(2, 2);
+  b[1] = (b[1] - at(1, 2) * b[2] - at(1, 3) * b[3]) / at(1, 1);
+  b[0] = (b[0] - at(0, 1) * b[1] - at(0, 2) * b[2] - at(0, 3) * b[3]) / at(0, 0);
 
-    double inv0 = 1.0 / A[0];
-    b[0] = (b[0] - A[1] * b[1] - A[2] * b[2] - A[3] * b[3]) * inv0;
+  return true;
+}
 
-    return true;
+inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4])
+{
 
+  // // Tikhonov regularization could be applied here
+  // double trace = A[0] + A[5] + A[10];
+  // A[0]  += TIKHONOV_SCALE * trace;
+  // A[5]  += TIKHONOV_SCALE * trace;
+  // A[10] += TIKHONOV_SCALE * trace;
+
+  // --- COLUMN 0 ---
+  // 1. Find Pivot in Col 0
+  int p = 0;
+  double max_val = std::abs(A[0]);
+  double val;
+
+  val = std::abs(A[4]);
+  if (val > max_val) {
+    max_val = val;
+    p = 1;
+  }
+  val = std::abs(A[8]);
+  if (val > max_val) {
+    max_val = val;
+    p = 2;
+  }
+  val = std::abs(A[12]);
+  if (val > max_val) {
+    max_val = val;
+    p = 3;
+  }
+
+  if (max_val < 1e-14) return false;
+  // 2. Swap Row 0 with Row p
+  if (p != 0) {
+    int row_offset = p * 4;
+    std::swap(b[0], b[p]);
+    std::swap(A[0], A[row_offset]);
+    std::swap(A[1], A[row_offset + 1]);
+    std::swap(A[2], A[row_offset + 2]);
+    std::swap(A[3], A[row_offset + 3]);
+  }
+
+  // 3. Eliminate Col 0
+  {
+    double inv = 1.0 / A[0];
+    // Row 1
+    double f1 = A[4] * inv;
+    A[5] -= f1 * A[1];
+    A[6] -= f1 * A[2];
+    A[7] -= f1 * A[3];
+    b[1] -= f1 * b[0];
+    // Row 2
+    double f2 = A[8] * inv;
+    A[9] -= f2 * A[1];
+    A[10] -= f2 * A[2];
+    A[11] -= f2 * A[3];
+    b[2] -= f2 * b[0];
+    // Row 3
+    double f3 = A[12] * inv;
+    A[13] -= f3 * A[1];
+    A[14] -= f3 * A[2];
+    A[15] -= f3 * A[3];
+    b[3] -= f3 * b[0];
+  }
+
+  // --- COLUMN 1 ---
+  // 1. Find Pivot in Col 1 (starting from row 1)
+  p = 1;
+  max_val = std::abs(A[5]);
+
+  val = std::abs(A[9]);
+  if (val > max_val) {
+    max_val = val;
+    p = 2;
+  }
+  val = std::abs(A[13]);
+  if (val > max_val) {
+    max_val = val;
+    p = 3;
+  }
+
+  if (max_val < 1e-14) return false;
+
+  // 2. Swap Row 1 with Row p
+  if (p != 1) {
+    int row_offset = p * 4;
+    std::swap(b[1], b[p]);
+    // Optimization: Col 0 is already 0, so we only swap cols 1,2,3
+    std::swap(A[5], A[row_offset + 1]);
+    std::swap(A[6], A[row_offset + 2]);
+    std::swap(A[7], A[row_offset + 3]);
+  }
+
+  // 3. Eliminate Col 1
+  {
+    double inv = 1.0 / A[5];
+    // Row 2
+    double f2 = A[9] * inv;
+    A[10] -= f2 * A[6];
+    A[11] -= f2 * A[7];
+    b[2] -= f2 * b[1];
+    // Row 3
+    double f3 = A[13] * inv;
+    A[14] -= f3 * A[6];
+    A[15] -= f3 * A[7];
+    b[3] -= f3 * b[1];
+  }
+
+  // --- COLUMN 2 ---
+  // 1. Find Pivot in Col 2 (starting from row 2)
+  p = 2;
+  max_val = std::abs(A[10]);
+
+  val = std::abs(A[14]);
+  if (val > max_val) {
+    max_val = val;
+    p = 3;
+  }
+
+  if (max_val < 1e-14) return false;
+
+  // 2. Swap Row 2 with Row p
+  if (p != 2) {
+    std::swap(b[2], b[3]);
+    // Optimization: Only swap cols 2,3
+    std::swap(A[10], A[14]);
+    std::swap(A[11], A[15]);
+  }
+
+  // 3. Eliminate Col 2
+  {
+    double inv = 1.0 / A[10];
+    // Row 3
+    double f3 = A[14] * inv;
+    A[15] -= f3 * A[11];
+    b[3] -= f3 * b[2];
+  }
+
+  // --- BACKWARD SUBSTITUTION ---
+  // Check last pivot
+  if (std::abs(A[15]) < 1e-14) return false;
+
+  double inv3 = 1.0 / A[15];
+  b[3] *= inv3;
+
+  double inv2 = 1.0 / A[10];
+  b[2] = (b[2] - A[11] * b[3]) * inv2;
+
+  double inv1 = 1.0 / A[5];
+  b[1] = (b[1] - A[6] * b[2] - A[7] * b[3]) * inv1;
+
+  double inv0 = 1.0 / A[0];
+  b[0] = (b[0] - A[1] * b[1] - A[2] * b[2] - A[3] * b[3]) * inv0;
+
+  return true;
 }
 
-
 // algorithm from https://www.geometrictools.com/Documentation/DynamicCollisionDetection.pdf
 /* * Oriented Bounding Box intersection test.
  * Logic and optimization strategies adapted from LIGGGHTS (CFDEMproject).
@@ -351,525 +417,536 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
  * for temporal coherence.
  */
 inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-    const double* xc1, const double R1[3][3], const double* shape1,
-    const double* xc2, const double R2[3][3], const double* shape2, 
-    double* cached_axis
-){
-    // cache axis is the axis that separated the boxes last time
-    // due to temporal coherence we check it first
-
-    bool separated = false;
-
-    // for orientated bounding boxes we check the 15 separating axes
-    double C[3][3], AbsC[3][3];
-    MathExtra::transpose_times3(R1, R2, C); // C = R1^T * R2
-    for (unsigned int i=0; i<3; i++){
-        for (unsigned int j=0; j<3; j++){
-            AbsC[i][j] = std::fabs(C[i][j]); // for when absolute values are needed
-        }
-    }
+    const double *xc1, const double R1[3][3], const double *shape1, const double *xc2,
+    const double R2[3][3], const double *shape2, double *cached_axis)
+{
+  // cache axis is the axis that separated the boxes last time
+  // due to temporal coherence we check it first
 
-    double center_distance[3];
-    for (unsigned int i=0; i<3; i++){
-        center_distance[i] = xc2[i] - xc1[i];
-    } 
-
-    // Project center distance into both local frames
-    double center_distance_box1[3], center_distance_box2[3];
-    MathExtra::transpose_matvec(R1, center_distance,  center_distance_box1);
-    MathExtra::transpose_matvec(R2, center_distance,  center_distance_box2);
-
-    // first check the cached axis
-    const int axis = (int) (*cached_axis);
-    separated = check_intersection_axis(axis, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
-
-    if (separated) return true;
-    // then check all the other axes
-    for (int axis_id = 0; axis_id < 15; axis_id++){
-        if (axis_id == axis) continue; // already checked
-        separated = check_intersection_axis(axis_id, C, AbsC, center_distance_box1, center_distance_box2, shape1, shape2);
-        if (separated) {
-            *cached_axis = axis_id; // update cached axis
-            return true;
-        }
-    }
-    return false; // no separation found
-}
+  bool separated = false;
 
-inline bool MathExtraSuperellipsoids::check_intersection_axis(
-    const int axis_id, const double C[3][3], const double AbsC[3][3], 
-    const double* center_distance_box1, const double* center_distance_box2,
-    const double* a, const double* b
-){
-    // here axis_id goes from 0 to 14
-    // a and b are the half-sizes of the boxes along their local axes
-    // returns true if there is a separation along this axis
-    // changes the cached axis if separation found
-    double R1, R2, R;
-
-    switch(axis_id){
-        case 0: // A0
-            R1 = a[0];
-            R2 = b[0] * AbsC[0][0] + b[1] * AbsC[0][1] + b[2] * AbsC[0][2];
-            R = std::fabs(center_distance_box1[0]);
-            break;
-        case 1: // A1
-            R1 = a[1];
-            R2 = b[0] * AbsC[1][0] + b[1] * AbsC[1][1] + b[2] * AbsC[1][2];
-            R = std::fabs(center_distance_box1[1]);
-            break;
-        case 2: // A2
-            R1 = a[2];
-            R2 = b[0] * AbsC[2][0] + b[1] * AbsC[2][1] + b[2] * AbsC[2][2];
-            R = std::fabs(center_distance_box1[2]);
-            break;
-        case 3: // B0
-            R1 = a[0] * AbsC[0][0] + a[1] * AbsC[1][0] + a[2] * AbsC[2][0];
-            R2 = b[0];
-            R = std::fabs(center_distance_box2[0]);
-            break;
-        case 4: // B1
-            R1 = a[0] * AbsC[0][1] + a[1] * AbsC[1][1] + a[2] * AbsC[2][1];
-            R2 = b[1];
-            R = std::fabs(center_distance_box2[1]);
-            break;
-        case 5: // B2
-            R1 = a[0] * AbsC[0][2] + a[1] * AbsC[1][2] + a[2] * AbsC[2][2];
-            R2 = b[2];
-            R = std::fabs(center_distance_box2[2]);
-            break;
-        case 6: // A0 x B0
-            R1 = a[1] * AbsC[2][0] + a[2] * AbsC[1][0];
-            R2 = b[1] * AbsC[0][2] + b[2] * AbsC[0][1];
-            R = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
-            break;
-        case 7: // A0 x B1
-            R1 = a[1] * AbsC[2][1] + a[2] * AbsC[1][1];
-            R2 = b[0] * AbsC[0][2] + b[2] * AbsC[0][0];
-            R = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
-            break;
-        case 8: // A0 x B2
-            R1 = a[1] * AbsC[2][2] + a[2] * AbsC[1][2];
-            R2 = b[0] * AbsC[0][1] + b[1] * AbsC[0][0];
-            R = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
-            break;
-        case 9: // A1 x B0
-            R1 = a[0] * AbsC[2][0] + a[2] * AbsC[0][0];
-            R2 = b[1] * AbsC[1][2] + b[2] * AbsC[1][1];
-            R = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
-            break;
-        case 10: // A1 x B1
-            R1 = a[0] * AbsC[2][1] + a[2] * AbsC[0][1];
-            R2 = b[0] * AbsC[1][2] + b[2] * AbsC[1][0];
-            R = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
-            break;
-        case 11: // A1 x B2
-            R1 = a[0] * AbsC[2][2] + a[2] * AbsC[0][2];
-            R2 = b[0] * AbsC[1][1] + b[1] * AbsC[1][0];
-            R = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
-            break;
-        case 12: // A2 x B0
-            R1 = a[0] * AbsC[1][0] + a[1] * AbsC[0][0];
-            R2 = b[1] * AbsC[2][2] + b[2] * AbsC[2][1];
-            R = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
-            break;
-        case 13: // A2 x B1
-            R1 = a[0] * AbsC[1][1] + a[1] * AbsC[0][1];
-            R2 = b[0] * AbsC[2][2] + b[2] * AbsC[2][0];
-            R = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
-            break;
-        case 14: // A2 x B2
-            R1 = a[0] * AbsC[1][2] + a[1] * AbsC[0][2];
-            R2 = b[0] * AbsC[2][1] + b[1] * AbsC[2][0];
-            R = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
-            break;
+  // for orientated bounding boxes we check the 15 separating axes
+  double C[3][3], AbsC[3][3];
+  MathExtra::transpose_times3(R1, R2, C);    // C = R1^T * R2
+  for (unsigned int i = 0; i < 3; i++) {
+    for (unsigned int j = 0; j < 3; j++) {
+      AbsC[i][j] = std::fabs(C[i][j]);    // for when absolute values are needed
     }
-
-    if (R > R1 + R2){
-        return true; // separation found
-    } else {
-        return false; // no separation
+  }
+
+  double center_distance[3];
+  for (unsigned int i = 0; i < 3; i++) { center_distance[i] = xc2[i] - xc1[i]; }
+
+  // Project center distance into both local frames
+  double center_distance_box1[3], center_distance_box2[3];
+  MathExtra::transpose_matvec(R1, center_distance, center_distance_box1);
+  MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
+
+  // first check the cached axis
+  const int axis = (int) (*cached_axis);
+  separated = check_intersection_axis(axis, C, AbsC, center_distance_box1, center_distance_box2,
+                                      shape1, shape2);
+
+  if (separated) return true;
+  // then check all the other axes
+  for (int axis_id = 0; axis_id < 15; axis_id++) {
+    if (axis_id == axis) continue;    // already checked
+    separated = check_intersection_axis(axis_id, C, AbsC, center_distance_box1,
+                                        center_distance_box2, shape1, shape2);
+    if (separated) {
+      *cached_axis = axis_id;    // update cached axis
+      return true;
     }
+  }
+  return false;    // no separation found
 }
 
+inline bool MathExtraSuperellipsoids::check_intersection_axis(const int axis_id,
+                                                              const double C[3][3],
+                                                              const double AbsC[3][3],
+                                                              const double *center_distance_box1,
+                                                              const double *center_distance_box2,
+                                                              const double *a, const double *b)
+{
+  // here axis_id goes from 0 to 14
+  // a and b are the half-sizes of the boxes along their local axes
+  // returns true if there is a separation along this axis
+  // changes the cached axis if separation found
+  double R1, R2, R;
+
+  switch (axis_id) {
+    case 0:    // A0
+      R1 = a[0];
+      R2 = b[0] * AbsC[0][0] + b[1] * AbsC[0][1] + b[2] * AbsC[0][2];
+      R = std::fabs(center_distance_box1[0]);
+      break;
+    case 1:    // A1
+      R1 = a[1];
+      R2 = b[0] * AbsC[1][0] + b[1] * AbsC[1][1] + b[2] * AbsC[1][2];
+      R = std::fabs(center_distance_box1[1]);
+      break;
+    case 2:    // A2
+      R1 = a[2];
+      R2 = b[0] * AbsC[2][0] + b[1] * AbsC[2][1] + b[2] * AbsC[2][2];
+      R = std::fabs(center_distance_box1[2]);
+      break;
+    case 3:    // B0
+      R1 = a[0] * AbsC[0][0] + a[1] * AbsC[1][0] + a[2] * AbsC[2][0];
+      R2 = b[0];
+      R = std::fabs(center_distance_box2[0]);
+      break;
+    case 4:    // B1
+      R1 = a[0] * AbsC[0][1] + a[1] * AbsC[1][1] + a[2] * AbsC[2][1];
+      R2 = b[1];
+      R = std::fabs(center_distance_box2[1]);
+      break;
+    case 5:    // B2
+      R1 = a[0] * AbsC[0][2] + a[1] * AbsC[1][2] + a[2] * AbsC[2][2];
+      R2 = b[2];
+      R = std::fabs(center_distance_box2[2]);
+      break;
+    case 6:    // A0 x B0
+      R1 = a[1] * AbsC[2][0] + a[2] * AbsC[1][0];
+      R2 = b[1] * AbsC[0][2] + b[2] * AbsC[0][1];
+      R = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
+      break;
+    case 7:    // A0 x B1
+      R1 = a[1] * AbsC[2][1] + a[2] * AbsC[1][1];
+      R2 = b[0] * AbsC[0][2] + b[2] * AbsC[0][0];
+      R = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
+      break;
+    case 8:    // A0 x B2
+      R1 = a[1] * AbsC[2][2] + a[2] * AbsC[1][2];
+      R2 = b[0] * AbsC[0][1] + b[1] * AbsC[0][0];
+      R = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
+      break;
+    case 9:    // A1 x B0
+      R1 = a[0] * AbsC[2][0] + a[2] * AbsC[0][0];
+      R2 = b[1] * AbsC[1][2] + b[2] * AbsC[1][1];
+      R = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
+      break;
+    case 10:    // A1 x B1
+      R1 = a[0] * AbsC[2][1] + a[2] * AbsC[0][1];
+      R2 = b[0] * AbsC[1][2] + b[2] * AbsC[1][0];
+      R = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
+      break;
+    case 11:    // A1 x B2
+      R1 = a[0] * AbsC[2][2] + a[2] * AbsC[0][2];
+      R2 = b[0] * AbsC[1][1] + b[1] * AbsC[1][0];
+      R = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
+      break;
+    case 12:    // A2 x B0
+      R1 = a[0] * AbsC[1][0] + a[1] * AbsC[0][0];
+      R2 = b[1] * AbsC[2][2] + b[2] * AbsC[2][1];
+      R = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
+      break;
+    case 13:    // A2 x B1
+      R1 = a[0] * AbsC[1][1] + a[1] * AbsC[0][1];
+      R2 = b[0] * AbsC[2][2] + b[2] * AbsC[2][0];
+      R = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
+      break;
+    case 14:    // A2 x B2
+      R1 = a[0] * AbsC[1][2] + a[1] * AbsC[0][2];
+      R2 = b[0] * AbsC[2][1] + b[1] * AbsC[2][0];
+      R = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
+      break;
+  }
+
+  if (R > R1 + R2) {
+    return true;    // separation found
+  } else {
+    return false;    // no separation
+  }
+}
 
 inline bool MathExtraSuperellipsoids::check_intersection_axis_and_get_seed(
-    const double* xc1, const double R1[3][3], const double* shape1,
-    const double* xc2, const double R2[3][3], const double* shape2, 
-    double* cached_axis, double* contact_point
-){  
-    // cache axis is the axis that separated the boxes last time
-    // due to temporal coherence we check it first
-
-    double C[3][3], AbsC[3][3];
-    MathExtra::transpose_times3(R1, R2, C); // C = R1^T * R2
-    
-    // for orientated bounding boxes we check the 15 separating axes
-    const double eps = 1e-20;
-    for (unsigned int i=0; i<3; i++){
-        for (unsigned int j=0; j<3; j++){
-            AbsC[i][j] = std::fabs(C[i][j]) + eps; // Add epsilon to prevent division by zero in edge cases
-        }
+    const double *xc1, const double R1[3][3], const double *shape1, const double *xc2,
+    const double R2[3][3], const double *shape2, double *cached_axis, double *contact_point)
+{
+  // cache axis is the axis that separated the boxes last time
+  // due to temporal coherence we check it first
+
+  double C[3][3], AbsC[3][3];
+  MathExtra::transpose_times3(R1, R2, C);    // C = R1^T * R2
+
+  // for orientated bounding boxes we check the 15 separating axes
+  const double eps = 1e-20;
+  for (unsigned int i = 0; i < 3; i++) {
+    for (unsigned int j = 0; j < 3; j++) {
+      AbsC[i][j] =
+          std::fabs(C[i][j]) + eps;    // Add epsilon to prevent division by zero in edge cases
     }
+  }
+
+  double center_distance[3];    // Center distance in Global Frame
+  for (unsigned int i = 0; i < 3; i++) { center_distance[i] = xc2[i] - xc1[i]; }
+
+  // Project center distance into both local frames
+  double center_distance_box1[3], center_distance_box2[3];
+  MathExtra::transpose_matvec(R1, center_distance, center_distance_box1);
+  MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
+
+  int best_axis = -1;
+  double min_overlap = 0.0;
+  const double edge_bias = 1.05;    // Prefer face contacts over edge contacts
+
+  // Lambda to test an axis. Returns TRUE if SEPARATED.
+  // I was reading that lambdas can be optimized away by the compiler.
+  // and have less overhead than function calls.
+  auto test_axis_separated = [&](int i) -> bool {
+    double R1_rad, R2_rad, dist, overlap;
+
+    // Switch is efficient here; compiler generates a jump table.
+    switch (i) {
+      case 0:    // A0
+        R1_rad = shape1[0];
+        R2_rad = shape2[0] * AbsC[0][0] + shape2[1] * AbsC[0][1] + shape2[2] * AbsC[0][2];
+        dist = std::fabs(center_distance_box1[0]);
+        break;
+      case 1:    // A1
+        R1_rad = shape1[1];
+        R2_rad = shape2[0] * AbsC[1][0] + shape2[1] * AbsC[1][1] + shape2[2] * AbsC[1][2];
+        dist = std::fabs(center_distance_box1[1]);
+        break;
+      case 2:    // A2
+        R1_rad = shape1[2];
+        R2_rad = shape2[0] * AbsC[2][0] + shape2[1] * AbsC[2][1] + shape2[2] * AbsC[2][2];
+        dist = std::fabs(center_distance_box1[2]);
+        break;
+      case 3:    // B0
+        R1_rad = shape1[0] * AbsC[0][0] + shape1[1] * AbsC[1][0] + shape1[2] * AbsC[2][0];
+        R2_rad = shape2[0];
+        dist = std::fabs(center_distance_box2[0]);
+        break;
+      case 4:    // B1
+        R1_rad = shape1[0] * AbsC[0][1] + shape1[1] * AbsC[1][1] + shape1[2] * AbsC[2][1];
+        R2_rad = shape2[1];
+        dist = std::fabs(center_distance_box2[1]);
+        break;
+      case 5:    // B2
+        R1_rad = shape1[0] * AbsC[0][2] + shape1[1] * AbsC[1][2] + shape1[2] * AbsC[2][2];
+        R2_rad = shape2[2];
+        dist = std::fabs(center_distance_box2[2]);
+        break;
+      case 6:    // A0 x B0
+        R1_rad = shape1[1] * AbsC[2][0] + shape1[2] * AbsC[1][0];
+        R2_rad = shape2[1] * AbsC[0][2] + shape2[2] * AbsC[0][1];
+        dist = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
+        break;
+      case 7:    // A0 x B1
+        R1_rad = shape1[1] * AbsC[2][1] + shape1[2] * AbsC[1][1];
+        R2_rad = shape2[0] * AbsC[0][2] + shape2[2] * AbsC[0][0];
+        dist = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
+        break;
+      case 8:    // A0 x B2
+        R1_rad = shape1[1] * AbsC[2][2] + shape1[2] * AbsC[1][2];
+        R2_rad = shape2[0] * AbsC[0][1] + shape2[1] * AbsC[0][0];
+        dist = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
+        break;
+      case 9:    // A1 x B0
+        R1_rad = shape1[0] * AbsC[2][0] + shape1[2] * AbsC[0][0];
+        R2_rad = shape2[1] * AbsC[1][2] + shape2[2] * AbsC[1][1];
+        dist = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
+        break;
+      case 10:    // A1 x B1
+        R1_rad = shape1[0] * AbsC[2][1] + shape1[2] * AbsC[0][1];
+        R2_rad = shape2[0] * AbsC[1][2] + shape2[2] * AbsC[1][0];
+        dist = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
+        break;
+      case 11:    // A1 x B2
+        R1_rad = shape1[0] * AbsC[2][2] + shape1[2] * AbsC[0][2];
+        R2_rad = shape2[0] * AbsC[1][1] + shape2[1] * AbsC[1][0];
+        dist = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
+        break;
+      case 12:    // A2 x B0
+        R1_rad = shape1[0] * AbsC[1][0] + shape1[1] * AbsC[0][0];
+        R2_rad = shape2[1] * AbsC[2][2] + shape2[2] * AbsC[2][1];
+        dist = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
+        break;
+      case 13:    // A2 x B1
+        R1_rad = shape1[0] * AbsC[1][1] + shape1[1] * AbsC[0][1];
+        R2_rad = shape2[0] * AbsC[2][2] + shape2[2] * AbsC[2][0];
+        dist = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
+        break;
+      case 14:    // A2 x B2
+        R1_rad = shape1[0] * AbsC[1][2] + shape1[1] * AbsC[0][2];
+        R2_rad = shape2[0] * AbsC[2][1] + shape2[1] * AbsC[2][0];
+        dist = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
+        break;
+      default:
+        return false;
+    }
+
+    if (dist > R1_rad + R2_rad) return true;    // Separated!
 
-    double center_distance[3]; // Center distance in Global Frame
-        for (unsigned int i=0; i<3; i++){
-        center_distance[i] = xc2[i] - xc1[i];
-    } 
-
-    // Project center distance into both local frames
-    double center_distance_box1[3], center_distance_box2[3];
-    MathExtra::transpose_matvec(R1, center_distance, center_distance_box1);
-    MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
-
-    int best_axis = -1;
-    double min_overlap = 0.0;
-    const double edge_bias = 1.05; // Prefer face contacts over edge contacts
-
-    // Lambda to test an axis. Returns TRUE if SEPARATED.
-    // I was reading that lambdas can be optimized away by the compiler.
-    // and have less overhead than function calls.
-    auto test_axis_separated = [&](int i) -> bool {
-        double R1_rad, R2_rad, dist, overlap;
-
-        // Switch is efficient here; compiler generates a jump table.
-        switch(i){
-            case 0: // A0
-                R1_rad = shape1[0];
-                R2_rad = shape2[0] * AbsC[0][0] + shape2[1] * AbsC[0][1] + shape2[2] * AbsC[0][2];
-                dist = std::fabs(center_distance_box1[0]);
-                break;
-            case 1: // A1
-                R1_rad = shape1[1];
-                R2_rad = shape2[0] * AbsC[1][0] + shape2[1] * AbsC[1][1] + shape2[2] * AbsC[1][2];
-                dist = std::fabs(center_distance_box1[1]);
-                break;
-            case 2: // A2
-                R1_rad = shape1[2];
-                R2_rad = shape2[0] * AbsC[2][0] + shape2[1] * AbsC[2][1] + shape2[2] * AbsC[2][2];
-                dist = std::fabs(center_distance_box1[2]);
-                break;
-            case 3: // B0
-                R1_rad = shape1[0] * AbsC[0][0] + shape1[1] * AbsC[1][0] + shape1[2] * AbsC[2][0];
-                R2_rad = shape2[0];
-                dist = std::fabs(center_distance_box2[0]);
-                break;
-            case 4: // B1
-                R1_rad = shape1[0] * AbsC[0][1] + shape1[1] * AbsC[1][1] + shape1[2] * AbsC[2][1];
-                R2_rad = shape2[1];
-                dist = std::fabs(center_distance_box2[1]);
-                break;
-            case 5: // B2
-                R1_rad = shape1[0] * AbsC[0][2] + shape1[1] * AbsC[1][2] + shape1[2] * AbsC[2][2];
-                R2_rad = shape2[2];
-                dist = std::fabs(center_distance_box2[2]);
-                break;
-            case 6: // A0 x B0
-                R1_rad = shape1[1] * AbsC[2][0] + shape1[2] * AbsC[1][0];
-                R2_rad = shape2[1] * AbsC[0][2] + shape2[2] * AbsC[0][1];
-                dist = std::fabs(center_distance_box1[2] * C[1][0] - center_distance_box1[1] * C[2][0]);
-                break;
-            case 7: // A0 x B1
-                R1_rad = shape1[1] * AbsC[2][1] + shape1[2] * AbsC[1][1];
-                R2_rad = shape2[0] * AbsC[0][2] + shape2[2] * AbsC[0][0];
-                dist = std::fabs(center_distance_box1[2] * C[1][1] - center_distance_box1[1] * C[2][1]);
-                break;
-            case 8: // A0 x B2
-                R1_rad = shape1[1] * AbsC[2][2] + shape1[2] * AbsC[1][2];
-                R2_rad = shape2[0] * AbsC[0][1] + shape2[1] * AbsC[0][0];
-                dist = std::fabs(center_distance_box1[2] * C[1][2] - center_distance_box1[1] * C[2][2]);
-                break;
-            case 9: // A1 x B0
-                R1_rad = shape1[0] * AbsC[2][0] + shape1[2] * AbsC[0][0];
-                R2_rad = shape2[1] * AbsC[1][2] + shape2[2] * AbsC[1][1];
-                dist = std::fabs(center_distance_box1[0] * C[2][0] - center_distance_box1[2] * C[0][0]);
-                break;
-            case 10: // A1 x B1
-                R1_rad = shape1[0] * AbsC[2][1] + shape1[2] * AbsC[0][1];
-                R2_rad = shape2[0] * AbsC[1][2] + shape2[2] * AbsC[1][0];
-                dist = std::fabs(center_distance_box1[0] * C[2][1] - center_distance_box1[2] * C[0][1]);
-                break;
-            case 11: // A1 x B2
-                R1_rad = shape1[0] * AbsC[2][2] + shape1[2] * AbsC[0][2];
-                R2_rad = shape2[0] * AbsC[1][1] + shape2[1] * AbsC[1][0];
-                dist = std::fabs(center_distance_box1[0] * C[2][2] - center_distance_box1[2] * C[0][2]);
-                break;
-            case 12: // A2 x B0
-                R1_rad = shape1[0] * AbsC[1][0] + shape1[1] * AbsC[0][0];
-                R2_rad = shape2[1] * AbsC[2][2] + shape2[2] * AbsC[2][1];
-                dist = std::fabs(center_distance_box1[1] * C[0][0] - center_distance_box1[0] * C[1][0]);
-                break;
-            case 13: // A2 x B1
-                R1_rad = shape1[0] * AbsC[1][1] + shape1[1] * AbsC[0][1];
-                R2_rad = shape2[0] * AbsC[2][2] + shape2[2] * AbsC[2][0];
-                dist = std::fabs(center_distance_box1[1] * C[0][1] - center_distance_box1[0] * C[1][1]);
-                break;
-            case 14: // A2 x B2
-                R1_rad = shape1[0] * AbsC[1][2] + shape1[1] * AbsC[0][2];
-                R2_rad = shape2[0] * AbsC[2][1] + shape2[1] * AbsC[2][0];
-                dist = std::fabs(center_distance_box1[1] * C[0][2] - center_distance_box1[0] * C[1][2]);
-                break;
-            default: return false;
-        }
-
-        if (dist > R1_rad + R2_rad) return true; // Separated!
-
-        // If not separated, track the overlap depth
-        overlap = (R1_rad + R2_rad) - dist;
-        
-        // Bias: Penalize edge axes slightly to prefer stable face contacts
-        if (i >= 6) overlap *= edge_bias;
-
-        if (overlap < min_overlap) {
-            min_overlap = overlap;
-            best_axis = i;
-        }
-        return false; // Not separated
-    };
-
-    // Check Cached Axis First (Temporal Coherence)
-    int c_axis = (int)(*cached_axis);
-    if (test_axis_separated(c_axis)) return false; 
-
-    // Check remaining axes
-    for (int i = 0; i < 15; i++){
-        if (i == c_axis) continue;
-        if (test_axis_separated(i)) {
-            *cached_axis = (double)i;
-            return false;
-        }
+    // If not separated, track the overlap depth
+    overlap = (R1_rad + R2_rad) - dist;
+
+    // Bias: Penalize edge axes slightly to prefer stable face contacts
+    if (i >= 6) overlap *= edge_bias;
+
+    if (overlap < min_overlap) {
+      min_overlap = overlap;
+      best_axis = i;
     }
-   
-    // If we reached here, 'best_axis' holds the axis index where the overlap is minimal
-    if (best_axis < 6) {
-        // Face-to-Face contact logic: Project "Incident" box onto "Reference" face, clip to find overlap center.
-        // Pointers to define who is Reference (the face) and who is Incident
-        const double* posRef = xc1;
-        const double* posInc = xc2;
-        const double (*RRef)[3] = R1;
-        const double (*RInc)[3] = R2;
-        const double* shapeRef = shape1;
-        const double* shapeInc = shape2;
-        double* D_local_Ref = center_distance_box1; // Center dist in Ref frame
-
-        int axis = best_axis; 
-
-        // Swap if Reference is Box 2 (Indices 3, 4, 5)
-        if (best_axis >= 3) {
-            posRef = xc2;
-            posInc = xc1;
-            RRef = R2;
-            RInc = R1;
-            shapeRef = shape2;
-            shapeInc = shape1;
-            D_local_Ref = center_distance_box2;
-            axis -= 3;
-        }
-
-        double seed_local[3];
-
-        //Normal Component: Midway through the penetration depth
-        // Calculate projected radius of Incident block onto this axis
-        
-        double dir = (D_local_Ref[axis] > 0) ? 1.0 : -1.0;
-        double radInc_proj = 0.0;
-        for(int k=0; k<3; k++) {
-            // If swapped (Box 2 is Ref), we need AbsC^T, so we swap AbsC indices
-            double val = (best_axis < 3) ? AbsC[axis][k] : AbsC[k][axis];
-            radInc_proj += shapeInc[k] * val;
-        }
-
-        double surfRef = dir * shapeRef[axis];
-        double surfInc = D_local_Ref[axis] - (dir * radInc_proj);
-        seed_local[axis] = 0.5 * (surfRef + surfInc);
-
-        // Lateral Components: 1D Interval Overlap
-        for(int k=0; k<3; k++) {
-            if (k == axis) continue; // Skip the normal axis
-
-            double minRef = -shapeRef[k];
-            double maxRef =  shapeRef[k];
-
-            double radInc = 0.0;
-            for(int j=0; j<3; j++) {
-                double val = (best_axis < 3) ? AbsC[k][j] : AbsC[j][k]; 
-                radInc += shapeInc[j] * val;
-            }
-            double centerInc = D_local_Ref[k];
-            
-            double minInc = centerInc - radInc;
-            double maxInc = centerInc + radInc;
-
-            // Find intersection of intervals [minRef, maxRef] and [minInc, maxInc]
-            double start = (minRef > minInc) ? minRef : minInc; 
-            double end   = (maxRef < maxInc) ? maxRef : maxInc; 
-            seed_local[k] = 0.5 * (start + end); // Midpoint of overlap
-
-        }
-
-        // Transform Local Seed -> World Space
-        MathExtra::matvec(RRef, seed_local, contact_point);
-        for(int k=0; k<3; k++) contact_point[k] += posRef[k];
-    } 
-    else {
-        // Edge-to-edge contact logic: Midpoint of the closest points on the two skew edge lines.
-        // The logic is that index 6 corresponds to A_0 x B_0, 7 to A_0 x B_1, ..., 14 to A_2 x B_2
-        int edgeA_idx = (best_axis - 6) / 3;
-        int edgeB_idx = (best_axis - 6) % 3;
-
-        // Get World directions of the edges
-        double u[3] = { R1[0][edgeA_idx], R1[1][edgeA_idx], R1[2][edgeA_idx] };
-        double v[3] = { R2[0][edgeB_idx], R2[1][edgeB_idx], R2[2][edgeB_idx] };
-
-        // Identify the specific edges by checking the normal direction
-        // The normal N is roughly the distance vector center_distance for the closest edges
-        double N_loc1[3], N_loc2[3];
-        MathExtra::transpose_matvec(R1, center_distance, N_loc1);
-        MathExtra::transpose_matvec(R2, center_distance, N_loc2);
-
-        // Find Center of Edge A in World Space
-        double midA[3]; for(int k=0; k<3; k++) midA[k] = xc1[k];
-        for(int k=0; k<3; k++){
-            if(k == edgeA_idx) continue;
-            // Move to the face pointing towards B
-            double sign = (N_loc1[k] > 0) ? 1.0 : -1.0;
-            double offset = sign * shape1[k];
-            midA[0] += R1[0][k]*offset; midA[1] += R1[1][k]*offset; midA[2] += R1[2][k]*offset;
-        }
-
-        // Find Center of Edge B in World Space
-        double midB[3]; for(int k=0; k<3; k++) midB[k] = xc2[k];
-        for(int k=0; k<3; k++){
-            if(k == edgeB_idx) continue;
-            // Move to the face pointing away from A (Since center_distance is A->B, we check -N_loc2)
-            double sign = (N_loc2[k] < 0) ? 1.0 : -1.0; 
-            double offset = sign * shape2[k];
-            midB[0] += R2[0][k]*offset; midB[1] += R2[1][k]*offset; midB[2] += R2[2][k]*offset;
-        }
-
-        // Closest Points on Two Skew Lines 
-        // Line1 parameterized by s: P_A = midA + s*u
-        // Line2 parameterized by t: P_B = midB + t*v
-        double r[3] = { midB[0]-midA[0], midB[1]-midA[1], midB[2]-midA[2] };
-        double u_dot_v = u[0]*v[0]+u[1]*v[1]+u[2]*v[2];
-        double u_dot_r = u[0]*r[0]+u[1]*r[1]+u[2]*r[2];
-        double v_dot_r = v[0]*r[0]+v[1]*r[1]+v[2]*r[2];
-        
-        // Denom is 1 - (u.v)^2 because u and v are unit vectors
-        double denom = 1.0 - u_dot_v*u_dot_v + eps; 
-        double s = (u_dot_r - u_dot_v * v_dot_r) / denom;
-        double t = (u_dot_v * u_dot_r - v_dot_r) / denom; // Note: simplified derivation
-
-        // Compute World Points
-        double PA[3] = { midA[0]+s*u[0], midA[1]+s*u[1], midA[2]+s*u[2] };
-        double PB[3] = { midB[0]+t*v[0], midB[1]+t*v[1], midB[2]+t*v[2] };
-
-        // Seed is the midpoint
-        for(int k=0; k<3; k++) contact_point[k] = 0.5 * (PA[k] + PB[k]);
+    return false;    // Not separated
+  };
+
+  // Check Cached Axis First (Temporal Coherence)
+  int c_axis = (int) (*cached_axis);
+  if (test_axis_separated(c_axis)) return false;
+
+  // Check remaining axes
+  for (int i = 0; i < 15; i++) {
+    if (i == c_axis) continue;
+    if (test_axis_separated(i)) {
+      *cached_axis = (double) i;
+      return false;
+    }
+  }
+
+  // If we reached here, 'best_axis' holds the axis index where the overlap is minimal
+  if (best_axis < 6) {
+    // Face-to-Face contact logic: Project "Incident" box onto "Reference" face, clip to find overlap center.
+    // Pointers to define who is Reference (the face) and who is Incident
+    const double *posRef = xc1;
+    const double *posInc = xc2;
+    const double(*RRef)[3] = R1;
+    const double(*RInc)[3] = R2;
+    const double *shapeRef = shape1;
+    const double *shapeInc = shape2;
+    double *D_local_Ref = center_distance_box1;    // Center dist in Ref frame
+
+    int axis = best_axis;
+
+    // Swap if Reference is Box 2 (Indices 3, 4, 5)
+    if (best_axis >= 3) {
+      posRef = xc2;
+      posInc = xc1;
+      RRef = R2;
+      RInc = R1;
+      shapeRef = shape2;
+      shapeInc = shape1;
+      D_local_Ref = center_distance_box2;
+      axis -= 3;
     }
 
-    return true; // Collision confirmed
-}
+    double seed_local[3];
 
-inline int MathExtraSuperellipsoids::determine_contact_point_wall(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
-                                        const double* x_wall, const double* n_wall, double* X0, double* nij, double* overlap){
-    //x_wall is a point on the wall TODO: is this actually stored somewhere?
-    // n_wall is the wall normal pointing from wall to particle in the global frame
-    // We might hav to change the fix wall gran files to achieve contact with the wall.
-    // I implemented the function but we might not use it.
-    // Unlike for particle-particle contacts, here we get directly the overlap value.
-
-    double n_local[3];
-    // Transform wall normal into local frame
-    // If n_wall points from Wall->Particle, we want surface normal -n_wall.
-    double n_search[3] = {-n_wall[0], -n_wall[1], -n_wall[2]};
-    MathExtra::transpose_matvec(Ri, n_search, n_local);
-    
-    double nx = n_local[0], ny = n_local[1], nz = n_local[2];
-    double a = shapei[0], b = shapei[1], c = shapei[2];
-    double X0_local[3];
-
-    // Calculate Deepest Point
-    if (flagi == 0){ 
-        // Ellipsoid
-        double norm = std::sqrt(a*a*nx*nx + b*b*ny*ny + c*c*nz*nz);
-        double inv_norm = (norm > 1e-14) ? 1.0/norm : 0.0;
-
-        X0_local[0] = a*a * nx * inv_norm;
-        X0_local[1] = b*b * ny * inv_norm;
-        X0_local[2] = c*c * nz * inv_norm;
-    }
-    else{ 
-        // General Superellipsoid
-        double nx_abs = std::fabs(nx);
-        double ny_abs = std::fabs(ny);
-        double nz_abs = std::fabs(nz);
-        double n1 = blocki[0];
-        double n2 = blocki[1];
-        
-        double x, y, z;
-
-        if (nx_abs < 1e-14 && ny_abs < 1e-14) {
-            x = 0.0; y = 0.0; 
-            z = c * ((nz > 0) ? 1.0 : -1.0);
-        } 
-        else {
-            double p2 = 1.0 / (n2 - 1.0);
-            double p1 = 1.0 / (n1 - 1.0);
-            
-            if (nx_abs > ny_abs) {
-                double alpha = std::pow((b * ny_abs) / (a * nx_abs), p2);
-                double gamma = std::pow(1.0 + std::pow(alpha, n2), n1/n2 - 1.0);
-                double beta = std::pow((c * nz_abs) / (a * nx_abs) * gamma, p1);
-                
-                double den = std::pow(std::pow(1.0 + std::pow(alpha, n2), n1/n2) + std::pow(beta, n1), 1.0/n1);
-                x = 1.0 / den;
-                y = alpha * x;
-                z = beta * x;
-            } else {
-                double alpha = std::pow((a * nx_abs) / (b * ny_abs), p2);
-                double gamma = std::pow(1.0 + std::pow(alpha, n2), n1/n2 - 1.0);
-                double beta = std::pow((c * nz_abs) / (b * ny_abs) * gamma, p1);
-                
-                double den = std::pow(std::pow(1.0 + std::pow(alpha, n2), n1/n2) + std::pow(beta, n1), 1.0/n1);
-                y = 1.0 / den;
-                x = alpha * y;
-                z = beta * y;
-            }
-            
-            x *= a; y *= b; z *= c;
-            
-            if (n_local[0] < 0) x = -x;
-            if (n_local[1] < 0) y = -y;
-            if (n_local[2] < 0) z = -z;
-        }
-        X0_local[0] = x; X0_local[1] = y; X0_local[2] = z;
+    //Normal Component: Midway through the penetration depth
+    // Calculate projected radius of Incident block onto this axis
+
+    double dir = (D_local_Ref[axis] > 0) ? 1.0 : -1.0;
+    double radInc_proj = 0.0;
+    for (int k = 0; k < 3; k++) {
+      // If swapped (Box 2 is Ref), we need AbsC^T, so we swap AbsC indices
+      double val = (best_axis < 3) ? AbsC[axis][k] : AbsC[k][axis];
+      radInc_proj += shapeInc[k] * val;
     }
 
-    // Transform to Global Frame
-    MathExtra::matvec(Ri, X0_local, X0);
-    for(int k=0; k<3; k++) X0[k] += xci[k]; // Translate to Global Position
+    double surfRef = dir * shapeRef[axis];
+    double surfInc = D_local_Ref[axis] - (dir * radInc_proj);
+    seed_local[axis] = 0.5 * (surfRef + surfInc);
+
+    // Lateral Components: 1D Interval Overlap
+    for (int k = 0; k < 3; k++) {
+      if (k == axis) continue;    // Skip the normal axis
+
+      double minRef = -shapeRef[k];
+      double maxRef = shapeRef[k];
 
-    // Set Contact Normal (Always wall normal for plane contacts)
-    nij[0] = n_wall[0];
-    nij[1] = n_wall[1];
-    nij[2] = n_wall[2];
+      double radInc = 0.0;
+      for (int j = 0; j < 3; j++) {
+        double val = (best_axis < 3) ? AbsC[k][j] : AbsC[j][k];
+        radInc += shapeInc[j] * val;
+      }
+      double centerInc = D_local_Ref[k];
 
-    // Check Overlap
-    double dx = X0[0] - x_wall[0];
-    double dy = X0[1] - x_wall[1];
-    double dz = X0[2] - x_wall[2];
+      double minInc = centerInc - radInc;
+      double maxInc = centerInc + radInc;
 
-    // Project onto Wall Normal, if dist < 0, the point is "behind" the wall face.
-    double dist = dx*n_wall[0] + dy*n_wall[1] + dz*n_wall[2];
+      // Find intersection of intervals [minRef, maxRef] and [minInc, maxInc]
+      double start = (minRef > minInc) ? minRef : minInc;
+      double end = (maxRef < maxInc) ? maxRef : maxInc;
+      seed_local[k] = 0.5 * (start + end);    // Midpoint of overlap
+    }
+
+    // Transform Local Seed -> World Space
+    MathExtra::matvec(RRef, seed_local, contact_point);
+    for (int k = 0; k < 3; k++) contact_point[k] += posRef[k];
+  } else {
+    // Edge-to-edge contact logic: Midpoint of the closest points on the two skew edge lines.
+    // The logic is that index 6 corresponds to A_0 x B_0, 7 to A_0 x B_1, ..., 14 to A_2 x B_2
+    int edgeA_idx = (best_axis - 6) / 3;
+    int edgeB_idx = (best_axis - 6) % 3;
+
+    // Get World directions of the edges
+    double u[3] = {R1[0][edgeA_idx], R1[1][edgeA_idx], R1[2][edgeA_idx]};
+    double v[3] = {R2[0][edgeB_idx], R2[1][edgeB_idx], R2[2][edgeB_idx]};
+
+    // Identify the specific edges by checking the normal direction
+    // The normal N is roughly the distance vector center_distance for the closest edges
+    double N_loc1[3], N_loc2[3];
+    MathExtra::transpose_matvec(R1, center_distance, N_loc1);
+    MathExtra::transpose_matvec(R2, center_distance, N_loc2);
+
+    // Find Center of Edge A in World Space
+    double midA[3];
+    for (int k = 0; k < 3; k++) midA[k] = xc1[k];
+    for (int k = 0; k < 3; k++) {
+      if (k == edgeA_idx) continue;
+      // Move to the face pointing towards B
+      double sign = (N_loc1[k] > 0) ? 1.0 : -1.0;
+      double offset = sign * shape1[k];
+      midA[0] += R1[0][k] * offset;
+      midA[1] += R1[1][k] * offset;
+      midA[2] += R1[2][k] * offset;
+    }
 
-    if (dist < 0.0) {
-        *overlap = -dist; // Store positive overlap value
-        return 0; // contact
+    // Find Center of Edge B in World Space
+    double midB[3];
+    for (int k = 0; k < 3; k++) midB[k] = xc2[k];
+    for (int k = 0; k < 3; k++) {
+      if (k == edgeB_idx) continue;
+      // Move to the face pointing away from A (Since center_distance is A->B, we check -N_loc2)
+      double sign = (N_loc2[k] < 0) ? 1.0 : -1.0;
+      double offset = sign * shape2[k];
+      midB[0] += R2[0][k] * offset;
+      midB[1] += R2[1][k] * offset;
+      midB[2] += R2[2][k] * offset;
     }
 
-    *overlap = 0.0;
-    return 1; // no contact
+    // Closest Points on Two Skew Lines
+    // Line1 parameterized by s: P_A = midA + s*u
+    // Line2 parameterized by t: P_B = midB + t*v
+    double r[3] = {midB[0] - midA[0], midB[1] - midA[1], midB[2] - midA[2]};
+    double u_dot_v = u[0] * v[0] + u[1] * v[1] + u[2] * v[2];
+    double u_dot_r = u[0] * r[0] + u[1] * r[1] + u[2] * r[2];
+    double v_dot_r = v[0] * r[0] + v[1] * r[1] + v[2] * r[2];
+
+    // Denom is 1 - (u.v)^2 because u and v are unit vectors
+    double denom = 1.0 - u_dot_v * u_dot_v + eps;
+    double s = (u_dot_r - u_dot_v * v_dot_r) / denom;
+    double t = (u_dot_v * u_dot_r - v_dot_r) / denom;    // Note: simplified derivation
+
+    // Compute World Points
+    double PA[3] = {midA[0] + s * u[0], midA[1] + s * u[1], midA[2] + s * u[2]};
+    double PB[3] = {midB[0] + t * v[0], midB[1] + t * v[1], midB[2] + t * v[2]};
+
+    // Seed is the midpoint
+    for (int k = 0; k < 3; k++) contact_point[k] = 0.5 * (PA[k] + PB[k]);
+  }
+
+  return true;    // Collision confirmed
+}
+
+inline int MathExtraSuperellipsoids::determine_contact_point_wall(
+    const double *xci, const double Ri[3][3], const double *shapei, const double *blocki,
+    const int flagi, const double *x_wall, const double *n_wall, double *X0, double *nij,
+    double *overlap)
+{
+  //x_wall is a point on the wall TODO: is this actually stored somewhere?
+  // n_wall is the wall normal pointing from wall to particle in the global frame
+  // We might hav to change the fix wall gran files to achieve contact with the wall.
+  // I implemented the function but we might not use it.
+  // Unlike for particle-particle contacts, here we get directly the overlap value.
+
+  double n_local[3];
+  // Transform wall normal into local frame
+  // If n_wall points from Wall->Particle, we want surface normal -n_wall.
+  double n_search[3] = {-n_wall[0], -n_wall[1], -n_wall[2]};
+  MathExtra::transpose_matvec(Ri, n_search, n_local);
+
+  double nx = n_local[0], ny = n_local[1], nz = n_local[2];
+  double a = shapei[0], b = shapei[1], c = shapei[2];
+  double X0_local[3];
+
+  // Calculate Deepest Point
+  if (flagi == 0) {
+    // Ellipsoid
+    double norm = std::sqrt(a * a * nx * nx + b * b * ny * ny + c * c * nz * nz);
+    double inv_norm = (norm > 1e-14) ? 1.0 / norm : 0.0;
+
+    X0_local[0] = a * a * nx * inv_norm;
+    X0_local[1] = b * b * ny * inv_norm;
+    X0_local[2] = c * c * nz * inv_norm;
+  } else {
+    // General Superellipsoid
+    double nx_abs = std::fabs(nx);
+    double ny_abs = std::fabs(ny);
+    double nz_abs = std::fabs(nz);
+    double n1 = blocki[0];
+    double n2 = blocki[1];
+
+    double x, y, z;
+
+    if (nx_abs < 1e-14 && ny_abs < 1e-14) {
+      x = 0.0;
+      y = 0.0;
+      z = c * ((nz > 0) ? 1.0 : -1.0);
+    } else {
+      double p2 = 1.0 / (n2 - 1.0);
+      double p1 = 1.0 / (n1 - 1.0);
+
+      if (nx_abs > ny_abs) {
+        double alpha = std::pow((b * ny_abs) / (a * nx_abs), p2);
+        double gamma = std::pow(1.0 + std::pow(alpha, n2), n1 / n2 - 1.0);
+        double beta = std::pow((c * nz_abs) / (a * nx_abs) * gamma, p1);
+
+        double den =
+            std::pow(std::pow(1.0 + std::pow(alpha, n2), n1 / n2) + std::pow(beta, n1), 1.0 / n1);
+        x = 1.0 / den;
+        y = alpha * x;
+        z = beta * x;
+      } else {
+        double alpha = std::pow((a * nx_abs) / (b * ny_abs), p2);
+        double gamma = std::pow(1.0 + std::pow(alpha, n2), n1 / n2 - 1.0);
+        double beta = std::pow((c * nz_abs) / (b * ny_abs) * gamma, p1);
+
+        double den =
+            std::pow(std::pow(1.0 + std::pow(alpha, n2), n1 / n2) + std::pow(beta, n1), 1.0 / n1);
+        y = 1.0 / den;
+        x = alpha * y;
+        z = beta * y;
+      }
+
+      x *= a;
+      y *= b;
+      z *= c;
+
+      if (n_local[0] < 0) x = -x;
+      if (n_local[1] < 0) y = -y;
+      if (n_local[2] < 0) z = -z;
+    }
+    X0_local[0] = x;
+    X0_local[1] = y;
+    X0_local[2] = z;
+  }
+
+  // Transform to Global Frame
+  MathExtra::matvec(Ri, X0_local, X0);
+  for (int k = 0; k < 3; k++) X0[k] += xci[k];    // Translate to Global Position
+
+  // Set Contact Normal (Always wall normal for plane contacts)
+  nij[0] = n_wall[0];
+  nij[1] = n_wall[1];
+  nij[2] = n_wall[2];
+
+  // Check Overlap
+  double dx = X0[0] - x_wall[0];
+  double dy = X0[1] - x_wall[1];
+  double dz = X0[2] - x_wall[2];
+
+  // Project onto Wall Normal, if dist < 0, the point is "behind" the wall face.
+  double dist = dx * n_wall[0] + dy * n_wall[1] + dz * n_wall[2];
+
+  if (dist < 0.0) {
+    *overlap = -dist;    // Store positive overlap value
+    return 0;            // contact
+  }
+
+  *overlap = 0.0;
+  return 1;    // no contact
 }
 
 #endif
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 52eab6076df..4a3c1a954b5 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -36,7 +36,7 @@
 using namespace LAMMPS_NS;
 
 
-static constexpr int NUMSTEP_INITIAL_GUESS = 8;
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 
 /* ---------------------------------------------------------------------- */
 
@@ -186,8 +186,8 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
               touching = true;
             else if (status == 1)
               touching = false;
-            else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->all(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
+            else 
+              error->warning(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -223,8 +223,10 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
                 touching = true;
               else if (status == 1)
                 touching = false;
-              else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-                error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS){
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {}", status, atom->tag[i], atom->tag[j]);
+              }  
             }
           }
         }
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
index ab2ab26dffe..a4d5e381b4c 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
@@ -32,7 +32,7 @@ class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
   double single(int, int, int, int, double, double, double, double &) override;
 
  protected:
-  int curvature_model; 
+  int curvature_model;
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 0cb35ffee92..a67b6655ff8 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -11,7 +11,6 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-
 #include "pair_gran_hooke_history_ellipsoid.h"
 
 #include "atom.h"
@@ -22,13 +21,13 @@
 #include "fix_dummy.h"
 #include "fix_neigh_history.h"
 #include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "update.h"
-#include "math_extra.h" // probably needed for some computations
-#include "math_extra_superellipsoids.h"
 #include <iostream>
 
 #include <cmath>
@@ -36,7 +35,6 @@
 
 using namespace LAMMPS_NS;
 
-
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 
 /* ---------------------------------------------------------------------- */
@@ -48,7 +46,8 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
   centroidstressflag = CENTROID_NOTAVAIL;
   finitecutflag = 1;
   use_history = 1;
-  size_history = 8;  // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
+  size_history =
+      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
 
   single_extra = 10;
   svector = new double[10];
@@ -74,7 +73,6 @@ PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair
       modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
 
   contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-
 }
 
 /* ---------------------------------------------------------------------- */
@@ -119,7 +117,8 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   int *touch, **firsttouch;
   double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
 
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
   AtomVecEllipsoid::BlockType flagi, flagj;
 
   ev_init(eflag, vflag);
@@ -181,7 +180,6 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
     ztmp = x[i][2];
     radi = radius[i];
 
-
     touch = firsttouch[i];
     allhistory = firsthistory[i];
     jlist = firstneigh[i];
@@ -219,7 +217,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         if (bounding_box) {
           separating_axis = &allhistory[7 + size_history * jj];
           skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-                                       x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
         }
         if (skip_contact_detection)
           touching = false;
@@ -235,15 +233,19 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             X0[3] = X0_prev[3];
             // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
             //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
-            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                           x[j], Rj, shapej, blockj, flagj,
-                                                                           X0, nij, contact_formulation);
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
             if (status == 0)
               touching = true;
             else if (status == 1)
               touching = false;
-            else // TODO: Consider making an else if and print warning if LAPACK ok, but NR not converged, instead of error and fail the run ?
-              error->one(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
+            else if (status == 2) {
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed with status {} "
+                             "betwen particle {} and particle {} ",
+                             status, atom->tag[i], atom->tag[j]);
+            }
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -258,13 +260,13 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
             double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
             double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
             MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi; // Lagrange multiplier mu^2
-            for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
               double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
               shapei[0] = shapei[1] = shapei[2] = reqi;
               shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
               blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
               blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
               blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
@@ -272,23 +274,27 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
               // force ellipsoid flag for first initial guess iteration.
               // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                             X0, nij, contact_formulation);
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
               if (status == 0)
                 touching = true;
               else if (status == 1)
                 touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS){
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
                 // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {}", status, atom->tag[i], atom->tag[j]);
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed with status {} "
+                               "betwen particle {} and particle {}",
+                               status, atom->tag[i], atom->tag[j]);
               }
             }
           }
         }
       }
 
-
       if (!touching) {
         // unset non-touching neighbors
 
@@ -304,32 +310,34 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         X0_prev[2] = X0[2] - x[ref_index][2];
         X0_prev[3] = X0[3];
 
-        double nji[3] = { -nij[0], -nij[1], -nij[2] };
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
         // compute overlap depth along normal direction for each grain
         // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
 
-        // branch vectors 
+        // branch vectors
         double cr1[3], cr2[3];
         MathExtra::sub3(X0, x[i], cr1);
         MathExtra::sub3(X0, x[j], cr2);
 
         // we need to take the cross product of omega
 
-        double ex_space[3],ey_space[3],ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
-        MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
-                                   bonus[ellipsoid[i]].inertia,omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
-        MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
-                                   bonus[ellipsoid[j]].inertia,omegaj);
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
 
         double omega_cross_r1[3], omega_cross_r2[3];
         MathExtra::cross3(omegai, cr1, omega_cross_r1);
         MathExtra::cross3(omegaj, cr2, omega_cross_r2);
 
-        // relative translational velocity 
+        // relative translational velocity
         // compute directly the sum of relative translational velocity at contact point
         // since rotational velocity contribution is different for superellipsoids
         double cv1[3], cv2[3];
@@ -349,11 +357,11 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         // normal component
 
-        vn1 = nij[0] * vr1; // dot product 
+        vn1 = nij[0] * vr1;    // dot product
         vn2 = nij[1] * vr2;
         vn3 = nij[2] * vr3;
 
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
 
         // tangential component
 
@@ -382,7 +390,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         // normal forces = Hookian contact + normal velocity damping
 
         damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
         if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
         // shear history effects
@@ -438,7 +446,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         fx = nji[0] * ccel + fs1;
         fy = nji[1] * ccel + fs2;
         fz = nji[2] * ccel + fs3;
-        fx *= factor_lj; // I think factor lj is just 1 except for special bonds
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;
         f[i][0] += fx;
@@ -449,7 +457,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         tor1 = cr1[1] * fz - cr1[2] * fy;
         tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx; 
+        tor3 = cr1[0] * fy - cr1[1] * fx;
 
         tor1 *= factor_lj;
         tor2 *= factor_lj;
@@ -462,17 +470,19 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
-          
+
           tor1 = cr2[1] * fz - cr2[2] * fy;
           tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx; 
+          tor3 = cr2[0] * fy - cr2[1] * fx;
 
           torque[j][0] -= tor1;
           torque[j][1] -= tor2;
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // Correct even for non-spherical particles
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
       }
     }
   }
@@ -527,7 +537,7 @@ void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
 
   limit_damping = 0;
   bounding_box = 0;
-  for (int iarg = 6 ; iarg < narg ; iarg++) {
+  for (int iarg = 6; iarg < narg; iarg++) {
     if (strcmp(arg[iarg], "limit_damping") == 0)
       limit_damping = 1;
     else if (strcmp(arg[iarg], "bounding_box") == 0)
@@ -538,7 +548,7 @@ void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
       error->all(FLERR, "Illegal pair_style command");
   }
 
-  size_history = 8; // reset to default for safety
+  size_history = 8;    // reset to default for safety
   if (bounding_box == 0) size_history--;
 
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
@@ -581,7 +591,9 @@ void PairGranHookeHistoryEllipsoid::init_style()
   // error and warning checks
 
   if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->ellipsoid_flag)
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and ellipdoid flag");
+    error->all(
+        FLERR,
+        "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and ellipdoid flag");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 
@@ -599,7 +611,8 @@ void PairGranHookeHistoryEllipsoid::init_style()
   // this is so its order in the fix list is preserved
 
   if (use_history && (fix_history == nullptr)) {
-    auto cmd = fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
+    auto cmd =
+        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
     fix_history = dynamic_cast<FixNeighHistory *>(
         modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
     fix_history->pair = this;
@@ -770,7 +783,8 @@ void PairGranHookeHistoryEllipsoid::reset_dt()
 /* ---------------------------------------------------------------------- */
 
 double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
 {
   double radi, radj, radsum;
   double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
@@ -816,9 +830,10 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   if (bounding_box) {
-    double separating_axis = allhistory[7 + size_history * neighprev]; // Copy: no update of history in single
+    double separating_axis =
+        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
     bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-                                      x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
     if (no_bouding_box_contact) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -830,7 +845,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   AtomVecEllipsoid::BlockType flagi, flagj;
   flagi = bonus[ellipsoid[i]].type;
   flagj = bonus[ellipsoid[j]].type;
-  double* X0_prev = &allhistory[3 + size_history * neighprev];
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
   if (touch[neighprev] == 1) {
     int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
     // Continued contact: use grain true shape and last contact point
@@ -842,23 +857,23 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
                                                                    x[j], Rj, shapej, blockj, flagj,
                                                                    X0, nij, contact_formulation);
     if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
     }
     if (status != 0)
-        error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+      error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
   } else {
     double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
     double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
     MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi; // Lagrange multiplier mu^2
-    for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
       double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
       shapei[0] = shapei[1] = shapei[2] = reqi;
       shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
       blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
       blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
       blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
@@ -866,9 +881,10 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
       // force ellipsoid flag for first initial guess iteration.
       // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-                                                                     x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                     X0, nij, contact_formulation);
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
       if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -879,22 +895,24 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
     }
   }
   double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = { -nij[0], -nij[1], -nij[2] };
-  overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
 
   double cr1[3], cr2[3];
   MathExtra::sub3(X0, x[i], cr1);
   MathExtra::sub3(X0, x[j], cr2);
 
-  double ex_space[3],ey_space[3],ez_space[3];
+  double ex_space[3], ey_space[3], ez_space[3];
   double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
-  MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
-                             bonus[ellipsoid[i]].inertia,omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
-  MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
-                             bonus[ellipsoid[j]].inertia,omegaj);
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
 
   double omega_cross_r1[3], omega_cross_r2[3];
   MathExtra::cross3(omegai, cr1, omega_cross_r1);
@@ -923,11 +941,11 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
   // normal component
 
-  vn1 = nij[0] * vr1; // dot product
+  vn1 = nij[0] * vr1;    // dot product
   vn2 = nij[1] * vr2;
   vn3 = nij[2] * vr3;
 
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitu
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
 
   // tangential component
 
@@ -958,10 +976,9 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   // normal forces = Hookian contact + normal velocity damping
 
   damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
   if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
-
   double *shear = &allhistory[size_history * neighprev];
   shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
 
@@ -1008,8 +1025,8 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
 /* ---------------------------------------------------------------------- */
 
-int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
-                                            int * /*pbc*/)
+int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
+                                                     int /*pbc_flag*/, int * /*pbc*/)
 {
   int i, j, m;
 
@@ -1042,7 +1059,8 @@ double PairGranHookeHistoryEllipsoid::memory_usage()
   return bytes;
 }
 
-void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/, int /*jtype*/)
+void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
+                                                     int /*jtype*/)
 {
   // Simple direct copy of all history variables (shear, contact point, axis)
   for (int i = 0; i < size_history; i++) {
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 707b2d26f2b..38d0df8806a 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -69,14 +69,17 @@ class PairGranHookeHistoryEllipsoid : public Pair {
   int nmax;                // allocated size of mass_rigid
 
   int contact_formulation;
-  
+
   void allocate();
 
  private:
-
   // Below not implemented. Placeholder if we decide not to compute local hessian in line search
-  static double shape_and_gradient_local(const double*, const double*, const double*, double*); // would return a vector of temporary variables
-  static double hessian_local(const double*, const double*, const double*, double*); // would use the above vector of temporary variables to compute local hessian
+  static double
+  shape_and_gradient_local(const double *, const double *, const double *,
+                           double *);    // would return a vector of temporary variables
+  static double hessian_local(
+      const double *, const double *, const double *,
+      double *);    // would use the above vector of temporary variables to compute local hessian
 };
 
 }    // namespace LAMMPS_NS
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 51faa10c43d..9127cde85c8 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -24,116 +24,173 @@ static constexpr double EPSILON = 1e-4;
 
 TEST(ContactPointAndNormal, sphere)
 {
-  // First grain
-  double xci[3] = {1.0, 5.246, 3.123};
-  double ri = 2.5;
-  double shapei[3] = {ri, ri, ri};
-  double Ri[3][3] = {{1.0, 0.0, 0.0},
-                     {0.0, 1.0, 0.0},
-                     {0.0, 0.0, 1.0}};
-  double blocki[2] = {2.0, 2.0};
-  int flagi = 0;
-
-  // Second grains
-  double xcj[3] = {2.0, -1.562, 4.607};
-  double rj = 1.25;
-  double shapej[3] = {rj, rj, rj};
-  double Rj[3][3] = {{1.0, 0.0, 0.0},
-                     {0.0, 1.0, 0.0},
-                     {0.0, 0.0, 1.0}};
-  double blockj[2] = {2.0, 2.0};
-  int flagj = 0;
-
-   // Analytical solution
-  double X0_analytical[4] = {rj * xci[0] / (ri+rj) + ri * xcj[0] / (ri+rj),
-                             rj * xci[1] / (ri+rj) + ri * xcj[1] / (ri+rj),
-                             rj * xci[2] / (ri+rj) + ri * xcj[2] / (ri+rj),
-                             rj / ri};
-  double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
-  MathExtra::norm3(nij_analytical);
-
-  int method = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-  
-     // Contact detection
-  double X0[4] = {0.0, 0.0, 0.0, 0.0}, nij[3];
-  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
-                                                    xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij, method);
-
-  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
-  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
-  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
-  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
-
-  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
-  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
-  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
-
-  // Rotational invariance
-  double anglei = 0.456;
-  double axisi[3] = {1,2,3};
-  MathExtra::norm3(axisi);
-  double quati[4] = {std::cos(anglei),
-                     std::sin(anglei)*axisi[0],
-                     std::sin(anglei)*axisi[1],
-                     std::sin(anglei)*axisi[2]};
-  MathExtra::quat_to_mat(quati, Ri);
-
-  double anglej = 0.123;
-  double axisj[3] = {-1,2,1};
-  MathExtra::norm3(axisj);
-  double quatj[4] = {std::cos(anglej),
-                     std::sin(anglej)*axisj[0],
-                     std::sin(anglej)*axisj[1],
-                     std::sin(anglej)*axisj[2]};
-  MathExtra::quat_to_mat(quatj, Rj);
-
-  X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
-  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
-                                                    xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij, method);
-
-  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
-
-  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
-  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
-  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+    // First grain
+    double xci[3]    = {1.0, 5.246, 3.123};
+    double ri        = 2.5;
+    double shapei[3] = {ri, ri, ri};
+    double Ri[3][3]  = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+    double blocki[2] = {2.0, 2.0};
+    int flagi        = 0;
+
+    // Second grains
+    double xcj[3]    = {2.0, -1.562, 4.607};
+    double rj        = 1.25;
+    double shapej[3] = {rj, rj, rj};
+    double Rj[3][3]  = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+    double blockj[2] = {2.0, 2.0};
+    int flagj        = 0;
+
+    // Analytical solution
+    double X0_analytical[4]  = {rj * xci[0] / (ri + rj) + ri * xcj[0] / (ri + rj),
+                                rj * xci[1] / (ri + rj) + ri * xcj[1] / (ri + rj),
+                                rj * xci[2] / (ri + rj) + ri * xcj[2] / (ri + rj), rj / ri};
+    double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
+    MathExtra::norm3(nij_analytical);
+
+    int method = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
 
+    // Contact detection
+    double X0[4] = {0.0, 0.0, 0.0, 0.0}, nij[3];
+    MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi, xcj, Rj,
+                                                      shapej, blockj, flagj, X0, nij, method);
+
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+
+    // Rotational invariance
+    double anglei   = 0.456;
+    double axisi[3] = {1, 2, 3};
+    MathExtra::norm3(axisi);
+    double quati[4] = {std::cos(anglei), std::sin(anglei) * axisi[0], std::sin(anglei) * axisi[1],
+                       std::sin(anglei) * axisi[2]};
+    MathExtra::quat_to_mat(quati, Ri);
+
+    double anglej   = 0.123;
+    double axisj[3] = {-1, 2, 1};
+    MathExtra::norm3(axisj);
+    double quatj[4] = {std::cos(anglej), std::sin(anglej) * axisj[0], std::sin(anglej) * axisj[1],
+                       std::sin(anglej) * axisj[2]};
+    MathExtra::quat_to_mat(quatj, Rj);
+
+    X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
+    MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi, xcj, Rj,
+                                                      shapej, blockj, flagj, X0, nij, method);
+
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
+
+    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
 }
 
 TEST(ContactPointAndNormal, supersphere_mono)
 {
-  double r = 3.456;
-  double xci[3] = {-2*r, 0.0, 0.0};
-  double xcj[3] = {2*r, 0.0, 0.0};
-  double shape[3] = {r, r, r};
-  double R[3][3] = {{1.0, 0.0, 0.0},
-                    {0.0, 1.0, 0.0},
-                    {0.0, 0.0, 1.0}};
-
-  std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
-  int method = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-
-  // Analytical solution
-    double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
+    double r        = 3.456;
+    double xci[3]   = {-2 * r, 0.0, 0.0};
+    double xcj[3]   = {2 * r, 0.0, 0.0};
+    double shape[3] = {r, r, r};
+    double R[3][3]  = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+
+    std::vector<double> blocks = {2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0};
+    int method                 = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+
+    // Analytical solution
+    double X0_analytical[4]  = {0.0, 0.0, 0.0, 1.0};
     double nij_analytical[3] = {1.0, 0.0, 0.0};
 
-  for (auto n : blocks) {
-    double block[2] = {n, n};
-    int flag =  (n < 2.01) ? 0 : 1;
+    for (auto n : blocks) {
+        double block[2] = {n, n};
+        int flag        = (n < 2.01) ? 0 : 1;
+
+        // Contact detection
+        // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
+        double X0[4] = {r, -r, 2 * r, 0.0}, nij[3];
+
+        int status = MathExtraSuperellipsoids::determine_contact_point(
+            xci, R, shape, block, flag, xcj, R, shape, block, flag, X0, nij, method);
+
+        std::cout << n << " " << status << " " << X0[0] << " " << X0[1] << " " << X0[2] << " "
+                  << X0[3] << std::endl;
+        ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
+        ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
+        ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
+        ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
+
+        ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+        ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+        ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+    }
+}
+
+TEST(ContactPointAndNormal, sphere_geometric)
+{
+    // First grain
+    double ri        = 2.5;
+    double rj        = 1.25;
+    double overlap   = -0.5;
+    double xci[3]    = {-(ri - overlap / 2.0), 0.0, 0.0};
+    double shapei[3] = {ri, ri, ri};
+    double Ri[3][3]  = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+    double blocki[2] = {2.0, 2.0};
+    int flagi        = 0;
+
+    // Second grains
+    double xcj[3] = {rj - overlap / 2.0, 0.0, 0.0};
+
+    double shapej[3] = {rj, rj, rj};
+    double Rj[3][3]  = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+    double blockj[2] = {2.0, 2.0};
+    int flagj        = 0;
+
+    // Analytical solution
+    double X0_analytical[4]  = {0.0, 0.0, 0.0, 1.0};
+    double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
+    MathExtra::norm3(nij_analytical);
+
+    int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
 
     // Contact detection
-    // Some starting point away from (0,0,0). Possibly bad initial guess so test is demanding
-    double X0[4] = {r, -r, 2*r, 0.0}, nij[3];
+    double X0[4] = {.1, .1, .1, 1.0}, nij[3];
+    MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi, xcj, Rj,
+                                                      shapej, blockj, flagj, X0, nij, method);
+
+    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
+    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
+    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
+    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
+
+    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
+    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
+    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
 
-    int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shape, block, flag,
-                                                                  xcj, R, shape, block, flag,
-                                                                  X0, nij, method);
+    // Rotational invariance
+    double anglei   = 0.456;
+    double axisi[3] = {1, 2, 3};
+    MathExtra::norm3(axisi);
+    double quati[4] = {std::cos(anglei), std::sin(anglei) * axisi[0], std::sin(anglei) * axisi[1],
+                       std::sin(anglei) * axisi[2]};
+    MathExtra::quat_to_mat(quati, Ri);
+
+    double anglej   = 0.123;
+    double axisj[3] = {-1, 2, 1};
+    MathExtra::norm3(axisj);
+    double quatj[4] = {std::cos(anglej), std::sin(anglej) * axisj[0], std::sin(anglej) * axisj[1],
+                       std::sin(anglej) * axisj[2]};
+    MathExtra::quat_to_mat(quatj, Rj);
+
+    X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
+    MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi, xcj, Rj,
+                                                      shapej, blockj, flagj, X0, nij, method);
 
-    std::cout<<n<<" "<<status<<" "<<X0[0]<<" "<<X0[1]<<" "<<X0[2]<<" "<<X0[3]<<std::endl;
     ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
     ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
     ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
@@ -142,138 +199,55 @@ TEST(ContactPointAndNormal, supersphere_mono)
     ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
     ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
     ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
-  }
 }
 
-TEST(ContactPointAndNormal, sphere_geometric)
+TEST(ContactPointAndNormal, supersphere_poly_geometric)
 {
-  // First grain
-  double ri = 2.5;
-  double rj = 1.25;
-  double overlap = -0.5;
-  double xci[3] = {-(ri - overlap/2.0), 0.0, 0.0};
-  double shapei[3] = {ri, ri, ri};
-  double Ri[3][3] = {{1.0, 0.0, 0.0},
-                     {0.0, 1.0, 0.0},
-                     {0.0, 0.0, 1.0}};
-  double blocki[2] = {2.0, 2.0};
-  int flagi = 0;
-
-  // Second grains
-  double xcj[3] = {rj - overlap/2.0, 0.0, 0.0};
-  
-  double shapej[3] = {rj, rj, rj};
-  double Rj[3][3] = {{1.0, 0.0, 0.0},
-                     {0.0, 1.0, 0.0},
-                     {0.0, 0.0, 1.0}};
-  double blockj[2] = {2.0, 2.0};
-  int flagj = 0;
-
-   // Analytical solution
-  double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};
-  double nij_analytical[3] = {xcj[0] - xci[0], xcj[1] - xci[1], xcj[2] - xci[2]};
-  MathExtra::norm3(nij_analytical);
-
-  int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-  
-  // Contact detection
-  double X0[4] = {.1, .1, .1, 1.0}, nij[3];
-  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
-                                                    xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij, method);
-
-  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON);
-  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON);
-  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON);
-  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON);
-
-  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
-  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
-  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
-
-  // Rotational invariance
-  double anglei = 0.456;
-  double axisi[3] = {1,2,3};
-  MathExtra::norm3(axisi);
-  double quati[4] = {std::cos(anglei),
-                     std::sin(anglei)*axisi[0],
-                     std::sin(anglei)*axisi[1],
-                     std::sin(anglei)*axisi[2]};
-  MathExtra::quat_to_mat(quati, Ri);
-
-  double anglej = 0.123;
-  double axisj[3] = {-1,2,1};
-  MathExtra::norm3(axisj);
-  double quatj[4] = {std::cos(anglej),
-                     std::sin(anglej)*axisj[0],
-                     std::sin(anglej)*axisj[1],
-                     std::sin(anglej)*axisj[2]};
-  MathExtra::quat_to_mat(quatj, Rj);
-
-  X0[0] = X0[1] = X0[2] = X0[3] = 0.0;
-  MathExtraSuperellipsoids::determine_contact_point(xci, Ri, shapei, blocki, flagi,
-                                                    xcj, Rj, shapej, blockj, flagj,
-                                                    X0, nij, method);
-
-  ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
-  ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Method: " << method;
-
-  ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON);
-  ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON);
-  ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON);
+    double r1      = 3.456;
+    double r2      = 3.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
+    double overlap = r1 / 5.0;
+    double xci[3]  = {-(r1 - overlap / 2.0), 0.0, 0.0};
+    double xcj[3]  = {r2 - overlap / 2.0, 0.0, 0.0};
 
-}
+    double shapei[3] = {r1, r1, r1};
+    double shapej[3] = {r2, r2, r2};
 
-TEST(ContactPointAndNormal, supersphere_poly_geometric)
-{
-  double r1 = 3.456;
-  double r2 = 3.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
-  double overlap = r1 / 5.0; 
-  double xci[3] = {-(r1 - overlap/2.0), 0.0, 0.0};
-  double xcj[3] = {  r2 - overlap/2.0 , 0.0, 0.0}; 
-  
-  double shapei[3] = {r1, r1, r1};
-  double shapej[3] = {r2, r2, r2};
-  
-  // Identity Rotation
-  double R[3][3] = {{1.0, 0.0, 0.0},
-                    {0.0, 1.0, 0.0},
-                    {0.0, 0.0, 1.0}};
-
-  std::vector<double> blocks = {2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // test would no converge for higher n if not starting along the line connecting the centers
-  int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-  
-  double nij_analytical[3] = {1.0, 0.0, 0.0};
-  double X0_analytical[4] = {0.0, 0.0, 0.0, 1.0};    
-  
-  for (auto n : blocks) {
-    double block[2] = {n, n};
-    int flag =  (n < 2.01) ? 0 : 1;
-
-    // Initial Guess: Offset from 0 to test convergence
-    double X0[4] = {overlap/80, overlap/80, overlap/80, 1.0}, nij[3]; 
-
-    int status = MathExtraSuperellipsoids::determine_contact_point(xci, R, shapei, block, flag,
-                                                                  xcj, R, shapej, block, flag,
-                                                                  X0, nij, method);
-
-    std::cout << "n=" << n << " Status=" << status << " Res: " 
-              << X0[0] << " " << X0[1] << " " << X0[2] << " mu=" << X0[3] << std::endl;
-    
-
-    ASSERT_EQ(status, 0) << "Failed to converge/detect contact for n=" << n;
-    
-    ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Position X failed for n=" << n;
-    ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Position Y failed for n=" << n;
-    ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Position Z failed for n=" << n;
-    ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Lagrange Multiplier failed for n=" << n;
-
-    ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON) << "Normal X failed for n=" << n;
-    ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON) << "Normal Y failed for n=" << n;
-    ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON) << "Normal Z failed for n=" << n;
-  }
+    // Identity Rotation
+    double R[3][3] = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+
+    std::vector<double> blocks = {
+        2.0, 3.0, 4.0, 5.0, 6.0,
+        7.0, 8.0, 9.0, 10.0}; // test would no converge for higher n if not starting along the line
+                              // connecting the centers
+    int method = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+
+    double nij_analytical[3] = {1.0, 0.0, 0.0};
+    double X0_analytical[4]  = {0.0, 0.0, 0.0, 1.0};
+
+    for (auto n : blocks) {
+        double block[2] = {n, n};
+        int flag        = (n < 2.01) ? 0 : 1;
+
+        // Initial Guess: Offset from 0 to test convergence
+        double X0[4] = {overlap / 80, overlap / 80, overlap / 80, 1.0}, nij[3];
+
+        int status = MathExtraSuperellipsoids::determine_contact_point(
+            xci, R, shapei, block, flag, xcj, R, shapej, block, flag, X0, nij, method);
+
+        std::cout << "n=" << n << " Status=" << status << " Res: " << X0[0] << " " << X0[1] << " "
+                  << X0[2] << " mu=" << X0[3] << std::endl;
+
+        ASSERT_EQ(status, 0) << "Failed to converge/detect contact for n=" << n;
+
+        ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Position X failed for n=" << n;
+        ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Position Y failed for n=" << n;
+        ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Position Z failed for n=" << n;
+        ASSERT_NEAR(X0[3], X0_analytical[3], EPSILON) << "Lagrange Multiplier failed for n=" << n;
+
+        ASSERT_NEAR(nij[0], nij_analytical[0], EPSILON) << "Normal X failed for n=" << n;
+        ASSERT_NEAR(nij[1], nij_analytical[1], EPSILON) << "Normal Y failed for n=" << n;
+        ASSERT_NEAR(nij[2], nij_analytical[2], EPSILON) << "Normal Z failed for n=" << n;
+    }
 }
 
 // TODO: supersphere_mono with grains overlapping

From 3f34ce56baba27934f65c7129eb4f2d8bc9a8753 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 20 Jan 2026 18:11:36 +0100
Subject: [PATCH 096/174] Added details in documentation about failed contacts

---
 doc/src/pair_gran_ellipsoid.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index d8159165fe8..5245b011add 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -291,6 +291,14 @@ All settings are global and are made via the pair_style command.
 However you must still use the :doc:`pair_coeff <pair_coeff>` for all
 pairs of granular atom types.  For example the command
 
+.. note::
+   For particles with high blockiness exponents (:math:`n > 4`) involved in edge-to-edge
+   or corner-to-corner contacts, the surface normal vector varies rapidly over microscopic
+   distances. The Newton solver may occasionally fail to converge to the strict gradient
+   alignment tolerance (typically :math:`10^{-10}`).
+   You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
+   but the simulation will proceed using this best-effort contact point.
+
 .. code-block:: LAMMPS
 
    pair_coeff * *

From dd58ef92d233d9d16a48a7c029796f16c69416b6 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 21 Jan 2026 10:48:27 +0100
Subject: [PATCH 097/174] Fixed minor bug in math_special after rebase

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 22 ++++++++-----------
 src/math_special.h                            |  3 +--
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 5dff3952976..12fa9996f39 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -13,28 +13,24 @@ neigh_modify   delay 0 every 1 check yes
 
 # 1. Setup Simulation Box
 # Expanded box to accommodate the floor and falling height
-region          box block 0 15 0 15 0 20
+variable        box_length equal 25
+variable        box_height equal 30
+
+region          box block 0 25 0 25 0 30
 create_box      2 box
 
 # 3. Create Lattice Wall (Type 1)
 # Use a dense lattice (spacing 1.0) to ensure no gaps in the Y-direction (size 1.0).
 # Note: Overlap in X (size 2.0) is ignored because this group is frozen.
-lattice         sc 1.0
-region          floor_reg block 0 15 0 15 0 0.5
-region          side1_reg block 0 1 0 15 0 20
-region          side2_reg block 14 15 0 15 0 20
-region          side3_reg block 0 15 0 1 0 20
-region          side4_reg block 0 15 14 15 0 20
 
-# make a union of regions for the walls
-region          wall_reg union 5 floor_reg side1_reg side2_reg side3_reg side4_reg
+lattice         sc 1.0
+region          floor_reg block 0 ${box_length} 0 ${box_length} 0 0.5
 
 create_atoms    1 region floor_reg
 
 # 4. Create Falling Particles (Type 2)
-# Create 50 random particles in the air
-region          drop_zone block 2 13 2 13 5 15
-create_atoms    2 random 100 12345 drop_zone overlap 2.0 maxtry 1000 
+region          drop_zone block 5 20 5 20 10 ${box_height}
+create_atoms    2 random 600 12345 drop_zone overlap 2.0 maxtry 1000 
 
 # 2. Material Properties
 # Type 1: Wall particles
@@ -57,7 +53,7 @@ group           mobile type 2
 
 # 6. Interaction / Pair Style
 # Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box geometric
+pair_style      gran/hooke/history/ellipsoid 1e3 0.0 10.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
 # 7. Computes and Output
diff --git a/src/math_special.h b/src/math_special.h
index c1b5c355410..077d2b4c65d 100644
--- a/src/math_special.h
+++ b/src/math_special.h
@@ -184,8 +184,7 @@ namespace LAMMPS_NS::MathSpecial {
 
     return yy;
   }
-
 }    // namespace MathSpecial
-}    // namespace LAMMPS_NS
+
 
 #endif

From d2d447931693b2ba0ba434fefb3affd9fd2f7fc4 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 30 Jan 2026 13:10:23 +0100
Subject: [PATCH 098/174] Corrected previous bug in NR exit. Fixed bugs for
 step clumping and LS for the geometric formulation

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  |  2 +-
 src/ASPHERE/math_extra_superellipsoids.cpp    | 63 +++++++------------
 2 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 12fa9996f39..bf81326589c 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -85,6 +85,6 @@ fix             2 mobile gravity 9.81 vector 0 0 -1
 fix             3 mobile nve/asphere
 
 # 9. Run
-thermo          1000
+thermo          100
 timestep        0.001
 run             300000
\ No newline at end of file
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 2510b57077a..47448d26355 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -33,7 +33,7 @@ extern "C" { // General Matrices
 namespace MathExtraSuperellipsoids {
 
 static constexpr int ITERMAX_NR = 100;
-static constexpr double TOL_NR_RES = 1e-5 * 1e-5;
+static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
 static constexpr double TOL_NR_POS = 1e-6 * 1e-6;
 
 static constexpr int ITERMAX_LS = 10;
@@ -337,7 +337,7 @@ double compute_residual(const double shapefunci, const double* gradi_global,
   double scalar_denom;
 
   if (formulation == FORMULATION_GEOMETRIC) {
-      // GEOMETRIC: F is a distance (Length).
+      // GEOMETRIC: G is a distance (Length).
       scalar_denom = radius_scale; 
   } else {
       // ALGEBRAIC: F is dimensionless (approx 0 at surface).
@@ -410,15 +410,20 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   if (norm < TOL_NR_RES) {
     
     //  must compute the normal vector nij before returning since the Newton loop normally handles this upon convergence.
-    double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
+    double xilocal[3], tmp_v[3], gradi[3], val_dummy;
 
     // Transform global X0 to local frame of particle I
     MathExtra::sub3(X0, xci, tmp_v);
     MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
 
-    // Compute local gradient (we could ignore the Hessian here)
+    // Compute local gradient
     // Algebraic gradient is fine for direction even if we used Geometric for solving
-    shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
+    // TODO: might use a simpler function to simply compute the gradient, to 
+    // avoid computing quantities already computed in compute_residual_and_jacobian
+    if (flagi <= 1)
+      val_dummy = shape_and_gradient_local_n1equaln2_surfacesearch(xilocal, shapei, blocki[0], gradi);
+    else
+      val_dummy = shape_and_gradient_local_superquad_surfacesearch(xilocal, shapei, blocki, gradi);
 
     // Rotate gradient back to global frame to get normal
     MathExtra::matvec(Ri, tmp_v, gradi);
@@ -449,7 +454,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
     b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
 
-    // 2. Try Fast Solver
+    // Try Fast Solver
     if (MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast)) {
         rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
         rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
@@ -497,25 +502,25 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     double a(multiplicity), X_line[4];
     int iter_ls;
 
+    if (formulation == FORMULATION_GEOMETRIC) {
+      a = 1.0; // no need for multiplicity scaling 
+      // Limit the max step size to avoid jumping too far
+      // normalize residual vector if step was limited
+      double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);
+      if (spatial_residual_norm > max_step) {
+          double scale = max_step / spatial_residual_norm;
+          rhs[0] *= scale;
+          rhs[1] *= scale;
+          rhs[2] *= scale;
+      }
+    }
+
     for (iter_ls = 0 ; iter_ls < ITERMAX_LS ; iter_ls++) {
       X_line[0] = X0[0] + a * rhs[0];
       X_line[1] = X0[1] + a * rhs[1];
       X_line[2] = X0[2] + a * rhs[2];
       X_line[3] = X0[3] + a * rhs[3];
 
-      if (formulation == FORMULATION_GEOMETRIC) {
-          // Limit the max step size to avoid jumping too far
-          // normalize residual vector if step was limited
-          double spatial_residual_norm = std::sqrt(residual[0]*residual[0] + residual[1]*residual[1] + residual[2]*residual[2]);
-          a = 1; // reset a to 1 for proper step size in geometric formulation
-          if (spatial_residual_norm > max_step) {
-              double scale = max_step / spatial_residual_norm;
-              rhs[0] *= scale;
-              rhs[1] *= scale;
-              rhs[2] *= scale;
-          }
-      }
-
       // Line search iterates not selected for the next Newton iteration
       // do not need to compute the expensive Jacobian, only the residual.
       // We want to avoid calling `compute_residual_and_jacobian()` for each
@@ -625,26 +630,6 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       break;
   }
 
-  // If we ran out of iterations, check if the residual is acceptable.
-  // We ignore the "step size" check here because sliding on flat faces (N=6,8)
-  // often keeps moving while maintaining a perfect residual.
-  if (!converged && norm < TOL_NR_RES) {
-       converged = true;
-       
-       // Re-compute the normal 'nij' for this final point
-       // because the loop broke without updating it for the final X0.
-       double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
-       MathExtra::sub3(X0, xci, tmp_v);
-       MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
-       
-       shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
-       if (formulation == FORMULATION_GEOMETRIC) {
-           apply_regularization_shape_function(blocki[0], avg_radius_i, &shapefunc[0], tmp_v, hess_dummy);
-       }
-       MathExtra::matvec(Ri, tmp_v, gradi);
-       MathExtra::normalize3(gradi, nij);
-  }
-
   // LAPACK dgetrs() error values are negative, return values:
   // 2 = failed convergence
   // 1 = converged but grains not touching

From c85052b93fe3daa7f4d0c7225e384a762d5a1230 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 20 Feb 2026 15:28:27 +0100
Subject: [PATCH 099/174] Removed Lapack solve and redundant handwritten solve
 functions. Added test for handwritten solve.

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  |  26 ++--
 src/ASPHERE/math_extra_superellipsoids.cpp    |  87 +++++-------
 src/ASPHERE/math_extra_superellipsoids.h      | 125 +-----------------
 .../utils/test_math_extra_superellipsoids.cpp |  29 +++-
 4 files changed, 79 insertions(+), 188 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index bf81326589c..89e7d594358 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -4,9 +4,10 @@
 units           si
 atom_style      ellipsoid
 dimension       3
-boundary        p p p 
+boundary        p p f 
 comm_modify     vel yes
 newton          off
+processors      * * 1
 
 neighbor 0.5 bin
 neigh_modify   delay 0 every 1 check yes
@@ -14,9 +15,9 @@ neigh_modify   delay 0 every 1 check yes
 # 1. Setup Simulation Box
 # Expanded box to accommodate the floor and falling height
 variable        box_length equal 25
-variable        box_height equal 30
+variable        box_height equal 300
 
-region          box block 0 25 0 25 0 30
+region          box block 0 ${box_length} 0 ${box_length} 0 ${box_height}
 create_box      2 box
 
 # 3. Create Lattice Wall (Type 1)
@@ -30,7 +31,7 @@ create_atoms    1 region floor_reg
 
 # 4. Create Falling Particles (Type 2)
 region          drop_zone block 5 20 5 20 10 ${box_height}
-create_atoms    2 random 600 12345 drop_zone overlap 2.0 maxtry 1000 
+create_atoms    2 random 3000 12345 drop_zone overlap 2.0 maxtry 100
 
 # 2. Material Properties
 # Type 1: Wall particles
@@ -43,8 +44,9 @@ set             type 2 shape 2.0 1.0 1.0
 
 # Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
 # Necessary to prevent division-by-zero in the dump variables below
-set             type 1 block 5.0 5.0 
-set             type 2 block 5.0 5.0 
+variable        blockiness equal 8.0
+set             type 1 block 4.0 4.0 
+set             type 2 block ${blockiness} ${blockiness} 
 
 # 5. Define Groups
 group           wall type 1
@@ -53,7 +55,7 @@ group           mobile type 2
 
 # 6. Interaction / Pair Style
 # Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e3 0.0 10.0 0.0 0.5 0 bounding_box
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box geometric
 pair_coeff      * *
 
 # 7. Computes and Output
@@ -65,7 +67,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 100 dump_drop.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 100 dump_drop_geometric_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
@@ -84,7 +86,11 @@ fix             2 mobile gravity 9.81 vector 0 0 -1
 # Applied to all, but fix freeze overrides integration for the wall
 fix             3 mobile nve/asphere
 
+compute rke all erotate/asphere
+
 # 9. Run
 thermo          100
-timestep        0.001
-run             300000
\ No newline at end of file
+thermo_style  custom time step ke c_rke 
+
+timestep        0.0001
+run             100000
\ No newline at end of file
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 47448d26355..de11933cb60 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -22,16 +22,11 @@
 // #include "math_special.h"
 // #include "math_const.h"
 
-// #include <algorithm>
-// #include <cstring>
-
-extern "C" { // General Matrices
-    void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info); // Factorize
-    void dgetrs_(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, int *ipiv, double *b, const int *ldb, int *info); // Solve (using factorzation)
-}
-
 namespace MathExtraSuperellipsoids {
 
+inline constexpr double TIKHONOV_SCALE =
+    1e-14;    // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
+
 static constexpr int ITERMAX_NR = 100;
 static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
 static constexpr double TOL_NR_POS = 1e-6 * 1e-6;
@@ -397,11 +392,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // avg radii for regularization if GEOMETRIC formulation
   double avg_radius_i = 1;
   double avg_radius_j = 1;
-  double max_step;
+  double max_step = std::sqrt(lsq) / 3.0;
   if (formulation == FORMULATION_GEOMETRIC) {
     avg_radius_i = (shapei[0] + shapei[1] + shapei[2]) / 3.0;
     avg_radius_j = (shapej[0] + shapej[1] + shapej[2]) / 3.0;
-    max_step = std::sqrt(lsq) / 3.0;
   }
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian, formulation, avg_radius_i, avg_radius_j);
@@ -460,38 +454,27 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
         gauss_elim_solved = true;
     }
-
-    // Fallback to LAPACK
-    if (!gauss_elim_solved) {
-
-        rhs[0] = -residual[0]; rhs[1] = -residual[1]; 
-        rhs[2] = -residual[2]; rhs[3] = -residual[3];
-
-        int lapack_error = 0;
-        int ipiv[16];
-        const int n = 4;
-        const char trans = 'N'; 
-        const int nrhs = 1;
-        
-        dgetrf_(&n, &n, jacobian, &n, ipiv, &lapack_error);
-        
-        if (lapack_error < 0) {
-            return lapack_error;
-        } else if (lapack_error > 0) { 
-            // Singular: Apply Tikhonov "Patch" to the LU FACTORS
-            // This is the "Dirty Hack" that makes the aligned test pass.
-            // It modifies the pivot U_ii, not the original matrix diagonal.
-            double diag_weight = TIKHONOV_SCALE * (jacobian[0] + jacobian[5] + jacobian[10]);
-            jacobian[0]  += diag_weight;
-            jacobian[5]  += diag_weight;
-            jacobian[10] += diag_weight;
-          
+    else {
+      // restore matrix
+      for(int r=0; r<4; ++r) {
+        for(int c=0; c<4; ++c) {
+            A_fast[r*4 + c] = jacobian[c*4 + r];
         }
-
-        // Solve using the (patched) factors
-        dgetrs_(&trans, &n, &nrhs, jacobian, &n, ipiv, rhs, &n, &lapack_error);
-        
-        if (lapack_error) return lapack_error;
+      }
+      b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
+      b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
+
+      double trace = jacobian[0] + jacobian[5] + jacobian[10];
+      double diag_weight = TIKHONOV_SCALE * trace;
+      A_fast[0]  += diag_weight;
+      A_fast[5]  += diag_weight;
+      A_fast[10] += diag_weight;
+
+      if (MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast)) {
+          rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
+          rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
+          gauss_elim_solved = true;
+      }
     }
 
     if (iter > 0)
@@ -504,15 +487,15 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
     if (formulation == FORMULATION_GEOMETRIC) {
       a = 1.0; // no need for multiplicity scaling 
-      // Limit the max step size to avoid jumping too far
-      // normalize residual vector if step was limited
-      double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);
-      if (spatial_residual_norm > max_step) {
-          double scale = max_step / spatial_residual_norm;
-          rhs[0] *= scale;
-          rhs[1] *= scale;
-          rhs[2] *= scale;
-      }
+    }
+    // Limit the max step size to avoid jumping too far
+    // normalize residual vector if step was limited
+    double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);
+    if (spatial_residual_norm > max_step) {
+        double scale = max_step / spatial_residual_norm;
+        rhs[0] *= scale;
+        rhs[1] *= scale;
+        rhs[2] *= scale;
     }
 
     for (iter_ls = 0 ; iter_ls < ITERMAX_LS ; iter_ls++) {
@@ -630,10 +613,6 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       break;
   }
 
-  // LAPACK dgetrs() error values are negative, return values:
-  // 2 = failed convergence
-  // 1 = converged but grains not touching
-  // 0 = converged and grains touching
   if (!converged){
     if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
     std::cout << "Current residual norm: " << norm << std::endl;
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index d6f3fe77616..12a0c6348f0 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -21,10 +21,9 @@
 #include "math_extra.h"
 #include <cmath>
 #include <iostream>
+#include <limits>
 
 namespace MathExtraSuperellipsoids {
-inline constexpr double TIKHONOV_SCALE =
-    1e-14;    // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
 
 enum ContactFormulation { FORMULATION_ALGEBRAIC = 0, FORMULATION_GEOMETRIC = 1 };
 
@@ -145,122 +144,8 @@ inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
   return ans;
 }
 
-inline bool MathExtraSuperellipsoids::solve_4x4_manual(double A[16], double b[4])
-{
-
-  // 1. Pivot 0
-  double inv0 = 1.0 / A[0];
-  double m1 = A[4] * inv0;
-  double m2 = A[8] * inv0;
-  double m3 = A[12] * inv0;
-
-  A[5] -= m1 * A[1];
-  A[6] -= m1 * A[2];
-  A[7] -= m1 * A[3];
-  b[1] -= m1 * b[0];
-  A[9] -= m2 * A[1];
-  A[10] -= m2 * A[2];
-  A[11] -= m2 * A[3];
-  b[2] -= m2 * b[0];
-  A[13] -= m3 * A[1];
-  A[14] -= m3 * A[2];
-  A[15] -= m3 * A[3];
-  b[3] -= m3 * b[0];
-
-  // 2. Pivot 1
-  double inv1 = 1.0 / A[5];
-  double m4 = A[9] * inv1;
-  double m5 = A[13] * inv1;
-
-  A[10] -= m4 * A[6];
-  A[11] -= m4 * A[7];
-  b[2] -= m4 * b[1];
-  A[14] -= m5 * A[6];
-  A[15] -= m5 * A[7];
-  b[3] -= m5 * b[1];
-
-  // 3. Pivot 2
-  double inv2 = 1.0 / A[10];
-  double m6 = A[14] * inv2;
-
-  A[15] -= m6 * A[11];
-  b[3] -= m6 * b[2];
-
-  // 4. Backward Substitution
-  b[3] = b[3] / A[15];
-  b[2] = (b[2] - A[11] * b[3]) * inv2;
-  b[1] = (b[1] - A[7] * b[3] - A[6] * b[2]) * inv1;
-  b[0] = (b[0] - A[3] * b[3] - A[2] * b[2] - A[1] * b[1]) * inv0;
-
-  return true;
-}
-
-inline bool MathExtraSuperellipsoids::solve_4x4_robust(double A[16], double b[4])
-{
-  // Helper lambda to access A[row, col]
-  auto at = [&](int r, int c) -> double & {
-    return A[r * 4 + c];
-  };
-
-  // --- FORWARD ELIMINATION with PARTIAL PIVOTING ---
-
-  for (int i = 0; i < 3; ++i) {    // Loop over columns 0, 1, 2
-    // 1. Find the Pivot (Max absolute value in this column)
-    int pivot_row = i;
-    double max_val = std::abs(at(i, i));
-
-    for (int k = i + 1; k < 4; ++k) {
-      double val = std::abs(at(k, i));
-      if (val > max_val) {
-        max_val = val;
-        pivot_row = k;
-      }
-    }
-
-    // 2. Singularity Check (The "Flat Particle" Guard)
-    if (max_val < 1e-14) return false;
-
-    // 3. Swap Rows if needed (Swap A rows AND b elements)
-    if (pivot_row != i) {
-      std::swap(b[i], b[pivot_row]);
-      for (int k = i; k < 4; ++k) {    // Only need to swap from column 'i' onwards
-        std::swap(at(i, k), at(pivot_row, k));
-      }
-    }
-
-    // 4. Eliminate
-    double inv_pivot = 1.0 / at(i, i);
-    for (int k = i + 1; k < 4; ++k) {
-      double factor = at(k, i) * inv_pivot;
-      // A[k, i] becomes 0, no need to compute it.
-      // Update the rest of the row:
-      for (int j = i + 1; j < 4; ++j) { at(k, j) -= factor * at(i, j); }
-      // Update RHS
-      b[k] -= factor * b[i];
-    }
-  }
-
-  // Final Pivot Check for the last element
-  if (std::abs(at(3, 3)) < 1e-14) return false;
-
-  // --- BACKWARD SUBSTITUTION ---
-  b[3] /= at(3, 3);
-  b[2] = (b[2] - at(2, 3) * b[3]) / at(2, 2);
-  b[1] = (b[1] - at(1, 2) * b[2] - at(1, 3) * b[3]) / at(1, 1);
-  b[0] = (b[0] - at(0, 1) * b[1] - at(0, 2) * b[2] - at(0, 3) * b[3]) / at(0, 0);
-
-  return true;
-}
-
 inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], double b[4])
 {
-
-  // // Tikhonov regularization could be applied here
-  // double trace = A[0] + A[5] + A[10];
-  // A[0]  += TIKHONOV_SCALE * trace;
-  // A[5]  += TIKHONOV_SCALE * trace;
-  // A[10] += TIKHONOV_SCALE * trace;
-
   // --- COLUMN 0 ---
   // 1. Find Pivot in Col 0
   int p = 0;
@@ -283,7 +168,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 1e-14) return false;
+  if (max_val < 0.0) return false;
   // 2. Swap Row 0 with Row p
   if (p != 0) {
     int row_offset = p * 4;
@@ -333,7 +218,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 1e-14) return false;
+  if (max_val < 0.0) return false;
 
   // 2. Swap Row 1 with Row p
   if (p != 1) {
@@ -371,7 +256,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 1e-14) return false;
+  if (max_val < 0.0) return false;
 
   // 2. Swap Row 2 with Row p
   if (p != 2) {
@@ -392,7 +277,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
 
   // --- BACKWARD SUBSTITUTION ---
   // Check last pivot
-  if (std::abs(A[15]) < 1e-14) return false;
+  if (std::abs(A[15]) < 0.0) return false;
 
   double inv3 = 1.0 / A[15];
   b[3] *= inv3;
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 9127cde85c8..6c20b0f1d4c 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -17,10 +17,32 @@
 #include "gtest/gtest.h"
 #include <cmath>
 #include <vector>
-
+#include <limits>
 // TODO: consider making a fixture with several setup functions?
 
 static constexpr double EPSILON = 1e-4;
+static constexpr double SOLV_EPSILON = std::numeric_limits<double>::epsilon() * 100; 
+
+TEST(HandwrittenSolver, invertible)
+{
+    double A[16] = {4, 2, 1, 3, 
+                    0, 5, 2, 1, 
+                    1, 0, 3, 2, 
+                    2, 1, 0, 4};
+                    
+    double b[4] = {23.0, 20.0, 18.0, 20.0};
+    
+    double expected_solution[4] = {1.0, 2.0, 3.0, 4.0};
+
+    bool success = MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A, b);
+
+    ASSERT_TRUE(success) << "The solver falsely flagged an invertible matrix as singular.";
+    
+    for (int i = 0; i < 4; ++i) {
+        ASSERT_NEAR(b[i], expected_solution[i], SOLV_EPSILON) << "Failed at index " << i;
+    }
+}
+
 
 TEST(ContactPointAndNormal, sphere)
 {
@@ -205,7 +227,7 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
 {
     double r1      = 3.456;
     double r2      = 3.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
-    double overlap = r1 / 5.0;
+    double overlap = r1 / 20.0;
     double xci[3]  = {-(r1 - overlap / 2.0), 0.0, 0.0};
     double xcj[3]  = {r2 - overlap / 2.0, 0.0, 0.0};
 
@@ -229,8 +251,7 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
         int flag        = (n < 2.01) ? 0 : 1;
 
         // Initial Guess: Offset from 0 to test convergence
-        double X0[4] = {overlap / 80, overlap / 80, overlap / 80, 1.0}, nij[3];
-
+        double X0[4] = {overlap, EPSILON, EPSILON, 1.0 / 2.0}, nij[3];
         int status = MathExtraSuperellipsoids::determine_contact_point(
             xci, R, shapei, block, flag, xcj, R, shapej, block, flag, X0, nij, method);
 

From f4e60315da6f5d39fbe350c7f6501e765a40b2e5 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 25 Feb 2026 11:07:20 +0100
Subject: [PATCH 100/174] Fixed instability in geometric formulation by
 imposing strict lower bound

---
 examples/ASPHERE/superellipsoid_gran/in.drop_test  | 4 ++--
 src/ASPHERE/math_extra_superellipsoids.cpp         | 9 ++++-----
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 1 +
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 89e7d594358..b972c751c27 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -67,7 +67,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 100 dump_drop_geometric_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 100 dump_drop_pushed_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
@@ -93,4 +93,4 @@ thermo          100
 thermo_style  custom time step ke c_rke 
 
 timestep        0.0001
-run             100000
\ No newline at end of file
+run             200000
\ No newline at end of file
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index de11933cb60..c832c6dbefd 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -106,12 +106,10 @@ double gaussian_curvature_superellipsoid(const double *shape, const double *bloc
 ------------------------------------------------------------------------- */
 
 void global2local_vector(const double *v, const double *quat, double *local_v){
-
     double qc[4];
     MathExtra::qconjugate(const_cast<double*>(quat), qc);
     MathExtra::quatrotvec(qc, const_cast<double*>(v), local_v);
-
-};
+}
 
 /* ----------------------------------------------------------------------
    Possible regularization for the shape functions 
@@ -120,7 +118,7 @@ void global2local_vector(const double *v, const double *quat, double *local_v){
 ------------------------------------------------------------------------- */
 void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]){
   // value is F - 1
-  double base = *value + 1.0; // should be fine as long as one does not start from the center (otherwise we could guard against it)
+  double base = std::fmax(*value + 1.0, 1e-12); 
   const double inv_F = 1.0 / base;
   const double inv_n1 = 1.0 / n1;
   
@@ -392,7 +390,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // avg radii for regularization if GEOMETRIC formulation
   double avg_radius_i = 1;
   double avg_radius_j = 1;
-  double max_step = std::sqrt(lsq) / 3.0;
+  double max_step = sqrt(lsq) / 5.0;  
   if (formulation == FORMULATION_GEOMETRIC) {
     avg_radius_i = (shapei[0] + shapei[1] + shapei[2]) / 3.0;
     avg_radius_j = (shapej[0] + shapej[1] + shapej[2]) / 3.0;
@@ -491,6 +489,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     // Limit the max step size to avoid jumping too far
     // normalize residual vector if step was limited
     double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);
+    
     if (spatial_residual_norm > max_step) {
         double scale = max_step / spatial_residual_norm;
         rhs[0] *= scale;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a67b6655ff8..208ecc79ca1 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -279,6 +279,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
                   iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
                   blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
                   contact_formulation);
+              
               if (status == 0)
                 touching = true;
               else if (status == 1)

From f70a06f920e80db5a9a129c9c27e4f5908fbc8b5 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 25 Feb 2026 18:57:01 +0100
Subject: [PATCH 101/174] Fixed bug in moment of inertia assignment. Added
 inertia to the compute to get it as output

---
 .../pair_gran_hooke_history_ellipsoid.cpp     |  2 +-
 src/atom_vec_ellipsoid.cpp                    |  4 +-
 src/compute_property_atom.cpp                 | 65 ++++++++++++++++++-
 src/compute_property_atom.h                   |  3 +
 4 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 208ecc79ca1..9d2aa3ca857 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -279,7 +279,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
                   iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
                   blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
                   contact_formulation);
-              
+
               if (status == 0)
                 touching = true;
               else if (status == 1)
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 3131c0f99f6..ab2d522b417 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -731,7 +731,7 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
+    double *inertia = bonus[ellipsoid[i]].inertia;
     BlockType type = bonus[ellipsoid[i]].type;
     shape[0] = shapex;
     shape[1] = shapey;
@@ -773,7 +773,7 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
   } else {
     double *shape = bonus[ellipsoid[i]].shape;
     double *block = bonus[ellipsoid[i]].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
+    double *inertia = bonus[ellipsoid[i]].inertia;
     BlockType &type = bonus[ellipsoid[i]].type;
     block[0] = blockn1;
     block[1] = blockn2;
diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index 0abe1b8de7a..944efd46828 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -263,7 +263,18 @@ ComputePropertyAtom::ComputePropertyAtom(LAMMPS *lmp, int narg, char **arg) :
       if (!avec_ellipsoid && !avec_body && !atom->quat_flag)
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_quatk;
-
+    } else if (strcmp(arg[iarg],"inertiax") == 0) {
+      if (!avec_ellipsoid) 
+        error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_inertiax;
+    } else if (strcmp(arg[iarg],"inertiay") == 0) {
+      if (!avec_ellipsoid) 
+        error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_inertiay;
+    } else if (strcmp(arg[iarg],"inertiaz") == 0) {
+      if (!avec_ellipsoid) 
+        error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_inertiaz;
     } else if (strcmp(arg[iarg],"tqx") == 0) {
       if (!atom->torque_flag)
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
@@ -1407,6 +1418,58 @@ void ComputePropertyAtom::pack_block2(int n)
 
 /* ---------------------------------------------------------------------- */
 
+/* ---------------------------------------------------------------------- */
+
+void ComputePropertyAtom::pack_inertiax(int n)
+{
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+      buf[n] = bonus[ellipsoid[i]].inertia[0];
+    else buf[n] = 1.0;
+    n += nvalues;
+  }
+}
+/* ---------------------------------------------------------------------- */
+
+void ComputePropertyAtom::pack_inertiay(int n)
+{
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+      buf[n] = bonus[ellipsoid[i]].inertia[1];
+    else buf[n] = 1.0;
+    n += nvalues;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePropertyAtom::pack_inertiaz(int n)
+{
+  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  int *ellipsoid = atom->ellipsoid;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+      buf[n] = bonus[ellipsoid[i]].inertia[2];
+    else buf[n] = 1.0;
+    n += nvalues;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ComputePropertyAtom::pack_quatw(int n)
 {
   if (avec_ellipsoid) {
diff --git a/src/compute_property_atom.h b/src/compute_property_atom.h
index fc55acb54c2..ccd88f45d60 100644
--- a/src/compute_property_atom.h
+++ b/src/compute_property_atom.h
@@ -110,6 +110,9 @@ class ComputePropertyAtom : public Compute {
   void pack_quati(int);
   void pack_quatj(int);
   void pack_quatk(int);
+  void pack_inertiax(int);
+  void pack_inertiay(int);
+  void pack_inertiaz(int);
   void pack_tqx(int);
   void pack_tqy(int);
   void pack_tqz(int);

From 21686ddbf6ccf025cf212b7449c3c5a6670d6a09 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 26 Feb 2026 11:15:20 +0100
Subject: [PATCH 102/174] Fixed minor typos in doc and examples

---
 doc/src/pair_gran_ellipsoid.rst               | 15 ++----
 .../ASPHERE/superellipsoid_gran/in.bowling    |  3 --
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 54 +++++++------------
 .../superellipsoid_gran/in.ellipsoid_gran     |  7 +--
 4 files changed, 27 insertions(+), 52 deletions(-)

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index 5245b011add..148fac54b62 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -87,7 +87,7 @@ the contact point :math:`\mathbf{X}_0` in the global frame is obtained as:
                    \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X}) 
 
 where the shape function is given by
-:math:`F_i(\mathbf{X}) = f(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
+:math:`F_i(\mathbf{X}) = f_i(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
 and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
 and rotation matrix of the particle, respectively.
 The constrained minimization problem is solved using Lagrange multipliers and
@@ -125,10 +125,10 @@ Newton's method is used to solve this equation for the scalars
     :math:`G(\mathbf{X}) = (F(\mathbf{X})+1)^{1/n_1}-1` which is a radial distance function formulation.
     This formulation is used to compute the surface points once the midway contact point is found.
     This formulation is also used when the *geometric* keyword is specified in the pair_style command and the following optimization problem is solved instead for the contact point:
-    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} r_i \ G_i(\mathbf{X}) + r_j G_j(\mathbf{X})  \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
+    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} \, r_i \ G_i(\mathbf{X}) + r_j G_j(\mathbf{X})  \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
     where :math:`r_i` and :math:`r_j` are the average radii of the two particles.
     The geometric formulation thus yields a better approximation of the contact point
-    for particles with different sizes, and it can be slightly more robust for particles with high *block* exponents.    
+    for particles with different sizes, and it is more robust for particles with high *block* exponents.    
 
 A hierarchical approach is used to limit the cost of contact detection.
 First, intersection of the bounding spheres of the two particles of bounding
@@ -139,13 +139,8 @@ Then, if the bounding spheres intersect, intersection of the oriented
 bounding box is checked. This is done following the equations of
 :ref:`(Eberly) <GeometricTools>`.
 This check is only performed if the *bounding_box* keyword is used.
-This can be advantageous for particles with high aspect ratio, where the
-bounding sphere encompasses a large empty volume, and minimization
-would otherwise be performed for particles that might be separated by
-large distances, and using a poor initial guess that might cause
-poor convergence of Newton's method.
-However, for grains with aspect ratio near 1:1,
-the additional bounding box check may be detrimental to performance.
+This is advantageous for all particles except for superellipses with 
+aspect ratio close to one and both blockiness indexes close to 2.
 
 The force calculation is similar to the :doc:`gran/hooke/history <pair_gran>`
 and :doc:`gran/hertz/history <pair_gran>` styles for spherical particles,
diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index 516858147ed..a597a410955 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -1,12 +1,9 @@
-# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
-
 units           si
 atom_style      ellipsoid
 dimension       3
 boundary p p p
 comm_modify vel yes
 newton off
-# create big ellipsoidal particles
 
 region          box block 0 10 0 10 0 10
 create_box      2 box
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index b972c751c27..97e886ea9de 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -1,4 +1,3 @@
-# Test demo for LAPACK functions in ASPHERE package
 # Lattice wall drop test
 
 units           si
@@ -12,28 +11,26 @@ processors      * * 1
 neighbor 0.5 bin
 neigh_modify   delay 0 every 1 check yes
 
-# 1. Setup Simulation Box
-# Expanded box to accommodate the floor and falling height
-variable        box_length equal 25
-variable        box_height equal 300
+# Setup Simulation Box
+variable        box_length equal 50
+variable        box_height equal 350
 
 region          box block 0 ${box_length} 0 ${box_length} 0 ${box_height}
 create_box      2 box
 
-# 3. Create Lattice Wall (Type 1)
-# Use a dense lattice (spacing 1.0) to ensure no gaps in the Y-direction (size 1.0).
-# Note: Overlap in X (size 2.0) is ignored because this group is frozen.
-
+# Create Lattice Wall (Type 1)
 lattice         sc 1.0
-region          floor_reg block 0 ${box_length} 0 ${box_length} 0 0.5
+region          floor_reg block 0 ${box_length} 0 ${box_length} 0 0.1
+
+variable        insertion_length equal ${box_length}-5.0
 
 create_atoms    1 region floor_reg
 
-# 4. Create Falling Particles (Type 2)
-region          drop_zone block 5 20 5 20 10 ${box_height}
-create_atoms    2 random 3000 12345 drop_zone overlap 2.0 maxtry 100
+# Create Falling Particles (Type 2)
+region          drop_zone block 5 ${insertion_length} 5 ${insertion_length} 5 ${box_height}
+create_atoms    2 random 30000 12345 drop_zone overlap 2.0 maxtry 100
 
-# 2. Material Properties
+# Material Properties
 # Type 1: Wall particles
 set             type 1 mass 1.0
 set             type 1 shape 1.0 1.0 1.0
@@ -41,24 +38,19 @@ set             type 1 shape 1.0 1.0 1.0
 # Type 2: Falling particles
 set             type 2 mass 1.0
 set             type 2 shape 2.0 1.0 1.0
-
-# Initialize Superquadric "Blockiness" to 2.0 (Ellipsoid) 
-# Necessary to prevent division-by-zero in the dump variables below
-variable        blockiness equal 8.0
+variable        blockiness equal 4.0
 set             type 1 block 4.0 4.0 
 set             type 2 block ${blockiness} ${blockiness} 
 
-# 5. Define Groups
+# Define Groups
 group           wall type 1
 group           mobile type 2
 
-
-# 6. Interaction / Pair Style
-# Kept exactly as requested
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 10.0 0.0 0.5 0 bounding_box geometric
+# Interaction / Pair Style
+pair_style      gran/hooke/history/ellipsoid 1e4 0.0 20.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
-# 7. Computes and Output
+# Computes and Output
 compute         diameter all property/atom shapex shapey shapez
 compute         orient all property/atom quatw quati quatj quatk
 compute         block all property/atom block1 block2
@@ -67,30 +59,24 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 100 dump_drop_pushed_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
-# 8. Dynamics
-# Freeze the wall (Type 1) so it acts as a static floor
-# fix             1 wall freeze
-
 # Prevent the frozen wall particles (type 1) from interacting with each other
 neigh_modify exclude group wall wall
 
-
 # Apply gravity only to the mobile particles (Type 2)
 fix             2 mobile gravity 9.81 vector 0 0 -1
 
-# Integrate Equations of Motion (NVE/Asphere)
-# Applied to all, but fix freeze overrides integration for the wall
+# Integrate Equations of Motion
 fix             3 mobile nve/asphere
 
 compute rke all erotate/asphere
 
-# 9. Run
+# Run
 thermo          100
 thermo_style  custom time step ke c_rke 
 
 timestep        0.0001
-run             200000
\ No newline at end of file
+run             20000000
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 91d64e0f5df..1f52c5e1535 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -1,5 +1,3 @@
-# Test demo for LAPACK functions in ASPHERE packagecmake -C ../cmake/presets/most.cmake     [OPTIONS] ../cmake
-
 units           si
 atom_style      ellipsoid
 dimension       3
@@ -11,15 +9,14 @@ newton off
 region          box block 0 10 0 10 0 10
 create_box      2 box
 create_atoms 1 single 5 5 4.5
-create_atoms 1 single 5 5 6
+create_atoms 1 single 5.000000000001 5 6
 group bot id 1
 group top id 2
 #create_atoms    1 region box
 
 set             type 1 mass 1.0
 set             type 1 shape 2.0 1.0 1.0
-# TODO: Line below currently fails in contact detection with status 1
-# set             type 1 block 4.0 4.0
+set             type 1 block 8.0 8.0
 
 pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0 geometric bounding_box
 # pair_style      gran/hertz/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0

From 3ff12e7c2b341e6a7cb78a773ea31c42c2ceb3d4 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 26 Feb 2026 12:03:27 +0100
Subject: [PATCH 103/174] Added author info to files

---
 src/ASPHERE/math_extra_superellipsoids.cpp    |   6 +-
 src/ASPHERE/math_extra_superellipsoids.h      |   5 +-
 .../pair_gran_hertz_history_ellipsoid.cpp     | 218 ++++++++++--------
 .../pair_gran_hertz_history_ellipsoid.h       |   3 +
 .../pair_gran_hooke_history_ellipsoid.cpp     |   3 +
 .../pair_gran_hooke_history_ellipsoid.h       |   3 +
 6 files changed, 130 insertions(+), 108 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index c832c6dbefd..1259ca1a121 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -554,11 +554,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       if ((norm <= TOL_NR_RES) &&
           (MathExtra::lensq3(rhs) * a * a <= TOL_NR_POS * lsq)) {
         converged = true;
-        // TODO: consider testing picking the normal with the least error
-        //       i.e., likely the grain with the smallest curvature (Hessian norm?)
-        //       or with the largest gradient?
-        //       or some other measure like average gradients.
-        //       right now we use the gradient on grain i for simplicity and performance
+        
         MathExtra::normalize3(gradi, nij);
         break;
       } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 12a0c6348f0..ac4f4d377a0 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -30,10 +30,7 @@ enum ContactFormulation { FORMULATION_ALGEBRAIC = 0, FORMULATION_GEOMETRIC = 1 }
 enum CurvatureModel { CURV_MEAN = 0, CURV_GAUSSIAN = 1 };
 
 // needed for shape functions grad and matrix
-void global2local_vector(
-    const double v[3], const double *quat,
-    double local_v
-        [3]);    // TODO: TBD if still useful once we implement Hertz. There might be a cheaper way with the rotation matrix that we need for contact detection anyway
+void global2local_vector(const double v[3], const double *quat, double local_v[3]);
 
 inline double det4_M44_zero(const double m[4][4]);
 
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 4a3c1a954b5..6466f3554ca 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -10,7 +10,9 @@
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
-
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
 
 #include "pair_gran_hertz_history_ellipsoid.h"
 
@@ -22,26 +24,27 @@
 #include "fix_dummy.h"
 #include "fix_neigh_history.h"
 #include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "update.h"
-#include "math_extra.h" // probably needed for some computations
-#include "math_extra_superellipsoids.h"
 
 #include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
 
-
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 
 /* ---------------------------------------------------------------------- */
 
-PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) : 
-PairGranHookeHistoryEllipsoid(lmp) {}
+PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
+    PairGranHookeHistoryEllipsoid(lmp)
+{
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -60,11 +63,12 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
   int *touch, **firsttouch;
   double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
 
-  double shapex, shapey, shapez; // ellipsoid shape params
+  double shapex, shapey, shapez;    // ellipsoid shape params
   double quat1, quat2, quat3, quat4;
   double block1, block2;
 
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1, overlap2, omegai[3], omegaj[3];
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
   AtomVecEllipsoid::BlockType flagi, flagj;
 
   ev_init(eflag, vflag);
@@ -126,7 +130,6 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
     ztmp = x[i][2];
     radi = radius[i];
 
-
     touch = firsttouch[i];
     allhistory = firsthistory[i];
     jlist = firstneigh[i];
@@ -149,7 +152,6 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
       X0_prev = &allhistory[3 + size_history * jj];
       int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
 
-
       // TODO: Below could be a `touch()` function
       bool touching;
       if (rsq >= radsum * radsum) {
@@ -162,10 +164,10 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
-        if(bounding_box) {
+        if (bounding_box) {
           separating_axis = &allhistory[7 + size_history * jj];
           skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-                                       x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
         }
         if (skip_contact_detection)
           touching = false;
@@ -179,15 +181,18 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
             X0[1] = x[ref_index][1] + X0_prev[1];
             X0[2] = x[ref_index][2] + X0_prev[2];
             X0[3] = X0_prev[3];
-            int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                           x[j], Rj, shapej, blockj, flagj,
-                                                                           X0, nij, contact_formulation);
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
             if (status == 0)
               touching = true;
             else if (status == 1)
               touching = false;
-            else 
-              error->warning(FLERR, "Ellipsoid contact detection (old contact) failed with status {} betwen particle {} and particle {} ", status, atom->tag[i], atom->tag[j]);
+            else
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed with status {} "
+                             "betwen particle {} and particle {} ",
+                             status, atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -202,13 +207,13 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
             double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
             double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
             MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi; // Lagrange multiplier mu^2
-            for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
               double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
               shapei[0] = shapei[1] = shapei[2] = reqi;
               shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
               blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
               blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
               blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
@@ -216,23 +221,27 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
               // force ellipsoid flag for first initial guess iteration.
               // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-                                                                             x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                             X0, nij, contact_formulation);
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
               if (status == 0)
                 touching = true;
               else if (status == 1)
                 touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS){
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
                 // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR, "Ellipsoid contact detection (new contact) failed with status {} betwen particle {} and particle {}", status, atom->tag[i], atom->tag[j]);
-              }  
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed with status {} "
+                               "betwen particle {} and particle {}",
+                               status, atom->tag[i], atom->tag[j]);
+              }
             }
           }
         }
       }
 
-
       if (!touching) {
         // unset non-touching neighbors
 
@@ -248,50 +257,53 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         X0_prev[2] = X0[2] - x[ref_index][2];
         X0_prev[3] = X0[3];
 
-        double nji[3] = { -nij[0], -nij[1], -nij[2] };
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
         // compute overlap depth along normal direction for each grain
         // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
 
         double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
         MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
         MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
         if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-            curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-                            shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-            curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-                            shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
         } else {
-            curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-                            shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-            curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-                            shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
         }
-        
-        polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
-        
-        // branch vectors 
+
+        polyhertz = sqrt((overlap1 + overlap2) /
+                         (curvature_i + curvature_j));    // hertzian contact radius approximation
+
+        // branch vectors
         double cr1[3], cr2[3];
         MathExtra::sub3(X0, x[i], cr1);
         MathExtra::sub3(X0, x[j], cr2);
 
         // we need to take the cross product of omega
 
-        double ex_space[3],ey_space[3],ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
-        MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
-                                   bonus[ellipsoid[i]].inertia,omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
-        MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
-                                   bonus[ellipsoid[j]].inertia,omegaj);
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
 
         double omega_cross_r1[3], omega_cross_r2[3];
         MathExtra::cross3(omegai, cr1, omega_cross_r1);
         MathExtra::cross3(omegaj, cr2, omega_cross_r2);
 
-        // relative translational velocity 
+        // relative translational velocity
         // compute directly the sum of relative translational velocity at contact point
         // since rotational velocity contribution is different for superellipsoids
         double cv1[3], cv2[3];
@@ -311,11 +323,11 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
         // normal component
 
-        vn1 = nij[0] * vr1; // dot product 
+        vn1 = nij[0] * vr1;    // dot product
         vn2 = nij[1] * vr2;
         vn3 = nij[2] * vr3;
 
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitude
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
 
         // tangential component
 
@@ -344,7 +356,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         // normal forces = Hertzian contact + normal velocity damping
 
         damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
         ccel *= polyhertz;
         if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
@@ -401,7 +413,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         fx = nji[0] * ccel + fs1;
         fy = nji[1] * ccel + fs2;
         fz = nji[2] * ccel + fs3;
-        fx *= factor_lj; // I think factor lj is just 1 except for special bonds
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
         fy *= factor_lj;
         fz *= factor_lj;
         f[i][0] += fx;
@@ -412,7 +424,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
         tor1 = cr1[1] * fz - cr1[2] * fy;
         tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx; 
+        tor3 = cr1[0] * fy - cr1[1] * fx;
 
         tor1 *= factor_lj;
         tor2 *= factor_lj;
@@ -425,17 +437,19 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
-          
+
           tor1 = cr2[1] * fz - cr2[2] * fy;
           tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx; 
+          tor3 = cr2[0] * fy - cr2[1] * fx;
 
           torque[j][0] -= tor1;
           torque[j][1] -= tor2;
           torque[j][2] -= tor3;
         }
 
-        if (evflag) ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely, delz); // Correct even for non-spherical particles
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
       }
     }
   }
@@ -449,7 +463,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
 void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 {
-  if (narg <6) error->all(FLERR, "Illegal pair_style command");
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
 
   kn = utils::numeric(FLERR, arg[0], false, lmp);
   if (strcmp(arg[1], "NULL") == 0)
@@ -469,9 +483,9 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 
   limit_damping = 0;
   bounding_box = 0;
-  curvature_model = MathExtraSuperellipsoids::CURV_MEAN; // Default to Mean curvature
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
 
-  for (int iarg = 6 ; iarg < narg ; iarg++) {
+  for (int iarg = 6; iarg < narg; iarg++) {
     if (strcmp(arg[iarg], "limit_damping") == 0)
       limit_damping = 1;
     else if (strcmp(arg[iarg], "bounding_box") == 0)
@@ -484,7 +498,7 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
       error->all(FLERR, "Illegal pair_style command");
   }
 
-  size_history = 8; // reset to default size
+  size_history = 8;    // reset to default size
   if (bounding_box == 0) size_history--;
 
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
@@ -500,7 +514,8 @@ void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
 /* ---------------------------------------------------------------------- */
 
 double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/, double &fforce)
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
 {
   double radi, radj, radsum;
   double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
@@ -546,9 +561,10 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   if (bounding_box) {
-    double separating_axis = allhistory[7 + size_history * neighprev]; // Copy: no update of history in single
+    double separating_axis =
+        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
     bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-                                      x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
     if (no_bouding_box_contact) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -560,7 +576,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   AtomVecEllipsoid::BlockType flagi, flagj;
   flagi = bonus[ellipsoid[i]].type;
   flagj = bonus[ellipsoid[j]].type;
-  double* X0_prev = &allhistory[3 + size_history * neighprev];
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
   if (touch[neighprev] == 1) {
     int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
     // Continued contact: use grain true shape and last contact point
@@ -572,23 +588,23 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
                                                                    x[j], Rj, shapej, blockj, flagj,
                                                                    X0, nij, contact_formulation);
     if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
     }
     if (status != 0)
-        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+      error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
   } else {
     double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
     double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
     MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi; // Lagrange multiplier mu^2
-    for (int iter_ig = 1 ; iter_ig <= NUMSTEP_INITIAL_GUESS ; iter_ig++) {
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
       double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
       shapei[0] = shapei[1] = shapei[2] = reqi;
       shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0-frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0-frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
       blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
       blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
       blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
@@ -596,9 +612,10 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
       // force ellipsoid flag for first initial guess iteration.
       // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-                                                                     x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-                                                                     X0, nij, contact_formulation);
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
       if (status == 1) {
         fforce = 0.0;
         for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -609,40 +626,43 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
     }
   }
   double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = { -nij[0], -nij[1], -nij[2] };
-  overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
 
   double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
   MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
   MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
   if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-                    shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-                    shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
+                                                                          surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
+                                                                          surf_point_j, x[j]);
   } else {
     curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-                    shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
     curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-                    shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
   }
 
-  polyhertz =  sqrt( (overlap1+overlap2) / (curvature_i + curvature_j)); // hertzian contact radius approximation
-  
+  polyhertz = sqrt((overlap1 + overlap2) /
+                   (curvature_i + curvature_j));    // hertzian contact radius approximation
+
   double cr1[3], cr2[3];
   MathExtra::sub3(X0, x[i], cr1);
   MathExtra::sub3(X0, x[j], cr2);
 
-  double ex_space[3],ey_space[3],ez_space[3];
+  double ex_space[3], ey_space[3], ez_space[3];
   double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat,ex_space,ey_space,ez_space);
-  MathExtra::angmom_to_omega(angmom[i],ex_space,ey_space,ez_space,
-                             bonus[ellipsoid[i]].inertia,omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat,ex_space,ey_space,ez_space);
-  MathExtra::angmom_to_omega(angmom[j],ex_space,ey_space,ez_space,
-                             bonus[ellipsoid[j]].inertia,omegaj);
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
 
   double omega_cross_r1[3], omega_cross_r2[3];
   MathExtra::cross3(omegai, cr1, omega_cross_r1);
@@ -671,11 +691,11 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
 
   // normal component
 
-  vn1 = nij[0] * vr1; // dot product
+  vn1 = nij[0] * vr1;    // dot product
   vn2 = nij[1] * vr2;
   vn3 = nij[2] * vr3;
 
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2]; // magnitude
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
 
   // tangential component
 
@@ -706,7 +726,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   // normal forces = Hookian contact + normal velocity damping
 
   damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp; // assuming we get the overlap depth
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
   ccel *= polyhertz;
   if (limit_damping && (ccel < 0.0)) ccel = 0.0;
 
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
index a4d5e381b4c..b02f6b733e7 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
@@ -10,6 +10,9 @@
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 // clang-format off
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 9d2aa3ca857..6446d4820d3 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -10,6 +10,9 @@
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
 
 #include "pair_gran_hooke_history_ellipsoid.h"
 
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
index 38d0df8806a..e3bcf3e038c 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
@@ -10,6 +10,9 @@
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 // clang-format off

From 2a44a3657ae8d6b128696186fb44c3824e7d50d6 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Sat, 28 Feb 2026 17:52:38 +0100
Subject: [PATCH 104/174] Fixed minor bug in torque calculation. Added guard
 againt mathematical singularity of flat to flat contacts.

---
 .../superellipsoid_gran/in.ellipsoid_gran     | 12 ++++-----
 src/ASPHERE/math_extra_superellipsoids.cpp    | 27 ++++++++++++-------
 src/ASPHERE/math_extra_superellipsoids.h      |  8 +++---
 .../pair_gran_hertz_history_ellipsoid.cpp     |  3 ---
 .../pair_gran_hooke_history_ellipsoid.cpp     |  4 ---
 .../utils/test_math_extra_superellipsoids.cpp |  6 ++---
 6 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 1f52c5e1535..a5ba559c079 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -9,16 +9,16 @@ newton off
 region          box block 0 10 0 10 0 10
 create_box      2 box
 create_atoms 1 single 5 5 4.5
-create_atoms 1 single 5.000000000001 5 6
+create_atoms 1 single 5 5 6
 group bot id 1
 group top id 2
 #create_atoms    1 region box
 
 set             type 1 mass 1.0
 set             type 1 shape 2.0 1.0 1.0
-set             type 1 block 8.0 8.0
+set             type 1 block 5.0 5.0
 
-pair_style      gran/hooke/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0 geometric bounding_box
+pair_style      gran/hooke/history/ellipsoid 1e3 NULL 0.0 0.0 0.5 0 bounding_box
 # pair_style      gran/hertz/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
 pair_coeff      * *
 
@@ -31,7 +31,7 @@ compute block all property/atom block1 block2
 variable phi atom "2/c_block[2]"
 variable theta atom "2/c_block[1]"
 
-dump mydump all custom 1 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump mydump all custom 10 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
 dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
@@ -40,5 +40,5 @@ fix 2 top gravity 9.81 vector 0 0 -1
 fix 3 all nve/asphere
 
 thermo 10
-timestep 0.01
-run             1000
+timestep 0.001
+run             3000
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 1259ca1a121..84d81c0fb98 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -447,12 +447,22 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
 
     // Try Fast Solver
-    if (MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast)) {
-        rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
-        rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
-        gauss_elim_solved = true;
+    gauss_elim_solved = MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast);
+    
+    // check for divergence or numerical issues in the fast solver
+    // and fall back to regularized solver if necessary
+    bool fail0 = !std::isfinite(b_fast[0]) | (std::abs(b_fast[0]) > 1e30);
+    bool fail1 = !std::isfinite(b_fast[1]) | (std::abs(b_fast[1]) > 1e30);
+    bool fail2 = !std::isfinite(b_fast[2]) | (std::abs(b_fast[2]) > 1e30);
+    bool fail3 = !std::isfinite(b_fast[3]) | (std::abs(b_fast[3]) > 1e30);
+    if (fail0 | fail1 | fail2 | fail3) {
+        gauss_elim_solved = false;
     }
-    else {
+    
+    rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
+    rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
+    
+    if (!gauss_elim_solved) {
       // restore matrix
       for(int r=0; r<4; ++r) {
         for(int c=0; c<4; ++c) {
@@ -461,9 +471,9 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       }
       b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
       b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
-
+       // enforce a minimum regularization to avoid zero pivots in edge cases (flat on flat)
       double trace = jacobian[0] + jacobian[5] + jacobian[10];
-      double diag_weight = TIKHONOV_SCALE * trace;
+      double diag_weight = std::fmax(TIKHONOV_SCALE * trace, TIKHONOV_SCALE);
       A_fast[0]  += diag_weight;
       A_fast[5]  += diag_weight;
       A_fast[10] += diag_weight;
@@ -604,8 +614,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       X0[3] = X_line[3];
     }
 
-    if (converged)
-      break;
+    if (converged) break;
   }
 
   if (!converged){
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index ac4f4d377a0..dc8dd63abc2 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -165,7 +165,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 0.0) return false;
+  if (max_val <= 0.0) return false;
   // 2. Swap Row 0 with Row p
   if (p != 0) {
     int row_offset = p * 4;
@@ -215,7 +215,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 0.0) return false;
+  if (max_val <= 0.0) return false;
 
   // 2. Swap Row 1 with Row p
   if (p != 1) {
@@ -253,7 +253,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
     p = 3;
   }
 
-  if (max_val < 0.0) return false;
+  if (max_val <= 0.0) return false;
 
   // 2. Swap Row 2 with Row p
   if (p != 2) {
@@ -274,7 +274,7 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
 
   // --- BACKWARD SUBSTITUTION ---
   // Check last pivot
-  if (std::abs(A[15]) < 0.0) return false;
+  if (std::abs(A[15]) <= 0.0) return false;
 
   double inv3 = 1.0 / A[15];
   b[3] *= inv3;
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 6466f3554ca..80a545d49cf 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -426,9 +426,6 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         tor2 = cr1[2] * fx - cr1[0] * fz;
         tor3 = cr1[0] * fy - cr1[1] * fx;
 
-        tor1 *= factor_lj;
-        tor2 *= factor_lj;
-        tor3 *= factor_lj;
         torque[i][0] += tor1;
         torque[i][1] += tor2;
         torque[i][2] += tor3;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6446d4820d3..5146b3e334c 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -456,16 +456,12 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
-
         // torques are cross prodcuts of branch vector with the entire force at contact point
 
         tor1 = cr1[1] * fz - cr1[2] * fy;
         tor2 = cr1[2] * fx - cr1[0] * fz;
         tor3 = cr1[0] * fy - cr1[1] * fx;
 
-        tor1 *= factor_lj;
-        tor2 *= factor_lj;
-        tor3 *= factor_lj;
         torque[i][0] += tor1;
         torque[i][1] += tor2;
         torque[i][2] += tor3;
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index 6c20b0f1d4c..bbecbaa1001 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -226,8 +226,8 @@ TEST(ContactPointAndNormal, sphere_geometric)
 TEST(ContactPointAndNormal, supersphere_poly_geometric)
 {
     double r1      = 3.456;
-    double r2      = 3.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
-    double overlap = r1 / 20.0;
+    double r2      = 2.0 * r1; // Polydisperse: radius_2 = 3 * radius_1
+    double overlap = r1 / 10.0;
     double xci[3]  = {-(r1 - overlap / 2.0), 0.0, 0.0};
     double xcj[3]  = {r2 - overlap / 2.0, 0.0, 0.0};
 
@@ -251,7 +251,7 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
         int flag        = (n < 2.01) ? 0 : 1;
 
         // Initial Guess: Offset from 0 to test convergence
-        double X0[4] = {overlap, EPSILON, EPSILON, 1.0 / 2.0}, nij[3];
+        double X0[4] = {overlap, overlap, overlap, 1.0 / 2.0}, nij[3];
         int status = MathExtraSuperellipsoids::determine_contact_point(
             xci, R, shapei, block, flag, xcj, R, shapej, block, flag, X0, nij, method);
 

From 51d611959db6538b716dea7dec519f3ed4d938d4 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 2 Mar 2026 09:27:09 +0100
Subject: [PATCH 105/174] Removed LAPACK dependency from CMakaLists file, since
 it is no longer needed

---
 cmake/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index d19f9192d2c..af12ad24fc0 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -520,7 +520,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA
 endif()
 
 option(USE_INTERNAL_LINALG "Prefer internal library with BLAS/LAPACK subset over system BLAS/LAPACK" OFF)
-if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR (PKG_ASPHERE AND PKG_GRANULAR) OR BUILD_TOOLS)
+if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS)
   if(NOT USE_INTERNAL_LINALG)
     find_package(LAPACK)
     find_package(BLAS)

From 746546f08a51d324a3ce4632af4c4cfb3941666e Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 2 Mar 2026 09:33:05 +0100
Subject: [PATCH 106/174] Added inertia from compute property atom to the doc

---
 doc/src/compute_property_atom.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index 332c64c3feb..2b32318a008 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -27,6 +27,7 @@ Syntax
                              angmomx, angmomy, angmomz,
                              shapex, shapey, shapez,
                              block1, block2,
+                             inertiax, inertiay, inertiaz,
                              quatw, quati, quatj, quatk, tqx, tqy, tqz,
                              end1x, end1y, end1z, end2x, end2y, end2z,
                              corner1x, corner1y, corner1z,
@@ -66,6 +67,7 @@ Syntax
            *angmomx,angmomy,angmomz* = angular momentum of aspherical particle
            *shapex,shapey,shapez* = 3 diameters of aspherical particle
            *block1,block2* = 2 blockiness exponents of aspherical particle
+           *inertiax,inertiay,inertiaz* = 3 principal moments of inertia of aspherical particle
            *quatw,quati,quatj,quatk* = quaternion components for aspherical or body particles
            *tqx,tqy,tqz* = torque on finite-size particles
            *end12x, end12y, end12z* = end points of line segment
@@ -169,6 +171,10 @@ The quantities *block1*, and *block2*, are defined for ellipsoidal
 particles and define the blockiness of each super-ellipsoid particle.
 See the :doc:`set <set>` command for an explanation of the blockiness.
 
+The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for ellipsoidal
+particles and define the 3 principal moments of inertia of each particle.
+These are with respect to the particle's center of mass and in a reference system
+aligned with the particle's principal axes.  
 
 The quantities *quatw*, *quati*, *quatj*, and *quatk* are defined for
 ellipsoidal particles and body particles and store the 4-vector quaternion

From 641479e8624c6ccf98e937a0ecf3737d7f38675d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 3 Mar 2026 18:12:26 +0100
Subject: [PATCH 107/174] Removed LAPACK dependency from superellipsoid tests

---
 unittest/utils/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 5d7ca35e964..812d71e4169 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -158,6 +158,6 @@ endif()
 # Extra math tests for superellipsoids
 if(PKG_ASPHERE AND PKG_GRANULAR)
   add_executable(test_math_extra_superellipsoids test_math_extra_superellipsoids.cpp)
-  target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain ${LAPACK_LIBRARIES})
+  target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain)
   add_test(NAME MathExtraSuperellipsoids COMMAND test_math_extra_superellipsoids)
 endif()

From 2e98ed62f4c70ba25f63361ea35e4acf1688acae Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 5 Mar 2026 15:43:22 +0100
Subject: [PATCH 108/174] Refactored code to 2 bonus data strcutres, with
 bonus_super inheriting from bonus

---
 src/atom.h                 |   2 +-
 src/atom_vec_ellipsoid.cpp | 716 +++++++++++++++++++++++++------------
 src/atom_vec_ellipsoid.h   |  25 +-
 src/math_extra.cpp         |  16 +-
 src/math_extra.h           |   3 +-
 5 files changed, 526 insertions(+), 236 deletions(-)

diff --git a/src/atom.h b/src/atom.h
index 5aa8cf0f507..6f782d7a595 100644
--- a/src/atom.h
+++ b/src/atom.h
@@ -191,7 +191,7 @@ class Atom : protected Pointers {
   // 1 if variable is used, 0 if not
 
   int labelmapflag, types_style;
-  int ellipsoid_flag, line_flag, tri_flag, body_flag;
+  int ellipsoid_flag, line_flag, tri_flag, body_flag, superellipsoid_flag;
   int peri_flag, electron_flag, sph_flag;
 
   int molecule_flag, molindex_flag, molatom_flag;
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index ab2d522b417..eeaf4ab2a81 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -42,16 +42,14 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   bonus_flag = 1;
 
   size_forward_bonus = 4;
-  size_border_bonus = 13;
-  size_restart_bonus_one = 13;
-  size_data_bonus = 10;
+  size_border_bonus = 8;
+  size_restart_bonus_one = 8;
+  size_data_bonus = 8;
 
   atom->ellipsoid_flag = 1;
+  atom->superellipsoid_flag = 1;
   atom->rmass_flag = atom->angmom_flag = atom->torque_flag = 1;
 
-  // Circumscribed radius, not physical radius
-  atom->radius_flag = 1;
-
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
 
   // strings with peratom variables to include in each AtomVec method
@@ -59,15 +57,15 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   // order of fields in a string does not matter
   // except: fields_data_atom & fields_data_vel must match data file
 
-  fields_grow = {"radius", "rmass", "angmom", "torque", "ellipsoid"};
-  fields_copy = {"radius", "rmass", "angmom"};
+  fields_grow = {"rmass", "angmom", "torque", "ellipsoid"};
+  fields_copy = {"rmass", "angmom"};
   fields_comm_vel = {"angmom"};
   fields_reverse = {"torque"};
   fields_border = {"radius", "rmass"};
-  fields_border_vel = {"radius", "rmass", "angmom"};
-  fields_exchange = {"radius", "rmass", "angmom"};
-  fields_restart = {"radius", "rmass", "angmom"};
-  fields_create = {"radius", "rmass", "angmom", "ellipsoid"};
+  fields_border_vel = {"rmass", "angmom"};
+  fields_exchange = {"rmass", "angmom"};
+  fields_restart = {"rmass", "angmom"};
+  fields_create = {"rmass", "angmom", "ellipsoid"};
   fields_data_atom = {"id", "type", "ellipsoid", "rmass", "x"};
   fields_data_vel = {"id", "v", "angmom"};
 
@@ -78,7 +76,10 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
 
 AtomVecEllipsoid::~AtomVecEllipsoid()
 {
-  memory->sfree(bonus);
+  if (atom->superellipsoid_flag)
+    memory->sfree(bonus_super);
+  else
+    memory->sfree(bonus);
 }
 
 /* ----------------------------------------------------------------------
@@ -89,9 +90,9 @@ AtomVecEllipsoid::~AtomVecEllipsoid()
 void AtomVecEllipsoid::grow_pointers()
 {
   ellipsoid = atom->ellipsoid;
-  radius = atom->radius;
   rmass = atom->rmass;
   angmom = atom->angmom;
+  if (atom->superellipsoid_flag) radius = atom->radius;
 }
 
 /* ----------------------------------------------------------------------
@@ -103,7 +104,12 @@ void AtomVecEllipsoid::grow_bonus()
   nmax_bonus = grow_nmax_bonus(nmax_bonus);
   if (nmax_bonus < 0) error->one(FLERR, "Per-processor system is too big");
 
-  bonus = (Bonus *) memory->srealloc(bonus, nmax_bonus * sizeof(Bonus), "atom:bonus");
+  if (atom->superellipsoid_flag) {
+    bonus_super = (BonusSuper *) memory->srealloc(bonus_super, nmax_bonus * sizeof(BonusSuper),
+                                                  "atom:bonus_super");
+  } else {
+    bonus = (Bonus *) memory->srealloc(bonus, nmax_bonus * sizeof(Bonus), "atom:bonus");
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -122,7 +128,10 @@ void AtomVecEllipsoid::copy_bonus(int i, int j, int delflag)
   // if atom I has bonus data, reset I's bonus.ilocal to loc J
   // do NOT do this if self-copy (I=J) since I's bonus data is already deleted
 
-  if (ellipsoid[i] >= 0 && i != j) bonus[ellipsoid[i]].ilocal = j;
+  if (atom->superellipsoid_flag) {
+    if (ellipsoid[i] >= 0 && i != j) bonus_super[ellipsoid[i]].ilocal = j;
+  } else if (ellipsoid[i] >= 0 && i != j)
+    bonus[ellipsoid[i]].ilocal = j;
   ellipsoid[j] = ellipsoid[i];
 }
 
@@ -133,8 +142,13 @@ void AtomVecEllipsoid::copy_bonus(int i, int j, int delflag)
 
 void AtomVecEllipsoid::copy_bonus_all(int i, int j)
 {
-  ellipsoid[bonus[i].ilocal] = j;
-  memcpy(&bonus[j], &bonus[i], sizeof(Bonus));
+  if (atom->superellipsoid_flag) {
+    ellipsoid[bonus_super[i].ilocal] = j;
+    memcpy(&bonus_super[j], &bonus_super[i], sizeof(BonusSuper));
+  } else {
+    ellipsoid[bonus[i].ilocal] = j;
+    memcpy(&bonus[j], &bonus[i], sizeof(Bonus));
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -152,8 +166,8 @@ void AtomVecEllipsoid::clear_bonus()
 }
 
 /* ---------------------------------------------------------------------- */
-
-int AtomVecEllipsoid::pack_comm_bonus(int n, int *list, double *buf)
+template <bool is_super>
+int AtomVecEllipsoid::pack_comm_bonus_templated(int n, int *list, double *buf)
 {
   int i, j, m;
   double *quat;
@@ -162,7 +176,11 @@ int AtomVecEllipsoid::pack_comm_bonus(int n, int *list, double *buf)
   for (i = 0; i < n; i++) {
     j = list[i];
     if (ellipsoid[j] >= 0) {
-      quat = bonus[ellipsoid[j]].quat;
+      if (is_super) {
+        quat = bonus_super[ellipsoid[j]].quat;
+      } else {
+        quat = bonus[ellipsoid[j]].quat;
+      }
       buf[m++] = quat[0];
       buf[m++] = quat[1];
       buf[m++] = quat[2];
@@ -173,9 +191,18 @@ int AtomVecEllipsoid::pack_comm_bonus(int n, int *list, double *buf)
   return m;
 }
 
-/* ---------------------------------------------------------------------- */
+int AtomVecEllipsoid::pack_comm_bonus(int n, int *list, double *buf)
+{
+  if (atom->superellipsoid_flag) {
+    return pack_comm_bonus_templated<true>(n, list, buf);
+  } else {
+    return pack_comm_bonus_templated<false>(n, list, buf);
+  }
+}
 
-void AtomVecEllipsoid::unpack_comm_bonus(int n, int first, double *buf)
+/* ---------------------------------------------------------------------- */
+template <bool is_super>
+void AtomVecEllipsoid::unpack_comm_bonus_templated(int n, int first, double *buf)
 {
   int i, m, last;
   double *quat;
@@ -184,7 +211,11 @@ void AtomVecEllipsoid::unpack_comm_bonus(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (ellipsoid[i] >= 0) {
-      quat = bonus[ellipsoid[i]].quat;
+      if (is_super) {
+        quat = bonus_super[ellipsoid[i]].quat;
+      } else {
+        quat = bonus[ellipsoid[i]].quat;
+      }
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
@@ -193,9 +224,18 @@ void AtomVecEllipsoid::unpack_comm_bonus(int n, int first, double *buf)
   }
 }
 
-/* ---------------------------------------------------------------------- */
+void AtomVecEllipsoid::unpack_comm_bonus(int n, int first, double *buf)
+{
+  if (atom->superellipsoid_flag) {
+    unpack_comm_bonus_templated<true>(n, first, buf);
+  } else {
+    unpack_comm_bonus_templated<false>(n, first, buf);
+  }
+}
 
-int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
+/* ---------------------------------------------------------------------- */
+template <bool is_super>
+int AtomVecEllipsoid::pack_border_bonus_templated(int n, int *list, double *buf)
 {
   int i, j, m;
   double *shape, *quat, *block, *inertia;
@@ -207,10 +247,16 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       buf[m++] = ubuf(0).d;
     else {
       buf[m++] = ubuf(1).d;
-      shape = bonus[ellipsoid[j]].shape;
-      quat = bonus[ellipsoid[j]].quat;
-      block = bonus[ellipsoid[j]].block;
-      inertia = bonus[ellipsoid[j]].inertia;
+      if (is_super) {
+        shape = bonus_super[ellipsoid[j]].shape;
+        quat = bonus_super[ellipsoid[j]].quat;
+        block = bonus_super[ellipsoid[j]].block;
+        inertia = bonus_super[ellipsoid[j]].inertia;
+      } else {
+        shape = bonus[ellipsoid[j]].shape;
+        quat = bonus[ellipsoid[j]].quat;
+      }
+
       buf[m++] = shape[0];
       buf[m++] = shape[1];
       buf[m++] = shape[2];
@@ -218,20 +264,32 @@ int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
       buf[m++] = quat[1];
       buf[m++] = quat[2];
       buf[m++] = quat[3];
-      buf[m++] = block[0];
-      buf[m++] = block[1];
-      buf[m++] = inertia[0];
-      buf[m++] = inertia[1];
-      buf[m++] = inertia[2];
+
+      if (is_super) {
+        buf[m++] = block[0];
+        buf[m++] = block[1];
+        buf[m++] = inertia[0];
+        buf[m++] = inertia[1];
+        buf[m++] = inertia[2];
+      }
     }
   }
 
   return m;
 }
 
-/* ---------------------------------------------------------------------- */
+int AtomVecEllipsoid::pack_border_bonus(int n, int *list, double *buf)
+{
+  if (atom->superellipsoid_flag) {
+    return pack_border_bonus_templated<true>(n, list, buf);
+  } else {
+    return pack_border_bonus_templated<false>(n, list, buf);
+  }
+}
 
-int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
+/* ---------------------------------------------------------------------- */
+template <bool is_super>
+int AtomVecEllipsoid::unpack_border_bonus_templated(int n, int first, double *buf)
 {
   int i, j, m, last;
   double *shape, *quat, *block, *inertia;
@@ -244,10 +302,15 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
     else {
       j = nlocal_bonus + nghost_bonus;
       if (j == nmax_bonus) grow_bonus();
-      shape = bonus[j].shape;
-      quat = bonus[j].quat;
-      block = bonus[j].block;
-      inertia = bonus[j].inertia;
+      if (is_super) {
+        shape = bonus_super[j].shape;
+        quat = bonus_super[j].quat;
+        block = bonus_super[j].block;
+        inertia = bonus_super[j].inertia;
+      } else {
+        shape = bonus[j].shape;
+        quat = bonus[j].quat;
+      }
       shape[0] = buf[m++];
       shape[1] = buf[m++];
       shape[2] = buf[m++];
@@ -255,24 +318,36 @@ int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
-      block[0] = buf[m++];
-      block[1] = buf[m++];
-      inertia[0] = buf[m++];
-      inertia[1] = buf[m++];
-      inertia[2] = buf[m++];
-      // Particle type inferred from block to reduce comm
-      // TODO: is this a good idea or is that not saving much compared to
-      //       passing the flag in the buffer?
-      bonus[j].type = determine_type(block);
-      bonus[j].ilocal = i;
+      if (is_super) {
+        block[0] = buf[m++];
+        block[1] = buf[m++];
+        inertia[0] = buf[m++];
+        inertia[1] = buf[m++];
+        inertia[2] = buf[m++];
+        // Particle type inferred from block to reduce comm
+        // TODO: is this a good idea or is that not saving much compared to
+        //       passing the flag in the buffer?
+        bonus_super[j].type = determine_type(block);
+        bonus_super[j].ilocal = i;
+      } else {
+        bonus[j].ilocal = i;
+      }
       ellipsoid[i] = j;
       nghost_bonus++;
     }
   }
-
   return m;
 }
 
+int AtomVecEllipsoid::unpack_border_bonus(int n, int first, double *buf)
+{
+  if (atom->superellipsoid_flag) {
+    return unpack_border_bonus_templated<true>(n, first, buf);
+  } else {
+    return unpack_border_bonus_templated<false>(n, first, buf);
+  }
+}
+
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
@@ -287,24 +362,38 @@ int AtomVecEllipsoid::pack_exchange_bonus(int i, double *buf)
   else {
     buf[m++] = ubuf(1).d;
     int j = ellipsoid[i];
-    double *shape = bonus[j].shape;
-    double *quat = bonus[j].quat;
-    double *block = bonus[j].block;
-    double *inertia = bonus[j].inertia;
-    buf[m++] = shape[0];
-    buf[m++] = shape[1];
-    buf[m++] = shape[2];
-    buf[m++] = quat[0];
-    buf[m++] = quat[1];
-    buf[m++] = quat[2];
-    buf[m++] = quat[3];
-    buf[m++] = block[0];
-    buf[m++] = block[1];
-    buf[m++] = inertia[0];
-    buf[m++] = inertia[1];
-    buf[m++] = inertia[2];
-  }
+    if (atom->superellipsoid_flag) {
+      double *shape = bonus_super[j].shape;
+      double *quat = bonus_super[j].quat;
+      double *block = bonus_super[j].block;
+      double *inertia = bonus_super[j].inertia;
+
+      buf[m++] = shape[0];
+      buf[m++] = shape[1];
+      buf[m++] = shape[2];
+      buf[m++] = quat[0];
+      buf[m++] = quat[1];
+      buf[m++] = quat[2];
+      buf[m++] = quat[3];
+      buf[m++] = block[0];
+      buf[m++] = block[1];
+      buf[m++] = inertia[0];
+      buf[m++] = inertia[1];
+      buf[m++] = inertia[2];
+
+    } else {
+      double *shape = bonus[j].shape;
+      double *quat = bonus[j].quat;
 
+      buf[m++] = shape[0];
+      buf[m++] = shape[1];
+      buf[m++] = shape[2];
+      buf[m++] = quat[0];
+      buf[m++] = quat[1];
+      buf[m++] = quat[2];
+      buf[m++] = quat[3];
+    }
+  }
   return m;
 }
 
@@ -318,25 +407,38 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
     ellipsoid[ilocal] = -1;
   else {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
-    double *shape = bonus[nlocal_bonus].shape;
-    double *quat = bonus[nlocal_bonus].quat;
-    double *block = bonus[nlocal_bonus].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
-    BlockType &type = bonus[nlocal_bonus].type;
-    shape[0] = buf[m++];
-    shape[1] = buf[m++];
-    shape[2] = buf[m++];
-    quat[0] = buf[m++];
-    quat[1] = buf[m++];
-    quat[2] = buf[m++];
-    quat[3] = buf[m++];
-    block[0] = buf[m++];
-    block[1] = buf[m++];
-    inertia[0] = buf[m++];
-    inertia[1] = buf[m++];
-    inertia[2] = buf[m++];
-    type = determine_type(block);
-    bonus[nlocal_bonus].ilocal = ilocal;
+    if (atom->superellipsoid_flag) {
+      double *shape = bonus_super[nlocal_bonus].shape;
+      double *quat = bonus_super[nlocal_bonus].quat;
+      double *block = bonus_super[nlocal_bonus].block;
+      double *inertia = bonus_super[nlocal_bonus].inertia;
+      BlockType &type = bonus_super[nlocal_bonus].type;
+      shape[0] = buf[m++];
+      shape[1] = buf[m++];
+      shape[2] = buf[m++];
+      quat[0] = buf[m++];
+      quat[1] = buf[m++];
+      quat[2] = buf[m++];
+      quat[3] = buf[m++];
+      block[0] = buf[m++];
+      block[1] = buf[m++];
+      inertia[0] = buf[m++];
+      inertia[1] = buf[m++];
+      inertia[2] = buf[m++];
+      type = determine_type(block);
+      bonus[nlocal_bonus].ilocal = ilocal;
+    } else {
+      double *shape = bonus[nlocal_bonus].shape;
+      double *quat = bonus[nlocal_bonus].quat;
+      shape[0] = buf[m++];
+      shape[1] = buf[m++];
+      shape[2] = buf[m++];
+      quat[0] = buf[m++];
+      quat[1] = buf[m++];
+      quat[2] = buf[m++];
+      quat[3] = buf[m++];
+      bonus[nlocal_bonus].ilocal = ilocal;
+    }
     ellipsoid[ilocal] = nlocal_bonus++;
   }
 
@@ -379,18 +481,28 @@ int AtomVecEllipsoid::pack_restart_bonus(int i, double *buf)
   else {
     buf[m++] = ubuf(1).d;
     int j = ellipsoid[i];
-    buf[m++] = bonus[j].shape[0];
-    buf[m++] = bonus[j].shape[1];
-    buf[m++] = bonus[j].shape[2];
-    buf[m++] = bonus[j].quat[0];
-    buf[m++] = bonus[j].quat[1];
-    buf[m++] = bonus[j].quat[2];
-    buf[m++] = bonus[j].quat[3];
-    buf[m++] = bonus[j].block[0];
-    buf[m++] = bonus[j].block[1];
-    buf[m++] = bonus[j].inertia[0];
-    buf[m++] = bonus[j].inertia[1];
-    buf[m++] = bonus[j].inertia[2];
+    if (atom->superellipsoid_flag) {
+      buf[m++] = bonus_super[j].shape[0];
+      buf[m++] = bonus_super[j].shape[1];
+      buf[m++] = bonus_super[j].shape[2];
+      buf[m++] = bonus_super[j].quat[0];
+      buf[m++] = bonus_super[j].quat[1];
+      buf[m++] = bonus_super[j].quat[2];
+      buf[m++] = bonus_super[j].quat[3];
+      buf[m++] = bonus_super[j].block[0];
+      buf[m++] = bonus_super[j].block[1];
+      buf[m++] = bonus_super[j].inertia[0];
+      buf[m++] = bonus_super[j].inertia[1];
+      buf[m++] = bonus_super[j].inertia[2];
+    } else {
+      buf[m++] = bonus[j].shape[0];
+      buf[m++] = bonus[j].shape[1];
+      buf[m++] = bonus[j].shape[2];
+      buf[m++] = bonus[j].quat[0];
+      buf[m++] = bonus[j].quat[1];
+      buf[m++] = bonus[j].quat[2];
+      buf[m++] = bonus[j].quat[3];
+    }
   }
 
   return m;
@@ -409,25 +521,38 @@ int AtomVecEllipsoid::unpack_restart_bonus(int ilocal, double *buf)
     ellipsoid[ilocal] = -1;
   else {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
-    double *shape = bonus[nlocal_bonus].shape;
-    double *quat = bonus[nlocal_bonus].quat;
-    double *block = bonus[nlocal_bonus].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
-    BlockType &type = bonus[nlocal_bonus].type;
-    shape[0] = buf[m++];
-    shape[1] = buf[m++];
-    shape[2] = buf[m++];
-    quat[0] = buf[m++];
-    quat[1] = buf[m++];
-    quat[2] = buf[m++];
-    quat[3] = buf[m++];
-    block[0] = buf[m++];
-    block[1] = buf[m++];
-    inertia[0] = buf[m++];
-    inertia[1] = buf[m++];
-    inertia[2] = buf[m++];
-    type = determine_type(block);
-    bonus[nlocal_bonus].ilocal = ilocal;
+    if (atom->superellipsoid_flag) {
+      double *shape = bonus_super[nlocal_bonus].shape;
+      double *quat = bonus_super[nlocal_bonus].quat;
+      double *block = bonus_super[nlocal_bonus].block;
+      double *inertia = bonus_super[nlocal_bonus].inertia;
+      BlockType &type = bonus_super[nlocal_bonus].type;
+      shape[0] = buf[m++];
+      shape[1] = buf[m++];
+      shape[2] = buf[m++];
+      quat[0] = buf[m++];
+      quat[1] = buf[m++];
+      quat[2] = buf[m++];
+      quat[3] = buf[m++];
+      block[0] = buf[m++];
+      block[1] = buf[m++];
+      inertia[0] = buf[m++];
+      inertia[1] = buf[m++];
+      inertia[2] = buf[m++];
+      type = determine_type(block);
+      bonus_super[nlocal_bonus].ilocal = ilocal;
+    } else {
+      double *shape = bonus[nlocal_bonus].shape;
+      double *quat = bonus[nlocal_bonus].quat;
+      shape[0] = buf[m++];
+      shape[1] = buf[m++];
+      shape[2] = buf[m++];
+      quat[0] = buf[m++];
+      quat[1] = buf[m++];
+      quat[2] = buf[m++];
+      quat[3] = buf[m++];
+      bonus[nlocal_bonus].ilocal = ilocal;
+    }
     ellipsoid[ilocal] = nlocal_bonus++;
   }
 
@@ -444,15 +569,15 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
 
   if (nlocal_bonus == nmax_bonus) grow_bonus();
 
-  double *shape = bonus[nlocal_bonus].shape;
   int ivalue = 1;
+  double shape[3];
   shape[0] = 0.5 * utils::numeric(FLERR, values[ivalue++], true, lmp);
   shape[1] = 0.5 * utils::numeric(FLERR, values[ivalue++], true, lmp);
   shape[2] = 0.5 * utils::numeric(FLERR, values[ivalue++], true, lmp);
   if (shape[0] <= 0.0 || shape[1] <= 0.0 || shape[2] <= 0.0)
     error->one(FLERR, "Invalid shape in Ellipsoids section of data file");
 
-  double *quat = bonus[nlocal_bonus].quat;
+  double quat[4];
   quat[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
   quat[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
   quat[2] = utils::numeric(FLERR, values[ivalue++], true, lmp);
@@ -460,30 +585,56 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
   MathExtra::qnormalize(quat);
 
   // Blockiness exponents can be given optionally for superellipsoids
+  if (atom->superellipsoid_flag) {
+    // assign shape and quat to bonus data structure
+    BonusSuper *b = &bonus_super[nlocal_bonus];
+    b->shape[0] = shape[0];
+    b->shape[1] = shape[1];
+    b->shape[2] = shape[2];
+    b->quat[0] = quat[0];
+    b->quat[1] = quat[1];
+    b->quat[2] = quat[2];
+    b->quat[3] = quat[3];
+
+    double *block = bonus_super[nlocal_bonus].block;
+    BlockType &type = bonus_super[nlocal_bonus].type;
+    if (ivalue == values.size()) {
+      block[0] = block[1] = 2.0;
+      type = BlockType::ELLIPSOID;
+    } else {
+      block[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
+      block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
+      type = determine_type(block);
+    }
+    // reset ellipsoid mass
+    // previously stored density in rmass
 
-  double *block = bonus[nlocal_bonus].block;
-  BlockType &type = bonus[nlocal_bonus].type;
-  if (ivalue == values.size()) {
-    block[0] = block[1] = 2.0;
-    type = BlockType::ELLIPSOID;
-  }
-  else {
-    block[0] = utils::numeric(FLERR, values[ivalue++], true, lmp);
-    block[1] = utils::numeric(FLERR, values[ivalue++], true, lmp);
-    type = determine_type(block);
-  }
+    rmass[m] *= MathExtra::volume_ellipsoid(shape, block, type);
 
-  // reset ellipsoid mass
-  // previously stored density in rmass
+    // Principal moments of inertia
 
-  rmass[m] *= MathExtra::volume_ellipsoid(shape, block, type);
+    inertia_ellipsoid_principal(shape, rmass[m], bonus_super[nlocal_bonus].inertia, block, type);
 
-  // Principal moments of inertia
+    radius[m] = radius_ellipsoid(shape, block, type);
+    bonus[nlocal_bonus].ilocal = m;
 
-  inertia_ellipsoid_principal(shape, rmass[m], bonus[nlocal_bonus].inertia, block, type);
+  } else {
+    // assign shape and quat to bonus data structure
+    Bonus *b = &bonus[nlocal_bonus];
+    b->shape[0] = shape[0];
+    b->shape[1] = shape[1];
+    b->shape[2] = shape[2];
+    b->quat[0] = quat[0];
+    b->quat[1] = quat[1];
+    b->quat[2] = quat[2];
+    b->quat[3] = quat[3];
+
+    // reset ellipsoid mass
+    // previously stored density in rmass
+    rmass[m] *= MathExtra::volume_ellipsoid(shape);
+    bonus[nlocal_bonus].ilocal = m;
+  }
 
-  radius[m] = radius_ellipsoid(shape, block, type);
-  bonus[nlocal_bonus].ilocal = m;
   ellipsoid[m] = nlocal_bonus++;
 }
 
@@ -494,7 +645,10 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
 double AtomVecEllipsoid::memory_usage_bonus()
 {
   double bytes = 0;
-  bytes += nmax_bonus * sizeof(Bonus);
+  if (atom->superellipsoid_flag)
+    bytes += nmax_bonus * sizeof(BonusSuper);
+  else
+    bytes += nmax_bonus * sizeof(Bonus);
   return bytes;
 }
 
@@ -505,8 +659,8 @@ double AtomVecEllipsoid::memory_usage_bonus()
 void AtomVecEllipsoid::create_atom_post(int ilocal)
 {
   rmass[ilocal] = 1.0;
-  radius[ilocal] = 0.0;
   ellipsoid[ilocal] = -1;
+  if (atom->superellipsoid_flag) radius[ilocal] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
@@ -538,8 +692,7 @@ void AtomVecEllipsoid::data_atom_post(int ilocal)
 
 void AtomVecEllipsoid::pack_data_pre(int ilocal)
 {
-  double *shape, *block;
-  BlockType type;
+  double *shape;
 
   ellipsoid_flag = atom->ellipsoid[ilocal];
   rmass_one = atom->rmass[ilocal];
@@ -550,10 +703,15 @@ void AtomVecEllipsoid::pack_data_pre(int ilocal)
     ellipsoid[ilocal] = 1;
 
   if (ellipsoid_flag >= 0) {
-    shape = bonus[ellipsoid_flag].shape;
-    block = bonus[ellipsoid_flag].block;
-    type = bonus[ellipsoid_flag].type;
-    rmass[ilocal] /= MathExtra::volume_ellipsoid(shape, block, type);
+    if (atom->superellipsoid_flag) {
+      shape = bonus_super[ellipsoid_flag].shape;
+      double *block = bonus_super[ellipsoid_flag].block;
+      BlockType type = bonus_super[ellipsoid_flag].type;
+      rmass[ilocal] /= MathExtra::volume_ellipsoid(shape, block, type);
+    } else {
+      shape = bonus[ellipsoid_flag].shape;
+      rmass[ilocal] /= MathExtra::volume_ellipsoid(shape);
+    }
   }
 }
 
@@ -585,15 +743,25 @@ int AtomVecEllipsoid::pack_data_bonus(double *buf, int /*flag*/)
     if (buf) {
       buf[m++] = ubuf(tag[i]).d;
       j = ellipsoid[i];
-      buf[m++] = 2.0 * bonus[j].shape[0];
-      buf[m++] = 2.0 * bonus[j].shape[1];
-      buf[m++] = 2.0 * bonus[j].shape[2];
-      buf[m++] = bonus[j].quat[0];
-      buf[m++] = bonus[j].quat[1];
-      buf[m++] = bonus[j].quat[2];
-      buf[m++] = bonus[j].quat[3];
-      buf[m++] = bonus[j].block[0];
-      buf[m++] = bonus[j].block[1];
+      if (atom->superellipsoid_flag) {
+        buf[m++] = 2.0 * bonus_super[j].shape[0];
+        buf[m++] = 2.0 * bonus_super[j].shape[1];
+        buf[m++] = 2.0 * bonus_super[j].shape[2];
+        buf[m++] = bonus_super[j].quat[0];
+        buf[m++] = bonus_super[j].quat[1];
+        buf[m++] = bonus_super[j].quat[2];
+        buf[m++] = bonus_super[j].quat[3];
+        buf[m++] = bonus_super[j].block[0];
+        buf[m++] = bonus_super[j].block[1];
+      } else {
+        buf[m++] = 2.0 * bonus[j].shape[0];
+        buf[m++] = 2.0 * bonus[j].shape[1];
+        buf[m++] = 2.0 * bonus[j].shape[2];
+        buf[m++] = bonus[j].quat[0];
+        buf[m++] = bonus[j].quat[1];
+        buf[m++] = bonus[j].quat[2];
+        buf[m++] = bonus[j].quat[3];
+      }
     } else
       m += size_data_bonus;
   }
@@ -608,11 +776,19 @@ int AtomVecEllipsoid::pack_data_bonus(double *buf, int /*flag*/)
 void AtomVecEllipsoid::write_data_bonus(FILE *fp, int n, double *buf, int /*flag*/)
 {
   int i = 0;
-  while (i < n) {
-    utils::print(fp, "{} {} {} {} {} {} {} {} {} {}\n", ubuf(buf[i]).i, buf[i + 1], buf[i + 2], buf[i + 3],
-               buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7], buf[i + 8], buf[i + 9]);
-
-    i += size_data_bonus;
+  if (atom->superellipsoid_flag) {
+    while (i < n) {
+      utils::print(fp, "{} {} {} {} {} {} {} {} {} {}\n", ubuf(buf[i]).i, buf[i + 1], buf[i + 2],
+                   buf[i + 3], buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7], buf[i + 8],
+                   buf[i + 9]);
+      i += size_data_bonus;
+    }
+  } else {
+    while (i < n) {
+      utils::print(fp, "{} {} {} {} {} {} {} {}\n", ubuf(buf[i]).i, buf[i + 1], buf[i + 2],
+                   buf[i + 3], buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+      i += size_data_bonus;
+    }
   }
 }
 
@@ -631,17 +807,29 @@ void AtomVecEllipsoid::read_data_general_to_restricted(int nlocal_previous, int
   // quat_g2r = quat that rotates from general to restricted triclinic
   // quat_new = ellipsoid quat converted to restricted triclinic
 
-  double quat_g2r[4],quat_new[4];
-  MathExtra::mat_to_quat(domain->rotate_g2r,quat_g2r);
+  double quat_g2r[4], quat_new[4];
+  MathExtra::mat_to_quat(domain->rotate_g2r, quat_g2r);
 
-  for (int i = nlocal_previous; i < nlocal; i++) {
-    if (ellipsoid[i] < 0) continue;
-    j = ellipsoid[i];
-    MathExtra::quatquat(quat_g2r,bonus[j].quat,quat_new);
-    bonus[j].quat[0] = quat_new[0];
-    bonus[j].quat[1] = quat_new[1];
-    bonus[j].quat[2] = quat_new[2];
-    bonus[j].quat[3] = quat_new[3];
+  if (atom->superellipsoid_flag) {
+    for (int i = nlocal_previous; i < nlocal; i++) {
+      if (ellipsoid[i] < 0) continue;
+      j = ellipsoid[i];
+      MathExtra::quatquat(quat_g2r, bonus_super[j].quat, quat_new);
+      bonus_super[j].quat[0] = quat_new[0];
+      bonus_super[j].quat[1] = quat_new[1];
+      bonus_super[j].quat[2] = quat_new[2];
+      bonus_super[j].quat[3] = quat_new[3];
+    }
+  } else {
+    for (int i = nlocal_previous; i < nlocal; i++) {
+      if (ellipsoid[i] < 0) continue;
+      j = ellipsoid[i];
+      MathExtra::quatquat(quat_g2r, bonus[j].quat, quat_new);
+      bonus[j].quat[0] = quat_new[0];
+      bonus[j].quat[1] = quat_new[1];
+      bonus[j].quat[2] = quat_new[2];
+      bonus[j].quat[3] = quat_new[3];
+    }
   }
 }
 
@@ -655,23 +843,32 @@ void AtomVecEllipsoid::write_data_restricted_to_general()
 {
   AtomVec::write_data_restricted_to_general();
 
-  memory->create(quat_hold,nlocal_bonus,4,"atomvec:quat_hold");
+  memory->create(quat_hold, nlocal_bonus, 4, "atomvec:quat_hold");
 
-  for (int i = 0; i < nlocal_bonus; i++)
-    memcpy(quat_hold[i],bonus[i].quat,4*sizeof(double));
+  for (int i = 0; i < nlocal_bonus; i++) memcpy(quat_hold[i], bonus[i].quat, 4 * sizeof(double));
 
   // quat_r2g = quat that rotates from restricted to general triclinic
   // quat_new = ellipsoid quat converted to general triclinic
 
-  double quat_r2g[4],quat_new[4];
-  MathExtra::mat_to_quat(domain->rotate_r2g,quat_r2g);
+  double quat_r2g[4], quat_new[4];
+  MathExtra::mat_to_quat(domain->rotate_r2g, quat_r2g);
+  if (atom->superellipsoid_flag) {
 
-  for (int i = 0; i < nlocal_bonus; i++) {
-    MathExtra::quatquat(quat_r2g,bonus[i].quat,quat_new);
-    bonus[i].quat[0] = quat_new[0];
-    bonus[i].quat[1] = quat_new[1];
-    bonus[i].quat[2] = quat_new[2];
-    bonus[i].quat[3] = quat_new[3];
+    for (int i = 0; i < nlocal_bonus; i++) {
+      MathExtra::quatquat(quat_r2g, bonus_super[i].quat, quat_new);
+      bonus_super[i].quat[0] = quat_new[0];
+      bonus_super[i].quat[1] = quat_new[1];
+      bonus_super[i].quat[2] = quat_new[2];
+      bonus_super[i].quat[3] = quat_new[3];
+    }
+  } else {
+    for (int i = 0; i < nlocal_bonus; i++) {
+      MathExtra::quatquat(quat_r2g, bonus[i].quat, quat_new);
+      bonus[i].quat[0] = quat_new[0];
+      bonus[i].quat[1] = quat_new[1];
+      bonus[i].quat[2] = quat_new[2];
+      bonus[i].quat[3] = quat_new[3];
+    }
   }
 }
 
@@ -685,10 +882,12 @@ void AtomVecEllipsoid::write_data_restricted_to_general()
 void AtomVecEllipsoid::write_data_restore_restricted()
 {
   AtomVec::write_data_restore_restricted();
-
-  for (int i = 0; i < nlocal_bonus; i++)
-    memcpy(bonus[i].quat,quat_hold[i],4*sizeof(double));
-
+  if (atom->superellipsoid_flag) {
+    for (int i = 0; i < nlocal_bonus; i++)
+      memcpy(bonus_super[i].quat, quat_hold[i], 4 * sizeof(double));
+  } else {
+    for (int i = 0; i < nlocal_bonus; i++) memcpy(bonus[i].quat, quat_hold[i], 4 * sizeof(double));
+  }
   memory->destroy(quat_hold);
   quat_hold = nullptr;
 }
@@ -704,40 +903,60 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
   if (ellipsoid[i] < 0) {
     if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) return;
     if (nlocal_bonus == nmax_bonus) grow_bonus();
-    double *shape = bonus[nlocal_bonus].shape;
-    double *quat = bonus[nlocal_bonus].quat;
-    double *block = bonus[nlocal_bonus].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
-    BlockType &type = bonus[nlocal_bonus].type;
-    shape[0] = shapex;
-    shape[1] = shapey;
-    shape[2] = shapez;
-    quat[0] = 1.0;
-    quat[1] = 0.0;
-    quat[2] = 0.0;
-    quat[3] = 0.0;
-    block[0] = 2;
-    block[1] = 2;
-    type = BlockType::ELLIPSOID;
-    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
-    radius[i] = radius_ellipsoid(shape, block, type);
-    bonus[nlocal_bonus].ilocal = i;
+    if (atom->superellipsoid_flag) {
+      double *shape = bonus_super[nlocal_bonus].shape;
+      double *quat = bonus_super[nlocal_bonus].quat;
+      double *block = bonus_super[nlocal_bonus].block;
+      double *inertia = bonus_super[nlocal_bonus].inertia;
+      BlockType &type = bonus_super[nlocal_bonus].type;
+      shape[0] = shapex;
+      shape[1] = shapey;
+      shape[2] = shapez;
+      quat[0] = 1.0;
+      quat[1] = 0.0;
+      quat[2] = 0.0;
+      quat[3] = 0.0;
+      block[0] = 2;
+      block[1] = 2;
+      type = BlockType::ELLIPSOID;
+      inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+      radius[i] = radius_ellipsoid(shape, block, type);
+      bonus_super[nlocal_bonus].ilocal = i;
+    } else {
+      double *shape = bonus[nlocal_bonus].shape;
+      double *quat = bonus[nlocal_bonus].quat;
+      shape[0] = shapex;
+      shape[1] = shapey;
+      shape[2] = shapez;
+      quat[0] = 1.0;
+      quat[1] = 0.0;
+      quat[2] = 0.0;
+      quat[3] = 0.0;
+      bonus[nlocal_bonus].ilocal = i;
+    }
     ellipsoid[i] = nlocal_bonus++;
   } else if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) {
     copy_bonus_all(nlocal_bonus - 1, ellipsoid[i]);
     nlocal_bonus--;
     ellipsoid[i] = -1;
-    radius[i] = 0.0;
+    if (atom->superellipsoid_flag) radius[i] = 0.0;
   } else {
-    double *shape = bonus[ellipsoid[i]].shape;
-    double *block = bonus[ellipsoid[i]].block;
-    double *inertia = bonus[ellipsoid[i]].inertia;
-    BlockType type = bonus[ellipsoid[i]].type;
-    shape[0] = shapex;
-    shape[1] = shapey;
-    shape[2] = shapez;
-    inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
-    radius[i] = radius_ellipsoid(shape, block, type);
+    if (atom->superellipsoid_flag) {
+      double *shape = bonus_super[ellipsoid[i]].shape;
+      double *block = bonus_super[ellipsoid[i]].block;
+      double *inertia = bonus_super[ellipsoid[i]].inertia;
+      BlockType type = bonus_super[ellipsoid[i]].type;
+      shape[0] = shapex;
+      shape[1] = shapey;
+      shape[2] = shapez;
+      inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
+      radius[i] = radius_ellipsoid(shape, block, type);
+    } else {
+      double *shape = bonus[ellipsoid[i]].shape;
+      shape[0] = shapex;
+      shape[1] = shapey;
+      shape[2] = shapez;
+    }
   }
 }
 
@@ -749,13 +968,15 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
 
 void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
 {
+  if (!atom->superellipsoid_flag)
+    error->one(FLERR, "Cannot set blockiness exponents for non-superellipsoid atom");
   if (ellipsoid[i] < 0) {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
-    double *shape = bonus[nlocal_bonus].shape;
-    double *quat = bonus[nlocal_bonus].quat;
-    double *block = bonus[nlocal_bonus].block;
-    double *inertia = bonus[nlocal_bonus].inertia;
-    BlockType &type = bonus[nlocal_bonus].type;
+    double *shape = bonus_super[nlocal_bonus].shape;
+    double *quat = bonus_super[nlocal_bonus].quat;
+    double *block = bonus_super[nlocal_bonus].block;
+    double *inertia = bonus_super[nlocal_bonus].inertia;
+    BlockType &type = bonus_super[nlocal_bonus].type;
     shape[0] = 0.5;
     shape[1] = 0.5;
     shape[2] = 0.5;
@@ -765,16 +986,16 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
     quat[1] = 0.0;
     quat[2] = 0.0;
     quat[3] = 0.0;
-    bonus[nlocal_bonus].ilocal = i;
+    bonus_super[nlocal_bonus].ilocal = i;
     type = determine_type(block);
     inertia_ellipsoid_principal(shape, rmass[i], inertia, block, type);
     radius[i] = radius_ellipsoid(shape, block, type);
     ellipsoid[i] = nlocal_bonus++;
   } else {
-    double *shape = bonus[ellipsoid[i]].shape;
-    double *block = bonus[ellipsoid[i]].block;
-    double *inertia = bonus[ellipsoid[i]].inertia;
-    BlockType &type = bonus[ellipsoid[i]].type;
+    double *shape = bonus_super[ellipsoid[i]].shape;
+    double *block = bonus_super[ellipsoid[i]].block;
+    double *inertia = bonus_super[ellipsoid[i]].inertia;
+    BlockType &type = bonus_super[ellipsoid[i]].type;
     block[0] = blockn1;
     block[1] = blockn2;
     type = determine_type(block);
@@ -783,8 +1004,9 @@ void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
   }
 }
 
-AtomVecEllipsoid::BlockType AtomVecEllipsoid::determine_type(double* block) {
- BlockType flag(BlockType::GENERAL);
+AtomVecEllipsoid::BlockType AtomVecEllipsoid::determine_type(double *block)
+{
+  BlockType flag(BlockType::GENERAL);
   if ((std::fabs(block[0] - 2) <= EPSILON_BLOCK) && (std::fabs(block[1] - 2) <= EPSILON_BLOCK))
     flag = BlockType::ELLIPSOID;
   else if (std::fabs(block[0] - block[1]) <= EPSILON_BLOCK)
@@ -794,13 +1016,15 @@ AtomVecEllipsoid::BlockType AtomVecEllipsoid::determine_type(double* block) {
 
 double AtomVecEllipsoid::radius_ellipsoid(double *shape, double *block, BlockType flag_type)
 {
-  if (flag_type == BlockType::ELLIPSOID)
-    return std::max(std::max(shape[0], shape[1]), shape[2]);
+  if (flag_type == BlockType::ELLIPSOID) return std::max(std::max(shape[0], shape[1]), shape[2]);
 
   // Super ellipsoid
   double a = shape[0], b = shape[1], c = shape[2];
   double n1 = block[0], n2 = block[1];
-  if (shape[0] < shape[1]) {a = shape[1]; b = shape[0];}
+  if (shape[0] < shape[1]) {
+    a = shape[1];
+    b = shape[0];
+  }
 
   // Cylinder approximation for n2=2
 
@@ -859,3 +1083,37 @@ void AtomVecEllipsoid::inertia_ellipsoid_principal(double *shape, double mass, d
     idiag[2] = dens * (m0 + m1);
   }
 }
+
+void AtomVecEllipsoid::process_args(int narg, char **arg)
+{
+  if (narg == 0) return;
+
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "superellipsoid") == 0) {
+      atom->superellipsoid_flag = 1;
+      // Circumscribed radius, not physical radius
+      atom->radius_flag = 1;
+
+      // Allocate bonus data for blockiness
+      size_border_bonus = 13;
+      size_restart_bonus_one = 13;
+      size_data_bonus = 10;
+
+      // Add radius to the arrays for communication
+      fields_grow.push_back("radius");
+      fields_copy.push_back("radius");
+      fields_border.push_back("radius");
+      fields_border_vel.push_back("radius");
+      fields_exchange.push_back("radius");
+      fields_restart.push_back("radius");
+      fields_create.push_back("radius");
+
+      setup_fields();
+
+      iarg++;
+    } else {
+      error->all(FLERR, fmt::format("Unknown atom_style ellipsoid argument: {}", arg[iarg]));
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 16d52ad4ad8..2af35411192 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -34,12 +34,16 @@ class AtomVecEllipsoid : virtual public AtomVec {
   struct Bonus {
     double shape[3];
     double quat[4];
+    int ilocal;
+  };
+  struct Bonus *bonus;
+
+  struct BonusSuper : public Bonus {
     double block[2];
     double inertia[3];
     BlockType type;
-    int ilocal;
   };
-  struct Bonus *bonus;
+  struct BonusSuper *bonus_super;
 
   AtomVecEllipsoid(class LAMMPS *);
   ~AtomVecEllipsoid() override;
@@ -95,6 +99,23 @@ class AtomVecEllipsoid : virtual public AtomVec {
   static double radius_ellipsoid(double *, double *, BlockType);
   static void inertia_ellipsoid_principal(double *, double, double *,
                                    double *block, BlockType);
+  
+
+  template <bool is_super>
+  int pack_comm_bonus_templated(int, int *, double *);
+
+  template <bool is_super>
+  void unpack_comm_bonus_templated(int, int, double *); 
+  
+  template <bool is_super>
+  int pack_border_bonus_templated(int, int *, double *);
+
+  template <bool is_super>
+  int unpack_border_bonus_templated(int, int, double *);
+
+  void process_args(int, char **) override;
+
+
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 33dc1c26c3f..5fa326453a7 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -610,10 +610,22 @@ void inertia_triangle(double *idiag, double *quat, double /*mass*/,
 /* ----------------------------------------------------------------------
    compute the volume of the ellipsoid
    shape = 3 radii of ellipsoid
-   block = blockiness exponents of super-ellipsoid
    return volume of the ellipsoid
 ------------------------------------------------------------------------- */
 
+double volume_ellipsoid(double *shape)
+{
+  double unitvol = MY_4PI3;
+  return unitvol * shape[0] * shape[1] * shape[2];
+}
+
+/* ----------------------------------------------------------------------
+   compute the volume of the (super)ellipsoid
+   shape = 3 radii of (super)ellipsoid
+   block = blockiness exponents of (super)ellipsoid
+   return volume of the (super)ellipsoid
+------------------------------------------------------------------------- */
+
 double volume_ellipsoid(double *shape, double *block, int flag_super)
 {
   double unitvol = MY_4PI3;
@@ -628,8 +640,6 @@ double volume_ellipsoid(double *shape, double *block, int flag_super)
   return unitvol * shape[0] * shape[1] * shape[2];
 }
 
-
-
 /* ----------------------------------------------------------------------
    build rotation matrix for a small angle rotation around the X axis
 ------------------------------------------------------------------------- */
diff --git a/src/math_extra.h b/src/math_extra.h
index e6da14c08a7..5671733672a 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -122,7 +122,8 @@ void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *i
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
 
 // volume of ellipsoid
-double volume_ellipsoid(double *shape, double *block = nullptr, int flag_super = 0);
+double volume_ellipsoid(double *shape);
+double volume_ellipsoid(double *shape, double *block, int flag_super);
 
 // triclinic bounding box of a sphere
 

From f63da6d481e7b6e6997e6c5df7966b1e6e1ecb93 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 5 Mar 2026 15:57:41 +0100
Subject: [PATCH 109/174] Updated the compute-property-atom class to branch
 between bonus and bonus_super

---
 src/compute_property_atom.cpp | 145 ++++++++++++++++++++++++++--------
 1 file changed, 110 insertions(+), 35 deletions(-)

diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index 944efd46828..aa0e829bc99 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -1336,58 +1336,89 @@ void ComputePropertyAtom::pack_angmomz(int n)
 
 void ComputePropertyAtom::pack_shapex(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  for (int i = 0; i < nlocal; i++) {
+  if (atom->superellipsoid_flag){
+    AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
       buf[n] = 2.0*bonus[ellipsoid[i]].shape[0];
     else buf[n] = 1.0;
     n += nvalues;
   }
+  } else {
+    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+    for (int i = 0; i < nlocal; i++) {
+      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+        buf[n] = 2.0*bonus[ellipsoid[i]].shape[0];
+      else buf[n] = 1.0;
+      n += nvalues;
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyAtom::pack_shapey(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  for (int i = 0; i < nlocal; i++) {
+  if (atom->superellipsoid_flag){
+    AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
       buf[n] = 2.0*bonus[ellipsoid[i]].shape[1];
     else buf[n] = 1.0;
     n += nvalues;
   }
+  } else {
+    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+    for (int i = 0; i < nlocal; i++) {
+      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+        buf[n] = 2.0*bonus[ellipsoid[i]].shape[1];
+      else buf[n] = 1.0;
+      n += nvalues;
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
+
 void ComputePropertyAtom::pack_shapez(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  for (int i = 0; i < nlocal; i++) {
+  if (atom->superellipsoid_flag){
+    AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
       buf[n] = 2.0*bonus[ellipsoid[i]].shape[2];
     else buf[n] = 1.0;
     n += nvalues;
   }
+  } else {
+    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+    for (int i = 0; i < nlocal; i++) {
+      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+        buf[n] = 2.0*bonus[ellipsoid[i]].shape[2];
+      else buf[n] = 1.0;
+      n += nvalues;
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyAtom::pack_block1(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -1403,7 +1434,7 @@ void ComputePropertyAtom::pack_block1(int n)
 
 void ComputePropertyAtom::pack_block2(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -1422,7 +1453,7 @@ void ComputePropertyAtom::pack_block2(int n)
 
 void ComputePropertyAtom::pack_inertiax(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -1438,7 +1469,7 @@ void ComputePropertyAtom::pack_inertiax(int n)
 
 void ComputePropertyAtom::pack_inertiay(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -1455,7 +1486,7 @@ void ComputePropertyAtom::pack_inertiay(int n)
 
 void ComputePropertyAtom::pack_inertiaz(int n)
 {
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -1473,16 +1504,27 @@ void ComputePropertyAtom::pack_inertiaz(int n)
 void ComputePropertyAtom::pack_quatw(int n)
 {
   if (avec_ellipsoid) {
-    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+
     int *ellipsoid = atom->ellipsoid;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (int i = 0; i < nlocal; i++) {
-      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
-        buf[n] = bonus[ellipsoid[i]].quat[0];
-      else buf[n] = 1.0;
-      n += nvalues;
+    if (atom->superellipsoid_flag){
+      AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[0];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
+    } else {
+      AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[0];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
     }
 
   } else if (avec_body) {
@@ -1516,16 +1558,27 @@ void ComputePropertyAtom::pack_quatw(int n)
 void ComputePropertyAtom::pack_quati(int n)
 {
   if (avec_ellipsoid) {
-    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+
     int *ellipsoid = atom->ellipsoid;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (int i = 0; i < nlocal; i++) {
-      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
-        buf[n] = bonus[ellipsoid[i]].quat[1];
-      else buf[n] = 0.0;
-      n += nvalues;
+    if (atom->superellipsoid_flag){
+      AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[1];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
+    } else {
+      AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[1];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
     }
 
   } else if (avec_body) {
@@ -1559,16 +1612,27 @@ void ComputePropertyAtom::pack_quati(int n)
 void ComputePropertyAtom::pack_quatj(int n)
 {
   if (avec_ellipsoid) {
-    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+
     int *ellipsoid = atom->ellipsoid;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (int i = 0; i < nlocal; i++) {
-      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
-        buf[n] = bonus[ellipsoid[i]].quat[2];
-      else buf[n] = 0.0;
-      n += nvalues;
+    if (atom->superellipsoid_flag){
+      AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[2];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
+    } else {
+      AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[2];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
     }
 
   } else if (avec_body) {
@@ -1602,16 +1666,27 @@ void ComputePropertyAtom::pack_quatj(int n)
 void ComputePropertyAtom::pack_quatk(int n)
 {
   if (avec_ellipsoid) {
-    AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+
     int *ellipsoid = atom->ellipsoid;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (int i = 0; i < nlocal; i++) {
-      if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
-        buf[n] = bonus[ellipsoid[i]].quat[3];
-      else buf[n] = 0.0;
-      n += nvalues;
+    if (atom->superellipsoid_flag){
+      AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[3];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
+    } else {
+      AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+      for (int i = 0; i < nlocal; i++) {
+        if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
+          buf[n] = bonus[ellipsoid[i]].quat[3];
+        else buf[n] = 1.0;
+        n += nvalues;
+      }
     }
 
   } else if (avec_body) {

From 8b424fa1df96b20e9168b6ee2c91bf5fefd3f000 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 5 Mar 2026 16:03:41 +0100
Subject: [PATCH 110/174] Updated pair gran ellipsoid to read the super_bonus
 correctly

---
 src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp | 4 ++--
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 80a545d49cf..6bc7ee3427e 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -111,7 +111,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double *special_lj = force->special_lj;
   auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
 
   inum = list->inum;
@@ -548,7 +548,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
     return 0.0;
   }
   auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
   MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 5146b3e334c..6f6b6d47e7e 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -164,7 +164,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double *special_lj = force->special_lj;
   auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
 
   inum = list->inum;
@@ -590,10 +590,10 @@ void PairGranHookeHistoryEllipsoid::init_style()
 
   // error and warning checks
 
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->ellipsoid_flag)
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
     error->all(
         FLERR,
-        "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and ellipdoid flag");
+        "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and superellipdoid flag");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 
@@ -820,7 +820,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
     return 0.0;
   }
   auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
   int *ellipsoid = atom->ellipsoid;
   double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
   MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);

From 6b8e92d7708c3d7a3fecff97d4fb7b6d05897aa0 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 5 Mar 2026 16:14:29 +0100
Subject: [PATCH 111/174] Updated compute_erotate_asphere

---
 src/ASPHERE/compute_erotate_asphere.cpp | 30 +++++++++++++++++++------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/compute_erotate_asphere.cpp b/src/ASPHERE/compute_erotate_asphere.cpp
index 16c294e0e4a..a4b817e8110 100644
--- a/src/ASPHERE/compute_erotate_asphere.cpp
+++ b/src/ASPHERE/compute_erotate_asphere.cpp
@@ -79,7 +79,11 @@ double ComputeERotateAsphere::compute_scalar()
   invoked_scalar = update->ntimestep;
 
   AtomVecEllipsoid::Bonus *ebonus = nullptr;
-  if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
+  AtomVecEllipsoid::BonusSuper *ebonus_super = nullptr;
+  if (avec_ellipsoid) {
+    if (atom->superellipsoid_flag) ebonus_super = avec_ellipsoid->bonus_super;
+    else ebonus = avec_ellipsoid->bonus;
+  }
   AtomVecLine::Bonus *lbonus = nullptr;
   if (avec_line) lbonus = avec_line->bonus;
   AtomVecTri::Bonus *tbonus = nullptr;
@@ -98,6 +102,7 @@ double ComputeERotateAsphere::compute_scalar()
   // no point particles since divide by inertia
 
   double length;
+  double *shape, *quat, *block;
   double wbody[3], inertia[3];
   double rot[3][3];
   double erotate = 0.0;
@@ -106,12 +111,23 @@ double ComputeERotateAsphere::compute_scalar()
     if (mask[i] & groupbit) {
       if (ellipsoid && ebonus && (ellipsoid[i] >= 0)) {
 
-        // principal moments of inertia
-
-        inertia[0] = ebonus[ellipsoid[i]].inertia[0];
-        inertia[1] = ebonus[ellipsoid[i]].inertia[1];
-        inertia[2] = ebonus[ellipsoid[i]].inertia[2];
-
+        if (atom->superellipsoid_flag) {
+          shape = ebonus_super[ellipsoid[i]].shape;
+          quat = ebonus_super[ellipsoid[i]].quat;
+          block = ebonus_super[ellipsoid[i]].block;
+          // principal moments of inertia
+          inertia[0] = ebonus_super[ellipsoid[i]].inertia[0];
+          inertia[1] = ebonus_super[ellipsoid[i]].inertia[1];
+          inertia[2] = ebonus_super[ellipsoid[i]].inertia[2];
+        } else {
+          shape = ebonus[ellipsoid[i]].shape;
+          quat = ebonus[ellipsoid[i]].quat;
+          // principal moments of inertia
+          inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
+          inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
+          inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+        }
+          
         // wbody = angular velocity in body frame
 
         MathExtra::quat_to_mat(ebonus[ellipsoid[i]].quat,rot);

From 884545280c2d0ec5c50bcf3fd3db9e10c9713b21 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 09:54:53 +0100
Subject: [PATCH 112/174] Updated support for temperature for superellipsoids

---
 src/ASPHERE/compute_temp_asphere.cpp | 211 +++++++++++++--------------
 src/ASPHERE/compute_temp_asphere.h   |   3 +
 2 files changed, 102 insertions(+), 112 deletions(-)

diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index ff4bb8178fe..64454a77c0d 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -33,6 +33,8 @@
 using namespace LAMMPS_NS;
 
 enum { ROTATE, ALL };
+static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
+
 
 /* ---------------------------------------------------------------------- */
 
@@ -184,17 +186,11 @@ void ComputeTempAsphere::dof_compute()
 }
 
 /* ---------------------------------------------------------------------- */
-
-double ComputeTempAsphere::compute_scalar()
+template<bool is_super>
+void ComputeTempAsphere::compute_scalar_templated(double &t)
 {
-  invoked_scalar = update->ntimestep;
-
-  if (tempbias) {
-    if (tbias->invoked_scalar != update->ntimestep) tbias->compute_scalar();
-    tbias->remove_bias_all();
-  }
-
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus_super = avec->bonus_super;
   double **v = atom->v;
   double **angmom = atom->angmom;
   double *rmass = atom->rmass;
@@ -202,65 +198,67 @@ double ComputeTempAsphere::compute_scalar()
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  double *quat;
+  double *shape, *quat;
   double wbody[3],inertia[3];
   double rot[3][3];
 
   // sum translational and rotational energy for each particle
   // no point particles since divide by inertia
-
-  double t = 0.0;
-
-  if (mode == ALL) {
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
+  for (int i = 0; i < nlocal; i++) {
+    if (mask[i] & groupbit) {
+      
+      if (mode == ALL) {
         t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) * rmass[i];
+      }
 
+      int j = ellipsoid[i];
+      
+      if (is_super) {
+        quat = bonus_super[j].quat;
         // principal moments of inertia
-
-        quat = bonus[ellipsoid[i]].quat;
-
-        inertia[0] = bonus[ellipsoid[i]].inertia[0];
-        inertia[1] = bonus[ellipsoid[i]].inertia[1];
-        inertia[2] = bonus[ellipsoid[i]].inertia[2];
-
-        // wbody = angular velocity in body frame
-
-        MathExtra::quat_to_mat(quat,rot);
-        MathExtra::transpose_matvec(rot,angmom[i],wbody);
-        wbody[0] /= inertia[0];
-        wbody[1] /= inertia[1];
-        wbody[2] /= inertia[2];
-
-        t += inertia[0]*wbody[0]*wbody[0] +
-          inertia[1]*wbody[1]*wbody[1] + inertia[2]*wbody[2]*wbody[2];
+        inertia[0] = bonus_super[j].inertia[0];
+        inertia[1] = bonus_super[j].inertia[1];
+        inertia[2] = bonus_super[j].inertia[2];
+      } else {
+        quat = bonus[j].quat;
+        shape = bonus[j].shape;
+        // principal moments of inertia
+        inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
       }
 
-  } else {
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
 
-        // principal moments of inertia
+      MathExtra::quat_to_mat(quat, rot);
+      MathExtra::transpose_matvec(rot, angmom[i], wbody);
+      // wbody = angular velocity in body frame
+      wbody[0] /= inertia[0];
+      wbody[1] /= inertia[1];
+      wbody[2] /= inertia[2];
 
-        quat = bonus[ellipsoid[i]].quat;
+      t += inertia[0]*wbody[0]*wbody[0] +
+           inertia[1]*wbody[1]*wbody[1] + 
+           inertia[2]*wbody[2]*wbody[2];
+    }
+  }
+}
 
-        inertia[0] = bonus[ellipsoid[i]].inertia[0];
-        inertia[1] = bonus[ellipsoid[i]].inertia[1];
-        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
-        // wbody = angular velocity in body frame
 
-        MathExtra::quat_to_mat(quat,rot);
-        MathExtra::transpose_matvec(rot,angmom[i],wbody);
-        wbody[0] /= inertia[0];
-        wbody[1] /= inertia[1];
-        wbody[2] /= inertia[2];
+double ComputeTempAsphere::compute_scalar()
+{
+  invoked_scalar = update->ntimestep;
 
-        t += inertia[0]*wbody[0]*wbody[0] +
-          inertia[1]*wbody[1]*wbody[1] + inertia[2]*wbody[2]*wbody[2];
-      }
+  if (tempbias) {
+    if (tbias->invoked_scalar != update->ntimestep) tbias->compute_scalar();
+    tbias->remove_bias_all();
   }
 
+  double t = 0.0;
+
+  if (atom->superellipsoid_flag) compute_scalar_templated<true>(t);
+  else compute_scalar_templated<false>(t);
+
   if (tempbias) tbias->restore_bias_all();
 
   MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
@@ -272,19 +270,11 @@ double ComputeTempAsphere::compute_scalar()
 }
 
 /* ---------------------------------------------------------------------- */
-
-void ComputeTempAsphere::compute_vector()
+template<bool is_super>
+void ComputeTempAsphere::compute_vector_templated(double *t)
 {
-  int i;
-
-  invoked_vector = update->ntimestep;
-
-  if (tempbias) {
-    if (tbias->invoked_vector != update->ntimestep) tbias->compute_vector();
-    tbias->remove_bias_all();
-  }
-
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
+  AtomVecEllipsoid::BonusSuper *bonus_super = avec->bonus_super;
   double **v = atom->v;
   double **angmom = atom->angmom;
   double *rmass = atom->rmass;
@@ -292,36 +282,44 @@ void ComputeTempAsphere::compute_vector()
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
-  double *quat;
-  double wbody[3],inertia[3],t[6];
+  double *shape, *quat;
+  double wbody[3],inertia[3];
   double rot[3][3];
   double massone;
 
-  // sum translational and rotational energy for each particle
-  // no point particles since divide by inertia
-
-  for (i = 0; i < 6; i++) t[i] = 0.0;
-
-  if (mode == ALL) {
-    for (i = 0; i < nlocal; i++)
+    for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
-        massone = rmass[i];
-        t[0] += massone * v[i][0]*v[i][0];
-        t[1] += massone * v[i][1]*v[i][1];
-        t[2] += massone * v[i][2]*v[i][2];
-        t[3] += massone * v[i][0]*v[i][1];
-        t[4] += massone * v[i][0]*v[i][2];
-        t[5] += massone * v[i][1]*v[i][2];
+         massone = rmass[i];
+         
+        if (mode == ALL) {
+          t[0] += massone * v[i][0]*v[i][0];
+          t[1] += massone * v[i][1]*v[i][1];
+          t[2] += massone * v[i][2]*v[i][2];
+          t[3] += massone * v[i][0]*v[i][1];
+          t[4] += massone * v[i][0]*v[i][2];
+          t[5] += massone * v[i][1]*v[i][2];
+        }
+
+        int j = ellipsoid[i];
 
         // principal moments of inertia
-
-        inertia[0] = bonus[ellipsoid[i]].inertia[0];
-        inertia[1] = bonus[ellipsoid[i]].inertia[1];
-        inertia[2] = bonus[ellipsoid[i]].inertia[2];
-
-        quat = bonus[ellipsoid[i]].quat;
-
-        // wbody = angular velocity in body frame
+        if (is_super) {
+          quat = bonus_super[j].quat;
+        
+          inertia[0] = bonus_super[j].inertia[0];
+          inertia[1] = bonus_super[j].inertia[1];
+          inertia[2] = bonus_super[j].inertia[2];
+
+        } else {
+          quat = bonus[j].quat;
+          shape = bonus[j].shape;
+        
+          inertia[0] = INERTIA*massone * (shape[1]*shape[1] + shape[2]*shape[2]);
+          inertia[1] = INERTIA*massone * (shape[0]*shape[0] + shape[2]*shape[2]);
+          inertia[2] = INERTIA*massone * (shape[0]*shape[0] + shape[1]*shape[1]);
+        }
+
+         // wbody = angular velocity in body frame
 
         MathExtra::quat_to_mat(quat,rot);
         MathExtra::transpose_matvec(rot,angmom[i],wbody);
@@ -338,38 +336,27 @@ void ComputeTempAsphere::compute_vector()
         t[4] += inertia[1]*wbody[0]*wbody[2];
         t[5] += inertia[2]*wbody[1]*wbody[2];
       }
+    }
+}
 
-  } else {
-    for (i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
-
-        // principal moments of inertia
-
-        quat = bonus[ellipsoid[i]].quat;
-        massone = rmass[i];
-
-        inertia[0] = bonus[ellipsoid[i]].inertia[0];
-        inertia[1] = bonus[ellipsoid[i]].inertia[1];
-        inertia[2] = bonus[ellipsoid[i]].inertia[2];
 
-        // wbody = angular velocity in body frame
+void ComputeTempAsphere::compute_vector()
+{
+  int i;
+  invoked_vector = update->ntimestep;
 
-        MathExtra::quat_to_mat(quat,rot);
-        MathExtra::transpose_matvec(rot,angmom[i],wbody);
-        wbody[0] /= inertia[0];
-        wbody[1] /= inertia[1];
-        wbody[2] /= inertia[2];
+  if (tempbias) {
+    if (tbias->invoked_vector != update->ntimestep) tbias->compute_vector();
+    tbias->remove_bias_all();
+  }
 
-        // rotational kinetic energy
+  // sum translational and rotational energy for each particle
+  // no point particles since divide by inertia
+  double t[6];
+  for (i = 0; i < 6; i++) t[i] = 0.0;
 
-        t[0] += inertia[0]*wbody[0]*wbody[0];
-        t[1] += inertia[1]*wbody[1]*wbody[1];
-        t[2] += inertia[2]*wbody[2]*wbody[2];
-        t[3] += inertia[0]*wbody[0]*wbody[1];
-        t[4] += inertia[1]*wbody[0]*wbody[2];
-        t[5] += inertia[2]*wbody[1]*wbody[2];
-      }
-  }
+  if (atom->superellipsoid_flag) compute_vector_templated<true>(t);
+  else compute_vector_templated<false>(t);
 
   if (tempbias) tbias->restore_bias_all();
 
diff --git a/src/ASPHERE/compute_temp_asphere.h b/src/ASPHERE/compute_temp_asphere.h
index c8c09b445b1..3c86330b163 100644
--- a/src/ASPHERE/compute_temp_asphere.h
+++ b/src/ASPHERE/compute_temp_asphere.h
@@ -46,6 +46,9 @@ class ComputeTempAsphere : public Compute {
   class AtomVecEllipsoid *avec;
 
   void dof_compute();
+
+  template <bool is_super> void compute_scalar_templated(double &t);
+  template <bool is_super> void compute_vector_templated(double *t);
 };
 
 }    // namespace LAMMPS_NS

From 02f7ff801dd3676d0d9544f14413b60588ce0813 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 10:01:16 +0100
Subject: [PATCH 113/174] Revert unsupported fixes and computes to upstream
 state. Limiting PR to integrators needed

---
 src/ASPHERE/fix_nh_asphere.cpp          |  9 ++++--
 src/ASPHERE/fix_nve_asphere_noforce.cpp | 11 +++++--
 src/CG-DNA/fix_nve_dot.cpp              | 11 ++++---
 src/CG-DNA/fix_nve_dotc_langevin.cpp    | 11 ++++---
 src/GPU/fix_nve_asphere_gpu.cpp         | 16 +++++++----
 src/INTEL/fix_nve_asphere_intel.cpp     | 22 +++++++-------
 src/OPENMP/fix_nh_asphere_omp.cpp       |  9 ++++--
 src/OPENMP/fix_rigid_nh_omp.cpp         | 26 ++++++++++-------
 src/OPENMP/fix_rigid_omp.cpp            | 26 ++++++++++-------
 src/OPENMP/fix_rigid_small_omp.cpp      | 25 +++++++++-------
 src/RIGID/fix_rigid.cpp                 | 38 ++++++++++++++-----------
 src/RIGID/fix_rigid_small.cpp           | 37 +++++++++++++-----------
 src/RIGID/rigid_const.h                 |  2 ++
 src/SRD/fix_srd.cpp                     | 19 +++++++------
 src/fix_langevin.cpp                    |  9 ++++--
 src/fix_move.cpp                        | 20 +++++++++++--
 16 files changed, 181 insertions(+), 110 deletions(-)

diff --git a/src/ASPHERE/fix_nh_asphere.cpp b/src/ASPHERE/fix_nh_asphere.cpp
index d0a71d23ae8..35d0e404be5 100644
--- a/src/ASPHERE/fix_nh_asphere.cpp
+++ b/src/ASPHERE/fix_nh_asphere.cpp
@@ -106,16 +106,21 @@ void FixNHAsphere::nve_x()
   // returns new normalized quaternion
   // principal moments of inertia
 
-  double *inertia,*quat;
+  double *shape,*quat;
+  double inertia[3];
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
 
       // principal moments of inertia
 
-      inertia = bonus[ellipsoid[i]].inertia;
+      shape = bonus[ellipsoid[i]].shape;
       quat = bonus[ellipsoid[i]].quat;
 
+      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
+      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
+      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index a09b5dc396e..aaa21d9550f 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -66,13 +66,14 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
   double **x = atom->x;
   double **v = atom->v;
   double **angmom = atom->angmom;
+  double *rmass = atom->rmass;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
-  double *inertia,*quat;
-  double omega[3];
+  double *shape,*quat;
+  double inertia[3],omega[3];
 
   // update positions and quaternions for all particles
 
@@ -85,9 +86,13 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
 
       // principal moments of inertia
 
-      inertia = bonus[ellipsoid[i]].inertia;
+      shape = bonus[ellipsoid[i]].shape;
       quat = bonus[ellipsoid[i]].quat;
 
+      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
+      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
+      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/CG-DNA/fix_nve_dot.cpp b/src/CG-DNA/fix_nve_dot.cpp
index 4f25ecf1bf0..4fbf9bb9be4 100644
--- a/src/CG-DNA/fix_nve_dot.cpp
+++ b/src/CG-DNA/fix_nve_dot.cpp
@@ -26,6 +26,8 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+
 /* ---------------------------------------------------------------------- */
 
 FixNVEDot::FixNVEDot(LAMMPS *lmp, int narg, char **arg) :
@@ -58,7 +60,7 @@ void FixNVEDot::init()
 
 void FixNVEDot::initial_integrate(int /*vflag*/)
 {
-  double *quat;
+  double *shape,*quat;
   double fquat[4],conjqm[4],inertia[3];
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
@@ -83,6 +85,7 @@ void FixNVEDot::initial_integrate(int /*vflag*/)
 
       dthlfm = dthlf / rmass[i];
       quat = bonus[ellipsoid[i]].quat;
+      shape = bonus[ellipsoid[i]].shape;
 
       // update momentum by 1/2 step
       v[i][0] += dthlfm * f[i][0];
@@ -110,9 +113,9 @@ void FixNVEDot::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia[0] = bonus[ellipsoid[i]].inertia[0];
-      inertia[1] = bonus[ellipsoid[i]].inertia[1];
-      inertia[2] = bonus[ellipsoid[i]].inertia[2];
+      inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+      inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+      inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
 
       // rotate quaternion and quaternion 4-momentum by full step
       no_squish_rotate(3,conjqm,quat,inertia,dthlf);
diff --git a/src/CG-DNA/fix_nve_dotc_langevin.cpp b/src/CG-DNA/fix_nve_dotc_langevin.cpp
index 7145af73885..8151cbe4fea 100644
--- a/src/CG-DNA/fix_nve_dotc_langevin.cpp
+++ b/src/CG-DNA/fix_nve_dotc_langevin.cpp
@@ -33,6 +33,8 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+
 /* ---------------------------------------------------------------------- */
 
 FixNVEDotcLangevin::FixNVEDotcLangevin(LAMMPS *lmp, int narg, char **arg) :
@@ -127,7 +129,7 @@ void FixNVEDotcLangevin::compute_target()
 
 void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
 {
-  double *quat;
+  double *shape,*quat;
   double fquat[4],conjqm[4],inertia[3];
   double slq_conjqm[3];
 
@@ -158,6 +160,7 @@ void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
 
       dthlfm = dthlf / rmass[i];
       quat = bonus[ellipsoid[i]].quat;
+      shape = bonus[ellipsoid[i]].shape;
 
       // update momentum by 1/2 step
       v[i][0] += dthlfm * f[i][0];
@@ -185,9 +188,9 @@ void FixNVEDotcLangevin::initial_integrate(int /*vflag*/)
       conjqm[3] += dt * fquat[3];
 
       // principal moments of inertia
-      inertia[0] = bonus[ellipsoid[i]].inertia[0];
-      inertia[1] = bonus[ellipsoid[i]].inertia[1];
-      inertia[2] = bonus[ellipsoid[i]].inertia[2];
+      inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+      inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+      inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
 
       M = inertia[0]*inertia[1]*inertia[2];
       M /= inertia[1]*inertia[2]+inertia[0]*inertia[2]+inertia[0]*inertia[1];
diff --git a/src/GPU/fix_nve_asphere_gpu.cpp b/src/GPU/fix_nve_asphere_gpu.cpp
index ecf8dbc8d25..9b75964c791 100644
--- a/src/GPU/fix_nve_asphere_gpu.cpp
+++ b/src/GPU/fix_nve_asphere_gpu.cpp
@@ -35,6 +35,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+
 #define ME_qnormalize(q)                                                \
 {                                                                       \
   double norm = 1.0 /                                                   \
@@ -382,13 +384,14 @@ double FixNVEAsphereGPU::reset_dt_omp(const int ifrom, const int ito,
       _dtfm[n++] = dtfir;
       _dtfm[n++] = dtfir;
       _dtfm[n++] = dtfir;
-      double idot = bonus[ellipsoid[i]].inertia[0];
+      double *shape = bonus[ellipsoid[i]].shape;
+      double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia0[i] = idot;
-      idot = bonus[ellipsoid[i]].inertia[1];
+      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia1[i] = idot;
-      idot = bonus[ellipsoid[i]].inertia[2];
+      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia2[i] = idot;
     }
@@ -401,13 +404,14 @@ double FixNVEAsphereGPU::reset_dt_omp(const int ifrom, const int ito,
         _dtfm[n++] = dtfir;
         _dtfm[n++] = dtfir;
         _dtfm[n++] = dtfir;
-        double idot = bonus[ellipsoid[i]].inertia[0];
+        double *shape = bonus[ellipsoid[i]].shape;
+        double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia0[i] = idot;
-        idot = bonus[ellipsoid[i]].inertia[1];
+        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia1[i] = idot;
-        idot = bonus[ellipsoid[i]].inertia[2];
+        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia2[i] = idot;
       } else {
diff --git a/src/INTEL/fix_nve_asphere_intel.cpp b/src/INTEL/fix_nve_asphere_intel.cpp
index 249ea0b6b43..848afa20cca 100644
--- a/src/INTEL/fix_nve_asphere_intel.cpp
+++ b/src/INTEL/fix_nve_asphere_intel.cpp
@@ -30,6 +30,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+
 /* ---------------------------------------------------------------------- */
 
 FixNVEAsphereIntel::FixNVEAsphereIntel(LAMMPS *lmp, int narg, char **arg) :
@@ -200,19 +202,14 @@ void FixNVEAsphereIntel::reset_dt() {
       _dtfm[n++] = dtf / rmass[i];
       _dtfm[n++] = dtf / rmass[i];
       _dtfm[n++] = dtf / rmass[i];
-      if (inertia[0] != 0.0) inertia[0] = 1.0 / inertia[0];
-      _inertia0[i] = inertia[0];
-      if (inertia[1] != 0.0) inertia[1] = 1.0 / inertia[1];
-      _inertia1[i] = inertia[1];
-      if (inertia[2] != 0.0) inertia[2] = 1.0 / inertia[2];
-      _inertia2[i] = inertia[2];
-      double idot = bonus[ellipsoid[i]].inertia[0];
+      double *shape = bonus[ellipsoid[i]].shape;
+      double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia0[i] = idot;
-      idot = bonus[ellipsoid[i]].inertia[1];
+      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia1[i] = idot;
-      idot = bonus[ellipsoid[i]].inertia[2];
+      idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
       if (idot != 0.0) idot = 1.0 / idot;
       _inertia2[i] = idot;
     }
@@ -224,13 +221,14 @@ void FixNVEAsphereIntel::reset_dt() {
         _dtfm[n++] = dtf / rmass[i];
         _dtfm[n++] = dtf / rmass[i];
         _dtfm[n++] = dtf / rmass[i];
-        double idot = bonus[ellipsoid[i]].inertia[0];
+        double *shape = bonus[ellipsoid[i]].shape;
+        double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia0[i] = idot;
-        idot = bonus[ellipsoid[i]].inertia[1];
+        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia1[i] = idot;
-        idot = bonus[ellipsoid[i]].inertia[2];
+        idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
         if (idot != 0.0) idot = 1.0 / idot;
         _inertia2[i] = idot;
       } else {
diff --git a/src/OPENMP/fix_nh_asphere_omp.cpp b/src/OPENMP/fix_nh_asphere_omp.cpp
index ff123b0b292..35a42d2119d 100644
--- a/src/OPENMP/fix_nh_asphere_omp.cpp
+++ b/src/OPENMP/fix_nh_asphere_omp.cpp
@@ -105,6 +105,7 @@ void FixNHAsphereOMP::nve_x()
   auto * _noalias const x = (dbl3_t *) atom->x[0];
   const auto * _noalias const v = (dbl3_t *) atom->v[0];
   auto * _noalias const angmom = (dbl3_t *) atom->angmom[0];
+  const double * _noalias const rmass = atom->rmass;
   const int * _noalias const mask = atom->mask;
   AtomVecEllipsoid::Bonus * _noalias const bonus = avec->bonus;
   const int * _noalias const ellipsoid = atom->ellipsoid;
@@ -124,7 +125,7 @@ void FixNHAsphereOMP::nve_x()
 #endif
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
-      double omega[3];
+      double omega[3], inertia[3];
 
       x[i].x += dtv * v[i].x;
       x[i].y += dtv * v[i].y;
@@ -132,9 +133,13 @@ void FixNHAsphereOMP::nve_x()
 
       // principal moments of inertia
 
-      double * const inertia = bonus[ellipsoid[i]].inertia;
+      const double * const shape = bonus[ellipsoid[i]].shape;
       double * const quat = bonus[ellipsoid[i]].quat;
 
+      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
+      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
+      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
diff --git a/src/OPENMP/fix_rigid_nh_omp.cpp b/src/OPENMP/fix_rigid_nh_omp.cpp
index dbf0b26a972..c063a1db0c7 100644
--- a/src/OPENMP/fix_rigid_nh_omp.cpp
+++ b/src/OPENMP/fix_rigid_nh_omp.cpp
@@ -772,9 +772,9 @@ void FixRigidNHOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
     double theta_body,theta;
-    double exone[3],eyone[3],ezone[3],p[3][3];
+    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -798,13 +798,16 @@ void FixRigidNHOMP::set_xv_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -960,8 +963,8 @@ void FixRigidNHOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *quatatom,*inertiaatom;
-    double exone[3],eyone[3],ezone[3];
+    double *shape,*quatatom,*inertiaatom;
+    double ione[3],exone[3],eyone[3],ezone[3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -981,11 +984,14 @@ void FixRigidNHOMP::set_v_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
diff --git a/src/OPENMP/fix_rigid_omp.cpp b/src/OPENMP/fix_rigid_omp.cpp
index a510a1fb6fc..5f548c4af9e 100644
--- a/src/OPENMP/fix_rigid_omp.cpp
+++ b/src/OPENMP/fix_rigid_omp.cpp
@@ -503,9 +503,9 @@ void FixRigidOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
     double theta_body,theta;
-    double exone[3],eyone[3],ezone[3],p[3][3];
+    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
 
     AtomVecEllipsoid::Bonus *ebonus = nullptr;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -529,13 +529,16 @@ void FixRigidOMP::set_xv_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -691,8 +694,8 @@ void FixRigidOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double *quatatom,*inertiaatom;
-    double exone[3],eyone[3],ezone[3];
+    double *shape,*quatatom,*inertiaatom;
+    double ione[3],exone[3],eyone[3],ezone[3];
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -712,11 +715,14 @@ void FixRigidOMP::set_v_thr()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
diff --git a/src/OPENMP/fix_rigid_small_omp.cpp b/src/OPENMP/fix_rigid_small_omp.cpp
index d4f61af259e..98c411d11ae 100644
--- a/src/OPENMP/fix_rigid_small_omp.cpp
+++ b/src/OPENMP/fix_rigid_small_omp.cpp
@@ -435,9 +435,9 @@ void FixRigidSmallOMP::set_xv_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double exone[3],eyone[3],ezone[3],p[3][3];
+    double ione[3],exone[3],eyone[3],ezone[3],p[3][3];
     double theta_body,theta;
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -461,13 +461,15 @@ void FixRigidSmallOMP::set_xv_thr()
         omega[i][1] = b.omega[1];
         omega[i][2] = b.omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b.quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,
-                                   inertiaatom,angmom[i]);
+        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,angmom[i]);
       } else if (eflags[i] & LINE) {
         if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]);
         else theta_body = -2.0*acos(b.quat[0]);
@@ -620,8 +622,8 @@ void FixRigidSmallOMP::set_v_thr()
   // XXX: extended particle info not yet multi-threaded
 
   if (extended) {
-    double exone[3],eyone[3],ezone[3];
-    double *quatatom,*inertiaatom;
+    double ione[3],exone[3],eyone[3],ezone[3];
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -641,11 +643,14 @@ void FixRigidSmallOMP::set_v_thr()
         omega[i][1] = b.omega[1];
         omega[i][2] = b.omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,
-                                   inertiaatom,angmom[i]);
+        MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,
+                                   angmom[i]);
       } else if (eflags[i] & LINE) {
         omega[i][0] = b.omega[0];
         omega[i][1] = b.omega[1];
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 0028d337099..46d93ba8dfd 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1299,7 +1299,7 @@ void FixRigid::set_xv()
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
-  double exone[3],eyone[3],ezone[3],vr[6],p[3][3];
+  double ione[3],exone[3],eyone[3],ezone[3],vr[6],p[3][3];
 
   double **x = atom->x;
   double **v = atom->v;
@@ -1404,7 +1404,7 @@ void FixRigid::set_xv()
 
   if (extended) {
     double theta_body,theta;
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus = nullptr;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1428,13 +1428,16 @@ void FixRigid::set_xv()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
@@ -1479,7 +1482,7 @@ void FixRigid::set_v()
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
-  double exone[3],eyone[3],ezone[3],delta[3],vr[6];
+  double ione[3],exone[3],eyone[3],ezone[3],delta[3],vr[6];
 
   double **x = atom->x;
   double **v = atom->v;
@@ -1565,7 +1568,7 @@ void FixRigid::set_v()
   // set omega, angmom of each extended particle
 
   if (extended) {
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1585,11 +1588,14 @@ void FixRigid::set_v()
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
-                                   inertiaatom,angmom_one[i]);
+        MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
+                                   angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
@@ -1841,7 +1847,7 @@ void FixRigid::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
     double length,theta;
 
     for (i = 0; i < nlocal; i++) {
@@ -1855,9 +1861,9 @@ void FixRigid::setup_bodies_static()
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid(inertiaatom,quatatom,massone,ivec);
+        MathExtra::inertia_ellipsoid(shape,quatatom,massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
@@ -2066,7 +2072,7 @@ void FixRigid::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *inertiaatom;
+    double *shape,*inertiaatom;
     double length;
 
     for (i = 0; i < nlocal; i++) {
@@ -2080,8 +2086,8 @@ void FixRigid::setup_bodies_static()
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
-        MathExtra::inertia_ellipsoid(inertiaatom,orient[i],massone,ivec);
+        shape = ebonus[ellipsoid[i]].shape;
+        MathExtra::inertia_ellipsoid(shape,orient[i],massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 4b087eb77f4..441d7fb674c 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -1192,7 +1192,7 @@ void FixRigidSmall::set_xv()
 {
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
-  double exone[3],eyone[3],ezone[3],vr[6],p[3][3];
+  double ione[3],exone[3],eyone[3],ezone[3],vr[6],p[3][3];
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
@@ -1306,7 +1306,7 @@ void FixRigidSmall::set_xv()
 
   if (extended) {
     double theta_body,theta;
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1330,13 +1330,15 @@ void FixRigidSmall::set_xv()
         omega[i][1] = b->omega[1];
         omega[i][2] = b->omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(b->quat,orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,
-                                   inertiaatom,angmom[i]);
+        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,angmom[i]);
       } else if (eflags[i] & LINE) {
         if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
         else theta_body = -2.0*acos(b->quat[0]);
@@ -1380,7 +1382,7 @@ void FixRigidSmall::set_v()
 {
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
-  double exone[3],eyone[3],ezone[3],delta[3],vr[6];
+  double ione[3],exone[3],eyone[3],ezone[3],delta[3],vr[6];
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
@@ -1465,7 +1467,7 @@ void FixRigidSmall::set_v()
   // set omega, angmom of each extended particle
 
   if (extended) {
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -1485,11 +1487,14 @@ void FixRigidSmall::set_v()
         omega[i][1] = b->omega[1];
         omega[i][2] = b->omega[2];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
+        ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
+        ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
+        ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
-        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,
-                                   inertiaatom,angmom[i]);
+        MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,ione,
+                                   angmom[i]);
       } else if (eflags[i] & LINE) {
         omega[i][0] = b->omega[0];
         omega[i][1] = b->omega[1];
@@ -1988,7 +1993,7 @@ void FixRigidSmall::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *quatatom,*inertiaatom;
+    double *shape,*quatatom,*inertiaatom;
     double length,theta;
 
     for (i = 0; i < nlocal; i++) {
@@ -2003,9 +2008,9 @@ void FixRigidSmall::setup_bodies_static()
         inertia[1] += SINERTIA*massone * radius[i]*radius[i];
         inertia[2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
+        shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
-        MathExtra::inertia_ellipsoid(inertiaatom,quatatom,massone,ivec);
+        MathExtra::inertia_ellipsoid(shape,quatatom,massone,ivec);
         inertia[0] += ivec[0];
         inertia[1] += ivec[1];
         inertia[2] += ivec[2];
@@ -2225,7 +2230,7 @@ void FixRigidSmall::setup_bodies_static()
 
   if (extended) {
     double ivec[6];
-    double *inertiaatom;
+    double *shape,*inertiaatom;
     double length;
 
     for (i = 0; i < nlocal; i++) {
@@ -2240,8 +2245,8 @@ void FixRigidSmall::setup_bodies_static()
         inertia[1] += SINERTIA*massone * radius[i]*radius[i];
         inertia[2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
-        inertiaatom = ebonus[ellipsoid[i]].inertia;
-        MathExtra::inertia_ellipsoid(inertiaatom,orient[i],massone,ivec);
+        shape = ebonus[ellipsoid[i]].shape;
+        MathExtra::inertia_ellipsoid(shape,orient[i],massone,ivec);
         inertia[0] += ivec[0];
         inertia[1] += ivec[1];
         inertia[2] += ivec[2];
diff --git a/src/RIGID/rigid_const.h b/src/RIGID/rigid_const.h
index 1956f74687d..69bed21cfac 100644
--- a/src/RIGID/rigid_const.h
+++ b/src/RIGID/rigid_const.h
@@ -40,6 +40,8 @@
 
     // moment of inertia prefactor for sphere
     static constexpr double SINERTIA = 0.4;
+    // moment of inertia prefactor for ellipsoid
+    static constexpr double EINERTIA = 0.2;
     // moment of inertia prefactor for line segment
     static constexpr double LINERTIA = 1.0/12.0;
 
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index 24501adaa44..5b6bd5988ce 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -52,6 +52,7 @@ enum { BIG_MOVE, SRD_MOVE, SRD_ROTATE };
 enum { CUBIC_ERROR, CUBIC_WARN };
 enum { SHIFT_NO, SHIFT_YES, SHIFT_POSSIBLE };
 
+static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 static constexpr int ATOMPERBIN = 30;
 static constexpr double BIG = 1.0e20;
@@ -59,10 +60,6 @@ static constexpr int VBINSIZE = 5;
 static constexpr double TOLERANCE = 0.00001;
 static constexpr int MAXITER = 20;
 
-
-
-
-
 static const char cite_fix_srd[] =
     "fix srd command: https://doi.org/10.1063/1.3419070\n\n"
     "@Article{Petersen10,\n"
@@ -2653,10 +2650,10 @@ void FixSRD::parameterize()
       if (mask[i] & biggroupbit) {
         if (radius && radius[i] > 0.0) {
           double r = radfactor * radius[i];
-          volbig += MY_4PI3 * r * r * r;
+          volbig += 4.0 / 3.0 * MY_PI * r * r * r;
         } else if (ellipsoid && ellipsoid[i] >= 0) {
           double *shape = ebonus[ellipsoid[i]].shape;
-          volbig += MathExtra::volume_ellipsoid(shape) * radfactor * radfactor *
+          volbig += 4.0 / 3.0 * MY_PI * shape[0] * shape[1] * shape[2] * radfactor * radfactor *
               radfactor;
         } else if (tri && tri[i] >= 0) {
           double *c1 = tbonus[tri[i]].c1;
@@ -2935,7 +2932,8 @@ void FixSRD::big_static()
 void FixSRD::big_dynamic()
 {
   int i;
-  double *quat, *inertia;
+  double *shape, *quat, *inertia;
+  double inertiaone[3];
 
   AtomVecEllipsoid::Bonus *ebonus;
   if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
@@ -2968,8 +2966,11 @@ void FixSRD::big_dynamic()
     } else if (biglist[k].type == ELLIPSOID) {
       quat = ebonus[ellipsoid[i]].quat;
       MathExtra::q_to_exyz(quat, biglist[k].ex, biglist[k].ey, biglist[k].ez);
-      inertia = ebonus[ellipsoid[i]].inertia;
-      MathExtra::angmom_to_omega(angmom[i], biglist[k].ex, biglist[k].ey, biglist[k].ez, inertia,
+      shape = ebonus[ellipsoid[i]].shape;
+      inertiaone[0] = EINERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
+      inertiaone[1] = EINERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
+      inertiaone[2] = EINERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+      MathExtra::angmom_to_omega(angmom[i], biglist[k].ex, biglist[k].ey, biglist[k].ez, inertiaone,
                                  biglist[k].omega);
 
       // line
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 8c9c25dcfa8..9b637853a53 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -43,6 +43,7 @@ enum { NOBIAS, BIAS };
 enum { CONSTANT, EQUAL, ATOM };
 
 static constexpr double SINERTIA = 0.4;    // moment of inertia prefactor for sphere
+static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -611,13 +612,15 @@ void FixLangevin::angmom_thermostat()
   // gives correct rotational diffusivity behavior if (nearly) spherical
   // any value will be incorrect for rotational diffusivity if aspherical
 
-  double omega[3],tran[3];
-  double *shape,*quat,*inertia;
+  double inertia[3],omega[3],tran[3];
+  double *shape,*quat;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       shape = bonus[ellipsoid[i]].shape;
-      inertia = bonus[ellipsoid[i]].inertia;
+      inertia[0] = EINERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+      inertia[1] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+      inertia[2] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
       quat = bonus[ellipsoid[i]].quat;
       MathExtra::mq_to_omega(angmom[i],quat,inertia,omega);
 
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index df9810bc165..be170aea9e3 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -42,6 +42,8 @@ using namespace MathConst;
 enum { LINEAR, WIGGLE, ROTATE, VARIABLE, TRANSROT };
 enum { EQUAL, ATOM };
 
+static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
+
 /* ---------------------------------------------------------------------- */
 
 FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
@@ -564,7 +566,7 @@ void FixMove::initial_integrate(int /*vflag*/)
   double ddotr, dx, dy, dz;
   double dtfm, theta_new;
   double xold[3], a[3], b[3], c[3], d[3], disp[3], w[3], ex[3], ey[3], ez[3];
-  double qrotate[4];
+  double inertia_ellipsoid[3], qrotate[4];
   double *quat, *inertia, *shape;
 
   double delta = (update->ntimestep - time_origin) * dt;
@@ -779,7 +781,13 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              inertia = avec_ellipsoid->bonus[ellipsoid[i]].inertia;
+              inertia_ellipsoid[0] =
+                  INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
+              inertia_ellipsoid[1] =
+                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
+              inertia_ellipsoid[2] =
+                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+              inertia = inertia_ellipsoid;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
               inertia = avec_tri->bonus[tri[i]].inertia;
@@ -917,7 +925,13 @@ void FixMove::initial_integrate(int /*vflag*/)
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              inertia = avec_ellipsoid->bonus[ellipsoid[i]].inertia;
+              inertia_ellipsoid[0] =
+                  INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
+              inertia_ellipsoid[1] =
+                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
+              inertia_ellipsoid[2] =
+                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+              inertia = inertia_ellipsoid;
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
               inertia = avec_tri->bonus[tri[i]].inertia;

From 6ca3e1b006ff1c5c6c8348eea1635904319166b8 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 10:16:23 +0100
Subject: [PATCH 114/174] Added templates to nve_asphere integrators for
 superellipsoids

---
 src/ASPHERE/fix_nve_asphere.cpp         | 36 +++++++++++++++++++++----
 src/ASPHERE/fix_nve_asphere.h           |  2 ++
 src/ASPHERE/fix_nve_asphere_noforce.cpp | 35 ++++++++++++++++++------
 src/ASPHERE/fix_nve_asphere_noforce.h   |  1 +
 4 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index a57608ca10c..e541fe3d91c 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -26,6 +26,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+
 /* ---------------------------------------------------------------------- */
 
 FixNVEAsphere::FixNVEAsphere(LAMMPS *lmp, int narg, char **arg) :
@@ -56,13 +58,18 @@ void FixNVEAsphere::init()
 
 /* ---------------------------------------------------------------------- */
 
-void FixNVEAsphere::initial_integrate(int /*vflag*/)
+template <bool is_super>
+void FixNVEAsphere::initial_integrate_templated()
 {
   double dtfm;
   double omega[3];
   double *inertia,*quat;
 
-  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
+  AtomVecEllipsoid::Bonus *bonus = nullptr;
+  AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;
+  if (is_super) bonus_super = avec->bonus_super;
+  else bonus = avec->bonus;
+
   int *ellipsoid = atom->ellipsoid;
   double **x = atom->x;
   double **v = atom->v;
@@ -95,9 +102,18 @@ void FixNVEAsphere::initial_integrate(int /*vflag*/)
       angmom[i][2] += dtf * torque[i][2];
 
       // principal moments of inertia
-
-      inertia = bonus[ellipsoid[i]].inertia;
-      quat = bonus[ellipsoid[i]].quat;
+      int j = ellipsoid[i];
+      if (is_super) {
+        inertia = bonus_super[j].inertia;
+        quat = bonus_super[j].quat;
+      } else {
+        quat = bonus[j].quat;
+        shape = bonus[j].shape;
+
+        inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+        inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+        inertia[2] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+      }
 
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
@@ -108,6 +124,16 @@ void FixNVEAsphere::initial_integrate(int /*vflag*/)
     }
 }
 
+
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVEAsphere::initial_integrate(int /*vflag*/)
+{
+  if (atom->superellipsoid_flag) initial_integrate_templated<true>();
+  else initial_integrate_templated<false>();
+}
+
 /* ---------------------------------------------------------------------- */
 
 void FixNVEAsphere::final_integrate()
diff --git a/src/ASPHERE/fix_nve_asphere.h b/src/ASPHERE/fix_nve_asphere.h
index b614f4083f6..097948c2770 100644
--- a/src/ASPHERE/fix_nve_asphere.h
+++ b/src/ASPHERE/fix_nve_asphere.h
@@ -34,6 +34,8 @@ class FixNVEAsphere : public FixNVE {
  private:
   double dtq;
   class AtomVecEllipsoid *avec;
+  template <bool is_super> void initial_integrate_templated();
+
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index aaa21d9550f..afc2ee4618b 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -59,10 +59,15 @@ void FixNVEAsphereNoforce::init()
 
 /* ---------------------------------------------------------------------- */
 
-void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
+template <bool is_super>
+void FixNVEAsphereNoforce::initial_integrate_templated()
 {
-  AtomVecEllipsoid::Bonus *bonus;
-  if (avec) bonus = avec->bonus;
+  AtomVecEllipsoid::Bonus *bonus = nullptr;
+  AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;  
+  if (avec) {
+    if (is_super) bonus_super = avec->bonus_super;
+    else bonus = avec->bonus;
+  }
   double **x = atom->x;
   double **v = atom->v;
   double **angmom = atom->angmom;
@@ -86,13 +91,20 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
 
       // principal moments of inertia
 
-      shape = bonus[ellipsoid[i]].shape;
-      quat = bonus[ellipsoid[i]].quat;
+      if (is_super) {
+        quat = bonus_super[ellipsoid[i]].quat;
 
-      inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
-      inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
-      inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+        inertia[0] = bonus_super[ellipsoid[i]].inertia[0];
+        inertia[1] = bonus_super[ellipsoid[i]].inertia[1];
+        inertia[2] = bonus_super[ellipsoid[i]].inertia[2];
+      } else {
+        shape = bonus[ellipsoid[i]].shape;
+        quat = bonus[ellipsoid[i]].quat;
 
+        inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
+        inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
+        inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
+      }
       // compute omega at 1/2 step from angmom at 1/2 step and current q
       // update quaternion a full step via Richardson iteration
       // returns new normalized quaternion
@@ -102,3 +114,10 @@ void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
     }
   }
 }
+/* ---------------------------------------------------------------------- */
+
+void FixNVEAsphereNoforce::initial_integrate(int /*vflag*/)
+{
+  if (atom->superellipsoid_flag) initial_integrate_templated<true>();
+  else initial_integrate_templated<false>();
+}
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.h b/src/ASPHERE/fix_nve_asphere_noforce.h
index 8f7548633c1..28a938a0bec 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.h
+++ b/src/ASPHERE/fix_nve_asphere_noforce.h
@@ -33,6 +33,7 @@ class FixNVEAsphereNoforce : public FixNVENoforce {
  private:
   double dtq;
   class AtomVecEllipsoid *avec;
+  template <bool is_super> void initial_integrate_templated();
 };
 
 }    // namespace LAMMPS_NS

From 377363aa2afbc1a3008f0a0588d693293dc49537 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 10:47:57 +0100
Subject: [PATCH 115/174] Updated set to be compatible with bonus and
 bonus_super

---
 src/atom_vec_ellipsoid.cpp |  2 --
 src/math_extra.cpp         | 35 ++++++++++++++++++++++++++++-------
 src/math_extra.h           |  3 ++-
 src/math_special.cpp       |  1 -
 src/set.cpp                | 26 +++++++++++++++++---------
 5 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index eeaf4ab2a81..a705e205c9d 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -968,8 +968,6 @@ void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double sha
 
 void AtomVecEllipsoid::set_block(int i, double blockn1, double blockn2)
 {
-  if (!atom->superellipsoid_flag)
-    error->one(FLERR, "Cannot set blockiness exponents for non-superellipsoid atom");
   if (ellipsoid[i] < 0) {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *shape = bonus_super[nlocal_bonus].shape;
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index 5fa326453a7..d1093e32908 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -475,20 +475,41 @@ void quat_to_mat_trans(const double *quat, double mat[3][3])
   mat[2][2] = w2-i2-j2+k2;
 }
 
-
 /* ----------------------------------------------------------------------
    compute space-frame inertia tensor of an ellipsoid
-   shape = 3 radii of ellipsoid
+   shape = 3 semiaxes of ellipsoid
    quat = orientiation quaternion of ellipsoid
-   block = blockiness exponents of super-ellipsoid
    return symmetric inertia tensor as 6-vector in Voigt ordering
-
-   THIS IS EXACTLY THE SAME FUNCTION AS INERTIA_TRIANGLE
-   TAKES DIAG PRINCIPA INERTIA AND ROTATES IT. SHOULD WE CONSOLIDATE ???
 ------------------------------------------------------------------------- */
 
-void inertia_ellipsoid(double *idiag, double *quat, double /*mass*/,
+void inertia_ellipsoid(double *shape, double *quat, double mass,
                        double *inertia)
+{
+  double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
+  double idiag[3];
+
+  quat_to_mat(quat,p);
+  quat_to_mat_trans(quat,ptrans);
+  idiag[0] = 0.2*mass * (shape[1]*shape[1] + shape[2]*shape[2]);
+  idiag[1] = 0.2*mass * (shape[0]*shape[0] + shape[2]*shape[2]);
+  idiag[2] = 0.2*mass * (shape[0]*shape[0] + shape[1]*shape[1]);
+  diag_times3(idiag,ptrans,itemp);
+  times3(p,itemp,tensor);
+  inertia[0] = tensor[0][0];
+  inertia[1] = tensor[1][1];
+  inertia[2] = tensor[2][2];
+  inertia[3] = tensor[1][2];
+  inertia[4] = tensor[0][2];
+  inertia[5] = tensor[0][1];
+}
+
+/* ----------------------------------------------------------------------
+  Superellipsoid inertia tensor
+  No need to compute new inertia tensor
+  for superellipsoid since it is stored in bonus_super
+------------------------------------------------------------------------- */
+
+void inertia_ellipsoid(double *idiag, double *quat, double *inertia)
 {
   double p[3][3],ptrans[3][3],itemp[3][3],tensor[3][3];
 
diff --git a/src/math_extra.h b/src/math_extra.h
index 5671733672a..1da51386457 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -116,7 +116,8 @@ void BuildRyMatrix(double R[3][3], const double angle);
 void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
-void inertia_ellipsoid(double *idiag, double *quat, double mass, double *inertia);
+void inertia_ellipsoid(double *idiag, double *quat, double *inertia); //superellipsoid version
+void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia);
 void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
 void inertia_triangle(double *idiag, double *quat, double mass, double *inertia);
diff --git a/src/math_special.cpp b/src/math_special.cpp
index 3bc903a742d..c23cb23228c 100644
--- a/src/math_special.cpp
+++ b/src/math_special.cpp
@@ -668,7 +668,6 @@ double MathSpecial::erfcx_y100(const double y100)
     return 1.0;
 } /* erfcx_y100 */
 
-
 /* optimizer friendly implementation of exp2(x).
  *
  * strategy:
diff --git a/src/set.cpp b/src/set.cpp
index 3949a013c2b..5f82cdaefda 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -805,8 +805,10 @@ void Set::setrandom(int keyword, Action *action)
     if (domain->dimension == 3) {
       for (i = 0; i < nlocal; i++)
         if (select[i]) {
-          if (avec_ellipsoid && ellipsoid[i] >= 0)
-            quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          if (avec_ellipsoid && ellipsoid[i] >= 0){
+            if (atom->superellipsoid_flag) quat_one = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+            else quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          }
           else if (avec_tri && tri[i] >= 0)
             quat_one = avec_tri->bonus[tri[i]].quat;
           else if (avec_body && body[i] >= 0)
@@ -832,8 +834,10 @@ void Set::setrandom(int keyword, Action *action)
       double theta2;
       for (i = 0; i < nlocal; i++)
         if (select[i]) {
-          if (avec_ellipsoid && ellipsoid[i] >= 0)
-            quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          if (avec_ellipsoid && ellipsoid[i] >= 0){
+              if (atom->superellipsoid_flag) quat_one = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+              else quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          }
           else if (avec_body && body[i] >= 0)
             quat_one = avec_body->bonus[body[i]].quat;
           else if (quat_flag)
@@ -1120,8 +1124,8 @@ void Set::invoke_apip_lambda(Action *action)
 
 void Set::process_block(int &iarg, int narg, char **arg, Action *action)
 {
-  if (!atom->ellipsoid_flag)
-    error->all(FLERR,"Cannot set attribute {} for atom style {}", arg[iarg], atom->get_style());
+  if (!atom->superellipsoid_flag)
+    error->all(FLERR,"Cannot set attribute {} for atom style {} (available with ellipsoid with superellipsoid flag)", arg[iarg], atom->get_style());
   if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "set block", error);
   if (utils::strmatch(arg[iarg+1],"^v_")) varparse(arg[iarg+1],1,action);
   else {
@@ -1340,7 +1344,9 @@ void Set::invoke_density(Action *action)
       else rmass[i] = 4.0*MY_PI/3.0 * radius[i]*radius[i]*radius[i] * density;
 
     else if (ellipsoid_flag && ellipsoid[i] >= 0) {
-      double *shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
+      double *shape;
+      if (atom->superellipsoid_flag) shape = avec_ellipsoid->bonus_super[ellipsoid[i]].shape; 
+      else shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
       // could enable 2d ellipse (versus 3d ellipsoid) when time integration
       //   options (fix nve/asphere, fix nh/asphere) are also implemented
       // if (discflag)
@@ -2008,8 +2014,10 @@ void Set::invoke_quat(Action *action)
   for (int i = 0; i < nlocal; i++) {
     if (!select[i]) continue;
 
-    if (avec_ellipsoid && ellipsoid[i] >= 0)
-      quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+    if (avec_ellipsoid && ellipsoid[i] >= 0){
+      if (atom->superellipsoid_flag) quat_one = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+      else quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat; 
+    }
     else if (avec_tri && tri[i] >= 0)
       quat_one = avec_tri->bonus[tri[i]].quat;
     else if (avec_body && body[i] >= 0)

From 10dc90e5a73f85977fd69497e715da25c5696cac Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 11:31:21 +0100
Subject: [PATCH 116/174] Added error for superellipsoids in integrators othr
 than nve-asphere

---
 src/ASPHERE/fix_nh_asphere.cpp      | 2 ++
 src/GPU/fix_nve_asphere_gpu.cpp     | 3 +++
 src/INTEL/fix_nve_asphere_intel.cpp | 2 ++
 src/OPENMP/fix_nh_asphere_omp.cpp   | 3 ++-
 src/RIGID/fix_rigid.cpp             | 4 ++++
 src/RIGID/fix_rigid_small.cpp       | 4 ++++
 src/SRD/fix_srd.cpp                 | 3 ++-
 src/fix_langevin.cpp                | 5 +++++
 8 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/ASPHERE/fix_nh_asphere.cpp b/src/ASPHERE/fix_nh_asphere.cpp
index 35d0e404be5..0491e47f292 100644
--- a/src/ASPHERE/fix_nh_asphere.cpp
+++ b/src/ASPHERE/fix_nh_asphere.cpp
@@ -43,6 +43,8 @@ void FixNHAsphere::init()
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
+  if (atom->superellipsoid_flag) error->all(FLERR, "Fix {} does not support superellipsoids", style);
+
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit)
       if (ellipsoid[i] < 0)
diff --git a/src/GPU/fix_nve_asphere_gpu.cpp b/src/GPU/fix_nve_asphere_gpu.cpp
index 9b75964c791..6437cf6da6c 100644
--- a/src/GPU/fix_nve_asphere_gpu.cpp
+++ b/src/GPU/fix_nve_asphere_gpu.cpp
@@ -169,6 +169,9 @@ void FixNVEAsphereGPU::init()
   if (!avec)
     error->all(FLERR,"Compute nve/asphere requires atom style ellipsoid");
 
+  if (atom->superellipsoid_flag)
+    error->all(FLERR, "Fix nve/asphere_gpu does not support superellipsoids");
+
   // check that all particles are finite-size ellipsoids
   // no point particles allowed, spherical is OK
 
diff --git a/src/INTEL/fix_nve_asphere_intel.cpp b/src/INTEL/fix_nve_asphere_intel.cpp
index 848afa20cca..ad1c7329687 100644
--- a/src/INTEL/fix_nve_asphere_intel.cpp
+++ b/src/INTEL/fix_nve_asphere_intel.cpp
@@ -53,6 +53,8 @@ void FixNVEAsphereIntel::init()
   if (!avec)
     error->all(FLERR,"Compute nve/asphere requires atom style ellipsoid");
 
+  if (atom->superellipsoid_flag)
+    error->all(FLERR, "Fix nve/asphere_intel does not support superellipsoids");
   // check that all particles are finite-size ellipsoids
   // no point particles allowed, spherical is OK
 
diff --git a/src/OPENMP/fix_nh_asphere_omp.cpp b/src/OPENMP/fix_nh_asphere_omp.cpp
index 35a42d2119d..aefebf7ecea 100644
--- a/src/OPENMP/fix_nh_asphere_omp.cpp
+++ b/src/OPENMP/fix_nh_asphere_omp.cpp
@@ -46,7 +46,8 @@ void FixNHAsphereOMP::init()
   avec = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
   if (!avec)
     error->all(FLERR, Error::NOLASTLINE, "Fix {} requires atom style ellipsoid", style);
-
+  if (atom->superellipsoid_flag)
+    error->all(FLERR, Error::NOLASTLINE, "Fix {} does not support superellipsoids", style);
   // check that all particles are finite-size
   // no point particles allowed, spherical is OK
 
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 46d93ba8dfd..2668852fa30 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -756,6 +756,10 @@ void FixRigid::init()
     gvec = (double *) ifix->extract("gvec", tmp);
   }
 
+  // error for not supported superellipsoids
+  
+  if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid");
+
   // timestep info
 
   dtv = update->dt;
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 441d7fb674c..a2565f44ba7 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -580,6 +580,10 @@ void FixRigidSmall::init()
     gvec = (double *) ifix->extract("gvec", tmp);
   }
 
+  // error for not supported superellipsoids
+  
+  if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid");
+
   // timestep info
 
   dtv = update->dt;
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index 5b6bd5988ce..ca45b211e91 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -358,7 +358,8 @@ void FixSRD::init()
     error->all(FLERR, Error::NOLASTLINE, "Cannot change timestep once fix srd is set up");
   if (comm->style != Comm::BRICK)
     error->all(FLERR, Error::NOLASTLINE, "Fix srd currently only be used with comm_style brick");
-
+  if (atom->superellipsoid_flag)
+    error->all(FLERR, Error::NOLASTLINE, "Fix srd does not currently support superellipsoids");
   // orthogonal vs triclinic simulation box
   // could be static or shearing box
 
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 9b637853a53..9f8df1362d9 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -238,6 +238,11 @@ void FixLangevin::init()
         if (ellipsoid[i] < 0) error->one(FLERR, "Fix langevin angmom requires extended particles");
   }
 
+  // check that superellipsoids are not used
+
+  if (atom->superellipsoid_flag)
+    error->all(FLERR, "Fix langevin does not support superellipsoids");
+
   // set force prefactors
 
   if (!atom->rmass) {

From f73d1a71ed082c077f39a6697dec949865cd9882 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 11:40:46 +0100
Subject: [PATCH 117/174] Fix move for superellipsoids

---
 src/fix_move.cpp | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index be170aea9e3..d263b2ae089 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -349,9 +349,12 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
     for (int i = 0; i < nlocal; i++) {
       quat = nullptr;
       if (mask[i] & groupbit) {
-        if (ellipsoid_flag && ellipsoid[i] >= 0)
-          quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
-        else if (tri_flag && tri[i] >= 0)
+        if (ellipsoid_flag && ellipsoid[i] >= 0) {
+          if (atom->superellipsoid_flag)
+            quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+          else
+            quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+        } else if (tri_flag && tri[i] >= 0)
           quat = avec_tri->bonus[tri[i]].quat;
         else if (body_flag && body[i] >= 0)
           quat = avec_body->bonus[body[i]].quat;
@@ -779,8 +782,13 @@ void FixMove::initial_integrate(int /*vflag*/)
           if (angmom_flag) {
             quat = inertia = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
-              quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
-              shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
+              if (atom->superellipsoid_flag) {
+                quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+                shape = avec_ellipsoid->bonus_super[ellipsoid[i]].shape;
+              } else {
+                quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+                shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
+              }
               inertia_ellipsoid[0] =
                   INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
               inertia_ellipsoid[1] =
@@ -816,7 +824,10 @@ void FixMove::initial_integrate(int /*vflag*/)
           if (quat_flag && !quat_atom_flag) {
             quat = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0)
-              quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+              if (atom->superellipsoid_flag)
+                quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+              else
+                quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
             else if (tri_flag && tri[i] >= 0)
               quat = avec_tri->bonus[tri[i]].quat;
             else if (body_flag && body[i] >= 0)
@@ -923,6 +934,10 @@ void FixMove::initial_integrate(int /*vflag*/)
           if (angmom_flag) {
             quat = inertia = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0) {
+              if (atom->superellipsoid_flag){
+                quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+                inertia = avec_ellipsoid->bonus_super[ellipsoid[i]].inertia;
+              } else {
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
               shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
               inertia_ellipsoid[0] =
@@ -932,6 +947,7 @@ void FixMove::initial_integrate(int /*vflag*/)
               inertia_ellipsoid[2] =
                   INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
               inertia = inertia_ellipsoid;
+              }
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;
               inertia = avec_tri->bonus[tri[i]].inertia;
@@ -960,7 +976,10 @@ void FixMove::initial_integrate(int /*vflag*/)
           if (quat_flag && !quat_atom_flag) {
             quat = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0)
-              quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+              if (atom->superellipsoid_flag)
+                quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+              else
+                quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
             else if (tri_flag && tri[i] >= 0)
               quat = avec_tri->bonus[tri[i]].quat;
             else if (body_flag && body[i] >= 0)
@@ -1440,7 +1459,10 @@ void FixMove::set_arrays(int i)
       if (quat_flag & !quat_atom_flag) {
         quat = nullptr;
         if (ellipsoid_flag && ellipsoid[i] >= 0)
-          quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          if (atom->superellipsoid_flag)
+            quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+          else
+            quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
         else if (tri_flag && tri[i] >= 0)
           quat = avec_tri->bonus[tri[i]].quat;
         else if (body_flag && body[i] >= 0)
@@ -1503,7 +1525,10 @@ void FixMove::set_arrays(int i)
       if (quat_flag && !quat_atom_flag) {
         quat = nullptr;
         if (ellipsoid_flag && ellipsoid[i] >= 0)
-          quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+          if (atom->superellipsoid_flag)
+            quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
+          else
+            quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
         else if (tri_flag && tri[i] >= 0)
           quat = avec_tri->bonus[tri[i]].quat;
         else if (body_flag && body[i] >= 0)

From 9261363c65353538b36c7a422eeac21f94c00c5a Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 13:23:15 +0100
Subject: [PATCH 118/174] Updated input scripts

---
 examples/ASPHERE/superellipsoid_gran/in.bowling        |  2 +-
 examples/ASPHERE/superellipsoid_gran/in.drop_test      | 10 +++++-----
 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index a597a410955..6472e5e514c 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -1,5 +1,5 @@
 units           si
-atom_style      ellipsoid
+atom_style      ellipsoid superellipsoid
 dimension       3
 boundary p p p
 comm_modify vel yes
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 97e886ea9de..60f56f83ee3 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -1,7 +1,7 @@
 # Lattice wall drop test
 
 units           si
-atom_style      ellipsoid
+atom_style      ellipsoid superellipsoid
 dimension       3
 boundary        p p f 
 comm_modify     vel yes
@@ -13,7 +13,7 @@ neigh_modify   delay 0 every 1 check yes
 
 # Setup Simulation Box
 variable        box_length equal 50
-variable        box_height equal 350
+variable        box_height equal 35
 
 region          box block 0 ${box_length} 0 ${box_length} 0 ${box_height}
 create_box      2 box
@@ -28,7 +28,7 @@ create_atoms    1 region floor_reg
 
 # Create Falling Particles (Type 2)
 region          drop_zone block 5 ${insertion_length} 5 ${insertion_length} 5 ${box_height}
-create_atoms    2 random 30000 12345 drop_zone overlap 2.0 maxtry 100
+create_atoms    2 random 3000 12345 drop_zone overlap 2.0 maxtry 100
 
 # Material Properties
 # Type 1: Wall particles
@@ -47,7 +47,7 @@ group           wall type 1
 group           mobile type 2
 
 # Interaction / Pair Style
-pair_style      gran/hooke/history/ellipsoid 1e4 0.0 20.0 0.0 0.5 0 bounding_box
+pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
 pair_coeff      * *
 
 # Computes and Output
@@ -59,7 +59,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}_friction.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index a5ba559c079..70b91da0d11 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -1,5 +1,5 @@
 units           si
-atom_style      ellipsoid
+atom_style      ellipsoid superellipsoid
 dimension       3
 boundary p p p
 comm_modify vel yes

From cf8daafdad58f1623d4f96729f0645473d766ac4 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 13:24:10 +0100
Subject: [PATCH 119/174] Fixed small bugs created during refactroring

---
 src/ASPHERE/compute_erotate_asphere.cpp | 4 ++--
 src/ASPHERE/fix_nve_asphere.cpp         | 2 +-
 src/atom_vec_ellipsoid.cpp              | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/compute_erotate_asphere.cpp b/src/ASPHERE/compute_erotate_asphere.cpp
index a4b817e8110..1ae85c27fcd 100644
--- a/src/ASPHERE/compute_erotate_asphere.cpp
+++ b/src/ASPHERE/compute_erotate_asphere.cpp
@@ -109,7 +109,7 @@ double ComputeERotateAsphere::compute_scalar()
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
-      if (ellipsoid && ebonus && (ellipsoid[i] >= 0)) {
+      if (ellipsoid && (ebonus || ebonus_super) && (ellipsoid[i] >= 0)) {
 
         if (atom->superellipsoid_flag) {
           shape = ebonus_super[ellipsoid[i]].shape;
@@ -130,7 +130,7 @@ double ComputeERotateAsphere::compute_scalar()
           
         // wbody = angular velocity in body frame
 
-        MathExtra::quat_to_mat(ebonus[ellipsoid[i]].quat,rot);
+        MathExtra::quat_to_mat(quat,rot);
         MathExtra::transpose_matvec(rot,angmom[i],wbody);
         wbody[0] /= inertia[0];
         wbody[1] /= inertia[1];
diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index e541fe3d91c..788864e843f 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -63,7 +63,7 @@ void FixNVEAsphere::initial_integrate_templated()
 {
   double dtfm;
   double omega[3];
-  double *inertia,*quat;
+  double *inertia,*quat, *shape;
 
   AtomVecEllipsoid::Bonus *bonus = nullptr;
   AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index a705e205c9d..0fd8c0121fa 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -47,7 +47,7 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   size_data_bonus = 8;
 
   atom->ellipsoid_flag = 1;
-  atom->superellipsoid_flag = 1;
+  atom->superellipsoid_flag = 0;
   atom->rmass_flag = atom->angmom_flag = atom->torque_flag = 1;
 
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
@@ -61,7 +61,7 @@ AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
   fields_copy = {"rmass", "angmom"};
   fields_comm_vel = {"angmom"};
   fields_reverse = {"torque"};
-  fields_border = {"radius", "rmass"};
+  fields_border = {"rmass"};
   fields_border_vel = {"rmass", "angmom"};
   fields_exchange = {"rmass", "angmom"};
   fields_restart = {"rmass", "angmom"};
@@ -426,7 +426,7 @@ int AtomVecEllipsoid::unpack_exchange_bonus(int ilocal, double *buf)
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
       type = determine_type(block);
-      bonus[nlocal_bonus].ilocal = ilocal;
+      bonus_super[nlocal_bonus].ilocal = ilocal;
     } else {
       double *shape = bonus[nlocal_bonus].shape;
       double *quat = bonus[nlocal_bonus].quat;
@@ -616,7 +616,7 @@ void AtomVecEllipsoid::data_atom_bonus(int m, const std::vector<std::string> &va
     inertia_ellipsoid_principal(shape, rmass[m], bonus_super[nlocal_bonus].inertia, block, type);
 
     radius[m] = radius_ellipsoid(shape, block, type);
-    bonus[nlocal_bonus].ilocal = m;
+    bonus_super[nlocal_bonus].ilocal = m;
 
   } else {
     // assign shape and quat to bonus data structure

From 0cb4352f34547d7732d50e3209335c4bd594cd97 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 6 Mar 2026 13:45:48 +0100
Subject: [PATCH 120/174] Reverted test_atom_styles to upstream

---
 unittest/formats/test_atom_styles.cpp | 44 ---------------------------
 1 file changed, 44 deletions(-)

diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index 2bc9533ff52..f3004951b34 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1135,7 +1135,6 @@ TEST_F(AtomStyleTest, ellipsoid)
     expected.molecular      = Atom::ATOMIC;
     expected.tag_enable     = 1;
     expected.ellipsoid_flag = 1;
-    expected.radius_flag    = 1;
     expected.rmass_flag     = 1;
     expected.angmom_flag    = 1;
     expected.torque_flag    = 1;
@@ -1227,7 +1226,6 @@ TEST_F(AtomStyleTest, ellipsoid)
     auto *type      = lmp->atom->type;
     auto *ellipsoid = lmp->atom->ellipsoid;
     auto *rmass     = lmp->atom->rmass;
-    auto *radius    = lmp->atom->radius;
     auto *avec      = dynamic_cast<AtomVecEllipsoid *>(lmp->atom->avec);
     auto *bonus     = avec->bonus;
     EXPECT_NEAR(x[GETIDX(1)][0], -2.0, EPSILON);
@@ -1313,46 +1311,6 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[3].quat[1], sqrt(5.0 / 30.0), EPSILON);
     EXPECT_NEAR(bonus[3].quat[2], sqrt(5.0 / 30.0), EPSILON);
     EXPECT_NEAR(bonus[3].quat[3], sqrt(5.0 / 30.0), EPSILON);
-    EXPECT_NEAR(bonus[0].block[0], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[0].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[1].block[0], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[1].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[2].block[0], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[2].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[3].block[0], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[3].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(radius[GETIDX(1)], 0.5, EPSILON);
-    EXPECT_NEAR(radius[GETIDX(2)], 0.5, EPSILON);
-    EXPECT_NEAR(radius[GETIDX(3)], 1.5, EPSILON);
-    EXPECT_NEAR(radius[GETIDX(4)], 1.5, EPSILON);
-    ASSERT_FALSE(bonus[0].type);
-    ASSERT_FALSE(bonus[1].type);
-    ASSERT_FALSE(bonus[2].type);
-    ASSERT_FALSE(bonus[3].type);
-
-    BEGIN_HIDE_OUTPUT();
-    command("set atom 5 block 8.0 8.0");
-    command("set atom 6 shape 4.0 0.5 1.6 block 4.0 2.0");
-    END_HIDE_OUTPUT();
-    ASSERT_EQ(ellipsoid[GETIDX(5)], 4);
-    ASSERT_EQ(ellipsoid[GETIDX(6)], 5);
-    EXPECT_NEAR(bonus[4].shape[0], 0.5, EPSILON);
-    EXPECT_NEAR(bonus[4].shape[1], 0.5, EPSILON);
-    EXPECT_NEAR(bonus[4].shape[2], 0.5, EPSILON);
-    EXPECT_NEAR(bonus[5].shape[0], 2.0, EPSILON);
-    EXPECT_NEAR(bonus[5].shape[1], 0.25, EPSILON);
-    EXPECT_NEAR(bonus[5].shape[2], 0.8, EPSILON);
-    EXPECT_NEAR(bonus[4].block[0], 8.0, EPSILON);
-    EXPECT_NEAR(bonus[4].block[1], 8.0, EPSILON);
-    EXPECT_NEAR(bonus[5].block[0], 4.0, EPSILON);
-    EXPECT_NEAR(bonus[5].block[1], 2.0, EPSILON);
-    EXPECT_NEAR(radius[GETIDX(5)], 0.5*sqrt(3.0), EPSILON);
-    EXPECT_NEAR(radius[GETIDX(6)], sqrt(4.64), EPSILON);
-    ASSERT_TRUE(bonus[4].type);
-    ASSERT_TRUE(bonus[5].type);
-    BEGIN_HIDE_OUTPUT();
-    command("set atom 5*6 shape 0.0 0.0 0.0");
-    END_HIDE_OUTPUT();
 
     BEGIN_HIDE_OUTPUT();
     command("group two id 2:4:2");
@@ -4288,7 +4246,6 @@ TEST_F(AtomStyleTest, full_ellipsoid)
     expected.tag_enable     = 1;
     expected.molecule_flag  = 1;
     expected.ellipsoid_flag = 1;
-    expected.radius_flag    = 1;
     expected.q_flag         = 1;
     expected.rmass_flag     = 1;
     expected.torque_flag    = 1;
@@ -4944,7 +4901,6 @@ TEST_F(AtomStyleTest, oxdna)
     expected.tag_enable     = 1;
     expected.molecule_flag  = 1;
     expected.ellipsoid_flag = 1;
-    expected.radius_flag    = 1;
     expected.rmass_flag     = 1;
     expected.torque_flag    = 1;
     expected.angmom_flag    = 1;

From 1b8f1920519f9cbf90229549c72981ca1fadf55d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 9 Mar 2026 10:57:53 +0100
Subject: [PATCH 121/174] Fixed bug in inertia passing for nve asphere
 integrators

---
 src/ASPHERE/fix_nve_asphere.cpp         |  2 ++
 src/ASPHERE/fix_nve_asphere_noforce.cpp | 10 ++++------
 src/compute_property_atom.cpp           |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index 788864e843f..7c7f170df2d 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -64,6 +64,7 @@ void FixNVEAsphere::initial_integrate_templated()
   double dtfm;
   double omega[3];
   double *inertia,*quat, *shape;
+  double inertia_to_compute[3];
 
   AtomVecEllipsoid::Bonus *bonus = nullptr;
   AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;
@@ -109,6 +110,7 @@ void FixNVEAsphere::initial_integrate_templated()
       } else {
         quat = bonus[j].quat;
         shape = bonus[j].shape;
+        inertia = inertia_to_compute;
 
         inertia[0] = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
         inertia[1] = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index afc2ee4618b..45a96033d7e 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -77,8 +77,8 @@ void FixNVEAsphereNoforce::initial_integrate_templated()
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
-  double *shape,*quat;
-  double inertia[3],omega[3];
+  double *shape,*quat, *inertia;
+  double inertia_to_compute[3],omega[3];
 
   // update positions and quaternions for all particles
 
@@ -93,13 +93,11 @@ void FixNVEAsphereNoforce::initial_integrate_templated()
 
       if (is_super) {
         quat = bonus_super[ellipsoid[i]].quat;
-
-        inertia[0] = bonus_super[ellipsoid[i]].inertia[0];
-        inertia[1] = bonus_super[ellipsoid[i]].inertia[1];
-        inertia[2] = bonus_super[ellipsoid[i]].inertia[2];
+        inertia = bonus_super[ellipsoid[i]].inertia;
       } else {
         shape = bonus[ellipsoid[i]].shape;
         quat = bonus[ellipsoid[i]].quat;
+        inertia = inertia_to_compute;
 
         inertia[0] = rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]) / 5.0;
         inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index aa0e829bc99..bd49b3c5baa 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -1402,7 +1402,7 @@ void ComputePropertyAtom::pack_shapez(int n)
       buf[n] = 2.0*bonus[ellipsoid[i]].shape[2];
     else buf[n] = 1.0;
     n += nvalues;
-  }
+    }
   } else {
     AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus;
     for (int i = 0; i < nlocal; i++) {

From 19798d620e54667fc33574f3f95d1dd372f09a7e Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 10 Mar 2026 14:33:41 +0100
Subject: [PATCH 122/174] Updated doc to reflect changes in bonus struct of the
 ellipsoid atom style

---
 doc/src/Howto_spherical.rst       | 16 +++++++++-------
 doc/src/atom_style.rst            |  5 +++++
 doc/src/compute_property_atom.rst | 10 +++++-----
 doc/src/pair_gran_ellipsoid.rst   | 28 +++++++++++++++++++---------
 doc/src/read_data.rst             | 21 ++++++++++++---------
 doc/src/set.rst                   |  6 +++---
 6 files changed, 53 insertions(+), 33 deletions(-)

diff --git a/doc/src/Howto_spherical.rst b/doc/src/Howto_spherical.rst
index 583e7235e22..6df1fd387b0 100644
--- a/doc/src/Howto_spherical.rst
+++ b/doc/src/Howto_spherical.rst
@@ -51,13 +51,16 @@ individual particles, after then are created.
 The ellipsoid style defines particles that are ellipsoids and thus can
 be aspherical.  Each particle has a shape, specified by 3 diameters,
 and mass (or density).  Superellipsoid particles can be defined by
-specifying 2 blockiness exponents (block).  These particles store an angular
+specifying 2 blockiness exponents (block) and adding the `superellipsoid` 
+keyword to the `atom_style ellipsoid` command.  These particles store an angular
 momentum and their orientation (quaternion), and can be acted upon by
 torque.  They do not store an angular velocity (omega), which can be
 in a different direction than angular momentum, rather they compute it
-as needed.  The "set" command can be used to modify the diameter,
-blockiness, orientation, and mass of individual particles, after they
-are created.  It also has a brief explanation of what quaternions are.
+as needed.  The "set" command can be used to modify the diameter, orientation, 
+and mass of individual particles, after they are created.
+The "set" command can also be used to modify the blockiness of superellipsoid 
+particles.
+It also has a brief explanation of what quaternions are.
 
 The line style defines line segment particles with two end points and
 a mass (or density).  They can be used in 2d simulations, and they can
@@ -130,9 +133,8 @@ such interactions.  These are the various :doc:`pair styles <pair_style>` that g
 * :doc:`pair_style body/nparticle <pair_body_nparticle>`
 
 The granular pair styles are used with spherical particles.  The
-*gran/ellipsoid* granular pair styles are used with
-ellipsoidal and superellipsoid particles.  The
-dipole pair style is used with the dipole atom style, which could be
+*gran/ellipsoid* granular pair styles are used with superellipsoid particles.
+The dipole pair style is used with the dipole atom style, which could be
 applied to spherical or ellipsoidal particles.  The GayBerne and
 REsquared potentials require ellipsoidal particles, though they will
 also work if the 3 shape parameters are the same (a sphere).  The
diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst
index f8ef7cd1e08..d1cc4c6d140 100644
--- a/doc/src/atom_style.rst
+++ b/doc/src/atom_style.rst
@@ -28,6 +28,7 @@ Syntax
          *template* arg = template-ID
            template-ID = ID of molecule template specified in a separate :doc:`molecule <molecule>` command
          *hybrid* args = list of one or more sub-styles, each with their args
+         *ellipsoid* arg = superellipsoid (optional) for superellipsoids instead of ellipsoids
 
 * accelerated styles (with same args) = *angle/kk* or *atomic/kk* or *bond/kk* or *charge/kk* or *full/kk* or *molecular/kk* or *spin/kk*
 
@@ -353,6 +354,10 @@ stores a shape vector with the 3 diameters of the ellipsoid and a
 quaternion 4-vector with its orientation.  Each particle stores a flag
 in the ellipsoid vector which indicates whether it is an ellipsoid (1)
 or a point particle (0).
+By adding the flag *superellipsoid* to the atom_style command, the particles
+can be superellipsoids, which are a generalization of ellipsoids with two 
+additional blockiness parameters that control the shape. Superellipsoids 
+also store the principal moments of inertia of the particle.
 
 For the *line* style, particles can be are idealized line segments
 which store a per-particle mass and length and orientation (i.e. the
diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index 2b32318a008..9e3bb7d3d0f 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -66,8 +66,8 @@ Syntax
            *heatflow* = internal heat flow of spherical particle
            *angmomx,angmomy,angmomz* = angular momentum of aspherical particle
            *shapex,shapey,shapez* = 3 diameters of aspherical particle
-           *block1,block2* = 2 blockiness exponents of aspherical particle
-           *inertiax,inertiay,inertiaz* = 3 principal moments of inertia of aspherical particle
+           *block1,block2* = 2 blockiness exponents of aspherical (superellipsoid) particle
+           *inertiax,inertiay,inertiaz* = 3 principal moments of inertia of aspherical (superellipsoid) particle
            *quatw,quati,quatj,quatk* = quaternion components for aspherical or body particles
            *tqx,tqy,tqz* = torque on finite-size particles
            *end12x, end12y, end12z* = end points of line segment
@@ -167,11 +167,11 @@ If :doc:`newton bond off <newton>` is set, it will be tallied with both atom
 The quantities *shapex*, *shapey*, and *shapez* are defined for ellipsoidal
 particles and define the 3d shape of each particle.
 
-The quantities *block1*, and *block2*, are defined for ellipsoidal
-particles and define the blockiness of each super-ellipsoid particle.
+The quantities *block1*, and *block2*, are defined for superellipsoidal
+particles and define the blockiness of each superellipsoid particle.
 See the :doc:`set <set>` command for an explanation of the blockiness.
 
-The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for ellipsoidal
+The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for superellipsoidal
 particles and define the 3 principal moments of inertia of each particle.
 These are with respect to the particle's center of mass and in a reference system
 aligned with the particle's principal axes.  
diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index 148fac54b62..b68cdc3841b 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -75,6 +75,23 @@ property, and the exponents :math:`n_1` and :math:`n_2` to the *block* property
 of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
 more details.
 
+.. note::
+   
+    The contact solver strictly requires convex particle shapes to ensure a mathematically
+    unique point of deepest penetration. Therefore, the blockiness parameters must be
+    :math:`n_1 \ge 2.0` and :math:`n_2 \ge 2.0`. Attempting to simulate concave or "pointy"
+    particles (:math:`n < 2.0`) will result in an error.
+
+.. note::
+
+    For particles with high blockiness exponents (:math:`n > 4.0`) involved in edge-to-edge
+    or corner-to-corner contacts, the surface normal vector varies rapidly over small
+    distances. The Newton solver may occasionally fail to converge to the strict gradient
+    alignment tolerance (typically :math:`10^{-10}`).
+    You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
+    but the simulation will proceed using this best-effort contact point.
+    Using the `geometric` keyword will help mitigate this issue. 
+
 Contact detection for these aspherical particles uses the so-called ''midway''
 minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
 particles with shape functions,  :math:`F_i` and :math:`F_j`,
@@ -286,14 +303,6 @@ All settings are global and are made via the pair_style command.
 However you must still use the :doc:`pair_coeff <pair_coeff>` for all
 pairs of granular atom types.  For example the command
 
-.. note::
-   For particles with high blockiness exponents (:math:`n > 4`) involved in edge-to-edge
-   or corner-to-corner contacts, the surface normal vector varies rapidly over microscopic
-   distances. The Newton solver may occasionally fail to converge to the strict gradient
-   alignment tolerance (typically :math:`10^{-10}`).
-   You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
-   but the simulation will proceed using this best-effort contact point.
-
 .. code-block:: LAMMPS
 
    pair_coeff * *
@@ -347,7 +356,8 @@ Restrictions
 All the granular pair styles are part of the GRANULAR package.  It is
 only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
 
-These pair styles require the *ellipsoid* atom style, which is part of the ASPHERE package. It is
+These pair styles require the *ellipsoid* atom style with the *superellipsoid* flag enabled,
+(i.e. `atom_style ellipsoid superellipsoid`) which is part of the ASPHERE package. It is
 only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
 
 This pair style requires you to use the :doc:`comm_modify vel yes <comm_modify>` command so that velocities are stored by ghost
diff --git a/doc/src/read_data.rst b/doc/src/read_data.rst
index 358f676d6a5..92ef8df8ece 100644
--- a/doc/src/read_data.rst
+++ b/doc/src/read_data.rst
@@ -1327,19 +1327,21 @@ and a general discussion of how type labels can be used.
 *Ellipsoids* section:
 
 * one line per ellipsoid
-* line syntax: atom-ID shapex shapey shapez quatw quati quatj quatk block1 block2
+* line syntax: atom-ID shapex shapey shapez quatw quati quatj quatk
+* line syntax (*superellipsoids*): atom-ID shapex shapey shapez quatw quati quatj quatk block1 block2
 
   .. parsed-literal::
 
        atom-ID = ID of atom which is an ellipsoid
        shapex,shapey,shapez = 3 diameters of ellipsoid (distance units)
        quatw,quati,quatj,quatk = quaternion components for orientation of atom
-       block1,block2 = 2 blockiness parameters for super-ellipsoids
+       block1,block2 = 2 blockiness parameters for superellipsoids only
 
-* example:
+* examples:
 
   .. parsed-literal::
 
+       12 1 2 1 1 0 0 0
        12 1 2 1 1 0 0 0 2 2
 
 The *Ellipsoids* section must appear if :doc:`atom_style ellipsoid
@@ -1363,18 +1365,19 @@ the quaternion that represents its new orientation is given by
 LAMMPS normalizes each atom's quaternion in case (a,b,c) is not
 specified as a unit vector.
 
-If the data file defines a general triclinic box, then the quaternion
-for each ellipsoid should be specified for its orientation relative to
-the standard x,y,z coordinate axes.  When the system is converted to a
-restricted triclinic box, the ellipsoid quaternions will be altered to
-reflect the new orientation of the ellipsoid.
 The blockiness values *block1*, *block2* generalize the geometry to a super
-ellipsoid for use in granualr simulations.  Sections through the center and
+ellipsoid for use in granular simulations. Sections through the center and
 parallel to the z-axis are superellipses with squareness *block1* and sections
 in the x-y plane are superellipses with squareness *block2*.  These parameters
 are optional and default to a value of 2, recovering ellipsoid geometry.
 When specified, both values must be greater than or equal to 2.
 
+If the data file defines a general triclinic box, then the quaternion
+for each ellipsoid should be specified for its orientation relative to
+the standard x,y,z coordinate axes.  When the system is converted to a
+restricted triclinic box, the ellipsoid quaternions will be altered to
+reflect the new orientation of the ellipsoid.
+
 The *Ellipsoids* section must appear after the *Atoms* section.
 
 ----------
diff --git a/doc/src/set.rst b/doc/src/set.rst
index 66c7939a8f9..a45a39a6ab2 100644
--- a/doc/src/set.rst
+++ b/doc/src/set.rst
@@ -46,7 +46,7 @@ Syntax
          precise = switching parameter of fast potential (0)
          float = constant float or atom-style variable (between 0 and 1)
        *block* value = block1, block2
-         block1,block2 = 2 blockiness parameters for super-ellipsoids
+         block1,block2 = 2 blockiness parameters for superellipsoids
        *bond* value = numeric bond type or bond type label, for all bonds between selected atoms
        *cc* values = index cc
          index = index of a chemical species (1 to Nspecies)
@@ -544,8 +544,8 @@ keyword.
 Keyword *block* sets the blockiness of the selected atoms.  The
 particles must be ellipsoids as defined by the :doc:`atom_style
 ellipsoid <atom_style>` command.  This command is used to define
-super-ellipsoid particle shapes for use in granular simulations.
-The *block1*, *block2* settings are the 2 exponents of the super-ellipsoid
+superellipsoid particle shapes for use in granular simulations.
+The *block1*, *block2* settings are the 2 exponents of the superellipsoid
 in the vertical and horizontal directions.  Vertical sections through the
 center are superellipses with squareness *block1* and horizontal sections
 are superellipses with squareness *block2*.  If both parameters are set to

From c453f35d84a9e1afab70db70b9ec83381bd823ca Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 10 Mar 2026 14:37:44 +0100
Subject: [PATCH 123/174] Renaming super-ellipsoid to superellipsoid

---
 src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp | 4 ++--
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 4 ++--
 src/atom_vec_ellipsoid.cpp                         | 4 ++--
 src/math_extra.cpp                                 | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 6bc7ee3427e..fa7f97f87f8 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -172,7 +172,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         if (skip_contact_detection)
           touching = false;
         else {
-          // Super-ellipsoid contact detection between atoms i and j
+          // superellipsoid contact detection between atoms i and j
           flagi = bonus[ellipsoid[i]].type;
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
@@ -568,7 +568,7 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       return 0.0;
     }
   }
-  // Super-ellipsoid contact detection between atoms i and j
+  // superellipsoid contact detection between atoms i and j
   double X0[4], nij[3];
   AtomVecEllipsoid::BlockType flagi, flagj;
   flagi = bonus[ellipsoid[i]].type;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 6f6b6d47e7e..53496978ccc 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -225,7 +225,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         if (skip_contact_detection)
           touching = false;
         else {
-          // Super-ellipsoid contact detection between atoms i and j
+          // superellipsoid contact detection between atoms i and j
           flagi = bonus[ellipsoid[i]].type;
           flagj = bonus[ellipsoid[j]].type;
           if (touch[jj] == 1) {
@@ -840,7 +840,7 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       return 0.0;
     }
   }
-  // Super-ellipsoid contact detection between atoms i and j
+  // superellipsoid contact detection between atoms i and j
   double X0[4], nij[3];
   AtomVecEllipsoid::BlockType flagi, flagj;
   flagi = bonus[ellipsoid[i]].type;
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index 0fd8c0121fa..b56579d405b 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -1036,7 +1036,7 @@ double AtomVecEllipsoid::radius_ellipsoid(double *shape, double *block, BlockTyp
 
   return sqrt(a * a + b * b + c * c);
 
-  // General super-ellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
+  // General superellipsoid, Eq. (12) of Podlozhnyuk et al. 2017
   // Not sure if exact solution worth it compared to boundig box diagonal
   // If both blockiness exponents are greater than 2, the exact radius does not
   // seem significantly smaller than the bounding box diagonal. At most sqrt(3)~ 70% too large
@@ -1067,7 +1067,7 @@ void AtomVecEllipsoid::inertia_ellipsoid_principal(double *shape, double mass, d
     idiag[1] = dens * (rsq0 + rsq2);
     idiag[2] = dens * (rsq0 + rsq1);
   } else {
-    // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003
+    // superellipsoid, Eq. (12) of Jaklic and Solina, 2003
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
     double beta_tmp1 = std::beta(0.5 * e1, 1 + 2 * e1);
     double beta_tmp2 = std::beta(0.5 * e2, 0.5 * e2);
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index d1093e32908..a58df62eda0 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -651,7 +651,7 @@ double volume_ellipsoid(double *shape, double *block, int flag_super)
 {
   double unitvol = MY_4PI3;
 
-  // super-ellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
+  // superellipsoid, Eq. (12) of Jaklic and Solina, 2003, for p = q = r = 0
 
   if (flag_super) {
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];

From 8351d006d98150b3d3806721935fcdce6bbc78a6 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 10 Mar 2026 14:50:32 +0100
Subject: [PATCH 124/174] Added specification on where curvature is computed

---
 doc/src/pair_gran_ellipsoid.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
index b68cdc3841b..a0bf8288f24 100644
--- a/doc/src/pair_gran_ellipsoid.rst
+++ b/doc/src/pair_gran_ellipsoid.rst
@@ -237,6 +237,8 @@ principal radii of curvature. For simplicity, the Hertzian model
 approximates the contact radius of each particle, :math:`R`, as either 
 the inverse of the mean curvature or as the gaussian curvature coefficient
 if the *curvature_gaussian* keyword is used.
+These curvatures are calculated at the particle surface point that is closest 
+to the contact point in the direction of the contact normal. 
 Thus Kn has units of force per area and is thus specified in units of
 (pressure).  The effects of absolute particle size (monodispersity)
 and relative size (polydispersity) are captured in the radii-dependent

From 97ab6ce5293fffd8dcf81cb30359f1f160bc3019 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 10 Mar 2026 21:23:00 +0100
Subject: [PATCH 125/174] Added guard against point particles in ellipsoid
 granular pair_style

---
 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 53496978ccc..ffd71ec5303 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -597,6 +597,18 @@ void PairGranHookeHistoryEllipsoid::init_style()
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 
+  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
+  int *ellipsoid = atom->ellipsoid;
+  if (!ellipsoid) 
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
+
+  int nlocal = atom->nlocal;
+  for (i = 0; i < nlocal; i++) {
+    if (ellipsoid[i] < 0) {
+      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
+    }
+  }
+
   // need a granular neighbor list
 
   if (use_history)
@@ -668,7 +680,6 @@ void PairGranHookeHistoryEllipsoid::init_style()
   double *radius = atom->radius;
   int *mask = atom->mask;
   int *type = atom->type;
-  int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     if (mask[i] & freeze_group_bit)

From ce2cb2aa699bc13137863fb3a47133b4eb6ba0ba Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 11 Mar 2026 09:45:39 +0100
Subject: [PATCH 126/174] Added unit test for atom style ellipsoid with
 superellipsoid flag. Fixed minor bug in bonus_super missing nullptr
 initialization

---
 src/atom_vec_ellipsoid.cpp            |   2 +-
 unittest/formats/test_atom_styles.cpp | 186 ++++++++++++++++++++++++++
 2 files changed, 187 insertions(+), 1 deletion(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index b56579d405b..cb4ddf79931 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -36,7 +36,7 @@ static constexpr double EPSILON_BLOCK = 1.0e-3;
 
 AtomVecEllipsoid::AtomVecEllipsoid(LAMMPS *lmp) :
     AtomVec(lmp), bonus(nullptr), ellipsoid(nullptr), rmass(nullptr), angmom(nullptr),
-    quat_hold(nullptr)
+    quat_hold(nullptr), bonus_super(nullptr)
 {
   molecular = Atom::ATOMIC;
   bonus_flag = 1;
diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index f3004951b34..de27139585e 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1459,6 +1459,192 @@ TEST_F(AtomStyleTest, ellipsoid)
     EXPECT_NEAR(bonus[3].quat[3], 0.25056280708573159, EPSILON);
 }
 
+TEST_F(AtomStyleTest, superellipsoid)
+{
+    if (!Info::has_package("ASPHERE")) GTEST_SKIP();
+
+    BEGIN_HIDE_OUTPUT();
+    command("atom_style ellipsoid superellipsoid");
+    END_HIDE_OUTPUT();
+
+    AtomState expected;
+    expected.atom_style     = "ellipsoid";
+    expected.molecular      = Atom::ATOMIC;
+    expected.tag_enable     = 1;
+    expected.ellipsoid_flag = 1;
+    expected.rmass_flag     = 1;
+    expected.radius_flag    = 1;
+    expected.angmom_flag    = 1;
+    expected.torque_flag    = 1;
+    expected.has_type       = true;
+    expected.has_mask       = true;
+    expected.has_image      = true;
+    expected.has_x          = true;
+    expected.has_v          = true;
+    expected.has_f          = true;
+
+    ASSERT_ATOM_STATE_EQ(lmp->atom, expected);
+    ASSERT_EQ(lmp->atom->superellipsoid_flag, 1);
+
+    BEGIN_HIDE_OUTPUT();
+    command("create_box 4 box");
+    command("create_atoms 1 single -2.0  2.0  0.1"); // Point
+    command("create_atoms 2 single  2.0  2.0 -0.1"); // ELLIPSOID (n1=2, n2=2)
+    command("create_atoms 3 single  2.0  2.0 -2.1"); // GENERAL (n1!=n2)
+    command("create_atoms 4 single -2.0 -2.0  0.1"); // N1_EQUAL_N2 
+    command("set type 1 mass 4.0");
+    command("set type 2 mass 2.4");
+    command("set type 3 mass 4.4");
+    command("set type 4 mass 5.0");
+    command("set type 2 shape 1.0 1.0 1.0");
+    command("set type 3 shape 3.0 0.8 1.1");
+    command("set type 4 shape 2.0 2.0 2.0");
+    command("set type 3 block 4.0 3.0");
+    command("set type 4 block 3.5 3.5");
+    command("pair_coeff * *");
+    END_HIDE_OUTPUT();
+    ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("ellipsoid"));
+    ASSERT_NE(lmp->atom->avec, nullptr);
+    ASSERT_EQ(lmp->atom->natoms, 4);
+    ASSERT_EQ(lmp->atom->nellipsoids, 3);
+    ASSERT_EQ(lmp->atom->nlocal, 4);
+    ASSERT_EQ(lmp->atom->nghost, 0);
+    ASSERT_NE(lmp->atom->nmax, -1);
+    ASSERT_EQ(lmp->atom->tag_enable, 1);
+    ASSERT_EQ(lmp->atom->molecular, Atom::ATOMIC);
+    ASSERT_EQ(lmp->atom->ntypes, 4);
+    ASSERT_EQ(lmp->atom->nextra_grow, 0);
+    ASSERT_EQ(lmp->atom->nextra_restart, 0);
+    ASSERT_EQ(lmp->atom->nextra_border, 0);
+    ASSERT_EQ(lmp->atom->nextra_grow_max, 0);
+    ASSERT_EQ(lmp->atom->nextra_restart_max, 0);
+    ASSERT_EQ(lmp->atom->nextra_border_max, 0);
+    ASSERT_EQ(lmp->atom->nextra_store, 0);
+    ASSERT_EQ(lmp->atom->extra_grow, nullptr);
+    ASSERT_EQ(lmp->atom->extra_restart, nullptr);
+    ASSERT_EQ(lmp->atom->extra_border, nullptr);
+    ASSERT_EQ(lmp->atom->extra, nullptr);
+
+    ASSERT_EQ(lmp->atom->mass, nullptr);
+    ASSERT_NE(lmp->atom->rmass, nullptr);
+    ASSERT_NE(lmp->atom->radius, nullptr);
+    ASSERT_NE(lmp->atom->ellipsoid, nullptr);
+    ASSERT_EQ(lmp->atom->mass_setflag, nullptr);
+
+    BEGIN_HIDE_OUTPUT();
+    command("write_data test_atom_styles.data nocoeff");
+    command("clear");
+    command("atom_style ellipsoid superellipsoid");
+    command("pair_style zero 4.0");
+    command("units real");
+    command("atom_modify map array");
+    command("read_data test_atom_styles.data");
+    command("pair_coeff * *");
+    END_HIDE_OUTPUT();
+    ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("ellipsoid"));
+    ASSERT_NE(lmp->atom->avec, nullptr);
+    ASSERT_EQ(lmp->atom->natoms, 4);
+    ASSERT_EQ(lmp->atom->nlocal, 4);
+    ASSERT_EQ(lmp->atom->nellipsoids, 3);
+    ASSERT_EQ(lmp->atom->nghost, 0);
+    ASSERT_NE(lmp->atom->nmax, -1);
+    ASSERT_EQ(lmp->atom->tag_enable, 1);
+    ASSERT_EQ(lmp->atom->molecular, Atom::ATOMIC);
+    ASSERT_EQ(lmp->atom->ntypes, 4);
+    ASSERT_EQ(lmp->atom->ellipsoid_flag, 1);
+    ASSERT_NE(lmp->atom->ellipsoid, nullptr);
+    ASSERT_NE(lmp->atom->sametag, nullptr);
+    ASSERT_EQ(lmp->atom->tag_consecutive(), 1);
+    ASSERT_EQ(lmp->atom->map_style, Atom::MAP_ARRAY);
+    ASSERT_EQ(lmp->atom->map_user, 1);
+    ASSERT_EQ(lmp->atom->map_tag_max, 4);
+
+    auto *type      = lmp->atom->type;
+    auto *ellipsoid = lmp->atom->ellipsoid;
+    auto *rmass     = lmp->atom->rmass;
+    auto *avec      = dynamic_cast<AtomVecEllipsoid *>(lmp->atom->avec);
+    auto *bonus     = avec->bonus_super;
+
+    ASSERT_EQ(type[GETIDX(1)], 1);
+    ASSERT_EQ(ellipsoid[GETIDX(1)], -1);
+    EXPECT_NEAR(rmass[GETIDX(1)], 4.0, EPSILON);
+    ASSERT_EQ(type[GETIDX(2)], 2);
+    ASSERT_EQ(ellipsoid[GETIDX(2)], 0);
+    EXPECT_NEAR(rmass[GETIDX(2)], 2.4, EPSILON);
+    EXPECT_NEAR(bonus[0].shape[0], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[0].shape[1], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[0].shape[2], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[0].block[0], 2.0, EPSILON); // set by default
+    EXPECT_NEAR(bonus[0].block[1], 2.0, EPSILON); // set by default
+    EXPECT_NEAR(bonus[0].type, 0, EPSILON); // BlockType::ELLIPSOID
+    ASSERT_EQ(type[GETIDX(3)], 3);
+    ASSERT_EQ(ellipsoid[GETIDX(3)], 1);
+    EXPECT_NEAR(rmass[GETIDX(3)], 4.4, EPSILON);
+    EXPECT_NEAR(bonus[1].shape[0], 1.5, EPSILON);
+    EXPECT_NEAR(bonus[1].shape[1], 0.4, EPSILON);
+    EXPECT_NEAR(bonus[1].shape[2], 0.55, EPSILON);
+    EXPECT_NEAR(bonus[1].block[0], 4.0, EPSILON);
+    EXPECT_NEAR(bonus[1].block[1], 3.0, EPSILON); 
+    EXPECT_NEAR(bonus[1].type, 2, EPSILON); // BlockType::GENERAL
+    ASSERT_EQ(type[GETIDX(4)], 4);
+    ASSERT_EQ(ellipsoid[GETIDX(4)], 2);
+    EXPECT_NEAR(rmass[GETIDX(4)], 5.0, EPSILON);
+    EXPECT_NEAR(bonus[2].shape[0], 1.0, EPSILON);
+    EXPECT_NEAR(bonus[2].shape[1], 1.0, EPSILON);
+    EXPECT_NEAR(bonus[2].shape[2], 1.0, EPSILON);
+    EXPECT_NEAR(bonus[2].block[0], 3.5, EPSILON);
+    EXPECT_NEAR(bonus[2].block[1], 3.5, EPSILON); 
+    EXPECT_NEAR(bonus[2].type, 1, EPSILON); // BlockType::N1_EQUAL_N2
+
+    BEGIN_HIDE_OUTPUT();
+    command("write_restart test_atom_styles.restart");
+    command("clear");
+    command("read_restart test_atom_styles.restart");
+    command("comm_style tiled");
+    command("replicate 1 1 2 bbox");
+    END_HIDE_OUTPUT();
+
+    ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("ellipsoid"));
+    ASSERT_NE(lmp->atom->avec, nullptr);
+    ASSERT_EQ(lmp->atom->natoms, 8);
+    ASSERT_EQ(lmp->atom->nlocal, 8);
+    ASSERT_EQ(lmp->atom->nellipsoids, 6);
+    ASSERT_EQ(lmp->atom->superellipsoid_flag, 1);
+
+    type      = lmp->atom->type;
+    ellipsoid = lmp->atom->ellipsoid;
+    rmass     = lmp->atom->rmass;
+    avec      = dynamic_cast<AtomVecEllipsoid *>(lmp->atom->avec);
+    bonus     = avec->bonus_super;
+
+    ASSERT_EQ(type[GETIDX(1)], 1);
+    ASSERT_EQ(type[GETIDX(2)], 2);
+    ASSERT_EQ(type[GETIDX(3)], 3);
+    ASSERT_EQ(type[GETIDX(4)], 4);
+    ASSERT_EQ(type[GETIDX(5)], 1);
+    ASSERT_EQ(type[GETIDX(6)], 2);
+    ASSERT_EQ(type[GETIDX(7)], 3);
+    ASSERT_EQ(type[GETIDX(8)], 4);
+    ASSERT_EQ(ellipsoid[GETIDX(1)], -1);
+    ASSERT_EQ(ellipsoid[GETIDX(2)], 0);
+    ASSERT_EQ(ellipsoid[GETIDX(3)], 1);
+    ASSERT_EQ(ellipsoid[GETIDX(4)], 2);
+    ASSERT_EQ(ellipsoid[GETIDX(5)], -1);
+    ASSERT_EQ(ellipsoid[GETIDX(6)], 3);
+    ASSERT_EQ(ellipsoid[GETIDX(7)], 4);
+    ASSERT_EQ(ellipsoid[GETIDX(8)], 5);
+    EXPECT_NEAR(bonus[3].shape[0], 0.5, EPSILON);
+    EXPECT_NEAR(bonus[3].block[0], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[3].block[1], 2.0, EPSILON);
+    EXPECT_NEAR(bonus[4].shape[0], 1.5, EPSILON);
+    EXPECT_NEAR(bonus[4].block[0], 4.0, EPSILON);
+    EXPECT_NEAR(bonus[4].block[1], 3.0, EPSILON);
+    EXPECT_NEAR(bonus[5].shape[0], 1.0, EPSILON);
+    EXPECT_NEAR(bonus[5].block[0], 3.5, EPSILON);
+    EXPECT_NEAR(bonus[5].block[1], 3.5, EPSILON);
+    EXPECT_NEAR(bonus[5].type, 1, EPSILON);
+}
+
 TEST_F(AtomStyleTest, line)
 {
     if (!Info::has_package("ASPHERE")) GTEST_SKIP();

From 75b8a2ddd388a6dc9fffbb434891dc825b235d45 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Wed, 11 Mar 2026 14:17:05 -0600
Subject: [PATCH 127/174] Minor style changes (white space comments, constants)
 and extra error checks

---
 doc/src/Howto_spherical.rst                |   9 +-
 src/.gitignore                             |   2 +
 src/ASPHERE/math_extra_superellipsoids.cpp | 192 +++++++++++++--------
 src/ASPHERE/math_extra_superellipsoids.h   |  25 ++-
 src/compute_property_atom.cpp              |  10 +-
 src/fix_move.cpp                           |  18 +-
 6 files changed, 154 insertions(+), 102 deletions(-)

diff --git a/doc/src/Howto_spherical.rst b/doc/src/Howto_spherical.rst
index 6df1fd387b0..162c3e148f5 100644
--- a/doc/src/Howto_spherical.rst
+++ b/doc/src/Howto_spherical.rst
@@ -51,16 +51,15 @@ individual particles, after then are created.
 The ellipsoid style defines particles that are ellipsoids and thus can
 be aspherical.  Each particle has a shape, specified by 3 diameters,
 and mass (or density).  Superellipsoid particles can be defined by
-specifying 2 blockiness exponents (block) and adding the `superellipsoid` 
+specifying 2 blockiness exponents (block) and adding the `superellipsoid`
 keyword to the `atom_style ellipsoid` command.  These particles store an angular
 momentum and their orientation (quaternion), and can be acted upon by
 torque.  They do not store an angular velocity (omega), which can be
 in a different direction than angular momentum, rather they compute it
-as needed.  The "set" command can be used to modify the diameter, orientation, 
+as needed.  The "set" command can be used to modify the diameter, orientation,
 and mass of individual particles, after they are created.
-The "set" command can also be used to modify the blockiness of superellipsoid 
-particles.
-It also has a brief explanation of what quaternions are.
+The "set" command can also be used to modify the blockiness of superellipsoid
+particles. It also has a brief explanation of what quaternions are.
 
 The line style defines line segment particles with two end points and
 a mass (or density).  They can be used in 2d simulations, and they can
diff --git a/src/.gitignore b/src/.gitignore
index b6da4da02e2..2f198273dc1 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -1201,6 +1201,8 @@
 /kissfft.h
 /lj_spica_common.h
 /math_complex.h
+/math_extra_superellipsoids.cpp
+/math_extra_superellipsoids.h
 /math_vector.h
 /message.cpp
 /message.h
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 84d81c0fb98..ec263043376 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -17,10 +17,11 @@
 ------------------------------------------------------------------------- */
 
 #include "math_extra_superellipsoids.h"
+
+#include "math_const.h"
 #include "math_extra.h"
+
 #include <cmath>
-// #include "math_special.h"
-// #include "math_const.h"
 
 namespace MathExtraSuperellipsoids {
 
@@ -39,11 +40,15 @@ static constexpr double TOL_OVERLAP = 1e-8;
 static constexpr unsigned int ITERMAX_OVERLAP = 20;
 static constexpr double MINSLOPE_OVERLAP = 1e-12;
 
+static constexpr double REGULARIZATION_EPSILON = 1e-12;
+static constexpr double MAX_B_FAST = 1e30;
+
 
 /* ----------------------------------------------------------------------
    curvature of superellipsoid
    source https://en.wikipedia.org/wiki/Mean_curvature
 ------------------------------------------------------------------------- */
+
 double mean_curvature_superellipsoid(const double *shape, const double *block, const int flag, const double R[3][3], const double *surf_global_point, const double *xc)
 {
   // this code computes the mean curvature on the superellipsoid surface
@@ -92,7 +97,7 @@ double gaussian_curvature_superellipsoid(const double *shape, const double *bloc
     {fxx, fxy, fxz, fx},
     {fxy, fyy, fyz, fy},
     {fxz, fyz, fzz, fz},
-    {fx,  fy,  fz, 0.0} 
+    {fx,  fy,  fz, 0.0}
   };
 
   double K = -det4_M44_zero(mat) / (F_mag*F_mag*F_mag*F_mag);
@@ -100,28 +105,31 @@ double gaussian_curvature_superellipsoid(const double *shape, const double *bloc
   return curvature;
 }
 
-  
+
 /* ----------------------------------------------------------------------
    express global (system level) to local (particle level) coordinates
 ------------------------------------------------------------------------- */
 
-void global2local_vector(const double *v, const double *quat, double *local_v){
+void global2local_vector(const double *v, const double *quat, double *local_v)
+{
     double qc[4];
     MathExtra::qconjugate(const_cast<double*>(quat), qc);
     MathExtra::quatrotvec(qc, const_cast<double*>(v), local_v);
 }
 
 /* ----------------------------------------------------------------------
-   Possible regularization for the shape functions 
+   Possible regularization for the shape functions
    Instead of F(x,y,z) = 0 we use (F(x,y,z)+1)^(1/n1) -1 = G(x,y,z) = 0
    We also scale G by the average radius to have better "midway" points
 ------------------------------------------------------------------------- */
-void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3]){
+
+void apply_regularization_shape_function(double n1, const double avg_radius, double *value, double *grad, double hess[3][3])
+{
   // value is F - 1
-  double base = std::fmax(*value + 1.0, 1e-12); 
+  double base = std::fmax(*value + 1.0, REGULARIZATION_EPSILON);
   const double inv_F = 1.0 / base;
   const double inv_n1 = 1.0 / n1;
-  
+
   // P = base^(1/n)
   const double F_pow_inv_n1 = std::pow(base, inv_n1);
 
@@ -148,11 +156,12 @@ void apply_regularization_shape_function(double n1, const double avg_radius, dou
   *value = avg_radius * (F_pow_inv_n1 - 1.0);
 };
 
-
 /* ----------------------------------------------------------------------
    shape function computations for superellipsoids
 ------------------------------------------------------------------------- */
-double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3]){
+
+double shape_and_derivatives_local(const double* xlocal, const double* shape, const double* block, const int flag, double* grad, double hess[3][3])
+{
   double shapefunc;
   // TODO: Not sure how to make flag values more clear
   // Cannot forward declare the enum AtomVecEllipsoid::BlockType
@@ -181,8 +190,12 @@ double shape_and_derivatives_local(const double* xlocal, const double* shape, co
   return shapefunc;
 }
 
-// General case for n1 != n2 > 2
-double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3]) {
+/* ----------------------------------------------------------------------
+   General case for n1 != n2 > 2
+------------------------------------------------------------------------- */
+
+double shape_and_derivatives_local_superquad(const double* xlocal, const double* shape, const double* block, double* grad, double hess[3][3])
+{
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -224,8 +237,12 @@ double shape_and_derivatives_local_superquad(const double* xlocal, const double*
   return (nu_pow_n1_n2_m1 * nu) + (z_c_pow_n1_m1 * z_c) - 1.0;
 }
 
-// Special case for n2 = n2 = n > 2
-double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3]) {
+/* ----------------------------------------------------------------------
+   Special case for n2 = n2 = n > 2
+------------------------------------------------------------------------- */
+
+double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double* shape, const double n, double* grad, double hess[3][3])
+{
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -257,9 +274,12 @@ double shape_and_derivatives_local_n1equaln2(const double* xlocal, const double*
   return (x_a_pow_n_m1 * x_a) + (y_b_pow_n_m1 * y_b) + (z_c_pow_n_m1 * z_c) - 1.0;
 }
 
+/* ----------------------------------------------------------------------
+   Special case for n1 = n2 = 2
+------------------------------------------------------------------------- */
 
-// Special case for n1 = n2 = 2
-double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3]) {
+double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double* shape, double* grad, double hess[3][3])
+{
   double a = 2.0 / (shape[0] * shape[0]);
   double b = 2.0 / (shape[1] * shape[1]);
   double c = 2.0 / (shape[2] * shape[2]);
@@ -278,14 +298,15 @@ double shape_and_derivatives_local_ellipsoid(const double* xlocal, const double*
   return 0.5 * (grad[0]*xlocal[0] + grad[1]*xlocal[1] + grad[2]*xlocal[2]) - 1.0;
 }
 
+/* ---------------------------------------------------------------------- */
 
-double shape_and_derivatives_global(const double* xc, const double R[3][3], 
-    const double* shape, const double* block, const int flag, 
+double shape_and_derivatives_global(const double* xc, const double R[3][3],
+    const double* shape, const double* block, const int flag,
     const double* X0, double* grad, double hess[3][3],
-    const int formulation, const double avg_radius) 
+    const int formulation, const double avg_radius)
 {
   double xlocal[3], tmp_v[3], tmp_m[3][3];
-  MathExtra::sub3(X0, xc, tmp_v); 
+  MathExtra::sub3(X0, xc, tmp_v);
   MathExtra::transpose_matvec(R, tmp_v, xlocal);
   double shapefunc = shape_and_derivatives_local(xlocal, shape, block, flag, tmp_v, hess);
   if (formulation == FORMULATION_GEOMETRIC) {
@@ -312,11 +333,13 @@ double shape_and_derivatives_global(const double* xc, const double R[3][3],
 //          residual[3] * residual[3] / ((shapefunci + 1) * (shapefunci + 1));
 // }
 
-double compute_residual(const double shapefunci, const double* gradi_global, 
-                        const double shapefuncj, const double* gradj_global, 
-                        const double mu2, double* residual, 
-                        const int formulation, const double radius_scale) {
+/* ---------------------------------------------------------------------- */
 
+double compute_residual(const double shapefunci, const double* gradi_global,
+                        const double shapefuncj, const double* gradj_global,
+                        const double mu2, double* residual,
+                        const int formulation, const double radius_scale)
+{
   // Equation (23): Spatial residual (Gradient match)
   MathExtra::scaleadd3(mu2, gradj_global, gradi_global, residual);
   residual[3] = shapefunci - shapefuncj;
@@ -331,19 +354,22 @@ double compute_residual(const double shapefunci, const double* gradi_global,
 
   if (formulation == FORMULATION_GEOMETRIC) {
       // GEOMETRIC: G is a distance (Length).
-      scalar_denom = radius_scale; 
+      scalar_denom = radius_scale;
   } else {
       // ALGEBRAIC: F is dimensionless (approx 0 at surface).
       scalar_denom = shapefunci + 1.0;
   }
-  
+
   // Prevent division by zero in weird edge cases (e.g. very negative shape function)
   if (fabs(scalar_denom) < 1e-12) scalar_denom = 1.0;
 
   return spatial_norm + (residual[3] * residual[3]) / (scalar_denom * scalar_denom);
 }
 
-void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian) {
+/* ---------------------------------------------------------------------- */
+
+void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian)
+{
   // Jacobian (derivative of residual)
   // 1D column-major matrix for LAPACK/linalg compatibility
   for (int row = 0 ; row < 3 ; row++) {
@@ -358,21 +384,26 @@ void compute_jacobian(const double* gradi_global, const double hessi_global[3][3
   jacobian[15] = 0.0;
 }
 
+/* ---------------------------------------------------------------------- */
+
 double compute_residual_and_jacobian(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                                      const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                                     const double* X, double* shapefunc, double* residual, double* jacobian, 
-                                     const int formulation, const double avg_radius_i, const double avg_radius_j) {
+                                     const double* X, double* shapefunc, double* residual, double* jacobian,
+                                     const int formulation, const double avg_radius_i, const double avg_radius_j)
+{
   double gradi[3], hessi[3][3], gradj[3], hessj[3][3];
   shapefunc[0] = shape_and_derivatives_global(xci, Ri, shapei, blocki, flagi, X, gradi, hessi, formulation, avg_radius_i);
   shapefunc[1] = shape_and_derivatives_global(xcj, Rj, shapej, blockj, flagj, X, gradj, hessj, formulation, avg_radius_j);
   compute_jacobian(gradi, hessi, gradj, hessj, X[3], jacobian);
-  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual, formulation, (avg_radius_i + avg_radius_j)/2.0);
+  return compute_residual(shapefunc[0], gradi, shapefunc[1], gradj, X[3], residual, formulation, (avg_radius_i + avg_radius_j) * 0.5);
 }
 
+/* ---------------------------------------------------------------------- */
 
 int determine_contact_point(const double* xci, const double Ri[3][3], const double* shapei, const double* blocki, const int flagi,
                             const double* xcj, const double Rj[3][3], const double* shapej, const double* blockj, const int flagj,
-                            double* X0, double* nij, int formulation) {
+                            double* X0, double* nij, int formulation)
+{
   double norm, norm_old, shapefunc[2], residual[4], jacobian[16];
   double lsq = MathExtra::distsq3(xci, xcj);
   bool converged(false);
@@ -390,17 +421,17 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   // avg radii for regularization if GEOMETRIC formulation
   double avg_radius_i = 1;
   double avg_radius_j = 1;
-  double max_step = sqrt(lsq) / 5.0;  
+  double max_step = sqrt(lsq) * 0.2;
   if (formulation == FORMULATION_GEOMETRIC) {
-    avg_radius_i = (shapei[0] + shapei[1] + shapei[2]) / 3.0;
-    avg_radius_j = (shapej[0] + shapej[1] + shapej[2]) / 3.0;
+    avg_radius_i = (shapei[0] + shapei[1] + shapei[2]) * LAMMPS_NS::MathConst::THIRD;
+    avg_radius_j = (shapej[0] + shapej[1] + shapej[2]) * LAMMPS_NS::MathConst::THIRD;
   }
 
   norm = compute_residual_and_jacobian(xci, Ri, shapei, blocki, flagi, xcj, Rj, shapej, blockj, flagj, X0, shapefunc, residual, jacobian, formulation, avg_radius_i, avg_radius_j);
   // testing for convergence before attempting Newton's method.
   // the initial guess is the old X0, so with temporal coherence, it might still pass tolerance if deformation is slow!
   if (norm < TOL_NR_RES) {
-    
+
     //  must compute the normal vector nij before returning since the Newton loop normally handles this upon convergence.
     double xilocal[3], tmp_v[3], gradi[3], val_dummy;
 
@@ -410,7 +441,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
     // Compute local gradient
     // Algebraic gradient is fine for direction even if we used Geometric for solving
-    // TODO: might use a simpler function to simply compute the gradient, to 
+    // TODO: might use a simpler function to simply compute the gradient, to
     // avoid computing quantities already computed in compute_residual_and_jacobian
     if (flagi <= 1)
       val_dummy = shape_and_gradient_local_n1equaln2_surfacesearch(xilocal, shapei, blocki[0], gradi);
@@ -422,9 +453,9 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     MathExtra::normalize3(gradi, nij);
 
     // Return status
-    if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) 
+    if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0)
       return 1; // Converged, but no contact (separated)
-    
+
     return 0; // Converged and Contacting
   }
 
@@ -443,25 +474,25 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         }
     }
 
-    b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
+    b_fast[0] = -residual[0]; b_fast[1] = -residual[1];
     b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
 
     // Try Fast Solver
     gauss_elim_solved = MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast);
-    
+
     // check for divergence or numerical issues in the fast solver
     // and fall back to regularized solver if necessary
-    bool fail0 = !std::isfinite(b_fast[0]) | (std::abs(b_fast[0]) > 1e30);
-    bool fail1 = !std::isfinite(b_fast[1]) | (std::abs(b_fast[1]) > 1e30);
-    bool fail2 = !std::isfinite(b_fast[2]) | (std::abs(b_fast[2]) > 1e30);
-    bool fail3 = !std::isfinite(b_fast[3]) | (std::abs(b_fast[3]) > 1e30);
+    bool fail0 = !std::isfinite(b_fast[0]) | (std::abs(b_fast[0]) > MAX_B_FAST);
+    bool fail1 = !std::isfinite(b_fast[1]) | (std::abs(b_fast[1]) > MAX_B_FAST);
+    bool fail2 = !std::isfinite(b_fast[2]) | (std::abs(b_fast[2]) > MAX_B_FAST);
+    bool fail3 = !std::isfinite(b_fast[3]) | (std::abs(b_fast[3]) > MAX_B_FAST);
     if (fail0 | fail1 | fail2 | fail3) {
         gauss_elim_solved = false;
     }
-    
-    rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
+
+    rhs[0] = b_fast[0]; rhs[1] = b_fast[1];
     rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
-    
+
     if (!gauss_elim_solved) {
       // restore matrix
       for(int r=0; r<4; ++r) {
@@ -469,7 +500,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
             A_fast[r*4 + c] = jacobian[c*4 + r];
         }
       }
-      b_fast[0] = -residual[0]; b_fast[1] = -residual[1]; 
+      b_fast[0] = -residual[0]; b_fast[1] = -residual[1];
       b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
        // enforce a minimum regularization to avoid zero pivots in edge cases (flat on flat)
       double trace = jacobian[0] + jacobian[5] + jacobian[10];
@@ -479,7 +510,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       A_fast[10] += diag_weight;
 
       if (MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A_fast, b_fast)) {
-          rhs[0] = b_fast[0]; rhs[1] = b_fast[1]; 
+          rhs[0] = b_fast[0]; rhs[1] = b_fast[1];
           rhs[2] = b_fast[2]; rhs[3] = b_fast[3];
           gauss_elim_solved = true;
       }
@@ -494,12 +525,12 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     int iter_ls;
 
     if (formulation == FORMULATION_GEOMETRIC) {
-      a = 1.0; // no need for multiplicity scaling 
+      a = 1.0; // no need for multiplicity scaling
     }
     // Limit the max step size to avoid jumping too far
     // normalize residual vector if step was limited
     double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);
-    
+
     if (spatial_residual_norm > max_step) {
         double scale = max_step / spatial_residual_norm;
         rhs[0] *= scale;
@@ -532,7 +563,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
       // One alternative would be to store the intermediate variables from
       // the local gradient calculation when calling `shape_and_gradient_local()`,
-      // and re-use them during the local hessian calculation (function that 
+      // and re-use them during the local hessian calculation (function that
       // calculates only the Hessian from these intermediate values would need
       // to be implemented).
       // This seems a bit clunky just to save the few multiplications of the
@@ -543,12 +574,12 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
       double xilocal[3], gradi[3], hessi[3][3], xjlocal[3], gradj[3], hessj[3][3], tmp_v[3];
 
-      MathExtra::sub3(X_line, xci, tmp_v); 
+      MathExtra::sub3(X_line, xci, tmp_v);
       MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
       shapefunc[0] = shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hessi);
       if (formulation == FORMULATION_GEOMETRIC) {
           apply_regularization_shape_function(blocki[0], avg_radius_i, &shapefunc[0], tmp_v, hessi);
-      } 
+      }
       MathExtra::matvec(Ri, tmp_v, gradi);
 
       MathExtra::sub3(X_line, xcj, tmp_v);
@@ -564,7 +595,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       if ((norm <= TOL_NR_RES) &&
           (MathExtra::lensq3(rhs) * a * a <= TOL_NR_POS * lsq)) {
         converged = true;
-        
+
         MathExtra::normalize3(gradi, nij);
         break;
       } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
@@ -596,7 +627,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
         double xilocal[3], tmp_v[3], gradi[3], hess_dummy[3][3];
         MathExtra::sub3(X0, xci, tmp_v);
         MathExtra::transpose_matvec(Ri, tmp_v, xilocal);
-        
+
         // We only need the gradient for the normal
         shape_and_derivatives_local(xilocal, shapei, blocki, flagi, tmp_v, hess_dummy);
         if (formulation == FORMULATION_GEOMETRIC) {
@@ -630,10 +661,15 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   return 0;
 }
 
-// Functions to compute shape function and gradient only when called for newton method
-// to avoid computing hessian when not needed and having smoother landscape for the line search
-// General case for n1 != n2 > 2
-double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad) {
+/* ----------------------------------------------------------------------
+   Functions to compute shape function and gradient only when called for
+     newton method to avoid computing hessian when not needed and having
+     smoother landscape for the line search
+   General case for n1 != n2 > 2
+------------------------------------------------------------------------- */
+
+double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, const double* shape, const double* block, double* grad)
+{
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -673,8 +709,12 @@ double shape_and_gradient_local_superquad_surfacesearch(const double* xlocal, co
   return std::pow(F, 1.0/n1) - 1.0;
 }
 
-// Special case for n2 = n2 = n > 2
-double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad) {
+/* ----------------------------------------------------------------------
+   Special case for n2 = n2 = n > 2
+------------------------------------------------------------------------- */
+
+double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, const double* shape, const double n, double* grad)
+{
   double a_inv = 1.0 / shape[0];
   double b_inv = 1.0 / shape[1];
   double c_inv = 1.0 / shape[2];
@@ -706,28 +746,32 @@ double shape_and_gradient_local_n1equaln2_surfacesearch(const double* xlocal, co
   return std::pow(F, 1.0/n) - 1.0;
 }
 
-// Newton Rapson method to find the overlap distance from the contact point given the normal
+/* ----------------------------------------------------------------------
+   Newton Rapson method to find the overlap distance from the contact point given the normal
+------------------------------------------------------------------------- */
+
 double compute_overlap_distance(
   const double* shape, const double* block, const double Rot[3][3], const int flag,
   const double* global_point, const double* global_normal,
-  const double* center) {
+  const double* center)
+{
   double local_point[3], local_normal[3];
   double del[3];
   double overlap;
   MathExtra::sub3(global_point, center, del);  // bring origin to 0.0
-  MathExtra::transpose_matvec(Rot, del, local_point); 
+  MathExtra::transpose_matvec(Rot, del, local_point);
   MathExtra::transpose_matvec(Rot, global_normal, local_normal);
-  
+
   double local_f;
   double local_grad[3];
-  
-  // elliposid analytical solution, might need to double check the math 
+
+  // elliposid analytical solution, might need to double check the math
   // there is an easy way to find this by parametrizing the straight line as
   // X0 + t * n anf then substituting in the ellipsoid equation  for x, y, z
   // this results in a quadratic equation and we take the positive solution since
   // we are taking the outward facing normal for each grain
 
-  if (flag == 0){
+  if (flag == 0) {
 
     double a_inv2 = 1.0 / (shape[0] * shape[0]);
     double b_inv2 = 1.0 / (shape[1] * shape[1]);
@@ -750,11 +794,11 @@ double compute_overlap_distance(
     double delta = B*B - 4.0*A*C;
 
     // Clamp delta to zero just in case numerical noise makes it negative
-    if (delta < 0.0) delta = 0.0; 
+    if (delta < 0.0) delta = 0.0;
     overlap = (-B + std::sqrt(delta)) / (2.0 * A);
   } else {
-      // --- Superquadric Case (Newton-Raphson on Distance Estimator) ---
-    
+    // --- Superquadric Case (Newton-Raphson on Distance Estimator) ---
+
     overlap = 0.0; // Distance along the normal
     double current_p[3];
     double val;
@@ -786,6 +830,6 @@ double compute_overlap_distance(
     }
   }
   return overlap;
-} 
+}
 
 } // namespace MathExtraSuperellipsoids
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index dc8dd63abc2..169a6591430 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -19,6 +19,7 @@
 #define LMP_MATH_EXTRA_SUPERELLIPOIDS_H
 
 #include "math_extra.h"
+
 #include <cmath>
 #include <iostream>
 #include <limits>
@@ -116,6 +117,7 @@ double gaussian_curvature_superellipsoid(const double *shape, const double *bloc
 /* ----------------------------------------------------------------------
    determinant of a 4x4 matrix M with M[3][3] assumed to be zero
 ------------------------------------------------------------------------- */
+
 inline double MathExtraSuperellipsoids::det4_M44_zero(const double m[4][4])
 {
   // Define the 3x3 submatrices (M_41, M_42, M_43)
@@ -291,13 +293,14 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
   return true;
 }
 
-// algorithm from https://www.geometrictools.com/Documentation/DynamicCollisionDetection.pdf
-/* * Oriented Bounding Box intersection test.
- * Logic and optimization strategies adapted from LIGGGHTS (CFDEMproject).
- * See: src/math_extra_liggghts_nonspherical.cpp in LIGGGHTS distribution.
- * * This implementation uses the "cached separating axis" optimization 
- * for temporal coherence.
- */
+/* ----------------------------------------------------------------------
+   Oriented Bounding Box intersection test
+     Logic and optimization strategies adapted from LIGGGHTS (CFDEMproject)
+     See: src/math_extra_liggghts_nonspherical.cpp in LIGGGHTS distribution
+     This implementation uses the "cached separating axis" optimization for temporal coherence
+     Algorithm from https://www.geometrictools.com/Documentation/DynamicCollisionDetection.pdf
+------------------------------------------------------------------------- */
+
 inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
     const double *xc1, const double R1[3][3], const double *shape1, const double *xc2,
     const double R2[3][3], const double *shape2, double *cached_axis)
@@ -343,6 +346,8 @@ inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
   return false;    // no separation found
 }
 
+/* ---------------------------------------------------------------------- */
+
 inline bool MathExtraSuperellipsoids::check_intersection_axis(const int axis_id,
                                                               const double C[3][3],
                                                               const double AbsC[3][3],
@@ -441,6 +446,8 @@ inline bool MathExtraSuperellipsoids::check_intersection_axis(const int axis_id,
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 inline bool MathExtraSuperellipsoids::check_intersection_axis_and_get_seed(
     const double *xc1, const double R1[3][3], const double *shape1, const double *xc2,
     const double R2[3][3], const double *shape2, double *cached_axis, double *contact_point)
@@ -455,8 +462,8 @@ inline bool MathExtraSuperellipsoids::check_intersection_axis_and_get_seed(
   const double eps = 1e-20;
   for (unsigned int i = 0; i < 3; i++) {
     for (unsigned int j = 0; j < 3; j++) {
-      AbsC[i][j] =
-          std::fabs(C[i][j]) + eps;    // Add epsilon to prevent division by zero in edge cases
+      // Add epsilon to prevent division by zero in edge cases
+      AbsC[i][j] = std::fabs(C[i][j]) + eps;
     }
   }
 
diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index bd49b3c5baa..5084bd7a783 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -240,11 +240,11 @@ ComputePropertyAtom::ComputePropertyAtom(LAMMPS *lmp, int narg, char **arg) :
       pack_choice[i] = &ComputePropertyAtom::pack_shapez;
 
     } else if (strcmp(arg[iarg],"block1") == 0) {
-      if (!avec_ellipsoid) 
+      if (!avec_ellipsoid || !atom->superellipsoid_flag)
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_block1;
     } else if (strcmp(arg[iarg],"block2") == 0) {
-      if (!avec_ellipsoid) 
+      if (!avec_ellipsoid || !atom->superellipsoid_flag)
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_block2;
     } else if (strcmp(arg[iarg],"quatw") == 0) {
@@ -264,15 +264,15 @@ ComputePropertyAtom::ComputePropertyAtom(LAMMPS *lmp, int narg, char **arg) :
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_quatk;
     } else if (strcmp(arg[iarg],"inertiax") == 0) {
-      if (!avec_ellipsoid) 
+      if (!avec_ellipsoid || !atom->superellipsoid_flag)
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_inertiax;
     } else if (strcmp(arg[iarg],"inertiay") == 0) {
-      if (!avec_ellipsoid) 
+      if (!avec_ellipsoid || !atom->superellipsoid_flag)
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_inertiay;
     } else if (strcmp(arg[iarg],"inertiaz") == 0) {
-      if (!avec_ellipsoid) 
+      if (!avec_ellipsoid || !atom->superellipsoid_flag)
         error->all(FLERR,"Compute property/atom {} requires atom style ellipsoid with super flag", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_inertiaz;
     } else if (strcmp(arg[iarg],"tqx") == 0) {
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index d263b2ae089..051bbf903f3 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -938,15 +938,15 @@ void FixMove::initial_integrate(int /*vflag*/)
                 quat = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
                 inertia = avec_ellipsoid->bonus_super[ellipsoid[i]].inertia;
               } else {
-              quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
-              shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
-              inertia_ellipsoid[0] =
-                  INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
-              inertia_ellipsoid[1] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
-              inertia_ellipsoid[2] =
-                  INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
-              inertia = inertia_ellipsoid;
+                quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
+                shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
+                inertia_ellipsoid[0] =
+                    INERTIA * rmass[i] * (shape[1] * shape[1] + shape[2] * shape[2]);
+                inertia_ellipsoid[1] =
+                    INERTIA * rmass[i] * (shape[0] * shape[0] + shape[2] * shape[2]);
+                inertia_ellipsoid[2] =
+                    INERTIA * rmass[i] * (shape[0] * shape[0] + shape[1] * shape[1]);
+                inertia = inertia_ellipsoid;
               }
             } else if (tri_flag && tri[i] >= 0) {
               quat = avec_tri->bonus[tri[i]].quat;

From 7b136951a99aa4458cd7a15af3730b596b630b14 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 12 Mar 2026 08:49:42 +0100
Subject: [PATCH 128/174] Removed multiplicity trick for algebraic formulation

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index ec263043376..9a3294d6bf2 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -408,15 +408,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
   double lsq = MathExtra::distsq3(xci, xcj);
   bool converged(false);
 
-  // Accelerate convergence rate for high blockiness / flat faces
-  // with high root multiplicity N
-  // e.g.: f(x) = x^N , Newton's iterate: x_k+1 = x_k - x_k / N
-  // Estimate N from |x_k+1 - x_k| / |x_k - x_k-1| = 1 - 1/N
-  // within bounds 1 < N < max(block)-1
-  // then multiply Newton's step size by N to recover quadratic convergence
-  double multiplicity(1.0);
   double rhs_old[3];
-  double blockmax = std::fmax(std::fmax(blocki[0],blocki[1]), std::fmax(blockj[0], blockj[1]));
 
   // avg radii for regularization if GEOMETRIC formulation
   double avg_radius_i = 1;
@@ -516,17 +508,13 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       }
     }
 
-    if (iter > 0)
-      multiplicity = std::fmin(std::fmax(1.0, 1.0 / (1.0 - std::sqrt(MathExtra::lensq3(rhs)/MathExtra::lensq3(rhs_old)))), blockmax - 1.0);
     MathExtra::copy3(rhs, rhs_old);
 
     // Backtracking line search
-    double a(multiplicity), X_line[4];
+    double X_line[4];
     int iter_ls;
+    double a = 1.0;
 
-    if (formulation == FORMULATION_GEOMETRIC) {
-      a = 1.0; // no need for multiplicity scaling
-    }
     // Limit the max step size to avoid jumping too far
     // normalize residual vector if step was limited
     double spatial_residual_norm = std::sqrt(rhs[0]*rhs[0] + rhs[1]*rhs[1] + rhs[2]*rhs[2]);

From d9b1318d8c2c00e1f43778acb25e543b35d58f09 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 12 Mar 2026 09:07:02 +0100
Subject: [PATCH 129/174] Removed column major lapack matrix format, old
 redundancy

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 23 ++++++++++------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 9a3294d6bf2..79445a17498 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -368,18 +368,18 @@ double compute_residual(const double shapefunci, const double* gradi_global,
 
 /* ---------------------------------------------------------------------- */
 
-void compute_jacobian(const double* gradi_global, const double hessi_global[3][3], const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian)
+void compute_jacobian(const double* gradi_global, const double hessi_global[3][3],
+                     const double* gradj_global, const double hessj_global[3][3], const double mu2, double* jacobian)
 {
   // Jacobian (derivative of residual)
-  // 1D column-major matrix for LAPACK/linalg compatibility
   for (int row = 0 ; row < 3 ; row++) {
     for (int col = 0 ; col < 3 ; col++) {
-      jacobian[row + col*4] = hessi_global[row][col] + mu2 * hessj_global[row][col];
+      jacobian[row*4 + col] = hessi_global[row][col] + mu2 * hessj_global[row][col];
     }
-    jacobian[row + 3*4] = gradj_global[row];
+    jacobian[row*4 + 3] = gradj_global[row];
   }
   for (int col = 0 ; col < 3 ; col++) {
-    jacobian[3 + col*4] = gradi_global[col] - gradj_global[col];
+    jacobian[3*4 + col] = gradi_global[col] - gradj_global[col];
   }
   jacobian[15] = 0.0;
 }
@@ -460,10 +460,8 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     double A_fast[16];
     double b_fast[4];
 
-    for(int r=0; r<4; ++r) {
-        for(int c=0; c<4; ++c) {
-            A_fast[r*4 + c] = jacobian[c*4 + r];
-        }
+    for(int i = 0; i < 16; ++i) {
+        A_fast[i] = jacobian[i];
     }
 
     b_fast[0] = -residual[0]; b_fast[1] = -residual[1];
@@ -487,11 +485,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
     if (!gauss_elim_solved) {
       // restore matrix
-      for(int r=0; r<4; ++r) {
-        for(int c=0; c<4; ++c) {
-            A_fast[r*4 + c] = jacobian[c*4 + r];
-        }
+      for(int i = 0; i < 16; ++i) {
+        A_fast[i] = jacobian[i];
       }
+
       b_fast[0] = -residual[0]; b_fast[1] = -residual[1];
       b_fast[2] = -residual[2]; b_fast[3] = -residual[3];
        // enforce a minimum regularization to avoid zero pivots in edge cases (flat on flat)

From b70c0842a75f1eb97bab14d4fa8c650e0803541d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 12 Mar 2026 09:31:14 +0100
Subject: [PATCH 130/174] Removed cout debug prints from math extra
 superellipsoids

---
 src/ASPHERE/math_extra_superellipsoids.cpp    |  4 --
 src/ASPHERE/math_extra_superellipsoids.h      |  3 --
 .../pair_gran_hertz_history_ellipsoid.cpp     | 22 ++++++----
 .../pair_gran_hooke_history_ellipsoid.cpp     | 43 ++++++++++---------
 4 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 79445a17498..48243857a64 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -635,10 +635,6 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
 
   if (!converged){
     if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
-    std::cout << "Current residual norm: " << norm << std::endl;
-    std::cout << "Shape functions: " << shapefunc[0] << ", " << shapefunc[1] << std::endl;
-    std::cout << "Positions X0: " << X0[0] << ", " << X0[1] << ", " << X0[2] << ", mu2: " << X0[3] << std::endl;
-    std::cout << "Normal nij: " << nij[0] << ", " << nij[1] << ", " << nij[2] << std::endl;
     return 2;} // not failing if not converged but shapefuncs positive (i.e., no contact)
               // might be risky to assume no contact if not converged, NR might have gone to a far away point
               // but no guarantee there is no contact
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 169a6591430..878914fb578 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -36,11 +36,8 @@ void global2local_vector(const double v[3], const double *quat, double local_v[3
 inline double det4_M44_zero(const double m[4][4]);
 
 // 4 by 4 sytems solvers, they all overwrite b with the solution
-inline bool solve_4x4_manual(double A[16], double b[4]);
-inline bool solve_4x4_robust(double A[16], double b[4]);
 inline bool solve_4x4_robust_unrolled(double A[16], double b[4]);
 
-// ADD CONTACT DETECTION HERE
 inline bool check_oriented_bounding_boxes(const double *xc1, const double R1[3][3],
                                           const double *shape1, const double *xc2,
                                           const double R2[3][3], const double *shape2,
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index fa7f97f87f8..8db45d3825d 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -190,9 +190,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
               touching = false;
             else
               error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed with status {} "
-                             "betwen particle {} and particle {} ",
-                             status, atom->tag[i], atom->tag[j]);
+                             "Ellipsoid contact detection (old contact) failed"
+                             "between particle {} and particle {}",
+                             atom->tag[i], atom->tag[j]);
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
@@ -233,9 +233,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
               else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
                 // keep trying until last iteration to avoid erroring out too early
                 error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed with status {} "
-                               "betwen particle {} and particle {}",
-                               status, atom->tag[i], atom->tag[j]);
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
               }
             }
           }
@@ -590,7 +590,10 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       return 0.0;
     }
     if (status != 0)
-      error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+      error->all(FLERR,
+                 "Ellipsoid contact detection (old contact) failed"
+                 "between particle {} and particle {}",
+                 atom->tag[i], atom->tag[j]);
   } else {
     double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
     double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
@@ -619,7 +622,10 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
         return 0.0;
       }
       if (status != 0)
-        error->all(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+        error->all(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
     }
   }
   double overlap1, overlap2, omegai[3], omegaj[3];
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index ffd71ec5303..a4473c339bb 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -243,22 +243,18 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               touching = true;
             else if (status == 1)
               touching = false;
-            else if (status == 2) {
+            else {
               error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed with status {} "
-                             "betwen particle {} and particle {} ",
-                             status, atom->tag[i], atom->tag[j]);
+                             "Ellipsoid contact detection (old contact) failed "
+                             "between particle {} and particle {} ",
+                             atom->tag[i], atom->tag[j]);
             }
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
-            // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
-            //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
-            //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
-            //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
-            //       This makes me think using radi and radj could be fine! To be investigated
-            //       MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
 
             double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
             double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
@@ -290,9 +286,9 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
               else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
                 // keep trying until last iteration to avoid erroring out too early
                 error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed with status {} "
-                               "betwen particle {} and particle {}",
-                               status, atom->tag[i], atom->tag[j]);
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
               }
             }
           }
@@ -591,16 +587,15 @@ void PairGranHookeHistoryEllipsoid::init_style()
   // error and warning checks
 
   if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
-    error->all(
-        FLERR,
-        "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and superellipdoid flag");
+    error->all(FLERR,
+               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
+               "superellipdoid flag");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
 
   // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
   int *ellipsoid = atom->ellipsoid;
-  if (!ellipsoid) 
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
+  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
 
   int nlocal = atom->nlocal;
   for (i = 0; i < nlocal; i++) {
@@ -873,7 +868,10 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
       return 0.0;
     }
     if (status != 0)
-      error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+      error->warning(FLERR,
+                     "Ellipsoid contact detection (old contact) failed"
+                     "between particle {} and particle {}",
+                     atom->tag[i], atom->tag[j]);
   } else {
     double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
     double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
@@ -902,7 +900,10 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
         return 0.0;
       }
       if (status != 0)
-        error->one(FLERR, "Ellipsoid contact detection failed with status {} ", status);
+        error->one(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
     }
   }
   double overlap1, overlap2, omegai[3], omegaj[3];

From 06b4dfb793e351d246679be8304961baf56f1779 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 12 Mar 2026 09:56:34 +0100
Subject: [PATCH 131/174] Removed outdated TODOs

---
 src/ASPHERE/math_extra_superellipsoids.cpp         |  4 ++--
 src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp | 10 +++-------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 48243857a64..166ba0d1596 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -26,7 +26,7 @@
 namespace MathExtraSuperellipsoids {
 
 inline constexpr double TIKHONOV_SCALE =
-    1e-14;    // TODO: inline constexpr are C++17, which is Okay as of 10Sep2025 version of LAMMPS!
+    1e-14;   
 
 static constexpr int ITERMAX_NR = 100;
 static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
@@ -586,7 +586,7 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
       } else if (norm > norm_old - PARAMETER_LS * a * norm_old) { // Armijo - Goldstein condition not met
         // Tested after convergence check because tiny values of norm and norm_old < TOL_NR
         // Can still fail the Armijo - Goldstein condition`
-        a *= CUTBACK_LS; // TODO: Golden-section search? Simple cutback strategy is crude and might miss low residual loci along the line search
+        a *= CUTBACK_LS;
       } else {
         // Only compute the jacobian if there is another Newton iteration to come
         double tmp_m[3][3];
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 8db45d3825d..52d76d2598e 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -196,13 +196,9 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
           } else {
             // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
 
-            // TODO: there might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            //       but this is good enough. We might even be able to use radi and radj which is cheaper, TBD when testing
-            //       If we pick a small radius, we could guaranteed to start outise the grains, would that be better for the Newton?
-            //       If we pick a large radius (e.g. radi, radj) we are more likely to start inside the grains, is this an easier minimization landscape to navigate?
-            //       I don't think there is a general answer because we don't know the shape, and contact point may be far from spherical initial guess
-            //       This makes me think using radi and radj could be fine! To be investigated
-            //       MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
 
             double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
             double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);

From dac73b20047ec773e9362d74a8c693ed95d96ee3 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 12 Mar 2026 16:25:53 +0100
Subject: [PATCH 132/174] Fixed residual scaling for polydisperse cases

---
 src/ASPHERE/math_extra_superellipsoids.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 166ba0d1596..21ac70ab9bf 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -346,8 +346,11 @@ double compute_residual(const double shapefunci, const double* gradi_global,
 
   // --- Spatial Normalization ---
   // Algebraic: Gradients are ~1/R. Dividing by lensq3 normalizes this.
+  // We take average of gradient for polydisperse case
   // Geometric: Gradients are unit vectors. lensq3 is 1.0. This works for both.
-  double spatial_norm = MathExtra::lensq3(residual) / MathExtra::lensq3(gradi_global);
+
+  double gradi_global_mag = 0.5 * (MathExtra::lensq3(gradi_global) + MathExtra::lensq3(gradj_global));
+  double spatial_norm = MathExtra::lensq3(residual) / gradi_global_mag;
 
   // --- Scalar Normalization ---
   double scalar_denom;

From 503c499efab20f387101a032b09c62a835164153 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Thu, 12 Mar 2026 16:20:33 -0600
Subject: [PATCH 133/174] Sketching out superellipsoid version of granular

---
 src/.gitignore                                |    2 +
 src/GRANULAR/pair_granular.cpp                |   22 +-
 src/GRANULAR/pair_granular.h                  |    1 -
 src/GRANULAR/pair_granular_superellipsoid.cpp | 1173 +++++++++++++++++
 src/GRANULAR/pair_granular_superellipsoid.h   |  125 ++
 5 files changed, 1312 insertions(+), 11 deletions(-)
 create mode 100644 src/GRANULAR/pair_granular_superellipsoid.cpp
 create mode 100644 src/GRANULAR/pair_granular_superellipsoid.h

diff --git a/src/.gitignore b/src/.gitignore
index 2f198273dc1..b6aa7954824 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -1340,6 +1340,8 @@
 /pair_gayberne.h
 /pair_granular.cpp
 /pair_granular.h
+/pair_granular_superellipsoid.cpp
+/pair_granular_superellipsoid.h
 /pair_gran_easy.cpp
 /pair_gran_easy.h
 /pair_gran_hertz_history.cpp
diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp
index d26f0dc2569..4a019699909 100644
--- a/src/GRANULAR/pair_granular.cpp
+++ b/src/GRANULAR/pair_granular.cpp
@@ -63,6 +63,9 @@ PairGranular::PairGranular(LAMMPS *lmp) : Pair(lmp)
   maxrad_dynamic = nullptr;
   maxrad_frozen = nullptr;
 
+  types_indices = nullptr;
+  cutoff_type = nullptr;
+
   // set comm size needed by this Pair if used with fix rigid
 
   comm_forward = 1;
@@ -110,13 +113,13 @@ PairGranular::~PairGranular()
 
 void PairGranular::compute(int eflag, int vflag)
 {
-  int i,j,k,ii,jj,inum,jnum,itype,jtype;
-  double factor_lj,mi,mj,meff;
+  int i, j, k, ii, jj, inum, jnum, itype, jtype;
+  double factor_lj, mi, mj, meff;
   double *forces, *torquesi, *torquesj, dq;
 
-  int *ilist,*jlist,*numneigh,**firstneigh;
-  int *touch,**firsttouch;
-  double *history,*allhistory,**firsthistory;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *history, *allhistory, **firsthistory;
 
   bool touchflag = false;
   const bool history_update = update->setupflag == 0;
@@ -148,10 +151,10 @@ void PairGranular::compute(int eflag, int vflag)
     comm->forward_comm(this);
   }
 
+  int *type = atom->type;
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
-  int *type = atom->type;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
@@ -275,10 +278,9 @@ void PairGranular::compute(int eflag, int vflag)
         if (force->newton_pair || j < nlocal) heatflow[j] -= dq;
       }
 
-      if (evflag) {
-        ev_tally_xyz(i,j,nlocal,force->newton_pair,
-          0.0,0.0,forces[0],forces[1],forces[2],model->dx[0],model->dx[1],model->dx[2]);
-      }
+      if (evflag)
+        ev_tally_xyz(i, j, nlocal, force->newton_pair, 0.0, 0.0, forces[0], forces[1], forces[2],
+            model->dx[0], model->dx[1], model->dx[2]);
     }
   }
 }
diff --git a/src/GRANULAR/pair_granular.h b/src/GRANULAR/pair_granular.h
index 0be649a7b51..995d5b13d47 100644
--- a/src/GRANULAR/pair_granular.h
+++ b/src/GRANULAR/pair_granular.h
@@ -58,7 +58,6 @@ class PairGranular : public Pair {
   int neighprev;
   double *onerad_dynamic, *onerad_frozen;
   double *maxrad_dynamic, *maxrad_frozen;
-  double **cut;
 
   class FixDummy *fix_dummy;
   class FixNeighHistory *fix_history;
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
new file mode 100644
index 00000000000..84088587c9f
--- /dev/null
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -0,0 +1,1173 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#include "pair_granular_superellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "math_extra.h"
+#include "math_extra_superellipsoids.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+#include <iostream>
+
+using namespace LAMMPS_NS;
+using namespace MathExtra;
+
+enum { HOOKE, HERTZ };
+enum { MASS_VELOCITY };
+enum { LINEAR_HISTORY };
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
+{
+  single_enable = 1;
+  no_virial_fdotr_compute = 1;
+  centroidstressflag = CENTROID_NOTAVAIL;
+  finitecutflag = 1;
+
+  single_extra = 17;
+  svector = new double[single_extra];
+
+  // Currently only option, generalize if more added
+  size_history = 0;
+  nondefault_history_transfer = 0;
+
+  neighprev = 0;
+  nmax = 0;
+  mass_rigid = nullptr;
+
+  onerad_dynamic = nullptr;
+  onerad_frozen = nullptr;
+  maxrad_dynamic = nullptr;
+  maxrad_frozen = nullptr;
+
+  cutoff_type = nullptr;
+
+  limit_damping = nullptr;
+  normal_model = nullptr;
+  damping_model = nullptr;
+  tangential_model = nullptr;
+
+  kn = nullptr;
+  gamman = nullptr;
+  kt = nullptr;
+  xt = nullptr;
+  xmu = nullptr;
+
+  // set comm size needed by this Pair if used with fix rigid
+
+  comm_forward = 1;
+
+  size_history = 0;
+  beyond_contact = 0;
+  nondefault_history_transfer = 0;
+  heat_flag = 0;
+
+  // create dummy fix as placeholder for FixNeighHistory
+  // this is so final order of Modify:fix will conform to input script
+
+  fix_history = nullptr;
+  fix_dummy = dynamic_cast<FixDummy *>(modify->add_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY all DUMMY"));
+
+  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairGranularSuperellipsoid::~PairGranularSuperellipsoid()
+{
+  delete[] svector;
+
+  if (!fix_history) modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY");
+  else modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE");
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(cutoff_type);
+    memory->destroy(limit_damping);
+    memory->destroy(normal_model);
+    memory->destroy(damping_model);
+    memory->destroy(tangential_model);
+    memory->destroy(kn);
+    memory->destroy(gamman);
+    memory->destroy(kt);
+    memory->destroy(xt);
+    memory->destroy(xmu);
+
+    // model variables
+
+    delete[] onerad_dynamic;
+    delete[] onerad_frozen;
+    delete[] maxrad_dynamic;
+    delete[] maxrad_frozen;
+  }
+
+  memory->destroy(mass_rigid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::compute(int eflag, int vflag)
+{
+  int i, j, k, ii, jj, inum, jnum;
+  double factor_lj, mi, mj, meff;
+
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *history, *allhistory, **firsthistory;
+
+  bool touchflag = false;
+  const bool history_update = update->setupflag == 0;
+
+  ev_init(eflag, vflag);
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  tagint *tag = atom->tag;
+  int *type = atom->type;
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    itype = type[i];
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      jtype = type[j];
+
+      // Reset model and copy initial geometric data
+
+      xi = x[i];
+      xj = x[j];
+      radi = radius[i];
+      radj = radius[j];
+      itype = itype;
+      jtype = jtype;
+      shear = &allhistory[size_history * jj];
+      X0_prev = &allhistory[3 + size_history * jj];
+      separating_axis = &allhistory[7 + size_history * jj];
+      int indx_ref = (tag[i] < tag[j]) ? i : j;
+      xref = x[indx_ref];
+      tagi = tag[i];
+      tagj = tag[j];
+      flagi = bonus[ellipsoid[i]].type;
+      flagj = bonus[ellipsoid[j]].type;
+
+      MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei0);
+      MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej0);
+      MathExtra::copy3(bonus[ellipsoid[i]].block, blocki0);
+      MathExtra::copy3(bonus[ellipsoid[j]].block, blockj0);
+      MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+      MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+      MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+      MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+      MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+
+      touchjj = touch[jj];
+
+      touchflag = check_contact();
+
+      if (!touchflag) {
+        // unset non-touching neighbors
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (k = 0; k < size_history; k++) history[k] = 0.0;
+        continue;
+      }
+
+      touch[jj] = 1;
+
+      // meff = effective mass of pair of particles
+      // if I or J part of rigid body, use body mass
+      // if I or J is frozen, meff is other particle
+      mi = rmass[i];
+      mj = rmass[j];
+      if (fix_rigid) {
+        if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+        if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+      }
+      meff = mi * mj / (mi + mj);
+      if (mask[i] & freeze_group_bit) meff = mj;
+      if (mask[j] & freeze_group_bit) meff = mi;
+
+      // Copy additional information and prepare force calculations
+
+      vi = v[i];
+      vj = v[j];
+      angmomi = angmom[i];
+      angmomj = angmom[j];
+      quati = bonus[ellipsoid[i]].quat;
+      quatj = bonus[ellipsoid[j]].quat;
+      inertiai = bonus[ellipsoid[i]].inertia;
+      inertiaj = bonus[ellipsoid[j]].inertia;
+
+      calculate_forces();
+
+      // apply forces & torques
+      scale3(factor_lj, forces);
+      add3(f[i], forces, f[i]);
+
+      scale3(factor_lj, torquesi);
+      add3(torque[i], torquesi, torque[i]);
+
+      if (force->newton_pair || j < nlocal) {
+        sub3(f[j], forces, f[j]);
+        scale3(factor_lj, torquesj);
+        add3(torque[j], torquesj, torque[j]);
+      }
+
+      if (evflag)
+        ev_tally_xyz(i, j, nlocal, force->newton_pair, 0.0, 0.0, forces[0], forces[1], forces[2],
+            dx[0], dx[1], dx[2]); // Correct even for non-spherical particles
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag, n + 1, n + 1, "pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+  memory->create(cutoff_type,n+1,n+1,"pair:cutoff_type");
+
+  memory->create(limit_damping,n+1,n+1,"pair:limit_damping");
+  memory->create(normal_model,n+1,n+1,"pair:normal_model");
+  memory->create(damping_model,n+1,n+1,"pair:damping_model");
+  memory->create(tangential_model,n+1,n+1,"pair:tangential_model");
+
+  memory->create(kn,n+1,n+1,"pair:kn");
+  memory->create(gamman,n+1,n+1,"pair:gamman");
+  memory->create(kt,n+1,n+1,"pair:kt");
+  memory->create(xt,n+1,n+1,"pair:xt");
+  memory->create(xmu,n+1,n+1,"pair:xmu");
+
+  onerad_dynamic = new double[n + 1];
+  onerad_frozen = new double[n + 1];
+  maxrad_dynamic = new double[n + 1];
+  maxrad_frozen = new double[n + 1];
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::settings(int narg, char **arg)
+{
+  if (narg == 1) {
+    cutoff_global = utils::numeric(FLERR,arg[0],false,lmp);
+  } else {
+    cutoff_global = -1; // will be set based on particle sizes, model choice
+  }
+
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
+
+  for (int iarg = 1; iarg < narg; iarg++) {
+    if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
+      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  if (bounding_box == 0) size_history--;
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::coeff(int narg, char **arg)
+{
+  double cutoff_one = -1;
+
+  if (narg < 3)
+    error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21));
+
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
+  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
+
+  int normal_one, damping_one, tangential_one, limit_one;
+  double kn_one, gamman_one, kt_one, xt_one, xmu_one;
+
+  int iarg = 2;
+  if (strcmp(arg[iarg], "hooke") == 0) {
+    normal_one = HOOKE;
+    if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
+    kn_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+    gamman_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+    if (kn_one < 0.0 || gamman_one < 0.0) error->all(FLERR, "Illegal linear normal model");
+    iarg += 3;
+  } else if (strcmp(arg[iarg], "hertz") == 0) {
+    normal_one = HOOKE;
+    if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
+    kn_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+    gamman_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+    if (kn_one < 0.0 || gamman_one < 0.0) error->all(FLERR, "Illegal linear normal model");
+    iarg += 3;
+  } else {
+    error->all(FLERR, "Unknown normal model {}", arg[iarg]);
+  }
+
+  damping_one = -1;
+
+
+  // convert Kn and Kt from pressure units to force/distance^2
+
+  kn_one /= force->nktv2p; // TODO revert if updating normal force model
+  kt_one /= force->nktv2p;
+
+  //Parse optional arguments
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "tangential") == 0) {
+      if (strcmp(arg[iarg], "linear_history") == 0) {
+        tangential_one = LINEAR_HISTORY;
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
+        kt_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+        xt_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        xmu_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0) error->all(FLERR, "Illegal linear tangential model");
+        iarg += 4;
+      } else {
+        error->all(FLERR, "Unknown normal model {}", arg[iarg]);
+      }
+    } else if (strcmp(arg[iarg], "damping") == 0) {
+      if (strcmp(arg[iarg], "mass_velocity") == 0) {
+        damping_one = MASS_VELOCITY;
+        iarg += 1;
+      } else {
+        error->all(FLERR, "Unknown normal model {}", arg[iarg]);
+      }
+    } else if (strcmp(arg[iarg], "rolling") == 0) {
+      error->all(FLERR, "Rolling models not yet implemented for superellipsoids");
+    } else if (strcmp(arg[iarg], "twisting") == 0) {
+      error->all(FLERR, "Twisting models not yet implemented for superellipsoids");
+    } else if (strcmp(arg[iarg], "heat") == 0) {
+      error->all(FLERR, "Heat models not yet implemented for superellipsoids");
+      heat_flag = 1;
+    } else if (strcmp(arg[iarg], "cutoff") == 0) {
+      if (iarg + 1 >= narg)
+        error->all(FLERR, "Illegal pair_coeff command, not enough parameters for cutoff keyword");
+      cutoff_one = utils::numeric(FLERR,arg[iarg + 1],false,lmp);
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "limit_damping") == 0) {
+      limit_one = 1;
+      iarg += 1;
+    } else error->all(FLERR, "Illegal pair_coeff command {}", arg[iarg]);
+  }
+
+  // Define default damping sub model if unspecified, has no coeffs
+  if (damping_one == -1)
+    damping_one = MASS_VELOCITY; // default in pair granular is VISCOELASTIC
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      cutoff_type[i][j] = cutoff_type[j][i] = cutoff_one;
+      limit_damping[i][j] = limit_damping[j][i] = limit_one;
+
+      normal_model[i][j] = normal_model[j][i] = normal_one;
+      damping_model[i][j] = damping_model[j][i] = damping_one;
+      tangential_model[i][j] = tangential_model[j][i] = tangential_one;
+
+      kn[i][j] = kn[j][i] = kn_one;
+      gamman[i][j] = gamman[j][i] = gamman_one;
+
+      kt[i][j] = kt[j][i] = kt_one;
+      xt[i][j] = xt[j][i] = xt_one;
+      xmu[i][j] = xmu[j][i] = xmu_one;
+
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21));
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::init_style()
+{
+  int i;
+
+  // error and warning checks
+
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
+    error->all(FLERR, "Pair granular/superellipsoid requires atom attributes radius, rmass, "
+               "angmom and superellipsoid flag");
+  if (comm->ghost_velocity == 0)
+    error->all(FLERR, "Pair granular/superellipsoid requires ghost atoms store velocity");
+
+  if (heat_flag) {
+    if (!atom->temperature_flag)
+      error->all(FLERR,"Heat conduction in pair granular/superellipsoid requires atom style with temperature property");
+    if (!atom->heatflow_flag)
+      error->all(FLERR,"Heat conduction in pair granular/superellipsoid requires atom style with heatflow property");
+  }
+
+  for (i = 0; i < atom->nlocal; i++)
+    if (atom->ellipsoid[i] < 0)
+      error->one(FLERR, "Pair granular/superellipsoid requires all atoms are ellipsoids");
+
+  // need a granular neighbor list
+
+  neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
+
+  dt = update->dt;
+
+  // if history is stored and first init, create Fix to store history
+  // it replaces FixDummy, created in the constructor
+  // this is so its order in the fix list is preserved
+
+  if (fix_history == nullptr) {
+    fix_history = dynamic_cast<FixNeighHistory *>(modify->replace_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY",
+                                                          "NEIGH_HISTORY_GRANULAR_SE"
+                                                          " all NEIGH_HISTORY "
+                                                          + std::to_string(size_history),1));
+    fix_history->pair = this;
+  } else {
+    fix_history = dynamic_cast<FixNeighHistory *>(modify->get_fix_by_id("NEIGH_HISTORY_GRANULAR_SE"));
+    if (!fix_history) error->all(FLERR,"Could not find pair fix neigh history ID");
+  }
+
+  // check for FixFreeze and set freeze_group_bit
+
+  auto fixlist = modify->get_fix_by_style("^freeze");
+  if (fixlist.size() == 0)
+    freeze_group_bit = 0;
+  else if (fixlist.size() > 1)
+    error->all(FLERR, "Only one fix freeze command at a time allowed");
+  else
+    freeze_group_bit = fixlist.front()->groupbit;
+
+  // check for FixRigid so can extract rigid body masses
+
+  fix_rigid = nullptr;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix->rigid_flag) {
+      if (fix_rigid)
+        error->all(FLERR, "Only one fix rigid command at a time allowed");
+      else fix_rigid = ifix;
+    }
+  }
+
+  // check for FixPour and FixDeposit so can extract particle radii
+
+  auto pours = modify->get_fix_by_style("^pour");
+  auto deps = modify->get_fix_by_style("^deposit");
+
+  // set maxrad_dynamic and maxrad_frozen for each type
+  // include future FixPour and FixDeposit particles as dynamic
+
+  int itype;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
+    for (auto &ipour : pours) {
+      itype = i;
+      double maxrad = *((double *) ipour->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+    for (auto &idep : deps) {
+      itype = i;
+      double maxrad = *((double *) idep->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+  }
+
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (mask[i] & freeze_group_bit)
+      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
+    else
+      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
+  }
+
+  MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,MPI_DOUBLE,MPI_MAX,world);
+  MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,MPI_DOUBLE,MPI_MAX,world);
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairGranularSuperellipsoid::init_one(int i, int j)
+{
+  double cutoff = 0.0;
+
+  if (setflag[i][j] == 0) {
+
+    limit_damping[i][j] = MAX(limit_damping[i][i],limit_damping[j][j]);
+
+    if (normal_model[i][i] != normal_model[j][j] ||
+        tangential_model[i][i] != tangential_model[j][j] ||
+        damping_model[i][i] != damping_model[j][j])
+      error->all(FLERR,"Granular pair style functional forms are different, "
+                 "cannot mix coefficients for types {} and {}.\n"
+                 "This combination must be set explicitly via a "
+                 "pair_coeff command",i,j);
+
+    kn[i][j] = mix_geom(kn[i][i], kn[j][j]);
+    gamman[i][j] = mix_geom(gamman[i][i], gamman[j][j]);
+    kt[i][j] = mix_geom(kt[i][i], kt[j][j]);
+    xt[i][j] = mix_geom(xt[i][i], xt[j][j]);
+    xmu[i][j] = mix_geom(xmu[i][i], xmu[j][j]);
+
+    cutoff_type[i][j] = cutoff_type[j][i] = MAX(cutoff_type[i][i], cutoff_type[j][j]);
+  }
+
+  // It is possible that cut[i][j] at this point is still 0.0.
+  // This can happen when
+  // there is a future fix_pour after the current run. A cut[i][j] = 0.0 creates
+  // problems because neighbor.cpp uses min(cut[i][j]) to decide on the bin size
+  // To avoid this issue, for cases involving  cut[i][j] = 0.0 (possible only
+  // if there is no current information about radius/cutoff of type i and j).
+  // we assign cutoff = max(cut[i][j]) for i,j such that cut[i][j] > 0.0.
+
+  if (cutoff_type[i][j] < 0 && cutoff_global < 0) {
+    if (((maxrad_dynamic[i] > 0.0) && (maxrad_dynamic[j] > 0.0)) ||
+        ((maxrad_dynamic[i] > 0.0) &&  (maxrad_frozen[j] > 0.0)) ||
+        // radius info about both i and j exist
+        ((maxrad_frozen[i] > 0.0)  && (maxrad_dynamic[j] > 0.0))) {
+      cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
+      cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
+      cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
+    } else {
+      // radius info about either i or j does not exist
+      // (i.e. not present and not about to get poured;
+      // set to largest value to not interfere with neighbor list)
+
+      double cutmax = 0.0;
+      for (int k = 1; k <= atom->ntypes; k++) {
+        cutmax = MAX(cutmax,2.0*maxrad_dynamic[k]);
+        cutmax = MAX(cutmax,2.0*maxrad_frozen[k]);
+      }
+      cutoff = cutmax;
+    }
+  } else if (cutoff_type[i][j] > 0) {
+    cutoff = cutoff_type[i][j];
+  } else if (cutoff_global > 0) {
+    cutoff = cutoff_global;
+  }
+
+  dt = update->dt;
+  return cutoff;
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::write_restart(FILE *fp)
+{
+  int i, j;
+  for (i = 1; i <= atom->ntypes; i++) {
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {
+        fwrite(&cutoff_type[i][j],sizeof(double),1,fp);
+        fwrite(&limit_damping[i][j],sizeof(int),1,fp);
+        fwrite(&normal_model[i][j],sizeof(int),1,fp);
+        fwrite(&tangential_model[i][j],sizeof(int),1,fp);
+        fwrite(&damping_model[i][j],sizeof(int),1,fp);
+
+        fwrite(&kn[i][j],sizeof(double),1,fp);
+        fwrite(&gamman[i][j],sizeof(double),1,fp);
+        fwrite(&kt[i][j],sizeof(double),1,fp);
+        fwrite(&xt[i][j],sizeof(double),1,fp);
+        fwrite(&xmu[i][j],sizeof(double),1,fp);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::read_restart(FILE *fp)
+{
+  allocate();
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++) {
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,nullptr,error);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          utils::sfread(FLERR,&cutoff_type[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&limit_damping[i][j],sizeof(int),1,fp,nullptr,error);
+          utils::sfread(FLERR,&normal_model[i][j],sizeof(int),1,fp,nullptr,error);
+          utils::sfread(FLERR,&tangential_model[i][j],sizeof(int),1,fp,nullptr,error);
+          utils::sfread(FLERR,&damping_model[i][j],sizeof(int),1,fp,nullptr,error);
+
+          utils::sfread(FLERR,&kn[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&gamman[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&kt[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&xt[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&xmu[i][j],sizeof(double),1,fp,nullptr,error);
+        }
+        MPI_Bcast(&cutoff_type[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&limit_damping[i][j],1,MPI_INT,0,world);
+        MPI_Bcast(&normal_model[i][j],1,MPI_INT,0,world);
+        MPI_Bcast(&tangential_model[i][j],1,MPI_INT,0,world);
+        MPI_Bcast(&damping_model[i][j],1,MPI_INT,0,world);
+
+        MPI_Bcast(&kn[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&gamman[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&kt[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&xt[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&xmu[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::reset_dt()
+{
+  dt = update->dt;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                             double /*factor_coul*/, double factor_lj,
+                                             double &fforce)
+{
+  if (factor_lj == 0) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+
+  int nall = atom->nlocal + atom->nghost;
+  if ((i >= nall) || (j >= nall))
+    error->all(FLERR,"Not enough atoms for pair granular single function");
+
+  // Reset model and copy initial geometric data
+
+  // If history is needed
+  double *history,*allhistory;
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+
+  if ((fix_history == nullptr) || (fix_history->firstvalue == nullptr))
+    error->one(FLERR,"Pair granular single computation needs history");
+  allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+  touchjj = fix_history->firstflag[i][neighprev];
+
+  xi = atom->x[i];
+  xj = atom->x[j];
+  radi = atom->radius[i];
+  radj = atom->radius[j];
+  itype = itype;
+  jtype = jtype;
+  shear = &allhistory[size_history * neighprev];
+  X0_prev = &allhistory[3 + size_history * neighprev];
+  separating_axis = &allhistory[7 + size_history * neighprev];
+  int indx_ref = (atom->tag[i] < atom->tag[j]) ? i : j;
+  xref = atom->x[indx_ref];
+  tagi = atom->tag[i];
+  tagj = atom->tag[j];
+  history_update = 0; // Don't update history
+
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei0);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej0);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki0);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj0);
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+
+  int touchflag = check_contact();
+
+  if (!touchflag) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  double mi = rmass[i];
+  double mj = rmass[j];
+  if (fix_rigid) {
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // Copy additional information and calculate forces
+
+  vi = atom->v[i];
+  vj = atom->v[j];
+  angmomi = atom->angmom[i];
+  angmomj = atom->angmom[j];
+  quati = bonus[ellipsoid[i]].quat;
+  quatj = bonus[ellipsoid[j]].quat;
+  inertiai = bonus[ellipsoid[i]].inertia;
+  inertiaj = bonus[ellipsoid[j]].inertia;
+
+  calculate_forces();
+
+  // set single_extra quantities
+  svector[0] = fs[0];
+  svector[1] = fs[1];
+  svector[2] = fs[2];
+  svector[3] = MathExtra::len3(fs);
+  svector[4] = 0.0;
+  svector[5] = 0.0;
+  svector[6] = 0.0;
+  svector[7] = 0.0;
+  svector[8] = 0.0;
+  svector[9] = dx[0];
+  svector[10] = dx[1];
+  svector[11] = dx[2];
+
+  // Superellipsoid specific values - were these included?
+
+  svector[12] = 0.0; //contact_point_and_Lagrange_multiplier[0]
+  svector[13] = 0.0; //contact_point_and_Lagrange_multiplier[1]
+  svector[14] = 0.0; //contact_point_and_Lagrange_multiplier[2]
+  svector[15] = 0.0; //contact_point_and_Lagrange_multiplier[3]
+  svector[16] = 0.0; //bounding_box_separating_axis_index
+
+  return 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairGranularSuperellipsoid::pack_forward_comm(int n, int *list, double *buf,
+                                                     int /*pbc_flag*/, int * /*pbc*/)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = mass_rigid[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairGranularSuperellipsoid::memory_usage()
+{
+  double bytes = (double) nmax * sizeof(double);
+  return bytes;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranularSuperellipsoid::mix_geom(double val1, double val2)
+{
+  return sqrt(val1 * val2);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranularSuperellipsoid::mix_mean(double val1, double val2)
+{
+  return 0.5 * (val1 + val2);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairGranularSuperellipsoid::check_contact()
+{
+  bool touching;
+  if (rsq >= radsum * radsum) {
+    touching = false;
+  } else {
+    bool skip_contact_detection(false);
+    if (bounding_box) {
+      skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+          xi, Ri, shapei, xj, Rj, shapej, separating_axis);
+    }
+    if (skip_contact_detection)
+      touching = false;
+    else {
+      // superellipsoid contact detection between atoms i and j
+      if (touchjj == 1) {
+        // Continued contact: use grain true shape and last contact point with respect to grain i
+        X0[0] = xref[0] + X0_prev[0];
+        X0[1] = xref[1] + X0_prev[1];
+        X0[2] = xref[2] + X0_prev[2];
+        X0[3] = X0_prev[3];
+        // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
+        //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
+        int status = MathExtraSuperellipsoids::determine_contact_point(
+            xi, Ri, shapei, blocki, flagi, xj, Rj, shapej, blockj, flagj, X0, nij, contact_formulation);
+        if (status == 0)
+          touching = true;
+        else if (status == 1)
+          touching = false;
+        else {
+          error->warning(FLERR,
+                         "Ellipsoid contact detection (old contact) failed "
+                         "between particle {} and particle {} ",
+                         tagi, tagj);
+        }
+      } else {
+        // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+        // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+        // but this is good enough. We might even be able to use radi and radj which is cheaper
+        // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+        double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+        double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+        MathExtra::scaleadd3(reqj / (reqi + reqj), xi, reqi / (reqi + reqj), xj, X0);
+        X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+        for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+          double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+          shapei[0] = shapei[1] = shapei[2] = reqi;
+          shapej[0] = shapej[1] = shapej[2] = reqj;
+          MathExtra::scaleadd3(1.0 - frac, shapei, frac, shapei0, shapei);
+          MathExtra::scaleadd3(1.0 - frac, shapej, frac, shapej0, shapej);
+          blocki[0] = 2.0 + frac * (blocki0[0] - 2.0);
+          blocki[1] = 2.0 + frac * (blocki0[1] - 2.0);
+          blockj[0] = 2.0 + frac * (blockj0[0] - 2.0);
+          blockj[1] = 2.0 + frac * (blockj0[1] - 2.0);
+
+          // force ellipsoid flag for first initial guess iteration.
+          // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+          int status = MathExtraSuperellipsoids::determine_contact_point(
+              xi, Ri, shapei, blocki,
+              iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, xj, Rj, shapej,
+              blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+              contact_formulation);
+
+          if (status == 0)
+            touching = true;
+          else if (status == 1)
+            touching = false;
+          else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+            // keep trying until last iteration to avoid erroring out too early
+            error->warning(FLERR,
+                           "Ellipsoid contact detection (new contact) failed"
+                           "between particle {} and particle {}",
+                           tagi, tagj);
+          }
+        }
+      }
+    }
+  }
+
+  return touching;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::calculate_forces()
+{
+  // Store contact point with respect to grain i for next time step
+  // This is crucial for periodic BCs when grains can move by large amount in one time step
+  // Keeping the previous contact point relative to global frame would lead to bad initial guess
+  X0_prev[0] = X0[0] - xref[0];
+  X0_prev[1] = X0[1] - xref[1];
+  X0_prev[2] = X0[2] - xref[2];
+  X0_prev[3] = X0[3];
+
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  // compute overlap depth along normal direction for each grain
+  // overlap is positive for both grains
+  double overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, xi);
+  double overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, xj);
+
+  // branch vectors
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, xi, cr1);
+  MathExtra::sub3(X0, xj, cr2);
+
+  // we need to take the cross product of omega
+
+  double ex_space[3], ey_space[3], ez_space[3], omegai[3], omegaj[3];
+  MathExtra::q_to_exyz(quati, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmomi, ex_space, ey_space, ez_space,
+                             inertiai, omegai);
+  MathExtra::q_to_exyz(quatj, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmomj, ex_space, ey_space, ez_space,
+                             inertiaj, omegaj);
+
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+  // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
+  double cv1[3], cv2[3];
+
+  cv1[0] = vi[0] + omega_cross_r1[0];
+  cv1[1] = vi[1] + omega_cross_r1[1];
+  cv1[2] = vi[2] + omega_cross_r1[2];
+
+  cv2[0] = vj[0] + omega_cross_r2[0];
+  cv2[1] = vj[1] + omega_cross_r2[1];
+  cv2[2] = vj[2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+  double vr1 = cv1[0] - cv2[0];
+  double vr2 = cv1[1] - cv2[1];
+  double vr3 = cv1[2] - cv2[2];
+
+  // normal component
+
+  double vn1 = nij[0] * vr1;    // dot product
+  double vn2 = nij[1] * vr2;
+  double vn3 = nij[2] * vr3;
+
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+
+  // tangential component
+
+  double vtr1 = vr1 - vnnr * nij[0];
+  double vtr2 = vr2 - vnnr * nij[1];
+  double vtr3 = vr3 - vnnr * nij[2];
+
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // normal forces = elastic contact + normal velocity damping
+
+  double damp = meff * gamman[itype][jtype] * vnnr;
+  double ccel = kn[itype][jtype] * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+
+  double polyhertz;
+  if (normal_model[itype][jtype] == HERTZ) {
+    double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+    MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+    MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+    if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
+      curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+          shapei, blocki, flagi, Ri, surf_point_i, xi);
+      curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+          shapej, blockj, flagj, Rj, surf_point_j, xj);
+    } else {
+      curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+          shapei, blocki, flagi, Ri, surf_point_i, xi);
+      curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+          shapej, blockj, flagj, Rj, surf_point_j, xj);
+    }
+
+    // hertzian contact radius approximation
+    polyhertz = sqrt((overlap1 + overlap2) / (curvature_i + curvature_j));
+    ccel *= polyhertz;
+  }
+
+  if (limit_damping[itype][jtype] && (ccel < 0.0)) ccel = 0.0;
+
+  // shear history effects
+
+  if (history_update) {
+    shear[0] += vtr1 * dt;
+    shear[1] += vtr2 * dt;
+    shear[2] += vtr3 * dt;
+  }
+  double shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  if (history_update) {
+
+    // rotate shear displacements
+
+    double rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+    shear[0] -= rsht * nij[0];
+    shear[1] -= rsht * nij[1];
+    shear[2] -= rsht * nij[2];
+  }
+
+  // tangential forces = shear + tangential velocity damping
+
+  double gammat = xt[itype][jtype] * gamman[itype][jtype];
+  double fs1 = -(kt[itype][jtype] * shear[0] + meff * gammat * vtr1);
+  double fs2 = -(kt[itype][jtype] * shear[1] + meff * gammat * vtr2);
+  double fs3 = -(kt[itype][jtype] * shear[2] + meff * gammat * vtr3);
+
+  if (normal_model[itype][jtype] == HERTZ) {
+    fs1 *= polyhertz;
+    fs2 *= polyhertz;
+    fs3 *= polyhertz;
+  }
+
+  // rescale frictional displacements and forces if needed
+
+  double fs_mag = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  double fn = xmu[itype][jtype] * fabs(ccel);
+
+  if (fs_mag > fn) {
+    if (shrmag != 0.0) {
+      shear[0] =
+          (fn / fs_mag) * (shear[0] + meff * gammat * vtr1 / kt[itype][jtype]) - meff * gammat * vtr1 / kt[itype][jtype];
+      shear[1] =
+          (fn / fs_mag) * (shear[1] + meff * gammat * vtr2 / kt[itype][jtype]) - meff * gammat * vtr2 / kt[itype][jtype];
+      shear[2] =
+          (fn / fs_mag) * (shear[2] + meff * gammat * vtr3 / kt[itype][jtype]) - meff * gammat * vtr3 / kt[itype][jtype];
+      fs1 *= fn / fs_mag;
+      fs2 *= fn / fs_mag;
+      fs3 *= fn / fs_mag;
+    } else
+      fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // forces & torques
+
+  forces[0] = nji[0] * ccel + fs1;
+  forces[1] = nji[1] * ccel + fs2;
+  forces[2] = nji[2] * ccel + fs3;
+
+  torquesi[0] = cr1[1] * forces[2] - cr1[2] * forces[1];
+  torquesi[1] = cr1[2] * forces[0] - cr1[0] * forces[2];
+  torquesi[2] = cr1[0] * forces[1] - cr1[1] * forces[0];
+
+  torquesj[0] = -cr2[1] * forces[2] - cr2[2] * forces[1];
+  torquesj[1] = -cr2[2] * forces[0] - cr2[0] * forces[2];
+  torquesj[2] = -cr2[0] * forces[1] - cr2[1] * forces[0];
+}
diff --git a/src/GRANULAR/pair_granular_superellipsoid.h b/src/GRANULAR/pair_granular_superellipsoid.h
new file mode 100644
index 00000000000..60ef047e74e
--- /dev/null
+++ b/src/GRANULAR/pair_granular_superellipsoid.h
@@ -0,0 +1,125 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(granular/superellipsoid,PairGranularSuperellipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRANULAR_SUPERELLIPSOID_H
+#define LMP_PAIR_GRANULAR_SUPERELLIPSOID_H
+
+#include "pair.h"
+
+#include "atom_vec_ellipsoid.h"
+
+namespace LAMMPS_NS {
+
+class PairGranularSuperellipsoid : public Pair {
+ public:
+  PairGranularSuperellipsoid(class LAMMPS *);
+  ~PairGranularSuperellipsoid() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void reset_dt() override;
+  double single(int, int, int, int, double, double, double, double &) override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  double memory_usage() override;
+
+ protected:
+  int freeze_group_bit;
+
+  int neighprev;
+  double *onerad_dynamic, *onerad_frozen;
+  double *maxrad_dynamic, *maxrad_frozen;
+
+  class FixDummy *fix_dummy;
+  class FixNeighHistory *fix_history;
+
+  // storage of rigid body masses for use in granular interactions
+
+  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
+  double *mass_rigid;      // rigid mass for owned+ghost atoms
+  int nmax;                // allocated size of mass_rigid
+
+  // Model variables
+  double dt;
+  int **normal_model;
+  int **damping_model;
+  int **tangential_model;
+  int **limit_damping;
+
+  // Normal coefficients
+  double **kn, **gamman;     // Hooke + Hertz
+
+  // Tangential coefficients
+  double **kt, **xt, **xmu;  // linear_history
+
+  // Intermediate values for contact model
+  int history_update, touchjj, itype, jtype;
+  double Fnormal, forces[3], torquesi[3], torquesj[3];
+  double radi, radj, meff, Fntot;
+  double *xi, *xj, *vi, *vj;
+  double fs[3], ft[3];
+  double dx[3], nx[3], r, rsq, rinv, Reff, radsum, delta, dR;
+  double vr[3], vn[3], vnnr, vt[3], wr[3], vtr[3], vrel;
+
+  double *quati, *quatj, *angmomi, *angmomj, *inertiai, *inertiaj;
+  double X0[4], nij[3], Ri[3][3], Rj[3][3];
+  double shapei0[3], blocki0[3], shapej0[3], blockj0[3], shapei[3], blocki[3], shapej[3], blockj[3];
+  double *shear, *X0_prev, *separating_axis, *xref;
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  tagint tagi, tagj;
+
+  void allocate();
+  double mix_geom(double, double);
+  double mix_mean(double, double);
+  int check_contact();
+  void calculate_forces();
+
+ private:
+  int size_history;
+  int heat_flag;
+
+  // optional user-specified global cutoff, per-type user-specified cutoffs
+  double **cutoff_type;
+  double cutoff_global;
+  int contact_formulation;
+  int bounding_box;
+  int curvature_model;
+
+  int extra_svector;
+
+  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
+  static double
+  shape_and_gradient_local(const double *, const double *, const double *,
+                           double *);    // would return a vector of temporary variables
+  static double hessian_local(
+      const double *, const double *, const double *,
+      double *);    // would use the above vector of temporary variables to compute local hessian
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From 35a6d53cd69a37e6f9a163054d1a008ff0b65e96 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Fri, 13 Mar 2026 09:58:04 +0100
Subject: [PATCH 134/174] Fixed minor bugs in torque and history for the new
 pair style

---
 .../pair_gran_hooke_history_ellipsoid.cpp     |  2 +-
 src/GRANULAR/pair_granular_superellipsoid.cpp | 31 +++++++++++++------
 src/GRANULAR/pair_granular_superellipsoid.h   |  3 +-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index a4473c339bb..b1696703d50 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -1076,7 +1076,7 @@ void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *tar
 {
   // Simple direct copy of all history variables (shear, contact point, axis)
   for (int i = 0; i < size_history; i++) {
-    // if (i < 3) target[i] = -source[i]; //shear
+    if (i < 3) target[i] = -source[i]; //shear
     target[i] = source[i];
   }
 }
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 84088587c9f..d7c064afa0a 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -58,8 +58,6 @@ PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
   svector = new double[single_extra];
 
   // Currently only option, generalize if more added
-  size_history = 0;
-  nondefault_history_transfer = 0;
 
   neighprev = 0;
   nmax = 0;
@@ -87,9 +85,9 @@ PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
 
   comm_forward = 1;
 
-  size_history = 0;
+  size_history = 8;
   beyond_contact = 0;
-  nondefault_history_transfer = 0;
+  nondefault_history_transfer = 1;
   heat_flag = 0;
 
   // create dummy fix as placeholder for FixNeighHistory
@@ -893,6 +891,21 @@ void PairGranularSuperellipsoid::unpack_forward_comm(int n, int first, double *b
   for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
 }
 
+/* ----------------------------------------------------------------------
+   Transfer history
+------------------------------------------------------------------------- */
+
+void PairGranularSuperellipsoid::transfer_history(double *source, double *target, int /*itype*/,
+                                                     int /*jtype*/)
+{
+  // copy of all history variables (shear, contact point, axis)
+  // TODO: only shear needs to be reversed?
+  for (int i = 0; i < size_history; i++) {
+    if (i < 3) target[i] = -source[i]; //shear
+    target[i] = source[i];
+  }
+}
+
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
@@ -919,7 +932,7 @@ double PairGranularSuperellipsoid::mix_mean(double val1, double val2)
 
 /* ---------------------------------------------------------------------- */
 
-int PairGranularSuperellipsoid::check_contact()
+bool PairGranularSuperellipsoid::check_contact()
 {
   bool touching;
   if (rsq >= radsum * radsum) {
@@ -1167,7 +1180,7 @@ void PairGranularSuperellipsoid::calculate_forces()
   torquesi[1] = cr1[2] * forces[0] - cr1[0] * forces[2];
   torquesi[2] = cr1[0] * forces[1] - cr1[1] * forces[0];
 
-  torquesj[0] = -cr2[1] * forces[2] - cr2[2] * forces[1];
-  torquesj[1] = -cr2[2] * forces[0] - cr2[0] * forces[2];
-  torquesj[2] = -cr2[0] * forces[1] - cr2[1] * forces[0];
-}
+  torquesj[0] = -cr2[1] * forces[2] + cr2[2] * forces[1];
+  torquesj[1] = -cr2[2] * forces[0] + cr2[0] * forces[2];
+  torquesj[2] = -cr2[0] * forces[1] + cr2[1] * forces[0];
+}
\ No newline at end of file
diff --git a/src/GRANULAR/pair_granular_superellipsoid.h b/src/GRANULAR/pair_granular_superellipsoid.h
index 60ef047e74e..842e03d5612 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.h
+++ b/src/GRANULAR/pair_granular_superellipsoid.h
@@ -45,6 +45,7 @@ class PairGranularSuperellipsoid : public Pair {
   int pack_forward_comm(int, int *, double *, int, int *) override;
   void unpack_forward_comm(int, int, double *) override;
   double memory_usage() override;
+  void transfer_history(double *, double *, int, int) override;
 
  protected:
   int freeze_group_bit;
@@ -94,7 +95,7 @@ class PairGranularSuperellipsoid : public Pair {
   void allocate();
   double mix_geom(double, double);
   double mix_mean(double, double);
-  int check_contact();
+  bool check_contact();
   void calculate_forces();
 
  private:

From 62e6211c1bd498491e29e12aea073e7bd6302cd9 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Fri, 13 Mar 2026 10:19:52 -0600
Subject: [PATCH 135/174] third take at history

---
 src/GRANULAR/pair_granular_superellipsoid.cpp |  173 +--
 src/GRANULAR/pair_granular_superellipsoid.h   |    3 +-
 src/pair_gran_hertz_history_ellipsoid.cpp     |  774 ++++++++++++
 src/pair_gran_hertz_history_ellipsoid.h       |   44 +
 src/pair_gran_hooke_history_ellipsoid.cpp     | 1082 +++++++++++++++++
 src/pair_gran_hooke_history_ellipsoid.h       |   91 ++
 6 files changed, 2083 insertions(+), 84 deletions(-)
 create mode 100644 src/pair_gran_hertz_history_ellipsoid.cpp
 create mode 100644 src/pair_gran_hertz_history_ellipsoid.h
 create mode 100644 src/pair_gran_hooke_history_ellipsoid.cpp
 create mode 100644 src/pair_gran_hooke_history_ellipsoid.h

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index d7c064afa0a..910f00f10a0 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -85,7 +85,9 @@ PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
 
   comm_forward = 1;
 
-  size_history = 8;
+  default_hist_size = 5;
+  size_history = default_hist_size; // default of 5 values, x0[4] and separating axis
+
   beyond_contact = 0;
   nondefault_history_transfer = 1;
   heat_flag = 0;
@@ -225,11 +227,8 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
       radj = radius[j];
       itype = itype;
       jtype = jtype;
-      shear = &allhistory[size_history * jj];
-      X0_prev = &allhistory[3 + size_history * jj];
-      separating_axis = &allhistory[7 + size_history * jj];
-      int indx_ref = (tag[i] < tag[j]) ? i : j;
-      xref = x[indx_ref];
+      history_data = &allhistory[size_history * jj];
+      xref = (tag[i] < tag[j]) ? xi : xj;
       tagi = tag[i];
       tagj = tag[j];
       flagi = bonus[ellipsoid[i]].type;
@@ -367,7 +366,10 @@ void PairGranularSuperellipsoid::settings(int narg, char **arg)
       error->all(FLERR, "Illegal pair_style command");
   }
 
-  if (bounding_box == 0) size_history--;
+  if (bounding_box == 0) {
+    default_hist_size--;
+    size_history--;
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -411,12 +413,6 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
 
   damping_one = -1;
 
-
-  // convert Kn and Kt from pressure units to force/distance^2
-
-  kn_one /= force->nktv2p; // TODO revert if updating normal force model
-  kt_one /= force->nktv2p;
-
   //Parse optional arguments
   while (iarg < narg) {
     if (strcmp(arg[iarg], "tangential") == 0) {
@@ -533,6 +529,13 @@ void PairGranularSuperellipsoid::init_style()
     if (!fix_history) error->all(FLERR,"Could not find pair fix neigh history ID");
   }
 
+  // grow history for contact models, right now this is superfluous and is just a placeholder
+
+  for (int itype = 1; itype <= atom->ntypes; itype++)
+    for (int jtype = 1; jtype <= atom->ntypes; jtype++)
+      if (tangential_model[itype][jtype] == LINEAR_HISTORY)
+        size_history += 3;
+
   // check for FixFreeze and set freeze_group_bit
 
   auto fixlist = modify->get_fix_by_style("^freeze");
@@ -755,8 +758,7 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
 
   // Reset model and copy initial geometric data
 
-  // If history is needed
-  double *history,*allhistory;
+  double *allhistory;
   int jnum = list->numneigh[i];
   int *jlist = list->firstneigh[i];
 
@@ -776,9 +778,7 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
   radj = atom->radius[j];
   itype = itype;
   jtype = jtype;
-  shear = &allhistory[size_history * neighprev];
-  X0_prev = &allhistory[3 + size_history * neighprev];
-  separating_axis = &allhistory[7 + size_history * neighprev];
+  history_data = &allhistory[size_history * neighprev];
   int indx_ref = (atom->tag[i] < atom->tag[j]) ? i : j;
   xref = atom->x[indx_ref];
   tagi = atom->tag[i];
@@ -940,74 +940,76 @@ bool PairGranularSuperellipsoid::check_contact()
   } else {
     bool skip_contact_detection(false);
     if (bounding_box) {
+      double *separating_axis = &history_data[4];
       skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
           xi, Ri, shapei, xj, Rj, shapej, separating_axis);
     }
-    if (skip_contact_detection)
+    if (skip_contact_detection) {
       touching = false;
-    else {
-      // superellipsoid contact detection between atoms i and j
-      if (touchjj == 1) {
-        // Continued contact: use grain true shape and last contact point with respect to grain i
-        X0[0] = xref[0] + X0_prev[0];
-        X0[1] = xref[1] + X0_prev[1];
-        X0[2] = xref[2] + X0_prev[2];
-        X0[3] = X0_prev[3];
-        // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
-        //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
+      return touching;
+    }
+
+    double *X0_prev = history_data;
+
+    // superellipsoid contact detection between atoms i and j
+
+    if (touchjj == 1) {
+      // Continued contact: use grain true shape and last contact point with respect to grain i
+      X0[0] = X0_prev[0] + xref[0];
+      X0[1] = X0_prev[1] + xref[1];
+      X0[2] = X0_prev[2] + xref[2];
+      X0[3] = X0_prev[3];
+      // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
+      //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          xi, Ri, shapei, blocki, flagi, xj, Rj, shapej, blockj, flagj, X0, nij, contact_formulation);
+      if (status == 0) {
+        touching = true;
+      } else if (status == 1) {
+        touching = false;
+      } else {
+        error->warning(FLERR, "Ellipsoid contact detection (old contact) failed "
+                       "between particle {} and particle {} ", tagi, tagj);
+      }
+    } else {
+      // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+      // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+      // but this is good enough. We might even be able to use radi and radj which is cheaper
+      // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+      double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+      double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+      double rsuminv = 1.0 / (reqi + reqj);
+      MathExtra::scaleadd3(reqj * rsuminv, xi, reqi * rsuminv, xj, X0);
+      X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+      for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+        double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+        shapei[0] = shapei[1] = shapei[2] = reqi;
+        shapej[0] = shapej[1] = shapej[2] = reqj;
+        MathExtra::scaleadd3(1.0 - frac, shapei, frac, shapei0, shapei);
+        MathExtra::scaleadd3(1.0 - frac, shapej, frac, shapej0, shapej);
+        blocki[0] = 2.0 + frac * (blocki0[0] - 2.0);
+        blocki[1] = 2.0 + frac * (blocki0[1] - 2.0);
+        blockj[0] = 2.0 + frac * (blockj0[0] - 2.0);
+        blockj[1] = 2.0 + frac * (blockj0[1] - 2.0);
+
+        // force ellipsoid flag for first initial guess iteration.
+        // Avoid incorrect values of n1/n2 - 2 in second derivatives.
         int status = MathExtraSuperellipsoids::determine_contact_point(
-            xi, Ri, shapei, blocki, flagi, xj, Rj, shapej, blockj, flagj, X0, nij, contact_formulation);
-        if (status == 0)
+            xi, Ri, shapei, blocki,
+            iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, xj, Rj, shapej,
+            blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+            contact_formulation);
+
+        if (status == 0) {
           touching = true;
-        else if (status == 1)
+        } else if (status == 1) {
           touching = false;
-        else {
-          error->warning(FLERR,
-                         "Ellipsoid contact detection (old contact) failed "
-                         "between particle {} and particle {} ",
-                         tagi, tagj);
-        }
-      } else {
-        // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-        // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-        // but this is good enough. We might even be able to use radi and radj which is cheaper
-        // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-        double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-        double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-        MathExtra::scaleadd3(reqj / (reqi + reqj), xi, reqi / (reqi + reqj), xj, X0);
-        X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-        for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-          double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-          shapei[0] = shapei[1] = shapei[2] = reqi;
-          shapej[0] = shapej[1] = shapej[2] = reqj;
-          MathExtra::scaleadd3(1.0 - frac, shapei, frac, shapei0, shapei);
-          MathExtra::scaleadd3(1.0 - frac, shapej, frac, shapej0, shapej);
-          blocki[0] = 2.0 + frac * (blocki0[0] - 2.0);
-          blocki[1] = 2.0 + frac * (blocki0[1] - 2.0);
-          blockj[0] = 2.0 + frac * (blockj0[0] - 2.0);
-          blockj[1] = 2.0 + frac * (blockj0[1] - 2.0);
-
-          // force ellipsoid flag for first initial guess iteration.
-          // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-          int status = MathExtraSuperellipsoids::determine_contact_point(
-              xi, Ri, shapei, blocki,
-              iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, xj, Rj, shapej,
-              blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-              contact_formulation);
-
-          if (status == 0)
-            touching = true;
-          else if (status == 1)
-            touching = false;
-          else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-            // keep trying until last iteration to avoid erroring out too early
-            error->warning(FLERR,
-                           "Ellipsoid contact detection (new contact) failed"
-                           "between particle {} and particle {}",
-                           tagi, tagj);
-          }
+        } else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+          // keep trying until last iteration to avoid erroring out too early
+          error->warning(FLERR, "Ellipsoid contact detection (new contact) failed"
+                         "between particle {} and particle {}", tagi, tagj);
         }
       }
     }
@@ -1023,10 +1025,14 @@ void PairGranularSuperellipsoid::calculate_forces()
   // Store contact point with respect to grain i for next time step
   // This is crucial for periodic BCs when grains can move by large amount in one time step
   // Keeping the previous contact point relative to global frame would lead to bad initial guess
-  X0_prev[0] = X0[0] - xref[0];
-  X0_prev[1] = X0[1] - xref[1];
-  X0_prev[2] = X0[2] - xref[2];
-  X0_prev[3] = X0[3];
+
+  if (history_update) {
+    double *X0_prev = history_data;
+    X0_prev[0] = X0[0] - xref[0];
+    X0_prev[1] = X0[1] - xref[1];
+    X0_prev[2] = X0[2] - xref[2];
+    X0_prev[3] = X0[3];
+  }
 
   double nji[3] = {-nij[0], -nij[1], -nij[2]};
   // compute overlap depth along normal direction for each grain
@@ -1119,6 +1125,7 @@ void PairGranularSuperellipsoid::calculate_forces()
   if (limit_damping[itype][jtype] && (ccel < 0.0)) ccel = 0.0;
 
   // shear history effects
+  double *shear = &history_data[default_hist_size];
 
   if (history_update) {
     shear[0] += vtr1 * dt;
diff --git a/src/GRANULAR/pair_granular_superellipsoid.h b/src/GRANULAR/pair_granular_superellipsoid.h
index 842e03d5612..7fdbca533fe 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.h
+++ b/src/GRANULAR/pair_granular_superellipsoid.h
@@ -69,6 +69,7 @@ class PairGranularSuperellipsoid : public Pair {
   int **damping_model;
   int **tangential_model;
   int **limit_damping;
+  int default_hist_size;
 
   // Normal coefficients
   double **kn, **gamman;     // Hooke + Hertz
@@ -88,7 +89,7 @@ class PairGranularSuperellipsoid : public Pair {
   double *quati, *quatj, *angmomi, *angmomj, *inertiai, *inertiaj;
   double X0[4], nij[3], Ri[3][3], Rj[3][3];
   double shapei0[3], blocki0[3], shapej0[3], blockj0[3], shapei[3], blocki[3], shapej[3], blockj[3];
-  double *shear, *X0_prev, *separating_axis, *xref;
+  double *history_data, *xref;
   AtomVecEllipsoid::BlockType flagi, flagj;
   tagint tagi, tagj;
 
diff --git a/src/pair_gran_hertz_history_ellipsoid.cpp b/src/pair_gran_hertz_history_ellipsoid.cpp
new file mode 100644
index 00000000000..52d76d2598e
--- /dev/null
+++ b/src/pair_gran_hertz_history_ellipsoid.cpp
@@ -0,0 +1,774 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hertz_history_ellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
+    PairGranHookeHistoryEllipsoid(lmp)
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double wr1, wr2, wr3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht, polyhertz;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
+
+  double shapex, shapey, shapez;    // ellipsoid shape params
+  double quat1, quat2, quat3, quat4;
+  double block1, block2;
+
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+
+      // TODO: Below could be a `touch()` function
+      bool touching;
+      if (rsq >= radsum * radsum) {
+        touching = false;
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if (bounding_box) {
+          separating_axis = &allhistory[7 + size_history * jj];
+          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+        }
+        if (skip_contact_detection)
+          touching = false;
+        else {
+          // superellipsoid contact detection between atoms i and j
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
+            X0[3] = X0_prev[3];
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
+            if (status == 0)
+              touching = true;
+            else if (status == 1)
+              touching = false;
+            else
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed"
+                             "between particle {} and particle {}",
+                             atom->tag[i], atom->tag[j]);
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
+              if (status == 0)
+                touching = true;
+              else if (status == 1)
+                touching = false;
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
+              }
+            }
+          }
+        }
+      }
+
+      if (!touching) {
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) history[k] = 0.0;
+      } else {
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
+        X0_prev[3] = X0[3];
+
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
+
+        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
+          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        } else {
+          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        }
+
+        polyhertz = sqrt((overlap1 + overlap2) /
+                         (curvature_i + curvature_j));    // hertzian contact radius approximation
+
+        // branch vectors
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
+
+        // we need to take the cross product of omega
+
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
+
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+        // relative translational velocity
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
+
+        // normal component
+
+        vn1 = nij[0] * vr1;    // dot product
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
+
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+
+        // tangential component
+
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
+
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i];
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hertzian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+        ccel *= polyhertz;
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allhistory[size_history * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx;
+
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx;
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
+        }
+
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
+{
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  bounding_box = 0;
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
+
+  for (int iarg = 6; iarg < narg; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
+      limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
+      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  size_history = 8;    // reset to default size
+  if (bounding_box == 0) size_history--;
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+
+  // convert Kn and Kt from pressure units to force/distance^2
+
+  kn /= force->nktv2p;
+  kt /= force->nktv2p;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
+{
+  double radi, radj, radsum;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  if (bounding_box) {
+    double separating_axis =
+        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
+    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+    if (no_bouding_box_contact) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // superellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij, contact_formulation);
+    if (status == 1) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+    if (status != 0)
+      error->all(FLERR,
+                 "Ellipsoid contact detection (old contact) failed"
+                 "between particle {} and particle {}",
+                 atom->tag[i], atom->tag[j]);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->all(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
+    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
+                                                                          surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
+                                                                          surf_point_j, x[j]);
+  } else {
+    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  }
+
+  polyhertz = sqrt((overlap1 + overlap2) /
+                   (curvature_i + curvature_j));    // hertzian contact radius approximation
+
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3], ey_space[3], ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
+
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+  // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
+
+  double **v = atom->v;
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
+
+  // normal component
+
+  vn1 = nij[0] * vr1;    // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
+
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+
+  // tangential component
+
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
+
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+  ccel *= polyhertz;
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  double *shear = &allhistory[size_history * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // set force (normalized by r) and return no energy
+
+  fforce = ccel / sqrt(rsq);
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
+
+  return 0.0;
+}
diff --git a/src/pair_gran_hertz_history_ellipsoid.h b/src/pair_gran_hertz_history_ellipsoid.h
new file mode 100644
index 00000000000..b02f6b733e7
--- /dev/null
+++ b/src/pair_gran_hertz_history_ellipsoid.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+
+#include "pair_gran_hooke_history_ellipsoid.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
+ public:
+  PairGranHertzHistoryEllipsoid(class LAMMPS *);
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  double single(int, int, int, int, double, double, double, double &) override;
+
+ protected:
+  int curvature_model;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/pair_gran_hooke_history_ellipsoid.cpp b/src/pair_gran_hooke_history_ellipsoid.cpp
new file mode 100644
index 00000000000..b1696703d50
--- /dev/null
+++ b/src/pair_gran_hooke_history_ellipsoid.cpp
@@ -0,0 +1,1082 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hooke_history_ellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+#include <iostream>
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
+{
+  single_enable = 1;
+  no_virial_fdotr_compute = 1;
+  centroidstressflag = CENTROID_NOTAVAIL;
+  finitecutflag = 1;
+  use_history = 1;
+  size_history =
+      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
+
+  single_extra = 10;
+  svector = new double[10];
+
+  neighprev = 0;
+
+  nmax = 0;
+  mass_rigid = nullptr;
+
+  // set comm size needed by this Pair if used with fix rigid
+
+  comm_forward = 1;
+
+  // keep default behavior of history[i][j] = -history[j][i]
+
+  nondefault_history_transfer = 1;
+
+  // create dummy fix as placeholder for FixNeighHistory
+  // this is so final order of Modify:fix will conform to input script
+
+  fix_history = nullptr;
+  fix_dummy = dynamic_cast<FixDummy *>(
+      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+
+  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
+{
+  if (copymode) return;
+
+  delete[] svector;
+
+  if (!fix_history)
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
+  else
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+
+    delete[] onerad_dynamic;
+    delete[] onerad_frozen;
+    delete[] maxrad_dynamic;
+    delete[] maxrad_frozen;
+  }
+
+  memory->destroy(mass_rigid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
+
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+
+      // TODO: Below could be a `touch()` function
+      bool touching;
+      if (rsq >= radsum * radsum) {
+        touching = false;
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if (bounding_box) {
+          separating_axis = &allhistory[7 + size_history * jj];
+          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+        }
+        if (skip_contact_detection)
+          touching = false;
+        else {
+          // superellipsoid contact detection between atoms i and j
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
+            X0[3] = X0_prev[3];
+            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
+            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
+            if (status == 0)
+              touching = true;
+            else if (status == 1)
+              touching = false;
+            else {
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed "
+                             "between particle {} and particle {} ",
+                             atom->tag[i], atom->tag[j]);
+            }
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
+
+              if (status == 0)
+                touching = true;
+              else if (status == 1)
+                touching = false;
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
+              }
+            }
+          }
+        }
+      }
+
+      if (!touching) {
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) history[k] = 0.0;
+      } else {
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
+        X0_prev[3] = X0[3];
+
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
+
+        // branch vectors
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
+
+        // we need to take the cross product of omega
+
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
+
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+        // relative translational velocity
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
+
+        // normal component
+
+        vn1 = nij[0] * vr1;    // dot product
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
+
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
+
+        // tangential component
+
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
+
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i];
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hookian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allhistory[size_history * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx;
+
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx;
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
+        }
+
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag, n + 1, n + 1, "pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++) setflag[i][j] = 0;
+
+  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
+
+  onerad_dynamic = new double[n + 1];
+  onerad_frozen = new double[n + 1];
+  maxrad_dynamic = new double[n + 1];
+  maxrad_frozen = new double[n + 1];
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
+{
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  bounding_box = 0;
+  for (int iarg = 6; iarg < narg; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
+      limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  size_history = 8;    // reset to default for safety
+  if (bounding_box == 0) size_history--;
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+  if (!allocated) allocate();
+
+  int ilo, ihi, jlo, jhi;
+  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::init_style()
+{
+  int i;
+
+  // error and warning checks
+
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
+    error->all(FLERR,
+               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
+               "superellipdoid flag");
+  if (comm->ghost_velocity == 0)
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
+
+  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
+  int *ellipsoid = atom->ellipsoid;
+  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
+
+  int nlocal = atom->nlocal;
+  for (i = 0; i < nlocal; i++) {
+    if (ellipsoid[i] < 0) {
+      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
+    }
+  }
+
+  // need a granular neighbor list
+
+  if (use_history)
+    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
+  else
+    neighbor->add_request(this, NeighConst::REQ_SIZE);
+
+  dt = update->dt;
+
+  // if history is stored and first init, create Fix to store history
+  // it replaces FixDummy, created in the constructor
+  // this is so its order in the fix list is preserved
+
+  if (use_history && (fix_history == nullptr)) {
+    auto cmd =
+        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
+    fix_history->pair = this;
+  }
+
+  // check for FixFreeze and set freeze_group_bit
+
+  auto fixlist = modify->get_fix_by_style("^freeze");
+  if (fixlist.size() == 0)
+    freeze_group_bit = 0;
+  else if (fixlist.size() > 1)
+    error->all(FLERR, "Only one fix freeze command at a time allowed");
+  else
+    freeze_group_bit = fixlist.front()->groupbit;
+
+  // check for FixRigid so can extract rigid body masses
+
+  fix_rigid = nullptr;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix->rigid_flag) {
+      if (fix_rigid)
+        error->all(FLERR, "Only one fix rigid command at a time allowed");
+      else
+        fix_rigid = ifix;
+    }
+  }
+
+  // check for FixPour and FixDeposit so can extract particle radii
+
+  auto pours = modify->get_fix_by_style("^pour");
+  auto deps = modify->get_fix_by_style("^deposit");
+
+  // set maxrad_dynamic and maxrad_frozen for each type
+  // include future FixPour and FixDeposit particles as dynamic
+
+  int itype;
+  for (i = 1; i <= atom->ntypes; i++) {
+    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
+    for (auto &ipour : pours) {
+      itype = i;
+      double maxrad = *((double *) ipour->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+    for (auto &idep : deps) {
+      itype = i;
+      double maxrad = *((double *) idep->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+  }
+
+  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
+
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  int *type = atom->type;
+
+  for (i = 0; i < nlocal; i++) {
+    if (mask[i] & freeze_group_bit)
+      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
+    else
+      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
+  }
+
+  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+
+  // set fix which stores history info
+
+  if (use_history) {
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
+    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
+{
+  if (!allocated) allocate();
+
+  // cutoff = sum of max I,J radii for
+  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
+
+  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
+  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
+  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
+  return cutoff;
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i, j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  int i, j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
+      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
+    }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
+{
+  fwrite(&kn, sizeof(double), 1, fp);
+  fwrite(&kt, sizeof(double), 1, fp);
+  fwrite(&gamman, sizeof(double), 1, fp);
+  fwrite(&gammat, sizeof(double), 1, fp);
+  fwrite(&xmu, sizeof(double), 1, fp);
+  fwrite(&dampflag, sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::reset_dt()
+{
+  dt = update->dt;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
+{
+  double radi, radj, radsum;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  if (bounding_box) {
+    double separating_axis =
+        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
+    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+    if (no_bouding_box_contact) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // superellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij, contact_formulation);
+    if (status == 1) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+    if (status != 0)
+      error->warning(FLERR,
+                     "Ellipsoid contact detection (old contact) failed"
+                     "between particle {} and particle {}",
+                     atom->tag[i], atom->tag[j]);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->one(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3], ey_space[3], ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
+
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+  // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
+
+  double **v = atom->v;
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
+
+  // normal component
+
+  vn1 = nij[0] * vr1;    // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
+
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
+
+  // tangential component
+
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
+
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  double *shear = &allhistory[size_history * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // set force (normalized by r) and return no energy
+
+  fforce = ccel / sqrt(rsq);
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
+
+  return 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
+                                                     int /*pbc_flag*/, int * /*pbc*/)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = mass_rigid[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::memory_usage()
+{
+  double bytes = (double) nmax * sizeof(double);
+  return bytes;
+}
+
+void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
+                                                     int /*jtype*/)
+{
+  // Simple direct copy of all history variables (shear, contact point, axis)
+  for (int i = 0; i < size_history; i++) {
+    if (i < 3) target[i] = -source[i]; //shear
+    target[i] = source[i];
+  }
+}
diff --git a/src/pair_gran_hooke_history_ellipsoid.h b/src/pair_gran_hooke_history_ellipsoid.h
new file mode 100644
index 00000000000..e3bcf3e038c
--- /dev/null
+++ b/src/pair_gran_hooke_history_ellipsoid.h
@@ -0,0 +1,91 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHookeHistoryEllipsoid : public Pair {
+ public:
+  PairGranHookeHistoryEllipsoid(class LAMMPS *);
+  ~PairGranHookeHistoryEllipsoid() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_restart_settings(FILE *) override;
+  void read_restart_settings(FILE *) override;
+  void reset_dt() override;
+  double single(int, int, int, int, double, double, double, double &) override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  double memory_usage() override;
+  void transfer_history(double *, double *, int, int) override;
+
+ protected:
+  double kn, kt, gamman, gammat, xmu;
+  int dampflag;
+  double dt;
+  int freeze_group_bit;
+  int use_history;
+  int limit_damping;
+  int bounding_box;
+
+  int neighprev;
+  double *onerad_dynamic, *onerad_frozen;
+  double *maxrad_dynamic, *maxrad_frozen;
+
+  int size_history;
+
+  class FixDummy *fix_dummy;
+  class FixNeighHistory *fix_history;
+
+  // storage of rigid body masses for use in granular interactions
+
+  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
+  double *mass_rigid;      // rigid mass for owned+ghost atoms
+  int nmax;                // allocated size of mass_rigid
+
+  int contact_formulation;
+
+  void allocate();
+
+ private:
+  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
+  static double
+  shape_and_gradient_local(const double *, const double *, const double *,
+                           double *);    // would return a vector of temporary variables
+  static double hessian_local(
+      const double *, const double *, const double *,
+      double *);    // would use the above vector of temporary variables to compute local hessian
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From a8bb43ef88f2bb3b4b94587ac6d06cb8531a5ba2 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Fri, 13 Mar 2026 10:33:54 -0600
Subject: [PATCH 136/174] Adjusting history passing in bounding box check

---
 src/ASPHERE/math_extra_superellipsoids.h      |   24 +-
 src/GRANULAR/pair_granular_superellipsoid.cpp |    9 +-
 src/pair_gran_hertz_history_ellipsoid.cpp     |  774 ------------
 src/pair_gran_hertz_history_ellipsoid.h       |   44 -
 src/pair_gran_hooke_history_ellipsoid.cpp     | 1082 -----------------
 src/pair_gran_hooke_history_ellipsoid.h       |   91 --
 6 files changed, 17 insertions(+), 2007 deletions(-)
 delete mode 100644 src/pair_gran_hertz_history_ellipsoid.cpp
 delete mode 100644 src/pair_gran_hertz_history_ellipsoid.h
 delete mode 100644 src/pair_gran_hooke_history_ellipsoid.cpp
 delete mode 100644 src/pair_gran_hooke_history_ellipsoid.h

diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 878914fb578..1c6b7387be7 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -38,10 +38,10 @@ inline double det4_M44_zero(const double m[4][4]);
 // 4 by 4 sytems solvers, they all overwrite b with the solution
 inline bool solve_4x4_robust_unrolled(double A[16], double b[4]);
 
-inline bool check_oriented_bounding_boxes(const double *xc1, const double R1[3][3],
+inline int check_oriented_bounding_boxes(const double *xc1, const double R1[3][3],
                                           const double *shape1, const double *xc2,
                                           const double R2[3][3], const double *shape2,
-                                          double *cached_axis);
+                                          int cached_axis);
 
 inline bool check_intersection_axis(const int axis_id, const double C[3][3],
                                     const double AbsC[3][3], const double *center_distance_box1,
@@ -298,12 +298,11 @@ inline bool MathExtraSuperellipsoids::solve_4x4_robust_unrolled(double A[16], do
      Algorithm from https://www.geometrictools.com/Documentation/DynamicCollisionDetection.pdf
 ------------------------------------------------------------------------- */
 
-inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+inline int MathExtraSuperellipsoids::check_oriented_bounding_boxes(
     const double *xc1, const double R1[3][3], const double *shape1, const double *xc2,
-    const double R2[3][3], const double *shape2, double *cached_axis)
+    const double R2[3][3], const double *shape2, int axis)
 {
-  // cache axis is the axis that separated the boxes last time
-  // due to temporal coherence we check it first
+  // return -1 to skip contact detection
 
   bool separated = false;
 
@@ -324,23 +323,20 @@ inline bool MathExtraSuperellipsoids::check_oriented_bounding_boxes(
   MathExtra::transpose_matvec(R1, center_distance, center_distance_box1);
   MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
 
-  // first check the cached axis
-  const int axis = (int) (*cached_axis);
+  // first check the cached axis, for temporal coherence
   separated = check_intersection_axis(axis, C, AbsC, center_distance_box1, center_distance_box2,
                                       shape1, shape2);
 
-  if (separated) return true;
+  if (separated) return axis;
   // then check all the other axes
   for (int axis_id = 0; axis_id < 15; axis_id++) {
     if (axis_id == axis) continue;    // already checked
     separated = check_intersection_axis(axis_id, C, AbsC, center_distance_box1,
                                         center_distance_box2, shape1, shape2);
-    if (separated) {
-      *cached_axis = axis_id;    // update cached axis
-      return true;
-    }
+    if (separated)
+      return axis_id; // update cached axis
   }
-  return false;    // no separation found
+  return -1;    // no separation found
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 910f00f10a0..e1919f873ac 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -940,9 +940,14 @@ bool PairGranularSuperellipsoid::check_contact()
   } else {
     bool skip_contact_detection(false);
     if (bounding_box) {
-      double *separating_axis = &history_data[4];
-      skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+      int separating_axis = (int) (history_data[4]);
+      int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
           xi, Ri, shapei, xj, Rj, shapej, separating_axis);
+      if (new_axis != -1) {
+        skip_contact_detection = true;
+        if (history_update)
+          history_data[4] = (double) new_axis;
+      }
     }
     if (skip_contact_detection) {
       touching = false;
diff --git a/src/pair_gran_hertz_history_ellipsoid.cpp b/src/pair_gran_hertz_history_ellipsoid.cpp
deleted file mode 100644
index 52d76d2598e..00000000000
--- a/src/pair_gran_hertz_history_ellipsoid.cpp
+++ /dev/null
@@ -1,774 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hertz_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
-    PairGranHookeHistoryEllipsoid(lmp)
-{
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double wr1, wr2, wr3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht, polyhertz;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
-
-  double shapex, shapey, shapez;    // ellipsoid shape params
-  double quat1, quat2, quat3, quat4;
-  double block1, block2;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          separating_axis = &allhistory[7 + size_history * jj];
-          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed"
-                             "between particle {} and particle {}",
-                             atom->tag[i], atom->tag[j]);
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) history[k] = 0.0;
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        } else {
-          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        }
-
-        polyhertz = sqrt((overlap1 + overlap2) /
-                         (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hertzian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        ccel *= polyhertz;
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
-
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
-      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default size
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-
-  // convert Kn and Kt from pressure units to force/distance^2
-
-  kn /= force->nktv2p;
-  kt /= force->nktv2p;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  if (bounding_box) {
-    double separating_axis =
-        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
-    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
-    if (no_bouding_box_contact) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->all(FLERR,
-                 "Ellipsoid contact detection (old contact) failed"
-                 "between particle {} and particle {}",
-                 atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->all(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
-                                                                          surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
-                                                                          surf_point_j, x[j]);
-  } else {
-    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-  }
-
-  polyhertz = sqrt((overlap1 + overlap2) /
-                   (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  ccel *= polyhertz;
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
diff --git a/src/pair_gran_hertz_history_ellipsoid.h b/src/pair_gran_hertz_history_ellipsoid.h
deleted file mode 100644
index b02f6b733e7..00000000000
--- a/src/pair_gran_hertz_history_ellipsoid.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
- public:
-  PairGranHertzHistoryEllipsoid(class LAMMPS *);
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  double single(int, int, int, int, double, double, double, double &) override;
-
- protected:
-  int curvature_model;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/pair_gran_hooke_history_ellipsoid.cpp b/src/pair_gran_hooke_history_ellipsoid.cpp
deleted file mode 100644
index b1696703d50..00000000000
--- a/src/pair_gran_hooke_history_ellipsoid.cpp
+++ /dev/null
@@ -1,1082 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-#include <iostream>
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
-{
-  single_enable = 1;
-  no_virial_fdotr_compute = 1;
-  centroidstressflag = CENTROID_NOTAVAIL;
-  finitecutflag = 1;
-  use_history = 1;
-  size_history =
-      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
-
-  single_extra = 10;
-  svector = new double[10];
-
-  neighprev = 0;
-
-  nmax = 0;
-  mass_rigid = nullptr;
-
-  // set comm size needed by this Pair if used with fix rigid
-
-  comm_forward = 1;
-
-  // keep default behavior of history[i][j] = -history[j][i]
-
-  nondefault_history_transfer = 1;
-
-  // create dummy fix as placeholder for FixNeighHistory
-  // this is so final order of Modify:fix will conform to input script
-
-  fix_history = nullptr;
-  fix_dummy = dynamic_cast<FixDummy *>(
-      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
-
-  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-}
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
-{
-  if (copymode) return;
-
-  delete[] svector;
-
-  if (!fix_history)
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
-  else
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
-
-  if (allocated) {
-    memory->destroy(setflag);
-    memory->destroy(cutsq);
-
-    delete[] onerad_dynamic;
-    delete[] onerad_frozen;
-    delete[] maxrad_dynamic;
-    delete[] maxrad_frozen;
-  }
-
-  memory->destroy(mass_rigid);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          separating_axis = &allhistory[7 + size_history * jj];
-          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
-            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else {
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed "
-                             "between particle {} and particle {} ",
-                             atom->tag[i], atom->tag[j]);
-            }
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) history[k] = 0.0;
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hookian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   allocate all arrays
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::allocate()
-{
-  allocated = 1;
-  int n = atom->ntypes;
-
-  memory->create(setflag, n + 1, n + 1, "pair:setflag");
-  for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++) setflag[i][j] = 0;
-
-  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
-
-  onerad_dynamic = new double[n + 1];
-  onerad_frozen = new double[n + 1];
-  maxrad_dynamic = new double[n + 1];
-  maxrad_frozen = new double[n + 1];
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default for safety
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more type pairs
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
-{
-  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-  if (!allocated) allocate();
-
-  int ilo, ihi, jlo, jhi;
-  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
-  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
-
-  int count = 0;
-  for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo, i); j <= jhi; j++) {
-      setflag[i][j] = 1;
-      count++;
-    }
-  }
-
-  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-}
-
-/* ----------------------------------------------------------------------
-   init specific to this pair style
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::init_style()
-{
-  int i;
-
-  // error and warning checks
-
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
-    error->all(FLERR,
-               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
-               "superellipdoid flag");
-  if (comm->ghost_velocity == 0)
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
-
-  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
-  int *ellipsoid = atom->ellipsoid;
-  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
-
-  int nlocal = atom->nlocal;
-  for (i = 0; i < nlocal; i++) {
-    if (ellipsoid[i] < 0) {
-      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
-    }
-  }
-
-  // need a granular neighbor list
-
-  if (use_history)
-    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
-  else
-    neighbor->add_request(this, NeighConst::REQ_SIZE);
-
-  dt = update->dt;
-
-  // if history is stored and first init, create Fix to store history
-  // it replaces FixDummy, created in the constructor
-  // this is so its order in the fix list is preserved
-
-  if (use_history && (fix_history == nullptr)) {
-    auto cmd =
-        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
-    fix_history->pair = this;
-  }
-
-  // check for FixFreeze and set freeze_group_bit
-
-  auto fixlist = modify->get_fix_by_style("^freeze");
-  if (fixlist.size() == 0)
-    freeze_group_bit = 0;
-  else if (fixlist.size() > 1)
-    error->all(FLERR, "Only one fix freeze command at a time allowed");
-  else
-    freeze_group_bit = fixlist.front()->groupbit;
-
-  // check for FixRigid so can extract rigid body masses
-
-  fix_rigid = nullptr;
-  for (const auto &ifix : modify->get_fix_list()) {
-    if (ifix->rigid_flag) {
-      if (fix_rigid)
-        error->all(FLERR, "Only one fix rigid command at a time allowed");
-      else
-        fix_rigid = ifix;
-    }
-  }
-
-  // check for FixPour and FixDeposit so can extract particle radii
-
-  auto pours = modify->get_fix_by_style("^pour");
-  auto deps = modify->get_fix_by_style("^deposit");
-
-  // set maxrad_dynamic and maxrad_frozen for each type
-  // include future FixPour and FixDeposit particles as dynamic
-
-  int itype;
-  for (i = 1; i <= atom->ntypes; i++) {
-    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
-    for (auto &ipour : pours) {
-      itype = i;
-      double maxrad = *((double *) ipour->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-    for (auto &idep : deps) {
-      itype = i;
-      double maxrad = *((double *) idep->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-  }
-
-  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
-
-  double *radius = atom->radius;
-  int *mask = atom->mask;
-  int *type = atom->type;
-
-  for (i = 0; i < nlocal; i++) {
-    if (mask[i] & freeze_group_bit)
-      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
-    else
-      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
-  }
-
-  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-
-  // set fix which stores history info
-
-  if (use_history) {
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
-    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
-  }
-}
-
-/* ----------------------------------------------------------------------
-   init for one type pair i,j and corresponding j,i
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
-{
-  if (!allocated) allocate();
-
-  // cutoff = sum of max I,J radii for
-  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
-
-  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
-  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
-  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
-  return cutoff;
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
-{
-  write_restart_settings(fp);
-
-  int i, j;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
-{
-  read_restart_settings(fp);
-  allocate();
-
-  int i, j;
-  int me = comm->me;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
-      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
-    }
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
-{
-  fwrite(&kn, sizeof(double), 1, fp);
-  fwrite(&kt, sizeof(double), 1, fp);
-  fwrite(&gamman, sizeof(double), 1, fp);
-  fwrite(&gammat, sizeof(double), 1, fp);
-  fwrite(&xmu, sizeof(double), 1, fp);
-  fwrite(&dampflag, sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
-{
-  if (comm->me == 0) {
-    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
-  }
-  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::reset_dt()
-{
-  dt = update->dt;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  if (bounding_box) {
-    double separating_axis =
-        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
-    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
-    if (no_bouding_box_contact) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->warning(FLERR,
-                     "Ellipsoid contact detection (old contact) failed"
-                     "between particle {} and particle {}",
-                     atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->one(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
-                                                     int /*pbc_flag*/, int * /*pbc*/)
-{
-  int i, j, m;
-
-  m = 0;
-  for (i = 0; i < n; i++) {
-    j = list[i];
-    buf[m++] = mass_rigid[j];
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
-{
-  int i, m, last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local atom-based arrays
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::memory_usage()
-{
-  double bytes = (double) nmax * sizeof(double);
-  return bytes;
-}
-
-void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
-                                                     int /*jtype*/)
-{
-  // Simple direct copy of all history variables (shear, contact point, axis)
-  for (int i = 0; i < size_history; i++) {
-    if (i < 3) target[i] = -source[i]; //shear
-    target[i] = source[i];
-  }
-}
diff --git a/src/pair_gran_hooke_history_ellipsoid.h b/src/pair_gran_hooke_history_ellipsoid.h
deleted file mode 100644
index e3bcf3e038c..00000000000
--- a/src/pair_gran_hooke_history_ellipsoid.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-
-#include "pair.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHookeHistoryEllipsoid : public Pair {
- public:
-  PairGranHookeHistoryEllipsoid(class LAMMPS *);
-  ~PairGranHookeHistoryEllipsoid() override;
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  void coeff(int, char **) override;
-  void init_style() override;
-  double init_one(int, int) override;
-  void write_restart(FILE *) override;
-  void read_restart(FILE *) override;
-  void write_restart_settings(FILE *) override;
-  void read_restart_settings(FILE *) override;
-  void reset_dt() override;
-  double single(int, int, int, int, double, double, double, double &) override;
-  int pack_forward_comm(int, int *, double *, int, int *) override;
-  void unpack_forward_comm(int, int, double *) override;
-  double memory_usage() override;
-  void transfer_history(double *, double *, int, int) override;
-
- protected:
-  double kn, kt, gamman, gammat, xmu;
-  int dampflag;
-  double dt;
-  int freeze_group_bit;
-  int use_history;
-  int limit_damping;
-  int bounding_box;
-
-  int neighprev;
-  double *onerad_dynamic, *onerad_frozen;
-  double *maxrad_dynamic, *maxrad_frozen;
-
-  int size_history;
-
-  class FixDummy *fix_dummy;
-  class FixNeighHistory *fix_history;
-
-  // storage of rigid body masses for use in granular interactions
-
-  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
-  double *mass_rigid;      // rigid mass for owned+ghost atoms
-  int nmax;                // allocated size of mass_rigid
-
-  int contact_formulation;
-
-  void allocate();
-
- private:
-  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
-  static double
-  shape_and_gradient_local(const double *, const double *, const double *,
-                           double *);    // would return a vector of temporary variables
-  static double hessian_local(
-      const double *, const double *, const double *,
-      double *);    // would use the above vector of temporary variables to compute local hessian
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif

From 488c2d34e33808f158baf4fdef780c2c497701f6 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Fri, 13 Mar 2026 10:46:44 -0600
Subject: [PATCH 137/174] Forgot to update transfer_history method

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index e1919f873ac..7afa929eed4 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -895,14 +895,16 @@ void PairGranularSuperellipsoid::unpack_forward_comm(int n, int first, double *b
    Transfer history
 ------------------------------------------------------------------------- */
 
-void PairGranularSuperellipsoid::transfer_history(double *source, double *target, int /*itype*/,
-                                                     int /*jtype*/)
+void PairGranularSuperellipsoid::transfer_history(double *source, double *target, int itype, int jtype)
 {
   // copy of all history variables (shear, contact point, axis)
-  // TODO: only shear needs to be reversed?
+
   for (int i = 0; i < size_history; i++) {
-    if (i < 3) target[i] = -source[i]; //shear
-    target[i] = source[i];
+    if (i > default_hist_size && tangential_model[itype][jtype] == LINEAR_HISTORY) {
+      target[i] = -source[i]; //shear
+    } else {
+      target[i] = source[i];
+    }
   }
 }
 

From 3788d322911ebd37db19a6fb6f1d46570b3aa805 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Mon, 16 Mar 2026 12:53:18 -0600
Subject: [PATCH 138/174] Patching bugs, aligning more with original granular,
 adding more submodels

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 256 ++++++++++++------
 src/GRANULAR/pair_granular_superellipsoid.h   |   5 +-
 2 files changed, 171 insertions(+), 90 deletions(-)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 7afa929eed4..560d252afe8 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -40,10 +40,11 @@ using namespace LAMMPS_NS;
 using namespace MathExtra;
 
 enum { HOOKE, HERTZ };
-enum { MASS_VELOCITY };
-enum { LINEAR_HISTORY };
+enum { MASS_VELOCITY , VISCOELASTIC};
+enum { CLASSIC , LINEAR_HISTORY};
 
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+static constexpr double EPSILON = 1e-10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -401,7 +402,7 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
     if (kn_one < 0.0 || gamman_one < 0.0) error->all(FLERR, "Illegal linear normal model");
     iarg += 3;
   } else if (strcmp(arg[iarg], "hertz") == 0) {
-    normal_one = HOOKE;
+    normal_one = HERTZ;
     if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
     kn_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
     gamman_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
@@ -424,6 +425,14 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
         xmu_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
         if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0) error->all(FLERR, "Illegal linear tangential model");
         iarg += 4;
+      } else if (strcmp(arg[iarg], "classic") == 0) {
+        tangential_one = CLASSIC;
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
+        kt_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+        xt_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        xmu_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0) error->all(FLERR, "Illegal linear tangential model");
+        iarg += 4;
       } else {
         error->all(FLERR, "Unknown normal model {}", arg[iarg]);
       }
@@ -431,6 +440,9 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
       if (strcmp(arg[iarg], "mass_velocity") == 0) {
         damping_one = MASS_VELOCITY;
         iarg += 1;
+      } else if (strcmp(arg[iarg], "viscoelastic") == 0) {
+        damping_one = VISCOELASTIC;
+        iarg += 1;
       } else {
         error->all(FLERR, "Unknown normal model {}", arg[iarg]);
       }
@@ -454,7 +466,11 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
 
   // Define default damping sub model if unspecified, has no coeffs
   if (damping_one == -1)
-    damping_one = MASS_VELOCITY; // default in pair granular is VISCOELASTIC
+    damping_one = VISCOELASTIC;
+
+  // granular model init
+  if (normal_one == HERTZ || damping_one == VISCOELASTIC)
+    contact_radius_flag = 1;
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
@@ -533,7 +549,7 @@ void PairGranularSuperellipsoid::init_style()
 
   for (int itype = 1; itype <= atom->ntypes; itype++)
     for (int jtype = 1; jtype <= atom->ntypes; jtype++)
-      if (tangential_model[itype][jtype] == LINEAR_HISTORY)
+      if (tangential_model[itype][jtype] == CLASSIC)
         size_history += 3;
 
   // check for FixFreeze and set freeze_group_bit
@@ -900,7 +916,7 @@ void PairGranularSuperellipsoid::transfer_history(double *source, double *target
   // copy of all history variables (shear, contact point, axis)
 
   for (int i = 0; i < size_history; i++) {
-    if (i > default_hist_size && tangential_model[itype][jtype] == LINEAR_HISTORY) {
+    if (i >= default_hist_size && tangential_model[itype][jtype] == CLASSIC) {
       target[i] = -source[i]; //shear
     } else {
       target[i] = source[i];
@@ -1070,44 +1086,30 @@ void PairGranularSuperellipsoid::calculate_forces()
   // compute directly the sum of relative translational velocity at contact point
   // since rotational velocity contribution is different for superellipsoids
   double cv1[3], cv2[3];
-
-  cv1[0] = vi[0] + omega_cross_r1[0];
-  cv1[1] = vi[1] + omega_cross_r1[1];
-  cv1[2] = vi[2] + omega_cross_r1[2];
-
-  cv2[0] = vj[0] + omega_cross_r2[0];
-  cv2[1] = vj[1] + omega_cross_r2[1];
-  cv2[2] = vj[2] + omega_cross_r2[2];
+  add3(vi, omega_cross_r1, cv1);
+  add3(vj, omega_cross_r2, cv2);
 
   // total relavtive velocity at contact point
-  double vr1 = cv1[0] - cv2[0];
-  double vr2 = cv1[1] - cv2[1];
-  double vr3 = cv1[2] - cv2[2];
+  double vr[3];
+  sub3(cv1, cv2, vr);
 
   // normal component
 
-  double vn1 = nij[0] * vr1;    // dot product
-  double vn2 = nij[1] * vr2;
-  double vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+  double vn[3];
+  double vnnr = dot3(vr, nij);
+  scale3(vnnr, nij, vn);
 
   // tangential component
 
-  double vtr1 = vr1 - vnnr * nij[0];
-  double vtr2 = vr2 - vnnr * nij[1];
-  double vtr3 = vr3 - vnnr * nij[2];
+  double vt[3];
+  sub3(vr, vn, vt);
 
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
+  vrel = len3(vt); // vtr in spherical model
 
-  // normal forces = elastic contact + normal velocity damping
-
-  double damp = meff * gamman[itype][jtype] * vnnr;
-  double ccel = kn[itype][jtype] * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+  // Approximate contact radius
 
-  double polyhertz;
-  if (normal_model[itype][jtype] == HERTZ) {
+  // hertzian contact radius approximation
+  if (contact_radius_flag) {
     double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
     MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
     MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
@@ -1125,76 +1127,152 @@ void PairGranularSuperellipsoid::calculate_forces()
     }
 
     // hertzian contact radius approximation
-    polyhertz = sqrt((overlap1 + overlap2) / (curvature_i + curvature_j));
-    ccel *= polyhertz;
+    contact_radius = sqrt((overlap1 + overlap2) / (curvature_i + curvature_j));
   }
 
-  if (limit_damping[itype][jtype] && (ccel < 0.0)) ccel = 0.0;
-
-  // shear history effects
-  double *shear = &history_data[default_hist_size];
+  if (normal_model[itype][jtype] == HOOKE) {
+    // assuming we get the overlap depth
+    Fnormal = kn[itype][jtype] * (overlap1 + overlap2);
+  } else if (normal_model[itype][jtype] == HERTZ) {
+    Fnormal = kn[itype][jtype] * (overlap1 + overlap2) * contact_radius;
+  }
 
-  if (history_update) {
-    shear[0] += vtr1 * dt;
-    shear[1] += vtr2 * dt;
-    shear[2] += vtr3 * dt;
+  double damp = gamman[itype][jtype];
+  double damp_prefactor, Fdamp;
+  if (damping_model[itype][jtype] == MASS_VELOCITY) {
+    damp_prefactor = damp * meff;
+    Fdamp = -damp_prefactor * vnnr;
+  } else {
+    damp_prefactor = damp * meff * contact_radius;
+    Fdamp = -damp_prefactor * vnnr;
   }
-  double shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
 
-  if (history_update) {
+  // normal forces = elastic contact + normal velocity damping
 
-    // rotate shear displacements
+  Fntot = Fnormal + Fdamp;
+  if (limit_damping[itype][jtype] && (Fntot < 0.0)) Fntot = 0.0;
+  double Fncrit = fabs(Fntot);
 
-    double rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-    shear[0] -= rsht * nij[0];
-    shear[1] -= rsht * nij[1];
-    shear[2] -= rsht * nij[2];
-  }
+  // Tangential model
 
-  // tangential forces = shear + tangential velocity damping
+  double temp_array[3];
+  double *history = &history_data[default_hist_size];
+  double Fscrit = Fncrit * xmu[itype][jtype];
+  double dampt = xt[itype][jtype] * damp_prefactor;
+  if (tangential_model[itype][jtype] == LINEAR_HISTORY) {
+    // rotate and update displacements / force.
+    // see e.g. eq. 17 of Luding, Gran. Matter 2008, v10,p235
 
-  double gammat = xt[itype][jtype] * gamman[itype][jtype];
-  double fs1 = -(kt[itype][jtype] * shear[0] + meff * gammat * vtr1);
-  double fs2 = -(kt[itype][jtype] * shear[1] + meff * gammat * vtr2);
-  double fs3 = -(kt[itype][jtype] * shear[2] + meff * gammat * vtr3);
+    int frame_update = 0;
+    if (history_update) {
+      double rsht = dot3(history, nij);
+      frame_update = (fabs(rsht) * kt[itype][jtype]) > (EPSILON * Fscrit);
 
-  if (normal_model[itype][jtype] == HERTZ) {
-    fs1 *= polyhertz;
-    fs2 *= polyhertz;
-    fs3 *= polyhertz;
-  }
+      if (frame_update) rotate_rescale_vec(history, nij);
+
+      // update history, tangential force using velocities at half step
+      // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
+      scale3(dt, vtr, temp_array);
+      add3(history, temp_array, history);
+    }
 
-  // rescale frictional displacements and forces if needed
-
-  double fs_mag = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  double fn = xmu[itype][jtype] * fabs(ccel);
-
-  if (fs_mag > fn) {
-    if (shrmag != 0.0) {
-      shear[0] =
-          (fn / fs_mag) * (shear[0] + meff * gammat * vtr1 / kt[itype][jtype]) - meff * gammat * vtr1 / kt[itype][jtype];
-      shear[1] =
-          (fn / fs_mag) * (shear[1] + meff * gammat * vtr2 / kt[itype][jtype]) - meff * gammat * vtr2 / kt[itype][jtype];
-      shear[2] =
-          (fn / fs_mag) * (shear[2] + meff * gammat * vtr3 / kt[itype][jtype]) - meff * gammat * vtr3 / kt[itype][jtype];
-      fs1 *= fn / fs_mag;
-      fs2 *= fn / fs_mag;
-      fs3 *= fn / fs_mag;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
+    // tangential forces = history + tangential velocity damping
+    scale3(-kt[itype][jtype], history, fs);
+
+    double vtr2[3];
+    copy3(vtr, vtr2);
+    scale3(dampt, vtr2, temp_array);
+    sub3(fs, temp_array, fs);
+
+    // rescale frictional displacements and forces if needed
+    double magfs = len3(fs);
+    if (magfs > Fscrit) {
+      double shrmag = len3(history);
+      if (shrmag != 0.0) {
+        double magfs_inv = 1.0 / magfs;
+        scale3(Fscrit * magfs_inv, fs, history);
+        scale3(damp, vtr, temp_array);
+        add3(history, temp_array, history);
+        scale3(-1.0 / kt[itype][jtype], history);
+        scale3(Fscrit * magfs_inv, fs);
+      } else {
+        zero3(fs);
+      }
+    }
+
+  } else if (tangential_model[itype][jtype] == CLASSIC) {
+
+    // shear history effects
+
+    if (history_update) {
+      scale3(dt, vtr, temp_array);
+      add3(history, temp_array, history);
+    }
+    double shrmag = len3(history);
+
+    if (history_update) {
+      // rotate shear displacements
+      double rsht = dot3(history, nij);
+      scale3(rsht, nij, temp_array);
+      sub3(history, temp_array, history);
+    }
+
+    // tangential forces = history + tangential velocity damping
+    double fs[3];
+    if (contact_radius_flag)
+      scale3(-kt[itype][jtype] * contact_radius, history, fs);
+    else
+      scale3(-kt[itype][jtype], history, fs);
+
+    scale3(dampt, vtr, temp_array);
+    sub3(fs, temp_array, fs);
+
+    // rescale frictional displacements and forces if needed
+
+    double magfs = len3(fs);
+
+    if (magfs > Fscrit) {
+      if (shrmag != 0.0) {
+        double magfs_inv = 1.0 / magfs;
+        scale3(Fscrit * magfs_inv, fs, history);
+        scale3(damp, vtr, temp_array);
+        add3(history, temp_array, history);
+        scale3(-1.0 / kt[itype][jtype], history);
+        scale3(Fscrit * magfs_inv, fs);
+      } else
+        zero3(fs);
+    }
   }
 
   // forces & torques
 
-  forces[0] = nji[0] * ccel + fs1;
-  forces[1] = nji[1] * ccel + fs2;
-  forces[2] = nji[2] * ccel + fs3;
+  scale3(Fntot, nji, forces);
+  add3(forces, fs, forces);
+
+  cross3(cr1, forces, torquesi);
+  cross3(forces, cr2, torquesj);
+}
+
+/* ----------------------------------------------------------------------
+  rotate-rescale vector v so it is perpendicular to unit vector n
+  and has the same magnitude as before
+    Copied from GranSubMod
+  ---------------------------------------------------------------------- */
+void PairGranularSuperellipsoid::rotate_rescale_vec(double *v, double *n)
+{
+  double rsht, shrmag, prjmag, temp_dbl, temp_array[3];
+
+  rsht = dot3(v, n);
+  shrmag = len3(v);
 
-  torquesi[0] = cr1[1] * forces[2] - cr1[2] * forces[1];
-  torquesi[1] = cr1[2] * forces[0] - cr1[0] * forces[2];
-  torquesi[2] = cr1[0] * forces[1] - cr1[1] * forces[0];
+  scale3(rsht, n, temp_array);
+  sub3(v, temp_array, v);
 
-  torquesj[0] = -cr2[1] * forces[2] + cr2[2] * forces[1];
-  torquesj[1] = -cr2[2] * forces[0] + cr2[0] * forces[2];
-  torquesj[2] = -cr2[0] * forces[1] + cr2[1] * forces[0];
-}
\ No newline at end of file
+  // also rescale to preserve magnitude
+  prjmag = len3(v);
+  if (prjmag > 0)
+    temp_dbl = shrmag / prjmag;
+  else
+    temp_dbl = 0;
+  scale3(temp_dbl, v);
+}
diff --git a/src/GRANULAR/pair_granular_superellipsoid.h b/src/GRANULAR/pair_granular_superellipsoid.h
index 7fdbca533fe..abd4fa0a468 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.h
+++ b/src/GRANULAR/pair_granular_superellipsoid.h
@@ -70,6 +70,7 @@ class PairGranularSuperellipsoid : public Pair {
   int **tangential_model;
   int **limit_damping;
   int default_hist_size;
+  int contact_radius_flag;
 
   // Normal coefficients
   double **kn, **gamman;     // Hooke + Hertz
@@ -80,7 +81,7 @@ class PairGranularSuperellipsoid : public Pair {
   // Intermediate values for contact model
   int history_update, touchjj, itype, jtype;
   double Fnormal, forces[3], torquesi[3], torquesj[3];
-  double radi, radj, meff, Fntot;
+  double radi, radj, meff, Fntot, contact_radius;
   double *xi, *xj, *vi, *vj;
   double fs[3], ft[3];
   double dx[3], nx[3], r, rsq, rinv, Reff, radsum, delta, dR;
@@ -112,6 +113,8 @@ class PairGranularSuperellipsoid : public Pair {
 
   int extra_svector;
 
+  void rotate_rescale_vec(double *hislocal, double *n);
+
   // Below not implemented. Placeholder if we decide not to compute local hessian in line search
   static double
   shape_and_gradient_local(const double *, const double *, const double *,

From 7b9611cd046779b4d9389c8528f5e44de5cd5b70 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 17 Mar 2026 13:24:38 +0100
Subject: [PATCH 139/174] Fixed small bugs in damping and tangential velocity
 computation. Fixed function call for legacy pair styles

---
 .../pair_gran_hertz_history_ellipsoid.cpp     |  10 +-
 .../pair_gran_hooke_history_ellipsoid.cpp     |  10 +-
 src/GRANULAR/pair_granular_superellipsoid.cpp | 296 ++++++++++--------
 3 files changed, 167 insertions(+), 149 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 52d76d2598e..1ce2e01cc13 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -61,7 +61,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
   double shrmag, rsht, polyhertz;
   int *ilist, *jlist, *numneigh, **firstneigh;
   int *touch, **firsttouch;
-  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
+  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
 
   double shapex, shapey, shapez;    // ellipsoid shape params
   double quat1, quat2, quat3, quat4;
@@ -165,7 +165,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
         if (bounding_box) {
-          separating_axis = &allhistory[7 + size_history * jj];
+          int separating_axis = (int) (allhistory[7 + size_history * jj]);
           skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
               x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
         }
@@ -554,10 +554,10 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   if (bounding_box) {
-    double separating_axis =
-        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
+    int separating_axis = (int)
+        (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
     bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+        x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
     if (no_bouding_box_contact) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index b1696703d50..f090d8cf979 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -118,7 +118,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
   double shrmag, rsht;
   int *ilist, *jlist, *numneigh, **firstneigh;
   int *touch, **firsttouch;
-  double *shear, *X0_prev, *separating_axis, *history, *allhistory, **firsthistory;
+  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
 
   double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
       overlap2, omegai[3], omegaj[3];
@@ -218,7 +218,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
         if (bounding_box) {
-          separating_axis = &allhistory[7 + size_history * jj];
+          int separating_axis = (int) (allhistory[7 + size_history * jj]);
           skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
               x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
         }
@@ -836,10 +836,10 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   if (bounding_box) {
-    double separating_axis =
-        allhistory[7 + size_history * neighprev];    // Copy: no update of history in single
+    int separating_axis = (int)
+        (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
     bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, &separating_axis);
+        x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
     if (no_bouding_box_contact) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 560d252afe8..ac229d0993a 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -40,8 +40,8 @@ using namespace LAMMPS_NS;
 using namespace MathExtra;
 
 enum { HOOKE, HERTZ };
-enum { MASS_VELOCITY , VISCOELASTIC};
-enum { CLASSIC , LINEAR_HISTORY};
+enum { MASS_VELOCITY, VISCOELASTIC };
+enum { CLASSIC, LINEAR_HISTORY };
 
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 static constexpr double EPSILON = 1e-10;
@@ -87,7 +87,7 @@ PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
   comm_forward = 1;
 
   default_hist_size = 5;
-  size_history = default_hist_size; // default of 5 values, x0[4] and separating axis
+  size_history = default_hist_size;    // default of 5 values, x0[4] and separating axis
 
   beyond_contact = 0;
   nondefault_history_transfer = 1;
@@ -97,7 +97,8 @@ PairGranularSuperellipsoid::PairGranularSuperellipsoid(LAMMPS *lmp) : Pair(lmp)
   // this is so final order of Modify:fix will conform to input script
 
   fix_history = nullptr;
-  fix_dummy = dynamic_cast<FixDummy *>(modify->add_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY all DUMMY"));
+  fix_dummy =
+      dynamic_cast<FixDummy *>(modify->add_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY all DUMMY"));
 
   contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
 }
@@ -108,8 +109,10 @@ PairGranularSuperellipsoid::~PairGranularSuperellipsoid()
 {
   delete[] svector;
 
-  if (!fix_history) modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY");
-  else modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE");
+  if (!fix_history)
+    modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY");
+  else
+    modify->delete_fix("NEIGH_HISTORY_GRANULAR_SE");
 
   if (allocated) {
     memory->destroy(setflag);
@@ -301,7 +304,7 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
 
       if (evflag)
         ev_tally_xyz(i, j, nlocal, force->newton_pair, 0.0, 0.0, forces[0], forces[1], forces[2],
-            dx[0], dx[1], dx[2]); // Correct even for non-spherical particles
+                     dx[0], dx[1], dx[2]);    // Correct even for non-spherical particles
     }
   }
 
@@ -319,22 +322,21 @@ void PairGranularSuperellipsoid::allocate()
 
   memory->create(setflag, n + 1, n + 1, "pair:setflag");
   for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++)
-      setflag[i][j] = 0;
+    for (int j = i; j <= n; j++) setflag[i][j] = 0;
 
-  memory->create(cutsq,n+1,n+1,"pair:cutsq");
-  memory->create(cutoff_type,n+1,n+1,"pair:cutoff_type");
+  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
+  memory->create(cutoff_type, n + 1, n + 1, "pair:cutoff_type");
 
-  memory->create(limit_damping,n+1,n+1,"pair:limit_damping");
-  memory->create(normal_model,n+1,n+1,"pair:normal_model");
-  memory->create(damping_model,n+1,n+1,"pair:damping_model");
-  memory->create(tangential_model,n+1,n+1,"pair:tangential_model");
+  memory->create(limit_damping, n + 1, n + 1, "pair:limit_damping");
+  memory->create(normal_model, n + 1, n + 1, "pair:normal_model");
+  memory->create(damping_model, n + 1, n + 1, "pair:damping_model");
+  memory->create(tangential_model, n + 1, n + 1, "pair:tangential_model");
 
-  memory->create(kn,n+1,n+1,"pair:kn");
-  memory->create(gamman,n+1,n+1,"pair:gamman");
-  memory->create(kt,n+1,n+1,"pair:kt");
-  memory->create(xt,n+1,n+1,"pair:xt");
-  memory->create(xmu,n+1,n+1,"pair:xmu");
+  memory->create(kn, n + 1, n + 1, "pair:kn");
+  memory->create(gamman, n + 1, n + 1, "pair:gamman");
+  memory->create(kt, n + 1, n + 1, "pair:kt");
+  memory->create(xt, n + 1, n + 1, "pair:xt");
+  memory->create(xmu, n + 1, n + 1, "pair:xmu");
 
   onerad_dynamic = new double[n + 1];
   onerad_frozen = new double[n + 1];
@@ -349,9 +351,9 @@ void PairGranularSuperellipsoid::allocate()
 void PairGranularSuperellipsoid::settings(int narg, char **arg)
 {
   if (narg == 1) {
-    cutoff_global = utils::numeric(FLERR,arg[0],false,lmp);
+    cutoff_global = utils::numeric(FLERR, arg[0], false, lmp);
   } else {
-    cutoff_global = -1; // will be set based on particle sizes, model choice
+    cutoff_global = -1;    // will be set based on particle sizes, model choice
   }
 
   curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
@@ -381,14 +383,13 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
 {
   double cutoff_one = -1;
 
-  if (narg < 3)
-    error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21));
+  if (narg < 3) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
 
   if (!allocated) allocate();
 
-  int ilo,ihi,jlo,jhi;
-  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
-  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
+  int ilo, ihi, jlo, jhi;
+  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
 
   int normal_one, damping_one, tangential_one, limit_one;
   double kn_one, gamman_one, kt_one, xt_one, xmu_one;
@@ -423,7 +424,8 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
         kt_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
         xt_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         xmu_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
-        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0) error->all(FLERR, "Illegal linear tangential model");
+        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0)
+          error->all(FLERR, "Illegal linear tangential model");
         iarg += 4;
       } else if (strcmp(arg[iarg], "classic") == 0) {
         tangential_one = CLASSIC;
@@ -431,7 +433,8 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
         kt_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
         xt_one = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         xmu_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
-        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0) error->all(FLERR, "Illegal linear tangential model");
+        if (kt_one < 0.0 || xt_one < 0.0 || xmu_one < 0.0)
+          error->all(FLERR, "Illegal linear tangential model");
         iarg += 4;
       } else {
         error->all(FLERR, "Unknown normal model {}", arg[iarg]);
@@ -456,25 +459,24 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
     } else if (strcmp(arg[iarg], "cutoff") == 0) {
       if (iarg + 1 >= narg)
         error->all(FLERR, "Illegal pair_coeff command, not enough parameters for cutoff keyword");
-      cutoff_one = utils::numeric(FLERR,arg[iarg + 1],false,lmp);
+      cutoff_one = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
       iarg += 2;
     } else if (strcmp(arg[iarg], "limit_damping") == 0) {
       limit_one = 1;
       iarg += 1;
-    } else error->all(FLERR, "Illegal pair_coeff command {}", arg[iarg]);
+    } else
+      error->all(FLERR, "Illegal pair_coeff command {}", arg[iarg]);
   }
 
   // Define default damping sub model if unspecified, has no coeffs
-  if (damping_one == -1)
-    damping_one = VISCOELASTIC;
+  if (damping_one == -1) damping_one = VISCOELASTIC;
 
   // granular model init
-  if (normal_one == HERTZ || damping_one == VISCOELASTIC)
-    contact_radius_flag = 1;
+  if (normal_one == HERTZ || damping_one == VISCOELASTIC) contact_radius_flag = 1;
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo,i); j <= jhi; j++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
       cutoff_type[i][j] = cutoff_type[j][i] = cutoff_one;
       limit_damping[i][j] = limit_damping[j][i] = limit_one;
 
@@ -494,7 +496,7 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
     }
   }
 
-  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21));
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
 }
 
 /* ----------------------------------------------------------------------
@@ -508,16 +510,21 @@ void PairGranularSuperellipsoid::init_style()
   // error and warning checks
 
   if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
-    error->all(FLERR, "Pair granular/superellipsoid requires atom attributes radius, rmass, "
+    error->all(FLERR,
+               "Pair granular/superellipsoid requires atom attributes radius, rmass, "
                "angmom and superellipsoid flag");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Pair granular/superellipsoid requires ghost atoms store velocity");
 
   if (heat_flag) {
     if (!atom->temperature_flag)
-      error->all(FLERR,"Heat conduction in pair granular/superellipsoid requires atom style with temperature property");
+      error->all(FLERR,
+                 "Heat conduction in pair granular/superellipsoid requires atom style with "
+                 "temperature property");
     if (!atom->heatflow_flag)
-      error->all(FLERR,"Heat conduction in pair granular/superellipsoid requires atom style with heatflow property");
+      error->all(FLERR,
+                 "Heat conduction in pair granular/superellipsoid requires atom style with "
+                 "heatflow property");
   }
 
   for (i = 0; i < atom->nlocal; i++)
@@ -535,22 +542,24 @@ void PairGranularSuperellipsoid::init_style()
   // this is so its order in the fix list is preserved
 
   if (fix_history == nullptr) {
-    fix_history = dynamic_cast<FixNeighHistory *>(modify->replace_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY",
-                                                          "NEIGH_HISTORY_GRANULAR_SE"
-                                                          " all NEIGH_HISTORY "
-                                                          + std::to_string(size_history),1));
+    fix_history =
+        dynamic_cast<FixNeighHistory *>(modify->replace_fix("NEIGH_HISTORY_GRANULAR_SE_DUMMY",
+                                                            "NEIGH_HISTORY_GRANULAR_SE"
+                                                            " all NEIGH_HISTORY " +
+                                                                std::to_string(size_history),
+                                                            1));
     fix_history->pair = this;
   } else {
-    fix_history = dynamic_cast<FixNeighHistory *>(modify->get_fix_by_id("NEIGH_HISTORY_GRANULAR_SE"));
-    if (!fix_history) error->all(FLERR,"Could not find pair fix neigh history ID");
+    fix_history =
+        dynamic_cast<FixNeighHistory *>(modify->get_fix_by_id("NEIGH_HISTORY_GRANULAR_SE"));
+    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
   }
 
   // grow history for contact models, right now this is superfluous and is just a placeholder
 
   for (int itype = 1; itype <= atom->ntypes; itype++)
     for (int jtype = 1; jtype <= atom->ntypes; jtype++)
-      if (tangential_model[itype][jtype] == CLASSIC)
-        size_history += 3;
+      if (tangential_model[itype][jtype] == CLASSIC) size_history += 3;
 
   // check for FixFreeze and set freeze_group_bit
 
@@ -569,7 +578,8 @@ void PairGranularSuperellipsoid::init_style()
     if (ifix->rigid_flag) {
       if (fix_rigid)
         error->all(FLERR, "Only one fix rigid command at a time allowed");
-      else fix_rigid = ifix;
+      else
+        fix_rigid = ifix;
     }
   }
 
@@ -608,8 +618,8 @@ void PairGranularSuperellipsoid::init_style()
       onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
   }
 
-  MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,MPI_DOUBLE,MPI_MAX,world);
-  MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,MPI_DOUBLE,MPI_MAX,world);
+  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
 }
 
 /* ----------------------------------------------------------------------
@@ -622,15 +632,17 @@ double PairGranularSuperellipsoid::init_one(int i, int j)
 
   if (setflag[i][j] == 0) {
 
-    limit_damping[i][j] = MAX(limit_damping[i][i],limit_damping[j][j]);
+    limit_damping[i][j] = MAX(limit_damping[i][i], limit_damping[j][j]);
 
     if (normal_model[i][i] != normal_model[j][j] ||
         tangential_model[i][i] != tangential_model[j][j] ||
         damping_model[i][i] != damping_model[j][j])
-      error->all(FLERR,"Granular pair style functional forms are different, "
+      error->all(FLERR,
+                 "Granular pair style functional forms are different, "
                  "cannot mix coefficients for types {} and {}.\n"
                  "This combination must be set explicitly via a "
-                 "pair_coeff command",i,j);
+                 "pair_coeff command",
+                 i, j);
 
     kn[i][j] = mix_geom(kn[i][i], kn[j][j]);
     gamman[i][j] = mix_geom(gamman[i][i], gamman[j][j]);
@@ -651,9 +663,9 @@ double PairGranularSuperellipsoid::init_one(int i, int j)
 
   if (cutoff_type[i][j] < 0 && cutoff_global < 0) {
     if (((maxrad_dynamic[i] > 0.0) && (maxrad_dynamic[j] > 0.0)) ||
-        ((maxrad_dynamic[i] > 0.0) &&  (maxrad_frozen[j] > 0.0)) ||
+        ((maxrad_dynamic[i] > 0.0) && (maxrad_frozen[j] > 0.0)) ||
         // radius info about both i and j exist
-        ((maxrad_frozen[i] > 0.0)  && (maxrad_dynamic[j] > 0.0))) {
+        ((maxrad_frozen[i] > 0.0) && (maxrad_dynamic[j] > 0.0))) {
       cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
       cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
       cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
@@ -664,8 +676,8 @@ double PairGranularSuperellipsoid::init_one(int i, int j)
 
       double cutmax = 0.0;
       for (int k = 1; k <= atom->ntypes; k++) {
-        cutmax = MAX(cutmax,2.0*maxrad_dynamic[k]);
-        cutmax = MAX(cutmax,2.0*maxrad_frozen[k]);
+        cutmax = MAX(cutmax, 2.0 * maxrad_dynamic[k]);
+        cutmax = MAX(cutmax, 2.0 * maxrad_frozen[k]);
       }
       cutoff = cutmax;
     }
@@ -688,19 +700,19 @@ void PairGranularSuperellipsoid::write_restart(FILE *fp)
   int i, j;
   for (i = 1; i <= atom->ntypes; i++) {
     for (j = i; j <= atom->ntypes; j++) {
-      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      fwrite(&setflag[i][j], sizeof(int), 1, fp);
       if (setflag[i][j]) {
-        fwrite(&cutoff_type[i][j],sizeof(double),1,fp);
-        fwrite(&limit_damping[i][j],sizeof(int),1,fp);
-        fwrite(&normal_model[i][j],sizeof(int),1,fp);
-        fwrite(&tangential_model[i][j],sizeof(int),1,fp);
-        fwrite(&damping_model[i][j],sizeof(int),1,fp);
-
-        fwrite(&kn[i][j],sizeof(double),1,fp);
-        fwrite(&gamman[i][j],sizeof(double),1,fp);
-        fwrite(&kt[i][j],sizeof(double),1,fp);
-        fwrite(&xt[i][j],sizeof(double),1,fp);
-        fwrite(&xmu[i][j],sizeof(double),1,fp);
+        fwrite(&cutoff_type[i][j], sizeof(double), 1, fp);
+        fwrite(&limit_damping[i][j], sizeof(int), 1, fp);
+        fwrite(&normal_model[i][j], sizeof(int), 1, fp);
+        fwrite(&tangential_model[i][j], sizeof(int), 1, fp);
+        fwrite(&damping_model[i][j], sizeof(int), 1, fp);
+
+        fwrite(&kn[i][j], sizeof(double), 1, fp);
+        fwrite(&gamman[i][j], sizeof(double), 1, fp);
+        fwrite(&kt[i][j], sizeof(double), 1, fp);
+        fwrite(&xt[i][j], sizeof(double), 1, fp);
+        fwrite(&xmu[i][j], sizeof(double), 1, fp);
       }
     }
   }
@@ -713,37 +725,37 @@ void PairGranularSuperellipsoid::write_restart(FILE *fp)
 void PairGranularSuperellipsoid::read_restart(FILE *fp)
 {
   allocate();
-  int i,j;
+  int i, j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++) {
     for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,nullptr,error);
-      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
+      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
       if (setflag[i][j]) {
         if (me == 0) {
-          utils::sfread(FLERR,&cutoff_type[i][j],sizeof(double),1,fp,nullptr,error);
-          utils::sfread(FLERR,&limit_damping[i][j],sizeof(int),1,fp,nullptr,error);
-          utils::sfread(FLERR,&normal_model[i][j],sizeof(int),1,fp,nullptr,error);
-          utils::sfread(FLERR,&tangential_model[i][j],sizeof(int),1,fp,nullptr,error);
-          utils::sfread(FLERR,&damping_model[i][j],sizeof(int),1,fp,nullptr,error);
-
-          utils::sfread(FLERR,&kn[i][j],sizeof(double),1,fp,nullptr,error);
-          utils::sfread(FLERR,&gamman[i][j],sizeof(double),1,fp,nullptr,error);
-          utils::sfread(FLERR,&kt[i][j],sizeof(double),1,fp,nullptr,error);
-          utils::sfread(FLERR,&xt[i][j],sizeof(double),1,fp,nullptr,error);
-          utils::sfread(FLERR,&xmu[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR, &cutoff_type[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &limit_damping[i][j], sizeof(int), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &normal_model[i][j], sizeof(int), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &tangential_model[i][j], sizeof(int), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &damping_model[i][j], sizeof(int), 1, fp, nullptr, error);
+
+          utils::sfread(FLERR, &kn[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &gamman[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &kt[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &xt[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &xmu[i][j], sizeof(double), 1, fp, nullptr, error);
         }
-        MPI_Bcast(&cutoff_type[i][j],1,MPI_DOUBLE,0,world);
-        MPI_Bcast(&limit_damping[i][j],1,MPI_INT,0,world);
-        MPI_Bcast(&normal_model[i][j],1,MPI_INT,0,world);
-        MPI_Bcast(&tangential_model[i][j],1,MPI_INT,0,world);
-        MPI_Bcast(&damping_model[i][j],1,MPI_INT,0,world);
-
-        MPI_Bcast(&kn[i][j],1,MPI_DOUBLE,0,world);
-        MPI_Bcast(&gamman[i][j],1,MPI_DOUBLE,0,world);
-        MPI_Bcast(&kt[i][j],1,MPI_DOUBLE,0,world);
-        MPI_Bcast(&xt[i][j],1,MPI_DOUBLE,0,world);
-        MPI_Bcast(&xmu[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&cutoff_type[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&limit_damping[i][j], 1, MPI_INT, 0, world);
+        MPI_Bcast(&normal_model[i][j], 1, MPI_INT, 0, world);
+        MPI_Bcast(&tangential_model[i][j], 1, MPI_INT, 0, world);
+        MPI_Bcast(&damping_model[i][j], 1, MPI_INT, 0, world);
+
+        MPI_Bcast(&kn[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&gamman[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&kt[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&xt[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&xmu[i][j], 1, MPI_DOUBLE, 0, world);
       }
     }
   }
@@ -759,8 +771,7 @@ void PairGranularSuperellipsoid::reset_dt()
 /* ---------------------------------------------------------------------- */
 
 double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double factor_lj,
-                                             double &fforce)
+                                          double /*factor_coul*/, double factor_lj, double &fforce)
 {
   if (factor_lj == 0) {
     fforce = 0.0;
@@ -770,7 +781,7 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
 
   int nall = atom->nlocal + atom->nghost;
   if ((i >= nall) || (j >= nall))
-    error->all(FLERR,"Not enough atoms for pair granular single function");
+    error->all(FLERR, "Not enough atoms for pair granular single function");
 
   // Reset model and copy initial geometric data
 
@@ -779,7 +790,7 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
   int *jlist = list->firstneigh[i];
 
   if ((fix_history == nullptr) || (fix_history->firstvalue == nullptr))
-    error->one(FLERR,"Pair granular single computation needs history");
+    error->one(FLERR, "Pair granular single computation needs history");
   allhistory = fix_history->firstvalue[i];
   for (int jj = 0; jj < jnum; jj++) {
     neighprev++;
@@ -799,7 +810,7 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
   xref = atom->x[indx_ref];
   tagi = atom->tag[i];
   tagj = atom->tag[j];
-  history_update = 0; // Don't update history
+  history_update = 0;    // Don't update history
 
   auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
   AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
@@ -872,19 +883,19 @@ double PairGranularSuperellipsoid::single(int i, int j, int /*itype*/, int /*jty
 
   // Superellipsoid specific values - were these included?
 
-  svector[12] = 0.0; //contact_point_and_Lagrange_multiplier[0]
-  svector[13] = 0.0; //contact_point_and_Lagrange_multiplier[1]
-  svector[14] = 0.0; //contact_point_and_Lagrange_multiplier[2]
-  svector[15] = 0.0; //contact_point_and_Lagrange_multiplier[3]
-  svector[16] = 0.0; //bounding_box_separating_axis_index
+  svector[12] = 0.0;    //contact_point_and_Lagrange_multiplier[0]
+  svector[13] = 0.0;    //contact_point_and_Lagrange_multiplier[1]
+  svector[14] = 0.0;    //contact_point_and_Lagrange_multiplier[2]
+  svector[15] = 0.0;    //contact_point_and_Lagrange_multiplier[3]
+  svector[16] = 0.0;    //bounding_box_separating_axis_index
 
   return 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-int PairGranularSuperellipsoid::pack_forward_comm(int n, int *list, double *buf,
-                                                     int /*pbc_flag*/, int * /*pbc*/)
+int PairGranularSuperellipsoid::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
+                                                  int * /*pbc*/)
 {
   int i, j, m;
 
@@ -911,13 +922,14 @@ void PairGranularSuperellipsoid::unpack_forward_comm(int n, int first, double *b
    Transfer history
 ------------------------------------------------------------------------- */
 
-void PairGranularSuperellipsoid::transfer_history(double *source, double *target, int itype, int jtype)
+void PairGranularSuperellipsoid::transfer_history(double *source, double *target, int itype,
+                                                  int jtype)
 {
   // copy of all history variables (shear, contact point, axis)
 
   for (int i = 0; i < size_history; i++) {
     if (i >= default_hist_size && tangential_model[itype][jtype] == CLASSIC) {
-      target[i] = -source[i]; //shear
+      target[i] = -source[i];    //shear
     } else {
       target[i] = source[i];
     }
@@ -963,8 +975,7 @@ bool PairGranularSuperellipsoid::check_contact()
           xi, Ri, shapei, xj, Rj, shapej, separating_axis);
       if (new_axis != -1) {
         skip_contact_detection = true;
-        if (history_update)
-          history_data[4] = (double) new_axis;
+        if (history_update) history_data[4] = (double) new_axis;
       }
     }
     if (skip_contact_detection) {
@@ -984,15 +995,18 @@ bool PairGranularSuperellipsoid::check_contact()
       X0[3] = X0_prev[3];
       // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
       //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          xi, Ri, shapei, blocki, flagi, xj, Rj, shapej, blockj, flagj, X0, nij, contact_formulation);
+      int status = MathExtraSuperellipsoids::determine_contact_point(xi, Ri, shapei, blocki, flagi,
+                                                                     xj, Rj, shapej, blockj, flagj,
+                                                                     X0, nij, contact_formulation);
       if (status == 0) {
         touching = true;
       } else if (status == 1) {
         touching = false;
       } else {
-        error->warning(FLERR, "Ellipsoid contact detection (old contact) failed "
-                       "between particle {} and particle {} ", tagi, tagj);
+        error->warning(FLERR,
+                       "Ellipsoid contact detection (old contact) failed "
+                       "between particle {} and particle {} ",
+                       tagi, tagj);
       }
     } else {
       // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
@@ -1020,10 +1034,9 @@ bool PairGranularSuperellipsoid::check_contact()
         // force ellipsoid flag for first initial guess iteration.
         // Avoid incorrect values of n1/n2 - 2 in second derivatives.
         int status = MathExtraSuperellipsoids::determine_contact_point(
-            xi, Ri, shapei, blocki,
-            iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, xj, Rj, shapej,
-            blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-            contact_formulation);
+            xi, Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+            xj, Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+            X0, nij, contact_formulation);
 
         if (status == 0) {
           touching = true;
@@ -1031,8 +1044,10 @@ bool PairGranularSuperellipsoid::check_contact()
           touching = false;
         } else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
           // keep trying until last iteration to avoid erroring out too early
-          error->warning(FLERR, "Ellipsoid contact detection (new contact) failed"
-                         "between particle {} and particle {}", tagi, tagj);
+          error->warning(FLERR,
+                         "Ellipsoid contact detection (new contact) failed"
+                         "between particle {} and particle {}",
+                         tagi, tagj);
         }
       }
     }
@@ -1060,8 +1075,10 @@ void PairGranularSuperellipsoid::calculate_forces()
   double nji[3] = {-nij[0], -nij[1], -nij[2]};
   // compute overlap depth along normal direction for each grain
   // overlap is positive for both grains
-  double overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, xi);
-  double overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, xj);
+  double overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, xi);
+  double overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, xj);
 
   // branch vectors
   double cr1[3], cr2[3];
@@ -1072,11 +1089,9 @@ void PairGranularSuperellipsoid::calculate_forces()
 
   double ex_space[3], ey_space[3], ez_space[3], omegai[3], omegaj[3];
   MathExtra::q_to_exyz(quati, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmomi, ex_space, ey_space, ez_space,
-                             inertiai, omegai);
+  MathExtra::angmom_to_omega(angmomi, ex_space, ey_space, ez_space, inertiai, omegai);
   MathExtra::q_to_exyz(quatj, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmomj, ex_space, ey_space, ez_space,
-                             inertiaj, omegaj);
+  MathExtra::angmom_to_omega(angmomj, ex_space, ey_space, ez_space, inertiaj, omegaj);
 
   double omega_cross_r1[3], omega_cross_r2[3];
   MathExtra::cross3(omegai, cr1, omega_cross_r1);
@@ -1104,7 +1119,7 @@ void PairGranularSuperellipsoid::calculate_forces()
   double vt[3];
   sub3(vr, vn, vt);
 
-  vrel = len3(vt); // vtr in spherical model
+  vrel = len3(vt);    // vtr in spherical model
 
   // Approximate contact radius
 
@@ -1115,10 +1130,10 @@ void PairGranularSuperellipsoid::calculate_forces()
     MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
 
     if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-      curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-          shapei, blocki, flagi, Ri, surf_point_i, xi);
-      curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-          shapej, blockj, flagj, Rj, surf_point_j, xj);
+      curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi,
+                                                                            Ri, surf_point_i, xi);
+      curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj,
+                                                                            Rj, surf_point_j, xj);
     } else {
       curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
           shapei, blocki, flagi, Ri, surf_point_i, xi);
@@ -1172,7 +1187,7 @@ void PairGranularSuperellipsoid::calculate_forces()
 
       // update history, tangential force using velocities at half step
       // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
-      scale3(dt, vtr, temp_array);
+      scale3(dt, vt, temp_array);
       add3(history, temp_array, history);
     }
 
@@ -1180,7 +1195,7 @@ void PairGranularSuperellipsoid::calculate_forces()
     scale3(-kt[itype][jtype], history, fs);
 
     double vtr2[3];
-    copy3(vtr, vtr2);
+    copy3(vt, vtr2);
     scale3(dampt, vtr2, temp_array);
     sub3(fs, temp_array, fs);
 
@@ -1191,7 +1206,7 @@ void PairGranularSuperellipsoid::calculate_forces()
       if (shrmag != 0.0) {
         double magfs_inv = 1.0 / magfs;
         scale3(Fscrit * magfs_inv, fs, history);
-        scale3(damp, vtr, temp_array);
+        scale3(dampt, vt, temp_array);
         add3(history, temp_array, history);
         scale3(-1.0 / kt[itype][jtype], history);
         scale3(Fscrit * magfs_inv, fs);
@@ -1205,7 +1220,7 @@ void PairGranularSuperellipsoid::calculate_forces()
     // shear history effects
 
     if (history_update) {
-      scale3(dt, vtr, temp_array);
+      scale3(dt, vt, temp_array);
       add3(history, temp_array, history);
     }
     double shrmag = len3(history);
@@ -1224,7 +1239,7 @@ void PairGranularSuperellipsoid::calculate_forces()
     else
       scale3(-kt[itype][jtype], history, fs);
 
-    scale3(dampt, vtr, temp_array);
+    scale3(dampt, vt, temp_array);
     sub3(fs, temp_array, fs);
 
     // rescale frictional displacements and forces if needed
@@ -1235,9 +1250,12 @@ void PairGranularSuperellipsoid::calculate_forces()
       if (shrmag != 0.0) {
         double magfs_inv = 1.0 / magfs;
         scale3(Fscrit * magfs_inv, fs, history);
-        scale3(damp, vtr, temp_array);
+        scale3(dampt, vt, temp_array);
         add3(history, temp_array, history);
-        scale3(-1.0 / kt[itype][jtype], history);
+        if (contact_radius_flag)
+          scale3(-1.0 / (kt[itype][jtype] * contact_radius), history);
+        else
+          scale3(-1.0 / kt[itype][jtype], history);
         scale3(Fscrit * magfs_inv, fs);
       } else
         zero3(fs);

From a5a2ad79d76dc247e21678000bc23791c214a03b Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 17 Mar 2026 16:25:49 +0100
Subject: [PATCH 140/174] Fixed few bugs in settings

---
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 10 +++--
 .../superellipsoid_gran/in.ellipsoid_gran     | 12 ++++--
 .../pair_gran_hertz_history_ellipsoid.cpp     | 22 +++++++----
 .../pair_gran_hooke_history_ellipsoid.cpp     | 22 +++++++----
 src/GRANULAR/pair_granular_superellipsoid.cpp | 39 ++++++++++++-------
 5 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 60f56f83ee3..277856df528 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -47,8 +47,12 @@ group           wall type 1
 group           mobile type 2
 
 # Interaction / Pair Style
-pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
-pair_coeff      * *
+# pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
+# pair_coeff      * *
+pair_style      granular/superellipsoid bounding_box
+pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+# Hertz model instead
+# pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
 
 # Computes and Output
 compute         diameter all property/atom shapex shapey shapez
@@ -59,7 +63,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}_friction.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 100 dump_drop_algebraic_block_${blockiness}_friction.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 70b91da0d11..7d251f84f16 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -16,11 +16,15 @@ group top id 2
 
 set             type 1 mass 1.0
 set             type 1 shape 2.0 1.0 1.0
-set             type 1 block 5.0 5.0
+set             type 1 block 2.0 2.0
 
-pair_style      gran/hooke/history/ellipsoid 1e3 NULL 0.0 0.0 0.5 0 bounding_box
-# pair_style      gran/hertz/history/ellipsoid 1e3 0.0 0.0 0.0 0.5 0
-pair_coeff      * *
+
+# pair_style      gran/hertz/history/ellipsoid 1e3 NULL 0.0 0.0 0.5 0 bounding_box
+# pair_coeff * *
+pair_style      granular/superellipsoid bounding_box
+# pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+# Hertz model instead
+pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
 
 compute diameter all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index 1ce2e01cc13..e38c78d82cf 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -153,7 +153,7 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
       int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
 
       // TODO: Below could be a `touch()` function
-      bool touching;
+      bool touching = false;
       if (rsq >= radsum * radsum) {
         touching = false;
       } else {
@@ -165,9 +165,13 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
         if (bounding_box) {
-          int separating_axis = (int) (allhistory[7 + size_history * jj]);
-          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+          int cached_axis = (int) (allhistory[7 + size_history * jj]);
+          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+          if (new_axis != -1) {
+            skip_contact_detection = true;
+            allhistory[7 + size_history * jj] = (double) new_axis;
+          }
         }
         if (skip_contact_detection)
           touching = false;
@@ -553,12 +557,14 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  bool skip_contact_detection(false);
   if (bounding_box) {
-    int separating_axis = (int)
+    int cached_axis = (int)
         (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
-    if (no_bouding_box_contact) {
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+        x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+    if (new_axis !=-1) skip_contact_detection = true;
+    if (skip_contact_detection) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
       return 0.0;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index f090d8cf979..1b3508e958c 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -206,7 +206,7 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
       int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
 
       // TODO: Below could be a `touch()` function
-      bool touching;
+      bool touching = false;
       if (rsq >= radsum * radsum) {
         touching = false;
       } else {
@@ -218,9 +218,13 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
         MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
         bool skip_contact_detection(false);
         if (bounding_box) {
-          int separating_axis = (int) (allhistory[7 + size_history * jj]);
-          skip_contact_detection = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
+          int cached_axis = (int) (allhistory[7 + size_history * jj]);
+          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+          if (new_axis != -1) {
+            skip_contact_detection = true;
+            allhistory[7 + size_history * jj] = (double) new_axis;
+          }
         }
         if (skip_contact_detection)
           touching = false;
@@ -835,12 +839,14 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
   MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  bool skip_contact_detection = false;
   if (bounding_box) {
-    int separating_axis = (int)
+    int cached_axis = (int)
         (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    bool no_bouding_box_contact = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, separating_axis);
-    if (no_bouding_box_contact) {
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+        x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+    if (new_axis !=-1) skip_contact_detection = true;
+    if (skip_contact_detection) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
       return 0.0;
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index ac229d0993a..9e8e3a1cbbb 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -238,6 +238,10 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
       flagi = bonus[ellipsoid[i]].type;
       flagj = bonus[ellipsoid[j]].type;
 
+      radsum = radi + radj;
+      sub3(xi, xj, dx);
+      rsq = dot3(dx, dx);
+
       MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei0);
       MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej0);
       MathExtra::copy3(bonus[ellipsoid[i]].block, blocki0);
@@ -350,22 +354,25 @@ void PairGranularSuperellipsoid::allocate()
 
 void PairGranularSuperellipsoid::settings(int narg, char **arg)
 {
-  if (narg == 1) {
-    cutoff_global = utils::numeric(FLERR, arg[0], false, lmp);
-  } else {
-    cutoff_global = -1;    // will be set based on particle sizes, model choice
-  }
-
-  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
+  cutoff_global = -1;    // default: will be set based on particle sizes, model choice
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;
 
-  for (int iarg = 1; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "bounding_box") == 0)
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "bounding_box") == 0) {
       bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
+      iarg++;
+    } else if (strcmp(arg[iarg], "geometric") == 0) {
       contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
+      iarg++;
+    } else if (strcmp(arg[iarg], "curvature_gaussian") == 0) {
       curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
-    else
+      iarg++;
+    } else if (iarg == 0) {
+      // if it is the first argument and not a keyword, assume it is a cutoff
+      cutoff_global = utils::numeric(FLERR, arg[iarg], false, lmp);
+      iarg++;
+    } else
       error->all(FLERR, "Illegal pair_style command");
   }
 
@@ -418,6 +425,7 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
   //Parse optional arguments
   while (iarg < narg) {
     if (strcmp(arg[iarg], "tangential") == 0) {
+      iarg++;
       if (strcmp(arg[iarg], "linear_history") == 0) {
         tangential_one = LINEAR_HISTORY;
         if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "pair granular/superellipsoid", error);
@@ -437,9 +445,10 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
           error->all(FLERR, "Illegal linear tangential model");
         iarg += 4;
       } else {
-        error->all(FLERR, "Unknown normal model {}", arg[iarg]);
+        error->all(FLERR, "Unknown tangential model {}", arg[iarg]);
       }
     } else if (strcmp(arg[iarg], "damping") == 0) {
+      iarg++;
       if (strcmp(arg[iarg], "mass_velocity") == 0) {
         damping_one = MASS_VELOCITY;
         iarg += 1;
@@ -559,7 +568,9 @@ void PairGranularSuperellipsoid::init_style()
 
   for (int itype = 1; itype <= atom->ntypes; itype++)
     for (int jtype = 1; jtype <= atom->ntypes; jtype++)
-      if (tangential_model[itype][jtype] == CLASSIC) size_history += 3;
+      if (tangential_model[itype][jtype] == CLASSIC ||
+          tangential_model[itype][jtype] == LINEAR_HISTORY)
+        size_history += 3;
 
   // check for FixFreeze and set freeze_group_bit
 

From a2dd94e51bfbe848d608103e3573287340380a1c Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 17 Mar 2026 18:03:46 +0100
Subject: [PATCH 141/174] Fixed cached axis incorrect overwriting

---
 .../pair_gran_hertz_history_ellipsoid.cpp       | 15 +++++++++------
 .../pair_gran_hooke_history_ellipsoid.cpp       | 17 ++++++++++-------
 src/GRANULAR/pair_granular_superellipsoid.cpp   |  5 ++++-
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
index e38c78d82cf..93ac6feab97 100644
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
@@ -247,7 +247,10 @@ void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
 
         touch[jj] = 0;
         history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) history[k] = 0.0;
+        for (int k = 0; k < size_history; k++) {
+          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
+          history[k] = 0.0;
+        }
       } else {
         // Store contact point with respect to grain i for next time step
         // This is crucial for periodic BCs when grains can move by large amount in one time step
@@ -559,11 +562,11 @@ double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   bool skip_contact_detection(false);
   if (bounding_box) {
-    int cached_axis = (int)
-        (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-    if (new_axis !=-1) skip_contact_detection = true;
+    int cached_axis =
+        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
+                                                                           Rj, shapej, cached_axis);
+    if (new_axis != -1) skip_contact_detection = true;
     if (skip_contact_detection) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
index 1b3508e958c..01a6c4b0343 100644
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
@@ -304,7 +304,10 @@ void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
 
         touch[jj] = 0;
         history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) history[k] = 0.0;
+        for (int k = 0; k < size_history; k++) {
+          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
+          history[k] = 0.0;
+        }
       } else {
         // Store contact point with respect to grain i for next time step
         // This is crucial for periodic BCs when grains can move by large amount in one time step
@@ -841,11 +844,11 @@ double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*
   MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
   bool skip_contact_detection = false;
   if (bounding_box) {
-    int cached_axis = (int)
-        (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-        x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-    if (new_axis !=-1) skip_contact_detection = true;
+    int cached_axis =
+        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
+                                                                           Rj, shapej, cached_axis);
+    if (new_axis != -1) skip_contact_detection = true;
     if (skip_contact_detection) {
       fforce = 0.0;
       for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
@@ -1082,7 +1085,7 @@ void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *tar
 {
   // Simple direct copy of all history variables (shear, contact point, axis)
   for (int i = 0; i < size_history; i++) {
-    if (i < 3) target[i] = -source[i]; //shear
+    if (i < 3) target[i] = -source[i];    //shear
     target[i] = source[i];
   }
 }
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 9e8e3a1cbbb..f08bf952d00 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -261,7 +261,10 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
         // unset non-touching neighbors
         touch[jj] = 0;
         history = &allhistory[size_history * jj];
-        for (k = 0; k < size_history; k++) history[k] = 0.0;
+        for (k = 0; k < size_history; k++) {
+          if (bounding_box && k == 4) continue;    // Do not delete cached axis information
+          history[k] = 0.0;
+        }
         continue;
       }
 

From 1616efb95d020d9ea33f0a6965d23236e72de57e Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Tue, 17 Mar 2026 15:12:38 -0600
Subject: [PATCH 142/174] Adding contact radius bugfix for classic model to
 pair granular

---
 src/GRANULAR/gran_sub_mod_tangential.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index b43acc73cd4..d068c08b4d9 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -245,7 +245,10 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces()
       scale3(Fscrit * magfs_inv, fs, history);
       scale3(damp, vtr, temp_array);
       add3(history, temp_array, history);
-      scale3(-1.0 / k, history);
+      if (contact_radius_flag)
+        scale3(-1.0 / (k * contact_radius), history);
+      else
+        scale3(-1.0 / k, history);
       scale3(Fscrit * magfs_inv, fs);
     } else {
       zero3(fs);

From 843549a2ecedf10a66bfc3aa26c2ac14572393b0 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Tue, 17 Mar 2026 20:09:49 -0600
Subject: [PATCH 143/174] Accidently overwrote update_history variable

---
 src/ASPHERE/math_extra_superellipsoids.cpp    | 7 ++++---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 21ac70ab9bf..1962d1951ae 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -26,7 +26,7 @@
 namespace MathExtraSuperellipsoids {
 
 inline constexpr double TIKHONOV_SCALE =
-    1e-14;   
+    1e-14;
 
 static constexpr int ITERMAX_NR = 100;
 static constexpr double TOL_NR_RES = 1e-10 * 1e-10;
@@ -636,9 +636,10 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     if (converged) break;
   }
 
-  if (!converged){
+  if (!converged) {
     if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
-    return 2;} // not failing if not converged but shapefuncs positive (i.e., no contact)
+    return 2;
+  } // not failing if not converged but shapefuncs positive (i.e., no contact)
               // might be risky to assume no contact if not converged, NR might have gone to a far away point
               // but no guarantee there is no contact
   if (shapefunc[0] > 0.0 || shapefunc[1] > 0.0) return 1;
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index f08bf952d00..b6d5f0fc26e 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -151,7 +151,7 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
   double *history, *allhistory, **firsthistory;
 
   bool touchflag = false;
-  const bool history_update = update->setupflag == 0;
+  history_update = update->setupflag == 0;
 
   ev_init(eflag, vflag);
 
@@ -1059,7 +1059,7 @@ bool PairGranularSuperellipsoid::check_contact()
         } else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
           // keep trying until last iteration to avoid erroring out too early
           error->warning(FLERR,
-                         "Ellipsoid contact detection (new contact) failed"
+                         "Ellipsoid contact detection (new contact) failed "
                          "between particle {} and particle {}",
                          tagi, tagj);
         }

From a1f1ac27c724bb2f0d41e14a36f8332f0859d503 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 18 Mar 2026 10:23:10 +0100
Subject: [PATCH 144/174] Fixed order of calls for history_sizedetermination

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index b6d5f0fc26e..4e65292158b 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -549,6 +549,14 @@ void PairGranularSuperellipsoid::init_style()
 
   dt = update->dt;
 
+  // grow history for contact models, right now this is superfluous and is just a placeholder
+
+  for (int itype = 1; itype <= atom->ntypes; itype++)
+    for (int jtype = 1; jtype <= atom->ntypes; jtype++)
+      if (tangential_model[itype][jtype] == CLASSIC ||
+          tangential_model[itype][jtype] == LINEAR_HISTORY)
+        size_history += 3;
+
   // if history is stored and first init, create Fix to store history
   // it replaces FixDummy, created in the constructor
   // this is so its order in the fix list is preserved
@@ -567,14 +575,6 @@ void PairGranularSuperellipsoid::init_style()
     if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
   }
 
-  // grow history for contact models, right now this is superfluous and is just a placeholder
-
-  for (int itype = 1; itype <= atom->ntypes; itype++)
-    for (int jtype = 1; jtype <= atom->ntypes; jtype++)
-      if (tangential_model[itype][jtype] == CLASSIC ||
-          tangential_model[itype][jtype] == LINEAR_HISTORY)
-        size_history += 3;
-
   // check for FixFreeze and set freeze_group_bit
 
   auto fixlist = modify->get_fix_by_style("^freeze");

From f45f1e7d44bbd3b924663dc6039153f5fb5725d2 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Wed, 18 Mar 2026 15:10:25 +0100
Subject: [PATCH 145/174] Added first draft for doc of the new
 pair_granular_supereliipsoid

---
 doc/src/pair_granular_superellipsoid.rst      | 574 ++++++++++++++++++
 .../ASPHERE/superellipsoid_gran/in.bowling    |   5 +-
 .../ASPHERE/superellipsoid_gran/in.drop_test  |   6 +-
 3 files changed, 580 insertions(+), 5 deletions(-)
 create mode 100644 doc/src/pair_granular_superellipsoid.rst

diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
new file mode 100644
index 00000000000..426162ca85c
--- /dev/null
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -0,0 +1,574 @@
+.. index:: pair_style granular
+
+pair_style granular command
+===========================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   pair_style granular/superellipsoid [cutoff] bounding_box curvature_gaussian
+
+* cutoff = global cutoff value (optional).  See discussion below.
+* bounding_box = oriented bounding box check (optional).  See discussion below.
+* curvature_gaussian = gaussian curvature coeff approximation for contact patch
+  (optional).  See discussion below.
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   pair_style granular/superellipsoid bounding_box 
+   pair_coeff * * hooke 1000.0 50.0 tangential linear_history 1000.0 1.0 0.5 damping mass_velocity
+
+   pair_style granular/superellipsoid 10.0 curvature_gaussian
+   pair_coeff * * hertz 1000.0 50.0 tangential classic 500.0 1.0 0.4 damping mass_velocity
+
+Description
+"""""""""""
+
+The *granular/superellipsoid* styles support some of the options for the normal
+and tangential forces resulting from contact between two granular particles
+(rolling and twisting will be added later). The total computed forces
+and torques are the sum of various models selected for the normal and
+tangential.
+
+All model choices and parameters are entered in the
+:doc:`pair_coeff <pair_coeff>` command, as described below.  Unlike
+e.g. :doc:`pair gran/hooke <pair_gran>`, coefficient values are not
+global, but can be set to different values for different combinations
+of particle types, as determined by the :doc:`pair_coeff <pair_coeff>`
+command.  If the contact model choice is the same for two particle
+types, the mixing for the cross-coefficients can be carried out
+automatically. This is shown in the last example, where model
+choices are the same for type 1 - type 1 as for type 2 - type2
+interactions, but coefficients are different. In this case, the
+mixed coefficients for type 1 - type 2 interactions can be determined from
+mixing rules discussed below.  For additional flexibility,
+coefficients as well as model forms can vary between particle types.
+
+----------
+
+This pair_style allows granular contact between two superellipsoid particles
+whose surface is implicitly defined as:
+
+.. math::
+
+    f(\mathbf{x}) = \left(
+    \left|\frac{x}{a}\right|^{n_2} + \left|\frac{y}{b}\right|^{n_2}
+    \right)^{n_1 / n_2}
+    + \left|\frac{z}{c}\right|^{n_1} - 1 = 0
+
+for a point :math:`\mathbf{x} = (x, y, z)` where the coordinates are given
+in the reference of the principal directions of inertia of the particle.
+The half-diameters :math:`a`, :math:`b`, and :math:`c` correspond to the *shape*
+property, and the exponents :math:`n_1` and :math:`n_2` to the *block* property
+of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
+more details.
+
+.. note::
+   
+    The contact solver strictly requires convex particle shapes to ensure a mathematically
+    unique point of deepest penetration. Therefore, the blockiness parameters must be
+    :math:`n_1 \ge 2.0` and :math:`n_2 \ge 2.0`. Attempting to simulate concave or "pointy"
+    particles (:math:`n < 2.0`) will result in an error.
+
+.. note::
+
+    For particles with high blockiness exponents (:math:`n > 4.0`) involved in edge-to-edge
+    or corner-to-corner contacts, the surface normal vector varies rapidly over small
+    distances. The Newton solver may occasionally fail to converge to the strict gradient
+    alignment tolerance (typically :math:`10^{-10}`).
+    You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
+    but the simulation will proceed using this best-effort contact point. 
+
+Contact detection for these aspherical particles uses the so-called ''midway''
+minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
+particles with shape functions,  :math:`F_i` and :math:`F_j`,
+the contact point :math:`\mathbf{X}_0` in the global frame is obtained as:
+
+.. math::
+
+    \mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}}
+                   \ F_i(\mathbf{X}) + F_j(\mathbf{X})
+                   \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X}) 
+
+where the shape function is given by
+:math:`F_i(\mathbf{X}) = f_i(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
+and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
+and rotation matrix of the particle, respectively.
+The constrained minimization problem is solved using Lagrange multipliers and
+Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
+
+.. note::
+
+    The shape function :math:`F` is not a signed distance function and
+    does not have unit gradient :math:`\|\nabla F \| \neq 1` so that the
+    so-called ''midway'' point is not actually located at an equal distance from the
+    surface of both particles.
+    For contact between non-identical particles, the contact point tends to
+    be closer to the surface of the smaller and blockier particle.
+
+.. note::
+
+    This formulation leads to a 4x4 system of non-linear equations. Tikhonov
+    regularization and step clumping is used to ensure robustness of the direct
+    solver and high convergence rate, even for blocky particles with near flat
+    faces.
+
+The particles overlap if both shape functions are negative at the contact point.
+The contact normal is obtained as: :math:`\mathbf{n}_{ij} = \nabla F_i(\mathbf{X}_0) / \| \nabla F_i(\mathbf{X}_0)\| = - \nabla F_j(\mathbf{X}_0) / \| \nabla F_j(\mathbf{X}_0)\|`
+and the overlap :math:`\delta = \|\mathbf{X}_j^{\mathrm{surf}} - \mathbf{X}_i^{\mathrm{surf}}\|`
+is computed as the distance between the points on the
+particles surfaces that are closest to the contact point in the
+direction of the contact normal: :math:`F_i(\mathbf{X}_i^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_i \mathbf{n}_{ij}) = 0`
+and :math:`F_j(\mathbf{X}_j^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_j \mathbf{n}_{ij}) = 0`.
+Newton's method is used to solve this equation for the scalars
+:math:`\lambda_i` and :math:`\lambda_j` and find the surface points
+:math:`\mathbf{X}_i^{\mathrm{surf}}` and :math:`\mathbf{X}_j^{\mathrm{surf}}`.
+
+.. note::
+    A modified representation of the particle surface is defined as
+    :math:`G(\mathbf{X}) = (F(\mathbf{X})+1)^{1/n_1}-1` which is a quasi-radial distance function formulation.
+    This formulation is used to compute the surface points once the midway contact point is found.
+    This formulation is also used when the *geometric* keyword is specified in the pair_style command and the following optimization problem is solved instead for the contact point:
+    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} \, \left( r_i G_i(\mathbf{X}) + r_j G_j(\mathbf{X}) \right) \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
+    where :math:`r_i` and :math:`r_j` are the average radii of the two particles.
+    The geometric formulation thus yields a better approximation of the contact point
+    for particles with different sizes, and it is slightly more robust for particles with high *block* exponents, 
+    albeit more computationally expensive.    
+
+A hierarchical approach is used to limit the cost of contact detection.
+First, intersection of the bounding spheres of the two particles of bounding
+radii :math:`r_i` and :math:`r_j` is checked. If the distance
+between the particles center is more than the sum of the radii
+:math:`\|\mathbf{X}_j - \mathbf{X}_j\| > r_i + r_j`, the particles do not intersect.
+Then, if the bounding spheres intersect, intersection of the oriented
+bounding box is checked. This is done following the equations of
+:ref:`(Eberly) <GeometricTools>`.
+This check is only performed if the *bounding_box* keyword is used.
+This is advantageous for all particles except for superellipses with 
+aspect ratio close to one and both blockiness indexes close to 2.
+
+----------
+
+
+The first required keyword for the *pair_coeff* command is the normal
+contact model. Currently supported options for normal contact models
+and their required arguments are:
+
+1. *hooke* : :math:`k_n`, :math:`\eta_{n0}` (or :math:`e`)
+2. *hertz* : :math:`k_n`, :math:`\eta_{n0}` (or :math:`e`)
+
+Here, :math:`k_n` is spring stiffness (with units that depend on model
+choice, see below); :math:`\eta_{n0}` is a damping prefactor (or, in its
+place a coefficient of restitution :math:`e`, depending on the choice of
+damping mode, see below).
+
+For the *hooke* model, the normal, elastic component of force acting
+on particle *i* due to contact with particle *j* is given by:
+
+.. math::
+
+   \mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}
+
+Where :math:`\delta_{ij}` is the particle overlap, (note the i-j ordering so
+that :math:`\mathbf{F}_{ne}` is positive for repulsion), and :math:`\mathbf{n}`
+is the contact normal vector at the contact point. Therefore, for *hooke*, the units
+of the spring constant :math:`k_n` are *force*\ /\ *distance*, or equivalently
+*mass*\ /*time\^2*.
+
+For the *hertz* model, the normal component of force is given by:
+
+.. math::
+
+   \mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
+
+Here, :math:`R_{eff} = R = \frac{R_i R_j}{R_i + R_j}` is the effective radius,
+and :math:`R_i` is the equivalent radius of the i-th particle at the surface
+contact point with the j-th particle. This radius is either the inverse of the
+mean curvature coefficient, :math:`R_i = 2 / (\kappa_1 + \kappa_2)`, or the
+gaussian curvature coefficient :math:`R_i = 1 / \sqrt{\kappa_1 \kappa_2}`, where
+:math:`\kappa_{1,2}` are the principal curvatures of the particle surface at the
+contact point. For *hertz*, the units of the spring constant :math:`k_n` are
+*force*\ /\ *length*\ \^2, or equivalently *pressure*\ .
+
+
+The *atom_style* must be set to *ellipsoid superellipsoid* to enable superellipsoid
+particles' shape parameters (3 lengths and two blockiness parameters), see 
+:doc:`atom_style <atom_style>` for more details.
+. 
+
+.. code-block:: LAMMPS
+
+   atom_style ellipsoid superellipsoid
+
+Newton's third law must be set to *off*.
+
+.. code-block:: LAMMPS
+
+   newton off
+
+
+*fix wall/gran* and *fix wall/gran/region* are currently not supported by this pair_style. 
+In addition to contact forces superellipsoids also tracks the following
+quantities for each contact: contact_point at the previous time step, bounding box separating axis
+index, if the *bounding_box* keyword is used.
+
+In addition, the normal force is augmented by a damping term of the
+following general form:
+
+.. math::
+
+   \mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}
+
+Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot
+\mathbf{n}\ \mathbf{n}` is the component of relative velocity along
+:math:`\mathbf{n}`.
+
+The optional *damping* keyword to the *pair_coeff* command followed by a keyword
+determines the model form of the damping factor :math:`\eta_n`, and the
+interpretation of the :math:`\eta_{n0}` or :math:`e` coefficients specified as
+part of the normal contact model settings. The *damping* keyword and
+corresponding model form selection may be appended anywhere in the *pair coeff*
+command.  Note that the choice of damping model affects both the normal and
+tangential damping.  The options for the damping model currently supported are:
+
+1. *mass_velocity*
+2. *viscoelastic*
+
+If the *damping* keyword is not specified, the *viscoelastic* model is
+used by default.
+
+For *damping mass_velocity*, the normal damping is given by:
+
+.. math::
+
+   \eta_n = \eta_{n0} m_{eff}
+
+Here, :math:`\eta_{n0}` is the damping coefficient specified for the normal
+contact model, in units of 1/\ *time* and
+:math:`m_{eff} = m_i m_j/(m_i + m_j)` is the effective mass.
+Use *damping mass_velocity* to reproduce the damping behavior of
+*pair gran/hooke/\**.
+
+The *damping viscoelastic* model is based on the viscoelastic
+treatment of :ref:`(Brilliantov et al) <Brill1996>`, where the normal
+damping is given by:
+
+.. math::
+
+   \eta_n = \eta_{n0}\ a m_{eff}
+
+Here, *a* is the contact radius, given by :math:`a =\sqrt{R\delta}`
+for all models.  For *damping viscoelastic*,
+:math:`\eta_{n0}` is in units of 1/(\ *time*\ \*\ *distance*\ ).
+
+The total normal force is computed as the sum of the elastic and
+damping components:
+
+.. math::
+
+   \mathbf{F}_n = \mathbf{F}_{ne} + \mathbf{F}_{n,damp}
+
+----------
+
+The *pair_coeff* command also requires specification of the tangential
+contact model. The required keyword *tangential* is expected, followed
+by the model choice and associated parameters. Currently supported
+tangential model choices and their expected parameters are as follows:
+
+1. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
+2. *classic* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
+
+Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
+damping :math:`\eta_n` that determines the magnitude of the tangential
+damping, :math:`\mu_t` is the tangential (or sliding) friction
+coefficient, and :math:`k_t` is the tangential stiffness coefficient.
+
+The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:
+
+.. math::
+
+   \mathbf{F}_\mathrm{t,damp} = -\eta_t \mathbf{v}_{t,rel}
+
+The tangential damping prefactor :math:`\eta_t` is calculated by scaling
+the normal damping :math:`\eta_n` (see above):
+
+.. math::
+
+   \eta_t = -x_{\gamma,t} \eta_n
+
+The normal damping prefactor :math:`\eta_n` is determined by the choice
+of the *damping* keyword, as discussed above.  Thus, the *damping*
+keyword also affects the tangential damping.  The parameter
+:math:`x_{\gamma,t}` is a scaling coefficient. Several works in the
+literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
+:ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`).  The relative
+tangential velocity at the point of contact is given by
+:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\boldsymbol{\Omega}_i + R_j\boldsymbol{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
+:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
+The direction of the applied force is :math:`\mathbf{t} = \mathbf{v_{t,rel}}/\|\mathbf{v_{t,rel}}\|` .
+
+The normal force value :math:`F_{n0}` used to compute the critical force
+depends on the form of the contact model. It is given by the magnitude of
+the normal force:
+
+.. math::
+
+   F_{n0} = \|\mathbf{F}_n\|
+
+The remaining tangential options all use accumulated tangential
+displacement (i.e. contact history).
+The accumulated tangential displacement is discussed in details below
+in the context of the *linear_history* option. The same treatment of
+the accumulated displacement applies to the other options as well.
+
+For *tangential linear_history*, the tangential force is given by:
+
+.. math::
+
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+
+Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
+during the entire duration of the contact:
+
+.. math::
+
+   \mathbf{\xi} = \int_{t0}^t \mathbf{v}_{t,rel}(\tau) \mathrm{d}\tau
+
+This accumulated tangential displacement must be adjusted to account
+for changes in the frame of reference of the contacting pair of
+particles during contact. This occurs due to the overall motion of the
+contacting particles in a rigid-body-like fashion during the duration
+of the contact. There are two modes of motion that are relevant: the
+'tumbling' rotation of the contacting pair, which changes the
+orientation of the plane in which tangential displacement occurs; and
+'spinning' rotation of the contacting pair about the vector connecting
+their centers of mass (:math:`\mathbf{n}`).  Corrections due to the
+former mode of motion are made by rotating the accumulated
+displacement into the plane that is tangential to the contact vector
+at each step, or equivalently removing any component of the tangential
+displacement that lies along :math:`\mathbf{n}`, and rescaling to
+preserve the magnitude.  This follows the discussion in
+:ref:`Luding <Luding2008>`, see equation 17 and relevant discussion in that
+work:
+
+.. math::
+
+   \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}
+
+Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
+current time step and :math:`\mathbf{\xi}` is the corrected
+displacement. Corrections to the displacement due to the second mode
+of motion described above (rotations about :math:`\mathbf{n}`) are not
+currently implemented, but are expected to be minor for most
+simulations.
+
+Furthermore, when the tangential force exceeds the critical force, the
+tangential displacement is re-scaled to match the value for the
+critical force (see :ref:`Luding <Luding2008>`, equation 20 and related
+discussion):
+
+.. math::
+
+   \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)
+
+The tangential force is added to the total normal force (elastic plus
+damping) to produce the total force on the particle.
+
+Unlike perfect spheres, the surface normal at the contact point of a superellipsoid
+does not generally pass through the particle's center of mass. Therefore, both the
+normal and tangential forces act at the contact point to induce a torque on each
+particle. 
+
+Using the exact contact point :math:`\mathbf{X}_0` determined by the geometric solver, 
+the branch vectors from the particle centers of mass to the contact point are 
+defined as :math:`\mathbf{r}_{ci} = \mathbf{X}_0 - \mathbf{x}_i` and 
+:math:`\mathbf{r}_{cj} = \mathbf{X}_0 - \mathbf{x}_j`. The resulting torques 
+are calculated as:
+
+.. math::
+
+   \mathbf{\tau}_i = \mathbf{r}_{ci} \times \mathbf{F}_{tot}
+
+.. math::
+
+   \mathbf{\tau}_j = -\mathbf{r}_{cj} \times \mathbf{F}_{tot}
+
+----------
+
+If two particles are moving away from each other while in contact, there
+is a possibility that the particles could experience an effective attractive
+force due to damping. If the optional *limit_damping* keyword is used, this option
+will zero out the normal component of the force if there is an effective
+attractive force. 
+----------
+
+LAMMPS automatically sets pairwise cutoff values for *pair_style
+granular* based on particle radii (and in the case of *jkr* pull-off
+distances). In the vast majority of situations, this is adequate.
+However, a cutoff value can optionally be appended to the *pair_style
+granular* command to specify a global cutoff (i.e. a cutoff for all
+atom types). Additionally, the optional *cutoff* keyword can be passed
+to the *pair_coeff* command, followed by a cutoff value.  This will
+set a pairwise cutoff for the atom types in the *pair_coeff* command.
+These options may be useful in some rare cases where the automatic
+cutoff determination is not sufficient, e.g.  if particle diameters
+are being modified via the *fix adapt* command. In that case, the
+global cutoff specified as part of the *pair_style granular* command
+is applied to all atom types, unless it is overridden for a given atom
+type combination by the *cutoff* value specified in the *pair coeff*
+command.  If *cutoff* is only specified in the *pair coeff* command
+and no global cutoff is appended to the *pair_style granular* command,
+then LAMMPS will use that cutoff for the specified atom type
+combination, and automatically set pairwise cutoffs for the remaining
+atom types.
+
+----------
+
+Mixing, shift, table, tail correction, restart, rRESPA info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The :doc:`pair_modify <pair_modify>` mix, shift, table, and tail options
+are not relevant for granular pair styles.
+
+Mixing of coefficients is carried out using geometric averaging for
+most quantities, e.g. if friction coefficient for type 1-type 1
+interactions is set to :math:`\mu_1`, and friction coefficient for type
+2-type 2 interactions is set to :math:`\mu_2`, the friction coefficient
+for type1-type2 interactions is computed as :math:`\sqrt{\mu_1\mu_2}`
+(unless explicitly specified to a different value by a *pair_coeff 1 2
+...* command). 
+
+These pair styles write their information to :doc:`binary restart files <restart>`,
+so a pair_style command does not need to be specified in an input script that reads
+a restart file.
+
+These pair styles can only be used via the *pair* keyword of the
+:doc:`run_style respa <run_style>` command.  They do not support the
+*inner*, *middle*, *outer* keywords.
+
+The single() function of these pair styles returns 0.0 for the energy of a
+pairwise interaction, since energy is not conserved in these dissipative
+potentials.  It also returns only the normal component of the pairwise
+interaction force.  However, the single() function also calculates at least 13
+extra pairwise quantities.  The first 3 are the components of the tangential
+force between particles I and J, acting on particle I.  The fourth is the
+magnitude of this tangential force. The next 3 (5-7) are the components of the
+rolling torque acting on particle I. The next entry (8) is the magnitude of the
+rolling torque. The next entry (9) is the magnitude of the twisting torque
+acting about the vector connecting the two particle centers. The next 3 (10-12)
+are the components of the vector connecting the centers of the two particles
+(x_I - x_J). If a granular sub-model calculates additional contact information
+(e.g. the contact_point, lagrange multiplier and separating axis index), these
+quantities are appended to the end of this list. First, any extra values from
+the normal sub-model are appended followed by the damping, tangential, rolling,
+twisting, then heat models. See the descriptions of specific granular sub-models
+above for information on any extra quantities. If two or more models are defined
+by pair coefficients, the size of the array is set by the maximum number of
+extra quantities in a model but the order of quantities is determined by each
+model's specific set of sub-models. Any unused quantities are zeroed.
+
+These extra quantities can be accessed by the :doc:`compute pair/local
+<compute_pair_local>` command, as *p1*, *p2*, ..., *p17*\ .
+
+----------
+
+Restrictions
+""""""""""""
+
+This pair style is part of the GRANULAR package.  It is
+only enabled if LAMMPS was built with that package.
+See the :doc:`Build package <Build_package>` page for more info.
+
+This pair style requires that atoms store per-particle bounding radius, shapes, blockiness, inertia,
+torque, and angular momentum (omega) as defined by the
+:doc:`atom_style ellipsoid superellipsoid <atom_style>`.
+
+This pair style requires you to use the :doc:`comm_modify vel yes <comm_modify>`
+command so that velocities are stored by ghost atoms.
+
+This pair style will not restart exactly when using the
+:doc:`read_restart <read_restart>` command, though it should provide
+statistically similar results.  This is because the forces it
+computes depend on atom velocities and the atom velocities have
+been propagated half a timestep between the force computation and
+when the restart is written, due to using Velocity Verlet time
+integration. See the :doc:`read_restart <read_restart>` command
+for more details.
+
+Accumulated values for individual contacts are saved to restart
+files but are not saved to data files. Therefore, forces may
+differ significantly when a system is reloaded using the
+:doc:`read_data <read_data>` command.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_coeff <pair_coeff>`
+:doc:`pair gran/\* <pair_gran>`
+
+Default
+"""""""
+
+For the *pair_coeff* settings: *damping viscoelastic*
+
+References
+""""""""""
+
+.. _Brill1996:
+
+**(Brilliantov et al, 1996)** Brilliantov, N. V., Spahn, F., Hertzsch,
+J. M., & Poschel, T. (1996).  Model for collisions in granular
+gases. Physical review E, 53(5), 5382.
+
+.. _Luding2008:
+
+**(Luding, 2008)** Luding, S. (2008). Cohesive, frictional powders:
+contact models for tension. Granular matter, 10(4), 235.
+
+.. _Marshall2009:
+
+**(Marshall, 2009)** Marshall, J. S. (2009). Discrete-element modeling
+of particulate aerosol flows.  Journal of Computational Physics,
+228(5), 1541-1561.
+
+.. _Silbert2001:
+
+**(Silbert, 2001)** Silbert, L. E., Ertas, D., Grest, G. S., Halsey,
+T. C., Levine, D., & Plimpton, S. J. (2001).  Granular flow down an
+inclined plane: Bagnold scaling and rheology. Physical Review E,
+64(5), 051302.
+
+
+.. _Thornton1991:
+
+**(Thornton, 1991)** Thornton, C. (1991). Interparticle sliding in the
+presence of adhesion.  J. Phys. D: Appl. Phys. 24 1942
+
+.. _Thornton2013:
+
+**(Thornton et al, 2013)** Thornton, C., Cummins, S. J., & Cleary,
+P. W. (2013).  An investigation of the comparative behavior of
+alternative contact force models during inelastic collisions. Powder
+Technology, 233, 30-46.
+
+.. _WaltonPC:
+
+**(Otis R. Walton)** Walton, O.R., Personal Communication
+
+.. _Podlozhnyuk:
+
+**(Podlozhnyuk)** Podlozhnyuk, Pirker, Kloss, Comp. Part. Mech., 4:101-118 (2017).
+
+.. _Houlsby:
+
+**(Houlsby)** Houlsby, Computers and Geotechnics, 36, 953-959 (2009).
+
+.. _GeometricTools:
+
+**(Eberly)** Eberly, Geometric Tools: Dynamic Collision Detection Using Oriented Bounding Boxes (2008).
+
diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index 6472e5e514c..d52c8b21734 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -34,8 +34,9 @@ set             type 2 block 2.0 2.0
 set             type 2 mass 10.0
 group ball type 2
 
-pair_style      gran/hooke/history/ellipsoid 1e3 5e2 0.0 0.0 0.5 0 bounding_box
-pair_coeff      * *
+pair_style      granular/superellipsoid bounding_box
+pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+
 
 compute diameter all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 277856df528..d3899295375 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -49,8 +49,8 @@ group           mobile type 2
 # Interaction / Pair Style
 # pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
 # pair_coeff      * *
-pair_style      granular/superellipsoid bounding_box
-pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+pair_style granular/superellipsoid 10.0 curvature_gaussian
+pair_coeff * * hertz 1000.0 50.0 tangential classic 500.0 1.0 0.4 damping mass_velocity
 # Hertz model instead
 # pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
 
@@ -63,7 +63,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 100 dump_drop_algebraic_block_${blockiness}_friction.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+dump            mydump all custom 100 dump_drop_algebraic_block_${blockiness}_friction2.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # Explicit mapping for Ovito
 dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 

From 3777a6d8241bc2d75194605f8f57bbe9c722d3d4 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Wed, 18 Mar 2026 10:54:24 -0600
Subject: [PATCH 146/174] Clarifying variables and (I think) simplifying
 rescaling logic

---
 src/GRANULAR/gran_sub_mod_tangential.cpp      | 66 +++++++++++--------
 src/GRANULAR/pair_granular_superellipsoid.cpp | 40 +++++------
 2 files changed, 58 insertions(+), 48 deletions(-)

diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index d068c08b4d9..4ceaa1bc366 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -121,7 +121,8 @@ void GranSubModTangentialLinearHistory::coeffs_to_local()
 void GranSubModTangentialLinearHistory::calculate_forces()
 {
   // Note: this is the same as the base Mindlin calculation except k isn't scaled by contact radius
-  double magfs, magfs_inv, rsht, shrmag, temp_array[3], vtr2[3];
+  double magfs, magfs_inv, rsht, shrmag;
+  double hist_increment[3], fdamp[3], vtr2[3];
   int frame_update = 0;
 
   double *nx = gm->nx;
@@ -145,8 +146,8 @@ void GranSubModTangentialLinearHistory::calculate_forces()
 
     // update history, tangential force using velocities at half step
     // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
-    scale3(dt, vtr, temp_array);
-    add3(history, temp_array, history);
+    scale3(dt, vtr, hist_increment);
+    add3(history, hist_increment, history);
 
     if(gm->synchronized_verlet == 1) {
       rsht = dot3(history, nx_unrotated);
@@ -165,8 +166,8 @@ void GranSubModTangentialLinearHistory::calculate_forces()
   } else {
     copy3(vtr, vtr2);
   }
-  scale3(damp, vtr2, temp_array);
-  sub3(fs, temp_array, fs);
+  scale3(-damp, vtr2, fdamp);
+  add3(fs, fdamp, fs);
 
   // rescale frictional displacements and forces if needed
   magfs = len3(fs);
@@ -175,8 +176,7 @@ void GranSubModTangentialLinearHistory::calculate_forces()
     if (shrmag != 0.0) {
       magfs_inv = 1.0 / magfs;
       scale3(Fscrit * magfs_inv, fs, history);
-      scale3(damp, vtr, temp_array);
-      add3(history, temp_array, history);
+      sub3(history, fdamp, history);
       scale3(-1.0 / k, history);
       scale3(Fscrit * magfs_inv, fs);
     } else {
@@ -200,8 +200,8 @@ GranSubModTangentialLinearHistoryClassic::GranSubModTangentialLinearHistoryClass
 
 void GranSubModTangentialLinearHistoryClassic::calculate_forces()
 {
-  double magfs, magfs_inv, rsht, shrmag;
-  double temp_array[3];
+  double magfs, rsht, shrmag;
+  double hist_increment[3], fdamp[3];
 
   double *nx = gm->nx;
   double *vtr = gm->vtr;
@@ -216,8 +216,8 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces()
 
   // update history
   if (history_update) {
-    scale3(dt, vtr, temp_array);
-    add3(history, temp_array, history);
+    scale3(dt, vtr, hist_increment);
+    add3(history, hist_increment, history);
   }
 
   shrmag = len3(history);
@@ -225,31 +225,41 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces()
   // rotate shear displacements
   if (history_update) {
     rsht = dot3(history, nx);
-    scale3(rsht, nx, temp_array);
-    sub3(history, temp_array, history);
+    scale3(rsht, nx, hist_increment);
+    sub3(history, hist_increment, history);
   }
 
   // tangential forces = history + tangential velocity damping
+  // classic model can only set contact_radius_flag through hertz
   if (contact_radius_flag)
     scale3(-k * contact_radius, history, fs);
   else
     scale3(-k, history, fs);
-  scale3(damp, vtr, temp_array);
-  sub3(fs, temp_array, fs);
+
+  // damping force, note that damp automatically has a factor
+  //   of contact radius with hertz (sets viscoelastic damping)
+  //   but not with hooke (sets mass_velocity damping)
+
+  scale3(-damp, vtr, fdamp);
+  add(fs, fdamp, fs);
 
   // rescale frictional displacements and forces if needed
   magfs = len3(fs);
   if (magfs > Fscrit) {
     if (shrmag != 0.0) {
-      magfs_inv = 1.0 / magfs;
-      scale3(Fscrit * magfs_inv, fs, history);
-      scale3(damp, vtr, temp_array);
-      add3(history, temp_array, history);
+
+      // Rescale shear force
+      scale3(Fscrit / magfs, fs);
+
+      // Set shear to elastic component of rescaled force
+      //  has extra factor of kt (+ contact radius)
+      sub3(fs, fdamp, history);
+
+      // Remove extra prefactors from shear history
       if (contact_radius_flag)
         scale3(-1.0 / (k * contact_radius), history);
       else
         scale3(-1.0 / k, history);
-      scale3(Fscrit * magfs_inv, fs);
     } else {
       zero3(fs);
     }
@@ -327,7 +337,7 @@ void GranSubModTangentialMindlin::mix_coeffs(double *icoeffs, double *jcoeffs)
 void GranSubModTangentialMindlin::calculate_forces()
 {
   double k_scaled, magfs, magfs_inv, rsht, shrmag;
-  double temp_array[3], vtr2[3];
+  double hist_increment[3], fdamp[3], vtr2[3];
   int frame_update = 0;
 
   double *nx = gm->nx;
@@ -364,11 +374,11 @@ void GranSubModTangentialMindlin::calculate_forces()
     if (mindlin_force) {
       // tangential force
       // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
-      scale3(-k_scaled * dt, vtr, temp_array);
+      scale3(-k_scaled * dt, vtr, hist_increment);
     } else {
-      scale3(dt, vtr, temp_array);
+      scale3(dt, vtr, hist_increment);
     }
-    add3(history, temp_array, history);
+    add3(history, hist_increment, history);
 
     if (mindlin_rescale) history[3] = contact_radius;
 
@@ -395,8 +405,8 @@ void GranSubModTangentialMindlin::calculate_forces()
   scale3(-damp, vtr2, fs);
 
   if (!mindlin_force) {
-    scale3(k_scaled, history, temp_array);
-    sub3(fs, temp_array, fs);
+    scale3(k_scaled, history, hist_increment);
+    sub3(fs, hist_increment, fs);
   } else {
     add3(fs, history, fs);
   }
@@ -408,8 +418,8 @@ void GranSubModTangentialMindlin::calculate_forces()
     if (shrmag != 0.0) {
       magfs_inv = 1.0 / magfs;
       scale3(Fscrit * magfs_inv, fs, history);
-      scale3(damp, vtr, temp_array);
-      add3(history, temp_array, history);
+      scale3(damp, vtr, fdamp);
+      add3(history, fdamp, history);
 
       if (!mindlin_force) scale3(-1.0 / k_scaled, history);
 
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 4e65292158b..ae3438c26f4 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -1184,7 +1184,7 @@ void PairGranularSuperellipsoid::calculate_forces()
 
   // Tangential model
 
-  double temp_array[3];
+  double hist_increment[3], fdamp[3];
   double *history = &history_data[default_hist_size];
   double Fscrit = Fncrit * xmu[itype][jtype];
   double dampt = xt[itype][jtype] * damp_prefactor;
@@ -1201,17 +1201,15 @@ void PairGranularSuperellipsoid::calculate_forces()
 
       // update history, tangential force using velocities at half step
       // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
-      scale3(dt, vt, temp_array);
-      add3(history, temp_array, history);
+      scale3(dt, vt, hist_increment);
+      add3(history, hist_increment, history);
     }
 
     // tangential forces = history + tangential velocity damping
     scale3(-kt[itype][jtype], history, fs);
 
-    double vtr2[3];
-    copy3(vt, vtr2);
-    scale3(dampt, vtr2, temp_array);
-    sub3(fs, temp_array, fs);
+    scale3(-dampt, vt, fdamp);
+    add3(fs, fdamp, fs);
 
     // rescale frictional displacements and forces if needed
     double magfs = len3(fs);
@@ -1220,8 +1218,7 @@ void PairGranularSuperellipsoid::calculate_forces()
       if (shrmag != 0.0) {
         double magfs_inv = 1.0 / magfs;
         scale3(Fscrit * magfs_inv, fs, history);
-        scale3(dampt, vt, temp_array);
-        add3(history, temp_array, history);
+        sub3(history, fdamp, history);
         scale3(-1.0 / kt[itype][jtype], history);
         scale3(Fscrit * magfs_inv, fs);
       } else {
@@ -1234,16 +1231,16 @@ void PairGranularSuperellipsoid::calculate_forces()
     // shear history effects
 
     if (history_update) {
-      scale3(dt, vt, temp_array);
-      add3(history, temp_array, history);
+      scale3(dt, vt, hist_increment);
+      add3(history, hist_increment, history);
     }
     double shrmag = len3(history);
 
     if (history_update) {
       // rotate shear displacements
       double rsht = dot3(history, nij);
-      scale3(rsht, nij, temp_array);
-      sub3(history, temp_array, history);
+      scale3(rsht, nij, hist_increment);
+      sub3(history, hist_increment, history);
     }
 
     // tangential forces = history + tangential velocity damping
@@ -1253,8 +1250,8 @@ void PairGranularSuperellipsoid::calculate_forces()
     else
       scale3(-kt[itype][jtype], history, fs);
 
-    scale3(dampt, vt, temp_array);
-    sub3(fs, temp_array, fs);
+    scale3(-dampt, vt, fdamp);
+    add3(fs, fdamp, fs);
 
     // rescale frictional displacements and forces if needed
 
@@ -1262,15 +1259,18 @@ void PairGranularSuperellipsoid::calculate_forces()
 
     if (magfs > Fscrit) {
       if (shrmag != 0.0) {
-        double magfs_inv = 1.0 / magfs;
-        scale3(Fscrit * magfs_inv, fs, history);
-        scale3(dampt, vt, temp_array);
-        add3(history, temp_array, history);
+        // Rescale shear force
+        scale3(Fscrit / magfs, fs);
+
+        // Set shear to elastic component of rescaled force
+        //  has extra factor of kt (+ contact radius)
+        sub3(fs, fdamp, history);
+
+        // Remove extra prefactors from shear history
         if (contact_radius_flag)
           scale3(-1.0 / (kt[itype][jtype] * contact_radius), history);
         else
           scale3(-1.0 / kt[itype][jtype], history);
-        scale3(Fscrit * magfs_inv, fs);
       } else
         zero3(fs);
     }

From 4cdc295c2bd3b5bb8896592c1d7a8715c879d712 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Wed, 18 Mar 2026 11:05:07 -0600
Subject: [PATCH 147/174] Only incrementing history size by maximumneeded  from
 tangential

---
 src/GRANULAR/gran_sub_mod_tangential.cpp      | 1 -
 src/GRANULAR/pair_granular_superellipsoid.cpp | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index 4ceaa1bc366..f7778eca0ee 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -247,7 +247,6 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces()
   magfs = len3(fs);
   if (magfs > Fscrit) {
     if (shrmag != 0.0) {
-
       // Rescale shear force
       scale3(Fscrit / magfs, fs);
 
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index ae3438c26f4..f24f414e956 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -551,11 +551,13 @@ void PairGranularSuperellipsoid::init_style()
 
   // grow history for contact models, right now this is superfluous and is just a placeholder
 
+  int size_history_tangential = 0;
   for (int itype = 1; itype <= atom->ntypes; itype++)
     for (int jtype = 1; jtype <= atom->ntypes; jtype++)
       if (tangential_model[itype][jtype] == CLASSIC ||
           tangential_model[itype][jtype] == LINEAR_HISTORY)
-        size_history += 3;
+        size_history_tangential = 3;
+  size_history += size_history_tangential;
 
   // if history is stored and first init, create Fix to store history
   // it replaces FixDummy, created in the constructor

From 3a7e1b59bce06d40b1ab166b3ff3f48879f73dfa Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Wed, 18 Mar 2026 11:17:47 -0600
Subject: [PATCH 148/174] more tangential rescaling clean ups

---
 src/GRANULAR/gran_sub_mod_tangential.cpp | 46 +++++++++++++-----------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index f7778eca0ee..5f1ff46f4c7 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -121,7 +121,7 @@ void GranSubModTangentialLinearHistory::coeffs_to_local()
 void GranSubModTangentialLinearHistory::calculate_forces()
 {
   // Note: this is the same as the base Mindlin calculation except k isn't scaled by contact radius
-  double magfs, magfs_inv, rsht, shrmag;
+  double magfs, rsht, shrmag;
   double hist_increment[3], fdamp[3], vtr2[3];
   int frame_update = 0;
 
@@ -174,11 +174,13 @@ void GranSubModTangentialLinearHistory::calculate_forces()
   if (magfs > Fscrit) {
     shrmag = len3(history);
     if (shrmag != 0.0) {
-      magfs_inv = 1.0 / magfs;
-      scale3(Fscrit * magfs_inv, fs, history);
-      sub3(history, fdamp, history);
+      // Rescale shear force
+      scale3(Fscrit / magfs, fs);
+
+      // Set shear to elastic component of rescaled force
+      //  has extra factor of k that is then removed
+      sub3(fs, fdamp, history);
       scale3(-1.0 / k, history);
-      scale3(Fscrit * magfs_inv, fs);
     } else {
       zero3(fs);
     }
@@ -335,7 +337,7 @@ void GranSubModTangentialMindlin::mix_coeffs(double *icoeffs, double *jcoeffs)
 
 void GranSubModTangentialMindlin::calculate_forces()
 {
-  double k_scaled, magfs, magfs_inv, rsht, shrmag;
+  double k_scaled, magfs, rsht, shrmag;
   double hist_increment[3], fdamp[3], vtr2[3];
   int frame_update = 0;
 
@@ -394,6 +396,14 @@ void GranSubModTangentialMindlin::calculate_forces()
   }
 
   // tangential forces = history + tangential velocity damping
+
+  if (!mindlin_force) {
+    scale3(-k_scaled, history, fs);
+  } else {
+    copy3(history, fs);
+  }
+
+
   // Rotating vtr for damping term in nx direction
   if (frame_update && gm->synchronized_verlet) {
     copy3(vtr, vtr2);
@@ -401,28 +411,22 @@ void GranSubModTangentialMindlin::calculate_forces()
   } else {
     copy3(vtr, vtr2);
   }
-  scale3(-damp, vtr2, fs);
-
-  if (!mindlin_force) {
-    scale3(k_scaled, history, hist_increment);
-    sub3(fs, hist_increment, fs);
-  } else {
-    add3(fs, history, fs);
-  }
+  scale3(-damp, vtr2, fdamp);
+  add3(fs, fdamp, fs);
 
   // rescale frictional displacements and forces if needed
   magfs = len3(fs);
   if (magfs > Fscrit) {
     shrmag = len3(history);
     if (shrmag != 0.0) {
-      magfs_inv = 1.0 / magfs;
-      scale3(Fscrit * magfs_inv, fs, history);
-      scale3(damp, vtr, fdamp);
-      add3(history, fdamp, history);
-
-      if (!mindlin_force) scale3(-1.0 / k_scaled, history);
+      // Rescale shear force
+      scale3(Fscrit / magfs, fs);
 
-      scale3(Fscrit * magfs_inv, fs);
+      // Set shear to elastic component of rescaled force
+      //  may have extra factor of k_scaled that is then removed
+      sub3(fs, fdamp, history);
+      if (!mindlin_force)
+        scale3(-1.0 / k_scaled, history);
     } else {
       zero3(fs);
     }

From 074f6776d2f3c46cfd5468b5202f72f8b9f55b94 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Wed, 18 Mar 2026 11:40:29 -0600
Subject: [PATCH 149/174] add->add3

---
 src/GRANULAR/gran_sub_mod_tangential.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index 5f1ff46f4c7..2d2e1e036cb 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -243,7 +243,7 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces()
   //   but not with hooke (sets mass_velocity damping)
 
   scale3(-damp, vtr, fdamp);
-  add(fs, fdamp, fs);
+  add3(fs, fdamp, fs);
 
   // rescale frictional displacements and forces if needed
   magfs = len3(fs);

From 5b8896d97fd8e16d9fb65a21518391aee67b203d Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 19 Mar 2026 10:29:17 +0100
Subject: [PATCH 150/174] Renamed and cleaned variable in pair style

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 58 +++++++++----------
 1 file changed, 27 insertions(+), 31 deletions(-)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index f24f414e956..2f387c0eb44 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -1091,15 +1091,15 @@ void PairGranularSuperellipsoid::calculate_forces()
   double nji[3] = {-nij[0], -nij[1], -nij[2]};
   // compute overlap depth along normal direction for each grain
   // overlap is positive for both grains
-  double overlap1 =
+  double overlap_i =
       MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, xi);
-  double overlap2 =
+  double overlap_j =
       MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, xj);
 
   // branch vectors
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, xi, cr1);
-  MathExtra::sub3(X0, xj, cr2);
+  double cr_i[3], cr_j[3];
+  MathExtra::sub3(X0, xi, cr_i);
+  MathExtra::sub3(X0, xj, cr_j);
 
   // we need to take the cross product of omega
 
@@ -1109,41 +1109,38 @@ void PairGranularSuperellipsoid::calculate_forces()
   MathExtra::q_to_exyz(quatj, ex_space, ey_space, ez_space);
   MathExtra::angmom_to_omega(angmomj, ex_space, ey_space, ez_space, inertiaj, omegaj);
 
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+  double omega_cross_ri[3], omega_cross_rj[3];
+  MathExtra::cross3(omegai, cr_i, omega_cross_ri);
+  MathExtra::cross3(omegaj, cr_j, omega_cross_rj);
 
   // relative translational velocity
   // compute directly the sum of relative translational velocity at contact point
   // since rotational velocity contribution is different for superellipsoids
-  double cv1[3], cv2[3];
-  add3(vi, omega_cross_r1, cv1);
-  add3(vj, omega_cross_r2, cv2);
+  double cv_i[3], cv_j[3];
+  add3(vi, omega_cross_ri, cv_i);
+  add3(vj, omega_cross_rj, cv_j);
 
-  // total relavtive velocity at contact point
-  double vr[3];
-  sub3(cv1, cv2, vr);
+  // total relative velocity at contact point
+  sub3(cv_i, cv_j, vr);
 
   // normal component
 
-  double vn[3];
-  double vnnr = dot3(vr, nij);
+  vnnr = dot3(vr, nij);
   scale3(vnnr, nij, vn);
 
   // tangential component
 
-  double vt[3];
-  sub3(vr, vn, vt);
+  sub3(vr, vn, vtr);
 
-  vrel = len3(vt);    // vtr in spherical model
+  vrel = len3(vtr);   
 
   // Approximate contact radius
 
   // hertzian contact radius approximation
   if (contact_radius_flag) {
     double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-    MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-    MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+    MathExtra::scaleadd3(overlap_i, nij, X0, surf_point_i);
+    MathExtra::scaleadd3(overlap_j, nji, X0, surf_point_j);
 
     if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
       curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi,
@@ -1158,14 +1155,14 @@ void PairGranularSuperellipsoid::calculate_forces()
     }
 
     // hertzian contact radius approximation
-    contact_radius = sqrt((overlap1 + overlap2) / (curvature_i + curvature_j));
+    contact_radius = sqrt((overlap_i + overlap_j) / (curvature_i + curvature_j));
   }
 
   if (normal_model[itype][jtype] == HOOKE) {
     // assuming we get the overlap depth
-    Fnormal = kn[itype][jtype] * (overlap1 + overlap2);
+    Fnormal = kn[itype][jtype] * (overlap_i + overlap_j);
   } else if (normal_model[itype][jtype] == HERTZ) {
-    Fnormal = kn[itype][jtype] * (overlap1 + overlap2) * contact_radius;
+    Fnormal = kn[itype][jtype] * (overlap_i + overlap_j) * contact_radius;
   }
 
   double damp = gamman[itype][jtype];
@@ -1203,14 +1200,14 @@ void PairGranularSuperellipsoid::calculate_forces()
 
       // update history, tangential force using velocities at half step
       // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46
-      scale3(dt, vt, hist_increment);
+      scale3(dt, vtr, hist_increment);
       add3(history, hist_increment, history);
     }
 
     // tangential forces = history + tangential velocity damping
     scale3(-kt[itype][jtype], history, fs);
 
-    scale3(-dampt, vt, fdamp);
+    scale3(-dampt, vtr, fdamp);
     add3(fs, fdamp, fs);
 
     // rescale frictional displacements and forces if needed
@@ -1233,7 +1230,7 @@ void PairGranularSuperellipsoid::calculate_forces()
     // shear history effects
 
     if (history_update) {
-      scale3(dt, vt, hist_increment);
+      scale3(dt, vtr, hist_increment);
       add3(history, hist_increment, history);
     }
     double shrmag = len3(history);
@@ -1246,13 +1243,12 @@ void PairGranularSuperellipsoid::calculate_forces()
     }
 
     // tangential forces = history + tangential velocity damping
-    double fs[3];
     if (contact_radius_flag)
       scale3(-kt[itype][jtype] * contact_radius, history, fs);
     else
       scale3(-kt[itype][jtype], history, fs);
 
-    scale3(-dampt, vt, fdamp);
+    scale3(-dampt, vtr, fdamp);
     add3(fs, fdamp, fs);
 
     // rescale frictional displacements and forces if needed
@@ -1283,8 +1279,8 @@ void PairGranularSuperellipsoid::calculate_forces()
   scale3(Fntot, nji, forces);
   add3(forces, fs, forces);
 
-  cross3(cr1, forces, torquesi);
-  cross3(forces, cr2, torquesj);
+  cross3(cr_i, forces, torquesi);
+  cross3(forces, cr_j, torquesj);
 }
 
 /* ----------------------------------------------------------------------

From 1aba54d195b309bfcc2414c653633ae2b4bc32e7 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 19 Mar 2026 10:32:55 +0100
Subject: [PATCH 151/174] Commented out dumping in examples to follow
 guidelines

---
 .../ASPHERE/superellipsoid_gran/in.bowling    |  6 ++--
 .../ASPHERE/superellipsoid_gran/in.drop_test  | 28 +++++++++----------
 .../superellipsoid_gran/in.ellipsoid_gran     |  8 ++----
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index d52c8b21734..bc291fc737b 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -47,9 +47,9 @@ compute block all property/atom block1 block2
 variable phi atom "2/c_block[2]"
 variable theta atom "2/c_block[1]"
 
-dump mydump all custom 1 shapes.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
-# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
-dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+# dump mydump all custom 1 shapes.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# # Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
+# dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
 set group ball angmom 1.0 0.0 0.5 vy 7.0
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index d3899295375..619b3dc05d5 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -12,8 +12,8 @@ neighbor 0.5 bin
 neigh_modify   delay 0 every 1 check yes
 
 # Setup Simulation Box
-variable        box_length equal 50
-variable        box_height equal 35
+variable        box_length equal 9
+variable        box_height equal 15
 
 region          box block 0 ${box_length} 0 ${box_length} 0 ${box_height}
 create_box      2 box
@@ -22,13 +22,13 @@ create_box      2 box
 lattice         sc 1.0
 region          floor_reg block 0 ${box_length} 0 ${box_length} 0 0.1
 
-variable        insertion_length equal ${box_length}-5.0
+variable        insertion_length equal ${box_length}-1.0
 
 create_atoms    1 region floor_reg
 
 # Create Falling Particles (Type 2)
-region          drop_zone block 5 ${insertion_length} 5 ${insertion_length} 5 ${box_height}
-create_atoms    2 random 3000 12345 drop_zone overlap 2.0 maxtry 100
+region          drop_zone block 1 ${insertion_length} 1 ${insertion_length} 5 ${box_height}
+create_atoms    2 random 100 12345 drop_zone overlap 2.0 maxtry 100
 
 # Material Properties
 # Type 1: Wall particles
@@ -41,6 +41,7 @@ set             type 2 shape 2.0 1.0 1.0
 variable        blockiness equal 4.0
 set             type 1 block 4.0 4.0 
 set             type 2 block ${blockiness} ${blockiness} 
+set             type 2 quat/random 84729
 
 # Define Groups
 group           wall type 1
@@ -49,11 +50,8 @@ group           mobile type 2
 # Interaction / Pair Style
 # pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
 # pair_coeff      * *
-pair_style granular/superellipsoid 10.0 curvature_gaussian
-pair_coeff * * hertz 1000.0 50.0 tangential classic 500.0 1.0 0.4 damping mass_velocity
-# Hertz model instead
-# pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
-
+pair_style granular/superellipsoid curvature_gaussian
+pair_coeff * * hertz 1000.0 500.0 tangential classic 500.0 1.0 0.4 damping viscoelastic
 # Computes and Output
 compute         diameter all property/atom shapex shapey shapez
 compute         orient all property/atom quatw quati quatj quatk
@@ -63,9 +61,9 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-dump            mydump all custom 100 dump_drop_algebraic_block_${blockiness}_friction2.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
-# Explicit mapping for Ovito
-dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+# dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# # Explicit mapping for Ovito
+# dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
 # Prevent the frozen wall particles (type 1) from interacting with each other
 neigh_modify exclude group wall wall
@@ -79,8 +77,8 @@ fix             3 mobile nve/asphere
 compute rke all erotate/asphere
 
 # Run
-thermo          100
+thermo          1000
 thermo_style  custom time step ke c_rke 
 
 timestep        0.0001
-run             20000000
+run             200000
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 7d251f84f16..69db6a171ee 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -23,8 +23,6 @@ set             type 1 block 2.0 2.0
 # pair_coeff * *
 pair_style      granular/superellipsoid bounding_box
 # pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
-# Hertz model instead
-pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
 
 compute diameter all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
@@ -35,9 +33,9 @@ compute block all property/atom block1 block2
 variable phi atom "2/c_block[2]"
 variable theta atom "2/c_block[1]"
 
-dump mydump all custom 10 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
-# Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
-dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
+# dump mydump all custom 10 dump.lammpstrj id x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# # Ovito maps c_orient[*] on its XYZW axes, which is not correct. Map components explicitly
+# dump_modify mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
 fix 1 bot freeze
 fix 2 top gravity 9.81 vector 0 0 -1

From 84f7c2235334577daecfccb644a8fa2bc3424bbb Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Thu, 19 Mar 2026 14:52:03 +0100
Subject: [PATCH 152/174] Fixed bugs in meff declaration scope and damping sign

---
 examples/ASPHERE/superellipsoid_gran/in.drop_test      | 8 ++++----
 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran | 4 +++-
 src/GRANULAR/pair_granular_superellipsoid.cpp          | 8 +++-----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 619b3dc05d5..70394b60cfa 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -48,10 +48,10 @@ group           wall type 1
 group           mobile type 2
 
 # Interaction / Pair Style
-# pair_style      gran/hooke/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box
+# pair_style      gran/hertz/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box curvature_gaussian
 # pair_coeff      * *
-pair_style granular/superellipsoid curvature_gaussian
-pair_coeff * * hertz 1000.0 500.0 tangential classic 500.0 1.0 0.4 damping viscoelastic
+pair_style granular/superellipsoid curvature_gaussian bounding_box
+pair_coeff * * hertz 10000.0 200.0 tangential classic 2850 0.0 0.5 damping viscoelastic
 # Computes and Output
 compute         diameter all property/atom shapex shapey shapez
 compute         orient all property/atom quatw quati quatj quatk
@@ -61,7 +61,7 @@ compute         block all property/atom block1 block2
 variable        phi atom "2/c_block[2]"
 variable        theta atom "2/c_block[1]"
 
-# dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
+# dump            mydump all custom 1000 dump_drop_algebraic_block_${blockiness}_new_code.lammpstrj id type x y z fx fy fz tqx tqy tqz c_diameter[*] c_orient[*] v_phi v_theta
 # # Explicit mapping for Ovito
 # dump_modify     mydump colname c_orient[1] quatw colname c_orient[2] quati colname c_orient[3] quatj colname c_orient[4] quatk
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 69db6a171ee..058dd736ef8 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -22,7 +22,9 @@ set             type 1 block 2.0 2.0
 # pair_style      gran/hertz/history/ellipsoid 1e3 NULL 0.0 0.0 0.5 0 bounding_box
 # pair_coeff * *
 pair_style      granular/superellipsoid bounding_box
-# pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
+# Hertz model instead
+# pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
 
 compute diameter all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 2f387c0eb44..2bf2672f2c1 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -144,7 +144,7 @@ PairGranularSuperellipsoid::~PairGranularSuperellipsoid()
 void PairGranularSuperellipsoid::compute(int eflag, int vflag)
 {
   int i, j, k, ii, jj, inum, jnum;
-  double factor_lj, mi, mj, meff;
+  double factor_lj, mi, mj;
 
   int *ilist, *jlist, *numneigh, **firstneigh;
   int *touch, **firsttouch;
@@ -229,8 +229,6 @@ void PairGranularSuperellipsoid::compute(int eflag, int vflag)
       xj = x[j];
       radi = radius[i];
       radj = radius[j];
-      itype = itype;
-      jtype = jtype;
       history_data = &allhistory[size_history * jj];
       xref = (tag[i] < tag[j]) ? xi : xj;
       tagi = tag[i];
@@ -1169,10 +1167,10 @@ void PairGranularSuperellipsoid::calculate_forces()
   double damp_prefactor, Fdamp;
   if (damping_model[itype][jtype] == MASS_VELOCITY) {
     damp_prefactor = damp * meff;
-    Fdamp = -damp_prefactor * vnnr;
+    Fdamp = damp_prefactor * vnnr;
   } else {
     damp_prefactor = damp * meff * contact_radius;
-    Fdamp = -damp_prefactor * vnnr;
+    Fdamp = damp_prefactor * vnnr;
   }
 
   // normal forces = elastic contact + normal velocity damping

From 1682e77c363bf15eb6685bb775201165885c6bf7 Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Thu, 19 Mar 2026 12:56:37 -0600
Subject: [PATCH 153/174] Forgot flag initialization

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 2bf2672f2c1..6731628d6e8 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -482,6 +482,7 @@ void PairGranularSuperellipsoid::coeff(int narg, char **arg)
   if (damping_one == -1) damping_one = VISCOELASTIC;
 
   // granular model init
+  contact_radius_flag = 0;
   if (normal_one == HERTZ || damping_one == VISCOELASTIC) contact_radius_flag = 1;
 
   int count = 0;
@@ -1130,7 +1131,7 @@ void PairGranularSuperellipsoid::calculate_forces()
 
   sub3(vr, vn, vtr);
 
-  vrel = len3(vtr);   
+  vrel = len3(vtr);
 
   // Approximate contact radius
 

From a3e6b5d25ed6e9ed5f28d73cf36afbdadbcb1afb Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Thu, 19 Mar 2026 21:50:59 -0600
Subject: [PATCH 154/174] Another valgrind error

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 6731628d6e8..5f919afe005 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -357,6 +357,7 @@ void PairGranularSuperellipsoid::settings(int narg, char **arg)
 {
   cutoff_global = -1;    // default: will be set based on particle sizes, model choice
   curvature_model = MathExtraSuperellipsoids::CURV_MEAN;
+  bounding_box = 0;
 
   int iarg = 0;
   while (iarg < narg) {

From 4be9d2928362ce54cb53e397421f841bdda3881e Mon Sep 17 00:00:00 2001
From: jtclemm <jtclemm@sandia.gov>
Date: Fri, 20 Mar 2026 14:17:19 -0600
Subject: [PATCH 155/174] Edits/suggestions for superellipsoid granular doc
 page

---
 doc/src/Howto_granular.rst                |    8 +-
 doc/src/pair_granular.rst                 |    2 +-
 doc/src/pair_granular_superellipsoid.rst  |  171 ++--
 src/pair_gran_hertz_history_ellipsoid.cpp |  783 +++++++++++++++
 src/pair_gran_hertz_history_ellipsoid.h   |   44 +
 src/pair_gran_hooke_history_ellipsoid.cpp | 1091 +++++++++++++++++++++
 src/pair_gran_hooke_history_ellipsoid.h   |   91 ++
 7 files changed, 2082 insertions(+), 108 deletions(-)
 create mode 100644 src/pair_gran_hertz_history_ellipsoid.cpp
 create mode 100644 src/pair_gran_hertz_history_ellipsoid.h
 create mode 100644 src/pair_gran_hooke_history_ellipsoid.cpp
 create mode 100644 src/pair_gran_hooke_history_ellipsoid.h

diff --git a/doc/src/Howto_granular.rst b/doc/src/Howto_granular.rst
index b0c801be112..d30b18ad357 100644
--- a/doc/src/Howto_granular.rst
+++ b/doc/src/Howto_granular.rst
@@ -1,17 +1,21 @@
 Granular models
 ===============
 
-Granular system are composed of spherical particles with a diameter,
+Granular system are typically composed of spherical particles with a diameter,
 as opposed to point particles.  This means they have an angular
 velocity and torque can be imparted to them to cause them to rotate.
 
-To run a simulation of a granular model, you will want to use
+To run a simulation of a granular model, you will typically want to use
 the following commands:
 
 * :doc:`atom_style sphere <atom_style>`
 * :doc:`fix nve/sphere <fix_nve_sphere>`
 * :doc:`fix gravity <fix_gravity>`
 
+Aspherical granular particles can be simulated by creating clusters of spherical
+particles using either the :doc:`rigid <fix_rigid>` or :doc:`BPM <Howto_bpm>`
+package or by using :doc:`superellipsoids <pair_granular_superellipsoids>`.
+
 This compute
 
 * :doc:`compute erotate/sphere <compute_erotate_sphere>`
diff --git a/doc/src/pair_granular.rst b/doc/src/pair_granular.rst
index 3c6e6fcefba..82403fe5433 100644
--- a/doc/src/pair_granular.rst
+++ b/doc/src/pair_granular.rst
@@ -63,7 +63,7 @@ global, but can be set to different values for different combinations
 of particle types, as determined by the :doc:`pair_coeff <pair_coeff>`
 command.  If the contact model choice is the same for two particle
 types, the mixing for the cross-coefficients can be carried out
-automatically. This is shown in the last example, where model
+automatically. This is shown in one of the examples, where model
 choices are the same for type 1 - type 1 as for type 2 - type2
 interactions, but coefficients are different. In this case, the
 mixed coefficients for type 1 - type 2 interactions can be determined from
diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 426162ca85c..6b49af80b2b 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -8,40 +8,42 @@ Syntax
 
 .. code-block:: LAMMPS
 
-   pair_style granular/superellipsoid [cutoff] bounding_box curvature_gaussian
+   pair_style granular/superellipsoid cutoff bounding_box curvature_gaussian
 
-* cutoff = global cutoff value (optional).  See discussion below.
-* bounding_box = oriented bounding box check (optional).  See discussion below.
+Optional settings, see discussion below.
+* cutoff = global cutoff value
+* bounding_box = oriented bounding box check
 * curvature_gaussian = gaussian curvature coeff approximation for contact patch
-  (optional).  See discussion below.
 
 Examples
 """"""""
 
 .. code-block:: LAMMPS
 
-   pair_style granular/superellipsoid bounding_box 
+   pair_style granular/superellipsoid bounding_box
    pair_coeff * * hooke 1000.0 50.0 tangential linear_history 1000.0 1.0 0.5 damping mass_velocity
 
    pair_style granular/superellipsoid 10.0 curvature_gaussian
-   pair_coeff * * hertz 1000.0 50.0 tangential classic 500.0 1.0 0.4 damping mass_velocity
+   pair_coeff 1 1 hertz 1000.0 50.0 tangential linear_history 500.0 1.0 0.4 damping viscoelastic
+   pair_coeff 2 2 hertz 500.0 50.0 tangential linear_history 250.0 1.0 0.1 damping viscoelastic
 
 Description
 """""""""""
 
-The *granular/superellipsoid* styles support some of the options for the normal
-and tangential forces resulting from contact between two granular particles
-(rolling and twisting will be added later). The total computed forces
-and torques are the sum of various models selected for the normal and
-tangential.
+The *granular/superellipsoid* style calculates granular contact forces
+between superellipsoidal particles (see :doc:`atom style ellipsoid
+<atom_style>`). Similar to the :doc:`granular pairstyle <pair_granular>`
+which is designed for spherical particles, various normal, damping, and
+tangential contact models are available (rolling and twisting may be
+added later). The total computed forces and torques are the sum of various
+models selected.
 
 All model choices and parameters are entered in the
-:doc:`pair_coeff <pair_coeff>` command, as described below.  Unlike
-e.g. :doc:`pair gran/hooke <pair_gran>`, coefficient values are not
-global, but can be set to different values for different combinations
-of particle types, as determined by the :doc:`pair_coeff <pair_coeff>`
-command.  If the contact model choice is the same for two particle
-types, the mixing for the cross-coefficients can be carried out
+:doc:`pair_coeff <pair_coeff>` command, as described below.  Coefficient
+values are not global, but can be set to different values for different
+combinations of particle types, as determined by the :doc:`pair_coeff
+<pair_coeff>` command.  If the contact model choice is the same for two
+particle types, the mixing for the cross-coefficients can be carried out
 automatically. This is shown in the last example, where model
 choices are the same for type 1 - type 1 as for type 2 - type2
 interactions, but coefficients are different. In this case, the
@@ -69,7 +71,7 @@ of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
 more details.
 
 .. note::
-   
+
     The contact solver strictly requires convex particle shapes to ensure a mathematically
     unique point of deepest penetration. Therefore, the blockiness parameters must be
     :math:`n_1 \ge 2.0` and :math:`n_2 \ge 2.0`. Attempting to simulate concave or "pointy"
@@ -80,9 +82,9 @@ more details.
     For particles with high blockiness exponents (:math:`n > 4.0`) involved in edge-to-edge
     or corner-to-corner contacts, the surface normal vector varies rapidly over small
     distances. The Newton solver may occasionally fail to converge to the strict gradient
-    alignment tolerance (typically :math:`10^{-10}`).
-    You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
-    but the simulation will proceed using this best-effort contact point. 
+    alignment tolerance (typically :math:`10^{-10}`). You may see warning messages in the
+    log indicating that the solver returned a sub-optimal solution, but the simulation will
+    proceed using this best-effort contact point.
 
 Contact detection for these aspherical particles uses the so-called ''midway''
 minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
@@ -93,23 +95,23 @@ the contact point :math:`\mathbf{X}_0` in the global frame is obtained as:
 
     \mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}}
                    \ F_i(\mathbf{X}) + F_j(\mathbf{X})
-                   \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X}) 
+                   \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X})
 
 where the shape function is given by
 :math:`F_i(\mathbf{X}) = f_i(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
 and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
-and rotation matrix of the particle, respectively.
-The constrained minimization problem is solved using Lagrange multipliers and
-Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
+and rotation matrix of the particle, respectively. The constrained minimization
+problem is solved using Lagrange multipliers and Newton's method with a line
+search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
 
 .. note::
 
     The shape function :math:`F` is not a signed distance function and
     does not have unit gradient :math:`\|\nabla F \| \neq 1` so that the
-    so-called ''midway'' point is not actually located at an equal distance from the
-    surface of both particles.
-    For contact between non-identical particles, the contact point tends to
-    be closer to the surface of the smaller and blockier particle.
+    so-called ''midway'' point is not actually located at an equal distance from
+    the surface of both particles. For contact between non-identical particles,
+    the contact point tends to be closer to the surface of the smaller and
+    blockier particle.
 
 .. note::
 
@@ -134,11 +136,11 @@ Newton's method is used to solve this equation for the scalars
     :math:`G(\mathbf{X}) = (F(\mathbf{X})+1)^{1/n_1}-1` which is a quasi-radial distance function formulation.
     This formulation is used to compute the surface points once the midway contact point is found.
     This formulation is also used when the *geometric* keyword is specified in the pair_style command and the following optimization problem is solved instead for the contact point:
-    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} \, \left( r_i G_i(\mathbf{X}) + r_j G_j(\mathbf{X}) \right) \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
+    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} \, \left( r_i G_i(\mathbf{X}) + r_j G_j(\mathbf{X}) \right) \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`,
     where :math:`r_i` and :math:`r_j` are the average radii of the two particles.
     The geometric formulation thus yields a better approximation of the contact point
-    for particles with different sizes, and it is slightly more robust for particles with high *block* exponents, 
-    albeit more computationally expensive.    
+    for particles with different sizes, and it is slightly more robust for particles with high *block* exponents,
+    albeit more computationally expensive.
 
 A hierarchical approach is used to limit the cost of contact detection.
 First, intersection of the bounding spheres of the two particles of bounding
@@ -149,11 +151,15 @@ Then, if the bounding spheres intersect, intersection of the oriented
 bounding box is checked. This is done following the equations of
 :ref:`(Eberly) <GeometricTools>`.
 This check is only performed if the *bounding_box* keyword is used.
-This is advantageous for all particles except for superellipses with 
+This is advantageous for all particle shapes except for superellipses with
 aspect ratio close to one and both blockiness indexes close to 2.
 
 ----------
 
+This section provides an overview of the various normal, tangential,
+and damping contact models available. For additional context, see the
+discussion in the :doc:`granular pairstyle <pair_granular>` doc page
+which includes all of these options.
 
 The first required keyword for the *pair_coeff* command is the normal
 contact model. Currently supported options for normal contact models
@@ -195,28 +201,6 @@ gaussian curvature coefficient :math:`R_i = 1 / \sqrt{\kappa_1 \kappa_2}`, where
 contact point. For *hertz*, the units of the spring constant :math:`k_n` are
 *force*\ /\ *length*\ \^2, or equivalently *pressure*\ .
 
-
-The *atom_style* must be set to *ellipsoid superellipsoid* to enable superellipsoid
-particles' shape parameters (3 lengths and two blockiness parameters), see 
-:doc:`atom_style <atom_style>` for more details.
-. 
-
-.. code-block:: LAMMPS
-
-   atom_style ellipsoid superellipsoid
-
-Newton's third law must be set to *off*.
-
-.. code-block:: LAMMPS
-
-   newton off
-
-
-*fix wall/gran* and *fix wall/gran/region* are currently not supported by this pair_style. 
-In addition to contact forces superellipsoids also tracks the following
-quantities for each contact: contact_point at the previous time step, bounding box separating axis
-index, if the *bounding_box* keyword is used.
-
 In addition, the normal force is augmented by a damping term of the
 following general form:
 
@@ -277,11 +261,10 @@ damping components:
 
 The *pair_coeff* command also requires specification of the tangential
 contact model. The required keyword *tangential* is expected, followed
-by the model choice and associated parameters. Currently supported
-tangential model choices and their expected parameters are as follows:
+by the model choice and associated parameters. Currently there is only
+one supported tangential model with expected parameters as follows:
 
 1. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
-2. *classic* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
 
 Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
 damping :math:`\eta_n` that determines the magnitude of the tangential
@@ -324,7 +307,7 @@ The remaining tangential options all use accumulated tangential
 displacement (i.e. contact history).
 The accumulated tangential displacement is discussed in details below
 in the context of the *linear_history* option. The same treatment of
-the accumulated displacement applies to the other options as well.
+the accumulated displacement will apply to other (future) options as well.
 
 For *tangential linear_history*, the tangential force is given by:
 
@@ -382,12 +365,12 @@ damping) to produce the total force on the particle.
 Unlike perfect spheres, the surface normal at the contact point of a superellipsoid
 does not generally pass through the particle's center of mass. Therefore, both the
 normal and tangential forces act at the contact point to induce a torque on each
-particle. 
+particle.
 
-Using the exact contact point :math:`\mathbf{X}_0` determined by the geometric solver, 
-the branch vectors from the particle centers of mass to the contact point are 
-defined as :math:`\mathbf{r}_{ci} = \mathbf{X}_0 - \mathbf{x}_i` and 
-:math:`\mathbf{r}_{cj} = \mathbf{X}_0 - \mathbf{x}_j`. The resulting torques 
+Using the exact contact point :math:`\mathbf{X}_0` determined by the geometric solver,
+the branch vectors from the particle centers of mass to the contact point are
+defined as :math:`\mathbf{r}_{ci} = \mathbf{X}_0 - \mathbf{x}_i` and
+:math:`\mathbf{r}_{cj} = \mathbf{X}_0 - \mathbf{x}_j`. The resulting torques
 are calculated as:
 
 .. math::
@@ -404,28 +387,16 @@ If two particles are moving away from each other while in contact, there
 is a possibility that the particles could experience an effective attractive
 force due to damping. If the optional *limit_damping* keyword is used, this option
 will zero out the normal component of the force if there is an effective
-attractive force. 
+attractive force.
+
 ----------
 
 LAMMPS automatically sets pairwise cutoff values for *pair_style
-granular* based on particle radii (and in the case of *jkr* pull-off
-distances). In the vast majority of situations, this is adequate.
-However, a cutoff value can optionally be appended to the *pair_style
-granular* command to specify a global cutoff (i.e. a cutoff for all
-atom types). Additionally, the optional *cutoff* keyword can be passed
-to the *pair_coeff* command, followed by a cutoff value.  This will
-set a pairwise cutoff for the atom types in the *pair_coeff* command.
-These options may be useful in some rare cases where the automatic
-cutoff determination is not sufficient, e.g.  if particle diameters
-are being modified via the *fix adapt* command. In that case, the
-global cutoff specified as part of the *pair_style granular* command
-is applied to all atom types, unless it is overridden for a given atom
-type combination by the *cutoff* value specified in the *pair coeff*
-command.  If *cutoff* is only specified in the *pair coeff* command
-and no global cutoff is appended to the *pair_style granular* command,
-then LAMMPS will use that cutoff for the specified atom type
-combination, and automatically set pairwise cutoffs for the remaining
-atom types.
+granular* based on particle radii. In the vast majority of situations,
+this is adequate. However, a cutoff value can optionally be appended
+to the *pair_style granular* command to specify a global cutoff (i.e.
+a cutoff for all atom types). This option may be useful in some rare
+cases where the automatic cutoff determination is not sufficient.
 
 ----------
 
@@ -441,7 +412,7 @@ interactions is set to :math:`\mu_1`, and friction coefficient for type
 2-type 2 interactions is set to :math:`\mu_2`, the friction coefficient
 for type1-type2 interactions is computed as :math:`\sqrt{\mu_1\mu_2}`
 (unless explicitly specified to a different value by a *pair_coeff 1 2
-...* command). 
+...* command).
 
 These pair styles write their information to :doc:`binary restart files <restart>`,
 so a pair_style command does not need to be specified in an input script that reads
@@ -454,32 +425,22 @@ These pair styles can only be used via the *pair* keyword of the
 The single() function of these pair styles returns 0.0 for the energy of a
 pairwise interaction, since energy is not conserved in these dissipative
 potentials.  It also returns only the normal component of the pairwise
-interaction force.  However, the single() function also calculates at least 13
-extra pairwise quantities.  The first 3 are the components of the tangential
-force between particles I and J, acting on particle I.  The fourth is the
-magnitude of this tangential force. The next 3 (5-7) are the components of the
-rolling torque acting on particle I. The next entry (8) is the magnitude of the
-rolling torque. The next entry (9) is the magnitude of the twisting torque
-acting about the vector connecting the two particle centers. The next 3 (10-12)
-are the components of the vector connecting the centers of the two particles
-(x_I - x_J). If a granular sub-model calculates additional contact information
-(e.g. the contact_point, lagrange multiplier and separating axis index), these
-quantities are appended to the end of this list. First, any extra values from
-the normal sub-model are appended followed by the damping, tangential, rolling,
-twisting, then heat models. See the descriptions of specific granular sub-models
-above for information on any extra quantities. If two or more models are defined
-by pair coefficients, the size of the array is set by the maximum number of
-extra quantities in a model but the order of quantities is determined by each
-model's specific set of sub-models. Any unused quantities are zeroed.
-
-These extra quantities can be accessed by the :doc:`compute pair/local
-<compute_pair_local>` command, as *p1*, *p2*, ..., *p17*\ .
+interaction force.
 
 ----------
 
 Restrictions
 """"""""""""
 
+The *atom_style* must be set to *ellipsoid superellipsoid* to enable superellipsoid
+particles' shape parameters (3 lengths and two blockiness parameters), see
+:doc:`atom_style <atom_style>` for more details.
+
+This pair style require Newton's third law be set to *off* for pair interactions..
+
+There are currently no versions of *fix wall/gran* or *fix wall/gran/region* that
+are compatible with the superellipsoid particles.
+
 This pair style is part of the GRANULAR package.  It is
 only enabled if LAMMPS was built with that package.
 See the :doc:`Build package <Build_package>` page for more info.
@@ -509,7 +470,7 @@ Related commands
 """"""""""""""""
 
 :doc:`pair_coeff <pair_coeff>`
-:doc:`pair gran/\* <pair_gran>`
+:doc:`pair granular <pair_granular>`
 
 Default
 """""""
diff --git a/src/pair_gran_hertz_history_ellipsoid.cpp b/src/pair_gran_hertz_history_ellipsoid.cpp
new file mode 100644
index 00000000000..93ac6feab97
--- /dev/null
+++ b/src/pair_gran_hertz_history_ellipsoid.cpp
@@ -0,0 +1,783 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hertz_history_ellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
+    PairGranHookeHistoryEllipsoid(lmp)
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double wr1, wr2, wr3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht, polyhertz;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
+
+  double shapex, shapey, shapez;    // ellipsoid shape params
+  double quat1, quat2, quat3, quat4;
+  double block1, block2;
+
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+
+      // TODO: Below could be a `touch()` function
+      bool touching = false;
+      if (rsq >= radsum * radsum) {
+        touching = false;
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if (bounding_box) {
+          int cached_axis = (int) (allhistory[7 + size_history * jj]);
+          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+          if (new_axis != -1) {
+            skip_contact_detection = true;
+            allhistory[7 + size_history * jj] = (double) new_axis;
+          }
+        }
+        if (skip_contact_detection)
+          touching = false;
+        else {
+          // superellipsoid contact detection between atoms i and j
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
+            X0[3] = X0_prev[3];
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
+            if (status == 0)
+              touching = true;
+            else if (status == 1)
+              touching = false;
+            else
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed"
+                             "between particle {} and particle {}",
+                             atom->tag[i], atom->tag[j]);
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
+              if (status == 0)
+                touching = true;
+              else if (status == 1)
+                touching = false;
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
+              }
+            }
+          }
+        }
+      }
+
+      if (!touching) {
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) {
+          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
+          history[k] = 0.0;
+        }
+      } else {
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
+        X0_prev[3] = X0[3];
+
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
+
+        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
+          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        } else {
+          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+        }
+
+        polyhertz = sqrt((overlap1 + overlap2) /
+                         (curvature_i + curvature_j));    // hertzian contact radius approximation
+
+        // branch vectors
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
+
+        // we need to take the cross product of omega
+
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
+
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+        // relative translational velocity
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
+
+        // normal component
+
+        vn1 = nij[0] * vr1;    // dot product
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
+
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+
+        // tangential component
+
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
+
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i];
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hertzian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+        ccel *= polyhertz;
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allhistory[size_history * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx;
+
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx;
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
+        }
+
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
+{
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  bounding_box = 0;
+  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
+
+  for (int iarg = 6; iarg < narg; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
+      limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
+      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  size_history = 8;    // reset to default size
+  if (bounding_box == 0) size_history--;
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+
+  // convert Kn and Kt from pressure units to force/distance^2
+
+  kn /= force->nktv2p;
+  kt /= force->nktv2p;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
+{
+  double radi, radj, radsum;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  bool skip_contact_detection(false);
+  if (bounding_box) {
+    int cached_axis =
+        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
+                                                                           Rj, shapej, cached_axis);
+    if (new_axis != -1) skip_contact_detection = true;
+    if (skip_contact_detection) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // superellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij, contact_formulation);
+    if (status == 1) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+    if (status != 0)
+      error->all(FLERR,
+                 "Ellipsoid contact detection (old contact) failed"
+                 "between particle {} and particle {}",
+                 atom->tag[i], atom->tag[j]);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->all(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
+  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
+  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
+
+  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
+    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
+                                                                          surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
+                                                                          surf_point_j, x[j]);
+  } else {
+    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
+    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
+        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
+  }
+
+  polyhertz = sqrt((overlap1 + overlap2) /
+                   (curvature_i + curvature_j));    // hertzian contact radius approximation
+
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3], ey_space[3], ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
+
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+  // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
+
+  double **v = atom->v;
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
+
+  // normal component
+
+  vn1 = nij[0] * vr1;    // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
+
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
+
+  // tangential component
+
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
+
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+  ccel *= polyhertz;
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  double *shear = &allhistory[size_history * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // set force (normalized by r) and return no energy
+
+  fforce = ccel / sqrt(rsq);
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
+
+  return 0.0;
+}
diff --git a/src/pair_gran_hertz_history_ellipsoid.h b/src/pair_gran_hertz_history_ellipsoid.h
new file mode 100644
index 00000000000..b02f6b733e7
--- /dev/null
+++ b/src/pair_gran_hertz_history_ellipsoid.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
+
+#include "pair_gran_hooke_history_ellipsoid.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
+ public:
+  PairGranHertzHistoryEllipsoid(class LAMMPS *);
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  double single(int, int, int, int, double, double, double, double &) override;
+
+ protected:
+  int curvature_model;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/pair_gran_hooke_history_ellipsoid.cpp b/src/pair_gran_hooke_history_ellipsoid.cpp
new file mode 100644
index 00000000000..01a6c4b0343
--- /dev/null
+++ b/src/pair_gran_hooke_history_ellipsoid.cpp
@@ -0,0 +1,1091 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hooke_history_ellipsoid.h"
+
+#include "atom.h"
+#include "atom_vec_ellipsoid.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "fix_dummy.h"
+#include "fix_neigh_history.h"
+#include "force.h"
+#include "math_extra.h"    // probably needed for some computations
+#include "math_extra_superellipsoids.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+#include <iostream>
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+static constexpr int NUMSTEP_INITIAL_GUESS = 5;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
+{
+  single_enable = 1;
+  no_virial_fdotr_compute = 1;
+  centroidstressflag = CENTROID_NOTAVAIL;
+  finitecutflag = 1;
+  use_history = 1;
+  size_history =
+      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
+
+  single_extra = 10;
+  svector = new double[10];
+
+  neighprev = 0;
+
+  nmax = 0;
+  mass_rigid = nullptr;
+
+  // set comm size needed by this Pair if used with fix rigid
+
+  comm_forward = 1;
+
+  // keep default behavior of history[i][j] = -history[j][i]
+
+  nondefault_history_transfer = 1;
+
+  // create dummy fix as placeholder for FixNeighHistory
+  // this is so final order of Modify:fix will conform to input script
+
+  fix_history = nullptr;
+  fix_dummy = dynamic_cast<FixDummy *>(
+      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
+
+  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
+{
+  if (copymode) return;
+
+  delete[] svector;
+
+  if (!fix_history)
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
+  else
+    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+
+    delete[] onerad_dynamic;
+    delete[] onerad_frozen;
+    delete[] maxrad_dynamic;
+    delete[] maxrad_frozen;
+  }
+
+  memory->destroy(mass_rigid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
+  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
+  double vtr1, vtr2, vtr3, vrel;
+  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
+  double fn, fs, fs1, fs2, fs3;
+  double shrmag, rsht;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  int *touch, **firsttouch;
+  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
+
+  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
+      overlap2, omegai[3], omegaj[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+
+  ev_init(eflag, vflag);
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // update rigid body info for owned & ghost atoms if using FixRigid masses
+  // body[i] = which body atom I is in, -1 if none
+  // mass_body = mass of each rigid body
+
+  if (fix_rigid && neighbor->ago == 0) {
+    int tmp;
+    int *body = (int *) fix_rigid->extract("body", tmp);
+    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
+    if (atom->nmax > nmax) {
+      memory->destroy(mass_rigid);
+      nmax = atom->nmax;
+      memory->create(mass_rigid, nmax, "pair:mass_rigid");
+    }
+    int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++)
+      if (body[i] >= 0)
+        mass_rigid[i] = mass_body[body[i]];
+      else
+        mass_rigid[i] = 0.0;
+    comm->forward_comm(this);
+  }
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **angmom = atom->angmom;
+  double **torque = atom->torque;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  double *special_lj = force->special_lj;
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = fix_history->firstflag;
+  firsthistory = fix_history->firstvalue;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+
+    touch = firsttouch[i];
+    allhistory = firsthistory[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      if (factor_lj == 0) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      X0_prev = &allhistory[3 + size_history * jj];
+      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+
+      // TODO: Below could be a `touch()` function
+      bool touching = false;
+      if (rsq >= radsum * radsum) {
+        touching = false;
+      } else {
+        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+        bool skip_contact_detection(false);
+        if (bounding_box) {
+          int cached_axis = (int) (allhistory[7 + size_history * jj]);
+          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
+              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
+          if (new_axis != -1) {
+            skip_contact_detection = true;
+            allhistory[7 + size_history * jj] = (double) new_axis;
+          }
+        }
+        if (skip_contact_detection)
+          touching = false;
+        else {
+          // superellipsoid contact detection between atoms i and j
+          flagi = bonus[ellipsoid[i]].type;
+          flagj = bonus[ellipsoid[j]].type;
+          if (touch[jj] == 1) {
+            // Continued contact: use grain true shape and last contact point with respect to grain i
+            X0[0] = x[ref_index][0] + X0_prev[0];
+            X0[1] = x[ref_index][1] + X0_prev[1];
+            X0[2] = x[ref_index][2] + X0_prev[2];
+            X0[3] = X0_prev[3];
+            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
+            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
+            int status = MathExtraSuperellipsoids::determine_contact_point(
+                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
+                contact_formulation);
+            if (status == 0)
+              touching = true;
+            else if (status == 1)
+              touching = false;
+            else {
+              error->warning(FLERR,
+                             "Ellipsoid contact detection (old contact) failed "
+                             "between particle {} and particle {} ",
+                             atom->tag[i], atom->tag[j]);
+            }
+          } else {
+            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
+
+            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
+            // but this is good enough. We might even be able to use radi and radj which is cheaper
+            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
+
+            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+              shapei[0] = shapei[1] = shapei[2] = reqi;
+              shapej[0] = shapej[1] = shapej[2] = reqj;
+              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+              // force ellipsoid flag for first initial guess iteration.
+              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+              int status = MathExtraSuperellipsoids::determine_contact_point(
+                  x[i], Ri, shapei, blocki,
+                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
+                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
+                  contact_formulation);
+
+              if (status == 0)
+                touching = true;
+              else if (status == 1)
+                touching = false;
+              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
+                // keep trying until last iteration to avoid erroring out too early
+                error->warning(FLERR,
+                               "Ellipsoid contact detection (new contact) failed"
+                               "between particle {} and particle {}",
+                               atom->tag[i], atom->tag[j]);
+              }
+            }
+          }
+        }
+      }
+
+      if (!touching) {
+        // unset non-touching neighbors
+
+        touch[jj] = 0;
+        history = &allhistory[size_history * jj];
+        for (int k = 0; k < size_history; k++) {
+          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
+          history[k] = 0.0;
+        }
+      } else {
+        // Store contact point with respect to grain i for next time step
+        // This is crucial for periodic BCs when grains can move by large amount in one time step
+        // Keeping the previous contact point relative to global frame would lead to bad initial guess
+        X0_prev[0] = X0[0] - x[ref_index][0];
+        X0_prev[1] = X0[1] - x[ref_index][1];
+        X0_prev[2] = X0[2] - x[ref_index][2];
+        X0_prev[3] = X0[3];
+
+        double nji[3] = {-nij[0], -nij[1], -nij[2]};
+        // compute overlap depth along normal direction for each grain
+        // overlap is positive for both grains
+        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
+                                                                      nij, x[i]);
+        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
+                                                                      nji, x[j]);
+
+        // branch vectors
+        double cr1[3], cr2[3];
+        MathExtra::sub3(X0, x[i], cr1);
+        MathExtra::sub3(X0, x[j], cr2);
+
+        // we need to take the cross product of omega
+
+        double ex_space[3], ey_space[3], ez_space[3];
+        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[i]].inertia, omegai);
+        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
+                                   bonus[ellipsoid[j]].inertia, omegaj);
+
+        double omega_cross_r1[3], omega_cross_r2[3];
+        MathExtra::cross3(omegai, cr1, omega_cross_r1);
+        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+        // relative translational velocity
+        // compute directly the sum of relative translational velocity at contact point
+        // since rotational velocity contribution is different for superellipsoids
+        double cv1[3], cv2[3];
+
+        cv1[0] = v[i][0] + omega_cross_r1[0];
+        cv1[1] = v[i][1] + omega_cross_r1[1];
+        cv1[2] = v[i][2] + omega_cross_r1[2];
+
+        cv2[0] = v[j][0] + omega_cross_r2[0];
+        cv2[1] = v[j][1] + omega_cross_r2[1];
+        cv2[2] = v[j][2] + omega_cross_r2[2];
+
+        // total relavtive velocity at contact point
+        vr1 = cv1[0] - cv2[0];
+        vr2 = cv1[1] - cv2[1];
+        vr3 = cv1[2] - cv2[2];
+
+        // normal component
+
+        vn1 = nij[0] * vr1;    // dot product
+        vn2 = nij[1] * vr2;
+        vn3 = nij[2] * vr3;
+
+        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
+
+        // tangential component
+
+        vtr1 = vr1 - vnnr * nij[0];
+        vtr2 = vr2 - vnnr * nij[1];
+        vtr3 = vr3 - vnnr * nij[2];
+
+        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+        vrel = sqrt(vrel);
+
+        // meff = effective mass of pair of particles
+        // if I or J part of rigid body, use body mass
+        // if I or J is frozen, meff is other particle
+
+        mi = rmass[i];
+        mj = rmass[j];
+        if (fix_rigid) {
+          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+        }
+
+        meff = mi * mj / (mi + mj);
+        if (mask[i] & freeze_group_bit) meff = mj;
+        if (mask[j] & freeze_group_bit) meff = mi;
+
+        // normal forces = Hookian contact + normal velocity damping
+
+        damp = meff * gamman * vnnr;
+        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+        // shear history effects
+
+        touch[jj] = 1;
+        shear = &allhistory[size_history * jj];
+
+        if (shearupdate) {
+          shear[0] += vtr1 * dt;
+          shear[1] += vtr2 * dt;
+          shear[2] += vtr3 * dt;
+        }
+        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+        if (shearupdate) {
+
+          // rotate shear displacements
+
+          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
+          shear[0] -= rsht * nij[0];
+          shear[1] -= rsht * nij[1];
+          shear[2] -= rsht * nij[2];
+        }
+
+        // tangential forces = shear + tangential velocity damping
+
+        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+        // rescale frictional displacements and forces if needed
+
+        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+        fn = xmu * fabs(ccel);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] =
+                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
+            shear[1] =
+                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
+            shear[2] =
+                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
+            fs1 *= fn / fs;
+            fs2 *= fn / fs;
+            fs3 *= fn / fs;
+          } else
+            fs1 = fs2 = fs3 = 0.0;
+        }
+
+        // forces & torques
+
+        fx = nji[0] * ccel + fs1;
+        fy = nji[1] * ccel + fs2;
+        fz = nji[2] * ccel + fs3;
+        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
+        fy *= factor_lj;
+        fz *= factor_lj;
+        f[i][0] += fx;
+        f[i][1] += fy;
+        f[i][2] += fz;
+        // torques are cross prodcuts of branch vector with the entire force at contact point
+
+        tor1 = cr1[1] * fz - cr1[2] * fy;
+        tor2 = cr1[2] * fx - cr1[0] * fz;
+        tor3 = cr1[0] * fy - cr1[1] * fx;
+
+        torque[i][0] += tor1;
+        torque[i][1] += tor2;
+        torque[i][2] += tor3;
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fx;
+          f[j][1] -= fy;
+          f[j][2] -= fz;
+
+          tor1 = cr2[1] * fz - cr2[2] * fy;
+          tor2 = cr2[2] * fx - cr2[0] * fz;
+          tor3 = cr2[0] * fy - cr2[1] * fx;
+
+          torque[j][0] -= tor1;
+          torque[j][1] -= tor2;
+          torque[j][2] -= tor3;
+        }
+
+        if (evflag)
+          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
+                       delz);    // Correct even for non-spherical particles
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag, n + 1, n + 1, "pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++) setflag[i][j] = 0;
+
+  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
+
+  onerad_dynamic = new double[n + 1];
+  onerad_frozen = new double[n + 1];
+  maxrad_dynamic = new double[n + 1];
+  maxrad_frozen = new double[n + 1];
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
+{
+  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
+
+  kn = utils::numeric(FLERR, arg[0], false, lmp);
+  if (strcmp(arg[1], "NULL") == 0)
+    kt = kn * 2.0 / 7.0;
+  else
+    kt = utils::numeric(FLERR, arg[1], false, lmp);
+
+  gamman = utils::numeric(FLERR, arg[2], false, lmp);
+  if (strcmp(arg[3], "NULL") == 0)
+    gammat = 0.5 * gamman;
+  else
+    gammat = utils::numeric(FLERR, arg[3], false, lmp);
+
+  xmu = utils::numeric(FLERR, arg[4], false, lmp);
+  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dampflag == 0) gammat = 0.0;
+
+  limit_damping = 0;
+  bounding_box = 0;
+  for (int iarg = 6; iarg < narg; iarg++) {
+    if (strcmp(arg[iarg], "limit_damping") == 0)
+      limit_damping = 1;
+    else if (strcmp(arg[iarg], "bounding_box") == 0)
+      bounding_box = 1;
+    else if (strcmp(arg[iarg], "geometric") == 0)
+      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
+    else
+      error->all(FLERR, "Illegal pair_style command");
+  }
+
+  size_history = 8;    // reset to default for safety
+  if (bounding_box == 0) size_history--;
+
+  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
+      dampflag < 0 || dampflag > 1)
+    error->all(FLERR, "Illegal pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+  if (!allocated) allocate();
+
+  int ilo, ihi, jlo, jhi;
+  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::init_style()
+{
+  int i;
+
+  // error and warning checks
+
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
+    error->all(FLERR,
+               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
+               "superellipdoid flag");
+  if (comm->ghost_velocity == 0)
+    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
+
+  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
+  int *ellipsoid = atom->ellipsoid;
+  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
+
+  int nlocal = atom->nlocal;
+  for (i = 0; i < nlocal; i++) {
+    if (ellipsoid[i] < 0) {
+      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
+    }
+  }
+
+  // need a granular neighbor list
+
+  if (use_history)
+    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
+  else
+    neighbor->add_request(this, NeighConst::REQ_SIZE);
+
+  dt = update->dt;
+
+  // if history is stored and first init, create Fix to store history
+  // it replaces FixDummy, created in the constructor
+  // this is so its order in the fix list is preserved
+
+  if (use_history && (fix_history == nullptr)) {
+    auto cmd =
+        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
+    fix_history->pair = this;
+  }
+
+  // check for FixFreeze and set freeze_group_bit
+
+  auto fixlist = modify->get_fix_by_style("^freeze");
+  if (fixlist.size() == 0)
+    freeze_group_bit = 0;
+  else if (fixlist.size() > 1)
+    error->all(FLERR, "Only one fix freeze command at a time allowed");
+  else
+    freeze_group_bit = fixlist.front()->groupbit;
+
+  // check for FixRigid so can extract rigid body masses
+
+  fix_rigid = nullptr;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix->rigid_flag) {
+      if (fix_rigid)
+        error->all(FLERR, "Only one fix rigid command at a time allowed");
+      else
+        fix_rigid = ifix;
+    }
+  }
+
+  // check for FixPour and FixDeposit so can extract particle radii
+
+  auto pours = modify->get_fix_by_style("^pour");
+  auto deps = modify->get_fix_by_style("^deposit");
+
+  // set maxrad_dynamic and maxrad_frozen for each type
+  // include future FixPour and FixDeposit particles as dynamic
+
+  int itype;
+  for (i = 1; i <= atom->ntypes; i++) {
+    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
+    for (auto &ipour : pours) {
+      itype = i;
+      double maxrad = *((double *) ipour->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+    for (auto &idep : deps) {
+      itype = i;
+      double maxrad = *((double *) idep->extract("radius", itype));
+      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
+    }
+  }
+
+  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
+
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  int *type = atom->type;
+
+  for (i = 0; i < nlocal; i++) {
+    if (mask[i] & freeze_group_bit)
+      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
+    else
+      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
+  }
+
+  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
+
+  // set fix which stores history info
+
+  if (use_history) {
+    fix_history = dynamic_cast<FixNeighHistory *>(
+        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
+    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
+{
+  if (!allocated) allocate();
+
+  // cutoff = sum of max I,J radii for
+  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
+
+  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
+  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
+  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
+  return cutoff;
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i, j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  int i, j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
+      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
+    }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
+{
+  fwrite(&kn, sizeof(double), 1, fp);
+  fwrite(&kt, sizeof(double), 1, fp);
+  fwrite(&gamman, sizeof(double), 1, fp);
+  fwrite(&gammat, sizeof(double), 1, fp);
+  fwrite(&xmu, sizeof(double), 1, fp);
+  fwrite(&dampflag, sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::reset_dt()
+{
+  dt = update->dt;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
+                                             double /*factor_coul*/, double /*factor_lj*/,
+                                             double &fforce)
+{
+  double radi, radj, radsum;
+  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
+  double mi, mj, meff, damp, ccel;
+  double vtr1, vtr2, vtr3, vrel, shrmag;
+  double fs1, fs2, fs3, fs, fn;
+
+  double *radius = atom->radius;
+  radi = radius[i];
+  radj = radius[j];
+  radsum = radi + radj;
+
+  double **x = atom->x;
+
+  // history effects
+  // neighprev = index of found neigh on previous call
+  // search entire jnum list of neighbors of I for neighbor J
+  // start from neighprev, since will typically be next neighbor
+  // reset neighprev to 0 as necessary
+  int jnum = list->numneigh[i];
+  int *jlist = list->firstneigh[i];
+  int *touch = fix_history->firstflag[i];
+  double *allhistory = fix_history->firstvalue[i];
+  for (int jj = 0; jj < jnum; jj++) {
+    neighprev++;
+    if (neighprev >= jnum) neighprev = 0;
+    if (jlist[neighprev] == j) break;
+  }
+
+  if (rsq >= radsum * radsum) {
+    fforce = 0.0;
+    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+    return 0.0;
+  }
+  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
+  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
+  int *ellipsoid = atom->ellipsoid;
+  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
+  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
+  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
+  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
+  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
+  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
+  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
+  bool skip_contact_detection = false;
+  if (bounding_box) {
+    int cached_axis =
+        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
+    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
+                                                                           Rj, shapej, cached_axis);
+    if (new_axis != -1) skip_contact_detection = true;
+    if (skip_contact_detection) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+  }
+  // superellipsoid contact detection between atoms i and j
+  double X0[4], nij[3];
+  AtomVecEllipsoid::BlockType flagi, flagj;
+  flagi = bonus[ellipsoid[i]].type;
+  flagj = bonus[ellipsoid[j]].type;
+  double *X0_prev = &allhistory[3 + size_history * neighprev];
+  if (touch[neighprev] == 1) {
+    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
+    // Continued contact: use grain true shape and last contact point
+    X0[0] = X0_prev[0] + x[ref_index][0];
+    X0[1] = X0_prev[1] + x[ref_index][1];
+    X0[2] = X0_prev[2] + x[ref_index][2];
+    X0[3] = X0_prev[3];
+    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
+                                                                   x[j], Rj, shapej, blockj, flagj,
+                                                                   X0, nij, contact_formulation);
+    if (status == 1) {
+      fforce = 0.0;
+      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+      return 0.0;
+    }
+    if (status != 0)
+      error->warning(FLERR,
+                     "Ellipsoid contact detection (old contact) failed"
+                     "between particle {} and particle {}",
+                     atom->tag[i], atom->tag[j]);
+  } else {
+    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
+    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
+    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
+    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
+    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
+      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
+      shapei[0] = shapei[1] = shapei[2] = reqi;
+      shapej[0] = shapej[1] = shapej[2] = reqj;
+      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
+      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
+      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
+      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
+      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
+      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
+
+      // force ellipsoid flag for first initial guess iteration.
+      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
+      int status = MathExtraSuperellipsoids::determine_contact_point(
+          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
+          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
+          X0, nij, contact_formulation);
+      if (status == 1) {
+        fforce = 0.0;
+        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
+        return 0.0;
+      }
+      if (status != 0)
+        error->one(FLERR,
+                   "Ellipsoid contact detection (new contact) failed"
+                   "between particle {} and particle {}",
+                   atom->tag[i], atom->tag[j]);
+    }
+  }
+  double overlap1, overlap2, omegai[3], omegaj[3];
+  double nji[3] = {-nij[0], -nij[1], -nij[2]};
+  overlap1 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
+  overlap2 =
+      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
+
+  double cr1[3], cr2[3];
+  MathExtra::sub3(X0, x[i], cr1);
+  MathExtra::sub3(X0, x[j], cr2);
+
+  double ex_space[3], ey_space[3], ez_space[3];
+  double **angmom = atom->angmom;
+  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
+                             omegai);
+  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
+  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
+                             omegaj);
+
+  double omega_cross_r1[3], omega_cross_r2[3];
+  MathExtra::cross3(omegai, cr1, omega_cross_r1);
+  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
+
+  // relative translational velocity
+  // compute directly the sum of relative translational velocity at contact point
+  // since rotational velocity contribution is different for superellipsoids
+
+  double **v = atom->v;
+  double cv1[3], cv2[3];
+
+  cv1[0] = v[i][0] + omega_cross_r1[0];
+  cv1[1] = v[i][1] + omega_cross_r1[1];
+  cv1[2] = v[i][2] + omega_cross_r1[2];
+
+  cv2[0] = v[j][0] + omega_cross_r2[0];
+  cv2[1] = v[j][1] + omega_cross_r2[1];
+  cv2[2] = v[j][2] + omega_cross_r2[2];
+
+  // total relavtive velocity at contact point
+
+  vr1 = cv1[0] - cv2[0];
+  vr2 = cv1[1] - cv2[1];
+  vr3 = cv1[2] - cv2[2];
+
+  // normal component
+
+  vn1 = nij[0] * vr1;    // dot product
+  vn2 = nij[1] * vr2;
+  vn3 = nij[2] * vr3;
+
+  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
+
+  // tangential component
+
+  vtr1 = vr1 - vnnr * nij[0];
+  vtr2 = vr2 - vnnr * nij[1];
+  vtr3 = vr3 - vnnr * nij[2];
+
+  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
+  vrel = sqrt(vrel);
+
+  // meff = effective mass of pair of particles
+  // if I or J part of rigid body, use body mass
+  // if I or J is frozen, meff is other particle
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+
+  mi = rmass[i];
+  mj = rmass[j];
+  if (fix_rigid) {
+    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
+    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
+  }
+
+  meff = mi * mj / (mi + mj);
+  if (mask[i] & freeze_group_bit) meff = mj;
+  if (mask[j] & freeze_group_bit) meff = mi;
+
+  // normal forces = Hookian contact + normal velocity damping
+
+  damp = meff * gamman * vnnr;
+  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
+  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
+
+  double *shear = &allhistory[size_history * neighprev];
+  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
+
+  // tangential forces = shear + tangential velocity damping
+
+  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
+  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
+  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
+
+  // rescale frictional displacements and forces if needed
+
+  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
+  fn = xmu * fabs(ccel);
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      fs1 *= fn / fs;
+      fs2 *= fn / fs;
+      fs3 *= fn / fs;
+      fs *= fn / fs;
+    } else
+      fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // set force (normalized by r) and return no energy
+
+  fforce = ccel / sqrt(rsq);
+
+  // set single_extra quantities
+
+  svector[0] = fs1;
+  svector[1] = fs2;
+  svector[2] = fs3;
+  svector[3] = fs;
+  svector[4] = vn1;
+  svector[5] = vn2;
+  svector[6] = vn3;
+  svector[7] = vtr1;
+  svector[8] = vtr2;
+  svector[9] = vtr3;
+
+  return 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
+                                                     int /*pbc_flag*/, int * /*pbc*/)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = mass_rigid[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairGranHookeHistoryEllipsoid::memory_usage()
+{
+  double bytes = (double) nmax * sizeof(double);
+  return bytes;
+}
+
+void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
+                                                     int /*jtype*/)
+{
+  // Simple direct copy of all history variables (shear, contact point, axis)
+  for (int i = 0; i < size_history; i++) {
+    if (i < 3) target[i] = -source[i];    //shear
+    target[i] = source[i];
+  }
+}
diff --git a/src/pair_gran_hooke_history_ellipsoid.h b/src/pair_gran_hooke_history_ellipsoid.h
new file mode 100644
index 00000000000..e3bcf3e038c
--- /dev/null
+++ b/src/pair_gran_hooke_history_ellipsoid.h
@@ -0,0 +1,91 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHookeHistoryEllipsoid : public Pair {
+ public:
+  PairGranHookeHistoryEllipsoid(class LAMMPS *);
+  ~PairGranHookeHistoryEllipsoid() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_restart_settings(FILE *) override;
+  void read_restart_settings(FILE *) override;
+  void reset_dt() override;
+  double single(int, int, int, int, double, double, double, double &) override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  double memory_usage() override;
+  void transfer_history(double *, double *, int, int) override;
+
+ protected:
+  double kn, kt, gamman, gammat, xmu;
+  int dampflag;
+  double dt;
+  int freeze_group_bit;
+  int use_history;
+  int limit_damping;
+  int bounding_box;
+
+  int neighprev;
+  double *onerad_dynamic, *onerad_frozen;
+  double *maxrad_dynamic, *maxrad_frozen;
+
+  int size_history;
+
+  class FixDummy *fix_dummy;
+  class FixNeighHistory *fix_history;
+
+  // storage of rigid body masses for use in granular interactions
+
+  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
+  double *mass_rigid;      // rigid mass for owned+ghost atoms
+  int nmax;                // allocated size of mass_rigid
+
+  int contact_formulation;
+
+  void allocate();
+
+ private:
+  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
+  static double
+  shape_and_gradient_local(const double *, const double *, const double *,
+                           double *);    // would return a vector of temporary variables
+  static double hessian_local(
+      const double *, const double *, const double *,
+      double *);    // would use the above vector of temporary variables to compute local hessian
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From 1412af4b7c4b2aa1113be1bc00c291ae30e6cfdd Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 09:13:09 +0100
Subject: [PATCH 156/174] Removed legacy pair gran ellipsoids files

---
 .../pair_gran_hertz_history_ellipsoid.cpp     |  783 ------------
 .../pair_gran_hertz_history_ellipsoid.h       |   44 -
 .../pair_gran_hooke_history_ellipsoid.cpp     | 1091 -----------------
 .../pair_gran_hooke_history_ellipsoid.h       |   91 --
 src/pair_gran_hertz_history_ellipsoid.cpp     |  783 ------------
 src/pair_gran_hertz_history_ellipsoid.h       |   44 -
 src/pair_gran_hooke_history_ellipsoid.cpp     | 1091 -----------------
 src/pair_gran_hooke_history_ellipsoid.h       |   91 --
 8 files changed, 4018 deletions(-)
 delete mode 100644 src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
 delete mode 100644 src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
 delete mode 100644 src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
 delete mode 100644 src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
 delete mode 100644 src/pair_gran_hertz_history_ellipsoid.cpp
 delete mode 100644 src/pair_gran_hertz_history_ellipsoid.h
 delete mode 100644 src/pair_gran_hooke_history_ellipsoid.cpp
 delete mode 100644 src/pair_gran_hooke_history_ellipsoid.h

diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
deleted file mode 100644
index 93ac6feab97..00000000000
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.cpp
+++ /dev/null
@@ -1,783 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hertz_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
-    PairGranHookeHistoryEllipsoid(lmp)
-{
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double wr1, wr2, wr3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht, polyhertz;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
-
-  double shapex, shapey, shapez;    // ellipsoid shape params
-  double quat1, quat2, quat3, quat4;
-  double block1, block2;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching = false;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          int cached_axis = (int) (allhistory[7 + size_history * jj]);
-          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-          if (new_axis != -1) {
-            skip_contact_detection = true;
-            allhistory[7 + size_history * jj] = (double) new_axis;
-          }
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed"
-                             "between particle {} and particle {}",
-                             atom->tag[i], atom->tag[j]);
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) {
-          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
-          history[k] = 0.0;
-        }
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        } else {
-          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        }
-
-        polyhertz = sqrt((overlap1 + overlap2) /
-                         (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hertzian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        ccel *= polyhertz;
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
-
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
-      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default size
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-
-  // convert Kn and Kt from pressure units to force/distance^2
-
-  kn /= force->nktv2p;
-  kt /= force->nktv2p;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  bool skip_contact_detection(false);
-  if (bounding_box) {
-    int cached_axis =
-        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
-                                                                           Rj, shapej, cached_axis);
-    if (new_axis != -1) skip_contact_detection = true;
-    if (skip_contact_detection) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->all(FLERR,
-                 "Ellipsoid contact detection (old contact) failed"
-                 "between particle {} and particle {}",
-                 atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->all(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
-                                                                          surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
-                                                                          surf_point_j, x[j]);
-  } else {
-    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-  }
-
-  polyhertz = sqrt((overlap1 + overlap2) /
-                   (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  ccel *= polyhertz;
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
diff --git a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h b/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
deleted file mode 100644
index b02f6b733e7..00000000000
--- a/src/GRANULAR/pair_gran_hertz_history_ellipsoid.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
- public:
-  PairGranHertzHistoryEllipsoid(class LAMMPS *);
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  double single(int, int, int, int, double, double, double, double &) override;
-
- protected:
-  int curvature_model;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
deleted file mode 100644
index 01a6c4b0343..00000000000
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.cpp
+++ /dev/null
@@ -1,1091 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-#include <iostream>
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
-{
-  single_enable = 1;
-  no_virial_fdotr_compute = 1;
-  centroidstressflag = CENTROID_NOTAVAIL;
-  finitecutflag = 1;
-  use_history = 1;
-  size_history =
-      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
-
-  single_extra = 10;
-  svector = new double[10];
-
-  neighprev = 0;
-
-  nmax = 0;
-  mass_rigid = nullptr;
-
-  // set comm size needed by this Pair if used with fix rigid
-
-  comm_forward = 1;
-
-  // keep default behavior of history[i][j] = -history[j][i]
-
-  nondefault_history_transfer = 1;
-
-  // create dummy fix as placeholder for FixNeighHistory
-  // this is so final order of Modify:fix will conform to input script
-
-  fix_history = nullptr;
-  fix_dummy = dynamic_cast<FixDummy *>(
-      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
-
-  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-}
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
-{
-  if (copymode) return;
-
-  delete[] svector;
-
-  if (!fix_history)
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
-  else
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
-
-  if (allocated) {
-    memory->destroy(setflag);
-    memory->destroy(cutsq);
-
-    delete[] onerad_dynamic;
-    delete[] onerad_frozen;
-    delete[] maxrad_dynamic;
-    delete[] maxrad_frozen;
-  }
-
-  memory->destroy(mass_rigid);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching = false;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          int cached_axis = (int) (allhistory[7 + size_history * jj]);
-          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-          if (new_axis != -1) {
-            skip_contact_detection = true;
-            allhistory[7 + size_history * jj] = (double) new_axis;
-          }
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
-            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else {
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed "
-                             "between particle {} and particle {} ",
-                             atom->tag[i], atom->tag[j]);
-            }
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) {
-          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
-          history[k] = 0.0;
-        }
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hookian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   allocate all arrays
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::allocate()
-{
-  allocated = 1;
-  int n = atom->ntypes;
-
-  memory->create(setflag, n + 1, n + 1, "pair:setflag");
-  for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++) setflag[i][j] = 0;
-
-  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
-
-  onerad_dynamic = new double[n + 1];
-  onerad_frozen = new double[n + 1];
-  maxrad_dynamic = new double[n + 1];
-  maxrad_frozen = new double[n + 1];
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default for safety
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more type pairs
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
-{
-  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-  if (!allocated) allocate();
-
-  int ilo, ihi, jlo, jhi;
-  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
-  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
-
-  int count = 0;
-  for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo, i); j <= jhi; j++) {
-      setflag[i][j] = 1;
-      count++;
-    }
-  }
-
-  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-}
-
-/* ----------------------------------------------------------------------
-   init specific to this pair style
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::init_style()
-{
-  int i;
-
-  // error and warning checks
-
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
-    error->all(FLERR,
-               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
-               "superellipdoid flag");
-  if (comm->ghost_velocity == 0)
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
-
-  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
-  int *ellipsoid = atom->ellipsoid;
-  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
-
-  int nlocal = atom->nlocal;
-  for (i = 0; i < nlocal; i++) {
-    if (ellipsoid[i] < 0) {
-      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
-    }
-  }
-
-  // need a granular neighbor list
-
-  if (use_history)
-    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
-  else
-    neighbor->add_request(this, NeighConst::REQ_SIZE);
-
-  dt = update->dt;
-
-  // if history is stored and first init, create Fix to store history
-  // it replaces FixDummy, created in the constructor
-  // this is so its order in the fix list is preserved
-
-  if (use_history && (fix_history == nullptr)) {
-    auto cmd =
-        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
-    fix_history->pair = this;
-  }
-
-  // check for FixFreeze and set freeze_group_bit
-
-  auto fixlist = modify->get_fix_by_style("^freeze");
-  if (fixlist.size() == 0)
-    freeze_group_bit = 0;
-  else if (fixlist.size() > 1)
-    error->all(FLERR, "Only one fix freeze command at a time allowed");
-  else
-    freeze_group_bit = fixlist.front()->groupbit;
-
-  // check for FixRigid so can extract rigid body masses
-
-  fix_rigid = nullptr;
-  for (const auto &ifix : modify->get_fix_list()) {
-    if (ifix->rigid_flag) {
-      if (fix_rigid)
-        error->all(FLERR, "Only one fix rigid command at a time allowed");
-      else
-        fix_rigid = ifix;
-    }
-  }
-
-  // check for FixPour and FixDeposit so can extract particle radii
-
-  auto pours = modify->get_fix_by_style("^pour");
-  auto deps = modify->get_fix_by_style("^deposit");
-
-  // set maxrad_dynamic and maxrad_frozen for each type
-  // include future FixPour and FixDeposit particles as dynamic
-
-  int itype;
-  for (i = 1; i <= atom->ntypes; i++) {
-    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
-    for (auto &ipour : pours) {
-      itype = i;
-      double maxrad = *((double *) ipour->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-    for (auto &idep : deps) {
-      itype = i;
-      double maxrad = *((double *) idep->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-  }
-
-  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
-
-  double *radius = atom->radius;
-  int *mask = atom->mask;
-  int *type = atom->type;
-
-  for (i = 0; i < nlocal; i++) {
-    if (mask[i] & freeze_group_bit)
-      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
-    else
-      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
-  }
-
-  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-
-  // set fix which stores history info
-
-  if (use_history) {
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
-    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
-  }
-}
-
-/* ----------------------------------------------------------------------
-   init for one type pair i,j and corresponding j,i
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
-{
-  if (!allocated) allocate();
-
-  // cutoff = sum of max I,J radii for
-  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
-
-  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
-  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
-  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
-  return cutoff;
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
-{
-  write_restart_settings(fp);
-
-  int i, j;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
-{
-  read_restart_settings(fp);
-  allocate();
-
-  int i, j;
-  int me = comm->me;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
-      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
-    }
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
-{
-  fwrite(&kn, sizeof(double), 1, fp);
-  fwrite(&kt, sizeof(double), 1, fp);
-  fwrite(&gamman, sizeof(double), 1, fp);
-  fwrite(&gammat, sizeof(double), 1, fp);
-  fwrite(&xmu, sizeof(double), 1, fp);
-  fwrite(&dampflag, sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
-{
-  if (comm->me == 0) {
-    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
-  }
-  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::reset_dt()
-{
-  dt = update->dt;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  bool skip_contact_detection = false;
-  if (bounding_box) {
-    int cached_axis =
-        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
-                                                                           Rj, shapej, cached_axis);
-    if (new_axis != -1) skip_contact_detection = true;
-    if (skip_contact_detection) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->warning(FLERR,
-                     "Ellipsoid contact detection (old contact) failed"
-                     "between particle {} and particle {}",
-                     atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->one(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
-                                                     int /*pbc_flag*/, int * /*pbc*/)
-{
-  int i, j, m;
-
-  m = 0;
-  for (i = 0; i < n; i++) {
-    j = list[i];
-    buf[m++] = mass_rigid[j];
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
-{
-  int i, m, last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local atom-based arrays
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::memory_usage()
-{
-  double bytes = (double) nmax * sizeof(double);
-  return bytes;
-}
-
-void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
-                                                     int /*jtype*/)
-{
-  // Simple direct copy of all history variables (shear, contact point, axis)
-  for (int i = 0; i < size_history; i++) {
-    if (i < 3) target[i] = -source[i];    //shear
-    target[i] = source[i];
-  }
-}
diff --git a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h b/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
deleted file mode 100644
index e3bcf3e038c..00000000000
--- a/src/GRANULAR/pair_gran_hooke_history_ellipsoid.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-
-#include "pair.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHookeHistoryEllipsoid : public Pair {
- public:
-  PairGranHookeHistoryEllipsoid(class LAMMPS *);
-  ~PairGranHookeHistoryEllipsoid() override;
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  void coeff(int, char **) override;
-  void init_style() override;
-  double init_one(int, int) override;
-  void write_restart(FILE *) override;
-  void read_restart(FILE *) override;
-  void write_restart_settings(FILE *) override;
-  void read_restart_settings(FILE *) override;
-  void reset_dt() override;
-  double single(int, int, int, int, double, double, double, double &) override;
-  int pack_forward_comm(int, int *, double *, int, int *) override;
-  void unpack_forward_comm(int, int, double *) override;
-  double memory_usage() override;
-  void transfer_history(double *, double *, int, int) override;
-
- protected:
-  double kn, kt, gamman, gammat, xmu;
-  int dampflag;
-  double dt;
-  int freeze_group_bit;
-  int use_history;
-  int limit_damping;
-  int bounding_box;
-
-  int neighprev;
-  double *onerad_dynamic, *onerad_frozen;
-  double *maxrad_dynamic, *maxrad_frozen;
-
-  int size_history;
-
-  class FixDummy *fix_dummy;
-  class FixNeighHistory *fix_history;
-
-  // storage of rigid body masses for use in granular interactions
-
-  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
-  double *mass_rigid;      // rigid mass for owned+ghost atoms
-  int nmax;                // allocated size of mass_rigid
-
-  int contact_formulation;
-
-  void allocate();
-
- private:
-  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
-  static double
-  shape_and_gradient_local(const double *, const double *, const double *,
-                           double *);    // would return a vector of temporary variables
-  static double hessian_local(
-      const double *, const double *, const double *,
-      double *);    // would use the above vector of temporary variables to compute local hessian
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/pair_gran_hertz_history_ellipsoid.cpp b/src/pair_gran_hertz_history_ellipsoid.cpp
deleted file mode 100644
index 93ac6feab97..00000000000
--- a/src/pair_gran_hertz_history_ellipsoid.cpp
+++ /dev/null
@@ -1,783 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hertz_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHertzHistoryEllipsoid::PairGranHertzHistoryEllipsoid(LAMMPS *lmp) :
-    PairGranHookeHistoryEllipsoid(lmp)
-{
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double wr1, wr2, wr3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht, polyhertz;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
-
-  double shapex, shapey, shapez;    // ellipsoid shape params
-  double quat1, quat2, quat3, quat4;
-  double block1, block2;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching = false;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          int cached_axis = (int) (allhistory[7 + size_history * jj]);
-          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-          if (new_axis != -1) {
-            skip_contact_detection = true;
-            allhistory[7 + size_history * jj] = (double) new_axis;
-          }
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed"
-                             "between particle {} and particle {}",
-                             atom->tag[i], atom->tag[j]);
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) {
-          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
-          history[k] = 0.0;
-        }
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-        MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-        MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-        if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-          curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        } else {
-          curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-          curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-              shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-        }
-
-        polyhertz = sqrt((overlap1 + overlap2) /
-                         (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hertzian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        ccel *= polyhertz;
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHertzHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  curvature_model = MathExtraSuperellipsoids::CURV_MEAN;    // Default to Mean curvature
-
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else if (strcmp(arg[iarg], "curvature_gaussian") == 0)
-      curvature_model = MathExtraSuperellipsoids::CURV_GAUSSIAN;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default size
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-
-  // convert Kn and Kt from pressure units to force/distance^2
-
-  kn /= force->nktv2p;
-  kt /= force->nktv2p;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHertzHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag, polyhertz;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  bool skip_contact_detection(false);
-  if (bounding_box) {
-    int cached_axis =
-        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
-                                                                           Rj, shapej, cached_axis);
-    if (new_axis != -1) skip_contact_detection = true;
-    if (skip_contact_detection) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->all(FLERR,
-                 "Ellipsoid contact detection (old contact) failed"
-                 "between particle {} and particle {}",
-                 atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->all(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double surf_point_i[3], surf_point_j[3], curvature_i, curvature_j;
-  MathExtra::scaleadd3(overlap1, nij, X0, surf_point_i);
-  MathExtra::scaleadd3(overlap2, nji, X0, surf_point_j);
-
-  if (curvature_model == MathExtraSuperellipsoids::CURV_MEAN) {
-    curvature_i = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapei, blocki, flagi, Ri,
-                                                                          surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::mean_curvature_superellipsoid(shapej, blockj, flagj, Rj,
-                                                                          surf_point_j, x[j]);
-  } else {
-    curvature_i = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapei, blocki, flagi, Ri, surf_point_i, x[i]);
-    curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
-        shapej, blockj, flagj, Rj, surf_point_j, x[j]);
-  }
-
-  polyhertz = sqrt((overlap1 + overlap2) /
-                   (curvature_i + curvature_j));    // hertzian contact radius approximation
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitude
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  ccel *= polyhertz;
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -polyhertz * (kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -polyhertz * (kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -polyhertz * (kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
diff --git a/src/pair_gran_hertz_history_ellipsoid.h b/src/pair_gran_hertz_history_ellipsoid.h
deleted file mode 100644
index b02f6b733e7..00000000000
--- a/src/pair_gran_hertz_history_ellipsoid.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hertz/history/ellipsoid,PairGranHertzHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HERTZ_HISTORY_ELLIPSOID_H
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHertzHistoryEllipsoid : public PairGranHookeHistoryEllipsoid {
- public:
-  PairGranHertzHistoryEllipsoid(class LAMMPS *);
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  double single(int, int, int, int, double, double, double, double &) override;
-
- protected:
-  int curvature_model;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/pair_gran_hooke_history_ellipsoid.cpp b/src/pair_gran_hooke_history_ellipsoid.cpp
deleted file mode 100644
index 01a6c4b0343..00000000000
--- a/src/pair_gran_hooke_history_ellipsoid.cpp
+++ /dev/null
@@ -1,1091 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#include "pair_gran_hooke_history_ellipsoid.h"
-
-#include "atom.h"
-#include "atom_vec_ellipsoid.h"
-#include "comm.h"
-#include "error.h"
-#include "fix.h"
-#include "fix_dummy.h"
-#include "fix_neigh_history.h"
-#include "force.h"
-#include "math_extra.h"    // probably needed for some computations
-#include "math_extra_superellipsoids.h"
-#include "memory.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
-#include <iostream>
-
-#include <cmath>
-#include <cstring>
-
-using namespace LAMMPS_NS;
-
-static constexpr int NUMSTEP_INITIAL_GUESS = 5;
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::PairGranHookeHistoryEllipsoid(LAMMPS *lmp) : Pair(lmp)
-{
-  single_enable = 1;
-  no_virial_fdotr_compute = 1;
-  centroidstressflag = CENTROID_NOTAVAIL;
-  finitecutflag = 1;
-  use_history = 1;
-  size_history =
-      8;    // shear[3], contact_point_and_Lagrange_multiplier[4], bounding_box_separating_axis_index
-
-  single_extra = 10;
-  svector = new double[10];
-
-  neighprev = 0;
-
-  nmax = 0;
-  mass_rigid = nullptr;
-
-  // set comm size needed by this Pair if used with fix rigid
-
-  comm_forward = 1;
-
-  // keep default behavior of history[i][j] = -history[j][i]
-
-  nondefault_history_transfer = 1;
-
-  // create dummy fix as placeholder for FixNeighHistory
-  // this is so final order of Modify:fix will conform to input script
-
-  fix_history = nullptr;
-  fix_dummy = dynamic_cast<FixDummy *>(
-      modify->add_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me) + " all DUMMY"));
-
-  contact_formulation = MathExtraSuperellipsoids::FORMULATION_ALGEBRAIC;
-}
-
-/* ---------------------------------------------------------------------- */
-
-PairGranHookeHistoryEllipsoid::~PairGranHookeHistoryEllipsoid()
-{
-  if (copymode) return;
-
-  delete[] svector;
-
-  if (!fix_history)
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me));
-  else
-    modify->delete_fix("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me));
-
-  if (allocated) {
-    memory->destroy(setflag);
-    memory->destroy(cutsq);
-
-    delete[] onerad_dynamic;
-    delete[] onerad_frozen;
-    delete[] maxrad_dynamic;
-    delete[] maxrad_frozen;
-  }
-
-  memory->destroy(mass_rigid);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::compute(int eflag, int vflag)
-{
-  int i, j, ii, jj, inum, jnum;
-  double xtmp, ytmp, ztmp, delx, dely, delz, fx, fy, fz;
-  double radi, radj, radsum, rsq, r, rinv, rsqinv, factor_lj;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3;
-  double vtr1, vtr2, vtr3, vrel;
-  double mi, mj, meff, damp, ccel, tor1, tor2, tor3;
-  double fn, fs, fs1, fs2, fs3;
-  double shrmag, rsht;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-  int *touch, **firsttouch;
-  double *shear, *X0_prev, *history, *allhistory, **firsthistory;
-
-  double X0[4], nij[3], shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3], overlap1,
-      overlap2, omegai[3], omegaj[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-
-  ev_init(eflag, vflag);
-
-  int shearupdate = 1;
-  if (update->setupflag) shearupdate = 0;
-
-  // update rigid body info for owned & ghost atoms if using FixRigid masses
-  // body[i] = which body atom I is in, -1 if none
-  // mass_body = mass of each rigid body
-
-  if (fix_rigid && neighbor->ago == 0) {
-    int tmp;
-    int *body = (int *) fix_rigid->extract("body", tmp);
-    auto *mass_body = (double *) fix_rigid->extract("masstotal", tmp);
-    if (atom->nmax > nmax) {
-      memory->destroy(mass_rigid);
-      nmax = atom->nmax;
-      memory->create(mass_rigid, nmax, "pair:mass_rigid");
-    }
-    int nlocal = atom->nlocal;
-    for (i = 0; i < nlocal; i++)
-      if (body[i] >= 0)
-        mass_rigid[i] = mass_body[body[i]];
-      else
-        mass_rigid[i] = 0.0;
-    comm->forward_comm(this);
-  }
-
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **angmom = atom->angmom;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double *special_lj = force->special_lj;
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
-  firsttouch = fix_history->firstflag;
-  firsthistory = fix_history->firstvalue;
-
-  // loop over neighbors of my atoms
-
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-
-    touch = firsttouch[i];
-    allhistory = firsthistory[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
-      j &= NEIGHMASK;
-
-      if (factor_lj == 0) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radj = radius[j];
-      radsum = radi + radj;
-
-      X0_prev = &allhistory[3 + size_history * jj];
-      int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-
-      // TODO: Below could be a `touch()` function
-      bool touching = false;
-      if (rsq >= radsum * radsum) {
-        touching = false;
-      } else {
-        MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-        MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-        MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-        MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-        MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-        MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-        bool skip_contact_detection(false);
-        if (bounding_box) {
-          int cached_axis = (int) (allhistory[7 + size_history * jj]);
-          int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(
-              x[i], Ri, shapei, x[j], Rj, shapej, cached_axis);
-          if (new_axis != -1) {
-            skip_contact_detection = true;
-            allhistory[7 + size_history * jj] = (double) new_axis;
-          }
-        }
-        if (skip_contact_detection)
-          touching = false;
-        else {
-          // superellipsoid contact detection between atoms i and j
-          flagi = bonus[ellipsoid[i]].type;
-          flagj = bonus[ellipsoid[j]].type;
-          if (touch[jj] == 1) {
-            // Continued contact: use grain true shape and last contact point with respect to grain i
-            X0[0] = x[ref_index][0] + X0_prev[0];
-            X0[1] = x[ref_index][1] + X0_prev[1];
-            X0[2] = x[ref_index][2] + X0_prev[2];
-            X0[3] = X0_prev[3];
-            // std::cout << "Using old contact point as initial guess between particle " << atom->tag[i] << " and particle " << atom->tag[j] << " : "
-            //           << X0[0] << " " << X0[1] << " " << X0[2] << " Lagrange multiplier mu^2: " << X0[3] << std::endl;
-            int status = MathExtraSuperellipsoids::determine_contact_point(
-                x[i], Ri, shapei, blocki, flagi, x[j], Rj, shapej, blockj, flagj, X0, nij,
-                contact_formulation);
-            if (status == 0)
-              touching = true;
-            else if (status == 1)
-              touching = false;
-            else {
-              error->warning(FLERR,
-                             "Ellipsoid contact detection (old contact) failed "
-                             "between particle {} and particle {} ",
-                             atom->tag[i], atom->tag[j]);
-            }
-          } else {
-            // New contact: Build initial guess incrementally by morphing the particles from spheres to actual shape
-
-            // There might be better heuristic for the "volume equivalent spheres" suggested in the paper
-            // but this is good enough. We might even be able to use radi and radj which is cheaper
-            // MathExtra::scaleadd3(radj / radsum, x[i], radi /radsum, x[j], X0);
-
-            double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-            double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-            MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-            X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-            for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-              double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-              shapei[0] = shapei[1] = shapei[2] = reqi;
-              shapej[0] = shapej[1] = shapej[2] = reqj;
-              MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-              MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-              blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-              blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-              blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-              blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-              // force ellipsoid flag for first initial guess iteration.
-              // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-              int status = MathExtraSuperellipsoids::determine_contact_point(
-                  x[i], Ri, shapei, blocki,
-                  iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi, x[j], Rj, shapej,
-                  blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj, X0, nij,
-                  contact_formulation);
-
-              if (status == 0)
-                touching = true;
-              else if (status == 1)
-                touching = false;
-              else if (iter_ig == NUMSTEP_INITIAL_GUESS) {
-                // keep trying until last iteration to avoid erroring out too early
-                error->warning(FLERR,
-                               "Ellipsoid contact detection (new contact) failed"
-                               "between particle {} and particle {}",
-                               atom->tag[i], atom->tag[j]);
-              }
-            }
-          }
-        }
-      }
-
-      if (!touching) {
-        // unset non-touching neighbors
-
-        touch[jj] = 0;
-        history = &allhistory[size_history * jj];
-        for (int k = 0; k < size_history; k++) {
-          if (bounding_box && k == 7) continue;    // Do not delete cached axis information
-          history[k] = 0.0;
-        }
-      } else {
-        // Store contact point with respect to grain i for next time step
-        // This is crucial for periodic BCs when grains can move by large amount in one time step
-        // Keeping the previous contact point relative to global frame would lead to bad initial guess
-        X0_prev[0] = X0[0] - x[ref_index][0];
-        X0_prev[1] = X0[1] - x[ref_index][1];
-        X0_prev[2] = X0[2] - x[ref_index][2];
-        X0_prev[3] = X0[3];
-
-        double nji[3] = {-nij[0], -nij[1], -nij[2]};
-        // compute overlap depth along normal direction for each grain
-        // overlap is positive for both grains
-        overlap1 = MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0,
-                                                                      nij, x[i]);
-        overlap2 = MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0,
-                                                                      nji, x[j]);
-
-        // branch vectors
-        double cr1[3], cr2[3];
-        MathExtra::sub3(X0, x[i], cr1);
-        MathExtra::sub3(X0, x[j], cr2);
-
-        // we need to take the cross product of omega
-
-        double ex_space[3], ey_space[3], ez_space[3];
-        MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[i]].inertia, omegai);
-        MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-        MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space,
-                                   bonus[ellipsoid[j]].inertia, omegaj);
-
-        double omega_cross_r1[3], omega_cross_r2[3];
-        MathExtra::cross3(omegai, cr1, omega_cross_r1);
-        MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-        // relative translational velocity
-        // compute directly the sum of relative translational velocity at contact point
-        // since rotational velocity contribution is different for superellipsoids
-        double cv1[3], cv2[3];
-
-        cv1[0] = v[i][0] + omega_cross_r1[0];
-        cv1[1] = v[i][1] + omega_cross_r1[1];
-        cv1[2] = v[i][2] + omega_cross_r1[2];
-
-        cv2[0] = v[j][0] + omega_cross_r2[0];
-        cv2[1] = v[j][1] + omega_cross_r2[1];
-        cv2[2] = v[j][2] + omega_cross_r2[2];
-
-        // total relavtive velocity at contact point
-        vr1 = cv1[0] - cv2[0];
-        vr2 = cv1[1] - cv2[1];
-        vr3 = cv1[2] - cv2[2];
-
-        // normal component
-
-        vn1 = nij[0] * vr1;    // dot product
-        vn2 = nij[1] * vr2;
-        vn3 = nij[2] * vr3;
-
-        vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-        // tangential component
-
-        vtr1 = vr1 - vnnr * nij[0];
-        vtr2 = vr2 - vnnr * nij[1];
-        vtr3 = vr3 - vnnr * nij[2];
-
-        vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-        vrel = sqrt(vrel);
-
-        // meff = effective mass of pair of particles
-        // if I or J part of rigid body, use body mass
-        // if I or J is frozen, meff is other particle
-
-        mi = rmass[i];
-        mj = rmass[j];
-        if (fix_rigid) {
-          if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-          if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-        }
-
-        meff = mi * mj / (mi + mj);
-        if (mask[i] & freeze_group_bit) meff = mj;
-        if (mask[j] & freeze_group_bit) meff = mi;
-
-        // normal forces = Hookian contact + normal velocity damping
-
-        damp = meff * gamman * vnnr;
-        ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-        if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-        // shear history effects
-
-        touch[jj] = 1;
-        shear = &allhistory[size_history * jj];
-
-        if (shearupdate) {
-          shear[0] += vtr1 * dt;
-          shear[1] += vtr2 * dt;
-          shear[2] += vtr3 * dt;
-        }
-        shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-        if (shearupdate) {
-
-          // rotate shear displacements
-
-          rsht = shear[0] * nij[0] + shear[1] * nij[1] + shear[2] * nij[2];
-          shear[0] -= rsht * nij[0];
-          shear[1] -= rsht * nij[1];
-          shear[2] -= rsht * nij[2];
-        }
-
-        // tangential forces = shear + tangential velocity damping
-
-        fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-        fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-        fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-        // rescale frictional displacements and forces if needed
-
-        fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-        fn = xmu * fabs(ccel);
-
-        if (fs > fn) {
-          if (shrmag != 0.0) {
-            shear[0] =
-                (fn / fs) * (shear[0] + meff * gammat * vtr1 / kt) - meff * gammat * vtr1 / kt;
-            shear[1] =
-                (fn / fs) * (shear[1] + meff * gammat * vtr2 / kt) - meff * gammat * vtr2 / kt;
-            shear[2] =
-                (fn / fs) * (shear[2] + meff * gammat * vtr3 / kt) - meff * gammat * vtr3 / kt;
-            fs1 *= fn / fs;
-            fs2 *= fn / fs;
-            fs3 *= fn / fs;
-          } else
-            fs1 = fs2 = fs3 = 0.0;
-        }
-
-        // forces & torques
-
-        fx = nji[0] * ccel + fs1;
-        fy = nji[1] * ccel + fs2;
-        fz = nji[2] * ccel + fs3;
-        fx *= factor_lj;    // I think factor lj is just 1 except for special bonds
-        fy *= factor_lj;
-        fz *= factor_lj;
-        f[i][0] += fx;
-        f[i][1] += fy;
-        f[i][2] += fz;
-        // torques are cross prodcuts of branch vector with the entire force at contact point
-
-        tor1 = cr1[1] * fz - cr1[2] * fy;
-        tor2 = cr1[2] * fx - cr1[0] * fz;
-        tor3 = cr1[0] * fy - cr1[1] * fx;
-
-        torque[i][0] += tor1;
-        torque[i][1] += tor2;
-        torque[i][2] += tor3;
-
-        if (newton_pair || j < nlocal) {
-          f[j][0] -= fx;
-          f[j][1] -= fy;
-          f[j][2] -= fz;
-
-          tor1 = cr2[1] * fz - cr2[2] * fy;
-          tor2 = cr2[2] * fx - cr2[0] * fz;
-          tor3 = cr2[0] * fy - cr2[1] * fx;
-
-          torque[j][0] -= tor1;
-          torque[j][1] -= tor2;
-          torque[j][2] -= tor3;
-        }
-
-        if (evflag)
-          ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fx, fy, fz, delx, dely,
-                       delz);    // Correct even for non-spherical particles
-      }
-    }
-  }
-
-  if (vflag_fdotr) virial_fdotr_compute();
-}
-
-/* ----------------------------------------------------------------------
-   allocate all arrays
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::allocate()
-{
-  allocated = 1;
-  int n = atom->ntypes;
-
-  memory->create(setflag, n + 1, n + 1, "pair:setflag");
-  for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++) setflag[i][j] = 0;
-
-  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
-
-  onerad_dynamic = new double[n + 1];
-  onerad_frozen = new double[n + 1];
-  maxrad_dynamic = new double[n + 1];
-  maxrad_frozen = new double[n + 1];
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::settings(int narg, char **arg)
-{
-  if (narg < 6) error->all(FLERR, "Illegal pair_style command");
-
-  kn = utils::numeric(FLERR, arg[0], false, lmp);
-  if (strcmp(arg[1], "NULL") == 0)
-    kt = kn * 2.0 / 7.0;
-  else
-    kt = utils::numeric(FLERR, arg[1], false, lmp);
-
-  gamman = utils::numeric(FLERR, arg[2], false, lmp);
-  if (strcmp(arg[3], "NULL") == 0)
-    gammat = 0.5 * gamman;
-  else
-    gammat = utils::numeric(FLERR, arg[3], false, lmp);
-
-  xmu = utils::numeric(FLERR, arg[4], false, lmp);
-  dampflag = utils::inumeric(FLERR, arg[5], false, lmp);
-  if (dampflag == 0) gammat = 0.0;
-
-  limit_damping = 0;
-  bounding_box = 0;
-  for (int iarg = 6; iarg < narg; iarg++) {
-    if (strcmp(arg[iarg], "limit_damping") == 0)
-      limit_damping = 1;
-    else if (strcmp(arg[iarg], "bounding_box") == 0)
-      bounding_box = 1;
-    else if (strcmp(arg[iarg], "geometric") == 0)
-      contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;
-    else
-      error->all(FLERR, "Illegal pair_style command");
-  }
-
-  size_history = 8;    // reset to default for safety
-  if (bounding_box == 0) size_history--;
-
-  if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 ||
-      dampflag < 0 || dampflag > 1)
-    error->all(FLERR, "Illegal pair_style command");
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more type pairs
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::coeff(int narg, char **arg)
-{
-  if (narg > 2) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-  if (!allocated) allocate();
-
-  int ilo, ihi, jlo, jhi;
-  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
-  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
-
-  int count = 0;
-  for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo, i); j <= jhi; j++) {
-      setflag[i][j] = 1;
-      count++;
-    }
-  }
-
-  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients" + utils::errorurl(21));
-}
-
-/* ----------------------------------------------------------------------
-   init specific to this pair style
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::init_style()
-{
-  int i;
-
-  // error and warning checks
-
-  if (!atom->radius_flag || !atom->rmass_flag || !atom->angmom_flag || !atom->superellipsoid_flag)
-    error->all(FLERR,
-               "Pair gran/h/ellipsoid* requires atom attributes radius, rmass, angmom and "
-               "superellipdoid flag");
-  if (comm->ghost_velocity == 0)
-    error->all(FLERR, "Pair gran/h/ellipsoid* requires ghost atoms store velocity");
-
-  // ensure all atoms have an allocated ellipsoid bonus structure (ellipsoidflag > 0)
-  int *ellipsoid = atom->ellipsoid;
-  if (!ellipsoid) error->all(FLERR, "Pair gran/h/ellipsoid* requires atom style ellipsoid");
-
-  int nlocal = atom->nlocal;
-  for (i = 0; i < nlocal; i++) {
-    if (ellipsoid[i] < 0) {
-      error->one(FLERR, "Pair gran/h/ellipsoid* requires all atoms to have ellipsoidflag = 1");
-    }
-  }
-
-  // need a granular neighbor list
-
-  if (use_history)
-    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_HISTORY);
-  else
-    neighbor->add_request(this, NeighConst::REQ_SIZE);
-
-  dt = update->dt;
-
-  // if history is stored and first init, create Fix to store history
-  // it replaces FixDummy, created in the constructor
-  // this is so its order in the fix list is preserved
-
-  if (use_history && (fix_history == nullptr)) {
-    auto cmd =
-        fmt::format("NEIGH_HISTORY_HH_ELL{} all NEIGH_HISTORY {}", instance_me, size_history);
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->replace_fix("NEIGH_HISTORY_HH_ELL_DUMMY" + std::to_string(instance_me), cmd, 1));
-    fix_history->pair = this;
-  }
-
-  // check for FixFreeze and set freeze_group_bit
-
-  auto fixlist = modify->get_fix_by_style("^freeze");
-  if (fixlist.size() == 0)
-    freeze_group_bit = 0;
-  else if (fixlist.size() > 1)
-    error->all(FLERR, "Only one fix freeze command at a time allowed");
-  else
-    freeze_group_bit = fixlist.front()->groupbit;
-
-  // check for FixRigid so can extract rigid body masses
-
-  fix_rigid = nullptr;
-  for (const auto &ifix : modify->get_fix_list()) {
-    if (ifix->rigid_flag) {
-      if (fix_rigid)
-        error->all(FLERR, "Only one fix rigid command at a time allowed");
-      else
-        fix_rigid = ifix;
-    }
-  }
-
-  // check for FixPour and FixDeposit so can extract particle radii
-
-  auto pours = modify->get_fix_by_style("^pour");
-  auto deps = modify->get_fix_by_style("^deposit");
-
-  // set maxrad_dynamic and maxrad_frozen for each type
-  // include future FixPour and FixDeposit particles as dynamic
-
-  int itype;
-  for (i = 1; i <= atom->ntypes; i++) {
-    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
-    for (auto &ipour : pours) {
-      itype = i;
-      double maxrad = *((double *) ipour->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-    for (auto &idep : deps) {
-      itype = i;
-      double maxrad = *((double *) idep->extract("radius", itype));
-      if (maxrad > 0.0) onerad_dynamic[i] = maxrad;
-    }
-  }
-
-  // since for ellipsoids radius is the maximum of the three axes, no need to change this part
-
-  double *radius = atom->radius;
-  int *mask = atom->mask;
-  int *type = atom->type;
-
-  for (i = 0; i < nlocal; i++) {
-    if (mask[i] & freeze_group_bit)
-      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]], radius[i]);
-    else
-      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]);
-  }
-
-  MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-  MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
-
-  // set fix which stores history info
-
-  if (use_history) {
-    fix_history = dynamic_cast<FixNeighHistory *>(
-        modify->get_fix_by_id("NEIGH_HISTORY_HH_ELL" + std::to_string(instance_me)));
-    if (!fix_history) error->all(FLERR, "Could not find pair fix neigh history ID");
-  }
-}
-
-/* ----------------------------------------------------------------------
-   init for one type pair i,j and corresponding j,i
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::init_one(int i, int j)
-{
-  if (!allocated) allocate();
-
-  // cutoff = sum of max I,J radii for
-  // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen
-
-  double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j];
-  cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]);
-  cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]);
-  return cutoff;
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart(FILE *fp)
-{
-  write_restart_settings(fp);
-
-  int i, j;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j], sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart(FILE *fp)
-{
-  read_restart_settings(fp);
-  allocate();
-
-  int i, j;
-  int me = comm->me;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
-      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
-    }
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::write_restart_settings(FILE *fp)
-{
-  fwrite(&kn, sizeof(double), 1, fp);
-  fwrite(&kt, sizeof(double), 1, fp);
-  fwrite(&gamman, sizeof(double), 1, fp);
-  fwrite(&gammat, sizeof(double), 1, fp);
-  fwrite(&xmu, sizeof(double), 1, fp);
-  fwrite(&dampflag, sizeof(int), 1, fp);
-}
-
-/* ----------------------------------------------------------------------
-  proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::read_restart_settings(FILE *fp)
-{
-  if (comm->me == 0) {
-    utils::sfread(FLERR, &kn, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &kt, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gamman, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &gammat, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &xmu, sizeof(double), 1, fp, nullptr, error);
-    utils::sfread(FLERR, &dampflag, sizeof(int), 1, fp, nullptr, error);
-  }
-  MPI_Bcast(&kn, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&kt, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gamman, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&gammat, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&xmu, 1, MPI_DOUBLE, 0, world);
-  MPI_Bcast(&dampflag, 1, MPI_INT, 0, world);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::reset_dt()
-{
-  dt = update->dt;
-}
-
-/* ---------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::single(int i, int j, int /*itype*/, int /*jtype*/, double rsq,
-                                             double /*factor_coul*/, double /*factor_lj*/,
-                                             double &fforce)
-{
-  double radi, radj, radsum;
-  double vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
-  double mi, mj, meff, damp, ccel;
-  double vtr1, vtr2, vtr3, vrel, shrmag;
-  double fs1, fs2, fs3, fs, fn;
-
-  double *radius = atom->radius;
-  radi = radius[i];
-  radj = radius[j];
-  radsum = radi + radj;
-
-  double **x = atom->x;
-
-  // history effects
-  // neighprev = index of found neigh on previous call
-  // search entire jnum list of neighbors of I for neighbor J
-  // start from neighprev, since will typically be next neighbor
-  // reset neighprev to 0 as necessary
-  int jnum = list->numneigh[i];
-  int *jlist = list->firstneigh[i];
-  int *touch = fix_history->firstflag[i];
-  double *allhistory = fix_history->firstvalue[i];
-  for (int jj = 0; jj < jnum; jj++) {
-    neighprev++;
-    if (neighprev >= jnum) neighprev = 0;
-    if (jlist[neighprev] == j) break;
-  }
-
-  if (rsq >= radsum * radsum) {
-    fforce = 0.0;
-    for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-    return 0.0;
-  }
-  auto avec_ellipsoid = dynamic_cast<AtomVecEllipsoid *>(atom->style_match("ellipsoid"));
-  AtomVecEllipsoid::BonusSuper *bonus = avec_ellipsoid->bonus_super;
-  int *ellipsoid = atom->ellipsoid;
-  double shapei[3], blocki[3], shapej[3], blockj[3], Ri[3][3], Rj[3][3];
-  MathExtra::copy3(bonus[ellipsoid[i]].shape, shapei);
-  MathExtra::copy3(bonus[ellipsoid[j]].shape, shapej);
-  MathExtra::copy3(bonus[ellipsoid[i]].block, blocki);
-  MathExtra::copy3(bonus[ellipsoid[j]].block, blockj);
-  MathExtra::quat_to_mat(bonus[ellipsoid[i]].quat, Ri);
-  MathExtra::quat_to_mat(bonus[ellipsoid[j]].quat, Rj);
-  bool skip_contact_detection = false;
-  if (bounding_box) {
-    int cached_axis =
-        (int) (allhistory[7 + size_history * neighprev]);    // Copy: no update of history in single
-    int new_axis = MathExtraSuperellipsoids::check_oriented_bounding_boxes(x[i], Ri, shapei, x[j],
-                                                                           Rj, shapej, cached_axis);
-    if (new_axis != -1) skip_contact_detection = true;
-    if (skip_contact_detection) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-  }
-  // superellipsoid contact detection between atoms i and j
-  double X0[4], nij[3];
-  AtomVecEllipsoid::BlockType flagi, flagj;
-  flagi = bonus[ellipsoid[i]].type;
-  flagj = bonus[ellipsoid[j]].type;
-  double *X0_prev = &allhistory[3 + size_history * neighprev];
-  if (touch[neighprev] == 1) {
-    int ref_index = (atom->tag[i] < atom->tag[j]) ? i : j;
-    // Continued contact: use grain true shape and last contact point
-    X0[0] = X0_prev[0] + x[ref_index][0];
-    X0[1] = X0_prev[1] + x[ref_index][1];
-    X0[2] = X0_prev[2] + x[ref_index][2];
-    X0[3] = X0_prev[3];
-    int status = MathExtraSuperellipsoids::determine_contact_point(x[i], Ri, shapei, blocki, flagi,
-                                                                   x[j], Rj, shapej, blockj, flagj,
-                                                                   X0, nij, contact_formulation);
-    if (status == 1) {
-      fforce = 0.0;
-      for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-      return 0.0;
-    }
-    if (status != 0)
-      error->warning(FLERR,
-                     "Ellipsoid contact detection (old contact) failed"
-                     "between particle {} and particle {}",
-                     atom->tag[i], atom->tag[j]);
-  } else {
-    double reqi = std::cbrt(shapei[0] * shapei[1] * shapei[2]);
-    double reqj = std::cbrt(shapej[0] * shapej[1] * shapej[2]);
-    MathExtra::scaleadd3(reqj / (reqi + reqj), x[i], reqi / (reqi + reqj), x[j], X0);
-    X0[3] = reqj / reqi;    // Lagrange multiplier mu^2
-    for (int iter_ig = 1; iter_ig <= NUMSTEP_INITIAL_GUESS; iter_ig++) {
-      double frac = iter_ig / double(NUMSTEP_INITIAL_GUESS);
-      shapei[0] = shapei[1] = shapei[2] = reqi;
-      shapej[0] = shapej[1] = shapej[2] = reqj;
-      MathExtra::scaleadd3(1.0 - frac, shapei, frac, bonus[ellipsoid[i]].shape, shapei);
-      MathExtra::scaleadd3(1.0 - frac, shapej, frac, bonus[ellipsoid[j]].shape, shapej);
-      blocki[0] = 2.0 + frac * (bonus[ellipsoid[i]].block[0] - 2.0);
-      blocki[1] = 2.0 + frac * (bonus[ellipsoid[i]].block[1] - 2.0);
-      blockj[0] = 2.0 + frac * (bonus[ellipsoid[j]].block[0] - 2.0);
-      blockj[1] = 2.0 + frac * (bonus[ellipsoid[j]].block[1] - 2.0);
-
-      // force ellipsoid flag for first initial guess iteration.
-      // Avoid incorrect values of n1/n2 - 2 in second derivatives.
-      int status = MathExtraSuperellipsoids::determine_contact_point(
-          x[i], Ri, shapei, blocki, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagi,
-          x[j], Rj, shapej, blockj, iter_ig == 1 ? AtomVecEllipsoid::BlockType::ELLIPSOID : flagj,
-          X0, nij, contact_formulation);
-      if (status == 1) {
-        fforce = 0.0;
-        for (int m = 0; m < single_extra; m++) svector[m] = 0.0;
-        return 0.0;
-      }
-      if (status != 0)
-        error->one(FLERR,
-                   "Ellipsoid contact detection (new contact) failed"
-                   "between particle {} and particle {}",
-                   atom->tag[i], atom->tag[j]);
-    }
-  }
-  double overlap1, overlap2, omegai[3], omegaj[3];
-  double nji[3] = {-nij[0], -nij[1], -nij[2]};
-  overlap1 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapei, blocki, Ri, flagi, X0, nij, x[i]);
-  overlap2 =
-      MathExtraSuperellipsoids::compute_overlap_distance(shapej, blockj, Rj, flagj, X0, nji, x[j]);
-
-  double cr1[3], cr2[3];
-  MathExtra::sub3(X0, x[i], cr1);
-  MathExtra::sub3(X0, x[j], cr2);
-
-  double ex_space[3], ey_space[3], ez_space[3];
-  double **angmom = atom->angmom;
-  MathExtra::q_to_exyz(bonus[ellipsoid[i]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[i], ex_space, ey_space, ez_space, bonus[ellipsoid[i]].inertia,
-                             omegai);
-  MathExtra::q_to_exyz(bonus[ellipsoid[j]].quat, ex_space, ey_space, ez_space);
-  MathExtra::angmom_to_omega(angmom[j], ex_space, ey_space, ez_space, bonus[ellipsoid[j]].inertia,
-                             omegaj);
-
-  double omega_cross_r1[3], omega_cross_r2[3];
-  MathExtra::cross3(omegai, cr1, omega_cross_r1);
-  MathExtra::cross3(omegaj, cr2, omega_cross_r2);
-
-  // relative translational velocity
-  // compute directly the sum of relative translational velocity at contact point
-  // since rotational velocity contribution is different for superellipsoids
-
-  double **v = atom->v;
-  double cv1[3], cv2[3];
-
-  cv1[0] = v[i][0] + omega_cross_r1[0];
-  cv1[1] = v[i][1] + omega_cross_r1[1];
-  cv1[2] = v[i][2] + omega_cross_r1[2];
-
-  cv2[0] = v[j][0] + omega_cross_r2[0];
-  cv2[1] = v[j][1] + omega_cross_r2[1];
-  cv2[2] = v[j][2] + omega_cross_r2[2];
-
-  // total relavtive velocity at contact point
-
-  vr1 = cv1[0] - cv2[0];
-  vr2 = cv1[1] - cv2[1];
-  vr3 = cv1[2] - cv2[2];
-
-  // normal component
-
-  vn1 = nij[0] * vr1;    // dot product
-  vn2 = nij[1] * vr2;
-  vn3 = nij[2] * vr3;
-
-  vnnr = vr1 * nij[0] + vr2 * nij[1] + vr3 * nij[2];    // magnitu
-
-  // tangential component
-
-  vtr1 = vr1 - vnnr * nij[0];
-  vtr2 = vr2 - vnnr * nij[1];
-  vtr3 = vr3 - vnnr * nij[2];
-
-  vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
-  vrel = sqrt(vrel);
-
-  // meff = effective mass of pair of particles
-  // if I or J part of rigid body, use body mass
-  // if I or J is frozen, meff is other particle
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-
-  mi = rmass[i];
-  mj = rmass[j];
-  if (fix_rigid) {
-    if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
-    if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
-  }
-
-  meff = mi * mj / (mi + mj);
-  if (mask[i] & freeze_group_bit) meff = mj;
-  if (mask[j] & freeze_group_bit) meff = mi;
-
-  // normal forces = Hookian contact + normal velocity damping
-
-  damp = meff * gamman * vnnr;
-  ccel = kn * (overlap1 + overlap2) + damp;    // assuming we get the overlap depth
-  if (limit_damping && (ccel < 0.0)) ccel = 0.0;
-
-  double *shear = &allhistory[size_history * neighprev];
-  shrmag = sqrt(shear[0] * shear[0] + shear[1] * shear[1] + shear[2] * shear[2]);
-
-  // tangential forces = shear + tangential velocity damping
-
-  fs1 = -(kt * shear[0] + meff * gammat * vtr1);
-  fs2 = -(kt * shear[1] + meff * gammat * vtr2);
-  fs3 = -(kt * shear[2] + meff * gammat * vtr3);
-
-  // rescale frictional displacements and forces if needed
-
-  fs = sqrt(fs1 * fs1 + fs2 * fs2 + fs3 * fs3);
-  fn = xmu * fabs(ccel);
-
-  if (fs > fn) {
-    if (shrmag != 0.0) {
-      fs1 *= fn / fs;
-      fs2 *= fn / fs;
-      fs3 *= fn / fs;
-      fs *= fn / fs;
-    } else
-      fs1 = fs2 = fs3 = 0.0;
-  }
-
-  // set force (normalized by r) and return no energy
-
-  fforce = ccel / sqrt(rsq);
-
-  // set single_extra quantities
-
-  svector[0] = fs1;
-  svector[1] = fs2;
-  svector[2] = fs3;
-  svector[3] = fs;
-  svector[4] = vn1;
-  svector[5] = vn2;
-  svector[6] = vn3;
-  svector[7] = vtr1;
-  svector[8] = vtr2;
-  svector[9] = vtr3;
-
-  return 0.0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int PairGranHookeHistoryEllipsoid::pack_forward_comm(int n, int *list, double *buf,
-                                                     int /*pbc_flag*/, int * /*pbc*/)
-{
-  int i, j, m;
-
-  m = 0;
-  for (i = 0; i < n; i++) {
-    j = list[i];
-    buf[m++] = mass_rigid[j];
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairGranHookeHistoryEllipsoid::unpack_forward_comm(int n, int first, double *buf)
-{
-  int i, m, last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) mass_rigid[i] = buf[m++];
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local atom-based arrays
-------------------------------------------------------------------------- */
-
-double PairGranHookeHistoryEllipsoid::memory_usage()
-{
-  double bytes = (double) nmax * sizeof(double);
-  return bytes;
-}
-
-void PairGranHookeHistoryEllipsoid::transfer_history(double *source, double *target, int /*itype*/,
-                                                     int /*jtype*/)
-{
-  // Simple direct copy of all history variables (shear, contact point, axis)
-  for (int i = 0; i < size_history; i++) {
-    if (i < 3) target[i] = -source[i];    //shear
-    target[i] = source[i];
-  }
-}
diff --git a/src/pair_gran_hooke_history_ellipsoid.h b/src/pair_gran_hooke_history_ellipsoid.h
deleted file mode 100644
index e3bcf3e038c..00000000000
--- a/src/pair_gran_hooke_history_ellipsoid.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------
-   Contributing author: Jacopo Bilotto (EPFL), Jibril B. Coulibaly
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-// clang-format off
-PairStyle(gran/hooke/history/ellipsoid,PairGranHookeHistoryEllipsoid);
-// clang-format on
-#else
-
-#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-#define LMP_PAIR_GRAN_HOOKE_HISTORY_ELLIPSOID_H
-
-#include "pair.h"
-
-namespace LAMMPS_NS {
-
-class PairGranHookeHistoryEllipsoid : public Pair {
- public:
-  PairGranHookeHistoryEllipsoid(class LAMMPS *);
-  ~PairGranHookeHistoryEllipsoid() override;
-  void compute(int, int) override;
-  void settings(int, char **) override;
-  void coeff(int, char **) override;
-  void init_style() override;
-  double init_one(int, int) override;
-  void write_restart(FILE *) override;
-  void read_restart(FILE *) override;
-  void write_restart_settings(FILE *) override;
-  void read_restart_settings(FILE *) override;
-  void reset_dt() override;
-  double single(int, int, int, int, double, double, double, double &) override;
-  int pack_forward_comm(int, int *, double *, int, int *) override;
-  void unpack_forward_comm(int, int, double *) override;
-  double memory_usage() override;
-  void transfer_history(double *, double *, int, int) override;
-
- protected:
-  double kn, kt, gamman, gammat, xmu;
-  int dampflag;
-  double dt;
-  int freeze_group_bit;
-  int use_history;
-  int limit_damping;
-  int bounding_box;
-
-  int neighprev;
-  double *onerad_dynamic, *onerad_frozen;
-  double *maxrad_dynamic, *maxrad_frozen;
-
-  int size_history;
-
-  class FixDummy *fix_dummy;
-  class FixNeighHistory *fix_history;
-
-  // storage of rigid body masses for use in granular interactions
-
-  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
-  double *mass_rigid;      // rigid mass for owned+ghost atoms
-  int nmax;                // allocated size of mass_rigid
-
-  int contact_formulation;
-
-  void allocate();
-
- private:
-  // Below not implemented. Placeholder if we decide not to compute local hessian in line search
-  static double
-  shape_and_gradient_local(const double *, const double *, const double *,
-                           double *);    // would return a vector of temporary variables
-  static double hessian_local(
-      const double *, const double *, const double *,
-      double *);    // would use the above vector of temporary variables to compute local hessian
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif

From 67667f43535342096c4313ecbbcf49fae4de1cd8 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 09:15:19 +0100
Subject: [PATCH 157/174] Removed legacy pair gran ellipsoid doc

---
 doc/src/pair_gran_ellipsoid.rst | 401 --------------------------------
 1 file changed, 401 deletions(-)
 delete mode 100644 doc/src/pair_gran_ellipsoid.rst

diff --git a/doc/src/pair_gran_ellipsoid.rst b/doc/src/pair_gran_ellipsoid.rst
deleted file mode 100644
index a0bf8288f24..00000000000
--- a/doc/src/pair_gran_ellipsoid.rst
+++ /dev/null
@@ -1,401 +0,0 @@
-.. index:: pair_style gran/hooke/history/ellipsoid
-.. index:: pair_style gran/hertz/history/ellipsoid
-
-pair_style gran/hooke/history/ellipsoid command
-===============================================
-
-pair_style gran/hertz/history/ellipsoid command
-===============================================
-
-Syntax
-""""""
-
-.. code-block:: LAMMPS
-
-   pair_style style Kn Kt gamma_n gamma_t xmu dampflag keyword
-
-* style = *gran/hooke/history/ellipsoid* or *gran/hertz/history/ellipsoid*
-* Kn = elastic constant for normal particle repulsion (force/distance units or pressure units - see discussion below)
-* Kt = elastic constant for tangential contact (force/distance units or pressure units - see discussion below)
-* gamma_n = damping coefficient for collisions in normal direction (1/time units or 1/time-distance units - see discussion below)
-* gamma_t = damping coefficient for collisions in tangential direction (1/time units or 1/time-distance units - see discussion below)
-* xmu = static yield criterion (unitless value between 0.0 and 1.0e4)
-* dampflag = 0 or 1 if tangential damping force is excluded or included
-
-* keyword = *limit_damping*, *bounding_box*, *geometric*, *curvature_gaussian*
-
-  .. parsed-literal::
-
-      *limit_damping* value = none
-         limit damping to prevent attractive interaction
-      *bounding_box* value = none
-         use oriented bounding box in contact detection
-      *geometric* value = none
-         use geometric radial distance formulation for the contact formulation
-      *curvature_gaussian* value = none
-         use Gaussian curvature formulation for the contact detection (Hertz only)
-
-.. note::
-
-   Versions of LAMMPS before 9Jan09 had different style names for
-   granular force fields.  This is to emphasize the fact that the
-   Hertzian equation has changed to model polydispersity more accurately.
-   A side effect of the change is that the Kn, Kt, gamma_n, and gamma_t
-   coefficients in the pair_style command must be specified with
-   different values in order to reproduce calculations made with earlier
-   versions of LAMMPS, even for monodisperse systems.  See the NOTE below
-   for details.
-
-Examples
-""""""""
-
-.. code-block:: LAMMPS
-
-   pair_style gran/hooke/history/ellipsoid 200000.0 NULL 50.0 NULL 0.5 1 bounding_box
-   pair_style gran/hooke/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0 limit_damping
-   pair_style gran/hertz/history/ellipsoid 200000.0 70000.0 50.0 30.0 0.5 0 geometric curvature_gaussian
-
-Description
-"""""""""""
-
-The *gran/history/ellipsoid* styles allow granular contact
-between two superellipsoid particles whose surface is implicitly defined as:
-
-.. math::
-
-    f(\mathbf{x}) = \left(
-    \left|\frac{x}{a}\right|^{n_2} + \left|\frac{y}{b}\right|^{n_2}
-    \right)^{n_1 / n_2}
-    + \left|\frac{z}{c}\right|^{n_1} - 1 = 0
-
-for a point :math:`\mathbf{x} = (x, y, z)` where the coordinates are given
-in the principal directions of inertia of the particle.
-The half-diameters :math:`a`, :math:`b`, and :math:`c` correspond to the *shape*
-property, and the exponents :math:`n_1` and :math:`n_2` to the *block* property
-of the ellipsoid atom. See the doc page for the :doc:`set <set>` command for
-more details.
-
-.. note::
-   
-    The contact solver strictly requires convex particle shapes to ensure a mathematically
-    unique point of deepest penetration. Therefore, the blockiness parameters must be
-    :math:`n_1 \ge 2.0` and :math:`n_2 \ge 2.0`. Attempting to simulate concave or "pointy"
-    particles (:math:`n < 2.0`) will result in an error.
-
-.. note::
-
-    For particles with high blockiness exponents (:math:`n > 4.0`) involved in edge-to-edge
-    or corner-to-corner contacts, the surface normal vector varies rapidly over small
-    distances. The Newton solver may occasionally fail to converge to the strict gradient
-    alignment tolerance (typically :math:`10^{-10}`).
-    You may see warning messages in the log indicating that the solver returned a sub-optimal solution, 
-    but the simulation will proceed using this best-effort contact point.
-    Using the `geometric` keyword will help mitigate this issue. 
-
-Contact detection for these aspherical particles uses the so-called ''midway''
-minimization approach from :ref:`(Houlsby) <Houlsby>`. Considering two
-particles with shape functions,  :math:`F_i` and :math:`F_j`,
-the contact point :math:`\mathbf{X}_0` in the global frame is obtained as:
-
-.. math::
-
-    \mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}}
-                   \ F_i(\mathbf{X}) + F_j(\mathbf{X})
-                   \text{, subject to } F_i(\mathbf{X}) = F_j(\mathbf{X}) 
-
-where the shape function is given by
-:math:`F_i(\mathbf{X}) = f_i(\mathbf{R}_i^T (\mathbf{X} - \mathbf{X}_i))`
-and where :math:`\mathbf{X}_i` and :math:`\mathbf{R}_i` are the center of mass
-and rotation matrix of the particle, respectively.
-The constrained minimization problem is solved using Lagrange multipliers and
-Newton's method with a line search as described by :ref:`(Podlozhnyuk) <Podlozhnyuk>`.
-
-.. note::
-
-    The shape function :math:`F` is not a signed distance function and
-    does not have unit gradient :math:`\|\nabla F \| \neq 1` so that the
-    so-called ''midway'' point is not actually located at an equal distance from the
-    surface of both particles.
-    For contact between non-identical particles, the contact point tends to
-    be closer to the surface of the smaller and blockier particle.
-
-.. note::
-
-    This formulation leads to a 4x4 system of non-linear equations.
-    Tikhonov regularization and correction for high root multiplicity is used
-    to ensure robustness of the direct solver and high convergence rate,
-    even for blocky particles with near flat faces.
-
-The particles overlap if both shape functions are negative at the contact point.
-The contact normal is obtained as: :math:`\mathbf{n}_{ij} = \nabla F_i(\mathbf{X}_0) / \| \nabla F_i(\mathbf{X}_0)\| = - \nabla F_j(\mathbf{X}_0) / \| \nabla F_j(\mathbf{X}_0)\|`
-and the overlap :math:`\delta = \|\mathbf{X}_j^{\mathrm{surf}} - \mathbf{X}_i^{\mathrm{surf}}\|`
-is computed as the distance between the points on the
-particles surfaces that are closest to the contact point in the
-direction of the contact normal: :math:`F_i(\mathbf{X}_i^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_i \mathbf{n}_{ij}) = 0`
-and :math:`F_j(\mathbf{X}_j^{\mathrm{surf}} = \mathbf{X}_0 + \lambda_j \mathbf{n}_{ij}) = 0`.
-Newton's method is used to solve this equation for the scalars
-:math:`\lambda_i` and :math:`\lambda_j` and find the surface points
-:math:`\mathbf{X}_i^{\mathrm{surf}}` and :math:`\mathbf{X}_j^{\mathrm{surf}}`.
-
-.. note::
-    A modified representation of the particle surface is defined as
-    :math:`G(\mathbf{X}) = (F(\mathbf{X})+1)^{1/n_1}-1` which is a radial distance function formulation.
-    This formulation is used to compute the surface points once the midway contact point is found.
-    This formulation is also used when the *geometric* keyword is specified in the pair_style command and the following optimization problem is solved instead for the contact point:
-    :math:`\mathbf{X}_0 = \underset{\mathbf{X}}{\text{argmin}} \, r_i \ G_i(\mathbf{X}) + r_j G_j(\mathbf{X})  \text{, subject to } r_i G_i(\mathbf{X}) = r_j G_j(\mathbf{X})`, 
-    where :math:`r_i` and :math:`r_j` are the average radii of the two particles.
-    The geometric formulation thus yields a better approximation of the contact point
-    for particles with different sizes, and it is more robust for particles with high *block* exponents.    
-
-A hierarchical approach is used to limit the cost of contact detection.
-First, intersection of the bounding spheres of the two particles of bounding
-radii :math:`r_i` and :math:`r_j` is checked. If the distance
-between the particles center is more than the sum of the radii
-:math:`\|\mathbf{X}_j - \mathbf{X}_j\| > r_i + r_j`, the particles do not intersect.
-Then, if the bounding spheres intersect, intersection of the oriented
-bounding box is checked. This is done following the equations of
-:ref:`(Eberly) <GeometricTools>`.
-This check is only performed if the *bounding_box* keyword is used.
-This is advantageous for all particles except for superellipses with 
-aspect ratio close to one and both blockiness indexes close to 2.
-
-The force calculation is similar to the :doc:`gran/hooke/history <pair_gran>`
-and :doc:`gran/hertz/history <pair_gran>` styles for spherical particles,
-and use the following formulas for the frictional force
-between two granular particles, as described in
-:ref:`(Brilliantov) <Brilliantov>`, :ref:`(Silbert) <Silbert>`, and
-:ref:`(Zhang) <Zhang3>`, 
-
-The Hookean style use this formula:
-
-.. math::
-
-   F_{hk} = (k_n \delta \mathbf{n}_{ij} -
-   m_{eff} \gamma_n\mathbf{ v}_n) -
-   (k_t \boldsymbol{\Delta} \mathbf{s}_t +
-   m_{eff} \gamma_t \mathbf{v}_t)
-
-The Hertzian style uses this formula:
-
-.. math::
-
-   F_{hz} = \sqrt{\delta} \sqrt{\frac{R_i R_j}{R_i + R_j}} F_{hk} =
-     \sqrt{\delta} \sqrt{\frac{R_i R_j}{R_i + R_j}}
-     \Big[ (k_n \delta \mathbf{n}_{ij} -
-       m_{eff} \: \gamma_n \mathbf{ v}_n) -
-       (k_t \boldsymbol{\Delta} \mathbf{s}_t +
-       m_{eff} \: \gamma_t \mathbf{v}_t) \Big]
-
-In both equations the first parenthesized term is the normal force
-between the two particles and the second parenthesized term is the
-tangential force.  The normal force has 2 terms, a contact force and a
-damping force.  The tangential force also has 2 terms: a shear force
-and a damping force.  The shear force is a "history" effect that
-accounts for the tangential displacement between the particles for the
-duration of the time they are in contact.  The tangential damping force
-term is included in all three pair styles if *dampflag* is set to 1;
-it is not included if *dampflag* is set to 0.
-
-The other quantities in the equations are as follows:
-
-* :math:`\delta` = overlap distance of 2 particles
-* :math:`K_n` = elastic constant for normal contact
-* :math:`K_t` = elastic constant for tangential contact
-* :math:`\gamma_n` = viscoelastic damping constant for normal contact
-* :math:`\gamma_t` = viscoelastic damping constant for tangential contact
-* :math:`m_{eff} = M_i M_j / (M_i + M_j) =` effective mass of 2 particles of mass M_i and M_j
-* :math:`\boldsymbol{\Delta} \mathbf{s}_t =` tangential displacement vector between 2 particles which is truncated to satisfy a frictional yield criterion
-* :math:`n_{ij} =` unit vector along the line connecting the centers of the 2 particles
-* :math:`V_n =` normal component of the relative velocity of the 2 particles
-* :math:`V_t =` tangential component of the relative velocity of the 2 particles
-* :math:`R_i, R_j =` approximated radii of the curvature of the two particles at the contact point
-
-The :math:`K_n`, :math:`K_t`, :math:`\gamma_n`, and :math:`\gamma_t`
-coefficients are specified as parameters to the pair_style command.  If
-a NULL is used for :math:`K_t`, then a default value is used where
-:math:`K_t = 2/7 K_n`.  If a NULL is used for :math:`\gamma_t`, then a
-default value is used where :math:`\gamma_t = 1/2 \gamma_n`.
-
-The interpretation and units for these 4 coefficients are different in
-the Hookean versus Hertzian equations.
-
-The Hookean model is one where the normal push-back force for two
-overlapping particles is a linear function of the overlap distance.
-Thus the specified :math:`K_n` is in units of (force/distance).  Note
-that this push-back force is independent of absolute particle size (in
-the monodisperse case) and of the relative sizes of the two particles
-(in the polydisperse case).  This model also applies to the other terms
-in the force equation so that the specified :math:`\gamma_n` is in units
-of (1/time), :math:`K_t` is in units of (force/distance), and
-:math:`\gamma_t` is in units of (1/time).
-
-The Hertzian model is one where the normal push-back force for two
-overlapping particles is proportional to the area of overlap of the
-two particles, and is thus a non-linear function of overlap distance.
-At each point on the surface of superellipsoids there are two
-principal radii of curvature. For simplicity, the Hertzian model
-approximates the contact radius of each particle, :math:`R`, as either 
-the inverse of the mean curvature or as the gaussian curvature coefficient
-if the *curvature_gaussian* keyword is used.
-These curvatures are calculated at the particle surface point that is closest 
-to the contact point in the direction of the contact normal. 
-Thus Kn has units of force per area and is thus specified in units of
-(pressure).  The effects of absolute particle size (monodispersity)
-and relative size (polydispersity) are captured in the radii-dependent
-prefactors.  When these prefactors are carried through to the other
-terms in the force equation it means that the specified :math:`\gamma_n` is in
-units of (1/(time\*distance)), :math:`K_t` is in units of (pressure), and
-:math:`\gamma_t` is in units of (1/(time\*distance)).
-
-Note that in the Hookean case, :math:`K_n` can be thought of as a linear
-spring constant with units of force/distance.  In the Hertzian case,
-:math:`K_n` is like a non-linear spring constant with units of
-force/area or pressure, and as shown in the :ref:`(Zhang) <Zhang3>`
-paper, :math:`K_n = 4G / (3(1-\nu))` where :math:`\nu =` the Poisson ratio,
-G = shear modulus = :math:`E / (2(1+\nu))`, and E = Young's modulus.  Similarly,
-:math:`K_t = 4G / (2-\nu)`.  (NOTE: in an earlier version of the manual, we incorrectly
-stated that :math:`K_t = 8G / (2-\nu)`.)
-
-Thus in the Hertzian case :math:`K_n` and :math:`K_t` can be set to
-values that corresponds to properties of the material being modeled.
-This is also true in the Hookean case, except that a spring constant
-must be chosen that is appropriate for the absolute size of particles in
-the model.  Since relative particle sizes are not accounted for, the
-Hookean styles may not be a suitable model for polydisperse systems.
-
-.. note::
-
-   In versions of LAMMPS before 9Jan09, the equation for Hertzian
-   interactions did not include the :math:`\sqrt{r_i r_j / (r_i + r_j)}`
-   term and thus was not as accurate for polydisperse systems.  For
-   monodisperse systems, :math:`\sqrt{ r_i r_j /(r_i+r_j)}` is a
-   constant factor that effectively scales all 4 coefficients:
-   :math:`K_n, K_t, \gamma_n, \gamma_t`.  Thus you can set the values of
-   these 4 coefficients appropriately in the current code to reproduce
-   the results of a previous Hertzian monodisperse calculation.  For
-   example, for the common case of a monodisperse system with particles
-   of diameter 1, all 4 of these coefficients should now be set 2x
-   larger than they were previously.
-
-Xmu is also specified in the pair_style command and is the upper limit
-of the tangential force through the Coulomb criterion Ft = xmu\*Fn,
-where Ft and Fn are the total tangential and normal force components
-in the formulas above.  Thus in the Hookean case, the tangential force
-between 2 particles grows according to a tangential spring and
-dash-pot model until Ft/Fn = xmu and is then held at Ft = Fn\*xmu until
-the particles lose contact.  In the Hertzian case, a similar analogy
-holds, though the spring is no longer linear.
-
-.. note::
-
-   Normally, xmu should be specified as a fractional value between
-   0.0 and 1.0, however LAMMPS allows large values (up to 1.0e4) to allow
-   for modeling of systems which can sustain very large tangential
-   forces.
-
-The effective mass *m_eff* is given by the formula above for two
-isolated particles.  If either particle is part of a rigid body, its
-mass is replaced by the mass of the rigid body in the formula above.
-This is determined by searching for a :doc:`fix rigid <fix_rigid>`
-command (or its variants).
-
-For granular styles there are no additional coefficients to set for
-each pair of atom types via the :doc:`pair_coeff <pair_coeff>` command.
-All settings are global and are made via the pair_style command.
-However you must still use the :doc:`pair_coeff <pair_coeff>` for all
-pairs of granular atom types.  For example the command
-
-.. code-block:: LAMMPS
-
-   pair_coeff * *
-
-should be used if all atoms in the simulation interact via a granular
-potential (i.e. one of the pair styles above is used).  If a granular
-potential is used as a sub-style of :doc:`pair_style hybrid <pair_hybrid>`, then specific atom types can be used in the
-pair_coeff command to determine which atoms interact via a granular
-potential.
-
-If two particles are moving away from each other while in contact, there
-is a possibility that the particles could experience an effective attractive
-force due to damping. If the *limit_damping* keyword is used, this option
-will zero out the normal component of the force if there is an effective
-attractive force.
-
-
-Mixing, shift, table, tail correction, restart, rRESPA info
-"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-
-The :doc:`pair_modify <pair_modify>` mix, shift, table, and tail options
-are not relevant for granular pair styles.
-
-These pair styles write their information to :doc:`binary restart files <restart>`, so a pair_style command does not need to be
-specified in an input script that reads a restart file.
-
-These pair styles can only be used via the *pair* keyword of the
-:doc:`run_style respa <run_style>` command.  They do not support the
-*inner*, *middle*, *outer* keywords.
-
-The single() function of these pair styles returns 0.0 for the energy
-of a pairwise interaction, since energy is not conserved in these
-dissipative potentials.  It also returns only the normal component of
-the pairwise interaction force.  However, the single() function also
-calculates 10 extra pairwise quantities.  The first 3 are the
-components of the tangential force between particles I and J, acting
-on particle I.  The fourth is the magnitude of this tangential force.
-The next 3 (5-7) are the components of the relative velocity in the
-normal direction (along the contact normal :math:`\mathbf{n}_{ij}`).  The
-last 3 (8-10) the components of the relative velocity in the
-tangential direction.
-
-These extra quantities can be accessed by the :doc:`compute pair/local <compute_pair_local>` command, as *p1*, *p2*, ...,
-*p10*\ .
-
-----------
-
-Restrictions
-""""""""""""
-
-All the granular pair styles are part of the GRANULAR package.  It is
-only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
-
-These pair styles require the *ellipsoid* atom style with the *superellipsoid* flag enabled,
-(i.e. `atom_style ellipsoid superellipsoid`) which is part of the ASPHERE package. It is
-only enabled if LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
-
-This pair style requires you to use the :doc:`comm_modify vel yes <comm_modify>` command so that velocities are stored by ghost
-atoms.
-
-These pair styles will not restart exactly when using the
-:doc:`read_restart <read_restart>` command, though they should provide
-statistically similar results.  This is because the forces they
-compute depend on atom velocities.  See the
-:doc:`read_restart <read_restart>` command for more details.
-
-Accumulated values for individual contacts are saved to to restart
-files but are not saved to data files. Therefore, forces may
-differ significantly when a system is reloaded using A
-:doc:`read_data <read_data>` command.
-
-Related commands
-""""""""""""""""
-
-:doc:`pair_coeff <pair_coeff>`
-
-Default
-"""""""
-
-none
-
-----------
-
-.. _Podlozhnyuk:
-
-**(Podlozhnyuk)** Podlozhnyuk, Pirker, Kloss, Comp. Part. Mech., 4:101-118 (2017).
-
-.. _Houlsby:
-
-**(Houlsby)** Houlsby, Computers and Geotechnics, 36, 953-959 (2009).
-
-.. _GeometricTools:
-
-**(Eberly)** Eberly, Geometric Tools: Dynamic Collision Detection Using Oriented Bounding Boxes (2008).

From b9daa8641030a4d370bdbc398d504ddb64549de2 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 09:23:19 +0100
Subject: [PATCH 158/174] Made bounding_box check the default option

---
 doc/src/pair_granular_superellipsoid.rst               | 8 ++++----
 examples/ASPHERE/superellipsoid_gran/in.bowling        | 3 +--
 examples/ASPHERE/superellipsoid_gran/in.drop_test      | 6 ++----
 examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran | 5 +----
 src/GRANULAR/pair_granular_superellipsoid.cpp          | 6 +++---
 5 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 6b49af80b2b..ada8746d935 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -8,11 +8,11 @@ Syntax
 
 .. code-block:: LAMMPS
 
-   pair_style granular/superellipsoid cutoff bounding_box curvature_gaussian
+   pair_style granular/superellipsoid cutoff no_bounding_box curvature_gaussian
 
 Optional settings, see discussion below.
 * cutoff = global cutoff value
-* bounding_box = oriented bounding box check
+* no_bounding_box = skip oriented bounding box check
 * curvature_gaussian = gaussian curvature coeff approximation for contact patch
 
 Examples
@@ -20,7 +20,7 @@ Examples
 
 .. code-block:: LAMMPS
 
-   pair_style granular/superellipsoid bounding_box
+   pair_style granular/superellipsoid
    pair_coeff * * hooke 1000.0 50.0 tangential linear_history 1000.0 1.0 0.5 damping mass_velocity
 
    pair_style granular/superellipsoid 10.0 curvature_gaussian
@@ -150,7 +150,7 @@ between the particles center is more than the sum of the radii
 Then, if the bounding spheres intersect, intersection of the oriented
 bounding box is checked. This is done following the equations of
 :ref:`(Eberly) <GeometricTools>`.
-This check is only performed if the *bounding_box* keyword is used.
+This check is always performed, unless the *no_bounding_box* keyword is used.
 This is advantageous for all particle shapes except for superellipses with
 aspect ratio close to one and both blockiness indexes close to 2.
 
diff --git a/examples/ASPHERE/superellipsoid_gran/in.bowling b/examples/ASPHERE/superellipsoid_gran/in.bowling
index bc291fc737b..83e4f42dc35 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.bowling
+++ b/examples/ASPHERE/superellipsoid_gran/in.bowling
@@ -34,10 +34,9 @@ set             type 2 block 2.0 2.0
 set             type 2 mass 10.0
 group ball type 2
 
-pair_style      granular/superellipsoid bounding_box
+pair_style      granular/superellipsoid
 pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
 
-
 compute diameter all property/atom shapex shapey shapez
 compute orient all property/atom quatw quati quatj quatk
 compute block all property/atom block1 block2
diff --git a/examples/ASPHERE/superellipsoid_gran/in.drop_test b/examples/ASPHERE/superellipsoid_gran/in.drop_test
index 70394b60cfa..3d75b1479b9 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.drop_test
+++ b/examples/ASPHERE/superellipsoid_gran/in.drop_test
@@ -48,9 +48,7 @@ group           wall type 1
 group           mobile type 2
 
 # Interaction / Pair Style
-# pair_style      gran/hertz/history/ellipsoid 1e4 NULL 20.0 0.0 0.5 0 bounding_box curvature_gaussian
-# pair_coeff      * *
-pair_style granular/superellipsoid curvature_gaussian bounding_box
+pair_style granular/superellipsoid curvature_gaussian
 pair_coeff * * hertz 10000.0 200.0 tangential classic 2850 0.0 0.5 damping viscoelastic
 # Computes and Output
 compute         diameter all property/atom shapex shapey shapez
@@ -81,4 +79,4 @@ thermo          1000
 thermo_style  custom time step ke c_rke 
 
 timestep        0.0001
-run             200000
+run             20000
diff --git a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
index 058dd736ef8..b8f6358e9ed 100644
--- a/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
+++ b/examples/ASPHERE/superellipsoid_gran/in.ellipsoid_gran
@@ -18,10 +18,7 @@ set             type 1 mass 1.0
 set             type 1 shape 2.0 1.0 1.0
 set             type 1 block 2.0 2.0
 
-
-# pair_style      gran/hertz/history/ellipsoid 1e3 NULL 0.0 0.0 0.5 0 bounding_box
-# pair_coeff * *
-pair_style      granular/superellipsoid bounding_box
+pair_style      granular/superellipsoid
 pair_coeff      * * hooke 1000.0 0.0 tangential linear_history 285 0.0 0.5 damping mass_velocity
 # Hertz model instead
 # pair_coeff      * * hertz 1000.0 0.0 tangential linear_history 285.714 0.0 0.5 damping viscoelastic
diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 5f919afe005..8e211e87a70 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -357,12 +357,12 @@ void PairGranularSuperellipsoid::settings(int narg, char **arg)
 {
   cutoff_global = -1;    // default: will be set based on particle sizes, model choice
   curvature_model = MathExtraSuperellipsoids::CURV_MEAN;
-  bounding_box = 0;
+  bounding_box = 1;
 
   int iarg = 0;
   while (iarg < narg) {
-    if (strcmp(arg[iarg], "bounding_box") == 0) {
-      bounding_box = 1;
+    if (strcmp(arg[iarg], "no_bounding_box") == 0) {
+      bounding_box = 0;
       iarg++;
     } else if (strcmp(arg[iarg], "geometric") == 0) {
       contact_formulation = MathExtraSuperellipsoids::FORMULATION_GEOMETRIC;

From 31ea5c9f52cac00021c8a346b1677cd12449ab15 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 09:45:53 +0100
Subject: [PATCH 159/174] Added safety guards to the curvature radius

---
 src/GRANULAR/pair_granular_superellipsoid.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/GRANULAR/pair_granular_superellipsoid.cpp
index 8e211e87a70..54278e3a376 100644
--- a/src/GRANULAR/pair_granular_superellipsoid.cpp
+++ b/src/GRANULAR/pair_granular_superellipsoid.cpp
@@ -45,6 +45,8 @@ enum { CLASSIC, LINEAR_HISTORY };
 
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 static constexpr double EPSILON = 1e-10;
+static constexpr double MIN_RADIUS_RATIO = 1e-4;   
+static constexpr double MIN_CURVATURE = 1e-12;    
 
 /* ---------------------------------------------------------------------- */
 
@@ -1153,6 +1155,21 @@ void PairGranularSuperellipsoid::calculate_forces()
       curvature_j = MathExtraSuperellipsoids::gaussian_curvature_superellipsoid(
           shapej, blockj, flagj, Rj, surf_point_j, xj);
     }
+    double sum_curvature = curvature_i + curvature_j;
+    
+    // Physical upper bound smallest particle's bounding sphere radius
+    double max_physical_radius = MIN(radi, radj);
+    double min_physical_radius = MIN_RADIUS_RATIO * max_physical_radius;
+
+    if (sum_curvature > MIN_CURVATURE) {
+      contact_radius = sqrt((overlap_i + overlap_j) / sum_curvature);
+      // Cap the maximum radius (flat faces)
+      contact_radius = MIN(contact_radius, max_physical_radius); 
+      // Cap the minimum radius (sharp corners) to prevent force collapse
+      contact_radius = MAX(contact_radius, min_physical_radius);
+    } else {
+      contact_radius = max_physical_radius;
+    }
 
     // hertzian contact radius approximation
     contact_radius = sqrt((overlap_i + overlap_j) / (curvature_i + curvature_j));

From 9257cefd1c9d040fd4d5169f8ec2b5d078e029bd Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 10:09:19 +0100
Subject: [PATCH 160/174] Moved pair granular superllipsoid to ASPHERE package

---
 src/{GRANULAR => ASPHERE}/pair_granular_superellipsoid.cpp | 0
 src/{GRANULAR => ASPHERE}/pair_granular_superellipsoid.h   | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename src/{GRANULAR => ASPHERE}/pair_granular_superellipsoid.cpp (100%)
 rename src/{GRANULAR => ASPHERE}/pair_granular_superellipsoid.h (100%)

diff --git a/src/GRANULAR/pair_granular_superellipsoid.cpp b/src/ASPHERE/pair_granular_superellipsoid.cpp
similarity index 100%
rename from src/GRANULAR/pair_granular_superellipsoid.cpp
rename to src/ASPHERE/pair_granular_superellipsoid.cpp
diff --git a/src/GRANULAR/pair_granular_superellipsoid.h b/src/ASPHERE/pair_granular_superellipsoid.h
similarity index 100%
rename from src/GRANULAR/pair_granular_superellipsoid.h
rename to src/ASPHERE/pair_granular_superellipsoid.h

From 19bb345cd2e2a603c07c1f4a7c382e65aab77c85 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 10:19:06 +0100
Subject: [PATCH 161/174] Updated documentation

---
 doc/src/pair_granular_superellipsoid.rst | 30 +++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index ada8746d935..32bf5cfdc4a 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -154,6 +154,17 @@ This check is always performed, unless the *no_bounding_box* keyword is used.
 This is advantageous for all particle shapes except for superellipses with
 aspect ratio close to one and both blockiness indexes close to 2.
 
+.. warning::
+
+    The Newton-Raphson minimization used to find the midway contact point can
+    fail to converge if the initial starting guess is too far from the true
+    physical surface. This typically occurs if a user specifies a manual global
+    *cutoff* that is significantly larger than the particles **and** enables the
+    *no_bounding_box* keyword. Under these conditions, the solver attempts to
+    resolve contacts between widely separated particles, which might cause the
+    math to diverge and instantly crashing the simulation. It is strongly
+    recommended to keep bounding box checks enabled if a large cutoff is specified.
+
 ----------
 
 This section provides an overview of the various normal, tangential,
@@ -201,6 +212,19 @@ gaussian curvature coefficient :math:`R_i = 1 / \sqrt{\kappa_1 \kappa_2}`, where
 contact point. For *hertz*, the units of the spring constant :math:`k_n` are
 *force*\ /\ *length*\ \^2, or equivalently *pressure*\ .
 
+.. note::
+
+    To ensure numerical stability and preserve physical realism, the computed
+    contact radius is mathematically capped. For highly blocky particles
+    undergoing flat-on-flat contact, the theoretical curvature approaches zero,
+    which would yield an infinite contact radius and cause a force explosion. To
+    prevent this, the maximum contact radius is capped at the physical bounding
+    radius of the smallest interacting particle. Conversely, for sharp corner
+    contacts where curvature approaches infinity, the calculated radius would
+    drop to zero, eliminating the repulsive force entirely. The contact radius
+    is therefore lower-bounded by a minimum fraction of the physical radius
+    (:math:`10^{-4} \min(r_i, r_j)`) to prevent particles from unphysically interpenetrating.
+
 In addition, the normal force is augmented by a damping term of the
 following general form:
 
@@ -392,9 +416,9 @@ attractive force.
 ----------
 
 LAMMPS automatically sets pairwise cutoff values for *pair_style
-granular* based on particle radii. In the vast majority of situations,
+granular/superellipsoid* based on particle radii. In the vast majority of situations,
 this is adequate. However, a cutoff value can optionally be appended
-to the *pair_style granular* command to specify a global cutoff (i.e.
+to the *pair_style granular/superellipsoid* command to specify a global cutoff (i.e.
 a cutoff for all atom types). This option may be useful in some rare
 cases where the automatic cutoff determination is not sufficient.
 
@@ -441,7 +465,7 @@ This pair style require Newton's third law be set to *off* for pair interactions
 There are currently no versions of *fix wall/gran* or *fix wall/gran/region* that
 are compatible with the superellipsoid particles.
 
-This pair style is part of the GRANULAR package.  It is
+This pair style is part of the ASPHERE package.  It is
 only enabled if LAMMPS was built with that package.
 See the :doc:`Build package <Build_package>` page for more info.
 

From 4cf6069eb23edcb417bf242c6e7b10d3484a94eb Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Mon, 23 Mar 2026 10:33:13 +0100
Subject: [PATCH 162/174] Fixed unit test cmakelists

---
 doc/src/pair_granular_superellipsoid.rst | 8 ++++----
 unittest/utils/CMakeLists.txt            | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 32bf5cfdc4a..5902c558e75 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -10,10 +10,10 @@ Syntax
 
    pair_style granular/superellipsoid cutoff no_bounding_box curvature_gaussian
 
-Optional settings, see discussion below.
-* cutoff = global cutoff value
-* no_bounding_box = skip oriented bounding box check
-* curvature_gaussian = gaussian curvature coeff approximation for contact patch
+   Optional settings, see discussion below.
+   * cutoff = global cutoff value
+   * no_bounding_box = skip oriented bounding box check
+   * curvature_gaussian = gaussian curvature coeff approximation for contact patch
 
 Examples
 """"""""
diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 812d71e4169..cb07e9e3ca2 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -156,8 +156,8 @@ endif()
 # =============================================================================
 
 # Extra math tests for superellipsoids
-if(PKG_ASPHERE AND PKG_GRANULAR)
+if(PKG_ASPHERE)
   add_executable(test_math_extra_superellipsoids test_math_extra_superellipsoids.cpp)
   target_link_libraries(test_math_extra_superellipsoids PRIVATE lammps GTest::GMockMain)
   add_test(NAME MathExtraSuperellipsoids COMMAND test_math_extra_superellipsoids)
-endif()
+endif()
\ No newline at end of file

From 95a51af59fc19df16f4c07541ce4956d1e82a0f7 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 23 Mar 2026 13:44:22 -0400
Subject: [PATCH 163/174] add support for superellipsoids to dump image

---
 src/GRAPHICS/dump_image.cpp    | 13 ++++++++-----
 src/GRAPHICS/image_objects.cpp | 24 ++++++++++++++++++++----
 src/GRAPHICS/image_objects.h   |  4 ++--
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/GRAPHICS/dump_image.cpp b/src/GRAPHICS/dump_image.cpp
index d36230e64a6..9042e53669a 100644
--- a/src/GRAPHICS/dump_image.cpp
+++ b/src/GRAPHICS/dump_image.cpp
@@ -1415,11 +1415,14 @@ void DumpImage::create_image()
         saved = reset_lighting(image, 0.3, 0.8, 0.45, 0.8);
       }
       EllipsoidObj e(elevel);
-      e.draw(image, estyle, color, x[j], avec_ellipsoid->bonus[ellipsoid[j]].shape,
-             avec_ellipsoid->bonus[ellipsoid[j]].quat, ediamvalue, opacity);
-      if (estyle & 1) {
-        // restore previous settings
-        restore_lighting(saved, image);
+      if (avec_ellipsoid->bonus_super) {
+        auto *bonus = avec_ellipsoid->bonus_super;
+        e.draw(image, estyle, color, x[j], bonus[ellipsoid[j]].shape, bonus[ellipsoid[j]].quat,
+               ediamvalue, opacity, bonus[ellipsoid[j]].block);
+      } else {
+        auto *bonus = avec_ellipsoid->bonus;
+        e.draw(image, estyle, color, x[j], bonus[ellipsoid[j]].shape,bonus[ellipsoid[j]].quat,
+               ediamvalue, opacity, nullptr);
       }
       m += size_one;
     }
diff --git a/src/GRAPHICS/image_objects.cpp b/src/GRAPHICS/image_objects.cpp
index a80c858bca9..f312d87250d 100644
--- a/src/GRAPHICS/image_objects.cpp
+++ b/src/GRAPHICS/image_objects.cpp
@@ -67,6 +67,14 @@ inline double radscale(const double *shape, const vec3 &pos)
                pos[2] / shape[2] * pos[2] / shape[2]));
 }
 
+// scale factor to move a position to the surface of a superellipsoid with given parameters
+inline double superscale(const double *shape, const double *block, const vec3 &pos)
+{
+  double a = pow(fabs(pos[0] / shape[0]), block[1]) + pow(fabs(pos[1] / shape[1]), block[1]);
+  double b = pow(fabs(pos[2] / shape[2]), block[0]);
+  return pow(pow(a, block[0] / block[1]) + b, -1.0 / block[0]);
+}
+
 // re-orient list of triangles to point along "dir", then scale and translate it.
 std::vector<triangle> transform(const std::vector<triangle> &triangles, const vec3 &dir,
                                 const vec3 &offs, double len, double width)
@@ -430,7 +438,8 @@ void EllipsoidObj::draw(Image *img, int flag, const double *color, const double
 // draw method for drawing ellipsoids from per-atom data which has a quaternion
 // and the shape list to define the orientation and stretch
 void EllipsoidObj::draw(Image *img, int flag, const double *color, const double *center,
-                        const double *shape, const double *quat, double diameter, double opacity)
+                        const double *shape, const double *quat, double diameter, double opacity,
+                        const double *block)
 {
   // select between triangles or cylinders or both
   bool doframe = true;
@@ -461,9 +470,16 @@ void EllipsoidObj::draw(Image *img, int flag, const double *color, const double
 
     if (dotri) {
       // set shape by shifting each corner to the surface
-      for (int i = 0; i < 3; ++i) {
-        auto &t = tri[i];
-        t = radscale(shape, t) * t;
+      if (block) {
+        for (int i = 0; i < 3; ++i) {
+          auto &t = tri[i];
+          t = superscale(shape, block, t) * t;
+        }
+      } else {
+        for (int i = 0; i < 3; ++i) {
+          auto &t = tri[i];
+          t = radscale(shape, t) * t;
+        }
       }
 
       // rotate
diff --git a/src/GRAPHICS/image_objects.h b/src/GRAPHICS/image_objects.h
index ccda1ccabc6..1c208d2c380 100644
--- a/src/GRAPHICS/image_objects.h
+++ b/src/GRAPHICS/image_objects.h
@@ -92,9 +92,9 @@ namespace ImageObjects {
     // construct (spherical) triangle mesh by refinining the triangles of an octahedron
     EllipsoidObj(int level = DEF_ELEVEL);
 
-    // draw ellipsoid from triangle mesh for ellipsoid particles
+    // draw ellipsoid from triangle mesh for ellipsoid and superellipsoid particles
     void draw(Image *, int, const double *, const double *, const double *, const double *, double,
-              double opacity = 1.0);
+              double opacity = 1.0, const double *block = nullptr);
 
     // draw ellipsoid from triangle mesh for ellipsoid regions
     void draw(Image *, int, const double *, const double *, const double *, Region *, double,

From 66da20e42e4cdaad2cc970349619cd334e44ea24 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 23 Mar 2026 14:29:17 -0400
Subject: [PATCH 164/174] port rounding fix from collected-small-fixes

---
 src/GRAPHICS/image.cpp         | 18 ++++++----------
 src/GRAPHICS/image_objects.cpp | 39 ++++++++++++++++++++++++----------
 2 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/GRAPHICS/image.cpp b/src/GRAPHICS/image.cpp
index c963254cc2d..7361d6753a5 100644
--- a/src/GRAPHICS/image.cpp
+++ b/src/GRAPHICS/image.cpp
@@ -1273,8 +1273,8 @@ void Image::draw_triangle(const double *x, const double *y, const double *z,
   double pixelWidth = (tanPerPixel > 0) ? tanPerPixel * dist : -tanPerPixel / zoom;
   double xf = xmap / pixelWidth;
   double yf = ymap / pixelWidth;
-  int xc = static_cast<int>(xf);
-  int yc = static_cast<int>(yf);
+  int xc = static_cast<int>(floor(xf));
+  int yc = static_cast<int>(floor(yf));
   double width_error = xf - xc;
   double height_error = yf - yc;
 
@@ -1287,10 +1287,10 @@ void Image::draw_triangle(const double *x, const double *y, const double *z,
   double pixelRightFull = rasterRight / pixelWidth;
   double pixelDownFull = rasterDown / pixelWidth;
   double pixelUpFull = rasterUp / pixelWidth;
-  int pixelLeft = std::lround(pixelLeftFull);
-  int pixelRight = std::lround(pixelRightFull);
-  int pixelDown = std::lround(pixelDownFull);
-  int pixelUp = std::lround(pixelUpFull);
+  int pixelLeft = static_cast<int>(ceil(pixelLeftFull));
+  int pixelRight = static_cast<int>(ceil(pixelRightFull));
+  int pixelDown = static_cast<int>(ceil(pixelDownFull));
+  int pixelUp = static_cast<int>(ceil(pixelUpFull));
 
   for (int iy = yc - pixelDown; iy <= yc + pixelUp; iy ++) {
     for (int ix = xc - pixelLeft; ix <= xc + pixelRight; ix ++) {
@@ -1316,12 +1316,6 @@ void Image::draw_triangle(const double *x, const double *y, const double *z,
       double s1[3], s2[3], s3[3];
       double c1[3], c2[3];
 
-      // for grid cell and other triangle meshes:
-      // there can be single pixel gaps due to rounding
-      // using <= if test can leave single-pixel gaps between 2 triangles
-      // using < if test fixes most of them
-      // suggested by Nathan Fabian, Nov 2022
-
       MathExtra::sub3(zlocal, xlocal, s1);
       MathExtra::sub3(ylocal, xlocal, s2);
       MathExtra::sub3(p, xlocal, s3);
diff --git a/src/GRAPHICS/image_objects.cpp b/src/GRAPHICS/image_objects.cpp
index f312d87250d..9a8c9eb8bf5 100644
--- a/src/GRAPHICS/image_objects.cpp
+++ b/src/GRAPHICS/image_objects.cpp
@@ -454,7 +454,8 @@ void EllipsoidObj::draw(Image *img, int flag, const double *color, const double
   const vec3 offs{center[0], center[1], center[2]};
 
   // optimization: just draw a sphere if a filled surface is requested and the object is a sphere
-  if (dotri && (shape[0] == shape[1]) && (shape[0] == shape[2])) {
+  // note: this does not apply to superellipsoids
+  if (dotri && !block && (shape[0] == shape[1]) && (shape[0] == shape[2])) {
     img->draw_sphere(center, color, 2.0 * shape[0], opacity);
     return;
   }
@@ -496,16 +497,32 @@ void EllipsoidObj::draw(Image *img, int flag, const double *color, const double
     }
 
     if (doframe) {
-      // set shape
-      for (int i = 0; i < 3; ++i) {
-        auto &t = tri[i];
-        if (dotri) {
-          // shift the cylinder positions inward by their diameter when using cylinders and
-          // triangles together for a smoother surface to avoid increasing the final size
-          double shapeplus[3] = {shape[0] - diameter, shape[1] - diameter, shape[1] - diameter};
-          t = radscale(shapeplus, t) * t;
-        } else {
-          t = radscale(shape, t) * t;
+      if (block) {
+        // set shape
+        for (int i = 0; i < 3; ++i) {
+          auto &t = tri[i];
+          if (dotri) {
+            // shift the cylinder positions inward by their diameter when using cylinders and
+            // triangles together for a smoother surface to avoid increasing the final size
+            double shapeplus[3] = {shape[0] - diameter, shape[1] - diameter, shape[1] - diameter};
+            t = superscale(shapeplus, block, t) * t;
+          } else {
+            t = superscale(shape, block, t) * t;
+          }
+        }
+
+      } else {
+        // set shape
+        for (int i = 0; i < 3; ++i) {
+          auto &t = tri[i];
+          if (dotri) {
+            // shift the cylinder positions inward by their diameter when using cylinders and
+            // triangles together for a smoother surface to avoid increasing the final size
+            double shapeplus[3] = {shape[0] - diameter, shape[1] - diameter, shape[1] - diameter};
+            t = radscale(shapeplus, t) * t;
+          } else {
+            t = radscale(shape, t) * t;
+          }
         }
       }
 

From f6474f91a814445af21ed2a75105192a115b5c3a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 23 Mar 2026 17:19:33 -0400
Subject: [PATCH 165/174] fix whitespace issues

---
 doc/src/atom_style.rst                           |  4 ++--
 doc/src/compute_property_atom.rst                |  2 +-
 src/ASPHERE/compute_erotate_asphere.cpp          |  2 +-
 src/ASPHERE/compute_temp_asphere.cpp             | 12 ++++++------
 src/ASPHERE/fix_nve_asphere_noforce.cpp          |  2 +-
 src/ASPHERE/pair_granular_superellipsoid.cpp     |  8 ++++----
 src/GRANULAR/pair_gran_hooke_history.cpp         |  2 +-
 src/RIGID/fix_rigid.cpp                          |  2 +-
 src/RIGID/fix_rigid_small.cpp                    |  2 +-
 src/atom_vec_ellipsoid.cpp                       |  2 +-
 src/atom_vec_ellipsoid.h                         |  6 +++---
 src/set.cpp                                      |  4 ++--
 unittest/formats/test_atom_styles.cpp            |  6 +++---
 .../utils/test_math_extra_superellipsoids.cpp    | 16 ++++++++--------
 14 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst
index d1cc4c6d140..ca3bfac7942 100644
--- a/doc/src/atom_style.rst
+++ b/doc/src/atom_style.rst
@@ -355,8 +355,8 @@ quaternion 4-vector with its orientation.  Each particle stores a flag
 in the ellipsoid vector which indicates whether it is an ellipsoid (1)
 or a point particle (0).
 By adding the flag *superellipsoid* to the atom_style command, the particles
-can be superellipsoids, which are a generalization of ellipsoids with two 
-additional blockiness parameters that control the shape. Superellipsoids 
+can be superellipsoids, which are a generalization of ellipsoids with two
+additional blockiness parameters that control the shape. Superellipsoids
 also store the principal moments of inertia of the particle.
 
 For the *line* style, particles can be are idealized line segments
diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index 9e3bb7d3d0f..73389c5e690 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -174,7 +174,7 @@ See the :doc:`set <set>` command for an explanation of the blockiness.
 The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for superellipsoidal
 particles and define the 3 principal moments of inertia of each particle.
 These are with respect to the particle's center of mass and in a reference system
-aligned with the particle's principal axes.  
+aligned with the particle's principal axes.
 
 The quantities *quatw*, *quati*, *quatj*, and *quatk* are defined for
 ellipsoidal particles and body particles and store the 4-vector quaternion
diff --git a/src/ASPHERE/compute_erotate_asphere.cpp b/src/ASPHERE/compute_erotate_asphere.cpp
index 1ae85c27fcd..4608b41319e 100644
--- a/src/ASPHERE/compute_erotate_asphere.cpp
+++ b/src/ASPHERE/compute_erotate_asphere.cpp
@@ -127,7 +127,7 @@ double ComputeERotateAsphere::compute_scalar()
           inertia[1] = rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]) / 5.0;
           inertia[2] = rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]) / 5.0;
         }
-          
+
         // wbody = angular velocity in body frame
 
         MathExtra::quat_to_mat(quat,rot);
diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index 64454a77c0d..d55ab5dc341 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -206,13 +206,13 @@ void ComputeTempAsphere::compute_scalar_templated(double &t)
   // no point particles since divide by inertia
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
-      
+
       if (mode == ALL) {
         t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) * rmass[i];
       }
 
       int j = ellipsoid[i];
-      
+
       if (is_super) {
         quat = bonus_super[j].quat;
         // principal moments of inertia
@@ -237,7 +237,7 @@ void ComputeTempAsphere::compute_scalar_templated(double &t)
       wbody[2] /= inertia[2];
 
       t += inertia[0]*wbody[0]*wbody[0] +
-           inertia[1]*wbody[1]*wbody[1] + 
+           inertia[1]*wbody[1]*wbody[1] +
            inertia[2]*wbody[2]*wbody[2];
     }
   }
@@ -290,7 +290,7 @@ void ComputeTempAsphere::compute_vector_templated(double *t)
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
          massone = rmass[i];
-         
+
         if (mode == ALL) {
           t[0] += massone * v[i][0]*v[i][0];
           t[1] += massone * v[i][1]*v[i][1];
@@ -305,7 +305,7 @@ void ComputeTempAsphere::compute_vector_templated(double *t)
         // principal moments of inertia
         if (is_super) {
           quat = bonus_super[j].quat;
-        
+
           inertia[0] = bonus_super[j].inertia[0];
           inertia[1] = bonus_super[j].inertia[1];
           inertia[2] = bonus_super[j].inertia[2];
@@ -313,7 +313,7 @@ void ComputeTempAsphere::compute_vector_templated(double *t)
         } else {
           quat = bonus[j].quat;
           shape = bonus[j].shape;
-        
+
           inertia[0] = INERTIA*massone * (shape[1]*shape[1] + shape[2]*shape[2]);
           inertia[1] = INERTIA*massone * (shape[0]*shape[0] + shape[2]*shape[2]);
           inertia[2] = INERTIA*massone * (shape[0]*shape[0] + shape[1]*shape[1]);
diff --git a/src/ASPHERE/fix_nve_asphere_noforce.cpp b/src/ASPHERE/fix_nve_asphere_noforce.cpp
index 45a96033d7e..0d7b99e06d5 100644
--- a/src/ASPHERE/fix_nve_asphere_noforce.cpp
+++ b/src/ASPHERE/fix_nve_asphere_noforce.cpp
@@ -63,7 +63,7 @@ template <bool is_super>
 void FixNVEAsphereNoforce::initial_integrate_templated()
 {
   AtomVecEllipsoid::Bonus *bonus = nullptr;
-  AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;  
+  AtomVecEllipsoid::BonusSuper *bonus_super = nullptr;
   if (avec) {
     if (is_super) bonus_super = avec->bonus_super;
     else bonus = avec->bonus;
diff --git a/src/ASPHERE/pair_granular_superellipsoid.cpp b/src/ASPHERE/pair_granular_superellipsoid.cpp
index 54278e3a376..15b2841e159 100644
--- a/src/ASPHERE/pair_granular_superellipsoid.cpp
+++ b/src/ASPHERE/pair_granular_superellipsoid.cpp
@@ -45,8 +45,8 @@ enum { CLASSIC, LINEAR_HISTORY };
 
 static constexpr int NUMSTEP_INITIAL_GUESS = 5;
 static constexpr double EPSILON = 1e-10;
-static constexpr double MIN_RADIUS_RATIO = 1e-4;   
-static constexpr double MIN_CURVATURE = 1e-12;    
+static constexpr double MIN_RADIUS_RATIO = 1e-4;
+static constexpr double MIN_CURVATURE = 1e-12;
 
 /* ---------------------------------------------------------------------- */
 
@@ -1156,7 +1156,7 @@ void PairGranularSuperellipsoid::calculate_forces()
           shapej, blockj, flagj, Rj, surf_point_j, xj);
     }
     double sum_curvature = curvature_i + curvature_j;
-    
+
     // Physical upper bound smallest particle's bounding sphere radius
     double max_physical_radius = MIN(radi, radj);
     double min_physical_radius = MIN_RADIUS_RATIO * max_physical_radius;
@@ -1164,7 +1164,7 @@ void PairGranularSuperellipsoid::calculate_forces()
     if (sum_curvature > MIN_CURVATURE) {
       contact_radius = sqrt((overlap_i + overlap_j) / sum_curvature);
       // Cap the maximum radius (flat faces)
-      contact_radius = MIN(contact_radius, max_physical_radius); 
+      contact_radius = MIN(contact_radius, max_physical_radius);
       // Cap the minimum radius (sharp corners) to prevent force collapse
       contact_radius = MAX(contact_radius, min_physical_radius);
     } else {
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index c2662476d0a..0679be9173c 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -811,4 +811,4 @@ double PairGranHookeHistory::memory_usage()
 {
   double bytes = (double) nmax * sizeof(double);
   return bytes;
-}
\ No newline at end of file
+}
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index d16b5a5320e..08b08695b52 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -758,7 +758,7 @@ void FixRigid::init()
   }
 
   // error for not supported superellipsoids
-  
+
   if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid");
 
   // timestep info
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index a2565f44ba7..82eccf1de6b 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -581,7 +581,7 @@ void FixRigidSmall::init()
   }
 
   // error for not supported superellipsoids
-  
+
   if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid");
 
   // timestep info
diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index cb4ddf79931..f662387fd8b 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -1114,4 +1114,4 @@ void AtomVecEllipsoid::process_args(int narg, char **arg)
       error->all(FLERR, fmt::format("Unknown atom_style ellipsoid argument: {}", arg[iarg]));
     }
   }
-}
\ No newline at end of file
+}
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 2af35411192..43e9416041c 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -99,14 +99,14 @@ class AtomVecEllipsoid : virtual public AtomVec {
   static double radius_ellipsoid(double *, double *, BlockType);
   static void inertia_ellipsoid_principal(double *, double, double *,
                                    double *block, BlockType);
-  
+
 
   template <bool is_super>
   int pack_comm_bonus_templated(int, int *, double *);
 
   template <bool is_super>
-  void unpack_comm_bonus_templated(int, int, double *); 
-  
+  void unpack_comm_bonus_templated(int, int, double *);
+
   template <bool is_super>
   int pack_border_bonus_templated(int, int *, double *);
 
diff --git a/src/set.cpp b/src/set.cpp
index 5f82cdaefda..6f786ddbd5f 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -1345,7 +1345,7 @@ void Set::invoke_density(Action *action)
 
     else if (ellipsoid_flag && ellipsoid[i] >= 0) {
       double *shape;
-      if (atom->superellipsoid_flag) shape = avec_ellipsoid->bonus_super[ellipsoid[i]].shape; 
+      if (atom->superellipsoid_flag) shape = avec_ellipsoid->bonus_super[ellipsoid[i]].shape;
       else shape = avec_ellipsoid->bonus[ellipsoid[i]].shape;
       // could enable 2d ellipse (versus 3d ellipsoid) when time integration
       //   options (fix nve/asphere, fix nh/asphere) are also implemented
@@ -2016,7 +2016,7 @@ void Set::invoke_quat(Action *action)
 
     if (avec_ellipsoid && ellipsoid[i] >= 0){
       if (atom->superellipsoid_flag) quat_one = avec_ellipsoid->bonus_super[ellipsoid[i]].quat;
-      else quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat; 
+      else quat_one = avec_ellipsoid->bonus[ellipsoid[i]].quat;
     }
     else if (avec_tri && tri[i] >= 0)
       quat_one = avec_tri->bonus[tri[i]].quat;
diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index de27139585e..6023fb00469 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -1491,7 +1491,7 @@ TEST_F(AtomStyleTest, superellipsoid)
     command("create_atoms 1 single -2.0  2.0  0.1"); // Point
     command("create_atoms 2 single  2.0  2.0 -0.1"); // ELLIPSOID (n1=2, n2=2)
     command("create_atoms 3 single  2.0  2.0 -2.1"); // GENERAL (n1!=n2)
-    command("create_atoms 4 single -2.0 -2.0  0.1"); // N1_EQUAL_N2 
+    command("create_atoms 4 single -2.0 -2.0  0.1"); // N1_EQUAL_N2
     command("set type 1 mass 4.0");
     command("set type 2 mass 2.4");
     command("set type 3 mass 4.4");
@@ -1584,7 +1584,7 @@ TEST_F(AtomStyleTest, superellipsoid)
     EXPECT_NEAR(bonus[1].shape[1], 0.4, EPSILON);
     EXPECT_NEAR(bonus[1].shape[2], 0.55, EPSILON);
     EXPECT_NEAR(bonus[1].block[0], 4.0, EPSILON);
-    EXPECT_NEAR(bonus[1].block[1], 3.0, EPSILON); 
+    EXPECT_NEAR(bonus[1].block[1], 3.0, EPSILON);
     EXPECT_NEAR(bonus[1].type, 2, EPSILON); // BlockType::GENERAL
     ASSERT_EQ(type[GETIDX(4)], 4);
     ASSERT_EQ(ellipsoid[GETIDX(4)], 2);
@@ -1593,7 +1593,7 @@ TEST_F(AtomStyleTest, superellipsoid)
     EXPECT_NEAR(bonus[2].shape[1], 1.0, EPSILON);
     EXPECT_NEAR(bonus[2].shape[2], 1.0, EPSILON);
     EXPECT_NEAR(bonus[2].block[0], 3.5, EPSILON);
-    EXPECT_NEAR(bonus[2].block[1], 3.5, EPSILON); 
+    EXPECT_NEAR(bonus[2].block[1], 3.5, EPSILON);
     EXPECT_NEAR(bonus[2].type, 1, EPSILON); // BlockType::N1_EQUAL_N2
 
     BEGIN_HIDE_OUTPUT();
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index bbecbaa1001..aa2c6119162 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -21,23 +21,23 @@
 // TODO: consider making a fixture with several setup functions?
 
 static constexpr double EPSILON = 1e-4;
-static constexpr double SOLV_EPSILON = std::numeric_limits<double>::epsilon() * 100; 
+static constexpr double SOLV_EPSILON = std::numeric_limits<double>::epsilon() * 100;
 
 TEST(HandwrittenSolver, invertible)
 {
-    double A[16] = {4, 2, 1, 3, 
-                    0, 5, 2, 1, 
-                    1, 0, 3, 2, 
+    double A[16] = {4, 2, 1, 3,
+                    0, 5, 2, 1,
+                    1, 0, 3, 2,
                     2, 1, 0, 4};
-                    
+
     double b[4] = {23.0, 20.0, 18.0, 20.0};
-    
+
     double expected_solution[4] = {1.0, 2.0, 3.0, 4.0};
 
     bool success = MathExtraSuperellipsoids::solve_4x4_robust_unrolled(A, b);
 
     ASSERT_TRUE(success) << "The solver falsely flagged an invertible matrix as singular.";
-    
+
     for (int i = 0; i < 4; ++i) {
         ASSERT_NEAR(b[i], expected_solution[i], SOLV_EPSILON) << "Failed at index " << i;
     }
@@ -274,4 +274,4 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
 // TODO: supersphere_mono with grains overlapping
 // TODO: supersphere_poly with grains overlapping
 // TODO: more
-// for polydisperse solution should be at the radii ratio
\ No newline at end of file
+// for polydisperse solution should be at the radii ratio

From 782bae9ecaf475d3546f2e7957278f7a21ff90f1 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 23 Mar 2026 17:46:50 -0400
Subject: [PATCH 166/174] emulate std::beta() for improved portability

---
 src/atom_vec_ellipsoid.cpp | 10 +++++-----
 src/math_extra.cpp         |  4 ++--
 src/math_extra.h           | 13 +++++++++++--
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp
index f662387fd8b..b806b0d9c25 100644
--- a/src/atom_vec_ellipsoid.cpp
+++ b/src/atom_vec_ellipsoid.cpp
@@ -1069,13 +1069,13 @@ void AtomVecEllipsoid::inertia_ellipsoid_principal(double *shape, double mass, d
   } else {
     // superellipsoid, Eq. (12) of Jaklic and Solina, 2003
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    double beta_tmp1 = std::beta(0.5 * e1, 1 + 2 * e1);
-    double beta_tmp2 = std::beta(0.5 * e2, 0.5 * e2);
-    double beta_tmp3 = std::beta(0.5 * e2, 1.5 * e2);
-    double dens = mass / (std::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
+    double beta_tmp1 = MathExtra::beta(0.5 * e1, 1 + 2 * e1);
+    double beta_tmp2 = MathExtra::beta(0.5 * e2, 0.5 * e2);
+    double beta_tmp3 = MathExtra::beta(0.5 * e2, 1.5 * e2);
+    double dens = mass / (MathExtra::beta(0.5 * e1, 1.0 + e1) * beta_tmp2);
     double m0 = 0.5 * rsq0 * beta_tmp1 * beta_tmp3;
     double m1 = 0.5 * rsq1 * beta_tmp1 * beta_tmp3;
-    double m2 = rsq2 * std::beta(1.5 * e1, 1 + e1) * beta_tmp2;
+    double m2 = rsq2 * MathExtra::beta(1.5 * e1, 1 + e1) * beta_tmp2;
     idiag[0] = dens * (m1 + m2);
     idiag[1] = dens * (m0 + m2);
     idiag[2] = dens * (m0 + m1);
diff --git a/src/math_extra.cpp b/src/math_extra.cpp
index a58df62eda0..4e5c5363ed4 100644
--- a/src/math_extra.cpp
+++ b/src/math_extra.cpp
@@ -655,8 +655,8 @@ double volume_ellipsoid(double *shape, double *block, int flag_super)
 
   if (flag_super) {
     double e1 = 2.0 / block[0], e2 = 2.0 / block[1];
-    unitvol = e1 * e2 * std::beta(0.5 * e1, 1.0 + e1) *
-                        std::beta(0.5 * e2, 0.5 * e2);
+    unitvol = e1 * e2 * beta(0.5 * e1, 1.0 + e1) *
+                        beta(0.5 * e2, 0.5 * e2);
   }
   return unitvol * shape[0] * shape[1] * shape[2];
 }
diff --git a/src/math_extra.h b/src/math_extra.h
index 1da51386457..aeb5d29b1cc 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -88,7 +88,8 @@ inline void multiply_shape_shape(const double *one, const double *two, double *a
 // quaternion operations
 
 inline void qnormalize(double *q);
-inline void qconjugate(double *q, double *qc); // would it be better to have q passed as const double?
+inline void qconjugate(double *q,
+                       double *qc);    // would it be better to have q passed as const double?
 inline void vecquat(double *a, double *b, double *c);
 inline void quatvec(double *a, double *b, double *c);
 inline void quatquat(double *a, double *b, double *c);
@@ -116,7 +117,7 @@ void BuildRyMatrix(double R[3][3], const double angle);
 void BuildRzMatrix(double R[3][3], const double angle);
 
 // moment of inertia operations
-void inertia_ellipsoid(double *idiag, double *quat, double *inertia); //superellipsoid version
+void inertia_ellipsoid(double *idiag, double *quat, double *inertia);    //superellipsoid version
 void inertia_ellipsoid(double *shape, double *quat, double mass, double *inertia);
 void inertia_line(double length, double theta, double mass, double *inertia);
 void inertia_triangle(double *v0, double *v1, double *v2, double mass, double *inertia);
@@ -130,6 +131,9 @@ double volume_ellipsoid(double *shape, double *block, int flag_super);
 
 void tribbox(double *, double, double *);
 
+// alternative to std::beta
+double beta(double x, double y);
+
 }    // namespace MathExtra
 
 /* ----------------------------------------------------------------------
@@ -842,4 +846,9 @@ inline void MathExtra::outer3(const double *v1, const double *v2, double ans[3][
   ans[2][2] = v1[2] * v2[2];
 }
 
+inline double MathExtra::beta(double x, double y)
+{
+  return std::tgamma(x) * std::tgamma(y) / std::tgamma(x + y);
+}
+
 #endif

From e928050b2c403361a19912790e92451cd95df65a Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 24 Mar 2026 09:43:41 +0100
Subject: [PATCH 167/174] Fixed multiple bugs as reported from Copilot. Fixed
 uninitialized memory for superellipsoid flag in atom.cpp

---
 doc/src/Howto_granular.rst                    |  4 ++--
 doc/src/Howto_spherical.rst                   |  5 ++---
 doc/src/pair_granular_superellipsoid.rst      |  4 ++--
 doc/src/pair_style.rst                        |  3 +--
 src/ASPHERE/math_extra_superellipsoids.cpp    |  4 ++--
 src/ASPHERE/math_extra_superellipsoids.h      |  2 +-
 src/RIGID/fix_rigid_small.cpp                 |  2 +-
 src/atom.cpp                                  |  2 +-
 src/atom_vec_ellipsoid.h                      |  2 +-
 src/compute_property_atom.cpp                 | 20 ++++++++---------
 src/math_special.h                            |  2 +-
 .../utils/test_math_extra_superellipsoids.cpp | 22 +++++++++----------
 12 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/doc/src/Howto_granular.rst b/doc/src/Howto_granular.rst
index e8c31c3adac..924449274da 100644
--- a/doc/src/Howto_granular.rst
+++ b/doc/src/Howto_granular.rst
@@ -1,7 +1,7 @@
 Granular models
 ===============
 
-Granular system are typically composed of spherical particles with a diameter,
+Granular systems are typically composed of spherical particles with a diameter,
 as opposed to point particles.  This means they have an angular
 velocity and torque can be imparted to them to cause them to rotate.
 
@@ -14,7 +14,7 @@ the following commands:
 
 Aspherical granular particles can be simulated by creating clusters of spherical
 particles using either the :doc:`rigid <fix_rigid>` or :doc:`BPM <Howto_bpm>`
-package or by using :doc:`superellipsoids <pair_granular_superellipsoids>`.
+package or by using :doc:`superellipsoids <pair_granular_superellipsoid>`.
 
 This compute
 
diff --git a/doc/src/Howto_spherical.rst b/doc/src/Howto_spherical.rst
index 162c3e148f5..bdbd7cc4de9 100644
--- a/doc/src/Howto_spherical.rst
+++ b/doc/src/Howto_spherical.rst
@@ -120,8 +120,7 @@ such interactions.  These are the various :doc:`pair styles <pair_style>` that g
 * :doc:`pair_style gran/hooke <pair_gran>`
 * :doc:`pair_style gran/hooke/history <pair_gran>`
 * :doc:`pair_style gran/hertz/history <pair_gran>`
-* :doc:`pair_style gran/hooke/history/ellipsoid <pair_gran_ellipsoid>`
-* :doc:`pair_style gran/hertz/history/ellipsoid <pair_gran_ellipsoid>`
+* :doc:`pair_style granular/superellipsoid <pair_granular_superellipsoid>`
 * :doc:`pair_style dipole/cut <pair_dipole>`
 * :doc:`pair_style gayberne <pair_gayberne>`
 * :doc:`pair_style resquared <pair_resquared>`
@@ -132,7 +131,7 @@ such interactions.  These are the various :doc:`pair styles <pair_style>` that g
 * :doc:`pair_style body/nparticle <pair_body_nparticle>`
 
 The granular pair styles are used with spherical particles.  The
-*gran/ellipsoid* granular pair styles are used with superellipsoid particles.
+*granular/superellipsoid* granular pair styles are used with superellipsoid particles.
 The dipole pair style is used with the dipole atom style, which could be
 applied to spherical or ellipsoidal particles.  The GayBerne and
 REsquared potentials require ellipsoidal particles, though they will
diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 5902c558e75..4cb5c462789 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -1,6 +1,6 @@
-.. index:: pair_style granular
+.. index:: pair_style granular/superellipsoid
 
-pair_style granular command
+pair_style granular/superellipsoid command
 ===========================
 
 Syntax
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index aaccc64dcd0..fdc54304cea 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -201,11 +201,10 @@ accelerated styles exist.
 * :doc:`gauss/cut <pair_gauss>` - generalized Gaussian potential
 * :doc:`gayberne <pair_gayberne>` - Gay-Berne ellipsoidal potential
 * :doc:`granular <pair_granular>` - Generalized granular potential
+* :doc:`granular/superellipsoid <pair_granular_superellipsoid>` - Generalized granular potential for superellipsoids
 * :doc:`gran/hertz/history <pair_gran>` - granular potential with Hertzian interactions
-* :doc:`gran/hertz/history/ellipsoid <pair_gran_ellipsoid>` - granular potential with Hertzian interactions for superellipsoids
 * :doc:`gran/hooke <pair_gran>` - granular potential without history effects
 * :doc:`gran/hooke/history <pair_gran>` - granular potential with history effects
-* :doc:`gran/hooke/history/ellipsoid <pair_gran_ellipsoid>` - granular potential for superellipsoids with history effects
 * :doc:`gw <pair_gw>` - Gao-Weber potential
 * :doc:`gw/zbl <pair_gw>` - Gao-Weber potential with a repulsive ZBL core
 * :doc:`harmonic/cut <pair_harmonic_cut>` - repulsive-only harmonic potential
diff --git a/src/ASPHERE/math_extra_superellipsoids.cpp b/src/ASPHERE/math_extra_superellipsoids.cpp
index 1962d1951ae..d32a396fa2f 100644
--- a/src/ASPHERE/math_extra_superellipsoids.cpp
+++ b/src/ASPHERE/math_extra_superellipsoids.cpp
@@ -439,9 +439,9 @@ int determine_contact_point(const double* xci, const double Ri[3][3], const doub
     // TODO: might use a simpler function to simply compute the gradient, to
     // avoid computing quantities already computed in compute_residual_and_jacobian
     if (flagi <= 1)
-      val_dummy = shape_and_gradient_local_n1equaln2_surfacesearch(xilocal, shapei, blocki[0], gradi);
+      val_dummy = shape_and_gradient_local_n1equaln2_surfacesearch(xilocal, shapei, blocki[0], tmp_v);
     else
-      val_dummy = shape_and_gradient_local_superquad_surfacesearch(xilocal, shapei, blocki, gradi);
+      val_dummy = shape_and_gradient_local_superquad_surfacesearch(xilocal, shapei, blocki, tmp_v);
 
     // Rotate gradient back to global frame to get normal
     MathExtra::matvec(Ri, tmp_v, gradi);
diff --git a/src/ASPHERE/math_extra_superellipsoids.h b/src/ASPHERE/math_extra_superellipsoids.h
index 1c6b7387be7..375f7c40ccd 100644
--- a/src/ASPHERE/math_extra_superellipsoids.h
+++ b/src/ASPHERE/math_extra_superellipsoids.h
@@ -469,7 +469,7 @@ inline bool MathExtraSuperellipsoids::check_intersection_axis_and_get_seed(
   MathExtra::transpose_matvec(R2, center_distance, center_distance_box2);
 
   int best_axis = -1;
-  double min_overlap = 0.0;
+  double min_overlap = std::numeric_limits<double>::max();
   const double edge_bias = 1.05;    // Prefer face contacts over edge contacts
 
   // Lambda to test an axis. Returns TRUE if SEPARATED.
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 82eccf1de6b..01f543f77b6 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -582,7 +582,7 @@ void FixRigidSmall::init()
 
   // error for not supported superellipsoids
 
-  if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid");
+  if (atom->superellipsoid_flag) error->all(FLERR,"Superellipsoids not supported in fix rigid/small");
 
   // timestep info
 
diff --git a/src/atom.cpp b/src/atom.cpp
index fec59cd5d6c..f372402d848 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -645,7 +645,7 @@ void Atom::set_atomflag_defaults()
   // identical list as 2nd customization in atom.h
 
   labelmapflag = 0;
-  ellipsoid_flag = line_flag = tri_flag = body_flag = 0;
+  ellipsoid_flag = line_flag = tri_flag = body_flag = superellipsoid_flag = 0;
   quat_flag = 0;
   peri_flag = electron_flag = sph_flag = 0;
   molecule_flag = molindex_flag = molatom_flag = 0;
diff --git a/src/atom_vec_ellipsoid.h b/src/atom_vec_ellipsoid.h
index 43e9416041c..c9a6435b858 100644
--- a/src/atom_vec_ellipsoid.h
+++ b/src/atom_vec_ellipsoid.h
@@ -29,7 +29,7 @@ class AtomVecEllipsoid : virtual public AtomVec {
   enum BlockType {
     ELLIPSOID = 0, // n1 = n2 = 2
     N1_EQUAL_N2 = 1, // n1 = n2 > 2
-    GENERAL = 2, // n2 != n2 > 2
+    GENERAL = 2, // n1 != n2, n1 > 2, n2 > 2
   };
   struct Bonus {
     double shape[3];
diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index 5084bd7a783..e7a0c6292af 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -1426,7 +1426,7 @@ void ComputePropertyAtom::pack_block1(int n)
   for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
       buf[n] = bonus[ellipsoid[i]].block[0];
-    else buf[n] = 1.0;
+    else buf[n] = 2.0;
     n += nvalues;
   }
 }
@@ -1442,7 +1442,7 @@ void ComputePropertyAtom::pack_block2(int n)
   for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
       buf[n] = bonus[ellipsoid[i]].block[1];
-    else buf[n] = 1.0;
+    else buf[n] = 2.0;
     n += nvalues;
   }
 }
@@ -1536,7 +1536,7 @@ void ComputePropertyAtom::pack_quatw(int n)
     for (int i = 0; i < nlocal; i++) {
       if ((mask[i] & groupbit) && body[i] >= 0)
         buf[n] = bonus[body[i]].quat[0];
-      else buf[n] = 0.0;
+      else buf[n] = 1.0;
       n += nvalues;
     }
   } else {
@@ -1547,7 +1547,7 @@ void ComputePropertyAtom::pack_quatw(int n)
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit)
         buf[n] = quat[i][0];
-      else buf[n] = 0.0;
+      else buf[n] = 1.0;
       n += nvalues;
     }
   }
@@ -1568,7 +1568,7 @@ void ComputePropertyAtom::pack_quati(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[1];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     } else {
@@ -1576,7 +1576,7 @@ void ComputePropertyAtom::pack_quati(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[1];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     }
@@ -1622,7 +1622,7 @@ void ComputePropertyAtom::pack_quatj(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[2];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     } else {
@@ -1630,7 +1630,7 @@ void ComputePropertyAtom::pack_quatj(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[2];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     }
@@ -1676,7 +1676,7 @@ void ComputePropertyAtom::pack_quatk(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[3];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     } else {
@@ -1684,7 +1684,7 @@ void ComputePropertyAtom::pack_quatk(int n)
       for (int i = 0; i < nlocal; i++) {
         if ((mask[i] & groupbit) && ellipsoid[i] >= 0)
           buf[n] = bonus[ellipsoid[i]].quat[3];
-        else buf[n] = 1.0;
+        else buf[n] = 0.0;
         n += nvalues;
       }
     }
diff --git a/src/math_special.h b/src/math_special.h
index 077d2b4c65d..3cce45538fa 100644
--- a/src/math_special.h
+++ b/src/math_special.h
@@ -184,7 +184,7 @@ namespace LAMMPS_NS::MathSpecial {
 
     return yy;
   }
-}    // namespace MathSpecial
+}    // namespace LAMMPS_NS::MathSpecial
 
 
 #endif
diff --git a/unittest/utils/test_math_extra_superellipsoids.cpp b/unittest/utils/test_math_extra_superellipsoids.cpp
index aa2c6119162..32dbba88b4f 100644
--- a/unittest/utils/test_math_extra_superellipsoids.cpp
+++ b/unittest/utils/test_math_extra_superellipsoids.cpp
@@ -16,19 +16,16 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <cmath>
-#include <vector>
 #include <limits>
+#include <vector>
 // TODO: consider making a fixture with several setup functions?
 
-static constexpr double EPSILON = 1e-4;
+static constexpr double EPSILON      = 1e-4;
 static constexpr double SOLV_EPSILON = std::numeric_limits<double>::epsilon() * 100;
 
 TEST(HandwrittenSolver, invertible)
 {
-    double A[16] = {4, 2, 1, 3,
-                    0, 5, 2, 1,
-                    1, 0, 3, 2,
-                    2, 1, 0, 4};
+    double A[16] = {4, 2, 1, 3, 0, 5, 2, 1, 1, 0, 3, 2, 2, 1, 0, 4};
 
     double b[4] = {23.0, 20.0, 18.0, 20.0};
 
@@ -43,7 +40,6 @@ TEST(HandwrittenSolver, invertible)
     }
 }
 
-
 TEST(ContactPointAndNormal, sphere)
 {
     // First grain
@@ -140,8 +136,9 @@ TEST(ContactPointAndNormal, supersphere_mono)
         int status = MathExtraSuperellipsoids::determine_contact_point(
             xci, R, shape, block, flag, xcj, R, shape, block, flag, X0, nij, method);
 
-        std::cout << n << " " << status << " " << X0[0] << " " << X0[1] << " " << X0[2] << " "
-                  << X0[3] << std::endl;
+        ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON)
+            << "Method: " << method << " | n: " << n << " | status: " << status << " | X0: ["
+            << X0[0] << ", " << X0[1] << ", " << X0[2] << ", " << X0[3] << "]";
         ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON) << "Method: " << method;
         ASSERT_NEAR(X0[1], X0_analytical[1], EPSILON) << "Method: " << method;
         ASSERT_NEAR(X0[2], X0_analytical[2], EPSILON) << "Method: " << method;
@@ -252,11 +249,12 @@ TEST(ContactPointAndNormal, supersphere_poly_geometric)
 
         // Initial Guess: Offset from 0 to test convergence
         double X0[4] = {overlap, overlap, overlap, 1.0 / 2.0}, nij[3];
-        int status = MathExtraSuperellipsoids::determine_contact_point(
+        int status   = MathExtraSuperellipsoids::determine_contact_point(
             xci, R, shapei, block, flag, xcj, R, shapej, block, flag, X0, nij, method);
 
-        std::cout << "n=" << n << " Status=" << status << " Res: " << X0[0] << " " << X0[1] << " "
-                  << X0[2] << " mu=" << X0[3] << std::endl;
+        ASSERT_NEAR(X0[0], X0_analytical[0], EPSILON)
+            << "Method: " << method << " | n: " << n << " | status: " << status << " | X0: ["
+            << X0[0] << ", " << X0[1] << ", " << X0[2] << ", " << X0[3] << "]";
 
         ASSERT_EQ(status, 0) << "Failed to converge/detect contact for n=" << n;
 

From 80a60be67f8146fc0dcc4d6a3b8b32beb41e8511 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 24 Mar 2026 10:05:54 +0100
Subject: [PATCH 168/174] Swapped tgamma for lgamma to prevent overflows

---
 src/math_extra.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/math_extra.h b/src/math_extra.h
index aeb5d29b1cc..8b5373b8a57 100644
--- a/src/math_extra.h
+++ b/src/math_extra.h
@@ -848,7 +848,7 @@ inline void MathExtra::outer3(const double *v1, const double *v2, double ans[3][
 
 inline double MathExtra::beta(double x, double y)
 {
-  return std::tgamma(x) * std::tgamma(y) / std::tgamma(x + y);
+  return std::exp(std::lgamma(x) + std::lgamma(y) - std::lgamma(x + y));
 }
 
 #endif

From 267628dfd459892a25b72e3431976badb6b7218e Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 24 Mar 2026 10:33:15 +0100
Subject: [PATCH 169/174] Added fix to Commands_pair.rst

---
 doc/src/Commands_pair.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 5672e54adad..90f2110dfd8 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -109,11 +109,10 @@ OPT.
    * :doc:`gauss/cut (o) <pair_gauss>`
    * :doc:`gayberne (gio) <pair_gayberne>`
    * :doc:`gran/hertz/history (o) <pair_gran>`
-   * :doc:`gran/hertz/history/ellipsoid <pair_gran_ellipsoid>`
    * :doc:`gran/hooke (o) <pair_gran>`
    * :doc:`gran/hooke/history (ko) <pair_gran>`
-   * :doc:`gran/hooke/history/ellipsoid <pair_gran_ellipsoid>`
    * :doc:`granular <pair_granular>`
+   * :doc:`granular/superellipsoid <pair_granular_superellipsoid>`
    * :doc:`gw <pair_gw>`
    * :doc:`gw/zbl <pair_gw>`
    * :doc:`harmonic/cut (o) <pair_harmonic_cut>`

From f24ede51c89bb5bab4df3719d7745e6c807c0aa7 Mon Sep 17 00:00:00 2001
From: JBil8 <jacopo.bilotto@epfl.ch>
Date: Tue, 24 Mar 2026 11:39:29 +0100
Subject: [PATCH 170/174] Fixed version changes and additions in the doc

---
 doc/src/Packages_details.rst             |  5 ++--
 doc/src/atom_style.rst                   | 14 +++++++---
 doc/src/compute_property_atom.rst        | 20 ++++++++------
 doc/src/pair_granular_superellipsoid.rst |  2 ++
 doc/src/read_data.rst                    | 16 ++++++-----
 doc/src/set.rst                          | 34 +++++++++++++-----------
 6 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index d0dcdbe4be6..6d6ef08c870 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -253,8 +253,8 @@ ASPHERE package
 
 **Contents:**
 
-Computes, time-integration fixes, and pair styles for aspherical
-particle models including ellipsoids, 2d lines, and 3d triangles.
+Computes, time-integration fixes, and pair styles for aspherical particle models
+including ellipsoids, granular superellipsoids, 2d lines, and 3d triangles.
 
 **Supporting info:**
 
@@ -265,6 +265,7 @@ particle models including ellipsoids, 2d lines, and 3d triangles.
 * :doc:`pair_style ylz <pair_ylz>`
 * :doc:`pair_style line/lj <pair_line_lj>`
 * :doc:`pair_style tri/lj <pair_tri_lj>`
+* :doc:`pair_style granular/superellipsoid <pair_granular_superellipsoid>`
 * `doc/PDF/pair_gayberne_extra.pdf <PDF/pair_gayberne_extra.pdf>`_
 * `doc/PDF/pair_resquared_extra.pdf <PDF/pair_resquared_extra.pdf>`_
 * ``examples/ASPHERE``
diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst
index ca3bfac7942..7f2240bb25d 100644
--- a/doc/src/atom_style.rst
+++ b/doc/src/atom_style.rst
@@ -354,10 +354,16 @@ stores a shape vector with the 3 diameters of the ellipsoid and a
 quaternion 4-vector with its orientation.  Each particle stores a flag
 in the ellipsoid vector which indicates whether it is an ellipsoid (1)
 or a point particle (0).
-By adding the flag *superellipsoid* to the atom_style command, the particles
-can be superellipsoids, which are a generalization of ellipsoids with two
-additional blockiness parameters that control the shape. Superellipsoids
-also store the principal moments of inertia of the particle.
+
+.. versionchanged:: TBD
+
+  Added *superellipsoid* flag to support blockiness parameters and principal
+  moments of inertia.
+  
+  By adding the flag *superellipsoid* to the atom_style command, the particles
+  can be superellipsoids, which are a generalization of ellipsoids with two
+  additional blockiness parameters that control the shape. Superellipsoids
+  also store the principal moments of inertia of the particle.
 
 For the *line* style, particles can be are idealized line segments
 which store a per-particle mass and length and orientation (i.e. the
diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index 73389c5e690..adcc2501143 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -167,14 +167,18 @@ If :doc:`newton bond off <newton>` is set, it will be tallied with both atom
 The quantities *shapex*, *shapey*, and *shapez* are defined for ellipsoidal
 particles and define the 3d shape of each particle.
 
-The quantities *block1*, and *block2*, are defined for superellipsoidal
-particles and define the blockiness of each superellipsoid particle.
-See the :doc:`set <set>` command for an explanation of the blockiness.
-
-The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for superellipsoidal
-particles and define the 3 principal moments of inertia of each particle.
-These are with respect to the particle's center of mass and in a reference system
-aligned with the particle's principal axes.
+.. versionchanged:: TBD
+
+   Added *block1*, *block2*, and *inertiax*, *inertiay*, *inertiaz* keywords for superellipsoids.
+
+   The quantities *block1*, and *block2*, are defined for superellipsoidal
+   particles and define the blockiness of each superellipsoid particle.
+   See the :doc:`set <set>` command for an explanation of the blockiness.
+
+   The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for superellipsoidal
+   particles and define the 3 principal moments of inertia of each particle.
+   These are with respect to the particle's center of mass and in a reference system
+   aligned with the particle's principal axes.
 
 The quantities *quatw*, *quati*, *quatj*, and *quatk* are defined for
 ellipsoidal particles and body particles and store the 4-vector quaternion
diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 4cb5c462789..20f655e85d2 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -3,6 +3,8 @@
 pair_style granular/superellipsoid command
 ===========================
 
+.. versionadded:: TBD
+
 Syntax
 """"""
 
diff --git a/doc/src/read_data.rst b/doc/src/read_data.rst
index 92ef8df8ece..94e5ffcdee2 100644
--- a/doc/src/read_data.rst
+++ b/doc/src/read_data.rst
@@ -1365,12 +1365,16 @@ the quaternion that represents its new orientation is given by
 LAMMPS normalizes each atom's quaternion in case (a,b,c) is not
 specified as a unit vector.
 
-The blockiness values *block1*, *block2* generalize the geometry to a super
-ellipsoid for use in granular simulations. Sections through the center and
-parallel to the z-axis are superellipses with squareness *block1* and sections
-in the x-y plane are superellipses with squareness *block2*.  These parameters
-are optional and default to a value of 2, recovering ellipsoid geometry.
-When specified, both values must be greater than or equal to 2.
+.. versionchanged:: TBD
+
+  Added *block1* and *block2* parameters for superellipsoid geometry.
+
+  The blockiness values *block1*, *block2* generalize the geometry to a super
+  ellipsoid for use in granular simulations. Sections through the center and
+  parallel to the z-axis are superellipses with squareness *block1* and sections
+  in the x-y plane are superellipses with squareness *block2*.  These parameters
+  are optional and default to a value of 2, recovering ellipsoid geometry.
+  When specified, both values must be greater than or equal to 2.
 
 If the data file defines a general triclinic box, then the quaternion
 for each ellipsoid should be specified for its orientation relative to
diff --git a/doc/src/set.rst b/doc/src/set.rst
index a45a39a6ab2..7667b7176d9 100644
--- a/doc/src/set.rst
+++ b/doc/src/set.rst
@@ -541,21 +541,25 @@ other. Note that the SPH smoothing kernel diameter used for computing
 long range, nonlocal interactions, is set using the *diameter*
 keyword.
 
-Keyword *block* sets the blockiness of the selected atoms.  The
-particles must be ellipsoids as defined by the :doc:`atom_style
-ellipsoid <atom_style>` command.  This command is used to define
-superellipsoid particle shapes for use in granular simulations.
-The *block1*, *block2* settings are the 2 exponents of the superellipsoid
-in the vertical and horizontal directions.  Vertical sections through the
-center are superellipses with squareness *block1* and horizontal sections
-are superellipses with squareness *block2*.  If both parameters are set to
-a value of 2 (the default), the atom is a regular ellipsoid.  The keyword
-*block* should be used together with the keyword *shape* to give the particle
-the desired shape.  If the keyword *block* is given alone, and the *shape* has
-not been defined, e.g., in a previous *set* command, the 3 diameters would be
-set to a value of 1 internally.  Note that this command does not
-adjust the particle mass, even if it was defined with a density,
-e.g. via the :doc:`read_data <read_data>` command.
+.. versionchanged:: TBD
+
+  Added *block* keyword to set blockiness parameters for superellipsoids.
+
+  Keyword *block* sets the blockiness of the selected atoms.  The
+  particles must be ellipsoids as defined by the :doc:`atom_style
+  ellipsoid <atom_style>` command.  This command is used to define
+  superellipsoid particle shapes for use in granular simulations.
+  The *block1*, *block2* settings are the 2 exponents of the superellipsoid
+  in the vertical and horizontal directions.  Vertical sections through the
+  center are superellipses with squareness *block1* and horizontal sections
+  are superellipses with squareness *block2*.  If both parameters are set to
+  a value of 2 (the default), the atom is a regular ellipsoid.  The keyword
+  *block* should be used together with the keyword *shape* to give the particle
+  the desired shape.  If the keyword *block* is given alone, and the *shape* has
+  not been defined, e.g., in a previous *set* command, the 3 diameters would be
+  set to a value of 1 internally.  Note that this command does not
+  adjust the particle mass, even if it was defined with a density,
+  e.g. via the :doc:`read_data <read_data>` command.
 
 
 Keyword *smd/mass/density* sets the mass of all selected particles,

From 2468c350f5ddb94f657a4afb67bac20c92c2fc4a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 24 Mar 2026 06:49:20 -0400
Subject: [PATCH 171/174] correct version tags for new features

---
 doc/src/atom_style.rst                   | 14 ++++-----
 doc/src/compute_property_atom.rst        | 19 +++++++------
 doc/src/pair_granular_superellipsoid.rst |  4 +--
 doc/src/read_data.rst                    | 19 ++++++-------
 doc/src/set.rst                          | 36 +++++++++++-------------
 5 files changed, 44 insertions(+), 48 deletions(-)

diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst
index 7f2240bb25d..ca367cc0d9d 100644
--- a/doc/src/atom_style.rst
+++ b/doc/src/atom_style.rst
@@ -355,15 +355,13 @@ quaternion 4-vector with its orientation.  Each particle stores a flag
 in the ellipsoid vector which indicates whether it is an ellipsoid (1)
 or a point particle (0).
 
-.. versionchanged:: TBD
+.. versionadded:: TBD
 
-  Added *superellipsoid* flag to support blockiness parameters and principal
-  moments of inertia.
-  
-  By adding the flag *superellipsoid* to the atom_style command, the particles
-  can be superellipsoids, which are a generalization of ellipsoids with two
-  additional blockiness parameters that control the shape. Superellipsoids
-  also store the principal moments of inertia of the particle.
+By adding the flag *superellipsoid* to the *ellipsoid* atom_style
+command, the particles can be superellipsoids, which are a
+generalization of ellipsoids with two additional blockiness parameters
+that control the shape.  Superellipsoids also store the principal
+moments of inertia of the particle.
 
 For the *line* style, particles can be are idealized line segments
 which store a per-particle mass and length and orientation (i.e. the
diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index adcc2501143..79d3932b6cc 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -167,18 +167,19 @@ If :doc:`newton bond off <newton>` is set, it will be tallied with both atom
 The quantities *shapex*, *shapey*, and *shapez* are defined for ellipsoidal
 particles and define the 3d shape of each particle.
 
-.. versionchanged:: TBD
+.. versionadded:: TBD
 
-   Added *block1*, *block2*, and *inertiax*, *inertiay*, *inertiaz* keywords for superellipsoids.
+The quantities *block1*, and *block2*, are defined for superellipsoidal
+particles and define the blockiness of each superellipsoid particle.
+See the :doc:`set <set>` command for an explanation of the blockiness.
 
-   The quantities *block1*, and *block2*, are defined for superellipsoidal
-   particles and define the blockiness of each superellipsoid particle.
-   See the :doc:`set <set>` command for an explanation of the blockiness.
+.. versionadded:: TBD
 
-   The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for superellipsoidal
-   particles and define the 3 principal moments of inertia of each particle.
-   These are with respect to the particle's center of mass and in a reference system
-   aligned with the particle's principal axes.
+The quantities *inertiax*, *inertiay*, and *inertiaz* are defined for
+superellipsoidal particles and define the 3 principal moments of inertia
+of each particle.  These are with respect to the particle's center of
+mass and in a reference system aligned with the particle's principal
+axes.
 
 The quantities *quatw*, *quati*, *quatj*, and *quatk* are defined for
 ellipsoidal particles and body particles and store the 4-vector quaternion
diff --git a/doc/src/pair_granular_superellipsoid.rst b/doc/src/pair_granular_superellipsoid.rst
index 20f655e85d2..4ede2c07355 100644
--- a/doc/src/pair_granular_superellipsoid.rst
+++ b/doc/src/pair_granular_superellipsoid.rst
@@ -3,8 +3,6 @@
 pair_style granular/superellipsoid command
 ===========================
 
-.. versionadded:: TBD
-
 Syntax
 """"""
 
@@ -32,6 +30,8 @@ Examples
 Description
 """""""""""
 
+.. versionadded:: TBD
+
 The *granular/superellipsoid* style calculates granular contact forces
 between superellipsoidal particles (see :doc:`atom style ellipsoid
 <atom_style>`). Similar to the :doc:`granular pairstyle <pair_granular>`
diff --git a/doc/src/read_data.rst b/doc/src/read_data.rst
index 94e5ffcdee2..645d27a75f3 100644
--- a/doc/src/read_data.rst
+++ b/doc/src/read_data.rst
@@ -1365,16 +1365,15 @@ the quaternion that represents its new orientation is given by
 LAMMPS normalizes each atom's quaternion in case (a,b,c) is not
 specified as a unit vector.
 
-.. versionchanged:: TBD
-
-  Added *block1* and *block2* parameters for superellipsoid geometry.
-
-  The blockiness values *block1*, *block2* generalize the geometry to a super
-  ellipsoid for use in granular simulations. Sections through the center and
-  parallel to the z-axis are superellipses with squareness *block1* and sections
-  in the x-y plane are superellipses with squareness *block2*.  These parameters
-  are optional and default to a value of 2, recovering ellipsoid geometry.
-  When specified, both values must be greater than or equal to 2.
+.. versionadded:: TBD
+
+The blockiness values *block1*, *block2* generalize the geometry to a
+super ellipsoid for use in granular simulations. Sections through the
+center and parallel to the z-axis are superellipses with squareness
+*block1* and sections in the x-y plane are superellipses with squareness
+*block2*.  These parameters are optional and default to a value of 2,
+recovering ellipsoid geometry.  When specified, both values must be
+greater than or equal to 2.
 
 If the data file defines a general triclinic box, then the quaternion
 for each ellipsoid should be specified for its orientation relative to
diff --git a/doc/src/set.rst b/doc/src/set.rst
index 7667b7176d9..6af2b15a93d 100644
--- a/doc/src/set.rst
+++ b/doc/src/set.rst
@@ -541,26 +541,24 @@ other. Note that the SPH smoothing kernel diameter used for computing
 long range, nonlocal interactions, is set using the *diameter*
 keyword.
 
-.. versionchanged:: TBD
-
-  Added *block* keyword to set blockiness parameters for superellipsoids.
-
-  Keyword *block* sets the blockiness of the selected atoms.  The
-  particles must be ellipsoids as defined by the :doc:`atom_style
-  ellipsoid <atom_style>` command.  This command is used to define
-  superellipsoid particle shapes for use in granular simulations.
-  The *block1*, *block2* settings are the 2 exponents of the superellipsoid
-  in the vertical and horizontal directions.  Vertical sections through the
-  center are superellipses with squareness *block1* and horizontal sections
-  are superellipses with squareness *block2*.  If both parameters are set to
-  a value of 2 (the default), the atom is a regular ellipsoid.  The keyword
-  *block* should be used together with the keyword *shape* to give the particle
-  the desired shape.  If the keyword *block* is given alone, and the *shape* has
-  not been defined, e.g., in a previous *set* command, the 3 diameters would be
-  set to a value of 1 internally.  Note that this command does not
-  adjust the particle mass, even if it was defined with a density,
-  e.g. via the :doc:`read_data <read_data>` command.
+.. versionadded:: TBD
 
+Keyword *block* sets the blockiness of the selected atoms.  The
+particles must be ellipsoids as defined by the :doc:`atom_style
+ellipsoid <atom_style>` command.  This command is used to define
+superellipsoid particle shapes for use in granular simulations.  The
+*block1*, *block2* settings are the 2 exponents of the superellipsoid in
+the vertical and horizontal directions.  Vertical sections through the
+center are superellipses with squareness *block1* and horizontal
+sections are superellipses with squareness *block2*.  If both parameters
+are set to a value of 2 (the default), the atom is a regular ellipsoid.
+The keyword *block* should be used together with the keyword *shape* to
+give the particle the desired shape.  If the keyword *block* is given
+alone, and the *shape* has not been defined, e.g., in a previous *set*
+command, the 3 diameters would be set to a value of 1 internally.  Note
+that this command does not adjust the particle mass, even if it was
+defined with a density, e.g. via the :doc:`read_data <read_data>`
+command.
 
 Keyword *smd/mass/density* sets the mass of all selected particles,
 but it is only applicable to the Smooth Mach Dynamics package MACHDYN.

From 233a0b16f08a450c67434a40ce662e641c2e83dd Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 24 Mar 2026 06:49:40 -0400
Subject: [PATCH 172/174] remove bogus leftover brightness adjustment call

---
 src/GRAPHICS/dump_image.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/GRAPHICS/dump_image.cpp b/src/GRAPHICS/dump_image.cpp
index 9042e53669a..ec460b99560 100644
--- a/src/GRAPHICS/dump_image.cpp
+++ b/src/GRAPHICS/dump_image.cpp
@@ -1410,10 +1410,6 @@ void DumpImage::create_image()
         color = image->color2rgb("white");
       }
       savedColors saved;
-      if (estyle & 1) {
-        // brighten flat surfaces a little bit
-        saved = reset_lighting(image, 0.3, 0.8, 0.45, 0.8);
-      }
       EllipsoidObj e(elevel);
       if (avec_ellipsoid->bonus_super) {
         auto *bonus = avec_ellipsoid->bonus_super;

From aaf568872e7fb98834554267c1ed746a18cd8290 Mon Sep 17 00:00:00 2001
From: Zhao YN - Matsui Lab <zhao-yingnan-df@ynu.jp>
Date: Tue, 24 Mar 2026 23:42:52 +0900
Subject: [PATCH 173/174] RHEO: guard property/atom setup against missing
 dependent fixes

compute rheo/property/atom setup previously assumed required fixes existed and accessed fixes[0] directly. When thermal, pressure, or nbond/shell properties were requested without defining rheo/thermal, rheo/pressure, or rheo/oxidation, this could trigger out-of-bounds access and a silent crash during setup.

Add explicit empty checks before fix lookup/cast and report clear error messages for missing dependent fixes, so invalid configurations fail with diagnostics instead of crashing.
---
 src/RHEO/compute_rheo_property_atom.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/RHEO/compute_rheo_property_atom.cpp b/src/RHEO/compute_rheo_property_atom.cpp
index f02304b7c95..a3ed0b46e3b 100644
--- a/src/RHEO/compute_rheo_property_atom.cpp
+++ b/src/RHEO/compute_rheo_property_atom.cpp
@@ -202,16 +202,22 @@ void ComputeRHEOPropertyAtom::setup()
 {
   if (thermal_flag) {
     auto fixes = modify->get_fix_by_style("rheo/thermal");
+    if (fixes.empty())
+      error->all(FLERR, "Cannot request thermal property without fix rheo/thermal");
     fix_thermal = dynamic_cast<FixRHEOThermal *>(fixes[0]);
   }
 
   if (pressure_flag) {
     auto fixes = modify->get_fix_by_style("rheo/pressure");
+    if (fixes.empty())
+      error->all(FLERR, "Cannot request pressure property without fix rheo/pressure");
     fix_pressure = dynamic_cast<FixRHEOPressure *>(fixes[0]);
   }
 
   if (shell_flag) {
     auto fixes = modify->get_fix_by_style("rheo/oxidation");
+    if (fixes.empty())
+      error->all(FLERR, "Cannot request nbond/shell without fix rheo/oxidation");
     fix_oxidation = dynamic_cast<FixRHEOOxidation *>(fixes[0]);
   }
 }

From 39b3828e46bb812f5428a0715f670476590d4f84 Mon Sep 17 00:00:00 2001
From: Joel Clemmer <63308469+jtclemm@users.noreply.github.com>
Date: Tue, 24 Mar 2026 10:20:16 -0600
Subject: [PATCH 174/174] Setting missing pressure_flags for stresses

---
 src/RHEO/compute_rheo_property_atom.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/RHEO/compute_rheo_property_atom.cpp b/src/RHEO/compute_rheo_property_atom.cpp
index a3ed0b46e3b..47e0d822d21 100644
--- a/src/RHEO/compute_rheo_property_atom.cpp
+++ b/src/RHEO/compute_rheo_property_atom.cpp
@@ -124,8 +124,10 @@ ComputeRHEOPropertyAtom::ComputeRHEOPropertyAtom(LAMMPS *lmp, int narg, char **a
     } else if (utils::strmatch(arg[iarg], "^grad/v/")) {
       i += add_tensor_component(arg[iarg], i, &ComputeRHEOPropertyAtom::pack_gradv) - 1;
     } else if (utils::strmatch(arg[iarg], "^stress/v/")) {
+      pressure_flag = 1;
       i += add_tensor_component(arg[iarg], i, &ComputeRHEOPropertyAtom::pack_viscous_stress) - 1;
     } else if (utils::strmatch(arg[iarg], "^stress/t/")) {
+      pressure_flag = 1;
       i += add_tensor_component(arg[iarg], i, &ComputeRHEOPropertyAtom::pack_total_stress) - 1;
     } else if (strcmp(arg[iarg], "energy") == 0) {
       avec_index[i] = atom->avec->property_atom("esph");