From 54a9a84ebe49d4fb06970e5593231a6b1d49594d Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Sun, 18 Jan 2026 13:12:35 +0100 Subject: [PATCH 1/5] Add new hyperparameter optimisation script (and parallelise the existing one through joblib) --- qstack/regression/hyperparameters.py | 29 ++- qstack/regression/hyperparameters2.py | 354 ++++++++++++++++++++++++++ 2 files changed, 370 insertions(+), 13 deletions(-) create mode 100644 qstack/regression/hyperparameters2.py diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index c762b38..358587d 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -4,6 +4,7 @@ import numpy as np import scipy from sklearn.model_selection import KFold +from sklearn.utils.parallel import Parallel, delayed from qstack.mathutils.fps import do_fps from qstack.tools import correct_num_threads from .kernel_utils import get_kernel, defaults, train_test_split_idx, sparse_regression_kernel @@ -69,18 +70,21 @@ def k_fold_opt(K_all, eta): def hyper_loop(sigma, eta): errors = [] - for s in sigma: - if read_kernel is False: - K_all = kernel(X_train, X_train, 1.0/s) - else: - K_all = X_train - - for e in eta: - mean, std = k_fold_opt(K_all, e) - if printlevel>0 : - sys.stderr.flush() - print(s, e, mean, std, flush=True) - errors.append((mean, std, e, s)) + with Parallel(n_jobs=-1) as parallel: + for s in sigma: + if read_kernel is False: + K_all = kernel(X_train, X_train, 1.0/s) + else: + K_all = X_train + + def inner_loop(s,e): + mean, std = k_fold_opt(K_all, e) + if printlevel>0 : + sys.stderr.flush() + print(s, e, mean, std, flush=True) + return (mean, std, e, s) + + errors += parallel(delayed(inner_loop)(s,e) for e in eta) return errors if gkernel is None: gwrap = None @@ -139,7 +143,6 @@ def hyper_loop(sigma, eta): def _get_arg_parser(): """Parse CLI arguments.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') - parser.remove_argument("random_state") parser.remove_argument("train_size") return parser diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py new file mode 100644 index 0000000..4ca2e70 --- /dev/null +++ b/qstack/regression/hyperparameters2.py @@ -0,0 +1,354 @@ +"""Hyperparameter optimisation using a smoother version of sigma selection, +using a no-gradient line search""" + +import sys, logging +import numpy as np +import scipy +from sklearn.metrics import mean_absolute_error +from sklearn.model_selection import KFold +from sklearn.utils.parallel import Parallel, delayed +from qstack.mathutils.fps import do_fps +from qstack.tools import correct_num_threads +from .kernel_utils import get_kernel, defaults, train_test_split_idx +from .parser import RegressionParser + +logger = logging.getLogger("qstack.regression.hyperparameters2") + +# ##################### +# parabola-based line search + + +def fit_quadratic(x1,x2,x3, y1,y2,y3): + """Compute the three coefficients of a quadratic polynomial going through three given points.""" + # we need to change coordinates around for this + + # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) + # the 'meta-slope' allows us to get 2*curvature + slope1 = (y2-y1)/(x2-x1) + slope2 = (y3-y2)/(x3-x2) + curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears + + # then, remove the curvature from points 1 and 2 to determine 1st-degree term + y1_b = y1 - curv*x1**2 + y2_b = y2 - curv*x2**2 + slope = (y2_b-y1_b)/(x2-x1) + + # finally, the intercept + intercept 
= y1_b - slope*x1 + return curv, slope, intercept + +def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): + """A 1D optimisation function, assuming the loss function in question is convex, It first checks to see the bounds are correct, then refines them by fitting quadratic polynomials""" + + y_left = get_err(x_left) + y_right = get_err(x_right) + x_center = 0.5*(x_left+x_right) + y_center = get_err(x_center) + + all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] + + while y_left < y_center or y_right < y_center: + # while it looks like we need to look elsewhere than in our original bracket + # (because the center isn't closer to the local minimum than the bounds) + if y_left < y_right: + logger.debug('no local minimum in sight, extending to the left...') + x_right, y_right = x_center, y_center + x_center, y_center = x_left, y_left + x_left = 2*x_center - x_right + y_left = get_err(x_left) + all_errs.insert(0, (x_left,y_left)) + else: + logger.debug('no local minimum in sight, extending to the right...') + x_left, y_left = x_center, y_center + x_center, y_center = x_right, y_right + x_right = 2*x_center - x_left + y_right = get_err(x_right) + all_errs.append((x_right,y_right)) + n_iter -= 1 + if n_iter <=0: + break + # after this point, either we are exiting early or we have found the right bounds + all_errs.sort() + logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) + logger.debug('chosen: %f\%f/%f', x_left, x_center, x_right) + while n_iter > 0: + a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) + if a<=0: # lol no local minimum + logger.debug('no local minimum...') + if x_left < x_right: + x_new = 0.5*(x_left+x_center) + ypred_new = np.nan + else: + x_new = 0.5*(x_right+x_center) + ypred_new = np.nan + else: + x_new = -0.5*b/a + ypred_new = -0.25*b**2/a + c + y_new = get_err(x_new) + n_iter -=1 + logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) + all_errs.append((x_new, y_new)) ; all_errs.sort() + logger.debug('current data: %r', all_errs) + + if x_new < x_left or x_new > x_right: + logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') + new_index = np.argmin(np.asarray(all_errs)[:,1]) + if new_index in (0, len(all_errs)-1): + raise AssertionError('edges of the search are somehow the minimum in second phase of function') + x_left, y_left = all_errs[new_index-1] + x_right, y_right = all_errs[new_index+1] + x_center, y_center = all_errs[new_index] + + elif y_new > y_center: + if x_new > x_center: + x_right, y_right = x_new, y_new + else: + x_left, y_left = x_new, y_new + elif y_left < y_right: + if max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break + if x_new > x_center: + x_left, y_left = x_center, y_center + x_center, y_center = x_new, y_new + else: + x_right, y_right = x_center, y_center + x_center, y_center = x_new, y_new + if abs(x_right - x_left) < x_thres: + break + + opt_idx = np.argmin(np.asarray(all_errs)[:,1]) + return all_errs[opt_idx] + + + +def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): + if parallel is None: + parallel = Parallel(n_jobs=-1, return_as="generator_unordered") + kfold = KFold(n_splits=n_splits, shuffle=False) + maes = np.full((kfold.get_n_splits(), len(alpha_grid)), np.inf) + y = np.asarray(y) + is_sparse = K_all.shape[0] != K_all.shape[1] + + def inner_loop(split_i, alpha_i, train_idx, 
val_idx, alpha): + y_val = y[val_idx] + y_train = y[train_idx] + if is_sparse: + K_train = K_all[train_idx, :] + K_val = K_all[val_idx, :] + y_train = K_train.T @ y_train + K_train = K_train.T @ K_train + else: + K_train = K_all[np.ix_(train_idx,train_idx)] + K_val = K_all [np.ix_(val_idx,train_idx)] + if np.may_share_memory(K_train, K_all): + K_train = K_train.copy() + K_train[np.diag_indices_from(K_train)] += alpha + + try: + weights = scipy.linalg.solve(K_train, y_train, assume_a='pos', overwrite_a=True) + except Exception as err: + raise + # bad fit (singular matrix)! + return split_i, alpha_i, np.inf + predict = K_val @ weights + return split_i, alpha_i, mean_absolute_error(y_val, predict) + + mae_generator = parallel( + delayed(inner_loop)(s_i,a_i, t,v,a) + for a_i,a in enumerate(alpha_grid) + for s_i,(t,v) in enumerate(kfold.split(y)) + ) + for split_i, alpha_i, mae in mae_generator: + maes[split_i, alpha_i] = mae + + concat_results = np.full((len(alpha_grid), 2), np.inf) + for alpha_i in range(len(alpha_grid)): + if not np.isfinite(maes[:, alpha_i]).any(): + pass + else: + res = maes[alpha_i] + res = res[np.isfinite(res)] + concat_results[alpha_i,0] = res.mean() + concat_results[alpha_i,1] = res.std() + on_compute(alpha_grid[alpha_i], *concat_results[alpha_i]) + #print("kfold evaluation for alpha grid",alpha_grid,concat_results) + selected_alpha_i = concat_results[:,0].argmin() + return alpha_grid[selected_alpha_i], maes[:,selected_alpha_i]#, models[:, selected_alpha_i].copy() + + +def search_sigma( + X, y, kernel, + sigma_bounds, alpha_grid, + n_iter, n_splits, + stddev_portion=+1.0, sparse_idx=None, + parallel=None, on_compute=(lambda sigma,eta,err,stderr:None) +): + """Search""" + + sigma_left, sigma_right = sigma_bounds + + err_dict = {} + + def get_err(log_sigma): + sigma = np.exp(log_sigma) + + K_all = kernel(X, X, 1.0/sigma) + if sparse_idx is not None: + K_all = K_all[:, sparse_idx] + + alpha, costs = kfold_alpha_eval( + K_all, y, n_splits, alpha_grid, + parallel=parallel, on_compute=(lambda eta,err,stderr: on_compute(sigma,eta,err,stderr)), + ) + err_dict[log_sigma] = (alpha,costs) + cost_res = costs.mean() + stddev_portion*costs.std() + return cost_res + + log_sigma_selected, cost_selected = parabolic_search( + np.log(sigma_left), np.log(sigma_right), + get_err, + n_iter=n_iter, x_thres=0.1, y_thres=0.01, + ) + + alpha_selected, costs_selected = err_dict[log_sigma_selected] + sigma = np.exp(log_sigma_selected) + + return sigma, alpha_selected, costs_selected + + + +def hyperparameters(X, y, + sigma_low=defaults.sigmaarr[0], sigma_high=defaults.sigmaarr[-1], eta=defaults.etaarr, + akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, + test_size=defaults.test_size, splits=defaults.splits, n_sigma_iters=5, idx_test=None, idx_train=None, + printlevel=0, sparse=None, + stddev_portion=+1.0, + random_state=defaults.random_state, +): + """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). + + Args: + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigma_low (float): Estimated low bound forthe kernel width. + sigma_high (float): Estimated high bound forthe kernel width. + eta (list): List of regularization strength for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). 
+ gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + splits (int): K number of splits for the Kfold cross-validation. + n_sigma_iters (int): number of iterations for the sigma-optimisation line search + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + printlevel (int): Controls level of output printing. + read_kernel (bool): If 'X' is a kernel and not an array of representations (disables sigma optimisation). + sparse (int): The number of reference environnments to consider for sparse regression. + stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. + random_state (int): The seed used for random number generator (controls train/test splitting). + + Returns: + The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], + where C is the number of parameter set and + the array is sorted according to MAEs (last is minimum) + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. + """ + + if gkernel is None: + gwrap = None + else: + gwrap = [gkernel, gdict] + kernel = get_kernel(akernel, gwrap) + + idx_train, _, y_train, _ = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, + test_size=test_size, random_state=random_state) + if read_kernel is False: + X_train = X[idx_train] + optimise_sigma = True + else: + X_train = X[np.ix_(idx_train,idx_train)] + optimise_sigma = False + + if sparse: + if read_kernel: + raise RuntimeError('Cannot do FPS with kernels') + sparse_idx = do_fps(X_train)[0][:sparse] + else: + sparse_idx = np.arange(X_train.shape[0]) + + errors = [] + with Parallel(n_jobs=-1, return_as="generator_unordered") as parallel: + if optimise_sigma: + err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) + _,_,_ = search_sigma( + X_train, y_train, kernel, (sigma_low, sigma_high), alpha_grid=eta, + parallel=parallel, on_compute=err_append, + n_iter = n_sigma_iters, n_splits=splits, stddev_portion=stddev_portion, + sparse_idx=sparse_idx, + ) + else: + if sparse_idx is not None: + K_all = X_train[:, sparse_idx] + else: + K_all = X_train + sigma = np.nan + err_append = lambda alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) + _,_ = kfold_alpha_eval( + K_all, y, splits, alpha_grid=eta, + parallel=parallel, on_compute=err_append, + ) + + + errors = np.array(errors) + ind = np.argsort(errors[:,0]+stddev_portion*errors[:,-1])[::-1] + errors = errors[ind] + return errors + + +def _get_arg_parser(): + """Parse CLI arguments.""" + parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') + parser.remove_argument("train_size") + parser.remove_argument("sigma") + parser.remove_argument("adaptative") + parser.add_argument('--sigma-low', type=float, dest='sigma_low', default=1E-2, help='estimated low bound for sigma') + parser.add_argument('--sigma-high', type=float, dest='sigma_high', default=1E+2, help='estimated high bound for sigma') + parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=1, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') + parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number 
of iterations for the sigma-optimisation line search') + + + return parser + + +def main(): + """Command-line entry point for hyperparameter optimization.""" + args = _get_arg_parser().parse_args() + if args.readk: + args.sigma = [np.nan] + if args.ll: + correct_num_threads() + print(vars(args)) + + X = np.load(args.repr) + y = np.loadtxt(args.prop) + + errors = hyperparameters( + X, y, read_kernel=args.readk, + sigma_low=args.sigma_low, sigma_high=args.sigma_high, eta=args.eta, + akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, sparse=args.sparse, + test_size=args.test_size, splits=args.splits, n_sigma_iters=args.n_sigma_iters, + printlevel=args.printlevel, random_state=args.random_state, stddev_portion=args.stddev_portion, + ) + errors = np.array(errors) + if args.nameout is not None: + np.savetxt(args.nameout, errors, header="error stdev eta sigma") + print() + print('error stdev eta sigma') + for error in errors: + print("{:e} {:e} | {:e} {:e}".format(*tuple(error))) + + +if __name__ == "__main__": + main() From 033127d116e3d7cd46f455df1031f8656e209bc6 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Tue, 20 Jan 2026 00:32:24 +0100 Subject: [PATCH 2/5] document said script's functions --- qstack/regression/hyperparameters2.py | 63 +++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py index 4ca2e70..793a47d 100644 --- a/qstack/regression/hyperparameters2.py +++ b/qstack/regression/hyperparameters2.py @@ -19,7 +19,10 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): - """Compute the three coefficients of a quadratic polynomial going through three given points.""" + """ + Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. + """ # we need to change coordinates around for this # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) @@ -38,7 +41,21 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): return curv, slope, intercept def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """A 1D optimisation function, assuming the loss function in question is convex, It first checks to see the bounds are correct, then refines them by fitting quadratic polynomials""" + """ + Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. + + Args: + x_left (float): supposed left bound of the minimum of `get_err` + x_right (float): supposed right bound of the minimum of `get_err` + get_err (callable float->float): the function to minimise. + n_iter (int): the number of function calls allowed + x_thres (float): the acceptable error threshold for the the argmin to find + y_thres (float): the acceptable error threshold for the min to find + + Returns: + the (argmin, min) tuple characterising the minimum of the function (2x float) + """ y_left = get_err(x_left) y_right = get_err(x_right) @@ -122,6 +139,21 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): + """Module-internal function: optimise alpha (regularisation parameter) of a KRR learning model, using a K-fold validation. 
+ + Args: + K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) + y: learnable properties for all inputs (n_total-length vector) + n_splits: number of folds for k-fold validation + alpha_grid: all the values of alpha to try (array-like) + parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) + on_compute: function to call for the error summaries of each value of alpha + (callable: alpha, error_mean, error_stddev -> None) + + Returns: + - optimal value of alpha + - validation error list for all k-fold evaluations for this value of alpha + """ if parallel is None: parallel = Parallel(n_jobs=-1, return_as="generator_unordered") kfold = KFold(n_splits=n_splits, shuffle=False) @@ -181,9 +213,32 @@ def search_sigma( sigma_bounds, alpha_grid, n_iter, n_splits, stddev_portion=+1.0, sparse_idx=None, - parallel=None, on_compute=(lambda sigma,eta,err,stderr:None) + parallel=None, on_compute=(lambda sigma,alpha,err,stderr:None) ): - """Search""" + """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, + and alpha is the regularisation parameter of the resulting matrix equation. + + Internally, calls the line-search rountine for gamma, + where the function to minimise performs its own grid-based optimisation of alpha. + + Args: + X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset + y (np.ndarray[n_total]): learnable properties for all inputs + sigma_bounds (tuple(float,float)): presumed bounds of the optimal value of sigma + alpha_grid (array-like of floats): values of alpha to try + n_iter (int): number of iterations for the sigma line-search + n_splits (int): number of folds for k-fold validation + stddev_portion (float): contribution of the error's standard deviation to compare error distributions + sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. + parallel (optional joblib.Parallel): tool to make the optimisation more parallel. by default, one will be (re)created as often as necessary. + on_compute (callable sigma,alpha,err_mean,err_stddev -> None) + + Returns: + sigma (float): optimal value of sigma + alpha (float): optimal value of alpha + costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha + """ sigma_left, sigma_right = sigma_bounds From 8704594307a514be306952bd6ad290e542647d92 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Wed, 21 Jan 2026 22:28:38 +0100 Subject: [PATCH 3/5] add test for new hyperparameters fitting --- qstack/qml/b2r2.py | 4 ++++ qstack/regression/hyperparameters2.py | 17 ++++++++++------- tests/test_regression.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index f3db3d7..0c0f98f 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -13,6 +13,10 @@ defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) +class Reaction: + def __init__(self, reactants, products): + self.reactants = reactants + self.products = products def get_bags(unique_ncharges): """Generate all unique element pair combinations including self-interactions. 
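For orientation, the small Reaction class added to b2r2.py above is only a container pairing the reactant and product structures of one reaction, as used by the B2R2 reaction representations. A minimal sketch of how it could be constructed follows; the SimpleNamespace stand-ins are hypothetical, and the only assumption (stated later in this series) is that each entry is a Mol-like object exposing .positions in Å and .charges:

from types import SimpleNamespace
import numpy as np
from qstack.qml.b2r2 import Reaction   # available once this patch is applied

# stand-in for a Mol-like object: anything with .positions (in Å) and .charges works
h2 = SimpleNamespace(positions=np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.74]]),
                     charges=np.array([1, 1]))

rxn = Reaction(reactants=[h2], products=[h2])   # trivial identity "reaction"
print(len(rxn.reactants), len(rxn.products))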
diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py index 793a47d..7a7147b 100644 --- a/qstack/regression/hyperparameters2.py +++ b/qstack/regression/hyperparameters2.py @@ -87,7 +87,7 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 # after this point, either we are exiting early or we have found the right bounds all_errs.sort() logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) - logger.debug('chosen: %f\%f/%f', x_left, x_center, x_right) + logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) while n_iter > 0: a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) if a<=0: # lol no local minimum @@ -103,6 +103,7 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 ypred_new = -0.25*b**2/a + c y_new = get_err(x_new) n_iter -=1 + logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) all_errs.append((x_new, y_new)) ; all_errs.sort() logger.debug('current data: %r', all_errs) @@ -116,20 +117,21 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 x_right, y_right = all_errs[new_index+1] x_center, y_center = all_errs[new_index] + elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break elif y_new > y_center: if x_new > x_center: x_right, y_right = x_new, y_new else: x_left, y_left = x_new, y_new - elif y_left < y_right: - if max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: - break + else: # if y_new <= y_center if x_new > x_center: x_left, y_left = x_center, y_center x_center, y_center = x_new, y_new else: x_right, y_right = x_center, y_center x_center, y_center = x_new, y_new + if abs(x_right - x_left) < x_thres: break @@ -198,7 +200,7 @@ def inner_loop(split_i, alpha_i, train_idx, val_idx, alpha): if not np.isfinite(maes[:, alpha_i]).any(): pass else: - res = maes[alpha_i] + res = maes[:,alpha_i] res = res[np.isfinite(res)] concat_results[alpha_i,0] = res.mean() concat_results[alpha_i,1] = res.std() @@ -257,6 +259,7 @@ def get_err(log_sigma): ) err_dict[log_sigma] = (alpha,costs) cost_res = costs.mean() + stddev_portion*costs.std() + #print("now eval'ing σ=", sigma, '... 
α=', alpha, costs.shape, costs.mean(), costs.std()) return cost_res log_sigma_selected, cost_selected = parabolic_search( @@ -334,7 +337,7 @@ def hyperparameters(X, y, sparse_idx = np.arange(X_train.shape[0]) errors = [] - with Parallel(n_jobs=-1, return_as="generator_unordered") as parallel: + with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: if optimise_sigma: err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) _,_,_ = search_sigma( @@ -357,7 +360,7 @@ def hyperparameters(X, y, errors = np.array(errors) - ind = np.argsort(errors[:,0]+stddev_portion*errors[:,-1])[::-1] + ind = np.argsort(errors[:,0]+stddev_portion*errors[:,1])[::-1] errors = errors[ind] return errors diff --git a/tests/test_regression.py b/tests/test_regression.py index 9702fa5..b822e6c 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -3,6 +3,7 @@ import os import numpy as np import qstack.regression.hyperparameters as hyperparameters +import qstack.regression.hyperparameters2 as hyperparameters2 import qstack.regression.regression as regression import qstack.regression.final_error as final_error import qstack.regression.condition as condition @@ -25,6 +26,25 @@ def test_hyperparameters(): assert (np.allclose(hyper, true_hyper)) +def test_hyperparameters2(): + #import logging + #logging.basicConfig() + #logging.getLogger("qstack").setLevel('DEBUG') + path = os.path.dirname(os.path.realpath(__file__)) + xfile = os.path.join(path, 'data/mols/X_lb.npy') + X = np.load(xfile) + yfile = os.path.join(path, 'data/mols/dipole.dat') + y = np.loadtxt(yfile) + + hyper = hyperparameters2.hyperparameters(X, y, random_state=42)[-4:] + true_hyper = [ + [5.15813198e-01, 2.37774396e-01, 3.16227766e-08, 3.64079252e+04], + [5.15719232e-01, 2.37430538e-01, 1.00000000e-10, 1.00000000e+06], + [5.15657638e-01, 2.37472003e-01, 1.00000000e-10, 3.64079252e+04], + [5.15699162e-01, 2.37420839e-01, 1.00000000e-10, 1.71990639e+05], + ] + + assert (np.allclose(hyper, true_hyper)) def test_regression(): path = os.path.dirname(os.path.realpath(__file__)) From 8ae3fd8d1bbe00e707a1fb9b13e6adace89a9b32 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Fri, 23 Jan 2026 01:09:46 +0100 Subject: [PATCH 4/5] fuse hyperparameter optimisation methods --- qstack/regression/hyperparameters.py | 473 ++++++++++++++++++++++----- tests/test_regression.py | 18 +- 2 files changed, 408 insertions(+), 83 deletions(-) diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 358587d..1cefc5b 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,6 +1,6 @@ """Hyperparameter optimization.""" -import sys +import sys, logging import numpy as np import scipy from sklearn.model_selection import KFold @@ -10,11 +10,324 @@ from .kernel_utils import get_kernel, defaults, train_test_split_idx, sparse_regression_kernel from .parser import RegressionParser +logger = logging.getLogger("qstack.regression.hyperparameters") + + +# ##################### +# parabola-based line search + +def fit_quadratic(x1,x2,x3, y1,y2,y3): + """ + Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. 
+ """ + # we need to change coordinates around for this + + # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) + # the 'meta-slope' allows us to get 2*curvature + slope1 = (y2-y1)/(x2-x1) + slope2 = (y3-y2)/(x3-x2) + curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears + + # then, remove the curvature from points 1 and 2 to determine 1st-degree term + y1_b = y1 - curv*x1**2 + y2_b = y2 - curv*x2**2 + slope = (y2_b-y1_b)/(x2-x1) + + # finally, the intercept + intercept = y1_b - slope*x1 + return curv, slope, intercept + +def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): + """ + Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. + + Args: + x_left (float): supposed left bound of the minimum of `get_err` + x_right (float): supposed right bound of the minimum of `get_err` + get_err (callable float->float): the function to minimise. + n_iter (int): the number of function calls allowed + x_thres (float): the acceptable error threshold for the the argmin to find + y_thres (float): the acceptable error threshold for the min to find + + Returns: + the (argmin, min) tuple characterising the minimum of the function (2x float) + """ + + y_left = get_err(x_left) + y_right = get_err(x_right) + x_center = 0.5*(x_left+x_right) + y_center = get_err(x_center) + + all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] + + while y_left < y_center or y_right < y_center: + # while it looks like we need to look elsewhere than in our original bracket + # (because the center isn't closer to the local minimum than the bounds) + if y_left < y_right: + logger.debug('no local minimum in sight, extending to the left...') + x_right, y_right = x_center, y_center + x_center, y_center = x_left, y_left + x_left = 2*x_center - x_right + y_left = get_err(x_left) + all_errs.insert(0, (x_left,y_left)) + else: + logger.debug('no local minimum in sight, extending to the right...') + x_left, y_left = x_center, y_center + x_center, y_center = x_right, y_right + x_right = 2*x_center - x_left + y_right = get_err(x_right) + all_errs.append((x_right,y_right)) + n_iter -= 1 + if n_iter <=0: + break + # after this point, either we are exiting early or we have found the right bounds + all_errs.sort() + logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) + logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) + while n_iter > 0: + a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) + if a<=0: # lol no local minimum + logger.debug('no local minimum...') + if x_left < x_right: + x_new = 0.5*(x_left+x_center) + ypred_new = np.nan + else: + x_new = 0.5*(x_right+x_center) + ypred_new = np.nan + else: + x_new = -0.5*b/a + ypred_new = -0.25*b**2/a + c + y_new = get_err(x_new) + n_iter -=1 + logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) + logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) + all_errs.append((x_new, y_new)) ; all_errs.sort() + logger.debug('current data: %r', all_errs) + + if x_new < x_left or x_new > x_right: + logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') + new_index = np.argmin(np.asarray(all_errs)[:,1]) + if new_index in (0, len(all_errs)-1): + raise AssertionError('edges of the search are somehow the minimum in second phase of function') + x_left, y_left = 
all_errs[new_index-1] + x_right, y_right = all_errs[new_index+1] + x_center, y_center = all_errs[new_index] + + elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break + elif y_new > y_center: + if x_new > x_center: + x_right, y_right = x_new, y_new + else: + x_left, y_left = x_new, y_new + else: # if y_new <= y_center + if x_new > x_center: + x_left, y_left = x_center, y_center + x_center, y_center = x_new, y_new + else: + x_right, y_right = x_center, y_center + x_center, y_center = x_new, y_new + + if abs(x_right - x_left) < x_thres: + break + + opt_idx = np.argmin(np.asarray(all_errs)[:,1]) + return all_errs[opt_idx] + + +def standard_grid_search(x_list, get_err): + errors = [] + for x in x_list: + errors.append(get_err(x)) + errors = np.asarray(errors) + xi = errors.argmin() + return xi, errors[xi] + +def adaptative_grid_search(x_list, get_err): + work_list = x_list + errors = [] + direction = None + while True: + errors = list(errors) + for x in work_list: + errors.append((x,get_err(x))) + errors = np.array(errors) + ind = np.argsort(errors[:,1])[::-1] + errors = errors[ind] + + current_argmin = errors[-1,0] + new_sigma = None + + if direction is None: + if current_argmin==max(work_list): + direction = 'up' + elif current_argmin==min(work_list): + direction = 'down' + + # at the 1st iteration if is checked twice on purpose + if direction=='up' and current_argmin==max(work_list): + work_list = current_argmin*np.array(defaults.sigmaarr_mult[1:]) + elif direction=='down' and current_argmin==min(work_list): + work_list = current_argmin/np.array(defaults.sigmaarr_mult[1:]) + else: + break + + print('next iteration:', work_list, flush=True) + return errors + +# ##################### +# main functions of the hyperparameter optimisation + + +def kfold_alpha_eval(K_all, y_train, n_splits, eta_list, sparse, parallel = None): + """Module-internal function: optimise alpha (regularisation parameter) of a KRR learning model, using a K-fold validation. 
+ + Args: + K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) + y_train: learnable properties for all inputs (n_total-length vector) + n_splits: number of folds for k-fold validation + eta_list: all the values of eta (KRR regularisation parameter) to try (array-like) + sparse: whether the KRR to run is sparse (bool) + parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) + + Returns: + errors: array of "entries", each with the three values (np.ndarray, shape (len(eta_list),3) ): + mean: mean (over k-folds) validation error for this value of eta + stddev: standard deviation of the same error + eta: the corresponding value of eta + """ + if parallel is None: + parallel = Parallel(n_jobs=-1, return_as="generator_unordered") + kfold = KFold(n_splits=n_splits, shuffle=False) + maes = np.full((n_splits, len(eta_list)), np.inf) + y_train = np.asarray(y_train) + + + def inner_call(fold_i, eta_i, K_all, sparse, y_train, eta, train_idx, test_idx): + y_kf_train, y_kf_test = y_train[train_idx], y_train[test_idx] + + if not sparse: + K_solve = K_all [np.ix_(train_idx,train_idx)] + if np.may_share_memory(K_solve, K_all): + K_solve = K_solve.copy() + K_solve[np.diag_indices_from(K_solve)] += eta + y_solve = y_kf_train + Ks = K_all [np.ix_(test_idx,train_idx)] + else: + K_solve, y_solve = sparse_regression_kernel(K_all[train_idx], y_kf_train, slice(None), eta) + Ks = K_all[test_idx] + + try: + alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos', overwrite_a=True) + except scipy.linalg.LinAlgError: + print('singular matrix') + raise + y_kf_predict = np.dot(Ks, alpha) + return fold_i, eta_i, np.mean(np.abs(y_kf_predict-y_kf_test)) + + mae_generator = parallel( + delayed(inner_call)(fold_i, eta_i, K_all, sparse, y_train, eta, t,v) + for eta_i,eta in enumerate(eta_list) + for fold_i,(t,v) in enumerate(kfold.split(y_train)) + ) + for split_i, eta_i, mae in mae_generator: + maes[split_i, eta_i] = mae + + concat_results = np.full((len(eta_list), 3), np.inf) + for eta_i in range(len(eta_list)): + res = maes[:,eta_i] + #res = res[np.isfinite(res)] + concat_results[eta_i,0] = res.mean() + concat_results[eta_i,1] = res.std() + concat_results[eta_i,2] = eta_list[eta_i] + return concat_results + +def search_sigma( + X_train, y_train, splits, + kernel, sigma, eta, + sparse_idx=None, + n_sigma_iter=5, stddev_portion=0.0, + adaptive=False, adaptive_v2=False, + read_kernel=False, printlevel=0, +): + """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, + and alpha is the regularisation parameter of the resulting matrix equation. + + Internally, this can call for either a simple grid search, or be modified as so: + - the grid is adaptative for sigma (adaptive) + - the grid search for sigma becomes a continuous line search (adaptive_v2) + No matter what, the optimisation of alpha is done over a grid, with k-fold validation. + + Args: + X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset + y (np.ndarray[n_total]): learnable properties for all inputs + sigma (array-like(float)): values of sigma. 
for `adaptive`, starting values, for `adaptive_v2`, only the first and last values are used, as presumed bounds of the optimal value of sigma + alpha_grid (array-like of floats): values of alpha to try + n_sigma_iter (int): number of iterations for the sigma line-search (if adaptive_v2) + n_splits (int): number of folds for k-fold validation + stddev_portion (float): contribution of the error's standard deviation to compare error distributions + sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. + adaptive (bool): to use the adaptive grid for sigma + adaptive_v2 (bool): to use the line search for sigma + read_kernel (bool): to completely discard sigma, assuming the representation array is a precomputed kernel array + printlevel (int): level of verbosity + + Returns: + sigma (float): optimal value of sigma + alpha (float): optimal value of alpha + costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha + """ + errors = [] + + def get_err(s): + if read_kernel is False: + K_all = kernel(X_train, X_train, 1.0/s) + else: + K_all = X_train + + sparse = sparse_idx != None + if sparse: + K_all = K_all[:,sparse_idx] + + results_per_eta = kfold_alpha_eval( + K_all, y_train, splits, eta, sparse, + parallel = parallel, + ) + for mean,std,e in results_per_eta: + if printlevel>0 : + sys.stderr.flush() + print(s, e, mean, std, flush=True) + errors.append((mean, std, e, s)) + + costs = results_per_eta[:,0] + stddev_portion*results_per_eta[:,1] + return costs.min() + + with Parallel(n_jobs=-1) as parallel: + if adaptive_v2: + assert not adaptive + _, _ = parabolic_search( + np.log(sigma[0]), np.log(sigma[-1]), + lambda log_s: get_err(np.exp(log_s)), + n_iter=n_sigma_iter, x_thres=0.1, y_thres=0.01, + ) + elif adaptive: + _ = adaptative_grid_search(sigma, get_err) + else: + for s in sigma: + get_err(s) + + return np.asarray(errors) def hyperparameters(X, y, - sigma=defaults.sigmaarr, eta=defaults.etaarr, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, + sigma=defaults.sigmaarr, eta=defaults.etaarr, sparse=None, + akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, - printlevel=0, adaptive=False, read_kernel=False, sparse=None, random_state=defaults.random_state): + stddev_portion=0.0, n_sigma_iters=5, + printlevel=0, adaptive=False, adaptive_v2=False, random_state=defaults.random_state, +): """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). Args: @@ -22,17 +335,20 @@ def hyperparameters(X, y, y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. sigma (list): List of kernel width for the grid search. eta (list): List of regularization strength for the grid search. + sparse (int): The number of reference environnments to consider for sparse regression. akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). gkernel (str): Global kernel (None, 'REM', 'avg'). gdict (dict): Parameters of the global kernels. + read_kernel (bool): If 'X' is a kernel and not an array of representations. test_size (float or int): Test set fraction (or number of samples). splits (int): K number of splits for the Kfold cross-validation. idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). 
idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. + n_sigma_iters (int): for adaptive_v2, the number of iterations to run the sigma line search for. printlevel (int): Controls level of output printing. adaptive (bool): To expand the grid search adaptatively. - read_kernel (bool): If 'X' is a kernel and not an array of representations. - sparse (int): The number of reference environnments to consider for sparse regression. + adaptive_v2 (bool): To optimise sigma though line search rather than grid search, using the ends of the `sigma` list as presumed lower/upper bounds for the optimal value. random_state (int): The seed used for random number generator (controls train/test splitting). Returns: @@ -43,49 +359,7 @@ def hyperparameters(X, y, Raises: RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ - def k_fold_opt(K_all, eta): - kfold = KFold(n_splits=splits, shuffle=False) - all_maes = [] - for train_idx, test_idx in kfold.split(X_train): - y_kf_train, y_kf_test = y_train[train_idx], y_train[test_idx] - - if not sparse: - K_solve = np.copy(K_all [np.ix_(train_idx,train_idx)]) - K_solve[np.diag_indices_from(K_solve)] += eta - y_solve = y_kf_train - Ks = K_all [np.ix_(test_idx,train_idx)] - else: - K_solve, y_solve = sparse_regression_kernel(K_all[train_idx], y_kf_train, sparse_idx, eta) - Ks = K_all [np.ix_(test_idx,sparse_idx)] - - try: - alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos', overwrite_a=True) - except scipy.linalg.LinAlgError: - print('singular matrix') - all_maes.append(np.nan) - break - y_kf_predict = np.dot(Ks, alpha) - all_maes.append(np.mean(np.abs(y_kf_predict-y_kf_test))) - return np.mean(all_maes), np.std(all_maes) - - def hyper_loop(sigma, eta): - errors = [] - with Parallel(n_jobs=-1) as parallel: - for s in sigma: - if read_kernel is False: - K_all = kernel(X_train, X_train, 1.0/s) - else: - K_all = X_train - - def inner_loop(s,e): - mean, std = k_fold_opt(K_all, e) - if printlevel>0 : - sys.stderr.flush() - print(s, e, mean, std, flush=True) - return (mean, std, e, s) - - errors += parallel(delayed(inner_loop)(s,e) for e in eta) - return errors + if gkernel is None: gwrap = None else: @@ -96,47 +370,76 @@ def inner_loop(s,e): test_size=test_size, random_state=random_state) if read_kernel is False: X_train = X[idx_train] + optimise_sigma = True else: X_train = X[np.ix_(idx_train,idx_train)] - sigma = [np.nan] + optimise_sigma = False if sparse: if read_kernel: raise RuntimeError('Cannot do FPS with kernels') sparse_idx = do_fps(X_train)[0][:sparse] + else: + sparse_idx = None - work_sigma = sigma - errors = [] - direction = None - while True: - errors = list(errors) - errors.extend(hyper_loop(work_sigma, eta)) - errors = np.array(errors) - ind = np.argsort(errors[:,0])[::-1] - errors = errors[ind] + with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: + if optimise_sigma: + errors = search_sigma( + X_train, y_train, splits, + kernel, sigma, eta, sparse_idx, + n_sigma_iters, stddev_portion, + adaptive, adaptive_v2, + read_kernel, printlevel, + ) + else: + K_all = X_train + sparse = sparse_idx is not None + if sparse: + K_all = K_all[:, sparse_idx] + sigma = np.nan + partial_errors = kfold_alpha_eval( + K_all, y, splits, eta, + sparse, parallel, + ) + errors = np.ndarray((len(partial_errors),4)) + errors[:,:3] = partial_errors + errors[:,3] = np.nan - if not 
adaptive: - break + sorter = (errors[:,0] + stddev_portion*errors[:,1]).argsort() + errors = errors[sorter[::-1]] - best_sigma = errors[-1][3] - new_sigma = None + # work_sigma = sigma + # errors = [] + # direction = None + # while True: + # errors = list(errors) + # errors.extend(hyper_loop(work_sigma, eta)) + # errors = np.array(errors) + # ind = np.argsort(errors[:,0])[::-1] + # errors = errors[ind] - if direction is None: - if best_sigma==max(work_sigma): - direction = 'up' - elif best_sigma==min(work_sigma): - direction = 'down' + # if not adaptive: + # break - # at the 1st iteration if is checked twice on purpose - if direction=='up' and best_sigma==max(work_sigma): - new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) - elif direction=='down' and best_sigma==min(work_sigma): - new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + # best_sigma = errors[-1][3] + # new_sigma = None - if new_sigma is None: - break - work_sigma = new_sigma - print('next iteration:', work_sigma, flush=True) + # if direction is None: + # if best_sigma==max(work_sigma): + # direction = 'up' + # elif best_sigma==min(work_sigma): + # direction = 'down' + + # # at the 1st iteration if is checked twice on purpose + # if direction=='up' and best_sigma==max(work_sigma): + # new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) + # elif direction=='down' and best_sigma==min(work_sigma): + # new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + + # if new_sigma is None: + # break + # work_sigma = new_sigma + # print('next iteration:', work_sigma, flush=True) return errors @@ -144,6 +447,11 @@ def _get_arg_parser(): """Parse CLI arguments.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') parser.remove_argument("train_size") + parser.add_argument('--ada2', action='store_true', dest='adaptive_v2', default=False, help='whether to use a continuous adaptative approach to sigma. 
If so, only the first and last sigma values will be used to start the optimisation.') + parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=0.0, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') + parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number of iterations for the sigma-optimisation line search') + + return parser @@ -159,10 +467,15 @@ def main(): X = np.load(args.repr) y = np.loadtxt(args.prop) - errors = hyperparameters(X, y, read_kernel=args.readk, sigma=args.sigma, eta=args.eta, - akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, - sparse=args.sparse, - test_size=args.test_size, splits=args.splits, printlevel=args.printlevel, adaptive=args.adaptive) + errors = hyperparameters( + X, y, + sigma=args.sigma, eta=args.eta, sparse=args.sparse, + akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, read_kernel=args.readk, + test_size=args.test_size, splits=args.splits, idx_test=None, idx_train=None, + stddev_portion=args.stddev_portion, n_sigma_iters=args.n_sigma_iters, + printlevel=args.printlevel, adaptive=args.adaptive, adaptive_v2=args.adaptive_v2, + random_state=args.random_state, + ) errors = np.array(errors) if args.nameout is not None: np.savetxt(args.nameout, errors, header="error stdev eta sigma") diff --git a/tests/test_regression.py b/tests/test_regression.py index b822e6c..a60e606 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -18,15 +18,16 @@ def test_hyperparameters(): yfile = os.path.join(path, 'data/mols/dipole.dat') y = np.loadtxt(yfile) - hyper = hyperparameters.hyperparameters(X, y)[-4:] + hyper = hyperparameters.hyperparameters(X, y, stddev_portion=0)[-4:] true_hyper = [ [5.18303885e-01,3.00507798e-01,1.00000000e-05,3.16227766e+01], [5.18262897e-01,3.00473853e-01,3.16227766e-08,3.16227766e+01], [5.18262767e-01,3.00473746e-01,1.00000000e-10,3.16227766e+01], [5.10592542e-01,3.38247735e-01,1.00000000e+00,3.16227766e+01]] + assert (np.allclose(hyper, true_hyper)) -def test_hyperparameters2(): +def test_hyperparameters_adapt2(): #import logging #logging.basicConfig() #logging.getLogger("qstack").setLevel('DEBUG') @@ -35,8 +36,9 @@ def test_hyperparameters2(): X = np.load(xfile) yfile = os.path.join(path, 'data/mols/dipole.dat') y = np.loadtxt(yfile) + print(np.isfinite(X).all(), np.isfinite(y).all()) - hyper = hyperparameters2.hyperparameters(X, y, random_state=42)[-4:] + hyper = hyperparameters.hyperparameters(X, y, random_state=42, stddev_portion=1.0, adaptive_v2=True)[-4:] true_hyper = [ [5.15813198e-01, 2.37774396e-01, 3.16227766e-08, 3.64079252e+04], [5.15719232e-01, 2.37430538e-01, 1.00000000e-10, 1.00000000e+06], @@ -46,6 +48,16 @@ def test_hyperparameters2(): assert (np.allclose(hyper, true_hyper)) + hyper = hyperparameters.hyperparameters(X, y, random_state=42, stddev_portion=0.0, adaptive_v2=True)[-4:] + true_hyper = [ + [5.10293083e-01, 2.45220151e-01, 1.00000000e-10, 3.25422973e+02], + [5.04772360e-01, 2.54644677e-01, 1.00000000e-05, 1.51009311e+02], + [5.04560208e-01, 2.54267311e-01, 3.16227766e-08, 1.51009311e+02], + [5.04559535e-01, 2.54266113e-01, 1.00000000e-10, 1.51009311e+02], + ] + assert (np.allclose(hyper, true_hyper)) + + def test_regression(): path = os.path.dirname(os.path.realpath(__file__)) X = np.load(os.path.join(path, 'data/mols/X_lb.npy')) From 8c2fef3c75bdf170787a610488ac71de3d4235fd Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Tue, 10 Feb 2026 10:55:56 
+0100 Subject: [PATCH 5/5] fix: make ruff happy --- docs/generate_rst.py | 5 +- qstack/qml/b2r2.py | 9 +- qstack/regression/hyperparameters.py | 62 ++-- qstack/regression/hyperparameters2.py | 412 -------------------------- tests/test_regression.py | 1 - 5 files changed, 49 insertions(+), 440 deletions(-) delete mode 100644 qstack/regression/hyperparameters2.py diff --git a/docs/generate_rst.py b/docs/generate_rst.py index ce15334..ed9b8ba 100755 --- a/docs/generate_rst.py +++ b/docs/generate_rst.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -""" -Generate Sphinx-ready .rst files +"""Generate Sphinx-ready .rst files Used instead of Sphinx to avoid importing broken or expensive dependecies @@ -147,7 +146,7 @@ def extract_module_info(py_path: Path, module_name: str) -> ModuleInfo: try: tree = ast.parse(src) except SyntaxError: - return ModuleInfo(module_name, py_path, None, [], [], None, [],) + return ModuleInfo(module_name, py_path, None, [], [], None, []) mdoc = ast.get_docstring(tree) classes: list[ClassInfo] = [] diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 0c0f98f..66e50ca 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -5,6 +5,7 @@ """ import itertools +from dataclasses import dataclass from types import SimpleNamespace import numpy as np from scipy.special import erf @@ -13,10 +14,12 @@ defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) +@dataclass class Reaction: - def __init__(self, reactants, products): - self.reactants = reactants - self.products = products + """Utility reaction-storing class for Inner workings of B2R2.""" + reactants: list # lists of Mol-like objects (that have .positions (in Å) and .charges attributes) + products: list + def get_bags(unique_ncharges): """Generate all unique element pair combinations including self-interactions. diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 1cefc5b..eb4a333 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,6 +1,7 @@ """Hyperparameter optimization.""" -import sys, logging +import sys +import logging import numpy as np import scipy from sklearn.model_selection import KFold @@ -17,8 +18,8 @@ # parabola-based line search def fit_quadratic(x1,x2,x3, y1,y2,y3): - """ - Compute the three coefficients of a quadratic polynomial going through three given points. + """Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. """ # we need to change coordinates around for this @@ -39,8 +40,8 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): return curv, slope, intercept def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """ - Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + """Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. 
Args: @@ -54,7 +55,6 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 Returns: the (argmin, min) tuple characterising the minimum of the function (2x float) """ - y_left = get_err(x_left) y_right = get_err(x_right) x_center = 0.5*(x_left+x_right) @@ -103,7 +103,8 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 n_iter -=1 logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) - all_errs.append((x_new, y_new)) ; all_errs.sort() + all_errs.append((x_new, y_new)) + all_errs.sort() logger.debug('current data: %r', all_errs) if x_new < x_left or x_new > x_right: @@ -138,14 +139,32 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 def standard_grid_search(x_list, get_err): - errors = [] - for x in x_list: - errors.append(get_err(x)) - errors = np.asarray(errors) + """Module-internal function: single-parameter optimisation function, using a simple grid search. + + Args: + x_list (iterable[float]): pre-defined grid containing the values to try for the parameter + get_err (callable float->float): process to optimise, returning the error associated with a single parameter value. + + Returns: + val (float): optimal parameter value + err (float): the associated error + """ + errors = np.array(get_err(x) for x in x_list) xi = errors.argmin() return xi, errors[xi] def adaptative_grid_search(x_list, get_err): + """Module-internal function: single-parameter optimisation function, using an adaptive grid search. + + Operates like a standard grid search, but extends the grid if the optimal parameter value is at one of the edges of the provided grid. + + Args: + x_list (iterable[float]): pre-defined original grid of the parameter + get_err (callable float->float): process to optimise, returning the error associated with a single parameter value. + + Returns: + errors (np.ndarray[N_evals,2]): list of (parameter value, error) pairs, sorted to have decreasing errors + """ work_list = x_list errors = [] direction = None @@ -158,7 +177,6 @@ def adaptative_grid_search(x_list, get_err): errors = errors[ind] current_argmin = errors[-1,0] - new_sigma = None if direction is None: if current_argmin==max(work_list): @@ -246,13 +264,14 @@ def inner_call(fold_i, eta_i, K_all, sparse, y_train, eta, train_idx, test_idx): def search_sigma( X_train, y_train, splits, - kernel, sigma, eta, + kernel, sigma, eta_grid, sparse_idx=None, n_sigma_iter=5, stddev_portion=0.0, adaptive=False, adaptive_v2=False, read_kernel=False, printlevel=0, ): """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, and alpha is the regularisation parameter of the resulting matrix equation. @@ -262,12 +281,13 @@ def search_sigma( No matter what, the optimisation of alpha is done over a grid, with k-fold validation. Args: - X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset - y (np.ndarray[n_total]): learnable properties for all inputs + X_train (np.ndarray[n_total,n_features]): feature vectors for the combined train-validation dataset + y_train (np.ndarray[n_total]): learnable properties for all inputs sigma (array-like(float)): values of sigma. 
for `adaptive`, starting values, for `adaptive_v2`, only the first and last values are used, as presumed bounds of the optimal value of sigma - alpha_grid (array-like of floats): values of alpha to try + eta_grid (array-like of floats): values of eta to try n_sigma_iter (int): number of iterations for the sigma line-search (if adaptive_v2) - n_splits (int): number of folds for k-fold validation + kernel (callable): kernel function computing a kernel matrix from two sets of representations vectors and a "gamma" scale parameter + splits (int): number of folds for k-fold validation stddev_portion (float): contribution of the error's standard deviation to compare error distributions sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. adaptive (bool): to use the adaptive grid for sigma @@ -288,12 +308,12 @@ def get_err(s): else: K_all = X_train - sparse = sparse_idx != None + sparse = sparse_idx is not None if sparse: K_all = K_all[:,sparse_idx] results_per_eta = kfold_alpha_eval( - K_all, y_train, splits, eta, sparse, + K_all, y_train, splits, eta_grid, sparse, parallel = parallel, ) for mean,std,e in results_per_eta: @@ -307,7 +327,8 @@ def get_err(s): with Parallel(n_jobs=-1) as parallel: if adaptive_v2: - assert not adaptive + if adaptive: + raise ValueError("Only one of `adaptive`, `adaptive_v2` may be specified.") _, _ = parabolic_search( np.log(sigma[0]), np.log(sigma[-1]), lambda log_s: get_err(np.exp(log_s)), @@ -359,7 +380,6 @@ def hyperparameters(X, y, Raises: RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ - if gkernel is None: gwrap = None else: diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py deleted file mode 100644 index 7a7147b..0000000 --- a/qstack/regression/hyperparameters2.py +++ /dev/null @@ -1,412 +0,0 @@ -"""Hyperparameter optimisation using a smoother version of sigma selection, -using a no-gradient line search""" - -import sys, logging -import numpy as np -import scipy -from sklearn.metrics import mean_absolute_error -from sklearn.model_selection import KFold -from sklearn.utils.parallel import Parallel, delayed -from qstack.mathutils.fps import do_fps -from qstack.tools import correct_num_threads -from .kernel_utils import get_kernel, defaults, train_test_split_idx -from .parser import RegressionParser - -logger = logging.getLogger("qstack.regression.hyperparameters2") - -# ##################### -# parabola-based line search - - -def fit_quadratic(x1,x2,x3, y1,y2,y3): - """ - Compute the three coefficients of a quadratic polynomial going through three given points. - Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. - """ - # we need to change coordinates around for this - - # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) - # the 'meta-slope' allows us to get 2*curvature - slope1 = (y2-y1)/(x2-x1) - slope2 = (y3-y2)/(x3-x2) - curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears - - # then, remove the curvature from points 1 and 2 to determine 1st-degree term - y1_b = y1 - curv*x1**2 - y2_b = y2 - curv*x2**2 - slope = (y2_b-y1_b)/(x2-x1) - - # finally, the intercept - intercept = y1_b - slope*x1 - return curv, slope, intercept - -def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """ - Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. - Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. 
- - Args: - x_left (float): supposed left bound of the minimum of `get_err` - x_right (float): supposed right bound of the minimum of `get_err` - get_err (callable float->float): the function to minimise. - n_iter (int): the number of function calls allowed - x_thres (float): the acceptable error threshold for the the argmin to find - y_thres (float): the acceptable error threshold for the min to find - - Returns: - the (argmin, min) tuple characterising the minimum of the function (2x float) - """ - - y_left = get_err(x_left) - y_right = get_err(x_right) - x_center = 0.5*(x_left+x_right) - y_center = get_err(x_center) - - all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] - - while y_left < y_center or y_right < y_center: - # while it looks like we need to look elsewhere than in our original bracket - # (because the center isn't closer to the local minimum than the bounds) - if y_left < y_right: - logger.debug('no local minimum in sight, extending to the left...') - x_right, y_right = x_center, y_center - x_center, y_center = x_left, y_left - x_left = 2*x_center - x_right - y_left = get_err(x_left) - all_errs.insert(0, (x_left,y_left)) - else: - logger.debug('no local minimum in sight, extending to the right...') - x_left, y_left = x_center, y_center - x_center, y_center = x_right, y_right - x_right = 2*x_center - x_left - y_right = get_err(x_right) - all_errs.append((x_right,y_right)) - n_iter -= 1 - if n_iter <=0: - break - # after this point, either we are exiting early or we have found the right bounds - all_errs.sort() - logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) - logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) - while n_iter > 0: - a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) - if a<=0: # lol no local minimum - logger.debug('no local minimum...') - if x_left < x_right: - x_new = 0.5*(x_left+x_center) - ypred_new = np.nan - else: - x_new = 0.5*(x_right+x_center) - ypred_new = np.nan - else: - x_new = -0.5*b/a - ypred_new = -0.25*b**2/a + c - y_new = get_err(x_new) - n_iter -=1 - logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) - logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) - all_errs.append((x_new, y_new)) ; all_errs.sort() - logger.debug('current data: %r', all_errs) - - if x_new < x_left or x_new > x_right: - logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') - new_index = np.argmin(np.asarray(all_errs)[:,1]) - if new_index in (0, len(all_errs)-1): - raise AssertionError('edges of the search are somehow the minimum in second phase of function') - x_left, y_left = all_errs[new_index-1] - x_right, y_right = all_errs[new_index+1] - x_center, y_center = all_errs[new_index] - - elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: - break - elif y_new > y_center: - if x_new > x_center: - x_right, y_right = x_new, y_new - else: - x_left, y_left = x_new, y_new - else: # if y_new <= y_center - if x_new > x_center: - x_left, y_left = x_center, y_center - x_center, y_center = x_new, y_new - else: - x_right, y_right = x_center, y_center - x_center, y_center = x_new, y_new - - if abs(x_right - x_left) < x_thres: - break - - opt_idx = np.argmin(np.asarray(all_errs)[:,1]) - return all_errs[opt_idx] - - - -def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): - """Module-internal function: optimise alpha 
(regularisation parameter) of a KRR learning model, using a K-fold validation. - - Args: - K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) - y: learnable properties for all inputs (n_total-length vector) - n_splits: number of folds for k-fold validation - alpha_grid: all the values of alpha to try (array-like) - parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) - on_compute: function to call for the error summaries of each value of alpha - (callable: alpha, error_mean, error_stddev -> None) - - Returns: - - optimal value of alpha - - validation error list for all k-fold evaluations for this value of alpha - """ - if parallel is None: - parallel = Parallel(n_jobs=-1, return_as="generator_unordered") - kfold = KFold(n_splits=n_splits, shuffle=False) - maes = np.full((kfold.get_n_splits(), len(alpha_grid)), np.inf) - y = np.asarray(y) - is_sparse = K_all.shape[0] != K_all.shape[1] - - def inner_loop(split_i, alpha_i, train_idx, val_idx, alpha): - y_val = y[val_idx] - y_train = y[train_idx] - if is_sparse: - K_train = K_all[train_idx, :] - K_val = K_all[val_idx, :] - y_train = K_train.T @ y_train - K_train = K_train.T @ K_train - else: - K_train = K_all[np.ix_(train_idx,train_idx)] - K_val = K_all [np.ix_(val_idx,train_idx)] - if np.may_share_memory(K_train, K_all): - K_train = K_train.copy() - K_train[np.diag_indices_from(K_train)] += alpha - - try: - weights = scipy.linalg.solve(K_train, y_train, assume_a='pos', overwrite_a=True) - except Exception as err: - raise - # bad fit (singular matrix)! - return split_i, alpha_i, np.inf - predict = K_val @ weights - return split_i, alpha_i, mean_absolute_error(y_val, predict) - - mae_generator = parallel( - delayed(inner_loop)(s_i,a_i, t,v,a) - for a_i,a in enumerate(alpha_grid) - for s_i,(t,v) in enumerate(kfold.split(y)) - ) - for split_i, alpha_i, mae in mae_generator: - maes[split_i, alpha_i] = mae - - concat_results = np.full((len(alpha_grid), 2), np.inf) - for alpha_i in range(len(alpha_grid)): - if not np.isfinite(maes[:, alpha_i]).any(): - pass - else: - res = maes[:,alpha_i] - res = res[np.isfinite(res)] - concat_results[alpha_i,0] = res.mean() - concat_results[alpha_i,1] = res.std() - on_compute(alpha_grid[alpha_i], *concat_results[alpha_i]) - #print("kfold evaluation for alpha grid",alpha_grid,concat_results) - selected_alpha_i = concat_results[:,0].argmin() - return alpha_grid[selected_alpha_i], maes[:,selected_alpha_i]#, models[:, selected_alpha_i].copy() - - -def search_sigma( - X, y, kernel, - sigma_bounds, alpha_grid, - n_iter, n_splits, - stddev_portion=+1.0, sparse_idx=None, - parallel=None, on_compute=(lambda sigma,alpha,err,stderr:None) -): - """Search the optimal values of sigma and alpha for a KRR model with known representations. - Sigma is the width parameter of the kernel function used, - and alpha is the regularisation parameter of the resulting matrix equation. - - Internally, calls the line-search rountine for gamma, - where the function to minimise performs its own grid-based optimisation of alpha. 
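To make the nested optimisation concrete: for each trial sigma, every fold boils down to one regularised kernel solve plus a validation-error evaluation. A self-contained sketch of a single dense (non-sparse) fold, with a stand-in Gaussian kernel and synthetic data; names and values are illustrative only:

    import numpy as np
    import scipy.linalg
    from sklearn.metrics import mean_absolute_error

    rng = np.random.default_rng(0)
    X = rng.normal(size=(40, 5))                       # synthetic representations
    y = X @ rng.normal(size=5)                         # synthetic target property

    def toy_kernel(A, B, gamma):
        # stand-in kernel; gamma plays the role of 1/sigma
        d2 = ((A[:, None, :] - B[None, :, :])**2).sum(axis=-1)
        return np.exp(-gamma*d2)

    sigma, alpha = 2.0, 1e-6
    K = toy_kernel(X, X, 1.0/sigma)
    train_idx, val_idx = np.arange(30), np.arange(30, 40)

    K_train = K[np.ix_(train_idx, train_idx)].copy()   # copy before touching the diagonal
    K_train[np.diag_indices_from(K_train)] += alpha    # ridge regularisation
    weights = scipy.linalg.solve(K_train, y[train_idx], assume_a='pos')
    mae = mean_absolute_error(y[val_idx], K[np.ix_(val_idx, train_idx)] @ weights)

In the sparse case the same fold instead solves the normal equations built from the n_total-by-n_references kernel block, as in the function above.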
- - Args: - X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset - y (np.ndarray[n_total]): learnable properties for all inputs - sigma_bounds (tuple(float,float)): presumed bounds of the optimal value of sigma - alpha_grid (array-like of floats): values of alpha to try - n_iter (int): number of iterations for the sigma line-search - n_splits (int): number of folds for k-fold validation - stddev_portion (float): contribution of the error's standard deviation to compare error distributions - sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. - parallel (optional joblib.Parallel): tool to make the optimisation more parallel. by default, one will be (re)created as often as necessary. - on_compute (callable sigma,alpha,err_mean,err_stddev -> None) - - Returns: - sigma (float): optimal value of sigma - alpha (float): optimal value of alpha - costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha - """ - - sigma_left, sigma_right = sigma_bounds - - err_dict = {} - - def get_err(log_sigma): - sigma = np.exp(log_sigma) - - K_all = kernel(X, X, 1.0/sigma) - if sparse_idx is not None: - K_all = K_all[:, sparse_idx] - - alpha, costs = kfold_alpha_eval( - K_all, y, n_splits, alpha_grid, - parallel=parallel, on_compute=(lambda eta,err,stderr: on_compute(sigma,eta,err,stderr)), - ) - err_dict[log_sigma] = (alpha,costs) - cost_res = costs.mean() + stddev_portion*costs.std() - #print("now eval'ing σ=", sigma, '... α=', alpha, costs.shape, costs.mean(), costs.std()) - return cost_res - - log_sigma_selected, cost_selected = parabolic_search( - np.log(sigma_left), np.log(sigma_right), - get_err, - n_iter=n_iter, x_thres=0.1, y_thres=0.01, - ) - - alpha_selected, costs_selected = err_dict[log_sigma_selected] - sigma = np.exp(log_sigma_selected) - - return sigma, alpha_selected, costs_selected - - - -def hyperparameters(X, y, - sigma_low=defaults.sigmaarr[0], sigma_high=defaults.sigmaarr[-1], eta=defaults.etaarr, - akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, - test_size=defaults.test_size, splits=defaults.splits, n_sigma_iters=5, idx_test=None, idx_train=None, - printlevel=0, sparse=None, - stddev_portion=+1.0, - random_state=defaults.random_state, -): - """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). - - Args: - X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. - y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. - sigma_low (float): Estimated low bound forthe kernel width. - sigma_high (float): Estimated high bound forthe kernel width. - eta (list): List of regularization strength for the grid search. - akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). - gkernel (str): Global kernel (None, 'REM', 'avg'). - gdict (dict): Parameters of the global kernels. - test_size (float or int): Test set fraction (or number of samples). - splits (int): K number of splits for the Kfold cross-validation. - n_sigma_iters (int): number of iterations for the sigma-optimisation line search - idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). - idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). - printlevel (int): Controls level of output printing. 
- read_kernel (bool): If 'X' is a kernel and not an array of representations (disables sigma optimisation). - sparse (int): The number of reference environnments to consider for sparse regression. - stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. - random_state (int): The seed used for random number generator (controls train/test splitting). - - Returns: - The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], - where C is the number of parameter set and - the array is sorted according to MAEs (last is minimum) - - Raises: - RuntimeError: If 'X' is a kernel and sparse regression is chosen. - """ - - if gkernel is None: - gwrap = None - else: - gwrap = [gkernel, gdict] - kernel = get_kernel(akernel, gwrap) - - idx_train, _, y_train, _ = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, - test_size=test_size, random_state=random_state) - if read_kernel is False: - X_train = X[idx_train] - optimise_sigma = True - else: - X_train = X[np.ix_(idx_train,idx_train)] - optimise_sigma = False - - if sparse: - if read_kernel: - raise RuntimeError('Cannot do FPS with kernels') - sparse_idx = do_fps(X_train)[0][:sparse] - else: - sparse_idx = np.arange(X_train.shape[0]) - - errors = [] - with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: - if optimise_sigma: - err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) - _,_,_ = search_sigma( - X_train, y_train, kernel, (sigma_low, sigma_high), alpha_grid=eta, - parallel=parallel, on_compute=err_append, - n_iter = n_sigma_iters, n_splits=splits, stddev_portion=stddev_portion, - sparse_idx=sparse_idx, - ) - else: - if sparse_idx is not None: - K_all = X_train[:, sparse_idx] - else: - K_all = X_train - sigma = np.nan - err_append = lambda alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) - _,_ = kfold_alpha_eval( - K_all, y, splits, alpha_grid=eta, - parallel=parallel, on_compute=err_append, - ) - - - errors = np.array(errors) - ind = np.argsort(errors[:,0]+stddev_portion*errors[:,1])[::-1] - errors = errors[ind] - return errors - - -def _get_arg_parser(): - """Parse CLI arguments.""" - parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') - parser.remove_argument("train_size") - parser.remove_argument("sigma") - parser.remove_argument("adaptative") - parser.add_argument('--sigma-low', type=float, dest='sigma_low', default=1E-2, help='estimated low bound for sigma') - parser.add_argument('--sigma-high', type=float, dest='sigma_high', default=1E+2, help='estimated high bound for sigma') - parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=1, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') - parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number of iterations for the sigma-optimisation line search') - - - return parser - - -def main(): - """Command-line entry point for hyperparameter optimization.""" - args = _get_arg_parser().parse_args() - if args.readk: - args.sigma = [np.nan] - if args.ll: - correct_num_threads() - print(vars(args)) - - X = np.load(args.repr) - y = np.loadtxt(args.prop) - - errors = hyperparameters( - X, y, read_kernel=args.readk, - sigma_low=args.sigma_low, sigma_high=args.sigma_high, eta=args.eta, - akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, 
sparse=args.sparse, - test_size=args.test_size, splits=args.splits, n_sigma_iters=args.n_sigma_iters, - printlevel=args.printlevel, random_state=args.random_state, stddev_portion=args.stddev_portion, - ) - errors = np.array(errors) - if args.nameout is not None: - np.savetxt(args.nameout, errors, header="error stdev eta sigma") - print() - print('error stdev eta sigma') - for error in errors: - print("{:e} {:e} | {:e} {:e}".format(*tuple(error))) - - -if __name__ == "__main__": - main() diff --git a/tests/test_regression.py b/tests/test_regression.py index a60e606..208a228 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -3,7 +3,6 @@ import os import numpy as np import qstack.regression.hyperparameters as hyperparameters -import qstack.regression.hyperparameters2 as hyperparameters2 import qstack.regression.regression as regression import qstack.regression.final_error as final_error import qstack.regression.condition as condition
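The error table shared by the returned array and the printed output (columns: error, stdev, eta, sigma) is ranked by mean error plus stddev_portion times the standard deviation, in decreasing order, so the best hyperparameter set is the last row. A minimal sketch of that ranking with made-up numbers:

    import numpy as np

    stddev_portion = 1.0
    errors = np.array([                  # columns: MAE, std, eta, sigma (illustrative values)
        [0.30, 0.05, 1e-6, 10.0],
        [0.25, 0.02, 1e-5,  3.0],
        [0.40, 0.10, 1e-4,  1.0],
    ])
    order = np.argsort(errors[:, 0] + stddev_portion*errors[:, 1])[::-1]
    errors = errors[order]               # worst first, best last
    best_mae, best_std, best_eta, best_sigma = errors[-1]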