From 54a9a84ebe49d4fb06970e5593231a6b1d49594d Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Sun, 18 Jan 2026 13:12:35 +0100 Subject: [PATCH 1/5] Add new hyperparameter optimisation script (and parallelise the existing one through joblib) --- qstack/regression/hyperparameters.py | 29 ++- qstack/regression/hyperparameters2.py | 354 ++++++++++++++++++++++++++ 2 files changed, 370 insertions(+), 13 deletions(-) create mode 100644 qstack/regression/hyperparameters2.py diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index c762b38..358587d 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -4,6 +4,7 @@ import numpy as np import scipy from sklearn.model_selection import KFold +from sklearn.utils.parallel import Parallel, delayed from qstack.mathutils.fps import do_fps from qstack.tools import correct_num_threads from .kernel_utils import get_kernel, defaults, train_test_split_idx, sparse_regression_kernel @@ -69,18 +70,21 @@ def k_fold_opt(K_all, eta): def hyper_loop(sigma, eta): errors = [] - for s in sigma: - if read_kernel is False: - K_all = kernel(X_train, X_train, 1.0/s) - else: - K_all = X_train - - for e in eta: - mean, std = k_fold_opt(K_all, e) - if printlevel>0 : - sys.stderr.flush() - print(s, e, mean, std, flush=True) - errors.append((mean, std, e, s)) + with Parallel(n_jobs=-1) as parallel: + for s in sigma: + if read_kernel is False: + K_all = kernel(X_train, X_train, 1.0/s) + else: + K_all = X_train + + def inner_loop(s,e): + mean, std = k_fold_opt(K_all, e) + if printlevel>0 : + sys.stderr.flush() + print(s, e, mean, std, flush=True) + return (mean, std, e, s) + + errors += parallel(delayed(inner_loop)(s,e) for e in eta) return errors if gkernel is None: gwrap = None @@ -139,7 +143,6 @@ def hyper_loop(sigma, eta): def _get_arg_parser(): """Parse CLI arguments.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') - parser.remove_argument("random_state") parser.remove_argument("train_size") return parser diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py new file mode 100644 index 0000000..4ca2e70 --- /dev/null +++ b/qstack/regression/hyperparameters2.py @@ -0,0 +1,354 @@ +"""Hyperparameter optimisation using a smoother version of sigma selection, +using a no-gradient line search""" + +import sys, logging +import numpy as np +import scipy +from sklearn.metrics import mean_absolute_error +from sklearn.model_selection import KFold +from sklearn.utils.parallel import Parallel, delayed +from qstack.mathutils.fps import do_fps +from qstack.tools import correct_num_threads +from .kernel_utils import get_kernel, defaults, train_test_split_idx +from .parser import RegressionParser + +logger = logging.getLogger("qstack.regression.hyperparameters2") + +# ##################### +# parabola-based line search + + +def fit_quadratic(x1,x2,x3, y1,y2,y3): + """Compute the three coefficients of a quadratic polynomial going through three given points.""" + # we need to change coordinates around for this + + # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) + # the 'meta-slope' allows us to get 2*curvature + slope1 = (y2-y1)/(x2-x1) + slope2 = (y3-y2)/(x3-x2) + curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears + + # then, remove the curvature from points 1 and 2 to determine 1st-degree term + y1_b = y1 - curv*x1**2 + y2_b = y2 - curv*x2**2 + slope = (y2_b-y1_b)/(x2-x1) + + # finally, the intercept + intercept 
= y1_b - slope*x1 + return curv, slope, intercept + +def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): + """A 1D optimisation function, assuming the loss function in question is convex, It first checks to see the bounds are correct, then refines them by fitting quadratic polynomials""" + + y_left = get_err(x_left) + y_right = get_err(x_right) + x_center = 0.5*(x_left+x_right) + y_center = get_err(x_center) + + all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] + + while y_left < y_center or y_right < y_center: + # while it looks like we need to look elsewhere than in our original bracket + # (because the center isn't closer to the local minimum than the bounds) + if y_left < y_right: + logger.debug('no local minimum in sight, extending to the left...') + x_right, y_right = x_center, y_center + x_center, y_center = x_left, y_left + x_left = 2*x_center - x_right + y_left = get_err(x_left) + all_errs.insert(0, (x_left,y_left)) + else: + logger.debug('no local minimum in sight, extending to the right...') + x_left, y_left = x_center, y_center + x_center, y_center = x_right, y_right + x_right = 2*x_center - x_left + y_right = get_err(x_right) + all_errs.append((x_right,y_right)) + n_iter -= 1 + if n_iter <=0: + break + # after this point, either we are exiting early or we have found the right bounds + all_errs.sort() + logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) + logger.debug('chosen: %f\%f/%f', x_left, x_center, x_right) + while n_iter > 0: + a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) + if a<=0: # lol no local minimum + logger.debug('no local minimum...') + if x_left < x_right: + x_new = 0.5*(x_left+x_center) + ypred_new = np.nan + else: + x_new = 0.5*(x_right+x_center) + ypred_new = np.nan + else: + x_new = -0.5*b/a + ypred_new = -0.25*b**2/a + c + y_new = get_err(x_new) + n_iter -=1 + logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) + all_errs.append((x_new, y_new)) ; all_errs.sort() + logger.debug('current data: %r', all_errs) + + if x_new < x_left or x_new > x_right: + logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') + new_index = np.argmin(np.asarray(all_errs)[:,1]) + if new_index in (0, len(all_errs)-1): + raise AssertionError('edges of the search are somehow the minimum in second phase of function') + x_left, y_left = all_errs[new_index-1] + x_right, y_right = all_errs[new_index+1] + x_center, y_center = all_errs[new_index] + + elif y_new > y_center: + if x_new > x_center: + x_right, y_right = x_new, y_new + else: + x_left, y_left = x_new, y_new + elif y_left < y_right: + if max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break + if x_new > x_center: + x_left, y_left = x_center, y_center + x_center, y_center = x_new, y_new + else: + x_right, y_right = x_center, y_center + x_center, y_center = x_new, y_new + if abs(x_right - x_left) < x_thres: + break + + opt_idx = np.argmin(np.asarray(all_errs)[:,1]) + return all_errs[opt_idx] + + + +def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): + if parallel is None: + parallel = Parallel(n_jobs=-1, return_as="generator_unordered") + kfold = KFold(n_splits=n_splits, shuffle=False) + maes = np.full((kfold.get_n_splits(), len(alpha_grid)), np.inf) + y = np.asarray(y) + is_sparse = K_all.shape[0] != K_all.shape[1] + + def inner_loop(split_i, alpha_i, train_idx, 
val_idx, alpha): + y_val = y[val_idx] + y_train = y[train_idx] + if is_sparse: + K_train = K_all[train_idx, :] + K_val = K_all[val_idx, :] + y_train = K_train.T @ y_train + K_train = K_train.T @ K_train + else: + K_train = K_all[np.ix_(train_idx,train_idx)] + K_val = K_all [np.ix_(val_idx,train_idx)] + if np.may_share_memory(K_train, K_all): + K_train = K_train.copy() + K_train[np.diag_indices_from(K_train)] += alpha + + try: + weights = scipy.linalg.solve(K_train, y_train, assume_a='pos', overwrite_a=True) + except Exception as err: + raise + # bad fit (singular matrix)! + return split_i, alpha_i, np.inf + predict = K_val @ weights + return split_i, alpha_i, mean_absolute_error(y_val, predict) + + mae_generator = parallel( + delayed(inner_loop)(s_i,a_i, t,v,a) + for a_i,a in enumerate(alpha_grid) + for s_i,(t,v) in enumerate(kfold.split(y)) + ) + for split_i, alpha_i, mae in mae_generator: + maes[split_i, alpha_i] = mae + + concat_results = np.full((len(alpha_grid), 2), np.inf) + for alpha_i in range(len(alpha_grid)): + if not np.isfinite(maes[:, alpha_i]).any(): + pass + else: + res = maes[alpha_i] + res = res[np.isfinite(res)] + concat_results[alpha_i,0] = res.mean() + concat_results[alpha_i,1] = res.std() + on_compute(alpha_grid[alpha_i], *concat_results[alpha_i]) + #print("kfold evaluation for alpha grid",alpha_grid,concat_results) + selected_alpha_i = concat_results[:,0].argmin() + return alpha_grid[selected_alpha_i], maes[:,selected_alpha_i]#, models[:, selected_alpha_i].copy() + + +def search_sigma( + X, y, kernel, + sigma_bounds, alpha_grid, + n_iter, n_splits, + stddev_portion=+1.0, sparse_idx=None, + parallel=None, on_compute=(lambda sigma,eta,err,stderr:None) +): + """Search""" + + sigma_left, sigma_right = sigma_bounds + + err_dict = {} + + def get_err(log_sigma): + sigma = np.exp(log_sigma) + + K_all = kernel(X, X, 1.0/sigma) + if sparse_idx is not None: + K_all = K_all[:, sparse_idx] + + alpha, costs = kfold_alpha_eval( + K_all, y, n_splits, alpha_grid, + parallel=parallel, on_compute=(lambda eta,err,stderr: on_compute(sigma,eta,err,stderr)), + ) + err_dict[log_sigma] = (alpha,costs) + cost_res = costs.mean() + stddev_portion*costs.std() + return cost_res + + log_sigma_selected, cost_selected = parabolic_search( + np.log(sigma_left), np.log(sigma_right), + get_err, + n_iter=n_iter, x_thres=0.1, y_thres=0.01, + ) + + alpha_selected, costs_selected = err_dict[log_sigma_selected] + sigma = np.exp(log_sigma_selected) + + return sigma, alpha_selected, costs_selected + + + +def hyperparameters(X, y, + sigma_low=defaults.sigmaarr[0], sigma_high=defaults.sigmaarr[-1], eta=defaults.etaarr, + akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, + test_size=defaults.test_size, splits=defaults.splits, n_sigma_iters=5, idx_test=None, idx_train=None, + printlevel=0, sparse=None, + stddev_portion=+1.0, + random_state=defaults.random_state, +): + """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). + + Args: + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigma_low (float): Estimated low bound forthe kernel width. + sigma_high (float): Estimated high bound forthe kernel width. + eta (list): List of regularization strength for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). 
+ gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + splits (int): K number of splits for the Kfold cross-validation. + n_sigma_iters (int): number of iterations for the sigma-optimisation line search + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + printlevel (int): Controls level of output printing. + read_kernel (bool): If 'X' is a kernel and not an array of representations (disables sigma optimisation). + sparse (int): The number of reference environnments to consider for sparse regression. + stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. + random_state (int): The seed used for random number generator (controls train/test splitting). + + Returns: + The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], + where C is the number of parameter set and + the array is sorted according to MAEs (last is minimum) + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. + """ + + if gkernel is None: + gwrap = None + else: + gwrap = [gkernel, gdict] + kernel = get_kernel(akernel, gwrap) + + idx_train, _, y_train, _ = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, + test_size=test_size, random_state=random_state) + if read_kernel is False: + X_train = X[idx_train] + optimise_sigma = True + else: + X_train = X[np.ix_(idx_train,idx_train)] + optimise_sigma = False + + if sparse: + if read_kernel: + raise RuntimeError('Cannot do FPS with kernels') + sparse_idx = do_fps(X_train)[0][:sparse] + else: + sparse_idx = np.arange(X_train.shape[0]) + + errors = [] + with Parallel(n_jobs=-1, return_as="generator_unordered") as parallel: + if optimise_sigma: + err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) + _,_,_ = search_sigma( + X_train, y_train, kernel, (sigma_low, sigma_high), alpha_grid=eta, + parallel=parallel, on_compute=err_append, + n_iter = n_sigma_iters, n_splits=splits, stddev_portion=stddev_portion, + sparse_idx=sparse_idx, + ) + else: + if sparse_idx is not None: + K_all = X_train[:, sparse_idx] + else: + K_all = X_train + sigma = np.nan + err_append = lambda alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) + _,_ = kfold_alpha_eval( + K_all, y, splits, alpha_grid=eta, + parallel=parallel, on_compute=err_append, + ) + + + errors = np.array(errors) + ind = np.argsort(errors[:,0]+stddev_portion*errors[:,-1])[::-1] + errors = errors[ind] + return errors + + +def _get_arg_parser(): + """Parse CLI arguments.""" + parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') + parser.remove_argument("train_size") + parser.remove_argument("sigma") + parser.remove_argument("adaptative") + parser.add_argument('--sigma-low', type=float, dest='sigma_low', default=1E-2, help='estimated low bound for sigma') + parser.add_argument('--sigma-high', type=float, dest='sigma_high', default=1E+2, help='estimated high bound for sigma') + parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=1, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') + parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number 
of iterations for the sigma-optimisation line search') + + + return parser + + +def main(): + """Command-line entry point for hyperparameter optimization.""" + args = _get_arg_parser().parse_args() + if args.readk: + args.sigma = [np.nan] + if args.ll: + correct_num_threads() + print(vars(args)) + + X = np.load(args.repr) + y = np.loadtxt(args.prop) + + errors = hyperparameters( + X, y, read_kernel=args.readk, + sigma_low=args.sigma_low, sigma_high=args.sigma_high, eta=args.eta, + akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, sparse=args.sparse, + test_size=args.test_size, splits=args.splits, n_sigma_iters=args.n_sigma_iters, + printlevel=args.printlevel, random_state=args.random_state, stddev_portion=args.stddev_portion, + ) + errors = np.array(errors) + if args.nameout is not None: + np.savetxt(args.nameout, errors, header="error stdev eta sigma") + print() + print('error stdev eta sigma') + for error in errors: + print("{:e} {:e} | {:e} {:e}".format(*tuple(error))) + + +if __name__ == "__main__": + main() From 033127d116e3d7cd46f455df1031f8656e209bc6 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Tue, 20 Jan 2026 00:32:24 +0100 Subject: [PATCH 2/5] document said script's functions --- qstack/regression/hyperparameters2.py | 63 +++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py index 4ca2e70..793a47d 100644 --- a/qstack/regression/hyperparameters2.py +++ b/qstack/regression/hyperparameters2.py @@ -19,7 +19,10 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): - """Compute the three coefficients of a quadratic polynomial going through three given points.""" + """ + Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. + """ # we need to change coordinates around for this # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) @@ -38,7 +41,21 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): return curv, slope, intercept def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """A 1D optimisation function, assuming the loss function in question is convex, It first checks to see the bounds are correct, then refines them by fitting quadratic polynomials""" + """ + Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. + + Args: + x_left (float): supposed left bound of the minimum of `get_err` + x_right (float): supposed right bound of the minimum of `get_err` + get_err (callable float->float): the function to minimise. + n_iter (int): the number of function calls allowed + x_thres (float): the acceptable error threshold for the the argmin to find + y_thres (float): the acceptable error threshold for the min to find + + Returns: + the (argmin, min) tuple characterising the minimum of the function (2x float) + """ y_left = get_err(x_left) y_right = get_err(x_right) @@ -122,6 +139,21 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): + """Module-internal function: optimise alpha (regularisation parameter) of a KRR learning model, using a K-fold validation. 
+ + Args: + K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) + y: learnable properties for all inputs (n_total-length vector) + n_splits: number of folds for k-fold validation + alpha_grid: all the values of alpha to try (array-like) + parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) + on_compute: function to call for the error summaries of each value of alpha + (callable: alpha, error_mean, error_stddev -> None) + + Returns: + - optimal value of alpha + - validation error list for all k-fold evaluations for this value of alpha + """ if parallel is None: parallel = Parallel(n_jobs=-1, return_as="generator_unordered") kfold = KFold(n_splits=n_splits, shuffle=False) @@ -181,9 +213,32 @@ def search_sigma( sigma_bounds, alpha_grid, n_iter, n_splits, stddev_portion=+1.0, sparse_idx=None, - parallel=None, on_compute=(lambda sigma,eta,err,stderr:None) + parallel=None, on_compute=(lambda sigma,alpha,err,stderr:None) ): - """Search""" + """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, + and alpha is the regularisation parameter of the resulting matrix equation. + + Internally, calls the line-search rountine for gamma, + where the function to minimise performs its own grid-based optimisation of alpha. + + Args: + X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset + y (np.ndarray[n_total]): learnable properties for all inputs + sigma_bounds (tuple(float,float)): presumed bounds of the optimal value of sigma + alpha_grid (array-like of floats): values of alpha to try + n_iter (int): number of iterations for the sigma line-search + n_splits (int): number of folds for k-fold validation + stddev_portion (float): contribution of the error's standard deviation to compare error distributions + sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. + parallel (optional joblib.Parallel): tool to make the optimisation more parallel. by default, one will be (re)created as often as necessary. + on_compute (callable sigma,alpha,err_mean,err_stddev -> None) + + Returns: + sigma (float): optimal value of sigma + alpha (float): optimal value of alpha + costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha + """ sigma_left, sigma_right = sigma_bounds From 8704594307a514be306952bd6ad290e542647d92 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Wed, 21 Jan 2026 22:28:38 +0100 Subject: [PATCH 3/5] add test for new hyperparameters fitting --- qstack/qml/b2r2.py | 4 ++++ qstack/regression/hyperparameters2.py | 17 ++++++++++------- tests/test_regression.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index f3db3d7..0c0f98f 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -13,6 +13,10 @@ defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) +class Reaction: + def __init__(self, reactants, products): + self.reactants = reactants + self.products = products def get_bags(unique_ncharges): """Generate all unique element pair combinations including self-interactions. 
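For orientation, the small Reaction class added to b2r2.py above is only a container pairing the reactant and product structures of one reaction, as used by the B2R2 reaction representations. A minimal sketch of how it could be constructed follows; the SimpleNamespace stand-ins are hypothetical, and the only assumption (stated later in this series) is that each entry is a Mol-like object exposing .positions in Å and .charges:

from types import SimpleNamespace
import numpy as np
from qstack.qml.b2r2 import Reaction   # available once this patch is applied

# stand-in for a Mol-like object: anything with .positions (in Å) and .charges works
h2 = SimpleNamespace(positions=np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.74]]),
                     charges=np.array([1, 1]))

rxn = Reaction(reactants=[h2], products=[h2])   # trivial identity "reaction"
print(len(rxn.reactants), len(rxn.products))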
diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py index 793a47d..7a7147b 100644 --- a/qstack/regression/hyperparameters2.py +++ b/qstack/regression/hyperparameters2.py @@ -87,7 +87,7 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 # after this point, either we are exiting early or we have found the right bounds all_errs.sort() logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) - logger.debug('chosen: %f\%f/%f', x_left, x_center, x_right) + logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) while n_iter > 0: a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) if a<=0: # lol no local minimum @@ -103,6 +103,7 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 ypred_new = -0.25*b**2/a + c y_new = get_err(x_new) n_iter -=1 + logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) all_errs.append((x_new, y_new)) ; all_errs.sort() logger.debug('current data: %r', all_errs) @@ -116,20 +117,21 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 x_right, y_right = all_errs[new_index+1] x_center, y_center = all_errs[new_index] + elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break elif y_new > y_center: if x_new > x_center: x_right, y_right = x_new, y_new else: x_left, y_left = x_new, y_new - elif y_left < y_right: - if max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: - break + else: # if y_new <= y_center if x_new > x_center: x_left, y_left = x_center, y_center x_center, y_center = x_new, y_new else: x_right, y_right = x_center, y_center x_center, y_center = x_new, y_new + if abs(x_right - x_left) < x_thres: break @@ -198,7 +200,7 @@ def inner_loop(split_i, alpha_i, train_idx, val_idx, alpha): if not np.isfinite(maes[:, alpha_i]).any(): pass else: - res = maes[alpha_i] + res = maes[:,alpha_i] res = res[np.isfinite(res)] concat_results[alpha_i,0] = res.mean() concat_results[alpha_i,1] = res.std() @@ -257,6 +259,7 @@ def get_err(log_sigma): ) err_dict[log_sigma] = (alpha,costs) cost_res = costs.mean() + stddev_portion*costs.std() + #print("now eval'ing σ=", sigma, '... 
α=', alpha, costs.shape, costs.mean(), costs.std()) return cost_res log_sigma_selected, cost_selected = parabolic_search( @@ -334,7 +337,7 @@ def hyperparameters(X, y, sparse_idx = np.arange(X_train.shape[0]) errors = [] - with Parallel(n_jobs=-1, return_as="generator_unordered") as parallel: + with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: if optimise_sigma: err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) _,_,_ = search_sigma( @@ -357,7 +360,7 @@ def hyperparameters(X, y, errors = np.array(errors) - ind = np.argsort(errors[:,0]+stddev_portion*errors[:,-1])[::-1] + ind = np.argsort(errors[:,0]+stddev_portion*errors[:,1])[::-1] errors = errors[ind] return errors diff --git a/tests/test_regression.py b/tests/test_regression.py index 9702fa5..b822e6c 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -3,6 +3,7 @@ import os import numpy as np import qstack.regression.hyperparameters as hyperparameters +import qstack.regression.hyperparameters2 as hyperparameters2 import qstack.regression.regression as regression import qstack.regression.final_error as final_error import qstack.regression.condition as condition @@ -25,6 +26,25 @@ def test_hyperparameters(): assert (np.allclose(hyper, true_hyper)) +def test_hyperparameters2(): + #import logging + #logging.basicConfig() + #logging.getLogger("qstack").setLevel('DEBUG') + path = os.path.dirname(os.path.realpath(__file__)) + xfile = os.path.join(path, 'data/mols/X_lb.npy') + X = np.load(xfile) + yfile = os.path.join(path, 'data/mols/dipole.dat') + y = np.loadtxt(yfile) + + hyper = hyperparameters2.hyperparameters(X, y, random_state=42)[-4:] + true_hyper = [ + [5.15813198e-01, 2.37774396e-01, 3.16227766e-08, 3.64079252e+04], + [5.15719232e-01, 2.37430538e-01, 1.00000000e-10, 1.00000000e+06], + [5.15657638e-01, 2.37472003e-01, 1.00000000e-10, 3.64079252e+04], + [5.15699162e-01, 2.37420839e-01, 1.00000000e-10, 1.71990639e+05], + ] + + assert (np.allclose(hyper, true_hyper)) def test_regression(): path = os.path.dirname(os.path.realpath(__file__)) From 8ae3fd8d1bbe00e707a1fb9b13e6adace89a9b32 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Fri, 23 Jan 2026 01:09:46 +0100 Subject: [PATCH 4/5] fuse hyperparameter optimisation methods --- qstack/regression/hyperparameters.py | 473 ++++++++++++++++++++++----- tests/test_regression.py | 18 +- 2 files changed, 408 insertions(+), 83 deletions(-) diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 358587d..1cefc5b 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,6 +1,6 @@ """Hyperparameter optimization.""" -import sys +import sys, logging import numpy as np import scipy from sklearn.model_selection import KFold @@ -10,11 +10,324 @@ from .kernel_utils import get_kernel, defaults, train_test_split_idx, sparse_regression_kernel from .parser import RegressionParser +logger = logging.getLogger("qstack.regression.hyperparameters") + + +# ##################### +# parabola-based line search + +def fit_quadratic(x1,x2,x3, y1,y2,y3): + """ + Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. 
+ """ + # we need to change coordinates around for this + + # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) + # the 'meta-slope' allows us to get 2*curvature + slope1 = (y2-y1)/(x2-x1) + slope2 = (y3-y2)/(x3-x2) + curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears + + # then, remove the curvature from points 1 and 2 to determine 1st-degree term + y1_b = y1 - curv*x1**2 + y2_b = y2 - curv*x2**2 + slope = (y2_b-y1_b)/(x2-x1) + + # finally, the intercept + intercept = y1_b - slope*x1 + return curv, slope, intercept + +def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): + """ + Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. + + Args: + x_left (float): supposed left bound of the minimum of `get_err` + x_right (float): supposed right bound of the minimum of `get_err` + get_err (callable float->float): the function to minimise. + n_iter (int): the number of function calls allowed + x_thres (float): the acceptable error threshold for the the argmin to find + y_thres (float): the acceptable error threshold for the min to find + + Returns: + the (argmin, min) tuple characterising the minimum of the function (2x float) + """ + + y_left = get_err(x_left) + y_right = get_err(x_right) + x_center = 0.5*(x_left+x_right) + y_center = get_err(x_center) + + all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] + + while y_left < y_center or y_right < y_center: + # while it looks like we need to look elsewhere than in our original bracket + # (because the center isn't closer to the local minimum than the bounds) + if y_left < y_right: + logger.debug('no local minimum in sight, extending to the left...') + x_right, y_right = x_center, y_center + x_center, y_center = x_left, y_left + x_left = 2*x_center - x_right + y_left = get_err(x_left) + all_errs.insert(0, (x_left,y_left)) + else: + logger.debug('no local minimum in sight, extending to the right...') + x_left, y_left = x_center, y_center + x_center, y_center = x_right, y_right + x_right = 2*x_center - x_left + y_right = get_err(x_right) + all_errs.append((x_right,y_right)) + n_iter -= 1 + if n_iter <=0: + break + # after this point, either we are exiting early or we have found the right bounds + all_errs.sort() + logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) + logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) + while n_iter > 0: + a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) + if a<=0: # lol no local minimum + logger.debug('no local minimum...') + if x_left < x_right: + x_new = 0.5*(x_left+x_center) + ypred_new = np.nan + else: + x_new = 0.5*(x_right+x_center) + ypred_new = np.nan + else: + x_new = -0.5*b/a + ypred_new = -0.25*b**2/a + c + y_new = get_err(x_new) + n_iter -=1 + logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) + logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) + all_errs.append((x_new, y_new)) ; all_errs.sort() + logger.debug('current data: %r', all_errs) + + if x_new < x_left or x_new > x_right: + logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') + new_index = np.argmin(np.asarray(all_errs)[:,1]) + if new_index in (0, len(all_errs)-1): + raise AssertionError('edges of the search are somehow the minimum in second phase of function') + x_left, y_left = 
all_errs[new_index-1] + x_right, y_right = all_errs[new_index+1] + x_center, y_center = all_errs[new_index] + + elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: + break + elif y_new > y_center: + if x_new > x_center: + x_right, y_right = x_new, y_new + else: + x_left, y_left = x_new, y_new + else: # if y_new <= y_center + if x_new > x_center: + x_left, y_left = x_center, y_center + x_center, y_center = x_new, y_new + else: + x_right, y_right = x_center, y_center + x_center, y_center = x_new, y_new + + if abs(x_right - x_left) < x_thres: + break + + opt_idx = np.argmin(np.asarray(all_errs)[:,1]) + return all_errs[opt_idx] + + +def standard_grid_search(x_list, get_err): + errors = [] + for x in x_list: + errors.append(get_err(x)) + errors = np.asarray(errors) + xi = errors.argmin() + return xi, errors[xi] + +def adaptative_grid_search(x_list, get_err): + work_list = x_list + errors = [] + direction = None + while True: + errors = list(errors) + for x in work_list: + errors.append((x,get_err(x))) + errors = np.array(errors) + ind = np.argsort(errors[:,1])[::-1] + errors = errors[ind] + + current_argmin = errors[-1,0] + new_sigma = None + + if direction is None: + if current_argmin==max(work_list): + direction = 'up' + elif current_argmin==min(work_list): + direction = 'down' + + # at the 1st iteration if is checked twice on purpose + if direction=='up' and current_argmin==max(work_list): + work_list = current_argmin*np.array(defaults.sigmaarr_mult[1:]) + elif direction=='down' and current_argmin==min(work_list): + work_list = current_argmin/np.array(defaults.sigmaarr_mult[1:]) + else: + break + + print('next iteration:', work_list, flush=True) + return errors + +# ##################### +# main functions of the hyperparameter optimisation + + +def kfold_alpha_eval(K_all, y_train, n_splits, eta_list, sparse, parallel = None): + """Module-internal function: optimise alpha (regularisation parameter) of a KRR learning model, using a K-fold validation. 
+ + Args: + K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) + y_train: learnable properties for all inputs (n_total-length vector) + n_splits: number of folds for k-fold validation + eta_list: all the values of eta (KRR regularisation parameter) to try (array-like) + sparse: whether the KRR to run is sparse (bool) + parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) + + Returns: + errors: array of "entries", each with the three values (np.ndarray, shape (len(eta_list),3) ): + mean: mean (over k-folds) validation error for this value of eta + stddev: standard deviation of the same error + eta: the corresponding value of eta + """ + if parallel is None: + parallel = Parallel(n_jobs=-1, return_as="generator_unordered") + kfold = KFold(n_splits=n_splits, shuffle=False) + maes = np.full((n_splits, len(eta_list)), np.inf) + y_train = np.asarray(y_train) + + + def inner_call(fold_i, eta_i, K_all, sparse, y_train, eta, train_idx, test_idx): + y_kf_train, y_kf_test = y_train[train_idx], y_train[test_idx] + + if not sparse: + K_solve = K_all [np.ix_(train_idx,train_idx)] + if np.may_share_memory(K_solve, K_all): + K_solve = K_solve.copy() + K_solve[np.diag_indices_from(K_solve)] += eta + y_solve = y_kf_train + Ks = K_all [np.ix_(test_idx,train_idx)] + else: + K_solve, y_solve = sparse_regression_kernel(K_all[train_idx], y_kf_train, slice(None), eta) + Ks = K_all[test_idx] + + try: + alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos', overwrite_a=True) + except scipy.linalg.LinAlgError: + print('singular matrix') + raise + y_kf_predict = np.dot(Ks, alpha) + return fold_i, eta_i, np.mean(np.abs(y_kf_predict-y_kf_test)) + + mae_generator = parallel( + delayed(inner_call)(fold_i, eta_i, K_all, sparse, y_train, eta, t,v) + for eta_i,eta in enumerate(eta_list) + for fold_i,(t,v) in enumerate(kfold.split(y_train)) + ) + for split_i, eta_i, mae in mae_generator: + maes[split_i, eta_i] = mae + + concat_results = np.full((len(eta_list), 3), np.inf) + for eta_i in range(len(eta_list)): + res = maes[:,eta_i] + #res = res[np.isfinite(res)] + concat_results[eta_i,0] = res.mean() + concat_results[eta_i,1] = res.std() + concat_results[eta_i,2] = eta_list[eta_i] + return concat_results + +def search_sigma( + X_train, y_train, splits, + kernel, sigma, eta, + sparse_idx=None, + n_sigma_iter=5, stddev_portion=0.0, + adaptive=False, adaptive_v2=False, + read_kernel=False, printlevel=0, +): + """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, + and alpha is the regularisation parameter of the resulting matrix equation. + + Internally, this can call for either a simple grid search, or be modified as so: + - the grid is adaptative for sigma (adaptive) + - the grid search for sigma becomes a continuous line search (adaptive_v2) + No matter what, the optimisation of alpha is done over a grid, with k-fold validation. + + Args: + X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset + y (np.ndarray[n_total]): learnable properties for all inputs + sigma (array-like(float)): values of sigma. 
for `adaptive`, starting values, for `adaptive_v2`, only the first and last values are used, as presumed bounds of the optimal value of sigma + alpha_grid (array-like of floats): values of alpha to try + n_sigma_iter (int): number of iterations for the sigma line-search (if adaptive_v2) + n_splits (int): number of folds for k-fold validation + stddev_portion (float): contribution of the error's standard deviation to compare error distributions + sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. + adaptive (bool): to use the adaptive grid for sigma + adaptive_v2 (bool): to use the line search for sigma + read_kernel (bool): to completely discard sigma, assuming the representation array is a precomputed kernel array + printlevel (int): level of verbosity + + Returns: + sigma (float): optimal value of sigma + alpha (float): optimal value of alpha + costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha + """ + errors = [] + + def get_err(s): + if read_kernel is False: + K_all = kernel(X_train, X_train, 1.0/s) + else: + K_all = X_train + + sparse = sparse_idx != None + if sparse: + K_all = K_all[:,sparse_idx] + + results_per_eta = kfold_alpha_eval( + K_all, y_train, splits, eta, sparse, + parallel = parallel, + ) + for mean,std,e in results_per_eta: + if printlevel>0 : + sys.stderr.flush() + print(s, e, mean, std, flush=True) + errors.append((mean, std, e, s)) + + costs = results_per_eta[:,0] + stddev_portion*results_per_eta[:,1] + return costs.min() + + with Parallel(n_jobs=-1) as parallel: + if adaptive_v2: + assert not adaptive + _, _ = parabolic_search( + np.log(sigma[0]), np.log(sigma[-1]), + lambda log_s: get_err(np.exp(log_s)), + n_iter=n_sigma_iter, x_thres=0.1, y_thres=0.01, + ) + elif adaptive: + _ = adaptative_grid_search(sigma, get_err) + else: + for s in sigma: + get_err(s) + + return np.asarray(errors) def hyperparameters(X, y, - sigma=defaults.sigmaarr, eta=defaults.etaarr, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, + sigma=defaults.sigmaarr, eta=defaults.etaarr, sparse=None, + akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, - printlevel=0, adaptive=False, read_kernel=False, sparse=None, random_state=defaults.random_state): + stddev_portion=0.0, n_sigma_iters=5, + printlevel=0, adaptive=False, adaptive_v2=False, random_state=defaults.random_state, +): """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). Args: @@ -22,17 +335,20 @@ def hyperparameters(X, y, y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. sigma (list): List of kernel width for the grid search. eta (list): List of regularization strength for the grid search. + sparse (int): The number of reference environnments to consider for sparse regression. akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). gkernel (str): Global kernel (None, 'REM', 'avg'). gdict (dict): Parameters of the global kernels. + read_kernel (bool): If 'X' is a kernel and not an array of representations. test_size (float or int): Test set fraction (or number of samples). splits (int): K number of splits for the Kfold cross-validation. idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). 
idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. + n_sigma_iters (int): for adaptive_v2, the number of iterations to run the sigma line search for. printlevel (int): Controls level of output printing. adaptive (bool): To expand the grid search adaptatively. - read_kernel (bool): If 'X' is a kernel and not an array of representations. - sparse (int): The number of reference environnments to consider for sparse regression. + adaptive_v2 (bool): To optimise sigma though line search rather than grid search, using the ends of the `sigma` list as presumed lower/upper bounds for the optimal value. random_state (int): The seed used for random number generator (controls train/test splitting). Returns: @@ -43,49 +359,7 @@ def hyperparameters(X, y, Raises: RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ - def k_fold_opt(K_all, eta): - kfold = KFold(n_splits=splits, shuffle=False) - all_maes = [] - for train_idx, test_idx in kfold.split(X_train): - y_kf_train, y_kf_test = y_train[train_idx], y_train[test_idx] - - if not sparse: - K_solve = np.copy(K_all [np.ix_(train_idx,train_idx)]) - K_solve[np.diag_indices_from(K_solve)] += eta - y_solve = y_kf_train - Ks = K_all [np.ix_(test_idx,train_idx)] - else: - K_solve, y_solve = sparse_regression_kernel(K_all[train_idx], y_kf_train, sparse_idx, eta) - Ks = K_all [np.ix_(test_idx,sparse_idx)] - - try: - alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos', overwrite_a=True) - except scipy.linalg.LinAlgError: - print('singular matrix') - all_maes.append(np.nan) - break - y_kf_predict = np.dot(Ks, alpha) - all_maes.append(np.mean(np.abs(y_kf_predict-y_kf_test))) - return np.mean(all_maes), np.std(all_maes) - - def hyper_loop(sigma, eta): - errors = [] - with Parallel(n_jobs=-1) as parallel: - for s in sigma: - if read_kernel is False: - K_all = kernel(X_train, X_train, 1.0/s) - else: - K_all = X_train - - def inner_loop(s,e): - mean, std = k_fold_opt(K_all, e) - if printlevel>0 : - sys.stderr.flush() - print(s, e, mean, std, flush=True) - return (mean, std, e, s) - - errors += parallel(delayed(inner_loop)(s,e) for e in eta) - return errors + if gkernel is None: gwrap = None else: @@ -96,47 +370,76 @@ def inner_loop(s,e): test_size=test_size, random_state=random_state) if read_kernel is False: X_train = X[idx_train] + optimise_sigma = True else: X_train = X[np.ix_(idx_train,idx_train)] - sigma = [np.nan] + optimise_sigma = False if sparse: if read_kernel: raise RuntimeError('Cannot do FPS with kernels') sparse_idx = do_fps(X_train)[0][:sparse] + else: + sparse_idx = None - work_sigma = sigma - errors = [] - direction = None - while True: - errors = list(errors) - errors.extend(hyper_loop(work_sigma, eta)) - errors = np.array(errors) - ind = np.argsort(errors[:,0])[::-1] - errors = errors[ind] + with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: + if optimise_sigma: + errors = search_sigma( + X_train, y_train, splits, + kernel, sigma, eta, sparse_idx, + n_sigma_iters, stddev_portion, + adaptive, adaptive_v2, + read_kernel, printlevel, + ) + else: + K_all = X_train + sparse = sparse_idx is not None + if sparse: + K_all = K_all[:, sparse_idx] + sigma = np.nan + partial_errors = kfold_alpha_eval( + K_all, y, splits, eta, + sparse, parallel, + ) + errors = np.ndarray((len(partial_errors),4)) + errors[:,:3] = partial_errors + errors[:,3] = np.nan - if not 
adaptive: - break + sorter = (errors[:,0] + stddev_portion*errors[:,1]).argsort() + errors = errors[sorter[::-1]] - best_sigma = errors[-1][3] - new_sigma = None + # work_sigma = sigma + # errors = [] + # direction = None + # while True: + # errors = list(errors) + # errors.extend(hyper_loop(work_sigma, eta)) + # errors = np.array(errors) + # ind = np.argsort(errors[:,0])[::-1] + # errors = errors[ind] - if direction is None: - if best_sigma==max(work_sigma): - direction = 'up' - elif best_sigma==min(work_sigma): - direction = 'down' + # if not adaptive: + # break - # at the 1st iteration if is checked twice on purpose - if direction=='up' and best_sigma==max(work_sigma): - new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) - elif direction=='down' and best_sigma==min(work_sigma): - new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + # best_sigma = errors[-1][3] + # new_sigma = None - if new_sigma is None: - break - work_sigma = new_sigma - print('next iteration:', work_sigma, flush=True) + # if direction is None: + # if best_sigma==max(work_sigma): + # direction = 'up' + # elif best_sigma==min(work_sigma): + # direction = 'down' + + # # at the 1st iteration if is checked twice on purpose + # if direction=='up' and best_sigma==max(work_sigma): + # new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) + # elif direction=='down' and best_sigma==min(work_sigma): + # new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + + # if new_sigma is None: + # break + # work_sigma = new_sigma + # print('next iteration:', work_sigma, flush=True) return errors @@ -144,6 +447,11 @@ def _get_arg_parser(): """Parse CLI arguments.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') parser.remove_argument("train_size") + parser.add_argument('--ada2', action='store_true', dest='adaptive_v2', default=False, help='whether to use a continuous adaptative approach to sigma. 
If so, only the first and last sigma values will be used to start the optimisation.') + parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=0.0, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') + parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number of iterations for the sigma-optimisation line search') + + return parser @@ -159,10 +467,15 @@ def main(): X = np.load(args.repr) y = np.loadtxt(args.prop) - errors = hyperparameters(X, y, read_kernel=args.readk, sigma=args.sigma, eta=args.eta, - akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, - sparse=args.sparse, - test_size=args.test_size, splits=args.splits, printlevel=args.printlevel, adaptive=args.adaptive) + errors = hyperparameters( + X, y, + sigma=args.sigma, eta=args.eta, sparse=args.sparse, + akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, read_kernel=args.readk, + test_size=args.test_size, splits=args.splits, idx_test=None, idx_train=None, + stddev_portion=args.stddev_portion, n_sigma_iters=args.n_sigma_iters, + printlevel=args.printlevel, adaptive=args.adaptive, adaptive_v2=args.adaptive_v2, + random_state=args.random_state, + ) errors = np.array(errors) if args.nameout is not None: np.savetxt(args.nameout, errors, header="error stdev eta sigma") diff --git a/tests/test_regression.py b/tests/test_regression.py index b822e6c..a60e606 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -18,15 +18,16 @@ def test_hyperparameters(): yfile = os.path.join(path, 'data/mols/dipole.dat') y = np.loadtxt(yfile) - hyper = hyperparameters.hyperparameters(X, y)[-4:] + hyper = hyperparameters.hyperparameters(X, y, stddev_portion=0)[-4:] true_hyper = [ [5.18303885e-01,3.00507798e-01,1.00000000e-05,3.16227766e+01], [5.18262897e-01,3.00473853e-01,3.16227766e-08,3.16227766e+01], [5.18262767e-01,3.00473746e-01,1.00000000e-10,3.16227766e+01], [5.10592542e-01,3.38247735e-01,1.00000000e+00,3.16227766e+01]] + assert (np.allclose(hyper, true_hyper)) -def test_hyperparameters2(): +def test_hyperparameters_adapt2(): #import logging #logging.basicConfig() #logging.getLogger("qstack").setLevel('DEBUG') @@ -35,8 +36,9 @@ def test_hyperparameters2(): X = np.load(xfile) yfile = os.path.join(path, 'data/mols/dipole.dat') y = np.loadtxt(yfile) + print(np.isfinite(X).all(), np.isfinite(y).all()) - hyper = hyperparameters2.hyperparameters(X, y, random_state=42)[-4:] + hyper = hyperparameters.hyperparameters(X, y, random_state=42, stddev_portion=1.0, adaptive_v2=True)[-4:] true_hyper = [ [5.15813198e-01, 2.37774396e-01, 3.16227766e-08, 3.64079252e+04], [5.15719232e-01, 2.37430538e-01, 1.00000000e-10, 1.00000000e+06], @@ -46,6 +48,16 @@ def test_hyperparameters2(): assert (np.allclose(hyper, true_hyper)) + hyper = hyperparameters.hyperparameters(X, y, random_state=42, stddev_portion=0.0, adaptive_v2=True)[-4:] + true_hyper = [ + [5.10293083e-01, 2.45220151e-01, 1.00000000e-10, 3.25422973e+02], + [5.04772360e-01, 2.54644677e-01, 1.00000000e-05, 1.51009311e+02], + [5.04560208e-01, 2.54267311e-01, 3.16227766e-08, 1.51009311e+02], + [5.04559535e-01, 2.54266113e-01, 1.00000000e-10, 1.51009311e+02], + ] + assert (np.allclose(hyper, true_hyper)) + + def test_regression(): path = os.path.dirname(os.path.realpath(__file__)) X = np.load(os.path.join(path, 'data/mols/X_lb.npy')) From 8c2fef3c75bdf170787a610488ac71de3d4235fd Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Tue, 10 Feb 2026 10:55:56 
+0100 Subject: [PATCH 5/5] fix: make ruff happy --- docs/generate_rst.py | 5 +- qstack/qml/b2r2.py | 9 +- qstack/regression/hyperparameters.py | 62 ++-- qstack/regression/hyperparameters2.py | 412 -------------------------- tests/test_regression.py | 1 - 5 files changed, 49 insertions(+), 440 deletions(-) delete mode 100644 qstack/regression/hyperparameters2.py diff --git a/docs/generate_rst.py b/docs/generate_rst.py index ce15334..ed9b8ba 100755 --- a/docs/generate_rst.py +++ b/docs/generate_rst.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -""" -Generate Sphinx-ready .rst files +"""Generate Sphinx-ready .rst files Used instead of Sphinx to avoid importing broken or expensive dependecies @@ -147,7 +146,7 @@ def extract_module_info(py_path: Path, module_name: str) -> ModuleInfo: try: tree = ast.parse(src) except SyntaxError: - return ModuleInfo(module_name, py_path, None, [], [], None, [],) + return ModuleInfo(module_name, py_path, None, [], [], None, []) mdoc = ast.get_docstring(tree) classes: list[ClassInfo] = [] diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 0c0f98f..66e50ca 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -5,6 +5,7 @@ """ import itertools +from dataclasses import dataclass from types import SimpleNamespace import numpy as np from scipy.special import erf @@ -13,10 +14,12 @@ defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) +@dataclass class Reaction: - def __init__(self, reactants, products): - self.reactants = reactants - self.products = products + """Utility reaction-storing class for Inner workings of B2R2.""" + reactants: list # lists of Mol-like objects (that have .positions (in Å) and .charges attributes) + products: list + def get_bags(unique_ncharges): """Generate all unique element pair combinations including self-interactions. diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 1cefc5b..eb4a333 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,6 +1,7 @@ """Hyperparameter optimization.""" -import sys, logging +import sys +import logging import numpy as np import scipy from sklearn.model_selection import KFold @@ -17,8 +18,8 @@ # parabola-based line search def fit_quadratic(x1,x2,x3, y1,y2,y3): - """ - Compute the three coefficients of a quadratic polynomial going through three given points. + """Compute the three coefficients of a quadratic polynomial going through three given points. + Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. """ # we need to change coordinates around for this @@ -39,8 +40,8 @@ def fit_quadratic(x1,x2,x3, y1,y2,y3): return curv, slope, intercept def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """ - Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + """Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. + Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. 
Args: @@ -54,7 +55,6 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 Returns: the (argmin, min) tuple characterising the minimum of the function (2x float) """ - y_left = get_err(x_left) y_right = get_err(x_right) x_center = 0.5*(x_left+x_right) @@ -103,7 +103,8 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 n_iter -=1 logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) - all_errs.append((x_new, y_new)) ; all_errs.sort() + all_errs.append((x_new, y_new)) + all_errs.sort() logger.debug('current data: %r', all_errs) if x_new < x_left or x_new > x_right: @@ -138,14 +139,32 @@ def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0 def standard_grid_search(x_list, get_err): - errors = [] - for x in x_list: - errors.append(get_err(x)) - errors = np.asarray(errors) + """Module-internal function: single-parameter optimisation function, using a simple grid search. + + Args: + x_list (iterable[float]): pre-defined grid containing the values to try for the parameter + get_err (callable float->float): process to optimise, returning the error associated with a single parameter value. + + Returns: + val (float): optimal parameter value + err (float): the associated error + """ + errors = np.array(get_err(x) for x in x_list) xi = errors.argmin() return xi, errors[xi] def adaptative_grid_search(x_list, get_err): + """Module-internal function: single-parameter optimisation function, using an adaptive grid search. + + Operates like a standard grid search, but extends the grid if the optimal parameter value is at one of the edges of the provided grid. + + Args: + x_list (iterable[float]): pre-defined original grid of the parameter + get_err (callable float->float): process to optimise, returning the error associated with a single parameter value. + + Returns: + errors (np.ndarray[N_evals,2]): list of (parameter value, error) pairs, sorted to have decreasing errors + """ work_list = x_list errors = [] direction = None @@ -158,7 +177,6 @@ def adaptative_grid_search(x_list, get_err): errors = errors[ind] current_argmin = errors[-1,0] - new_sigma = None if direction is None: if current_argmin==max(work_list): @@ -246,13 +264,14 @@ def inner_call(fold_i, eta_i, K_all, sparse, y_train, eta, train_idx, test_idx): def search_sigma( X_train, y_train, splits, - kernel, sigma, eta, + kernel, sigma, eta_grid, sparse_idx=None, n_sigma_iter=5, stddev_portion=0.0, adaptive=False, adaptive_v2=False, read_kernel=False, printlevel=0, ): """Search the optimal values of sigma and alpha for a KRR model with known representations. + Sigma is the width parameter of the kernel function used, and alpha is the regularisation parameter of the resulting matrix equation. @@ -262,12 +281,13 @@ def search_sigma( No matter what, the optimisation of alpha is done over a grid, with k-fold validation. Args: - X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset - y (np.ndarray[n_total]): learnable properties for all inputs + X_train (np.ndarray[n_total,n_features]): feature vectors for the combined train-validation dataset + y_train (np.ndarray[n_total]): learnable properties for all inputs sigma (array-like(float)): values of sigma. 
for `adaptive`, starting values, for `adaptive_v2`, only the first and last values are used, as presumed bounds of the optimal value of sigma - alpha_grid (array-like of floats): values of alpha to try + eta_grid (array-like of floats): values of eta to try n_sigma_iter (int): number of iterations for the sigma line-search (if adaptive_v2) - n_splits (int): number of folds for k-fold validation + kernel (callable): kernel function computing a kernel matrix from two sets of representations vectors and a "gamma" scale parameter + splits (int): number of folds for k-fold validation stddev_portion (float): contribution of the error's standard deviation to compare error distributions sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. adaptive (bool): to use the adaptive grid for sigma @@ -288,12 +308,12 @@ def get_err(s): else: K_all = X_train - sparse = sparse_idx != None + sparse = sparse_idx is not None if sparse: K_all = K_all[:,sparse_idx] results_per_eta = kfold_alpha_eval( - K_all, y_train, splits, eta, sparse, + K_all, y_train, splits, eta_grid, sparse, parallel = parallel, ) for mean,std,e in results_per_eta: @@ -307,7 +327,8 @@ def get_err(s): with Parallel(n_jobs=-1) as parallel: if adaptive_v2: - assert not adaptive + if adaptive: + raise ValueError("Only one of `adaptive`, `adaptive_v2` may be specified.") _, _ = parabolic_search( np.log(sigma[0]), np.log(sigma[-1]), lambda log_s: get_err(np.exp(log_s)), @@ -359,7 +380,6 @@ def hyperparameters(X, y, Raises: RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ - if gkernel is None: gwrap = None else: diff --git a/qstack/regression/hyperparameters2.py b/qstack/regression/hyperparameters2.py deleted file mode 100644 index 7a7147b..0000000 --- a/qstack/regression/hyperparameters2.py +++ /dev/null @@ -1,412 +0,0 @@ -"""Hyperparameter optimisation using a smoother version of sigma selection, -using a no-gradient line search""" - -import sys, logging -import numpy as np -import scipy -from sklearn.metrics import mean_absolute_error -from sklearn.model_selection import KFold -from sklearn.utils.parallel import Parallel, delayed -from qstack.mathutils.fps import do_fps -from qstack.tools import correct_num_threads -from .kernel_utils import get_kernel, defaults, train_test_split_idx -from .parser import RegressionParser - -logger = logging.getLogger("qstack.regression.hyperparameters2") - -# ##################### -# parabola-based line search - - -def fit_quadratic(x1,x2,x3, y1,y2,y3): - """ - Compute the three coefficients of a quadratic polynomial going through three given points. - Could probably be replaced by `np.polyfit` now that I know about it. Fluff it, we ball. - """ - # we need to change coordinates around for this - - # first, slopes at 0.5(x1+x2) and 0.5(x2+x3) - # the 'meta-slope' allows us to get 2*curvature - slope1 = (y2-y1)/(x2-x1) - slope2 = (y3-y2)/(x3-x2) - curv = (slope2-slope1)/(x3-x1) # the "0.5*" disappears - - # then, remove the curvature from points 1 and 2 to determine 1st-degree term - y1_b = y1 - curv*x1**2 - y2_b = y2 - curv*x2**2 - slope = (y2_b-y1_b)/(x2-x1) - - # finally, the intercept - intercept = y1_b - slope*x1 - return curv, slope, intercept - -def parabolic_search(x_left, x_right, get_err, n_iter=10, x_thres=0.1, y_thres=0.01): - """ - Gradient-less line search of the minimum of `get_err`, supposedly between `x_left` and `x_right`. - Fits quadratic polynomials to perform this search, meaning `get_err` is assumed to be convex. 
- - Args: - x_left (float): supposed left bound of the minimum of `get_err` - x_right (float): supposed right bound of the minimum of `get_err` - get_err (callable float->float): the function to minimise. - n_iter (int): the number of function calls allowed - x_thres (float): the acceptable error threshold for the the argmin to find - y_thres (float): the acceptable error threshold for the min to find - - Returns: - the (argmin, min) tuple characterising the minimum of the function (2x float) - """ - - y_left = get_err(x_left) - y_right = get_err(x_right) - x_center = 0.5*(x_left+x_right) - y_center = get_err(x_center) - - all_errs = [(x_left,y_left),(x_center,y_center),(x_right,y_right)] - - while y_left < y_center or y_right < y_center: - # while it looks like we need to look elsewhere than in our original bracket - # (because the center isn't closer to the local minimum than the bounds) - if y_left < y_right: - logger.debug('no local minimum in sight, extending to the left...') - x_right, y_right = x_center, y_center - x_center, y_center = x_left, y_left - x_left = 2*x_center - x_right - y_left = get_err(x_left) - all_errs.insert(0, (x_left,y_left)) - else: - logger.debug('no local minimum in sight, extending to the right...') - x_left, y_left = x_center, y_center - x_center, y_center = x_right, y_right - x_right = 2*x_center - x_left - y_right = get_err(x_right) - all_errs.append((x_right,y_right)) - n_iter -= 1 - if n_iter <=0: - break - # after this point, either we are exiting early or we have found the right bounds - all_errs.sort() - logger.debug('local minimum in bounds, proceeding with parabolic search (bounds at: %r)', all_errs) - logger.debug('chosen: %f\\%f/%f', x_left, x_center, x_right) - while n_iter > 0: - a,b,c = fit_quadratic(x_left, x_center, x_right, y_left, y_center, y_right) - if a<=0: # lol no local minimum - logger.debug('no local minimum...') - if x_left < x_right: - x_new = 0.5*(x_left+x_center) - ypred_new = np.nan - else: - x_new = 0.5*(x_right+x_center) - ypred_new = np.nan - else: - x_new = -0.5*b/a - ypred_new = -0.25*b**2/a + c - y_new = get_err(x_new) - n_iter -=1 - logger.debug('from chosen points %f\\%f/%f', x_left, x_center, x_right) - logger.debug('predicted local minimum at %f->%f, true error %f', x_new, ypred_new, y_new) - all_errs.append((x_new, y_new)) ; all_errs.sort() - logger.debug('current data: %r', all_errs) - - if x_new < x_left or x_new > x_right: - logger.debug('predicted local minimum not in immediate bounds, regaining bearings...') - new_index = np.argmin(np.asarray(all_errs)[:,1]) - if new_index in (0, len(all_errs)-1): - raise AssertionError('edges of the search are somehow the minimum in second phase of function') - x_left, y_left = all_errs[new_index-1] - x_right, y_right = all_errs[new_index+1] - x_center, y_center = all_errs[new_index] - - elif max(y_right,y_left, y_new)-min(y_new, y_center) < y_thres: - break - elif y_new > y_center: - if x_new > x_center: - x_right, y_right = x_new, y_new - else: - x_left, y_left = x_new, y_new - else: # if y_new <= y_center - if x_new > x_center: - x_left, y_left = x_center, y_center - x_center, y_center = x_new, y_new - else: - x_right, y_right = x_center, y_center - x_center, y_center = x_new, y_new - - if abs(x_right - x_left) < x_thres: - break - - opt_idx = np.argmin(np.asarray(all_errs)[:,1]) - return all_errs[opt_idx] - - - -def kfold_alpha_eval(K_all, y, n_splits, alpha_grid, parallel=None, on_compute=(lambda eta,err,stderr:None)): - """Module-internal function: optimise alpha 
(regularisation parameter) of a KRR learning model, using a K-fold validation. - - Args: - K_all: matrix of kernel values (can be n_total*n_total for naive KRR or n_total*n_references for sparse KRR) - y: learnable properties for all inputs (n_total-length vector) - n_splits: number of folds for k-fold validation - alpha_grid: all the values of alpha to try (array-like) - parallel: optional joblib.Parallel instance to use to parallelise this function (by default one is constructed) - on_compute: function to call for the error summaries of each value of alpha - (callable: alpha, error_mean, error_stddev -> None) - - Returns: - - optimal value of alpha - - validation error list for all k-fold evaluations for this value of alpha - """ - if parallel is None: - parallel = Parallel(n_jobs=-1, return_as="generator_unordered") - kfold = KFold(n_splits=n_splits, shuffle=False) - maes = np.full((kfold.get_n_splits(), len(alpha_grid)), np.inf) - y = np.asarray(y) - is_sparse = K_all.shape[0] != K_all.shape[1] - - def inner_loop(split_i, alpha_i, train_idx, val_idx, alpha): - y_val = y[val_idx] - y_train = y[train_idx] - if is_sparse: - K_train = K_all[train_idx, :] - K_val = K_all[val_idx, :] - y_train = K_train.T @ y_train - K_train = K_train.T @ K_train - else: - K_train = K_all[np.ix_(train_idx,train_idx)] - K_val = K_all [np.ix_(val_idx,train_idx)] - if np.may_share_memory(K_train, K_all): - K_train = K_train.copy() - K_train[np.diag_indices_from(K_train)] += alpha - - try: - weights = scipy.linalg.solve(K_train, y_train, assume_a='pos', overwrite_a=True) - except Exception as err: - raise - # bad fit (singular matrix)! - return split_i, alpha_i, np.inf - predict = K_val @ weights - return split_i, alpha_i, mean_absolute_error(y_val, predict) - - mae_generator = parallel( - delayed(inner_loop)(s_i,a_i, t,v,a) - for a_i,a in enumerate(alpha_grid) - for s_i,(t,v) in enumerate(kfold.split(y)) - ) - for split_i, alpha_i, mae in mae_generator: - maes[split_i, alpha_i] = mae - - concat_results = np.full((len(alpha_grid), 2), np.inf) - for alpha_i in range(len(alpha_grid)): - if not np.isfinite(maes[:, alpha_i]).any(): - pass - else: - res = maes[:,alpha_i] - res = res[np.isfinite(res)] - concat_results[alpha_i,0] = res.mean() - concat_results[alpha_i,1] = res.std() - on_compute(alpha_grid[alpha_i], *concat_results[alpha_i]) - #print("kfold evaluation for alpha grid",alpha_grid,concat_results) - selected_alpha_i = concat_results[:,0].argmin() - return alpha_grid[selected_alpha_i], maes[:,selected_alpha_i]#, models[:, selected_alpha_i].copy() - - -def search_sigma( - X, y, kernel, - sigma_bounds, alpha_grid, - n_iter, n_splits, - stddev_portion=+1.0, sparse_idx=None, - parallel=None, on_compute=(lambda sigma,alpha,err,stderr:None) -): - """Search the optimal values of sigma and alpha for a KRR model with known representations. - Sigma is the width parameter of the kernel function used, - and alpha is the regularisation parameter of the resulting matrix equation. - - Internally, calls the line-search rountine for gamma, - where the function to minimise performs its own grid-based optimisation of alpha. 
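To make the nested optimisation concrete: for each trial sigma, every fold boils down to one regularised kernel solve plus a validation-error evaluation. A self-contained sketch of a single dense (non-sparse) fold, with a stand-in Gaussian kernel and synthetic data; names and values are illustrative only:

    import numpy as np
    import scipy.linalg
    from sklearn.metrics import mean_absolute_error

    rng = np.random.default_rng(0)
    X = rng.normal(size=(40, 5))                       # synthetic representations
    y = X @ rng.normal(size=5)                         # synthetic target property

    def toy_kernel(A, B, gamma):
        # stand-in kernel; gamma plays the role of 1/sigma
        d2 = ((A[:, None, :] - B[None, :, :])**2).sum(axis=-1)
        return np.exp(-gamma*d2)

    sigma, alpha = 2.0, 1e-6
    K = toy_kernel(X, X, 1.0/sigma)
    train_idx, val_idx = np.arange(30), np.arange(30, 40)

    K_train = K[np.ix_(train_idx, train_idx)].copy()   # copy before touching the diagonal
    K_train[np.diag_indices_from(K_train)] += alpha    # ridge regularisation
    weights = scipy.linalg.solve(K_train, y[train_idx], assume_a='pos')
    mae = mean_absolute_error(y[val_idx], K[np.ix_(val_idx, train_idx)] @ weights)

In the sparse case the same fold instead solves the normal equations built from the n_total-by-n_references kernel block, as in the function above.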
- - Args: - X (np.ndarray[n_total,n_features]: feature vectors for the combined train-validation dataset - y (np.ndarray[n_total]): learnable properties for all inputs - sigma_bounds (tuple(float,float)): presumed bounds of the optimal value of sigma - alpha_grid (array-like of floats): values of alpha to try - n_iter (int): number of iterations for the sigma line-search - n_splits (int): number of folds for k-fold validation - stddev_portion (float): contribution of the error's standard deviation to compare error distributions - sparse_idx (optional np.ndarray[int, n_references]): selection of reference inputs for sparse KRR. - parallel (optional joblib.Parallel): tool to make the optimisation more parallel. by default, one will be (re)created as often as necessary. - on_compute (callable sigma,alpha,err_mean,err_stddev -> None) - - Returns: - sigma (float): optimal value of sigma - alpha (float): optimal value of alpha - costs (np.ndarray[n_splits]): validation error distribution for these values of sigma,alpha - """ - - sigma_left, sigma_right = sigma_bounds - - err_dict = {} - - def get_err(log_sigma): - sigma = np.exp(log_sigma) - - K_all = kernel(X, X, 1.0/sigma) - if sparse_idx is not None: - K_all = K_all[:, sparse_idx] - - alpha, costs = kfold_alpha_eval( - K_all, y, n_splits, alpha_grid, - parallel=parallel, on_compute=(lambda eta,err,stderr: on_compute(sigma,eta,err,stderr)), - ) - err_dict[log_sigma] = (alpha,costs) - cost_res = costs.mean() + stddev_portion*costs.std() - #print("now eval'ing σ=", sigma, '... α=', alpha, costs.shape, costs.mean(), costs.std()) - return cost_res - - log_sigma_selected, cost_selected = parabolic_search( - np.log(sigma_left), np.log(sigma_right), - get_err, - n_iter=n_iter, x_thres=0.1, y_thres=0.01, - ) - - alpha_selected, costs_selected = err_dict[log_sigma_selected] - sigma = np.exp(log_sigma_selected) - - return sigma, alpha_selected, costs_selected - - - -def hyperparameters(X, y, - sigma_low=defaults.sigmaarr[0], sigma_high=defaults.sigmaarr[-1], eta=defaults.etaarr, - akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, read_kernel=False, - test_size=defaults.test_size, splits=defaults.splits, n_sigma_iters=5, idx_test=None, idx_train=None, - printlevel=0, sparse=None, - stddev_portion=+1.0, - random_state=defaults.random_state, -): - """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). - - Args: - X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. - y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. - sigma_low (float): Estimated low bound forthe kernel width. - sigma_high (float): Estimated high bound forthe kernel width. - eta (list): List of regularization strength for the grid search. - akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). - gkernel (str): Global kernel (None, 'REM', 'avg'). - gdict (dict): Parameters of the global kernels. - test_size (float or int): Test set fraction (or number of samples). - splits (int): K number of splits for the Kfold cross-validation. - n_sigma_iters (int): number of iterations for the sigma-optimisation line search - idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). - idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). - printlevel (int): Controls level of output printing. 
- read_kernel (bool): If 'X' is a kernel and not an array of representations (disables sigma optimisation). - sparse (int): The number of reference environnments to consider for sparse regression. - stddev_portion (float): The amount of error standard deviation to add to error means, for error distribution ranking. - random_state (int): The seed used for random number generator (controls train/test splitting). - - Returns: - The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], - where C is the number of parameter set and - the array is sorted according to MAEs (last is minimum) - - Raises: - RuntimeError: If 'X' is a kernel and sparse regression is chosen. - """ - - if gkernel is None: - gwrap = None - else: - gwrap = [gkernel, gdict] - kernel = get_kernel(akernel, gwrap) - - idx_train, _, y_train, _ = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, - test_size=test_size, random_state=random_state) - if read_kernel is False: - X_train = X[idx_train] - optimise_sigma = True - else: - X_train = X[np.ix_(idx_train,idx_train)] - optimise_sigma = False - - if sparse: - if read_kernel: - raise RuntimeError('Cannot do FPS with kernels') - sparse_idx = do_fps(X_train)[0][:sparse] - else: - sparse_idx = np.arange(X_train.shape[0]) - - errors = [] - with Parallel(n_jobs=1, return_as="generator_unordered") as parallel: - if optimise_sigma: - err_append = lambda sigma,alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) - _,_,_ = search_sigma( - X_train, y_train, kernel, (sigma_low, sigma_high), alpha_grid=eta, - parallel=parallel, on_compute=err_append, - n_iter = n_sigma_iters, n_splits=splits, stddev_portion=stddev_portion, - sparse_idx=sparse_idx, - ) - else: - if sparse_idx is not None: - K_all = X_train[:, sparse_idx] - else: - K_all = X_train - sigma = np.nan - err_append = lambda alpha,err,stderr: errors.append((err,stderr, alpha,sigma)) - _,_ = kfold_alpha_eval( - K_all, y, splits, alpha_grid=eta, - parallel=parallel, on_compute=err_append, - ) - - - errors = np.array(errors) - ind = np.argsort(errors[:,0]+stddev_portion*errors[:,1])[::-1] - errors = errors[ind] - return errors - - -def _get_arg_parser(): - """Parse CLI arguments.""" - parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') - parser.remove_argument("train_size") - parser.remove_argument("sigma") - parser.remove_argument("adaptative") - parser.add_argument('--sigma-low', type=float, dest='sigma_low', default=1E-2, help='estimated low bound for sigma') - parser.add_argument('--sigma-high', type=float, dest='sigma_high', default=1E+2, help='estimated high bound for sigma') - parser.add_argument('--stddev-portion', type=float, dest='stddev_portion', default=1, help='amount of error standard deviation to add to error means, for error distribution ranking in the output.') - parser.add_argument('--sigma-iterations', type=int, dest='n_sigma_iters', default=5, help='number of iterations for the sigma-optimisation line search') - - - return parser - - -def main(): - """Command-line entry point for hyperparameter optimization.""" - args = _get_arg_parser().parse_args() - if args.readk: - args.sigma = [np.nan] - if args.ll: - correct_num_threads() - print(vars(args)) - - X = np.load(args.repr) - y = np.loadtxt(args.prop) - - errors = hyperparameters( - X, y, read_kernel=args.readk, - sigma_low=args.sigma_low, sigma_high=args.sigma_high, eta=args.eta, - akernel=args.akernel, gkernel=args.gkernel, gdict=args.gdict, 
sparse=args.sparse, - test_size=args.test_size, splits=args.splits, n_sigma_iters=args.n_sigma_iters, - printlevel=args.printlevel, random_state=args.random_state, stddev_portion=args.stddev_portion, - ) - errors = np.array(errors) - if args.nameout is not None: - np.savetxt(args.nameout, errors, header="error stdev eta sigma") - print() - print('error stdev eta sigma') - for error in errors: - print("{:e} {:e} | {:e} {:e}".format(*tuple(error))) - - -if __name__ == "__main__": - main() diff --git a/tests/test_regression.py b/tests/test_regression.py index a60e606..208a228 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -3,7 +3,6 @@ import os import numpy as np import qstack.regression.hyperparameters as hyperparameters -import qstack.regression.hyperparameters2 as hyperparameters2 import qstack.regression.regression as regression import qstack.regression.final_error as final_error import qstack.regression.condition as condition
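The error table shared by the returned array and the printed output (columns: error, stdev, eta, sigma) is ranked by mean error plus stddev_portion times the standard deviation, in decreasing order, so the best hyperparameter set is the last row. A minimal sketch of that ranking with made-up numbers:

    import numpy as np

    stddev_portion = 1.0
    errors = np.array([                  # columns: MAE, std, eta, sigma (illustrative values)
        [0.30, 0.05, 1e-6, 10.0],
        [0.25, 0.02, 1e-5,  3.0],
        [0.40, 0.10, 1e-4,  1.0],
    ])
    order = np.argsort(errors[:, 0] + stddev_portion*errors[:, 1])[::-1]
    errors = errors[order]               # worst first, best last
    best_mae, best_std, best_eta, best_sigma = errors[-1]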