diff --git a/bin/rabbit_fit.py b/bin/rabbit_fit.py
index 58d7170..968c396 100755
--- a/bin/rabbit_fit.py
+++ b/bin/rabbit_fit.py
@@ -276,41 +276,43 @@ def fit(args, fitter, ws, dofit=True):
     if args.externalPostfit is not None:
         fitter.load_fitresult(args.externalPostfit, args.externalPostfitResult)
-    else:
-        if dofit:
-            cb = fitter.minimize()
-            if cb is not None:
-                ws.add_loss_time_hist(cb.loss_history, cb.time_history)
+        if fitter.binByBinStat:
+            fitter._profile_beta()
 
-        if not args.noHessian:
-            # compute the covariance matrix and estimated distance to minimum
+    if dofit:
+        cb = fitter.minimize()
 
-            val, grad, hess = fitter.loss_val_grad_hess()
-            edmval, cov = edmval_cov(grad, hess)
+        if cb is not None:
+            ws.add_loss_time_hist(cb.loss_history, cb.time_history)
 
-            logger.info(f"edmval: {edmval}")
+    if not args.noHessian:
+        # compute the covariance matrix and estimated distance to minimum
+
+        val, grad, hess = fitter.loss_val_grad_hess()
+        edmval, cov = edmval_cov(grad, hess)
+        logger.info(f"edmval: {edmval}")
 
-            fitter.cov.assign(cov)
+        fitter.cov.assign(cov)
 
-            del cov
+        del cov
 
-            if fitter.binByBinStat and fitter.diagnostics:
-                # This is the estimated distance to minimum with respect to variations of
-                # the implicit binByBinStat nuisances beta at fixed parameter values.
-                # It should be near-zero by construction as long as the analytic profiling is
-                # correct
-                _, gradbeta, hessbeta = fitter.loss_val_grad_hess_beta()
-                edmvalbeta, covbeta = edmval_cov(gradbeta, hessbeta)
-                logger.info(f"edmvalbeta: {edmvalbeta}")
+        if fitter.binByBinStat and fitter.diagnostics:
+            # This is the estimated distance to minimum with respect to variations of
+            # the implicit binByBinStat nuisances beta at fixed parameter values.
+            # It should be near-zero by construction as long as the analytic profiling is
+            # correct
+            _, gradbeta, hessbeta = fitter.loss_val_grad_hess_beta()
+            edmvalbeta, covbeta = edmval_cov(gradbeta, hessbeta)
+            logger.info(f"edmvalbeta: {edmvalbeta}")
 
-            if args.doImpacts:
-                ws.add_impacts_hists(*fitter.impacts_parms(hess))
+        if args.doImpacts:
+            ws.add_impacts_hists(*fitter.impacts_parms(hess))
 
-            del hess
+        del hess
 
-            if args.globalImpacts:
-                ws.add_global_impacts_hists(*fitter.global_impacts_parms())
+        if args.globalImpacts:
+            ws.add_global_impacts_hists(*fitter.global_impacts_parms())
 
     nllvalreduced = fitter.reduced_nll().numpy()
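Context for reviewers: `edmval_cov` is defined elsewhere in rabbit and not shown in this patch. A minimal numpy sketch of the standard definition it is expected to follow, with the covariance taken as the inverse Hessian and EDM = g^T H^{-1} g / 2 (the `edmval_cov_sketch` name and toy numbers are illustrative only):

```python
import numpy as np

def edmval_cov_sketch(grad, hess):
    # covariance estimate is the inverse Hessian at the minimum
    cov = np.linalg.inv(hess)
    # quadratic estimate of the remaining distance to the true minimum
    edmval = 0.5 * grad @ cov @ grad
    return edmval, cov

# at a true minimum grad ~ 0, so edmval ~ 0:
hess = np.array([[2.0, 0.3], [0.3, 1.0]])
grad = np.array([1e-6, -2e-6])
edm, cov = edmval_cov_sketch(grad, hess)
print(edm)  # ~1e-12, i.e. converged
```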
diff --git a/bin/rabbit_plot_likelihood_scan.py b/bin/rabbit_plot_likelihood_scan.py
index a6a4752..a052db1 100755
--- a/bin/rabbit_plot_likelihood_scan.py
+++ b/bin/rabbit_plot_likelihood_scan.py
@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 
 import argparse
-import os
 
-import matplotlib.pyplot as plt
 import mplhep as hep
 import numpy as np
 
@@ -16,32 +14,6 @@
 logger = None
 
 
-def writeOutput(fig, outfile, extensions=[], postfix=None, args=None, meta_info=None):
-    name, _ = os.path.splitext(outfile)
-
-    if postfix:
-        name += f"_{postfix}"
-
-    for ext in extensions:
-        if ext[0] != ".":
-            ext = "." + ext
-        output = name + ext
-        print(f"Write output file {output}")
-        plt.savefig(output)
-
-    output = name.rsplit("/", 1)
-    output[1] = os.path.splitext(output[1])[0]
-    if len(output) == 1:
-        output = (None, *output)
-    if args is None and meta_info is None:
-        return
-    output_tools.write_logfile(
-        *output,
-        args=args,
-        meta_info=meta_info,
-    )
-
-
 def parseArgs():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -73,6 +45,11 @@ def parseArgs():
         default="./test",
         help="Folder path for output",
     )
+    parser.add_argument(
+        "--eoscp",
+        action="store_true",
+        help="Override use of xrdcp and use the mount instead",
+    )
     parser.add_argument(
         "-p", "--postfix", type=str, help="Postfix for output file name"
     )
@@ -113,6 +90,13 @@ def parseArgs():
         default=None,
         help="x axis limits",
     )
+    parser.add_argument(
+        "--ylim",
+        type=float,
+        nargs=2,
+        default=None,
+        help="y axis limits",
+    )
     parser.add_argument("--titlePos", type=int, default=2, help="title position")
     parser.add_argument(
         "--config",
@@ -133,6 +117,7 @@ def plot_scan(
     subtitle=None,
     titlePos=0,
     xlim=None,
+    ylim=None,
     combine=None,
     ylabel=r"$-2\,\Delta \log L$",
     config={},
@@ -148,13 +133,15 @@ def plot_scan(
 
     if xlim is None:
         xlim = (min(x), max(x))
+    if ylim is None:
+        ylim = (min(y), max(y))
 
     fig, ax = plot_tools.figure(
         x,
         xlabel,
         ylabel,
         xlim=xlim,
-        ylim=(min(y), max(y)),  # logy=args.logy
+        ylim=ylim,  # logy=args.logy
     )
 
     ax.axhline(y=1, color="gray", linestyle="--", alpha=0.5)
@@ -227,6 +214,7 @@ def plot_scan(
 def main():
     args = parseArgs()
+    outdir = output_tools.make_plot_dir(args.outpath, eoscp=args.eoscp)
 
     global logger
     logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger)
@@ -280,19 +268,20 @@ def main():
             subtitle=args.subtitle,
             titlePos=args.titlePos,
             xlim=args.xlim,
+            ylim=args.ylim,
             config=config,
             combine=(vals, nlls) if args.combine is not None else None,
             no_hessian=args.noHessian,
         )
-        os.makedirs(args.outpath, exist_ok=True)
-        outfile = os.path.join(args.outpath, f"nll_scan_{param}")
-        writeOutput(
-            fig,
-            outfile=outfile,
-            extensions=["png", "pdf"],
-            meta_info=meta,
+
+        to_join = [f"nll_scan_{param}", args.postfix]
+        outfile = "_".join(filter(lambda x: x, to_join))
+        plot_tools.save_pdf_and_png(outdir, outfile)
+        output_tools.write_index_and_log(
+            outdir,
+            outfile,
+            analysis_meta_info=meta,
             args=args,
-            postfix=args.postfix,
         )
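The `"_".join(filter(lambda x: x, to_join))` idiom above is what keeps a missing `--postfix` from producing a trailing underscore in the file name; a quick illustration:

```python
# filter(lambda x: x, ...) drops falsy entries (None or ""):
to_join = ["nll_scan_mu", None]
print("_".join(filter(lambda x: x, to_join)))  # -> "nll_scan_mu"

to_join = ["nll_scan_mu", "asimov"]
print("_".join(filter(lambda x: x, to_join)))  # -> "nll_scan_mu_asimov"
```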
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index faba62a..6255db3 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -45,9 +45,7 @@ def __init__(self, xv):
     def __call__(self, intermediate_result):
         loss = intermediate_result.fun
 
-        logger.debug(
-            f"Iteration {self.iiter}: loss value {loss}"
-        )  # ; status {intermediate_result.status}")
+        logger.debug(f"Iteration {self.iiter}: loss value {loss}")
 
         if np.isnan(loss):
             raise ValueError(f"Loss value is NaN at iteration {self.iiter}")
@@ -81,15 +79,6 @@ def __init__(
         else:
             self.binByBinStatType = options.binByBinStatType
 
-        if (
-            self.binByBinStat
-            and self.binByBinStatMode == "full"
-            and not self.binByBinStatType.startswith("normal")
-        ):
-            raise Exception(
-                'bin-by-bin stat only for option "--binByBinStatMode full" with "--binByBinStatType normal"'
-            )
-
         if (
             options.covarianceFit
             and self.binByBinStat
@@ -139,15 +128,32 @@ def __init__(
         )
         self.init_blinding_values(options.unblind)
 
-        self.parms = np.concatenate([self.poi_model.pois, self.indata.systs])
-
+        parms = [self.poi_model.pois, self.indata.systs]
         # tf variable containing all fit parameters
         thetadefault = tf.zeros([self.indata.nsyst], dtype=self.indata.dtype)
         if self.poi_model.npoi > 0:
-            xdefault = tf.concat([self.poi_model.xpoidefault, thetadefault], axis=0)
+            xdefault = [self.poi_model.xpoidefault, thetadefault]
         else:
-            xdefault = thetadefault
+            xdefault = [thetadefault]
+
+        if self.binByBinStatType == "gamma" and self.binByBinStatMode == "full":
+            # explicit parameters for nbeta, defined as: nbeta = sum(beta_j n_j) / sum(n_j)
+            nbetadefault = tf.ones([self.indata.nbins], dtype=self.indata.dtype)
+            xdefault.append(nbetadefault)
+            self.nbeta0 = tf.Variable(
+                tf.ones(self.indata.nbins, dtype=self.indata.dtype),
+                trainable=False,
+                name="nbeta0",
+            )
+            parms_nbeta = np.char.add(
+                "nbeta_", np.arange(self.indata.nbins).astype(str)
+            )
+            parms.append(parms_nbeta)
+
+        self.parms = np.concatenate(parms)
+
+        if len(xdefault) > 1:
+            xdefault = tf.concat(xdefault, axis=0)
+        else:
+            xdefault = xdefault[0]
 
         self.x = tf.Variable(xdefault, trainable=True, name="x")
 
         # for freezing parameters
@@ -230,10 +236,10 @@ def __init__(
             self.varbeta = self.indata.sumw2
             self.sumw = self.indata.sumw
 
+            self.betamask = (self.varbeta == 0.0) | (self.sumw == 0.0)
+
             if self.binByBinStatType in ["gamma", "normal-multiplicative"]:
-                self.kstat = self.sumw**2 / self.varbeta
-                self.betamask = (self.varbeta == 0.0) | (self.kstat == 0.0)
-                self.kstat = tf.where(self.betamask, 1.0, self.kstat)
+                self.kstat = tf.where(self.betamask, 1.0, self.sumw**2 / self.varbeta)
             elif self.binByBinStatType == "normal-additive":
                 # precompute decomposition of composite matrix to speed up
                 # calculation of profiled beta values
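The `kstat` computed above is the usual Barlow-Beeston effective MC count per bin, kstat = sumw^2 / sumw2, with empty bins masked so that beta stays pinned at beta0. A small numpy sketch with toy numbers (the inner guard in the division is only needed because numpy, unlike the graph-mode `tf.where`, warns on 0/0):

```python
import numpy as np

sumw = np.array([10.0, 4.0, 0.0])   # sum of MC weights per bin
sumw2 = np.array([2.5, 4.0, 0.0])   # sum of squared weights (MC variance)

betamask = (sumw2 == 0.0) | (sumw == 0.0)
kstat = np.where(betamask, 1.0, sumw**2 / np.where(betamask, 1.0, sumw2))
print(kstat)     # [40.  4.  1.] -> 40 "effective" unweighted events in bin 0
print(betamask)  # [False False  True] -> bin 2 has no MC and is masked
```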
@@ -403,8 +409,24 @@ def set_blinding_offsets(self, blind=True):
                 np.zeros(self.indata.nsyst, dtype=np.float64)
             )
 
+    def get_nbeta(self):
+        nbeta = self.x[
+            self.poi_model.npoi
+            + self.indata.nsyst : self.poi_model.npoi
+            + self.indata.nsyst
+            + self.indata.nbins
+        ]
+        return nbeta
+
     def get_blinded_theta(self):
-        theta = self.x[self.poi_model.npoi :]
+        theta = self.x[self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst]
+        theta = tf.where(
+            self.frozen_params_mask[
+                self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst
+            ],
+            tf.stop_gradient(theta),
+            theta,
+        )
         if self.do_blinding:
             return theta + self._blinding_offsets_theta
         else:
@@ -416,6 +438,9 @@ def get_blinded_poi(self):
             poi = xpoi
         else:
             poi = tf.square(xpoi)
+        poi = tf.where(
+            self.frozen_params_mask[: self.poi_model.npoi], tf.stop_gradient(poi), poi
+        )
         if self.do_blinding:
             return poi * self._blinding_offsets_poi
         else:
@@ -438,8 +463,14 @@ def prefit_covariance(self, unconstrained_err=0.0):
             unconstrained_err**2,
             tf.math.reciprocal(self.indata.constraintweights),
         )
+        vars = [var_poi, var_theta]
 
-        invhessianprefit = tf.linalg.diag(tf.concat([var_poi, var_theta], axis=0))
+        if self.binByBinStatType == "gamma" and self.binByBinStatMode == "full":
+            # the uncertainty from the explicit nbeta parameters is accounted for by the implicit beta parameters
+            var_nbeta = tf.zeros([self.indata.nbins], dtype=self.indata.dtype)
+            vars.append(var_nbeta)
+
+        invhessianprefit = tf.linalg.diag(tf.concat(vars, axis=0))
         return invhessianprefit
 
     @tf.function
@@ -479,9 +510,19 @@ def theta0defaultassign(self):
 
     def xdefaultassign(self):
         if self.poi_model.npoi == 0:
-            self.x.assign(self.theta0)
+            to_assign = [self.theta0]
+        else:
+            to_assign = [self.poi_model.xpoidefault, self.theta0]
+
+        if self.binByBinStatType == "gamma" and self.binByBinStatMode == "full":
+            to_assign.append(self.nbeta0)
+
+        if len(to_assign) > 1:
+            to_assign = tf.concat(to_assign, axis=0)
         else:
-            self.x.assign(tf.concat([self.poi_model.xpoidefault, self.theta0], axis=0))
+            to_assign = to_assign[0]
+
+        self.x.assign(to_assign)
 
     def beta0defaultassign(self):
         self.set_beta0(self._default_beta0())
@@ -526,6 +567,10 @@ def bayesassign(self):
 
         if self.binByBinStat:
             if self.binByBinStatType == "gamma":
+                if self.binByBinStatMode == "full":
+                    raise NotImplementedError(
+                        "Barlow-Beeston full with gamma is not yet implemented"
+                    )
                 # FIXME this is only valid for beta0=beta=1 (but this should always be the case when throwing toys)
                 betagen = (
                     tf.random.gamma(
@@ -561,6 +606,10 @@ def frequentistassign(self):
         )
         if self.binByBinStat:
             if self.binByBinStatType == "gamma":
+                if self.binByBinStatMode == "full":
+                    raise NotImplementedError(
+                        "Barlow-Beeston full with gamma is not yet implemented"
+                    )
                 # FIXME this is only valid for beta0=beta=1 (but this should always be the case when throwing toys)
                 beta0gen = (
                     tf.random.poisson(
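The freezing mechanism added to `get_blinded_poi`/`get_blinded_theta` relies on `tf.where(mask, tf.stop_gradient(x), x)`: frozen entries keep their value but contribute zero gradient, so the minimizer cannot move them. A self-contained check:

```python
import tensorflow as tf

x = tf.Variable([1.0, 2.0, 3.0])
frozen = tf.constant([False, True, False])

with tf.GradientTape() as tape:
    # frozen entries are routed through stop_gradient, the rest pass through
    xeff = tf.where(frozen, tf.stop_gradient(x), x)
    loss = tf.reduce_sum(xeff**2)

print(tape.gradient(loss, x).numpy())  # [2. 0. 6.] -> zero gradient at the frozen index
```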
@@ -767,7 +816,12 @@ def envelope(values):
 
     def _compute_impact_group(self, v, idxs):
         cov_reduced = tf.gather(
-            self.cov[self.poi_model.npoi :, self.poi_model.npoi :], idxs, axis=0
+            self.cov[
+                self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst,
+                self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst,
+            ],
+            idxs,
+            axis=0,
         )
         cov_reduced = tf.gather(cov_reduced, idxs, axis=1)
         v_reduced = tf.gather(v, idxs, axis=1)
@@ -779,7 +833,10 @@ def _gather_poi_noi_vector(self, v):
         v_poi = v[: self.poi_model.npoi]
         # protection for constained NOIs, set them to 0
         mask = (self.indata.noiidxs >= 0) & (
-            self.indata.noiidxs < tf.shape(v[self.poi_model.npoi :])[0]
+            self.indata.noiidxs
+            < tf.shape(
+                v[self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst]
+            )[0]
         )
         safe_idxs = tf.where(mask, self.indata.noiidxs, 0)
         mask = tf.cast(mask, v.dtype)
@@ -789,19 +846,47 @@ def _gather_poi_noi_vector(self, v):
                 [tf.shape(mask), tf.ones(tf.rank(v) - 1, dtype=tf.int32)], axis=0
             ),
         )
-        v_noi = tf.gather(v[self.poi_model.npoi :], safe_idxs) * mask
+        v_noi = (
+            tf.gather(
+                v[self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst],
+                safe_idxs,
+            )
+            * mask
+        )
         v_gathered = tf.concat([v_poi, v_noi], axis=0)
         return v_gathered
 
     @tf.function
     def impacts_parms(self, hess):
         # impact for poi at index i in covariance matrix from nuisance with index j is C_ij/sqrt(C_jj) = <dx_i dtheta_j>/sqrt(<dtheta_j^2>)
-        v = self._gather_poi_noi_vector(self.cov)
-        impacts = v / tf.reshape(tf.sqrt(tf.linalg.diag_part(self.cov)), [1, -1])
-        nstat = self.poi_model.npoi + self.indata.nsystnoconstraint
-        hess_stat = hess[:nstat, :nstat]
-        inv_hess_stat = tf.linalg.inv(hess_stat)
+        nstat = self.poi_model.npoi + self.indata.nsystnoconstraint
+
+        if self.binByBinStatMode == "full" and self.binByBinStatType == "gamma":
+            # strip off explicit BB parameters
+            cov = self.cov[
+                : self.poi_model.npoi + self.indata.nsyst,
+                : self.poi_model.npoi + self.indata.nsyst,
+            ]
+            # explicit BB parameters are unconstrained, i.e. they count towards the stat uncertainty
+            idx_stat = tf.concat(
+                [
+                    tf.range(nstat),
+                    tf.range(
+                        self.poi_model.npoi + self.indata.nsyst,
+                        self.poi_model.npoi + self.indata.nsyst + self.indata.nbins,
+                    ),
+                ],
+                axis=0,
+            )
+            hess_stat = tf.gather(tf.gather(hess, idx_stat, axis=0), idx_stat, axis=1)
+        else:
+            cov = self.cov
+            hess_stat = hess[:nstat, :nstat]
+
+        v = self._gather_poi_noi_vector(cov)
+        impacts = v / tf.reshape(tf.sqrt(tf.linalg.diag_part(cov)), [1, -1])
+        inv_hess_stat = tf.linalg.inv(hess_stat)[:nstat, :nstat]
 
         if self.binByBinStat:
             # impact bin-by-bin stat
@@ -829,7 +913,8 @@ def impacts_parms(self, hess):
         if len(self.indata.systgroupidxs):
             impacts_grouped_syst = tf.map_fn(
                 lambda idxs: self._compute_impact_group(
-                    v[:, self.poi_model.npoi :], idxs
+                    v[:, self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst],
+                    idxs,
                 ),
                 tf.ragged.constant(self.indata.systgroupidxs, dtype=tf.int32),
                 fn_output_signature=tf.TensorSpec(
@@ -1015,6 +1100,8 @@ def global_impacts_parms(self):
             impacts_grouped_syst = tf.transpose(impacts_grouped_syst)
             impacts_grouped = tf.concat([impacts_grouped_syst, impacts_grouped], axis=1)
 
+        # strip off explicit BB parameters if present
+        impacts = impacts[:, : self.indata.nsyst]
         return impacts, impacts_grouped
 
     def _pd2ldbeta2(self, profile=False):
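For reference, the impact definition quoted in the `impacts_parms` comment, C_ij / sqrt(C_jj), is the correlated shift of parameter i per one-sigma shift of nuisance j; with a covariance matrix in hand it is a one-liner (toy numbers):

```python
import numpy as np

cov = np.array([[0.25, 0.06, -0.02],
                [0.06, 0.09,  0.01],
                [-0.02, 0.01, 0.04]])

impacts = cov / np.sqrt(np.diag(cov))[None, :]  # column j normalized by sigma_j
print(impacts[0])  # [0.5  0.2 -0.1] -> impacts of each nuisance on parameter 0
```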
@@ -1246,6 +1333,8 @@ def compute_derivatives(dvars):
                 [impacts_grouped_syst, impacts_grouped], axis=-1
             )
 
+        # strip off explicit BB parameters if present
+        impacts = impacts[:, : self.indata.nsyst]
         impacts = tf.reshape(impacts, [*expvar.shape, impacts.shape[-1]])
         impacts_grouped = tf.reshape(
             impacts_grouped, [*expvar.shape, impacts_grouped.shape[-1]]
@@ -1301,15 +1390,6 @@ def _compute_yields_noBBB(self, full=True):
         poi = self.get_blinded_poi()
         theta = self.get_blinded_theta()
 
-        poi = tf.where(
-            self.frozen_params_mask[: self.poi_model.npoi], tf.stop_gradient(poi), poi
-        )
-        theta = tf.where(
-            self.frozen_params_mask[self.poi_model.npoi :],
-            tf.stop_gradient(theta),
-            theta,
-        )
-
         rnorm = self.poi_model.compute(poi)
 
         normcentral = None
@@ -1401,28 +1481,36 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
             nexp_profile = nexp[: self.indata.nbins]
             beta0 = self.beta0[: self.indata.nbins]
+            betamask = self.betamask[: self.indata.nbins]
 
             if self.chisqFit:
                 if self.binByBinStatType == "gamma":
                     kstat = self.kstat[: self.indata.nbins]
-
-                    abeta = nexp_profile**2
-                    bbeta = kstat * self.varnobs - nexp_profile * self.nobs
-                    cbeta = -kstat * self.varnobs * beta0
-                    beta = solve_quad_eq(abeta, bbeta, cbeta)
-
-                    betamask = self.betamask[: self.indata.nbins]
-                    beta = tf.where(betamask, beta0, beta)
+                    if self.binByBinStatMode == "lite":
+                        abeta = nexp_profile**2
+                        bbeta = kstat * self.varnobs - nexp_profile * self.nobs
+                        cbeta = -kstat * self.varnobs * beta0
+                        beta = solve_quad_eq(abeta, bbeta, cbeta)
+                    elif self.binByBinStatMode == "full":
+                        norm_profile = norm[: self.indata.nbins]
+                        nbeta = self.x[self.poi_model.npoi + self.indata.nsyst :]
+                        beta = (
+                            kstat
+                            * beta0
+                            / (
+                                ((self.nobs - nexp_profile * nbeta) / self.varnobs)[
+                                    ..., None
+                                ]
+                                * norm_profile
+                                + kstat
+                            )
+                        )
                 elif self.binByBinStatType == "normal-multiplicative":
                     kstat = self.kstat[: self.indata.nbins]
-                    betamask = self.betamask[: self.indata.nbins]
                     if self.binByBinStatMode == "lite":
                         beta = (
                             nexp_profile * self.nobs / self.varnobs + kstat * beta0
                         ) / (kstat + nexp_profile * nexp_profile / self.varnobs)
-
-                        beta = tf.where(betamask, beta0, beta)
-
                     elif self.binByBinStatMode == "full":
                         norm_profile = norm[: self.indata.nbins]
                         n2kstat = tf.square(norm_profile) / kstat
@@ -1443,7 +1531,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
                             * norm_profile
                             / kstat
                         )
-                        beta = tf.where(betamask, beta0, beta)
                 elif self.binByBinStatType == "normal-additive":
                     varbeta = self.varbeta[: self.indata.nbins]
                     sbeta = tf.math.sqrt(varbeta)
@@ -1468,7 +1555,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
             elif self.covarianceFit:
                 if self.binByBinStatType == "normal-multiplicative":
                     kstat = self.kstat[: self.indata.nbins]
-                    betamask = self.betamask[: self.indata.nbins]
 
                     if self.binByBinStatMode == "lite":
                         nexp_profile_m = tf.linalg.LinearOperatorDiag(nexp_profile)
@@ -1489,7 +1575,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
 
                         beta = tf.linalg.solve(A, b)
                         beta = tf.squeeze(beta, axis=-1)
-                        beta = tf.where(betamask, beta0, beta)
 
                     elif self.binByBinStatMode == "full":
                         norm_profile = norm[: self.indata.nbins]
@@ -1523,7 +1608,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
                         beta = beta0 - norm_profile / kstat * (
                             self.data_cov_inv @ (nbeta - self.nobs[:, None])
                         )
-                        beta = tf.where(betamask, beta0, beta)
                 elif self.binByBinStatType == "normal-additive":
                     varbeta = self.varbeta[: self.indata.nbins]
                     sbeta = tf.math.sqrt(varbeta)
@@ -1558,19 +1642,28 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
             else:
                 if self.binByBinStatType == "gamma":
                     kstat = self.kstat[: self.indata.nbins]
-                    betamask = self.betamask[: self.indata.nbins]
-
-                    beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat)
-                    beta = tf.where(betamask, beta0, beta)
+                    if self.binByBinStatMode == "lite":
+                        beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat)
+                    elif self.binByBinStatMode == "full":
+                        norm_profile = norm[: self.indata.nbins]
+                        nbeta = self.x[self.poi_model.npoi + self.indata.nsyst :]
+                        beta = (
+                            kstat
+                            * beta0
+                            / (
+                                norm_profile
+                                - (self.nobs / (nexp_profile * nbeta))[..., None]
+                                * norm_profile
+                                + kstat
+                            )
+                        )
                 elif self.binByBinStatType == "normal-multiplicative":
                     kstat = self.kstat[: self.indata.nbins]
-                    betamask = self.betamask[: self.indata.nbins]
                     if self.binByBinStatMode == "lite":
                         abeta = kstat
                         bbeta = nexp_profile - beta0 * kstat
                         cbeta = -self.nobs
                         beta = solve_quad_eq(abeta, bbeta, cbeta)
-                        beta = tf.where(betamask, beta0, beta)
                     elif self.binByBinStatMode == "full":
                         norm_profile = norm[: self.indata.nbins]
                         n2kstat = tf.square(norm_profile) / kstat
@@ -1590,7 +1683,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
                             * norm_profile
                             / kstat
                         )
-                        beta = tf.where(betamask, beta0, beta)
                 elif self.binByBinStatType == "normal-additive":
                     varbeta = self.varbeta[: self.indata.nbins]
                     sbeta = tf.math.sqrt(varbeta)
@@ -1619,6 +1711,7 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
 
                 beta = beta0 + (self.nobs / nbeta - 1)[..., None] * sbeta
 
+            beta = tf.where(betamask, beta0, beta)
             if self.indata.nbinsmasked:
                 beta = tf.concat([beta, self.beta0[self.indata.nbins :]], axis=0)
         else:
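The BB-lite closed form for the "gamma" type above, beta = (n + k*beta0) / (nexp + k), is the stationary point of a per-bin objective of the form f(beta) = nexp*beta - n*log(nexp*beta) + k*beta - k*beta0*log(beta); that penalty form is inferred from the closed form in this patch rather than quoted from rabbit, but the result can be checked numerically:

```python
import numpy as np
from scipy.optimize import minimize_scalar

n, nexp, k, beta0 = 12.0, 10.0, 40.0, 1.0

def f(b):
    # Poisson part in beta plus the assumed gamma penalty
    return nexp * b - n * np.log(nexp * b) + k * b - k * beta0 * np.log(b)

numeric = minimize_scalar(f, bounds=(1e-6, 10.0), method="bounded").x
closed = (n + k * beta0) / (nexp + k)
print(numeric, closed)  # both ~1.04, agreeing to the solver tolerance
```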
@@ -1941,14 +2034,12 @@ def _compute_lbeta(self, beta, full_nll=False):
         return None
 
     def _compute_nll_components(self, profile=True, full_nll=False):
-        nexpfullcentral, _, beta = self._compute_yields_with_beta(
+        nexp, _, beta = self._compute_yields_with_beta(
             profile=profile,
             compute_norm=False,
             full=False,
         )
 
-        nexp = nexpfullcentral
-
         if self.chisqFit:
             ln = 0.5 * tf.reduce_sum((nexp - self.nobs) ** 2 / self.varnobs, axis=-1)
         elif self.covarianceFit:
@@ -1978,7 +2069,6 @@ def _compute_nll_components(self, profile=True, full_nll=False):
             )
 
         lc = self._compute_lc(full_nll)
-
         lbeta = self._compute_lbeta(beta, full_nll)
 
         return ln, lc, lbeta, beta
@@ -1989,7 +2079,7 @@ def _compute_nll(self, profile=True, full_nll=False):
         )
 
         l = ln + lc
-        if lbeta is not None:
+        if self.binByBinStat:
             l = l + lbeta
 
         return l
@@ -2063,8 +2153,78 @@ def loss_val_grad_hess_beta(self, profile=True):
         grad = t1.gradient(val, self.ubeta)
         hess = t2.jacobian(grad, self.ubeta)
 
+        grad = tf.reshape(grad, [-1])
+        hess = tf.reshape(hess, [grad.shape[0], grad.shape[0]])
+
+        betamask = ~tf.reshape(self.betamask, [-1])
+        grad = grad[betamask]
+        hess = tf.boolean_mask(hess, betamask, axis=0)
+        hess = tf.boolean_mask(hess, betamask, axis=1)
+
         return val, grad, hess
 
+    def fit(self):
+        def scipy_loss(xval):
+            self.x.assign(xval)
+            val, grad = self.loss_val_grad()
+            return val.__array__(), grad.__array__()
+
+        def scipy_hessp(xval, pval):
+            self.x.assign(xval)
+            p = tf.convert_to_tensor(pval)
+            val, grad, hessp = self.loss_val_grad_hessp(p)
+            return hessp.__array__()
+
+        def scipy_hess(xval):
+            self.x.assign(xval)
+            val, grad, hess = self.loss_val_grad_hess()
+            if self.diagnostics:
+                cond_number = tfh.cond_number(hess)
+                logger.info(f" - Condition number: {cond_number}")
+                edmval = tfh.edmval(grad, hess)
+                logger.info(f" - edmval: {edmval}")
+            return hess.__array__()
+
+        xval = self.x.numpy()
+
+        callback = FitterCallback(xval)
+
+        if self.minimizer_method in [
+            "trust-krylov",
+            "trust-ncg",
+        ]:
+            info_minimize = dict(hessp=scipy_hessp)
+        elif self.minimizer_method in [
+            "trust-exact",
+            "dogleg",
+        ]:
+            info_minimize = dict(hess=scipy_hess)
+        else:
+            info_minimize = dict()
+
+        try:
+            res = scipy.optimize.minimize(
+                scipy_loss,
+                xval,
+                method=self.minimizer_method,
+                jac=True,
+                tol=0.0,
+                callback=callback,
+                **info_minimize,
+            )
+        except Exception as ex:
+            # minimizer could have called the loss or hessp functions with "random" values, so restore the
+            # state from the end of the last iteration before the exception
+            xval = callback.xval
+            logger.debug(ex)
+        else:
+            xval = res["x"]
+            logger.debug(res)
+
+        self.x.assign(xval)
+
+        return callback
+
     def minimize(self):
         if self.is_linear:
             logger.info(
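The new `fit()` method follows the standard scipy pattern: one function returns (value, gradient) via `jac=True`, and a Hessian-vector product is supplied as `hessp` for the trust-region methods. A self-contained sketch on a quadratic (toy matrix `A`, so the exact minimizer is known):

```python
import numpy as np
from scipy.optimize import minimize

A = np.diag([1.0, 4.0, 9.0])
b = np.array([1.0, 2.0, 3.0])

def loss_and_grad(x):
    # returning (value, gradient) is enabled by jac=True below
    return 0.5 * x @ A @ x - b @ x, A @ x - b

def hessp(x, p):
    return A @ p  # H @ p without materializing the Hessian

res = minimize(loss_and_grad, np.zeros(3), method="trust-krylov",
               jac=True, hessp=hessp, tol=0.0)
print(res.x)  # -> [1.0, 0.5, 0.333...], the solution of A x = b
```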
@@ -2092,65 +2252,7 @@ def minimize(self):
 
             callback = None
         else:
-
-            def scipy_loss(xval):
-                self.x.assign(xval)
-                val, grad = self.loss_val_grad()
-                return val.__array__(), grad.__array__()
-
-            def scipy_hessp(xval, pval):
-                self.x.assign(xval)
-                p = tf.convert_to_tensor(pval)
-                val, grad, hessp = self.loss_val_grad_hessp(p)
-                return hessp.__array__()
-
-            def scipy_hess(xval):
-                self.x.assign(xval)
-                val, grad, hess = self.loss_val_grad_hess()
-                if self.diagnostics:
-                    cond_number = tfh.cond_number(hess)
-                    logger.info(f" - Condition number: {cond_number}")
-                    edmval = tfh.edmval(grad, hess)
-                    logger.info(f" - edmval: {edmval}")
-                return hess.__array__()
-
-            xval = self.x.numpy()
-
-            callback = FitterCallback(xval)
-
-            if self.minimizer_method in [
-                "trust-krylov",
-                "trust-ncg",
-            ]:
-                info_minimize = dict(hessp=scipy_hessp)
-            elif self.minimizer_method in [
-                "trust-exact",
-                "dogleg",
-            ]:
-                info_minimize = dict(hess=scipy_hess)
-            else:
-                info_minimize = dict()
-
-            try:
-                res = scipy.optimize.minimize(
-                    scipy_loss,
-                    xval,
-                    method=self.minimizer_method,
-                    jac=True,
-                    tol=0.0,
-                    callback=callback,
-                    **info_minimize,
-                )
-            except Exception as ex:
-                # minimizer could have called the loss or hessp functions with "random" values, so restore the
-                # state from the end of the last iteration before the exception
-                xval = callback.xval
-                logger.debug(ex)
-            else:
-                xval = res["x"]
-                logger.debug(res)
-
-            self.x.assign(xval)
+            callback = self.fit()
 
         # force profiling of beta with final parameter values
         # TODO avoid the extra calculation and jitting if possible since the relevant calculation
@@ -2164,6 +2266,9 @@ def nll_scan(self, param, scan_range, scan_points, use_prefit=False):
         # make a likelihood scan for a single parameter
         # assuming the likelihood is minimized
 
+        # freeze the scanned parameter so that the fit does not update it
+        self.freeze_params(param)
+
         idx = np.where(self.parms.astype(str) == param)[0][0]
 
         # store current state of x temporarily
@@ -2189,51 +2294,28 @@ def nll_scan(self, param, scan_range, scan_points, use_prefit=False):
             if i == 0:
                 continue
 
+            logger.debug(f"Now at i={i} x={ixval}")
+
             self.x.assign(tf.tensor_scatter_nd_update(self.x, [[idx]], [ixval]))
 
-            def scipy_loss(xval):
-                self.x.assign(xval)
-                val, grad = self.loss_val_grad()
-                grad = grad.numpy()
-                grad[idx] = 0  # Zero out gradient for the frozen parameter
-                return val.numpy(), grad
-
-            def scipy_hessp(xval, pval):
-                self.x.assign(xval)
-                pval[idx] = (
-                    0  # Ensure the perturbation does not affect frozen parameter
-                )
-                p = tf.convert_to_tensor(pval)
-                val, grad, hessp = self.loss_val_grad_hessp(p)
-                hessp = hessp.numpy()
-                # TODO: worth testing modifying the loss/grad/hess functions to imply 1
-                # for the corresponding hessian element instead of 0,
-                # since this might allow the minimizer to converge more efficiently
-                hessp[idx] = (
-                    0  # Zero out Hessian-vector product at the frozen index
-                )
-                return hessp
-
-            res = scipy.optimize.minimize(
-                scipy_loss,
-                self.x,
-                method="trust-krylov",
-                jac=True,
-                hessp=scipy_hessp,
-            )
-            if res["success"]:
-                dnlls[nscans // 2 + sign * i] = (
-                    self.reduced_nll().numpy() - nll_best
-                )
-                scan_vals[nscans // 2 + sign * i] = ixval
+            self.fit()
+
+            dnlls[nscans // 2 + sign * i] = self.reduced_nll().numpy() - nll_best
+
+            scan_vals[nscans // 2 + sign * i] = ixval
 
         # reset x to original state
         self.x.assign(xval)
 
+        # let the parameter be free again
+        self.defreeze_params(param)
+
         return scan_vals, dnlls
 
     def nll_scan2D(self, param_tuple, scan_range, scan_points, use_prefit=False):
+        # freeze the scanned parameters so that the fit does not update them
+        self.freeze_params(param_tuple)
+
         idx0 = np.where(self.parms.astype(str) == param_tuple[0])[0][0]
         idx1 = np.where(self.parms.astype(str) == param_tuple[1])[0][0]
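`nll_scan` now implements the textbook profile scan: freeze the scanned parameter, re-minimize everything else at each grid point via `self.fit()`, and record the NLL difference to the global minimum. The same pattern on a toy two-parameter NLL:

```python
import numpy as np
from scipy.optimize import minimize

def nll(mu, theta):  # toy two-parameter NLL with correlation
    return 0.5 * (mu - 1.0) ** 2 + 0.5 * (theta - 0.5 * mu) ** 2

best = minimize(lambda p: nll(*p), [0.0, 0.0]).fun  # global minimum

scan_vals = np.linspace(0.0, 2.0, 5)
dnll = [
    # mu is frozen at m, only theta is re-minimized (profiled)
    minimize(lambda t, m=m: nll(m, t[0]), [0.0]).fun - best
    for m in scan_vals
]
print(np.round(dnll, 3))  # [0.5 0.125 0. 0.125 0.5], parabola with minimum at mu = 1
```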
method="trust-krylov", - jac=True, - hessp=scipy_hessp, - ) + self.x.assign(xval) - if res["success"]: - dnlls[ix, iy] = self.reduced_nll().numpy() - nll_best + # let the parameter be free again + self.defreeze_params(param_tuple) - self.x.assign(xval) return x_scans, y_scans, dnlls def contour_scan(self, param, nll_min, q=1, signs=[-1, 1], fun=None):