From 3471e6cc711b17209dd716fe5e1186c3078b886c Mon Sep 17 00:00:00 2001 From: Devin Date: Thu, 15 Jan 2026 16:33:20 +0100 Subject: [PATCH 01/12] Update to handle multiDim histograms --- AnaProd/MergeNtuples.py | 4 +- Analysis/HistMergerFromHists.py | 36 +++-- Analysis/HistPlotter.py | 56 +------ Analysis/HistProducerFromNTuple.py | 48 +++++- Analysis/HistTupleProducer.py | 12 +- Analysis/tasks.py | 26 +++- Common/HistHelper.py | 232 ++++++++++++++++++----------- include/HistHelper.h | 79 +++++++++- 8 files changed, 325 insertions(+), 168 deletions(-) diff --git a/AnaProd/MergeNtuples.py b/AnaProd/MergeNtuples.py index 89a87e69..87c16fbd 100644 --- a/AnaProd/MergeNtuples.py +++ b/AnaProd/MergeNtuples.py @@ -78,9 +78,9 @@ def merge_ntuples(df): snapshotOptions.fLazy = False snapshotOptions.fMode = "UPDATE" snapshotOptions.fCompressionAlgorithm = getattr( - ROOT.ROOT.RCompressionSetting.EAlgorithm, "kZLIB" + ROOT.ROOT.RCompressionSetting.EAlgorithm, "kLZMA" ) - snapshotOptions.fCompressionLevel = 4 + snapshotOptions.fCompressionLevel = 9 inputFiles = [ (fileName, ROOT.TFile(fileName, "READ")) for fileName in args.inputFile ] diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 50202152..416b8aad 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -39,9 +39,10 @@ def checkFile(inFileRoot, channels, qcdRegions, categories): return True -def fill_all_hists_dict( +def fill_hists( items_dict, - all_hist_dict_per_var_and_datasettype, + all_hist_dict, + dataset_type, var_input, unc_source="Central", ): @@ -55,9 +56,13 @@ def fill_all_hists_dict( if var != var_check: continue final_key = (key_tuple, (unc_source, scale)) - if final_key not in all_hist_dict_per_var_and_datasettype: - all_hist_dict_per_var_and_datasettype[final_key] = [] - all_hist_dict_per_var_and_datasettype[final_key].append(var_hist) + if dataset_type not in all_hist_dict.keys(): + all_hist_dict[dataset_type] = {} + if final_key not in all_hist_dict[dataset_type]: + var_hist.SetDirectory(0) + all_hist_dict[dataset_type][final_key] = var_hist + else: + all_hist_dict[dataset_type][final_key].Add(var_hist) def MergeHistogramsPerType(all_hists_dict): @@ -236,23 +241,22 @@ def GetBTagWeightDict( f"input file for dataset {dataset_name} (with path= {inFile_path}) does not exist, skipping" ) continue - with ROOT.TFile.Open(inFile_path, "READ") as inFile: - # check that the file is ok - if inFile.IsZombie(): - raise RuntimeError(f"{inFile_path} is zombie") - if not checkFile(inFile, channels, regions, all_categories): - raise RuntimeError(f"{dataset_name} has void file") base_process_name = dataset_cfg_dict[dataset_name]["process_name"] dataset_type = setup.base_processes[base_process_name]["parent_process"] if dataset_type not in all_hists_dict.keys(): all_hists_dict[dataset_type] = {} - all_items = load_all_items(inFile_path) - fill_all_hists_dict( - all_items, all_hists_dict[dataset_type], args.var, args.uncSource - ) # to add: , unc_source="Central", scale="Central" - MergeHistogramsPerType(all_hists_dict) + with ROOT.TFile.Open(inFile_path, "READ") as inFile: + # check that the file is ok + if inFile.IsZombie(): + raise RuntimeError(f"{inFile_path} is zombie") + if not checkFile(inFile, channels, regions, all_categories): + raise RuntimeError(f"{dataset_name} has void file") + all_items = get_all_items_recursive(inFile) + fill_hists( + all_items, all_hists_dict, dataset_type, args.var, args.uncSource + ) # to add: , unc_source="Central", scale="Central" # here there should 
be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc. # analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions diff --git a/Analysis/HistPlotter.py b/Analysis/HistPlotter.py index b4f14e47..f3addf31 100644 --- a/Analysis/HistPlotter.py +++ b/Analysis/HistPlotter.py @@ -25,42 +25,10 @@ def GetHistName(dataset_name, dataset_type, uncName, unc_scale, global_cfg_dict) return histName -def RebinHisto(hist_initial, new_binning, dataset, wantOverflow=True, verbose=False): - new_binning_array = array.array("d", new_binning) - new_hist = hist_initial.Rebin(len(new_binning) - 1, dataset, new_binning_array) - if dataset == "data": - new_hist.SetBinErrorOption(ROOT.TH1.kPoisson) - if wantOverflow: - n_finalbin = new_hist.GetBinContent(new_hist.GetNbinsX()) - n_overflow = new_hist.GetBinContent(new_hist.GetNbinsX() + 1) - new_hist.SetBinContent(new_hist.GetNbinsX(), n_finalbin + n_overflow) - err_finalbin = new_hist.GetBinError(new_hist.GetNbinsX()) - err_overflow = new_hist.GetBinError(new_hist.GetNbinsX() + 1) - new_hist.SetBinError( - new_hist.GetNbinsX(), - math.sqrt(err_finalbin * err_finalbin + err_overflow * err_overflow), - ) - - if verbose: - for nbin in range(0, len(new_binning)): - print( - f"nbin = {nbin}, content = {new_hist.GetBinContent(nbin)}, error {new_hist.GetBinError(nbin)}" - ) - fix_negative_contributions, debug_info, negative_bins_info = ( - FixNegativeContributions(new_hist) - ) - if not fix_negative_contributions: - print("negative contribution not fixed") - print(fix_negative_contributions, debug_info, negative_bins_info) - for nbin in range(0, new_hist.GetNbinsX() + 1): - content = new_hist.GetBinContent(nbin) - if content < 0: - print(f"for {dataset}, bin {nbin} content is < 0: {content}") - - return new_hist - - def findNewBins(hist_cfg_dict, var, channel, category): + if "2d" in hist_cfg_dict[var].keys(): + return hist_cfg_dict[var]["2d"] + if "x_rebin" not in hist_cfg_dict[var].keys(): return hist_cfg_dict[var]["x_bins"] @@ -85,22 +53,6 @@ def findNewBins(hist_cfg_dict, var, channel, category): return new_dict[category][channel] return hist_cfg_dict[var]["x_rebin"]["other"] - -def getNewBins(bins): - if type(bins) == list: - final_bins = bins - else: - n_bins, bin_range = bins.split("|") - start, stop = bin_range.split(":") - bin_width = (float(stop) - float(start)) / int(n_bins) - final_bins = [] - bin_center = float(start) - while bin_center >= float(start) and bin_center <= float(stop): - final_bins.append(bin_center) - bin_center = bin_center + bin_width - return final_bins - - if __name__ == "__main__": import argparse import FLAF.PlotKit.Plotter as Plotter @@ -249,7 +201,7 @@ def getNewBins(bins): hist_cfg_dict[args.var]["use_log_x"] = True rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[args.var].keys() - bins_to_compute = hist_cfg_dict[args.var]["x_bins"] + bins_to_compute = hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None if rebin_condition: bins_to_compute = findNewBins(hist_cfg_dict, args.var, channel, category) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index c45bb489..5c2b4576 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -31,7 +31,19 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): dir_ptr = Utilities.mkdir(outFile, dir_name) model, unit_hist = hist_list[0] merged_hist = model.GetHistogram().Clone() - for i in range(0, unit_hist.GetNbinsX() + 2): + # Yes I know this is ugly + # 
Axis needs +2 for under/overflow, but only if the return is not 1!!! + # Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D + N_xbins = unit_hist.GetNbinsX() + 2 + N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1 + N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins + N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1 + N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins + N_bins = N_xbins * N_ybins * N_zbins + # If we use the THnD then we have 'GetNbins' function instead + N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins + print(f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}") + for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) merged_hist.SetBinContent(i, bin_content) @@ -41,7 +53,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): if len(hist_list) > 1: for model, unit_hist in hist_list[1:]: hist = model.GetHistogram() - for i in range(0, unit_hist.GetNbinsX() + 2): + for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) hist.SetBinContent(i, bin_content) @@ -56,10 +68,27 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): - model, unit_bin_model = GetModel(hist_cfg_dict, var, return_unit_bin_model=True) - unit_hist = rdf.Filter(filter_to_apply).Histo1D( - unit_bin_model, f"{var}_bin", weight_name + dims = 1 if not hist_cfg_dict[var].get("var_list", False) else len( + hist_cfg_dict[var]["var_list"] ) + print(f"Dimensions: {dims}") + + model, unit_bin_model = GetModel(hist_cfg_dict, var, dims, return_unit_bin_model=True) + var_bin_list = [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] if dims > 1 else [f"{var}_bin"] + if dims == 1: + unit_hist = rdf.Filter(filter_to_apply).Histo1D( + unit_bin_model, *var_bin_list, weight_name + ) + elif dims == 2: + unit_hist = rdf.Filter(filter_to_apply).Histo2D( + unit_bin_model, *var_bin_list, weight_name + ) + elif dims == 3: + unit_hist = rdf.Filter(filter_to_apply).Histo3D( + unit_bin_model, *var_bin_list, weight_name + ) + else: + raise RuntimeError("Only 1D, 2D and 3D histograms are supported") return model, unit_hist @@ -121,6 +150,7 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base + print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( @@ -262,7 +292,13 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): ) variables = setup.global_params["variables"] - vars_needed = set(variables) + vars_needed = set() + for var in variables: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + vars_needed.add(v) + else: + vars_needed.add(var) for further_cut_name, (vars_for_cut, _) in further_cuts.items(): for var_for_cut in vars_for_cut: if var_for_cut: diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index d74c9298..01fb7f49 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -176,7 +176,17 @@ def createHistTuple( df_is_central=True, ) dfw_central.colToSave.append(final_weight_name) + + # Return a flattened set of variables, the 2D happens later + flatten_vars = set() for var in variables: + if 
isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + + for var in flatten_vars: DefineBinnedColumn(hist_cfg_dict, var) dfw_central.df = dfw_central.df.Define(f"{var}_bin", f"get_{var}_bin({var})") dfw_central.colToSave.append(f"{var}_bin") @@ -231,7 +241,7 @@ def createHistTuple( df_is_central=False, ) dfw_shift.colToSave.append(final_weight_name) - for var in variables: + for var in flatten_vars: dfw_shift.df = dfw_shift.df.Define( f"{var}_bin", f"get_{var}_bin({var})" ) diff --git a/Analysis/tasks.py b/Analysis/tasks.py index 0ba769dd..ed34036a 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -185,13 +185,23 @@ def create_branch_map(self): ] datasets_to_consider.append("data") + flatten_vars = set() + for var in self.global_params["variables"]: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + need_cache_list = [ (var_name in var_produced_by, var_produced_by.get(var_name, None)) - for var_name in self.global_params["variables"] + # for var_name in self.global_params["variables"] + for var_name in flatten_vars ] producer_list = [] need_cache_global = any(item[0] for item in need_cache_list) - for var_name in self.global_params["variables"]: + # for var_name in self.global_params["variables"]: + for var_name in flatten_vars: need_cache = True if var_name in var_produced_by else False producer_to_run = ( var_produced_by[var_name] if var_name in var_produced_by else None @@ -397,6 +407,7 @@ def create_branch_map(self): @workflow_condition.output def output(self): var, prod_br, dataset_name = self.branch_data + if type(var) == dict: var = var['name'] output_path = os.path.join( "hists", self.version, self.period, var, f"{dataset_name}.root" ) @@ -429,6 +440,7 @@ def run(self): stack.enter_context((inp).localize("r")).path for inp in self.input()[0] ] + var = var if type(var) != dict else var['name'] tmpFile = os.path.join(job_home, f"HistFromNtuple_{var}.root") HistFromNtupleProducer_cmd = [ @@ -549,6 +561,7 @@ def create_branch_map(self): prod_br_list, current_dataset, ) in HistFromNtupleProducerTask_branch_map.items(): + var_name = var_name.get("name", var_name) if type(var_name) == dict else var_name if var_name not in all_datasets.keys(): all_datasets[var_name] = [] all_datasets[var_name].append((br_idx, current_dataset)) @@ -1045,10 +1058,17 @@ def workflow_requires(self): merge_map = HistMergerTask.req( self, branch=-1, branches=(), customisations=self.customisations ).create_branch_map() + + branch_set = set() + for br_idx, (var) in self.branch_map.items(): + for br, (v, _, _) in merge_map.items(): + if v == var: + branch_set.add(br) + return { "merge": HistMergerTask.req( self, - branches=tuple(merge_map.keys()), + branches=tuple(branch_set), customisations=self.customisations, ) } diff --git a/Common/HistHelper.py b/Common/HistHelper.py index c724ecf0..02666c1c 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -10,6 +10,8 @@ import FLAF.Common.Utilities as Utilities +ROOT.gROOT.ProcessLine(f".include {os.environ['ANALYSIS_PATH']}") +ROOT.gROOT.ProcessLine(f'#include "FLAF/include/HistHelper.h"') def get_all_items_recursive(root_dir, path=()): items_dict = {} @@ -21,7 +23,7 @@ def get_all_items_recursive(root_dir, path=()): if obj.InheritsFrom("TDirectory"): items_dict.update(get_all_items_recursive(obj, path + (key.GetName(),))) elif obj.InheritsFrom("TH1"): - obj.SetDirectory(0) + # obj.SetDirectory(0) 
local_items[key.GetName()] = obj if local_items: @@ -198,6 +200,9 @@ def getNewBins(bins): if isinstance(bins, list): return bins + if isinstance(bins, dict): + return bins + n_bins, bin_range = bins.split("|") start, stop = map(float, bin_range.split(":")) step = (stop - start) / int(n_bins) @@ -206,8 +211,54 @@ def getNewBins(bins): def RebinHisto(hist_initial, new_binning, sample, wantOverflow=True, verbose=False): - new_binning_array = array.array("d", new_binning) - new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array) + print("Rebinning histogram:", hist_initial.GetName()) + if isinstance(new_binning, dict): + N_xbins = hist_initial.GetNbinsX() + 2 + N_ybins = hist_initial.GetNbinsY() if hasattr(hist_initial, "GetNbinsY") else 1 + N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins + N_zbins = hist_initial.GetNbinsZ() if hasattr(hist_initial, "GetNbinsZ") else 1 + N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins + N_bins = N_xbins * N_ybins * N_zbins + # If we use the THnD then we have 'GetNbins' function instead + N_bins = ( + hist_initial.GetNbins() if hasattr(hist_initial, "GetNbins") else N_bins + ) + + # Prepare data structures for C++ function + y_bin_ranges = ROOT.std.vector("std::pair")() + output_bin_edges_vec = ROOT.std.vector("std::vector")() + + for combined_bin in new_binning["combined_bins"]: + # Parse y_bin range + y_min, y_max = combined_bin["y_bin"] + y_bin_ranges.push_back(ROOT.std.pair("float", "float")(y_min, y_max)) + + # Parse x_bins spec (can be string "nbins|min:max" or list of bin edges) + out_spec = combined_bin["x_bins"] + out_edges = ROOT.std.vector("float")() + if isinstance(out_spec, list): + for edge in out_spec: + out_edges.push_back(float(edge)) + else: + n_out_bins, out_range = out_spec.split("|") + out_min, out_max = map(float, out_range.split(":")) + n_out_bins = int(n_out_bins) + # Create uniform bins + step = (out_max - out_min) / n_out_bins + for i in range(n_out_bins + 1): + out_edges.push_back(out_min + i * step) + output_bin_edges_vec.push_back(out_edges) + + # Create ROOT vectors + # Call the C++ function which returns a new histogram + new_hist = ROOT.analysis.rebinHistogramDict( + hist_initial, N_bins, y_bin_ranges, output_bin_edges_vec + ) + new_hist.SetName(sample) + + else: + new_binning_array = array.array("d", new_binning) + new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array) if sample == "data": new_hist.SetBinErrorOption(ROOT.TH1.kPoisson) @@ -251,96 +302,103 @@ def GetBinVec(hist_cfg, var): n_bins, bin_range = x_bins.split("|") start, stop = bin_range.split(":") edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() - # print(len(edges)) x_bins_vec = Utilities.ListToVector(edges, "float") return x_bins_vec -def GetModel(hist_cfg, var, return_unit_bin_model=False): - x_bins = hist_cfg[var]["x_bins"] - if type(hist_cfg[var]["x_bins"]) == list: - x_bins_vec = Utilities.ListToVector(x_bins, "double") - model = ROOT.RDF.TH1DModel("", "", x_bins_vec.size() - 1, x_bins_vec.data()) - else: - n_bins, bin_range = x_bins.split("|") - start, stop = bin_range.split(":") - model = ROOT.RDF.TH1DModel("", "", int(n_bins), float(start), float(stop)) - if not return_unit_bin_model: - return model - unit_bin_model = ROOT.RDF.TH1DModel( - "", "", model.fNbinsX, -0.5, model.fNbinsX - 0.5 - ) - return model, unit_bin_model +def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): + print(f"Model with var {var}") + THModel_Inputs = [] + unit_bin_Inputs = [] + if dims == 1: + 
print("1D histogram") + x_bins = hist_cfg[var]["x_bins"] + if type(hist_cfg[var]["x_bins"]) == list: + x_bins_vec = Utilities.ListToVector(x_bins, "double") + else: + n_bins, bin_range = x_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + x_bins_vec = Utilities.ListToVector(edges, "double") + THModel_Inputs.append(x_bins_vec.size() - 1) + THModel_Inputs.append(x_bins_vec.data()) + model = ROOT.RDF.TH1DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [model.fNbinsX, -0.5, model.fNbinsX - 0.5] + unit_bin_model = ROOT.RDF.TH1DModel("", "", *unit_bin_Inputs) + + elif dims == 2: + print("2D histogram") + list_var_bins_vec = [] + for var_2d in hist_cfg[var]["var_list"]: + var_bin_name = f"{var_2d}_bins" + var_bins = ( + hist_cfg[var][var_bin_name] + if var_bin_name in hist_cfg[var] + else hist_cfg[var_2d]["x_bins"] + ) + if type(var_bins) == list: + var_bins_vec = Utilities.ListToVector(var_bins, "double") + else: + n_bins, bin_range = var_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist() + var_bins_vec = Utilities.ListToVector(edges, "double") + list_var_bins_vec.append(var_bins_vec) + THModel_Inputs.append(var_bins_vec.size() - 1) + THModel_Inputs.append(var_bins_vec.data()) + model = ROOT.RDF.TH2DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [ + model.fNbinsX, + -0.5, + model.fNbinsX - 0.5, + model.fNbinsY, + -0.5, + model.fNbinsY - 0.5, + ] + unit_bin_model = ROOT.RDF.TH2DModel("", "", *unit_bin_Inputs) + + elif dims == 3: + print("3D histogram") + list_var_bins_vec = [] + for var_3d in hist_cfg[var]["var_list"]: + var_bin_name = f"{var_3d}_bins" + var_bins = hist_cfg[var][var_bin_name] + if type(var_bins) == list: + var_bins_vec = Utilities.ListToVector(var_bins, "double") + else: + n_bins, bin_range = var_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + var_bins_vec = Utilities.ListToVector(edges, "double") + print(var_bins_vec) + list_var_bins_vec.append(var_bins_vec) + THModel_Inputs.append(var_bins_vec.size() - 1) + THModel_Inputs.append(var_bins_vec.data()) + model = ROOT.RDF.TH3DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [ + model.fNbinsX, + -0.5, + model.fNbinsX - 0.5, + model.fNbinsY, + -0.5, + model.fNbinsY - 0.5, + model.fNbinsZ, + -0.5, + model.fNbinsZ - 0.5, + ] + unit_bin_model = ROOT.RDF.TH3DModel("", "", *unit_bin_Inputs) + else: + print("nD histogram not implemented yet") + # model = ROOT.RDF.THnDModel("", "", ) -# def GetModel(hist_cfg, var): -# x_bins = hist_cfg[var]["x_bins"] -# if type(hist_cfg[var]["x_bins"]) == list: -# x_bins_vec = Utilities.ListToVector(x_bins, "double") -# model = ROOT.RDF.TH1DModel("", "", x_bins_vec.size() - 1, x_bins_vec.data()) -# else: -# n_bins, bin_range = x_bins.split("|") -# start, stop = bin_range.split(":") -# model = ROOT.RDF.TH1DModel("", "", int(n_bins), float(start), float(stop)) -# return model - - -# # to be fixed -# def Get2DModel(hist_cfg, var1, var2): -# x_bins = hist_cfg[var1]["x_bins"] -# y_bins = hist_cfg[var2]["x_bins"] -# if type(x_bins) == list: -# x_bins_vec = Utilities.ListToVector(x_bins, "double") -# if type(y_bins) == list: -# y_bins_vec = Utilities.ListToVector(y_bins, "double") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# 
x_bins_vec.size() - 1, -# x_bins_vec.data(), -# y_bins_vec.size() - 1, -# y_bins_vec.data(), -# ) -# else: -# n_y_bins, y_bin_range = y_bins.split("|") -# y_start, y_stop = y_bin_range.split(":") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# x_bins_vec.size() - 1, -# x_bins_vec.data(), -# int(n_y_bins), -# float(y_start), -# float(y_stop), -# ) -# else: -# n_x_bins, x_bin_range = x_bins.split("|") -# x_start, x_stop = x_bin_range.split(":") -# if type(y_bins) == list: -# y_bins_vec = Utilities.ListToVector(y_bins, "double") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# int(n_x_bins), -# float(x_start), -# float(x_stop), -# y_bins_vec.size() - 1, -# y_bins_vec.data(), -# ) -# else: -# n_y_bins, y_bin_range = y_bins.split("|") -# y_start, y_stop = y_bin_range.split(":") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# int(n_x_bins), -# float(x_start), -# float(x_stop), -# int(n_y_bins), -# float(y_start), -# float(y_stop), -# ) -# return model + return model, unit_bin_model def createCacheQuantities(dfWrapped_cache, cache_map_name, cache_entry_name): diff --git a/include/HistHelper.h b/include/HistHelper.h index 020f2c5b..e55f9559 100644 --- a/include/HistHelper.h +++ b/include/HistHelper.h @@ -11,6 +11,10 @@ #include "EntryQueue.h" +#include +#include +#include + /* namespace kin_fit { struct FitResults { @@ -159,5 +163,78 @@ namespace analysis { return df_node; } }; - + + TH1D* rebinHistogramDict(TH1* hist_initial, int N_bins, + const std::vector>& y_bin_ranges, + const std::vector>& output_bin_edges) { + // Flatten output bin edges into a single sorted array + std::vector all_output_edges; + float last_edge = 0.0; + for (const auto& edges : output_bin_edges) { + for (float edge : edges) { + all_output_edges.push_back(edge + last_edge); + } + last_edge = all_output_edges.back(); + } + // Sort and remove duplicates + std::sort(all_output_edges.begin(), all_output_edges.end()); + all_output_edges.erase(std::unique(all_output_edges.begin(), all_output_edges.end()), all_output_edges.end()); + + // Create output histogram with variable binning + TH1D* hist_output = new TH1D("rebinned", "rebinned", all_output_edges.size() - 1, all_output_edges.data()); + hist_output->Sumw2(); + + // Helper function to find bin index from value and edges + auto findBinIndex = [](float value, const std::vector& edges) -> int { + if (edges.size() < 2) return -1; + for (size_t i = 0; i < edges.size() - 1; ++i) { + if (value >= edges[i] && value < edges[i + 1]) { + return i; + } + } + return -1; + }; + + // Iterate through all bins in the original histogram + for (int i = 0; i < N_bins; ++i) { + int binX, binY, binZ; + hist_initial->GetBinXYZ(i, binX, binY, binZ); + + // Get bin centers (actual values) + float x_value = hist_initial->GetXaxis()->GetBinCenter(binX); + float y_value = hist_initial->GetYaxis()->GetBinCenter(binY); + float z_value = hist_initial->GetZaxis()->GetBinCenter(binZ); + + // Get bin content and error + double bin_content = hist_initial->GetBinContent(i); + double bin_error = hist_initial->GetBinError(i); + double bin_error2 = bin_error * bin_error; + + // Find which y_bin range this y_value falls into + int y_bin_idx = -1; + for (size_t j = 0; j < y_bin_ranges.size(); ++j) { + if (y_value >= y_bin_ranges[j].first && y_value < y_bin_ranges[j].second) { + y_bin_idx = j; + break; + } + } + if (y_bin_idx == -1) continue; // Skip if y_value doesn't fall in any range + // Find output bin index within the output_bin_edges for this y_bin + int local_out_bin = findBinIndex(x_value, 
output_bin_edges[y_bin_idx]); + if (local_out_bin == -1) continue; // Skip if x_value doesn't fall in any output bin + // Calculate section offset by counting bins in all previous y_bin sections + int section_offset = 0; + for (int prev_y = 0; prev_y < y_bin_idx; ++prev_y) { + section_offset += output_bin_edges[prev_y].size() - 1; // size - 1 = number of bins + } + // Calculate global bin index: offset + local bin position within this section + int global_bin = section_offset + local_out_bin + 1; // +1 for ROOT's 1-indexed bins + // Set bin content and error + if (global_bin >= 1 && global_bin <= (int)all_output_edges.size() - 1) { + hist_output->SetBinContent(global_bin, hist_output->GetBinContent(global_bin) + bin_content); + hist_output->SetBinError(global_bin, std::sqrt(std::pow(hist_output->GetBinError(global_bin), 2) + bin_error2)); + } + } + return hist_output; + } } // namespace analysis From 929d7cb3f473659cc58e7937e8337958b6418ed1 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:02:24 +0100 Subject: [PATCH 02/12] Update flaf env again after reset-merge --- run_tools/mk_flaf_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh index 179a9f9b..47e1e843 100755 --- a/run_tools/mk_flaf_env.sh +++ b/run_tools/mk_flaf_env.sh @@ -73,7 +73,7 @@ EOF link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/bin $env_base/bin go gofmt link_all $lcg_base/lib $env_base/lib/python3.12/site-packages python3.12 - link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info + link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info link_all $lcg_base/lib64 $env_base/lib/python3.12/site-packages cairo cmake libonnx_proto.a libsvm.so.2 pkgconfig ThePEG libavh_olo.a libff.a libqcdloop.a python3.12 link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib $env_base/lib link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib64 $env_base/lib From eaa9bbfa95fa49f51ee5c4f00cc6e637694a2fb0 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:24:31 +0100 Subject: [PATCH 03/12] Fixed some issues with flattened vars --- Analysis/HistTupleProducer.py | 1 + Analysis/tasks.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index 1cc0de1a..dc888293 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -127,6 +127,7 @@ def createHistTuple( print("Scale uncertainties to consider:", scale_uncertainties) print("Defining binnings for variables") + flatten_vars = set() for var in variables: if isinstance(var, dict) and "vars" in var: for v in var["vars"]: diff --git a/Analysis/tasks.py b/Analysis/tasks.py index 50cba496..da78165d 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -47,7 +47,16 @@ def 
workflow_requires(self): } req_dict["AnalysisCacheTask"] = [] var_produced_by = self.setup.var_producer_map - for var_name in self.global_params["variables"]: + + flatten_vars = set() + for var in self.global_params["variables"]: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + + for var_name in flatten_vars: producer_to_run = var_produced_by.get(var_name, None) if producer_to_run is not None: req_dict["AnalysisCacheTask"].append( From 711b98c551ab543b0045daee99c863d7f55ae93b Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:26:10 +0100 Subject: [PATCH 04/12] Formatting --- Analysis/HistMergerFromHists.py | 2 +- Analysis/HistPlotter.py | 5 ++++- Analysis/HistProducerFromNTuple.py | 20 +++++++++++++++----- Analysis/tasks.py | 9 ++++++--- Common/HistHelper.py | 1 + 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 03667175..2a5dc348 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -255,7 +255,7 @@ def GetBTagWeightDict( all_items = get_all_items_recursive(inFile) fill_hists( all_items, all_hists_dict, dataset_type, args.var, args.uncSource - ) # to add: , unc_source="Central", scale="Central" + ) # to add: , unc_source="Central", scale="Central" # here there should be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc. # analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions diff --git a/Analysis/HistPlotter.py b/Analysis/HistPlotter.py index f3addf31..a92d59e1 100644 --- a/Analysis/HistPlotter.py +++ b/Analysis/HistPlotter.py @@ -53,6 +53,7 @@ def findNewBins(hist_cfg_dict, var, channel, category): return new_dict[category][channel] return hist_cfg_dict[var]["x_rebin"]["other"] + if __name__ == "__main__": import argparse import FLAF.PlotKit.Plotter as Plotter @@ -201,7 +202,9 @@ def findNewBins(hist_cfg_dict, var, channel, category): hist_cfg_dict[args.var]["use_log_x"] = True rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[args.var].keys() - bins_to_compute = hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None + bins_to_compute = ( + hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None + ) if rebin_condition: bins_to_compute = findNewBins(hist_cfg_dict, args.var, channel, category) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 3161c7cd..42a0f56d 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -47,7 +47,9 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): N_bins = N_xbins * N_ybins * N_zbins # If we use the THnD then we have 'GetNbins' function instead N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins - print(f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}") + print( + f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" + ) for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) @@ -73,13 +75,21 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): - dims = 1 if not hist_cfg_dict[var].get("var_list", False) else len( - hist_cfg_dict[var]["var_list"] + dims = ( + 1 + if not 
hist_cfg_dict[var].get("var_list", False) + else len(hist_cfg_dict[var]["var_list"]) ) print(f"Dimensions: {dims}") - model, unit_bin_model = GetModel(hist_cfg_dict, var, dims, return_unit_bin_model=True) - var_bin_list = [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] if dims > 1 else [f"{var}_bin"] + model, unit_bin_model = GetModel( + hist_cfg_dict, var, dims, return_unit_bin_model=True + ) + var_bin_list = ( + [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] + if dims > 1 + else [f"{var}_bin"] + ) if dims == 1: unit_hist = rdf.Filter(filter_to_apply).Histo1D( unit_bin_model, *var_bin_list, weight_name diff --git a/Analysis/tasks.py b/Analysis/tasks.py index da78165d..ded3ddfd 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -372,7 +372,8 @@ def create_branch_map(self): @workflow_condition.output def output(self): var, prod_br, dataset_name = self.branch_data - if type(var) == dict: var = var['name'] + if type(var) == dict: + var = var["name"] output_path = os.path.join( "hists", self.version, self.period, var, f"{dataset_name}.root" ) @@ -404,7 +405,7 @@ def run(self): stack.enter_context((inp).localize("r")).path for inp in self.input()[0] ] - var = var if type(var) != dict else var['name'] + var = var if type(var) != dict else var["name"] tmpFile = os.path.join(job_home, f"HistFromNtuple_{var}.root") HistFromNtupleProducer_cmd = [ @@ -525,7 +526,9 @@ def create_branch_map(self): prod_br_list, current_dataset, ) in HistFromNtupleProducerTask_branch_map.items(): - var_name = var_name.get("name", var_name) if type(var_name) == dict else var_name + var_name = ( + var_name.get("name", var_name) if type(var_name) == dict else var_name + ) if var_name not in all_datasets.keys(): all_datasets[var_name] = [] all_datasets[var_name].append((br_idx, current_dataset)) diff --git a/Common/HistHelper.py b/Common/HistHelper.py index f1a266fe..cffee623 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -13,6 +13,7 @@ ROOT.gROOT.ProcessLine(f".include {os.environ['ANALYSIS_PATH']}") ROOT.gROOT.ProcessLine(f'#include "FLAF/include/HistHelper.h"') + def get_all_items_recursive(root_dir, path=()): items_dict = {} local_items = {} From bda35355e8e2af40f8bb78888b8eab92d70adf1c Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:42:59 +0100 Subject: [PATCH 05/12] Bug fix --- Analysis/HistProducerFromNTuple.py | 1 + Analysis/HistTupleProducer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 42a0f56d..2da8b7eb 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -50,6 +50,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): print( f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" ) + # This can be a loop over many bins, several times. 
Can be improved to be ran in c++ instead for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index dc888293..ca4e15b1 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -204,7 +204,7 @@ def createHistTuple( dfw.colToSave.append(desc["weight"]) print("Defining binned columns") - for var in variables: + for var in flatten_vars: dfw.df = dfw.df.Define(f"{var}_bin", f"get_{var}_bin({var})") dfw.colToSave.append(f"{var}_bin") From ffc7872b9b6fd7c01eefc82f0472a772de729b89 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 17:06:14 +0100 Subject: [PATCH 06/12] Remove debug prints --- Analysis/HistProducerFromNTuple.py | 6 +----- Common/HistHelper.py | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 2da8b7eb..ce5d532f 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -47,9 +47,6 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): N_bins = N_xbins * N_ybins * N_zbins # If we use the THnD then we have 'GetNbins' function instead N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins - print( - f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" - ) # This can be a loop over many bins, several times. Can be improved to be ran in c++ instead for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) @@ -81,7 +78,6 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): if not hist_cfg_dict[var].get("var_list", False) else len(hist_cfg_dict[var]["var_list"]) ) - print(f"Dimensions: {dims}") model, unit_bin_model = GetModel( hist_cfg_dict, var, dims, return_unit_bin_model=True @@ -170,7 +166,7 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base - print("Saving Hist for unc/scale/key:", unc, scale, key) + # print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( diff --git a/Common/HistHelper.py b/Common/HistHelper.py index cffee623..094255d5 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -261,7 +261,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): THModel_Inputs = [] unit_bin_Inputs = [] if dims == 1: - print("1D histogram") x_bins = hist_cfg[var]["x_bins"] if type(hist_cfg[var]["x_bins"]) == list: x_bins_vec = Utilities.ListToVector(x_bins, "double") @@ -279,7 +278,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): unit_bin_model = ROOT.RDF.TH1DModel("", "", *unit_bin_Inputs) elif dims == 2: - print("2D histogram") list_var_bins_vec = [] for var_2d in hist_cfg[var]["var_list"]: var_bin_name = f"{var_2d}_bins" @@ -312,7 +310,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): unit_bin_model = ROOT.RDF.TH2DModel("", "", *unit_bin_Inputs) elif dims == 3: - print("3D histogram") list_var_bins_vec = [] for var_3d in hist_cfg[var]["var_list"]: var_bin_name = f"{var_3d}_bins" From 129d233051a197bb32fca138cced1c249343061f Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 17:27:07 +0100 Subject: [PATCH 07/12] Remove debug --- Common/HistHelper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Common/HistHelper.py 
b/Common/HistHelper.py index 094255d5..ec54bfb5 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -257,7 +257,6 @@ def GetBinVec(hist_cfg, var): def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): - print(f"Model with var {var}") THModel_Inputs = [] unit_bin_Inputs = [] if dims == 1: From c979734da377322bd4e185e849092a7a75d8cfec Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 20:27:27 +0100 Subject: [PATCH 08/12] Fix bug with 1D string binning --- Analysis/HistProducerFromNTuple.py | 1 - Common/HistHelper.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index ce5d532f..f16b61bc 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -166,7 +166,6 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base - # print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( diff --git a/Common/HistHelper.py b/Common/HistHelper.py index ec54bfb5..0c05c481 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -161,7 +161,6 @@ def getNewBins(bins): def RebinHisto(hist_initial, new_binning, sample, wantOverflow=True, verbose=False): - print("Rebinning histogram:", hist_initial.GetName()) if isinstance(new_binning, dict): N_xbins = hist_initial.GetNbinsX() + 2 N_ybins = hist_initial.GetNbinsY() if hasattr(hist_initial, "GetNbinsY") else 1 @@ -266,7 +265,7 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): else: n_bins, bin_range = x_bins.split("|") start, stop = bin_range.split(":") - edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist() x_bins_vec = Utilities.ListToVector(edges, "double") THModel_Inputs.append(x_bins_vec.size() - 1) THModel_Inputs.append(x_bins_vec.data()) @@ -320,7 +319,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): start, stop = bin_range.split(":") edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() var_bins_vec = Utilities.ListToVector(edges, "double") - print(var_bins_vec) list_var_bins_vec.append(var_bins_vec) THModel_Inputs.append(var_bins_vec.size() - 1) THModel_Inputs.append(var_bins_vec.data()) From a195bc4d45b0dc6954f4afafaaf7e5aa320861d1 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 12:45:24 +0100 Subject: [PATCH 09/12] Add this var_entry thing and remove import star because I hate it --- Analysis/HistProducerFromNTuple.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 9565d703..27d56940 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -8,7 +8,7 @@ if __name__ == "__main__": sys.path.append(os.environ["ANALYSIS_PATH"]) -from FLAF.Common.HistHelper import * +import FLAF.Common.HistHelper as HistHelper import FLAF.Common.Utilities as Utilities from FLAF.Common.Setup import Setup from FLAF.RunKit.run_tools import ps_call @@ -73,13 +73,14 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): + var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var) dims = ( 1 - if not hist_cfg_dict[var].get("var_list", False) - else 
len(hist_cfg_dict[var]["var_list"]) + if not hist_cfg_dict[var_entry].get("var_list", False) + else len(hist_cfg_dict[var_entry]["var_list"]) ) - model, unit_bin_model = GetModel( + model, unit_bin_model = HistHelper.GetModel( hist_cfg_dict, var, dims, return_unit_bin_model=True ) var_bin_list = ( @@ -210,7 +211,7 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): for filter_key in key_filter_dict.keys(): print(filter_key) for further_cut_name in [None] + list(further_cuts.keys()): - model, unit_bin_model = GetModel( + model, unit_bin_model = HistHelper.GetModel( hist_cfg_dict, var, return_unit_bin_model=True ) nbins = unit_bin_model.fNbinsX From 71939efde842d4fc1e20cfa68d5e80d4ec56e5ba Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 13:01:16 +0100 Subject: [PATCH 10/12] Formatting and fixed 1 more var_entry --- Analysis/HistMergerFromHists.py | 5 ++--- Analysis/HistProducerFromNTuple.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 2a5dc348..acf4a6d8 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -4,7 +4,6 @@ import time import importlib - if __name__ == "__main__": sys.path.append(os.environ["ANALYSIS_PATH"]) @@ -88,7 +87,7 @@ def GetBTagWeightDict( for dataset_type in all_hists_dict.keys(): all_hists_dict_1D[dataset_type] = {} for key_name, histogram in all_hists_dict[dataset_type].items(): - (key_1, key_2) = key_name + key_1, key_2 = key_name if var not in boosted_variables: ch, reg, cat = key_1 @@ -297,7 +296,7 @@ def GetBTagWeightDict( outFile = ROOT.TFile(args.outFile, "RECREATE") for dataset_type in all_hists_dict.keys(): for key in all_hists_dict[dataset_type].keys(): - (key_dir, (uncName, uncScale)) = key + key_dir, (uncName, uncScale) = key # here there can be some custom requirements - e.g. 
regions / categories to not merge, datasets to ignore dir_name = "/".join(key_dir) dir_ptr = Utilities.mkdir(outFile, dir_name) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 27d56940..a931e4ba 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -84,7 +84,7 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): hist_cfg_dict, var, dims, return_unit_bin_model=True ) var_bin_list = ( - [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] + [f"{var}_bin" for var in hist_cfg_dict[var_entry]["var_list"]] if dims > 1 else [f"{var}_bin"] ) From 0811e7d0645149d996002b6a88c5b9bdd0279199 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 14:50:24 +0100 Subject: [PATCH 11/12] Update flaf env to get latest black --- Analysis/HistTupleProducer.py | 6 ++---- run_tools/mk_flaf_env.sh | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index c2a1ca80..90ea2d76 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -37,8 +37,7 @@ def DefineBinnedColumn(hist_cfg_dict, var): start, stop = bin_range.split(":") axis_definition = f"static const TAxis axis({n_bins}, {start}, {stop});" - ROOT.gInterpreter.Declare( - f""" + ROOT.gInterpreter.Declare(f""" #include "ROOT/RVec.hxx" #include "TAxis.h" @@ -56,8 +55,7 @@ def DefineBinnedColumn(hist_cfg_dict, var): }} return out; }} - """ - ) + """) def createHistTuple( diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh index 47e1e843..93f59b22 100755 --- a/run_tools/mk_flaf_env.sh +++ b/run_tools/mk_flaf_env.sh @@ -33,6 +33,7 @@ install() { run_cmd pip install fastcrc run_cmd pip install bayesian-optimization run_cmd pip install yamllint + run_cmd pip install black==26.1.0 } join_by() { @@ -70,10 +71,10 @@ export LD_LIBRARY_PATH=${ld_lib_path} EOF - link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java + link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java black blackd link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/bin $env_base/bin go gofmt link_all $lcg_base/lib $env_base/lib/python3.12/site-packages python3.12 - link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info + link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools black blackd blib2to3 pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info link_all $lcg_base/lib64 $env_base/lib/python3.12/site-packages cairo cmake libonnx_proto.a libsvm.so.2 pkgconfig ThePEG libavh_olo.a libff.a libqcdloop.a python3.12 link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib $env_base/lib link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib64 $env_base/lib From e53f55a5b3e8a8d0884273f6d6c36ab020cab6b2 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 14:50:45 +0100 Subject: [PATCH 
12/12] Remove hardcoded black version

---
 run_tools/mk_flaf_env.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh
index 93f59b22..8c2e4ec3 100755
--- a/run_tools/mk_flaf_env.sh
+++ b/run_tools/mk_flaf_env.sh
@@ -33,7 +33,7 @@ install() {
     run_cmd pip install fastcrc
     run_cmd pip install bayesian-optimization
     run_cmd pip install yamllint
-    run_cmd pip install black==26.1.0
+    run_cmd pip install black
 }
 
 join_by() {
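
Note on the recurring pattern in this series: "variables" entries are either
plain column names or dicts of the form {"name": ..., "vars": [...]};
multi-dimensional histogram configs carry a "var_list" of component variables;
and SaveHist counts under/overflow bins only on axes that actually exist. The
standalone sketch below pulls those three pieces together. The config keys are
taken from the diffs above; the sample variable names and binnings are
hypothetical, and hist_dims paraphrases (rather than copies) the check used in
GetUnitBinHist.

def flatten_variables(variables):
    """Collect every scalar column a histogram needs, whether it is
    declared directly or as a component of a multi-dimensional entry."""
    flatten_vars = set()
    for var in variables:
        if isinstance(var, dict) and "vars" in var:
            for v in var["vars"]:
                flatten_vars.add(v)
        else:
            flatten_vars.add(var)
    return flatten_vars

def hist_dims(hist_cfg, var):
    """Dimensionality of a histogram entry: 1 unless the hist config
    declares a var_list of component variables."""
    return len(hist_cfg[var].get("var_list", [])) or 1

def count_unit_bins(n_x, n_y=1, n_z=1):
    """Total number of unit-hist bins, adding under/overflow (+2) only
    on axes that exist, mirroring the N_bins logic in SaveHist."""
    n_x += 2
    n_y = n_y + 2 if n_y > 1 else n_y
    n_z = n_z + 2 if n_z > 1 else n_z
    return n_x * n_y * n_z

variables = ["tau1_pt", {"name": "mass_vs_pt", "vars": ["bb_mass", "tau1_pt"]}]
hist_cfg = {"mass_vs_pt": {"var_list": ["bb_mass", "tau1_pt"]}}
print(flatten_variables(variables))       # {'bb_mass', 'tau1_pt'} (set order may vary)
print(hist_dims(hist_cfg, "mass_vs_pt"))  # 2
print(count_unit_bins(10))                # 12 -> 1D: only x gains under/overflow
print(count_unit_bins(10, 5))             # 84 -> 2D: (10+2) * (5+2)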