diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py
index 96536c50..acf4a6d8 100644
--- a/Analysis/HistMergerFromHists.py
+++ b/Analysis/HistMergerFromHists.py
@@ -4,7 +4,6 @@
 import time
 import importlib
 
-
 if __name__ == "__main__":
     sys.path.append(os.environ["ANALYSIS_PATH"])
 
@@ -38,9 +37,10 @@ def checkFile(inFileRoot, channels, qcdRegions, categories):
     return True
 
 
-def fill_all_hists_dict(
+def fill_hists(
     items_dict,
-    all_hist_dict_per_var_and_datasettype,
+    all_hist_dict,
+    dataset_type,
     var_input,
     unc_source="Central",
 ):
@@ -54,9 +54,13 @@ def fill_all_hists_dict(
         if var != var_check:
             continue
         final_key = (key_tuple, (unc_source, scale))
-        if final_key not in all_hist_dict_per_var_and_datasettype:
-            all_hist_dict_per_var_and_datasettype[final_key] = []
-        all_hist_dict_per_var_and_datasettype[final_key].append(var_hist)
+        if dataset_type not in all_hist_dict:
+            all_hist_dict[dataset_type] = {}
+        if final_key not in all_hist_dict[dataset_type]:
+            var_hist.SetDirectory(0)
+            all_hist_dict[dataset_type][final_key] = var_hist
+        else:
+            all_hist_dict[dataset_type][final_key].Add(var_hist)
 
 
 def MergeHistogramsPerType(all_hists_dict):
@@ -83,7 +87,7 @@ def GetBTagWeightDict(
     for dataset_type in all_hists_dict.keys():
         all_hists_dict_1D[dataset_type] = {}
         for key_name, histogram in all_hists_dict[dataset_type].items():
-            (key_1, key_2) = key_name
+            key_1, key_2 = key_name
             if var not in boosted_variables:
                 ch, reg, cat = key_1
 
@@ -235,23 +239,22 @@ def GetBTagWeightDict(
                 f"input file for dataset {dataset_name} (with path= {inFile_path}) does not exist, skipping"
             )
             continue
-        with ROOT.TFile.Open(inFile_path, "READ") as inFile:
-            # check that the file is ok
-            if inFile.IsZombie():
-                raise RuntimeError(f"{inFile_path} is zombie")
-            if not checkFile(inFile, channels, regions, all_categories):
-                raise RuntimeError(f"{dataset_name} has void file")
         base_process_name = dataset_cfg_dict[dataset_name]["process_name"]
         dataset_type = setup.base_processes[base_process_name]["parent_process"]
         if dataset_type not in all_hists_dict.keys():
             all_hists_dict[dataset_type] = {}
-        all_items = load_all_items(inFile_path)
-        fill_all_hists_dict(
-            all_items, all_hists_dict[dataset_type], args.var, args.uncSource
-        )  # to add: , unc_source="Central", scale="Central"
-    MergeHistogramsPerType(all_hists_dict)
+        with ROOT.TFile.Open(inFile_path, "READ") as inFile:
+            # check that the file is ok
+            if inFile.IsZombie():
+                raise RuntimeError(f"{inFile_path} is zombie")
+            if not checkFile(inFile, channels, regions, all_categories):
+                raise RuntimeError(f"{dataset_name} has void file")
+            all_items = get_all_items_recursive(inFile)
+            fill_hists(
+                all_items, all_hists_dict, dataset_type, args.var, args.uncSource
+            )  # to add: , unc_source="Central", scale="Central"
 
     # here there should be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc.
     # analysis.ApplyMergeCustomisations()  # --> here go the QCD and bTag functions
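[Note] The rewritten fill_hists keeps histograms beyond the lifetime of the TFile they were read from, which is why the first occurrence is detached with SetDirectory(0) before being stored, while later occurrences are merged in place with Add. A minimal sketch of the pattern (hypothetical accumulate helper; SetDirectory and Add are standard ROOT calls):

    def accumulate(target, key, hist):
        if key not in target:
            # detach from the open file so the histogram survives
            # the enclosing `with ROOT.TFile.Open(...)` block
            hist.SetDirectory(0)
            target[key] = hist
        else:
            target[key].Add(hist)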
@@ -293,7 +296,7 @@ def GetBTagWeightDict(
     outFile = ROOT.TFile(args.outFile, "RECREATE")
     for dataset_type in all_hists_dict.keys():
         for key in all_hists_dict[dataset_type].keys():
-            (key_dir, (uncName, uncScale)) = key
+            key_dir, (uncName, uncScale) = key
             # here there can be some custom requirements - e.g. regions / categories to not merge, datasets to ignore
             dir_name = "/".join(key_dir)
             dir_ptr = Utilities.mkdir(outFile, dir_name)
diff --git a/Analysis/HistPlotter.py b/Analysis/HistPlotter.py
index c4e15ef0..b825ef36 100644
--- a/Analysis/HistPlotter.py
+++ b/Analysis/HistPlotter.py
@@ -23,45 +23,12 @@ def GetHistName(dataset_name, dataset_type, uncName, unc_scale, global_cfg_dict)
     return histName
 
 
-def RebinHisto(hist_initial, new_binning, dataset, wantOverflow=True, verbose=False):
-    new_binning_array = array.array("d", new_binning)
-    new_hist = hist_initial.Rebin(len(new_binning) - 1, dataset, new_binning_array)
-    if dataset == "data":
-        new_hist.SetBinErrorOption(ROOT.TH1.kPoisson)
-    if wantOverflow:
-        n_finalbin = new_hist.GetBinContent(new_hist.GetNbinsX())
-        n_overflow = new_hist.GetBinContent(new_hist.GetNbinsX() + 1)
-        new_hist.SetBinContent(new_hist.GetNbinsX(), n_finalbin + n_overflow)
-        err_finalbin = new_hist.GetBinError(new_hist.GetNbinsX())
-        err_overflow = new_hist.GetBinError(new_hist.GetNbinsX() + 1)
-        new_hist.SetBinError(
-            new_hist.GetNbinsX(),
-            math.sqrt(err_finalbin * err_finalbin + err_overflow * err_overflow),
-        )
-
-    if verbose:
-        for nbin in range(0, len(new_binning)):
-            print(
-                f"nbin = {nbin}, content = {new_hist.GetBinContent(nbin)}, error {new_hist.GetBinError(nbin)}"
-            )
-    fix_negative_contributions, debug_info, negative_bins_info = (
-        FixNegativeContributions(new_hist)
-    )
-    if not fix_negative_contributions:
-        print("negative contribution not fixed")
-        print(fix_negative_contributions, debug_info, negative_bins_info)
-    for nbin in range(0, new_hist.GetNbinsX() + 1):
-        content = new_hist.GetBinContent(nbin)
-        if content < 0:
-            print(f"for {dataset}, bin {nbin} content is < 0: {content}")
-
-    return new_hist
-
-
 def findNewBins(hist_cfg_dict, var, **keys):
     cfg = hist_cfg_dict.get(var, {})
+    if "2d" in cfg:
+        return cfg["2d"]
+
     if "x_rebin" not in cfg:
         if "x_bins" not in cfg:
             raise RuntimeError(f'bins definition not found for "{var}"')
@@ -100,21 +67,6 @@ def recursive_search(d, remaining_keys):
     return result
 
 
-def getNewBins(bins):
-    if type(bins) == list:
-        final_bins = bins
-    else:
-        n_bins, bin_range = bins.split("|")
-        start, stop = bin_range.split(":")
-        bin_width = (float(stop) - float(start)) / int(n_bins)
-        final_bins = []
-        bin_center = float(start)
-        while bin_center >= float(start) and bin_center <= float(stop):
-            final_bins.append(bin_center)
-            bin_center = bin_center + bin_width
-    return final_bins
-
-
 if __name__ == "__main__":
     import argparse
     import FLAF.PlotKit.Plotter as Plotter
@@ -265,7 +217,9 @@ def getNewBins(bins):
         hist_cfg_dict[var_entry]["use_log_x"] = True
 
     rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[var_entry].keys()
-    bins_to_compute = hist_cfg_dict[var_entry]["x_bins"]
+    bins_to_compute = (
+        hist_cfg_dict[var_entry]["x_bins"] if not rebin_condition else None
+    )
 
     if rebin_condition:
         bins_to_compute = findNewBins(
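[Note] findNewBins resolves per-selection binnings via recursive_search over the keyword arguments, so x_rebin is expected to be a nested mapping keyed by those selections; the new "2d" key short-circuits that lookup. An illustrative entry (hypothetical values; the real schema lives in the analysis histogram config):

    hist_cfg_dict = {
        "mT_tot": {
            "x_bins": "40|0:400",
            "x_rebin": {
                "eTau": {"res1b": [0, 50, 100, 200, 400]},
                "muTau": {"res1b": [0, 60, 120, 240, 400]},
            },
        }
    }
    # bins = findNewBins(hist_cfg_dict, "mT_tot", channel="eTau", category="res1b")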
diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py
index 959682d8..a931e4ba 100644
--- a/Analysis/HistProducerFromNTuple.py
+++ b/Analysis/HistProducerFromNTuple.py
@@ -8,7 +8,7 @@
 if __name__ == "__main__":
     sys.path.append(os.environ["ANALYSIS_PATH"])
 
-from FLAF.Common.HistHelper import *
+import FLAF.Common.HistHelper as HistHelper
 import FLAF.Common.Utilities as Utilities
 from FLAF.Common.Setup import Setup
 from FLAF.RunKit.run_tools import ps_call
@@ -36,7 +36,19 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
         dir_ptr = Utilities.mkdir(outFile, dir_name)
         merged_hist = model.GetHistogram().Clone()
-        for i in range(0, unit_hist.GetNbinsX() + 2):
+        # The x axis needs +2 bins for under/overflow; the y and z axes get
+        # the +2 only if the histogram actually has that dimension, since
+        # GetNbinsY()/GetNbinsZ() return 1 for missing axes and must not be padded.
+        N_xbins = unit_hist.GetNbinsX() + 2
+        N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1
+        N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
+        N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1
+        N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
+        N_bins = N_xbins * N_ybins * N_zbins
+        # THnD exposes GetNbins instead of the per-axis getters
+        N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins
+        # This loops over all global bins, potentially several times; it could
+        # be moved to C++ if it becomes a bottleneck.
+        for i in range(0, N_bins):
             bin_content = unit_hist.GetBinContent(i)
             bin_error = unit_hist.GetBinError(i)
             merged_hist.SetBinContent(i, bin_content)
@@ -46,7 +58,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
     if len(hist_list) > 1:
         for model, unit_hist in hist_list[1:]:
             hist = model.GetHistogram()
-            for i in range(0, unit_hist.GetNbinsX() + 2):
+            for i in range(0, N_bins):
                 bin_content = unit_hist.GetBinContent(i)
                 bin_error = unit_hist.GetBinError(i)
                 hist.SetBinContent(i, bin_content)
@@ -61,10 +73,35 @@
 
 def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale):
-    model, unit_bin_model = GetModel(hist_cfg_dict, var, return_unit_bin_model=True)
-    unit_hist = rdf.Filter(filter_to_apply).Histo1D(
-        unit_bin_model, f"{var}_bin", weight_name
-    )
+    var_entry = HistHelper.findBinEntry(hist_cfg_dict, var)
+    dims = (
+        1
+        if not hist_cfg_dict[var_entry].get("var_list", False)
+        else len(hist_cfg_dict[var_entry]["var_list"])
+    )
+
+    model, unit_bin_model = HistHelper.GetModel(
+        hist_cfg_dict, var, dims, return_unit_bin_model=True
+    )
+    var_bin_list = (
+        [f"{v}_bin" for v in hist_cfg_dict[var_entry]["var_list"]]
+        if dims > 1
+        else [f"{var}_bin"]
+    )
+    if dims == 1:
+        unit_hist = rdf.Filter(filter_to_apply).Histo1D(
+            unit_bin_model, *var_bin_list, weight_name
+        )
+    elif dims == 2:
+        unit_hist = rdf.Filter(filter_to_apply).Histo2D(
+            unit_bin_model, *var_bin_list, weight_name
+        )
+    elif dims == 3:
+        unit_hist = rdf.Filter(filter_to_apply).Histo3D(
+            unit_bin_model, *var_bin_list, weight_name
+        )
+    else:
+        raise RuntimeError("Only 1D, 2D and 3D histograms are supported")
     return model, unit_hist
 
@@ -174,7 +211,7 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
     for filter_key in key_filter_dict.keys():
         print(filter_key)
         for further_cut_name in [None] + list(further_cuts.keys()):
-            model, unit_bin_model = GetModel(
-                hist_cfg_dict, var, return_unit_bin_model=True
+            var_entry = HistHelper.findBinEntry(hist_cfg_dict, var)
+            dims = len(hist_cfg_dict[var_entry].get("var_list", [var]))
+            model, unit_bin_model = HistHelper.GetModel(
+                hist_cfg_dict, var, dims, return_unit_bin_model=True
             )
             nbins = unit_bin_model.fNbinsX
@@ -276,7 +313,13 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
     )
 
     variables = setup.global_params["variables"]
-    vars_needed = set(variables)
+    vars_needed = set()
+    for var in variables:
+        if isinstance(var, dict) and "vars" in var:
+            for v in var["vars"]:
+                vars_needed.add(v)
+        else:
+            vars_needed.add(var)
     for further_cut_name, (vars_for_cut, _) in further_cuts.items():
         for var_for_cut in vars_for_cut:
             if var_for_cut:
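[Note] The per-axis bin arithmetic in SaveHist (and its twin in Common/HistHelper.py's RebinHisto) can likely be collapsed: for the TH1/TH2/TH3 family, TH1::GetNcells() already returns the total number of global bins including under/overflow on every axis the histogram has, and THnBase::GetNbins() covers THnD. A sketch, assuming only those two families reach this code:

    def count_global_bins(hist):
        """Total number of global bins, including under/overflow."""
        if hasattr(hist, "GetNbins"):   # THnD and friends
            return hist.GetNbins()
        return hist.GetNcells()         # TH1/TH2/TH3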
diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py
index 2c798d24..90ea2d76 100644
--- a/Analysis/HistTupleProducer.py
+++ b/Analysis/HistTupleProducer.py
@@ -37,8 +37,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
         start, stop = bin_range.split(":")
         axis_definition = f"static const TAxis axis({n_bins}, {start}, {stop});"
 
-    ROOT.gInterpreter.Declare(
-        f"""
+    ROOT.gInterpreter.Declare(f"""
     #include "ROOT/RVec.hxx"
     #include "TAxis.h"
 
@@ -56,8 +55,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
         }}
         return out;
     }}
-    """
-    )
+    """)
 
 
 def createHistTuple(
@@ -128,7 +126,15 @@ def createHistTuple(
     print("Scale uncertainties to consider:", scale_uncertainties)
 
     print("Defining binnings for variables")
+    flatten_vars = set()
     for var in variables:
+        if isinstance(var, dict) and "vars" in var:
+            for v in var["vars"]:
+                flatten_vars.add(v)
+        else:
+            flatten_vars.add(var)
+
+    for var in flatten_vars:
         DefineBinnedColumn(hist_cfg_dict, var)
 
     snaps = []
@@ -197,7 +203,7 @@ def createHistTuple(
             dfw.colToSave.append(desc["weight"])
 
     print("Defining binned columns")
-    for var in variables:
+    for var in flatten_vars:
         dfw.df = dfw.df.Define(f"{var}_bin", f"get_{var}_bin({var})")
         dfw.colToSave.append(f"{var}_bin")
 
diff --git a/Analysis/tasks.py b/Analysis/tasks.py
index 0a749fcd..ded3ddfd 100644
--- a/Analysis/tasks.py
+++ b/Analysis/tasks.py
@@ -47,7 +47,16 @@ def workflow_requires(self):
         }
         req_dict["AnalysisCacheTask"] = []
         var_produced_by = self.setup.var_producer_map
-        for var_name in self.global_params["variables"]:
+
+        flatten_vars = set()
+        for var in self.global_params["variables"]:
+            if isinstance(var, dict) and "vars" in var:
+                for v in var["vars"]:
+                    flatten_vars.add(v)
+            else:
+                flatten_vars.add(var)
+
+        for var_name in flatten_vars:
             producer_to_run = var_produced_by.get(var_name, None)
             if producer_to_run is not None:
                 req_dict["AnalysisCacheTask"].append(
@@ -149,13 +158,23 @@ def create_branch_map(self):
         ]
         datasets_to_consider.append("data")
 
+        flatten_vars = set()
+        for var in self.global_params["variables"]:
+            if isinstance(var, dict) and "vars" in var:
+                for v in var["vars"]:
+                    flatten_vars.add(v)
+            else:
+                flatten_vars.add(var)
+
         need_cache_list = [
             (var_name in var_produced_by, var_produced_by.get(var_name, None))
-            for var_name in self.global_params["variables"]
+            for var_name in flatten_vars
         ]
         producer_list = []
         need_cache_global = any(item[0] for item in need_cache_list)
-        for var_name in self.global_params["variables"]:
+        for var_name in flatten_vars:
             need_cache = True if var_name in var_produced_by else False
             producer_to_run = (
                 var_produced_by[var_name] if var_name in var_produced_by else None
             )
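[Note] The same variables-flattening loop now appears four times (HistProducerFromNTuple.py, HistTupleProducer.py, and twice in tasks.py). A shared helper, e.g. in FLAF.Common.HistHelper, would keep the behaviour in one place; a minimal sketch (hypothetical name flatten_variables, not part of this patch):

    def flatten_variables(variables):
        """Expand multi-variable entries ({"name": ..., "vars": [...]})
        into the flat set of variable names the producers need."""
        flat = set()
        for var in variables:
            if isinstance(var, dict) and "vars" in var:
                flat.update(var["vars"])
            else:
                flat.add(var)
        return flat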
@@ -353,6 +372,8 @@ def create_branch_map(self):
     @workflow_condition.output
     def output(self):
         var, prod_br, dataset_name = self.branch_data
+        if isinstance(var, dict):
+            var = var["name"]
         output_path = os.path.join(
             "hists", self.version, self.period, var, f"{dataset_name}.root"
         )
@@ -384,6 +405,7 @@ def run(self):
                 stack.enter_context((inp).localize("r")).path
                 for inp in self.input()[0]
             ]
+            var = var["name"] if isinstance(var, dict) else var
             tmpFile = os.path.join(job_home, f"HistFromNtuple_{var}.root")
 
             HistFromNtupleProducer_cmd = [
@@ -504,6 +526,9 @@ def create_branch_map(self):
             prod_br_list,
             current_dataset,
         ) in HistFromNtupleProducerTask_branch_map.items():
+            var_name = var_name["name"] if isinstance(var_name, dict) else var_name
             if var_name not in all_datasets.keys():
                 all_datasets[var_name] = []
             all_datasets[var_name].append((br_idx, current_dataset))
@@ -936,10 +961,17 @@ def workflow_requires(self):
         merge_map = HistMergerTask.req(
             self, branch=-1, branches=(), customisations=self.customisations
         ).create_branch_map()
+
+        branch_set = set()
+        for br_idx, var in self.branch_map.items():
+            for br, (v, _, _) in merge_map.items():
+                if v == var:
+                    branch_set.add(br)
+
         return {
             "merge": HistMergerTask.req(
                 self,
-                branches=tuple(merge_map.keys()),
+                branches=tuple(sorted(branch_set)),
                 customisations=self.customisations,
             )
         }
diff --git a/Common/HistHelper.py b/Common/HistHelper.py
index d772b81d..11ebbdc1 100644
--- a/Common/HistHelper.py
+++ b/Common/HistHelper.py
@@ -11,6 +11,9 @@
 
 import FLAF.Common.Utilities as Utilities
 
+ROOT.gROOT.ProcessLine(f".include {os.environ['ANALYSIS_PATH']}")
+ROOT.gROOT.ProcessLine('#include "FLAF/include/HistHelper.h"')
+
 
 def findBinEntry(hist_cfg_dict, var_name):
     """
@@ -42,7 +45,7 @@ def get_all_items_recursive(root_dir, path=()):
         if obj.InheritsFrom("TDirectory"):
             items_dict.update(get_all_items_recursive(obj, path + (key.GetName(),)))
         elif obj.InheritsFrom("TH1"):
-            obj.SetDirectory(0)
+            # ownership stays with the file; callers that keep histograms
+            # beyond the file's lifetime detach them (see fill_hists)
             local_items[key.GetName()] = obj
 
     if local_items:
@@ -169,6 +172,9 @@ def getNewBins(bins):
     if isinstance(bins, list):
         return bins
 
+    # 2D "combined_bins" specifications are passed through untouched
+    if isinstance(bins, dict):
+        return bins
+
     n_bins, bin_range = bins.split("|")
     start, stop = map(float, bin_range.split(":"))
     step = (stop - start) / int(n_bins)
@@ -177,8 +183,53 @@ def getNewBins(bins):
 
 def RebinHisto(hist_initial, new_binning, sample, wantOverflow=True, verbose=False):
-    new_binning_array = array.array("d", new_binning)
-    new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array)
+    if isinstance(new_binning, dict):
+        # total number of global bins, including under/overflow on every
+        # axis the histogram actually has
+        N_xbins = hist_initial.GetNbinsX() + 2
+        N_ybins = hist_initial.GetNbinsY() if hasattr(hist_initial, "GetNbinsY") else 1
+        N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
+        N_zbins = hist_initial.GetNbinsZ() if hasattr(hist_initial, "GetNbinsZ") else 1
+        N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
+        N_bins = N_xbins * N_ybins * N_zbins
+        # THnD exposes GetNbins instead of the per-axis getters
+        N_bins = (
+            hist_initial.GetNbins() if hasattr(hist_initial, "GetNbins") else N_bins
+        )
+
+        # Prepare the inputs for the C++ helper
+        y_bin_ranges = ROOT.std.vector("std::pair<float, float>")()
+        output_bin_edges_vec = ROOT.std.vector("std::vector<float>")()
+
+        for combined_bin in new_binning["combined_bins"]:
+            # Parse the y-bin range
+            y_min, y_max = combined_bin["y_bin"]
+            y_bin_ranges.push_back(ROOT.std.pair("float", "float")(y_min, y_max))
+
+            # Parse the x_bins spec ("nbins|min:max" or a list of bin edges)
+            out_spec = combined_bin["x_bins"]
+            out_edges = ROOT.std.vector("float")()
+            if isinstance(out_spec, list):
+                for edge in out_spec:
+                    out_edges.push_back(float(edge))
+            else:
+                n_out_bins, out_range = out_spec.split("|")
+                out_min, out_max = map(float, out_range.split(":"))
+                n_out_bins = int(n_out_bins)
+                # create uniform bins
+                step = (out_max - out_min) / n_out_bins
+                for i in range(n_out_bins + 1):
+                    out_edges.push_back(out_min + i * step)
+            output_bin_edges_vec.push_back(out_edges)
+
+        # Call the C++ helper, which returns a new histogram
+        new_hist = ROOT.analysis.rebinHistogramDict(
+            hist_initial, N_bins, y_bin_ranges, output_bin_edges_vec
+        )
+        new_hist.SetName(sample)
+
+    else:
+        new_binning_array = array.array("d", new_binning)
+        new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array)
     if sample == "data":
         new_hist.SetBinErrorOption(ROOT.TH1.kPoisson)
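[Note] For reference, the dict branch above consumes a structure of this shape (the key names combined_bins, y_bin and x_bins are the ones the code reads; the values are illustrative):

    new_binning = {
        "combined_bins": [
            # y in [0, 1): 4 uniform x bins between 0 and 200
            {"y_bin": [0, 1], "x_bins": "4|0:200"},
            # y in [1, 2): explicit x bin edges
            {"y_bin": [1, 2], "x_bins": [0, 50, 100, 200]},
        ]
    }
    # RebinHisto(hist_2d, new_binning, "TT") unrolls the 2D histogram into
    # a 1D histogram with one concatenated section per y range.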
@@ -223,24 +274,96 @@ def GetBinVec(hist_cfg, var):
         n_bins, bin_range = x_bins.split("|")
         start, stop = bin_range.split(":")
         edges = np.linspace(float(start), float(stop), int(n_bins)).tolist()
-        # print(len(edges))
         x_bins_vec = Utilities.ListToVector(edges, "float")
     return x_bins_vec
 
 
-def GetModel(hist_cfg, var, return_unit_bin_model=False):
+def GetModel(hist_cfg, var, dims, return_unit_bin_model=False):
+    THModel_Inputs = []
+    unit_bin_Inputs = []
     var_entry = findBinEntry(hist_cfg, var)
-    x_bins = hist_cfg[var_entry]["x_bins"]
-    if type(hist_cfg[var_entry]["x_bins"]) == list:
-        x_bins_vec = Utilities.ListToVector(x_bins, "double")
-        model = ROOT.RDF.TH1DModel("", "", x_bins_vec.size() - 1, x_bins_vec.data())
+    if dims == 1:
+        x_bins = hist_cfg[var_entry]["x_bins"]
+        if isinstance(x_bins, list):
+            x_bins_vec = Utilities.ListToVector(x_bins, "double")
+        else:
+            n_bins, bin_range = x_bins.split("|")
+            start, stop = bin_range.split(":")
+            edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist()
+            x_bins_vec = Utilities.ListToVector(edges, "double")
+        THModel_Inputs.append(x_bins_vec.size() - 1)
+        THModel_Inputs.append(x_bins_vec.data())
+        model = ROOT.RDF.TH1DModel("", "", *THModel_Inputs)
+        if not return_unit_bin_model:
+            return model
+        unit_bin_Inputs = [model.fNbinsX, -0.5, model.fNbinsX - 0.5]
+        unit_bin_model = ROOT.RDF.TH1DModel("", "", *unit_bin_Inputs)
+
+    elif dims == 2:
+        # keep the vectors alive so the .data() pointers stay valid
+        # until the model is constructed below
+        list_var_bins_vec = []
+        for var_2d in hist_cfg[var_entry]["var_list"]:
+            var_bin_name = f"{var_2d}_bins"
+            var_bins = (
+                hist_cfg[var_entry][var_bin_name]
+                if var_bin_name in hist_cfg[var_entry]
+                else hist_cfg[var_2d]["x_bins"]
+            )
+            if isinstance(var_bins, list):
+                var_bins_vec = Utilities.ListToVector(var_bins, "double")
+            else:
+                n_bins, bin_range = var_bins.split("|")
+                start, stop = bin_range.split(":")
+                edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist()
+                var_bins_vec = Utilities.ListToVector(edges, "double")
+            list_var_bins_vec.append(var_bins_vec)
+            THModel_Inputs.append(var_bins_vec.size() - 1)
+            THModel_Inputs.append(var_bins_vec.data())
+        model = ROOT.RDF.TH2DModel("", "", *THModel_Inputs)
+        if not return_unit_bin_model:
+            return model
+        unit_bin_Inputs = [
+            model.fNbinsX,
+            -0.5,
+            model.fNbinsX - 0.5,
+            model.fNbinsY,
+            -0.5,
+            model.fNbinsY - 0.5,
+        ]
+        unit_bin_model = ROOT.RDF.TH2DModel("", "", *unit_bin_Inputs)
+
+    elif dims == 3:
+        # keep the vectors alive so the .data() pointers stay valid
+        list_var_bins_vec = []
+        for var_3d in hist_cfg[var_entry]["var_list"]:
+            var_bin_name = f"{var_3d}_bins"
+            var_bins = (
+                hist_cfg[var_entry][var_bin_name]
+                if var_bin_name in hist_cfg[var_entry]
+                else hist_cfg[var_3d]["x_bins"]
+            )
+            if isinstance(var_bins, list):
+                var_bins_vec = Utilities.ListToVector(var_bins, "double")
+            else:
+                n_bins, bin_range = var_bins.split("|")
+                start, stop = bin_range.split(":")
+                edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist()
+                var_bins_vec = Utilities.ListToVector(edges, "double")
+            list_var_bins_vec.append(var_bins_vec)
+            THModel_Inputs.append(var_bins_vec.size() - 1)
+            THModel_Inputs.append(var_bins_vec.data())
+        model = ROOT.RDF.TH3DModel("", "", *THModel_Inputs)
+        if not return_unit_bin_model:
+            return model
+        unit_bin_Inputs = [
+            model.fNbinsX,
+            -0.5,
+            model.fNbinsX - 0.5,
+            model.fNbinsY,
+            -0.5,
+            model.fNbinsY - 0.5,
+            model.fNbinsZ,
+            -0.5,
+            model.fNbinsZ - 0.5,
+        ]
+        unit_bin_model = ROOT.RDF.TH3DModel("", "", *unit_bin_Inputs)
+
     else:
-        n_bins, bin_range = x_bins.split("|")
-        start, stop = bin_range.split(":")
-        model = ROOT.RDF.TH1DModel("", "", int(n_bins), float(start), float(stop))
-    if not return_unit_bin_model:
-        return model
-    unit_bin_model = ROOT.RDF.TH1DModel(
-        "", "", model.fNbinsX, -0.5, model.fNbinsX - 0.5
-    )
+        # TODO: THnD support via ROOT.RDF.THnDModel
+        raise RuntimeError(f"GetModel: {dims}-dimensional histograms are not supported")
+
     return model, unit_bin_model
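[Note] A histogram-config entry driving the dims == 2 path would look like this (shape inferred from the lookups above; the names and binnings are illustrative):

    hist_cfg = {
        "pt_vs_eta": {
            "var_list": ["pt", "eta"],  # dims = len(var_list) = 2
            "pt_bins": "20|0:200",      # optional per-variable override
            # no "eta_bins" here: falls back to hist_cfg["eta"]["x_bins"]
        },
        "pt": {"x_bins": "20|0:200"},
        "eta": {"x_bins": [-2.5, -1.0, 0.0, 1.0, 2.5]},
    }
    # model, unit = GetModel(hist_cfg, "pt_vs_eta", dims=2, return_unit_bin_model=True)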
diff --git a/include/HistHelper.h b/include/HistHelper.h
index 020f2c5b..e55f9559 100644
--- a/include/HistHelper.h
+++ b/include/HistHelper.h
@@ -11,6 +11,10 @@
 
 #include "EntryQueue.h"
 
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
 /*
 namespace kin_fit {
 struct FitResults {
@@ -159,5 +163,78 @@ namespace analysis {
             return df_node;
         }
     };
-
+
+    TH1D* rebinHistogramDict(TH1* hist_initial, int N_bins,
+                             const std::vector<std::pair<float, float>>& y_bin_ranges,
+                             const std::vector<std::vector<float>>& output_bin_edges) {
+        // Flatten the per-section output bin edges into a single array; each
+        // y section is shifted by the last edge of the previous section so the
+        // sections line up back to back on one axis.
+        std::vector<float> all_output_edges;
+        float last_edge = 0.0;
+        for (const auto& edges : output_bin_edges) {
+            for (float edge : edges) {
+                all_output_edges.push_back(edge + last_edge);
+            }
+            last_edge = all_output_edges.back();
+        }
+        // Sort and remove duplicates (section boundaries appear twice)
+        std::sort(all_output_edges.begin(), all_output_edges.end());
+        all_output_edges.erase(std::unique(all_output_edges.begin(), all_output_edges.end()),
+                               all_output_edges.end());
+
+        // Create the output histogram with variable binning;
+        // the caller takes ownership of the returned histogram
+        TH1D* hist_output = new TH1D("rebinned", "rebinned",
+                                     all_output_edges.size() - 1, all_output_edges.data());
+        hist_output->Sumw2();
+
+        // Find the local bin index of `value` within `edges`
+        auto findBinIndex = [](float value, const std::vector<float>& edges) -> int {
+            if (edges.size() < 2) return -1;
+            for (size_t i = 0; i < edges.size() - 1; ++i) {
+                if (value >= edges[i] && value < edges[i + 1]) {
+                    return static_cast<int>(i);
+                }
+            }
+            return -1;
+        };
+
+        // Iterate over all global bins of the original histogram
+        for (int i = 0; i < N_bins; ++i) {
+            int binX, binY, binZ;
+            hist_initial->GetBinXYZ(i, binX, binY, binZ);
+
+            // Bin centers (actual axis values)
+            float x_value = hist_initial->GetXaxis()->GetBinCenter(binX);
+            float y_value = hist_initial->GetYaxis()->GetBinCenter(binY);
+
+            // Bin content and error
+            double bin_content = hist_initial->GetBinContent(i);
+            double bin_error = hist_initial->GetBinError(i);
+            double bin_error2 = bin_error * bin_error;
+
+            // Find which y range this y value falls into
+            int y_bin_idx = -1;
+            for (size_t j = 0; j < y_bin_ranges.size(); ++j) {
+                if (y_value >= y_bin_ranges[j].first && y_value < y_bin_ranges[j].second) {
+                    y_bin_idx = static_cast<int>(j);
+                    break;
+                }
+            }
+            if (y_bin_idx == -1) continue;  // y value outside all ranges
+            // Local output bin within this y section
+            int local_out_bin = findBinIndex(x_value, output_bin_edges[y_bin_idx]);
+            if (local_out_bin == -1) continue;  // x value outside the section's bins
+            // Offset = number of output bins in all previous y sections
+            int section_offset = 0;
+            for (int prev_y = 0; prev_y < y_bin_idx; ++prev_y) {
+                section_offset += static_cast<int>(output_bin_edges[prev_y].size()) - 1;
+            }
+            // Global bin index: offset + local position (+1, ROOT bins are 1-indexed)
+            int global_bin = section_offset + local_out_bin + 1;
+            if (global_bin >= 1 && global_bin <= (int)all_output_edges.size() - 1) {
+                hist_output->SetBinContent(global_bin,
+                                           hist_output->GetBinContent(global_bin) + bin_content);
+                hist_output->SetBinError(global_bin,
+                                         std::sqrt(std::pow(hist_output->GetBinError(global_bin), 2) + bin_error2));
+            }
+        }
+        return hist_output;
+    }
 }  // namespace analysis
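[Note] To make the global-bin bookkeeping in rebinHistogramDict concrete: with two y sections providing 3 and 4 output x bins, section 0 fills global bins 1..3 and section 1 fills bins 4..7. The same arithmetic in a few lines of Python (illustrative edges):

    output_bin_edges = [[0, 50, 100, 200], [0, 60, 120, 240, 400]]  # 3 + 4 bins
    section_offset = 0
    for y_idx, edges in enumerate(output_bin_edges):
        n_local = len(edges) - 1
        # +1 because ROOT bin numbering starts at 1
        print(f"y section {y_idx}: global bins {section_offset + 1}..{section_offset + n_local}")
        section_offset += n_local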
diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh
index 47e1e843..8c2e4ec3 100755
--- a/run_tools/mk_flaf_env.sh
+++ b/run_tools/mk_flaf_env.sh
@@ -33,6 +33,7 @@ install() {
     run_cmd pip install fastcrc
     run_cmd pip install bayesian-optimization
     run_cmd pip install yamllint
+    run_cmd pip install black
 }
 
 join_by() {
@@ -70,10 +71,10 @@ export LD_LIBRARY_PATH=${ld_lib_path}
 EOF
 
-    link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java
+    link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java black blackd
     link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/bin $env_base/bin go gofmt
     link_all $lcg_base/lib $env_base/lib/python3.12/site-packages python3.12
-    link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info
+    link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools black blackd blib2to3 pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info
     link_all $lcg_base/lib64 $env_base/lib/python3.12/site-packages cairo cmake libonnx_proto.a libsvm.so.2 pkgconfig ThePEG libavh_olo.a libff.a libqcdloop.a python3.12
     link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib $env_base/lib
     link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib64 $env_base/lib
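[Note] black is presumably added here so the Python sources can be auto-formatted from within the environment; once the env is built, running `black --check Analysis Common` verifies formatting without modifying files, and plain `black` on the same paths applies it.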