Skip to content
41 changes: 22 additions & 19 deletions Analysis/HistMergerFromHists.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time
import importlib


if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

Expand Down Expand Up @@ -38,9 +37,10 @@ def checkFile(inFileRoot, channels, qcdRegions, categories):
return True


def fill_all_hists_dict(
def fill_hists(
items_dict,
all_hist_dict_per_var_and_datasettype,
all_hist_dict,
dataset_type,
var_input,
unc_source="Central",
):
Expand All @@ -54,9 +54,13 @@ def fill_all_hists_dict(
if var != var_check:
continue
final_key = (key_tuple, (unc_source, scale))
if final_key not in all_hist_dict_per_var_and_datasettype:
all_hist_dict_per_var_and_datasettype[final_key] = []
all_hist_dict_per_var_and_datasettype[final_key].append(var_hist)
if dataset_type not in all_hist_dict.keys():
all_hist_dict[dataset_type] = {}
if final_key not in all_hist_dict[dataset_type]:
var_hist.SetDirectory(0)
all_hist_dict[dataset_type][final_key] = var_hist
else:
all_hist_dict[dataset_type][final_key].Add(var_hist)


def MergeHistogramsPerType(all_hists_dict):
Expand All @@ -83,7 +87,7 @@ def GetBTagWeightDict(
for dataset_type in all_hists_dict.keys():
all_hists_dict_1D[dataset_type] = {}
for key_name, histogram in all_hists_dict[dataset_type].items():
(key_1, key_2) = key_name
key_1, key_2 = key_name

if var not in boosted_variables:
ch, reg, cat = key_1
Expand Down Expand Up @@ -235,23 +239,22 @@ def GetBTagWeightDict(
f"input file for dataset {dataset_name} (with path= {inFile_path}) does not exist, skipping"
)
continue
with ROOT.TFile.Open(inFile_path, "READ") as inFile:
# check that the file is ok
if inFile.IsZombie():
raise RuntimeError(f"{inFile_path} is zombie")
if not checkFile(inFile, channels, regions, all_categories):
raise RuntimeError(f"{dataset_name} has void file")

base_process_name = dataset_cfg_dict[dataset_name]["process_name"]
dataset_type = setup.base_processes[base_process_name]["parent_process"]
if dataset_type not in all_hists_dict.keys():
all_hists_dict[dataset_type] = {}

all_items = load_all_items(inFile_path)
fill_all_hists_dict(
all_items, all_hists_dict[dataset_type], args.var, args.uncSource
) # to add: , unc_source="Central", scale="Central"
MergeHistogramsPerType(all_hists_dict)
with ROOT.TFile.Open(inFile_path, "READ") as inFile:
# check that the file is ok
if inFile.IsZombie():
raise RuntimeError(f"{inFile_path} is zombie")
if not checkFile(inFile, channels, regions, all_categories):
raise RuntimeError(f"{dataset_name} has void file")
all_items = get_all_items_recursive(inFile)
fill_hists(
all_items, all_hists_dict, dataset_type, args.var, args.uncSource
) # to add: , unc_source="Central", scale="Central"

# here there should be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc.
# analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions
Expand Down Expand Up @@ -293,7 +296,7 @@ def GetBTagWeightDict(
outFile = ROOT.TFile(args.outFile, "RECREATE")
for dataset_type in all_hists_dict.keys():
for key in all_hists_dict[dataset_type].keys():
(key_dir, (uncName, uncScale)) = key
key_dir, (uncName, uncScale) = key
# here there can be some custom requirements - e.g. regions / categories to not merge, datasets to ignore
dir_name = "/".join(key_dir)
dir_ptr = Utilities.mkdir(outFile, dir_name)
Expand Down
58 changes: 6 additions & 52 deletions Analysis/HistPlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,45 +23,12 @@ def GetHistName(dataset_name, dataset_type, uncName, unc_scale, global_cfg_dict)
return histName


def RebinHisto(hist_initial, new_binning, dataset, wantOverflow=True, verbose=False):
    """Rebin *hist_initial* onto the explicit bin edges in *new_binning*.

    Parameters
    ----------
    hist_initial : ROOT.TH1
        Histogram to rebin (ROOT's Rebin with a name returns a new object).
    new_binning : sequence of float
        Target bin edges; produces ``len(new_binning) - 1`` bins.
    dataset : str
        Name given to the rebinned histogram; the special value ``"data"``
        switches the bin errors to Poisson intervals.
    wantOverflow : bool
        If True, fold the overflow bin into the last visible bin.
    verbose : bool
        If True, print the content/error of each bin after rebinning.

    Returns
    -------
    ROOT.TH1
        The rebinned histogram.
    """
    edges = array.array("d", new_binning)
    new_hist = hist_initial.Rebin(len(new_binning) - 1, dataset, edges)
    if dataset == "data":
        new_hist.SetBinErrorOption(ROOT.TH1.kPoisson)
    if wantOverflow:
        last_bin = new_hist.GetNbinsX()
        overflow_bin = last_bin + 1
        # Fold the overflow content into the last visible bin first, then read
        # the errors AFTER the content update (same statement order as before:
        # without Sumw2 the stored error can depend on the bin content) and
        # combine them in quadrature.
        merged = new_hist.GetBinContent(last_bin) + new_hist.GetBinContent(overflow_bin)
        new_hist.SetBinContent(last_bin, merged)
        err_last = new_hist.GetBinError(last_bin)
        err_over = new_hist.GetBinError(overflow_bin)
        new_hist.SetBinError(
            last_bin,
            math.sqrt(err_last * err_last + err_over * err_over),
        )

    if verbose:
        for idx in range(0, len(new_binning)):
            print(
                f"nbin = {idx}, content = {new_hist.GetBinContent(idx)}, error {new_hist.GetBinError(idx)}"
            )
    # FixNegativeContributions comes from module scope (HistHelper import) —
    # clamps/repairs negative bins; returns (success, debug, per-bin info).
    fix_negative_contributions, debug_info, negative_bins_info = (
        FixNegativeContributions(new_hist)
    )
    if not fix_negative_contributions:
        print("negative contribution not fixed")
        print(fix_negative_contributions, debug_info, negative_bins_info)
        for idx in range(0, new_hist.GetNbinsX() + 1):
            content = new_hist.GetBinContent(idx)
            if content < 0:
                print(f"for {dataset}, bin {idx} content is < 0: {content}")

    return new_hist


def findNewBins(hist_cfg_dict, var, **keys):

cfg = hist_cfg_dict.get(var, {})

if "2d" in cfg:
return cfg["2d"]

if "x_rebin" not in cfg:
if "x_bins" not in cfg:
raise RuntimeError(f'bins definition not found for "{var}"')
Expand Down Expand Up @@ -100,21 +67,6 @@ def recursive_search(d, remaining_keys):
return result


def getNewBins(bins):
    """Return a list of bin edges from either an explicit list or a spec string.

    Parameters
    ----------
    bins : list | str
        Either an explicit list of edges (returned as a shallow copy), or a
        string of the form ``"<n_bins>|<start>:<stop>"``, e.g. ``"10|0:1"``,
        expanded into ``n_bins + 1`` uniformly spaced edges.

    Returns
    -------
    list of float
        The bin edges, ending exactly at ``stop`` for the string form.
    """
    if isinstance(bins, list):
        return list(bins)
    n_bins, bin_range = bins.split("|")
    start_s, stop_s = bin_range.split(":")
    n_bins = int(n_bins)
    start = float(start_s)
    stop = float(stop_s)
    bin_width = (stop - start) / n_bins
    # Compute each edge directly from its index instead of accumulating
    # bin_width: repeated float addition drifts, which could shift the last
    # edge (0.9999999999999999 instead of 1.0) or drop it entirely when the
    # accumulated value overshoots `stop`. Pin the final edge to `stop`.
    return [start + i * bin_width for i in range(n_bins)] + [stop]


if __name__ == "__main__":
import argparse
import FLAF.PlotKit.Plotter as Plotter
Expand Down Expand Up @@ -265,7 +217,9 @@ def getNewBins(bins):
hist_cfg_dict[var_entry]["use_log_x"] = True

rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[var_entry].keys()
bins_to_compute = hist_cfg_dict[var_entry]["x_bins"]
bins_to_compute = (
hist_cfg_dict[var_entry]["x_bins"] if not rebin_condition else None
)

if rebin_condition:
bins_to_compute = findNewBins(
Expand Down
59 changes: 51 additions & 8 deletions Analysis/HistProducerFromNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

from FLAF.Common.HistHelper import *
import FLAF.Common.HistHelper as HistHelper
import FLAF.Common.Utilities as Utilities
from FLAF.Common.Setup import Setup
from FLAF.RunKit.run_tools import ps_call
Expand Down Expand Up @@ -36,7 +36,19 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
dir_ptr = Utilities.mkdir(outFile, dir_name)

merged_hist = model.GetHistogram().Clone()
for i in range(0, unit_hist.GetNbinsX() + 2):
# Yes I know this is ugly
# Axis needs +2 for under/overflow, but only if the return is not 1!!!
# Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D
N_xbins = unit_hist.GetNbinsX() + 2
N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1
N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1
N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
N_bins = N_xbins * N_ybins * N_zbins
# If we use the THnD then we have 'GetNbins' function instead
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins
Comment on lines +39 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Yes I know this is ugly
# Axis needs +2 for under/overflow, but only if the return is not 1!!!
# Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D
N_xbins = unit_hist.GetNbinsX() + 2
N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1
N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1
N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
N_bins = N_xbins * N_ybins * N_zbins
# If we use the THnD then we have 'GetNbins' function instead
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else unit_hist.GetNcells()

# This can be a loop over many bins, several times. Can be improved to be ran in c++ instead
for i in range(0, N_bins):
bin_content = unit_hist.GetBinContent(i)
bin_error = unit_hist.GetBinError(i)
merged_hist.SetBinContent(i, bin_content)
Expand All @@ -46,7 +58,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
if len(hist_list) > 1:
for model, unit_hist in hist_list[1:]:
hist = model.GetHistogram()
for i in range(0, unit_hist.GetNbinsX() + 2):
for i in range(0, N_bins):
bin_content = unit_hist.GetBinContent(i)
bin_error = unit_hist.GetBinError(i)
hist.SetBinContent(i, bin_content)
Expand All @@ -61,10 +73,35 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):


def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale):
    """Book a unit-binned histogram for *var* from the (filtered) RDataFrame.

    Parameters
    ----------
    rdf : ROOT.RDataFrame
        Input dataframe holding the pre-binned ``<var>_bin`` columns.
    var : str
        Variable name; looked up in the module-level ``hist_cfg_dict``.
    filter_to_apply : str
        Selection expression passed to ``rdf.Filter``.
    weight_name : str
        Name of the per-event weight column.
    unc, scale : str
        Currently unused here; kept for interface compatibility with callers.
        # NOTE(review): confirm whether unc/scale are meant to feed the model.

    Returns
    -------
    tuple
        ``(model, unit_hist)`` — the final binning model and the lazily
        booked unit-bin histogram result.

    Raises
    ------
    RuntimeError
        If the configured dimensionality is not 1, 2 or 3.
    """
    # Uses module-level globals: HistHelper, hist_cfg_dict, args.
    var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var)
    var_list = hist_cfg_dict[var_entry].get("var_list", False)
    dims = len(var_list) if var_list else 1

    model, unit_bin_model = HistHelper.GetModel(
        hist_cfg_dict, var, dims, return_unit_bin_model=True
    )
    # Use a distinct loop variable: the original comprehension shadowed the
    # `var` parameter, clobbering it for anything after this point.
    if dims > 1:
        var_bin_list = [f"{v}_bin" for v in hist_cfg_dict[var_entry]["var_list"]]
    else:
        var_bin_list = [f"{var}_bin"]

    if not 1 <= dims <= 3:
        raise RuntimeError("Only 1D, 2D and 3D histograms are supported")
    # Single dispatch instead of three duplicated if/elif branches: the RDF
    # booking methods only differ by name (Histo1D / Histo2D / Histo3D).
    rdf_filtered = rdf.Filter(filter_to_apply)
    mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D")
    unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name)
    return model, unit_hist


Expand Down Expand Up @@ -174,7 +211,7 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
for filter_key in key_filter_dict.keys():
print(filter_key)
for further_cut_name in [None] + list(further_cuts.keys()):
model, unit_bin_model = GetModel(
model, unit_bin_model = HistHelper.GetModel(
hist_cfg_dict, var, return_unit_bin_model=True
)
nbins = unit_bin_model.fNbinsX
Expand Down Expand Up @@ -276,7 +313,13 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
)

variables = setup.global_params["variables"]
vars_needed = set(variables)
vars_needed = set()
for var in variables:
if isinstance(var, dict) and "vars" in var:
for v in var["vars"]:
vars_needed.add(v)
else:
vars_needed.add(var)
for further_cut_name, (vars_for_cut, _) in further_cuts.items():
for var_for_cut in vars_for_cut:
if var_for_cut:
Expand Down
16 changes: 11 additions & 5 deletions Analysis/HistTupleProducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
start, stop = bin_range.split(":")
axis_definition = f"static const TAxis axis({n_bins}, {start}, {stop});"

ROOT.gInterpreter.Declare(
f"""
ROOT.gInterpreter.Declare(f"""
#include "ROOT/RVec.hxx"
#include "TAxis.h"

Expand All @@ -56,8 +55,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
}}
return out;
}}
"""
)
""")


def createHistTuple(
Expand Down Expand Up @@ -128,7 +126,15 @@ def createHistTuple(
print("Scale uncertainties to consider:", scale_uncertainties)

print("Defining binnings for variables")
flatten_vars = set()
for var in variables:
if isinstance(var, dict) and "vars" in var:
for v in var["vars"]:
flatten_vars.add(v)
else:
flatten_vars.add(var)

for var in flatten_vars:
DefineBinnedColumn(hist_cfg_dict, var)

snaps = []
Expand Down Expand Up @@ -197,7 +203,7 @@ def createHistTuple(
dfw.colToSave.append(desc["weight"])

print("Defining binned columns")
for var in variables:
for var in flatten_vars:
dfw.df = dfw.df.Define(f"{var}_bin", f"get_{var}_bin({var})")
dfw.colToSave.append(f"{var}_bin")

Expand Down
Loading