From 3471e6cc711b17209dd716fe5e1186c3078b886c Mon Sep 17 00:00:00 2001 From: Devin Date: Thu, 15 Jan 2026 16:33:20 +0100 Subject: [PATCH 01/12] Update to handle multiDim histograms --- AnaProd/MergeNtuples.py | 4 +- Analysis/HistMergerFromHists.py | 36 +++-- Analysis/HistPlotter.py | 56 +------ Analysis/HistProducerFromNTuple.py | 48 +++++- Analysis/HistTupleProducer.py | 12 +- Analysis/tasks.py | 26 +++- Common/HistHelper.py | 232 ++++++++++++++++++----------- include/HistHelper.h | 79 +++++++++- 8 files changed, 325 insertions(+), 168 deletions(-) diff --git a/AnaProd/MergeNtuples.py b/AnaProd/MergeNtuples.py index 89a87e69..87c16fbd 100644 --- a/AnaProd/MergeNtuples.py +++ b/AnaProd/MergeNtuples.py @@ -78,9 +78,9 @@ def merge_ntuples(df): snapshotOptions.fLazy = False snapshotOptions.fMode = "UPDATE" snapshotOptions.fCompressionAlgorithm = getattr( - ROOT.ROOT.RCompressionSetting.EAlgorithm, "kZLIB" + ROOT.ROOT.RCompressionSetting.EAlgorithm, "kLZMA" ) - snapshotOptions.fCompressionLevel = 4 + snapshotOptions.fCompressionLevel = 9 inputFiles = [ (fileName, ROOT.TFile(fileName, "READ")) for fileName in args.inputFile ] diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 50202152..416b8aad 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -39,9 +39,10 @@ def checkFile(inFileRoot, channels, qcdRegions, categories): return True -def fill_all_hists_dict( +def fill_hists( items_dict, - all_hist_dict_per_var_and_datasettype, + all_hist_dict, + dataset_type, var_input, unc_source="Central", ): @@ -55,9 +56,13 @@ def fill_all_hists_dict( if var != var_check: continue final_key = (key_tuple, (unc_source, scale)) - if final_key not in all_hist_dict_per_var_and_datasettype: - all_hist_dict_per_var_and_datasettype[final_key] = [] - all_hist_dict_per_var_and_datasettype[final_key].append(var_hist) + if dataset_type not in all_hist_dict.keys(): + all_hist_dict[dataset_type] = {} + if final_key not in all_hist_dict[dataset_type]: + var_hist.SetDirectory(0) + all_hist_dict[dataset_type][final_key] = var_hist + else: + all_hist_dict[dataset_type][final_key].Add(var_hist) def MergeHistogramsPerType(all_hists_dict): @@ -236,23 +241,22 @@ def GetBTagWeightDict( f"input file for dataset {dataset_name} (with path= {inFile_path}) does not exist, skipping" ) continue - with ROOT.TFile.Open(inFile_path, "READ") as inFile: - # check that the file is ok - if inFile.IsZombie(): - raise RuntimeError(f"{inFile_path} is zombie") - if not checkFile(inFile, channels, regions, all_categories): - raise RuntimeError(f"{dataset_name} has void file") base_process_name = dataset_cfg_dict[dataset_name]["process_name"] dataset_type = setup.base_processes[base_process_name]["parent_process"] if dataset_type not in all_hists_dict.keys(): all_hists_dict[dataset_type] = {} - all_items = load_all_items(inFile_path) - fill_all_hists_dict( - all_items, all_hists_dict[dataset_type], args.var, args.uncSource - ) # to add: , unc_source="Central", scale="Central" - MergeHistogramsPerType(all_hists_dict) + with ROOT.TFile.Open(inFile_path, "READ") as inFile: + # check that the file is ok + if inFile.IsZombie(): + raise RuntimeError(f"{inFile_path} is zombie") + if not checkFile(inFile, channels, regions, all_categories): + raise RuntimeError(f"{dataset_name} has void file") + all_items = get_all_items_recursive(inFile) + fill_hists( + all_items, all_hists_dict, dataset_type, args.var, args.uncSource + ) # to add: , unc_source="Central", scale="Central" # here there should 
be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc. # analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions diff --git a/Analysis/HistPlotter.py b/Analysis/HistPlotter.py index b4f14e47..f3addf31 100644 --- a/Analysis/HistPlotter.py +++ b/Analysis/HistPlotter.py @@ -25,42 +25,10 @@ def GetHistName(dataset_name, dataset_type, uncName, unc_scale, global_cfg_dict) return histName -def RebinHisto(hist_initial, new_binning, dataset, wantOverflow=True, verbose=False): - new_binning_array = array.array("d", new_binning) - new_hist = hist_initial.Rebin(len(new_binning) - 1, dataset, new_binning_array) - if dataset == "data": - new_hist.SetBinErrorOption(ROOT.TH1.kPoisson) - if wantOverflow: - n_finalbin = new_hist.GetBinContent(new_hist.GetNbinsX()) - n_overflow = new_hist.GetBinContent(new_hist.GetNbinsX() + 1) - new_hist.SetBinContent(new_hist.GetNbinsX(), n_finalbin + n_overflow) - err_finalbin = new_hist.GetBinError(new_hist.GetNbinsX()) - err_overflow = new_hist.GetBinError(new_hist.GetNbinsX() + 1) - new_hist.SetBinError( - new_hist.GetNbinsX(), - math.sqrt(err_finalbin * err_finalbin + err_overflow * err_overflow), - ) - - if verbose: - for nbin in range(0, len(new_binning)): - print( - f"nbin = {nbin}, content = {new_hist.GetBinContent(nbin)}, error {new_hist.GetBinError(nbin)}" - ) - fix_negative_contributions, debug_info, negative_bins_info = ( - FixNegativeContributions(new_hist) - ) - if not fix_negative_contributions: - print("negative contribution not fixed") - print(fix_negative_contributions, debug_info, negative_bins_info) - for nbin in range(0, new_hist.GetNbinsX() + 1): - content = new_hist.GetBinContent(nbin) - if content < 0: - print(f"for {dataset}, bin {nbin} content is < 0: {content}") - - return new_hist - - def findNewBins(hist_cfg_dict, var, channel, category): + if "2d" in hist_cfg_dict[var].keys(): + return hist_cfg_dict[var]["2d"] + if "x_rebin" not in hist_cfg_dict[var].keys(): return hist_cfg_dict[var]["x_bins"] @@ -85,22 +53,6 @@ def findNewBins(hist_cfg_dict, var, channel, category): return new_dict[category][channel] return hist_cfg_dict[var]["x_rebin"]["other"] - -def getNewBins(bins): - if type(bins) == list: - final_bins = bins - else: - n_bins, bin_range = bins.split("|") - start, stop = bin_range.split(":") - bin_width = (float(stop) - float(start)) / int(n_bins) - final_bins = [] - bin_center = float(start) - while bin_center >= float(start) and bin_center <= float(stop): - final_bins.append(bin_center) - bin_center = bin_center + bin_width - return final_bins - - if __name__ == "__main__": import argparse import FLAF.PlotKit.Plotter as Plotter @@ -249,7 +201,7 @@ def getNewBins(bins): hist_cfg_dict[args.var]["use_log_x"] = True rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[args.var].keys() - bins_to_compute = hist_cfg_dict[args.var]["x_bins"] + bins_to_compute = hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None if rebin_condition: bins_to_compute = findNewBins(hist_cfg_dict, args.var, channel, category) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index c45bb489..5c2b4576 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -31,7 +31,19 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): dir_ptr = Utilities.mkdir(outFile, dir_name) model, unit_hist = hist_list[0] merged_hist = model.GetHistogram().Clone() - for i in range(0, unit_hist.GetNbinsX() + 2): + # Yes I know this is ugly + # 
Axis needs +2 for under/overflow, but only if the return is not 1!!! + # Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D + N_xbins = unit_hist.GetNbinsX() + 2 + N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1 + N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins + N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1 + N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins + N_bins = N_xbins * N_ybins * N_zbins + # If we use the THnD then we have 'GetNbins' function instead + N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins + print(f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}") + for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) merged_hist.SetBinContent(i, bin_content) @@ -41,7 +53,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): if len(hist_list) > 1: for model, unit_hist in hist_list[1:]: hist = model.GetHistogram() - for i in range(0, unit_hist.GetNbinsX() + 2): + for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) hist.SetBinContent(i, bin_content) @@ -56,10 +68,27 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): - model, unit_bin_model = GetModel(hist_cfg_dict, var, return_unit_bin_model=True) - unit_hist = rdf.Filter(filter_to_apply).Histo1D( - unit_bin_model, f"{var}_bin", weight_name + dims = 1 if not hist_cfg_dict[var].get("var_list", False) else len( + hist_cfg_dict[var]["var_list"] ) + print(f"Dimensions: {dims}") + + model, unit_bin_model = GetModel(hist_cfg_dict, var, dims, return_unit_bin_model=True) + var_bin_list = [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] if dims > 1 else [f"{var}_bin"] + if dims == 1: + unit_hist = rdf.Filter(filter_to_apply).Histo1D( + unit_bin_model, *var_bin_list, weight_name + ) + elif dims == 2: + unit_hist = rdf.Filter(filter_to_apply).Histo2D( + unit_bin_model, *var_bin_list, weight_name + ) + elif dims == 3: + unit_hist = rdf.Filter(filter_to_apply).Histo3D( + unit_bin_model, *var_bin_list, weight_name + ) + else: + raise RuntimeError("Only 1D, 2D and 3D histograms are supported") return model, unit_hist @@ -121,6 +150,7 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base + print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( @@ -262,7 +292,13 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): ) variables = setup.global_params["variables"] - vars_needed = set(variables) + vars_needed = set() + for var in variables: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + vars_needed.add(v) + else: + vars_needed.add(var) for further_cut_name, (vars_for_cut, _) in further_cuts.items(): for var_for_cut in vars_for_cut: if var_for_cut: diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index d74c9298..01fb7f49 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -176,7 +176,17 @@ def createHistTuple( df_is_central=True, ) dfw_central.colToSave.append(final_weight_name) + + # Return a flattened set of variables, the 2D happens later + flatten_vars = set() for var in variables: + if 
isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + + for var in flatten_vars: DefineBinnedColumn(hist_cfg_dict, var) dfw_central.df = dfw_central.df.Define(f"{var}_bin", f"get_{var}_bin({var})") dfw_central.colToSave.append(f"{var}_bin") @@ -231,7 +241,7 @@ def createHistTuple( df_is_central=False, ) dfw_shift.colToSave.append(final_weight_name) - for var in variables: + for var in flatten_vars: dfw_shift.df = dfw_shift.df.Define( f"{var}_bin", f"get_{var}_bin({var})" ) diff --git a/Analysis/tasks.py b/Analysis/tasks.py index 0ba769dd..ed34036a 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -185,13 +185,23 @@ def create_branch_map(self): ] datasets_to_consider.append("data") + flatten_vars = set() + for var in self.global_params["variables"]: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + need_cache_list = [ (var_name in var_produced_by, var_produced_by.get(var_name, None)) - for var_name in self.global_params["variables"] + # for var_name in self.global_params["variables"] + for var_name in flatten_vars ] producer_list = [] need_cache_global = any(item[0] for item in need_cache_list) - for var_name in self.global_params["variables"]: + # for var_name in self.global_params["variables"]: + for var_name in flatten_vars: need_cache = True if var_name in var_produced_by else False producer_to_run = ( var_produced_by[var_name] if var_name in var_produced_by else None @@ -397,6 +407,7 @@ def create_branch_map(self): @workflow_condition.output def output(self): var, prod_br, dataset_name = self.branch_data + if type(var) == dict: var = var['name'] output_path = os.path.join( "hists", self.version, self.period, var, f"{dataset_name}.root" ) @@ -429,6 +440,7 @@ def run(self): stack.enter_context((inp).localize("r")).path for inp in self.input()[0] ] + var = var if type(var) != dict else var['name'] tmpFile = os.path.join(job_home, f"HistFromNtuple_{var}.root") HistFromNtupleProducer_cmd = [ @@ -549,6 +561,7 @@ def create_branch_map(self): prod_br_list, current_dataset, ) in HistFromNtupleProducerTask_branch_map.items(): + var_name = var_name.get("name", var_name) if type(var_name) == dict else var_name if var_name not in all_datasets.keys(): all_datasets[var_name] = [] all_datasets[var_name].append((br_idx, current_dataset)) @@ -1045,10 +1058,17 @@ def workflow_requires(self): merge_map = HistMergerTask.req( self, branch=-1, branches=(), customisations=self.customisations ).create_branch_map() + + branch_set = set() + for br_idx, (var) in self.branch_map.items(): + for br, (v, _, _) in merge_map.items(): + if v == var: + branch_set.add(br) + return { "merge": HistMergerTask.req( self, - branches=tuple(merge_map.keys()), + branches=tuple(branch_set), customisations=self.customisations, ) } diff --git a/Common/HistHelper.py b/Common/HistHelper.py index c724ecf0..02666c1c 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -10,6 +10,8 @@ import FLAF.Common.Utilities as Utilities +ROOT.gROOT.ProcessLine(f".include {os.environ['ANALYSIS_PATH']}") +ROOT.gROOT.ProcessLine(f'#include "FLAF/include/HistHelper.h"') def get_all_items_recursive(root_dir, path=()): items_dict = {} @@ -21,7 +23,7 @@ def get_all_items_recursive(root_dir, path=()): if obj.InheritsFrom("TDirectory"): items_dict.update(get_all_items_recursive(obj, path + (key.GetName(),))) elif obj.InheritsFrom("TH1"): - obj.SetDirectory(0) + # obj.SetDirectory(0) 
local_items[key.GetName()] = obj if local_items: @@ -198,6 +200,9 @@ def getNewBins(bins): if isinstance(bins, list): return bins + if isinstance(bins, dict): + return bins + n_bins, bin_range = bins.split("|") start, stop = map(float, bin_range.split(":")) step = (stop - start) / int(n_bins) @@ -206,8 +211,54 @@ def getNewBins(bins): def RebinHisto(hist_initial, new_binning, sample, wantOverflow=True, verbose=False): - new_binning_array = array.array("d", new_binning) - new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array) + print("Rebinning histogram:", hist_initial.GetName()) + if isinstance(new_binning, dict): + N_xbins = hist_initial.GetNbinsX() + 2 + N_ybins = hist_initial.GetNbinsY() if hasattr(hist_initial, "GetNbinsY") else 1 + N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins + N_zbins = hist_initial.GetNbinsZ() if hasattr(hist_initial, "GetNbinsZ") else 1 + N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins + N_bins = N_xbins * N_ybins * N_zbins + # If we use the THnD then we have 'GetNbins' function instead + N_bins = ( + hist_initial.GetNbins() if hasattr(hist_initial, "GetNbins") else N_bins + ) + + # Prepare data structures for C++ function + y_bin_ranges = ROOT.std.vector("std::pair")() + output_bin_edges_vec = ROOT.std.vector("std::vector")() + + for combined_bin in new_binning["combined_bins"]: + # Parse y_bin range + y_min, y_max = combined_bin["y_bin"] + y_bin_ranges.push_back(ROOT.std.pair("float", "float")(y_min, y_max)) + + # Parse x_bins spec (can be string "nbins|min:max" or list of bin edges) + out_spec = combined_bin["x_bins"] + out_edges = ROOT.std.vector("float")() + if isinstance(out_spec, list): + for edge in out_spec: + out_edges.push_back(float(edge)) + else: + n_out_bins, out_range = out_spec.split("|") + out_min, out_max = map(float, out_range.split(":")) + n_out_bins = int(n_out_bins) + # Create uniform bins + step = (out_max - out_min) / n_out_bins + for i in range(n_out_bins + 1): + out_edges.push_back(out_min + i * step) + output_bin_edges_vec.push_back(out_edges) + + # Create ROOT vectors + # Call the C++ function which returns a new histogram + new_hist = ROOT.analysis.rebinHistogramDict( + hist_initial, N_bins, y_bin_ranges, output_bin_edges_vec + ) + new_hist.SetName(sample) + + else: + new_binning_array = array.array("d", new_binning) + new_hist = hist_initial.Rebin(len(new_binning) - 1, sample, new_binning_array) if sample == "data": new_hist.SetBinErrorOption(ROOT.TH1.kPoisson) @@ -251,96 +302,103 @@ def GetBinVec(hist_cfg, var): n_bins, bin_range = x_bins.split("|") start, stop = bin_range.split(":") edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() - # print(len(edges)) x_bins_vec = Utilities.ListToVector(edges, "float") return x_bins_vec -def GetModel(hist_cfg, var, return_unit_bin_model=False): - x_bins = hist_cfg[var]["x_bins"] - if type(hist_cfg[var]["x_bins"]) == list: - x_bins_vec = Utilities.ListToVector(x_bins, "double") - model = ROOT.RDF.TH1DModel("", "", x_bins_vec.size() - 1, x_bins_vec.data()) - else: - n_bins, bin_range = x_bins.split("|") - start, stop = bin_range.split(":") - model = ROOT.RDF.TH1DModel("", "", int(n_bins), float(start), float(stop)) - if not return_unit_bin_model: - return model - unit_bin_model = ROOT.RDF.TH1DModel( - "", "", model.fNbinsX, -0.5, model.fNbinsX - 0.5 - ) - return model, unit_bin_model +def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): + print(f"Model with var {var}") + THModel_Inputs = [] + unit_bin_Inputs = [] + if dims == 1: + 
print("1D histogram") + x_bins = hist_cfg[var]["x_bins"] + if type(hist_cfg[var]["x_bins"]) == list: + x_bins_vec = Utilities.ListToVector(x_bins, "double") + else: + n_bins, bin_range = x_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + x_bins_vec = Utilities.ListToVector(edges, "double") + THModel_Inputs.append(x_bins_vec.size() - 1) + THModel_Inputs.append(x_bins_vec.data()) + model = ROOT.RDF.TH1DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [model.fNbinsX, -0.5, model.fNbinsX - 0.5] + unit_bin_model = ROOT.RDF.TH1DModel("", "", *unit_bin_Inputs) + + elif dims == 2: + print("2D histogram") + list_var_bins_vec = [] + for var_2d in hist_cfg[var]["var_list"]: + var_bin_name = f"{var_2d}_bins" + var_bins = ( + hist_cfg[var][var_bin_name] + if var_bin_name in hist_cfg[var] + else hist_cfg[var_2d]["x_bins"] + ) + if type(var_bins) == list: + var_bins_vec = Utilities.ListToVector(var_bins, "double") + else: + n_bins, bin_range = var_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist() + var_bins_vec = Utilities.ListToVector(edges, "double") + list_var_bins_vec.append(var_bins_vec) + THModel_Inputs.append(var_bins_vec.size() - 1) + THModel_Inputs.append(var_bins_vec.data()) + model = ROOT.RDF.TH2DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [ + model.fNbinsX, + -0.5, + model.fNbinsX - 0.5, + model.fNbinsY, + -0.5, + model.fNbinsY - 0.5, + ] + unit_bin_model = ROOT.RDF.TH2DModel("", "", *unit_bin_Inputs) + + elif dims == 3: + print("3D histogram") + list_var_bins_vec = [] + for var_3d in hist_cfg[var]["var_list"]: + var_bin_name = f"{var_3d}_bins" + var_bins = hist_cfg[var][var_bin_name] + if type(var_bins) == list: + var_bins_vec = Utilities.ListToVector(var_bins, "double") + else: + n_bins, bin_range = var_bins.split("|") + start, stop = bin_range.split(":") + edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + var_bins_vec = Utilities.ListToVector(edges, "double") + print(var_bins_vec) + list_var_bins_vec.append(var_bins_vec) + THModel_Inputs.append(var_bins_vec.size() - 1) + THModel_Inputs.append(var_bins_vec.data()) + model = ROOT.RDF.TH3DModel("", "", *THModel_Inputs) + if not return_unit_bin_model: + return model + unit_bin_Inputs = [ + model.fNbinsX, + -0.5, + model.fNbinsX - 0.5, + model.fNbinsY, + -0.5, + model.fNbinsY - 0.5, + model.fNbinsZ, + -0.5, + model.fNbinsZ - 0.5, + ] + unit_bin_model = ROOT.RDF.TH3DModel("", "", *unit_bin_Inputs) + else: + print("nD histogram not implemented yet") + # model = ROOT.RDF.THnDModel("", "", ) -# def GetModel(hist_cfg, var): -# x_bins = hist_cfg[var]["x_bins"] -# if type(hist_cfg[var]["x_bins"]) == list: -# x_bins_vec = Utilities.ListToVector(x_bins, "double") -# model = ROOT.RDF.TH1DModel("", "", x_bins_vec.size() - 1, x_bins_vec.data()) -# else: -# n_bins, bin_range = x_bins.split("|") -# start, stop = bin_range.split(":") -# model = ROOT.RDF.TH1DModel("", "", int(n_bins), float(start), float(stop)) -# return model - - -# # to be fixed -# def Get2DModel(hist_cfg, var1, var2): -# x_bins = hist_cfg[var1]["x_bins"] -# y_bins = hist_cfg[var2]["x_bins"] -# if type(x_bins) == list: -# x_bins_vec = Utilities.ListToVector(x_bins, "double") -# if type(y_bins) == list: -# y_bins_vec = Utilities.ListToVector(y_bins, "double") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# 
x_bins_vec.size() - 1, -# x_bins_vec.data(), -# y_bins_vec.size() - 1, -# y_bins_vec.data(), -# ) -# else: -# n_y_bins, y_bin_range = y_bins.split("|") -# y_start, y_stop = y_bin_range.split(":") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# x_bins_vec.size() - 1, -# x_bins_vec.data(), -# int(n_y_bins), -# float(y_start), -# float(y_stop), -# ) -# else: -# n_x_bins, x_bin_range = x_bins.split("|") -# x_start, x_stop = x_bin_range.split(":") -# if type(y_bins) == list: -# y_bins_vec = Utilities.ListToVector(y_bins, "double") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# int(n_x_bins), -# float(x_start), -# float(x_stop), -# y_bins_vec.size() - 1, -# y_bins_vec.data(), -# ) -# else: -# n_y_bins, y_bin_range = y_bins.split("|") -# y_start, y_stop = y_bin_range.split(":") -# model = ROOT.RDF.TH2DModel( -# "", -# "", -# int(n_x_bins), -# float(x_start), -# float(x_stop), -# int(n_y_bins), -# float(y_start), -# float(y_stop), -# ) -# return model + return model, unit_bin_model def createCacheQuantities(dfWrapped_cache, cache_map_name, cache_entry_name): diff --git a/include/HistHelper.h b/include/HistHelper.h index 020f2c5b..e55f9559 100644 --- a/include/HistHelper.h +++ b/include/HistHelper.h @@ -11,6 +11,10 @@ #include "EntryQueue.h" +#include +#include +#include + /* namespace kin_fit { struct FitResults { @@ -159,5 +163,78 @@ namespace analysis { return df_node; } }; - + + TH1D* rebinHistogramDict(TH1* hist_initial, int N_bins, + const std::vector>& y_bin_ranges, + const std::vector>& output_bin_edges) { + // Flatten output bin edges into a single sorted array + std::vector all_output_edges; + float last_edge = 0.0; + for (const auto& edges : output_bin_edges) { + for (float edge : edges) { + all_output_edges.push_back(edge + last_edge); + } + last_edge = all_output_edges.back(); + } + // Sort and remove duplicates + std::sort(all_output_edges.begin(), all_output_edges.end()); + all_output_edges.erase(std::unique(all_output_edges.begin(), all_output_edges.end()), all_output_edges.end()); + + // Create output histogram with variable binning + TH1D* hist_output = new TH1D("rebinned", "rebinned", all_output_edges.size() - 1, all_output_edges.data()); + hist_output->Sumw2(); + + // Helper function to find bin index from value and edges + auto findBinIndex = [](float value, const std::vector& edges) -> int { + if (edges.size() < 2) return -1; + for (size_t i = 0; i < edges.size() - 1; ++i) { + if (value >= edges[i] && value < edges[i + 1]) { + return i; + } + } + return -1; + }; + + // Iterate through all bins in the original histogram + for (int i = 0; i < N_bins; ++i) { + int binX, binY, binZ; + hist_initial->GetBinXYZ(i, binX, binY, binZ); + + // Get bin centers (actual values) + float x_value = hist_initial->GetXaxis()->GetBinCenter(binX); + float y_value = hist_initial->GetYaxis()->GetBinCenter(binY); + float z_value = hist_initial->GetZaxis()->GetBinCenter(binZ); + + // Get bin content and error + double bin_content = hist_initial->GetBinContent(i); + double bin_error = hist_initial->GetBinError(i); + double bin_error2 = bin_error * bin_error; + + // Find which y_bin range this y_value falls into + int y_bin_idx = -1; + for (size_t j = 0; j < y_bin_ranges.size(); ++j) { + if (y_value >= y_bin_ranges[j].first && y_value < y_bin_ranges[j].second) { + y_bin_idx = j; + break; + } + } + if (y_bin_idx == -1) continue; // Skip if y_value doesn't fall in any range + // Find output bin index within the output_bin_edges for this y_bin + int local_out_bin = findBinIndex(x_value, 
output_bin_edges[y_bin_idx]); + if (local_out_bin == -1) continue; // Skip if x_value doesn't fall in any output bin + // Calculate section offset by counting bins in all previous y_bin sections + int section_offset = 0; + for (int prev_y = 0; prev_y < y_bin_idx; ++prev_y) { + section_offset += output_bin_edges[prev_y].size() - 1; // size - 1 = number of bins + } + // Calculate global bin index: offset + local bin position within this section + int global_bin = section_offset + local_out_bin + 1; // +1 for ROOT's 1-indexed bins + // Set bin content and error + if (global_bin >= 1 && global_bin <= (int)all_output_edges.size() - 1) { + hist_output->SetBinContent(global_bin, hist_output->GetBinContent(global_bin) + bin_content); + hist_output->SetBinError(global_bin, std::sqrt(std::pow(hist_output->GetBinError(global_bin), 2) + bin_error2)); + } + } + return hist_output; + } } // namespace analysis From 929d7cb3f473659cc58e7937e8337958b6418ed1 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:02:24 +0100 Subject: [PATCH 02/12] Update flaf env again after reset-merge --- run_tools/mk_flaf_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh index 179a9f9b..47e1e843 100755 --- a/run_tools/mk_flaf_env.sh +++ b/run_tools/mk_flaf_env.sh @@ -73,7 +73,7 @@ EOF link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/bin $env_base/bin go gofmt link_all $lcg_base/lib $env_base/lib/python3.12/site-packages python3.12 - link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info + link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info link_all $lcg_base/lib64 $env_base/lib/python3.12/site-packages cairo cmake libonnx_proto.a libsvm.so.2 pkgconfig ThePEG libavh_olo.a libff.a libqcdloop.a python3.12 link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib $env_base/lib link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib64 $env_base/lib From eaa9bbfa95fa49f51ee5c4f00cc6e637694a2fb0 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:24:31 +0100 Subject: [PATCH 03/12] Fixed some issues with flattened vars --- Analysis/HistTupleProducer.py | 1 + Analysis/tasks.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index 1cc0de1a..dc888293 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -127,6 +127,7 @@ def createHistTuple( print("Scale uncertainties to consider:", scale_uncertainties) print("Defining binnings for variables") + flatten_vars = set() for var in variables: if isinstance(var, dict) and "vars" in var: for v in var["vars"]: diff --git a/Analysis/tasks.py b/Analysis/tasks.py index 50cba496..da78165d 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -47,7 +47,16 @@ def 
workflow_requires(self): } req_dict["AnalysisCacheTask"] = [] var_produced_by = self.setup.var_producer_map - for var_name in self.global_params["variables"]: + + flatten_vars = set() + for var in self.global_params["variables"]: + if isinstance(var, dict) and "vars" in var: + for v in var["vars"]: + flatten_vars.add(v) + else: + flatten_vars.add(var) + + for var_name in flatten_vars: producer_to_run = var_produced_by.get(var_name, None) if producer_to_run is not None: req_dict["AnalysisCacheTask"].append( From 711b98c551ab543b0045daee99c863d7f55ae93b Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:26:10 +0100 Subject: [PATCH 04/12] Formatting --- Analysis/HistMergerFromHists.py | 2 +- Analysis/HistPlotter.py | 5 ++++- Analysis/HistProducerFromNTuple.py | 20 +++++++++++++++----- Analysis/tasks.py | 9 ++++++--- Common/HistHelper.py | 1 + 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 03667175..2a5dc348 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -255,7 +255,7 @@ def GetBTagWeightDict( all_items = get_all_items_recursive(inFile) fill_hists( all_items, all_hists_dict, dataset_type, args.var, args.uncSource - ) # to add: , unc_source="Central", scale="Central" + ) # to add: , unc_source="Central", scale="Central" # here there should be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc. # analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions diff --git a/Analysis/HistPlotter.py b/Analysis/HistPlotter.py index f3addf31..a92d59e1 100644 --- a/Analysis/HistPlotter.py +++ b/Analysis/HistPlotter.py @@ -53,6 +53,7 @@ def findNewBins(hist_cfg_dict, var, channel, category): return new_dict[category][channel] return hist_cfg_dict[var]["x_rebin"]["other"] + if __name__ == "__main__": import argparse import FLAF.PlotKit.Plotter as Plotter @@ -201,7 +202,9 @@ def findNewBins(hist_cfg_dict, var, channel, category): hist_cfg_dict[args.var]["use_log_x"] = True rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[args.var].keys() - bins_to_compute = hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None + bins_to_compute = ( + hist_cfg_dict[args.var]["x_bins"] if not rebin_condition else None + ) if rebin_condition: bins_to_compute = findNewBins(hist_cfg_dict, args.var, channel, category) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 3161c7cd..42a0f56d 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -47,7 +47,9 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): N_bins = N_xbins * N_ybins * N_zbins # If we use the THnD then we have 'GetNbins' function instead N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins - print(f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}") + print( + f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" + ) for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) @@ -73,13 +75,21 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): - dims = 1 if not hist_cfg_dict[var].get("var_list", False) else len( - hist_cfg_dict[var]["var_list"] + dims = ( + 1 + if not 
hist_cfg_dict[var].get("var_list", False) + else len(hist_cfg_dict[var]["var_list"]) ) print(f"Dimensions: {dims}") - model, unit_bin_model = GetModel(hist_cfg_dict, var, dims, return_unit_bin_model=True) - var_bin_list = [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] if dims > 1 else [f"{var}_bin"] + model, unit_bin_model = GetModel( + hist_cfg_dict, var, dims, return_unit_bin_model=True + ) + var_bin_list = ( + [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] + if dims > 1 + else [f"{var}_bin"] + ) if dims == 1: unit_hist = rdf.Filter(filter_to_apply).Histo1D( unit_bin_model, *var_bin_list, weight_name diff --git a/Analysis/tasks.py b/Analysis/tasks.py index da78165d..ded3ddfd 100644 --- a/Analysis/tasks.py +++ b/Analysis/tasks.py @@ -372,7 +372,8 @@ def create_branch_map(self): @workflow_condition.output def output(self): var, prod_br, dataset_name = self.branch_data - if type(var) == dict: var = var['name'] + if type(var) == dict: + var = var["name"] output_path = os.path.join( "hists", self.version, self.period, var, f"{dataset_name}.root" ) @@ -404,7 +405,7 @@ def run(self): stack.enter_context((inp).localize("r")).path for inp in self.input()[0] ] - var = var if type(var) != dict else var['name'] + var = var if type(var) != dict else var["name"] tmpFile = os.path.join(job_home, f"HistFromNtuple_{var}.root") HistFromNtupleProducer_cmd = [ @@ -525,7 +526,9 @@ def create_branch_map(self): prod_br_list, current_dataset, ) in HistFromNtupleProducerTask_branch_map.items(): - var_name = var_name.get("name", var_name) if type(var_name) == dict else var_name + var_name = ( + var_name.get("name", var_name) if type(var_name) == dict else var_name + ) if var_name not in all_datasets.keys(): all_datasets[var_name] = [] all_datasets[var_name].append((br_idx, current_dataset)) diff --git a/Common/HistHelper.py b/Common/HistHelper.py index f1a266fe..cffee623 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -13,6 +13,7 @@ ROOT.gROOT.ProcessLine(f".include {os.environ['ANALYSIS_PATH']}") ROOT.gROOT.ProcessLine(f'#include "FLAF/include/HistHelper.h"') + def get_all_items_recursive(root_dir, path=()): items_dict = {} local_items = {} From bda35355e8e2af40f8bb78888b8eab92d70adf1c Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 16:42:59 +0100 Subject: [PATCH 05/12] Bug fix --- Analysis/HistProducerFromNTuple.py | 1 + Analysis/HistTupleProducer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 42a0f56d..2da8b7eb 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -50,6 +50,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): print( f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" ) + # This can be a loop over many bins, several times. 
Can be improved to be ran in c++ instead for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) bin_error = unit_hist.GetBinError(i) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index dc888293..ca4e15b1 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -204,7 +204,7 @@ def createHistTuple( dfw.colToSave.append(desc["weight"]) print("Defining binned columns") - for var in variables: + for var in flatten_vars: dfw.df = dfw.df.Define(f"{var}_bin", f"get_{var}_bin({var})") dfw.colToSave.append(f"{var}_bin") From ffc7872b9b6fd7c01eefc82f0472a772de729b89 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 17:06:14 +0100 Subject: [PATCH 06/12] Remove debug prints --- Analysis/HistProducerFromNTuple.py | 6 +----- Common/HistHelper.py | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 2da8b7eb..ce5d532f 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -47,9 +47,6 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): N_bins = N_xbins * N_ybins * N_zbins # If we use the THnD then we have 'GetNbins' function instead N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins - print( - f"We have n bins {N_bins}, coming from unit hist with nxbins {N_xbins}, nybins {N_ybins}, nzbins {N_zbins}" - ) # This can be a loop over many bins, several times. Can be improved to be ran in c++ instead for i in range(0, N_bins): bin_content = unit_hist.GetBinContent(i) @@ -81,7 +78,6 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): if not hist_cfg_dict[var].get("var_list", False) else len(hist_cfg_dict[var]["var_list"]) ) - print(f"Dimensions: {dims}") model, unit_bin_model = GetModel( hist_cfg_dict, var, dims, return_unit_bin_model=True @@ -170,7 +166,7 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base - print("Saving Hist for unc/scale/key:", unc, scale, key) + # print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( diff --git a/Common/HistHelper.py b/Common/HistHelper.py index cffee623..094255d5 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -261,7 +261,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): THModel_Inputs = [] unit_bin_Inputs = [] if dims == 1: - print("1D histogram") x_bins = hist_cfg[var]["x_bins"] if type(hist_cfg[var]["x_bins"]) == list: x_bins_vec = Utilities.ListToVector(x_bins, "double") @@ -279,7 +278,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): unit_bin_model = ROOT.RDF.TH1DModel("", "", *unit_bin_Inputs) elif dims == 2: - print("2D histogram") list_var_bins_vec = [] for var_2d in hist_cfg[var]["var_list"]: var_bin_name = f"{var_2d}_bins" @@ -312,7 +310,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): unit_bin_model = ROOT.RDF.TH2DModel("", "", *unit_bin_Inputs) elif dims == 3: - print("3D histogram") list_var_bins_vec = [] for var_3d in hist_cfg[var]["var_list"]: var_bin_name = f"{var_3d}_bins" From 129d233051a197bb32fca138cced1c249343061f Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 17:27:07 +0100 Subject: [PATCH 07/12] Remove debug --- Common/HistHelper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Common/HistHelper.py 
b/Common/HistHelper.py index 094255d5..ec54bfb5 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -257,7 +257,6 @@ def GetBinVec(hist_cfg, var): def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): - print(f"Model with var {var}") THModel_Inputs = [] unit_bin_Inputs = [] if dims == 1: From c979734da377322bd4e185e849092a7a75d8cfec Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 16 Jan 2026 20:27:27 +0100 Subject: [PATCH 08/12] Fix bug with 1D string binning --- Analysis/HistProducerFromNTuple.py | 1 - Common/HistHelper.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index ce5d532f..f16b61bc 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -166,7 +166,6 @@ def SaveTmpFileUnc( for scale in scales: for key, filter_to_apply_base in key_filter_dict.items(): filter_to_apply_final = filter_to_apply_base - # print("Saving Hist for unc/scale/key:", unc, scale, key) if further_cuts: for further_cut_name in further_cuts.keys(): filter_to_apply_final = ( diff --git a/Common/HistHelper.py b/Common/HistHelper.py index ec54bfb5..0c05c481 100644 --- a/Common/HistHelper.py +++ b/Common/HistHelper.py @@ -161,7 +161,6 @@ def getNewBins(bins): def RebinHisto(hist_initial, new_binning, sample, wantOverflow=True, verbose=False): - print("Rebinning histogram:", hist_initial.GetName()) if isinstance(new_binning, dict): N_xbins = hist_initial.GetNbinsX() + 2 N_ybins = hist_initial.GetNbinsY() if hasattr(hist_initial, "GetNbinsY") else 1 @@ -266,7 +265,7 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): else: n_bins, bin_range = x_bins.split("|") start, stop = bin_range.split(":") - edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() + edges = np.linspace(float(start), float(stop), int(n_bins) + 1).tolist() x_bins_vec = Utilities.ListToVector(edges, "double") THModel_Inputs.append(x_bins_vec.size() - 1) THModel_Inputs.append(x_bins_vec.data()) @@ -320,7 +319,6 @@ def GetModel(hist_cfg, var, dims, return_unit_bin_model=False): start, stop = bin_range.split(":") edges = np.linspace(float(start), float(stop), int(n_bins)).tolist() var_bins_vec = Utilities.ListToVector(edges, "double") - print(var_bins_vec) list_var_bins_vec.append(var_bins_vec) THModel_Inputs.append(var_bins_vec.size() - 1) THModel_Inputs.append(var_bins_vec.data()) From a195bc4d45b0dc6954f4afafaaf7e5aa320861d1 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 12:45:24 +0100 Subject: [PATCH 09/12] Add this var_entry thing and remove import star because I hate it --- Analysis/HistProducerFromNTuple.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 9565d703..27d56940 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -8,7 +8,7 @@ if __name__ == "__main__": sys.path.append(os.environ["ANALYSIS_PATH"]) -from FLAF.Common.HistHelper import * +import FLAF.Common.HistHelper as HistHelper import FLAF.Common.Utilities as Utilities from FLAF.Common.Setup import Setup from FLAF.RunKit.run_tools import ps_call @@ -73,13 +73,14 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): + var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var) dims = ( 1 - if not hist_cfg_dict[var].get("var_list", False) - else 
len(hist_cfg_dict[var]["var_list"]) + if not hist_cfg_dict[var_entry].get("var_list", False) + else len(hist_cfg_dict[var_entry]["var_list"]) ) - model, unit_bin_model = GetModel( + model, unit_bin_model = HistHelper.GetModel( hist_cfg_dict, var, dims, return_unit_bin_model=True ) var_bin_list = ( @@ -210,7 +211,7 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): for filter_key in key_filter_dict.keys(): print(filter_key) for further_cut_name in [None] + list(further_cuts.keys()): - model, unit_bin_model = GetModel( + model, unit_bin_model = HistHelper.GetModel( hist_cfg_dict, var, return_unit_bin_model=True ) nbins = unit_bin_model.fNbinsX From 71939efde842d4fc1e20cfa68d5e80d4ec56e5ba Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 13:01:16 +0100 Subject: [PATCH 10/12] Formatting and fixed 1 more var_entry --- Analysis/HistMergerFromHists.py | 5 ++--- Analysis/HistProducerFromNTuple.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Analysis/HistMergerFromHists.py b/Analysis/HistMergerFromHists.py index 2a5dc348..acf4a6d8 100644 --- a/Analysis/HistMergerFromHists.py +++ b/Analysis/HistMergerFromHists.py @@ -4,7 +4,6 @@ import time import importlib - if __name__ == "__main__": sys.path.append(os.environ["ANALYSIS_PATH"]) @@ -88,7 +87,7 @@ def GetBTagWeightDict( for dataset_type in all_hists_dict.keys(): all_hists_dict_1D[dataset_type] = {} for key_name, histogram in all_hists_dict[dataset_type].items(): - (key_1, key_2) = key_name + key_1, key_2 = key_name if var not in boosted_variables: ch, reg, cat = key_1 @@ -297,7 +296,7 @@ def GetBTagWeightDict( outFile = ROOT.TFile(args.outFile, "RECREATE") for dataset_type in all_hists_dict.keys(): for key in all_hists_dict[dataset_type].keys(): - (key_dir, (uncName, uncScale)) = key + key_dir, (uncName, uncScale) = key # here there can be some custom requirements - e.g. 
regions / categories to not merge, datasets to ignore dir_name = "/".join(key_dir) dir_ptr = Utilities.mkdir(outFile, dir_name) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 27d56940..a931e4ba 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -84,7 +84,7 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): hist_cfg_dict, var, dims, return_unit_bin_model=True ) var_bin_list = ( - [f"{var}_bin" for var in hist_cfg_dict[var]["var_list"]] + [f"{var}_bin" for var in hist_cfg_dict[var_entry]["var_list"]] if dims > 1 else [f"{var}_bin"] ) From 0811e7d0645149d996002b6a88c5b9bdd0279199 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 14:50:24 +0100 Subject: [PATCH 11/12] Update flaf env to get latest black --- Analysis/HistTupleProducer.py | 6 ++---- run_tools/mk_flaf_env.sh | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Analysis/HistTupleProducer.py b/Analysis/HistTupleProducer.py index c2a1ca80..90ea2d76 100644 --- a/Analysis/HistTupleProducer.py +++ b/Analysis/HistTupleProducer.py @@ -37,8 +37,7 @@ def DefineBinnedColumn(hist_cfg_dict, var): start, stop = bin_range.split(":") axis_definition = f"static const TAxis axis({n_bins}, {start}, {stop});" - ROOT.gInterpreter.Declare( - f""" + ROOT.gInterpreter.Declare(f""" #include "ROOT/RVec.hxx" #include "TAxis.h" @@ -56,8 +55,7 @@ def DefineBinnedColumn(hist_cfg_dict, var): }} return out; }} - """ - ) + """) def createHistTuple( diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh index 47e1e843..93f59b22 100755 --- a/run_tools/mk_flaf_env.sh +++ b/run_tools/mk_flaf_env.sh @@ -33,6 +33,7 @@ install() { run_cmd pip install fastcrc run_cmd pip install bayesian-optimization run_cmd pip install yamllint + run_cmd pip install black==26.1.0 } join_by() { @@ -70,10 +71,10 @@ export LD_LIBRARY_PATH=${ld_lib_path} EOF - link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java + link_all $lcg_base/bin $env_base/bin pip pip3 pip3.12 python python3 python3.12 gosam2herwig gosam-config.py gosam.py git java black blackd link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/bin $env_base/bin go gofmt link_all $lcg_base/lib $env_base/lib/python3.12/site-packages python3.12 - link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info + link_all $lcg_base/lib/python3.12/site-packages $env_base/lib/python3.12/site-packages _distutils_hack distutils-precedence.pth pip pkg_resources setuptools black blackd blib2to3 pathspec graphviz py __pycache__ gosam-2.1.1_4b98559-py3.12.egg-info tenacity tenacity-9.0.0.dist-info servicex servicex-3.1.0.dist-info paramiko paramiko-2.9.2-py3.12.egg-info link_all $lcg_base/lib64 $env_base/lib/python3.12/site-packages cairo cmake libonnx_proto.a libsvm.so.2 pkgconfig ThePEG libavh_olo.a libff.a libqcdloop.a python3.12 link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib $env_base/lib link_all /cvmfs/sft.cern.ch/lcg/releases/gcc/15.2.0-35657/x86_64-el9/lib64 $env_base/lib From e53f55a5b3e8a8d0884273f6d6c36ab020cab6b2 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 18 Jan 2026 14:50:45 +0100 Subject: [PATCH 
12/12] Remove hardcoded black version

---
 run_tools/mk_flaf_env.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_tools/mk_flaf_env.sh b/run_tools/mk_flaf_env.sh
index 93f59b22..8c2e4ec3 100755
--- a/run_tools/mk_flaf_env.sh
+++ b/run_tools/mk_flaf_env.sh
@@ -33,7 +33,7 @@ install() {
     run_cmd pip install fastcrc
     run_cmd pip install bayesian-optimization
     run_cmd pip install yamllint
-    run_cmd pip install black==26.1.0
+    run_cmd pip install black
 }
 
 join_by() {
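
Note on the recurring pattern in this series: "variables" entries are either
plain column names or dicts of the form {"name": ..., "vars": [...]};
multi-dimensional histogram configs carry a "var_list" of component variables;
and SaveHist counts under/overflow bins only on axes that actually exist. The
standalone sketch below pulls those three pieces together. The config keys are
taken from the diffs above; the sample variable names and binnings are
hypothetical, and hist_dims paraphrases (rather than copies) the check used in
GetUnitBinHist.

def flatten_variables(variables):
    """Collect every scalar column a histogram needs, whether it is
    declared directly or as a component of a multi-dimensional entry."""
    flatten_vars = set()
    for var in variables:
        if isinstance(var, dict) and "vars" in var:
            for v in var["vars"]:
                flatten_vars.add(v)
        else:
            flatten_vars.add(var)
    return flatten_vars

def hist_dims(hist_cfg, var):
    """Dimensionality of a histogram entry: 1 unless the hist config
    declares a var_list of component variables."""
    return len(hist_cfg[var].get("var_list", [])) or 1

def count_unit_bins(n_x, n_y=1, n_z=1):
    """Total number of unit-hist bins, adding under/overflow (+2) only
    on axes that exist, mirroring the N_bins logic in SaveHist."""
    n_x += 2
    n_y = n_y + 2 if n_y > 1 else n_y
    n_z = n_z + 2 if n_z > 1 else n_z
    return n_x * n_y * n_z

variables = ["tau1_pt", {"name": "mass_vs_pt", "vars": ["bb_mass", "tau1_pt"]}]
hist_cfg = {"mass_vs_pt": {"var_list": ["bb_mass", "tau1_pt"]}}
print(flatten_variables(variables))       # {'bb_mass', 'tau1_pt'} (set order may vary)
print(hist_dims(hist_cfg, "mass_vs_pt"))  # 2
print(count_unit_bins(10))                # 12 -> 1D: only x gains under/overflow
print(count_unit_bins(10, 5))             # 84 -> 2D: (10+2) * (5+2)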