Skip to content
41 changes: 22 additions & 19 deletions Analysis/HistMergerFromHists.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time
import importlib


if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

Expand Down Expand Up @@ -38,9 +37,10 @@ def checkFile(inFileRoot, channels, qcdRegions, categories):
return True


def fill_all_hists_dict(
def fill_hists(
items_dict,
all_hist_dict_per_var_and_datasettype,
all_hist_dict,
dataset_type,
var_input,
unc_source="Central",
):
Expand All @@ -54,9 +54,13 @@ def fill_all_hists_dict(
if var != var_check:
continue
final_key = (key_tuple, (unc_source, scale))
if final_key not in all_hist_dict_per_var_and_datasettype:
all_hist_dict_per_var_and_datasettype[final_key] = []
all_hist_dict_per_var_and_datasettype[final_key].append(var_hist)
if dataset_type not in all_hist_dict.keys():
all_hist_dict[dataset_type] = {}
if final_key not in all_hist_dict[dataset_type]:
var_hist.SetDirectory(0)
all_hist_dict[dataset_type][final_key] = var_hist
else:
all_hist_dict[dataset_type][final_key].Add(var_hist)


def MergeHistogramsPerType(all_hists_dict):
Expand All @@ -83,7 +87,7 @@ def GetBTagWeightDict(
for dataset_type in all_hists_dict.keys():
all_hists_dict_1D[dataset_type] = {}
for key_name, histogram in all_hists_dict[dataset_type].items():
(key_1, key_2) = key_name
key_1, key_2 = key_name

if var not in boosted_variables:
ch, reg, cat = key_1
Expand Down Expand Up @@ -235,23 +239,22 @@ def GetBTagWeightDict(
f"input file for dataset {dataset_name} (with path= {inFile_path}) does not exist, skipping"
)
continue
with ROOT.TFile.Open(inFile_path, "READ") as inFile:
# check that the file is ok
if inFile.IsZombie():
raise RuntimeError(f"{inFile_path} is zombie")
if not checkFile(inFile, channels, regions, all_categories):
raise RuntimeError(f"{dataset_name} has void file")

base_process_name = dataset_cfg_dict[dataset_name]["process_name"]
dataset_type = setup.base_processes[base_process_name]["parent_process"]
if dataset_type not in all_hists_dict.keys():
all_hists_dict[dataset_type] = {}

all_items = load_all_items(inFile_path)
fill_all_hists_dict(
all_items, all_hists_dict[dataset_type], args.var, args.uncSource
) # to add: , unc_source="Central", scale="Central"
MergeHistogramsPerType(all_hists_dict)
with ROOT.TFile.Open(inFile_path, "READ") as inFile:
# check that the file is ok
if inFile.IsZombie():
raise RuntimeError(f"{inFile_path} is zombie")
if not checkFile(inFile, channels, regions, all_categories):
raise RuntimeError(f"{dataset_name} has void file")
all_items = get_all_items_recursive(inFile)
fill_hists(
all_items, all_hists_dict, dataset_type, args.var, args.uncSource
) # to add: , unc_source="Central", scale="Central"

# here there should be the custom applications - e.g. GetBTagWeightDict, AddQCDInHistDict, etc.
# analysis.ApplyMergeCustomisations() # --> here go the QCD and bTag functions
Expand Down Expand Up @@ -293,7 +296,7 @@ def GetBTagWeightDict(
outFile = ROOT.TFile(args.outFile, "RECREATE")
for dataset_type in all_hists_dict.keys():
for key in all_hists_dict[dataset_type].keys():
(key_dir, (uncName, uncScale)) = key
key_dir, (uncName, uncScale) = key
# here there can be some custom requirements - e.g. regions / categories to not merge, datasets to ignore
dir_name = "/".join(key_dir)
dir_ptr = Utilities.mkdir(outFile, dir_name)
Expand Down
58 changes: 6 additions & 52 deletions Analysis/HistPlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,45 +23,12 @@ def GetHistName(dataset_name, dataset_type, uncName, unc_scale, global_cfg_dict)
return histName


def RebinHisto(hist_initial, new_binning, dataset, wantOverflow=True, verbose=False):
    """Rebin *hist_initial* onto the explicit bin edges in *new_binning*.

    Parameters
    ----------
    hist_initial : ROOT.TH1
        Histogram to rebin (ROOT's Rebin with a name returns a new object).
    new_binning : sequence of float
        Target bin edges; produces ``len(new_binning) - 1`` bins.
    dataset : str
        Name given to the rebinned histogram; the special value ``"data"``
        switches the bin errors to Poisson intervals.
    wantOverflow : bool
        If True, fold the overflow bin into the last visible bin.
    verbose : bool
        If True, print the content/error of each bin after rebinning.

    Returns
    -------
    ROOT.TH1
        The rebinned histogram.
    """
    edges = array.array("d", new_binning)
    new_hist = hist_initial.Rebin(len(new_binning) - 1, dataset, edges)
    if dataset == "data":
        new_hist.SetBinErrorOption(ROOT.TH1.kPoisson)
    if wantOverflow:
        last_bin = new_hist.GetNbinsX()
        overflow_bin = last_bin + 1
        # Fold the overflow content into the last visible bin first, then read
        # the errors AFTER the content update (same statement order as before:
        # without Sumw2 the stored error can depend on the bin content) and
        # combine them in quadrature.
        merged = new_hist.GetBinContent(last_bin) + new_hist.GetBinContent(overflow_bin)
        new_hist.SetBinContent(last_bin, merged)
        err_last = new_hist.GetBinError(last_bin)
        err_over = new_hist.GetBinError(overflow_bin)
        new_hist.SetBinError(
            last_bin,
            math.sqrt(err_last * err_last + err_over * err_over),
        )

    if verbose:
        for idx in range(0, len(new_binning)):
            print(
                f"nbin = {idx}, content = {new_hist.GetBinContent(idx)}, error {new_hist.GetBinError(idx)}"
            )
    # FixNegativeContributions comes from module scope (HistHelper import) —
    # clamps/repairs negative bins; returns (success, debug, per-bin info).
    fix_negative_contributions, debug_info, negative_bins_info = (
        FixNegativeContributions(new_hist)
    )
    if not fix_negative_contributions:
        print("negative contribution not fixed")
        print(fix_negative_contributions, debug_info, negative_bins_info)
        for idx in range(0, new_hist.GetNbinsX() + 1):
            content = new_hist.GetBinContent(idx)
            if content < 0:
                print(f"for {dataset}, bin {idx} content is < 0: {content}")

    return new_hist


def findNewBins(hist_cfg_dict, var, **keys):

cfg = hist_cfg_dict.get(var, {})

if "2d" in cfg:
return cfg["2d"]

if "x_rebin" not in cfg:
if "x_bins" not in cfg:
raise RuntimeError(f'bins definition not found for "{var}"')
Expand Down Expand Up @@ -100,21 +67,6 @@ def recursive_search(d, remaining_keys):
return result


def getNewBins(bins):
    """Return a list of bin edges from either an explicit list or a spec string.

    Parameters
    ----------
    bins : list | str
        Either an explicit list of edges (returned as a shallow copy), or a
        string of the form ``"<n_bins>|<start>:<stop>"``, e.g. ``"10|0:1"``,
        expanded into ``n_bins + 1`` uniformly spaced edges.

    Returns
    -------
    list of float
        The bin edges, ending exactly at ``stop`` for the string form.
    """
    if isinstance(bins, list):
        return list(bins)
    n_bins, bin_range = bins.split("|")
    start_s, stop_s = bin_range.split(":")
    n_bins = int(n_bins)
    start = float(start_s)
    stop = float(stop_s)
    bin_width = (stop - start) / n_bins
    # Compute each edge directly from its index instead of accumulating
    # bin_width: repeated float addition drifts, which could shift the last
    # edge (0.9999999999999999 instead of 1.0) or drop it entirely when the
    # accumulated value overshoots `stop`. Pin the final edge to `stop`.
    return [start + i * bin_width for i in range(n_bins)] + [stop]


if __name__ == "__main__":
import argparse
import FLAF.PlotKit.Plotter as Plotter
Expand Down Expand Up @@ -265,7 +217,9 @@ def getNewBins(bins):
hist_cfg_dict[var_entry]["use_log_x"] = True

rebin_condition = args.rebin and "x_rebin" in hist_cfg_dict[var_entry].keys()
bins_to_compute = hist_cfg_dict[var_entry]["x_bins"]
bins_to_compute = (
hist_cfg_dict[var_entry]["x_bins"] if not rebin_condition else None
)

if rebin_condition:
bins_to_compute = findNewBins(
Expand Down
59 changes: 51 additions & 8 deletions Analysis/HistProducerFromNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

from FLAF.Common.HistHelper import *
import FLAF.Common.HistHelper as HistHelper
import FLAF.Common.Utilities as Utilities
from FLAF.Common.Setup import Setup
from FLAF.RunKit.run_tools import ps_call
Expand Down Expand Up @@ -36,7 +36,19 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
dir_ptr = Utilities.mkdir(outFile, dir_name)

merged_hist = model.GetHistogram().Clone()
for i in range(0, unit_hist.GetNbinsX() + 2):
# Yes I know this is ugly
# Axis needs +2 for under/overflow, but only if the return is not 1!!!
# Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D
N_xbins = unit_hist.GetNbinsX() + 2
N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1
N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1
N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
N_bins = N_xbins * N_ybins * N_zbins
# If we use the THnD then we have 'GetNbins' function instead
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins
Comment on lines +39 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Yes I know this is ugly
# Axis needs +2 for under/overflow, but only if the return is not 1!!!
# Was having issue with Z axis in 2D. We don't want to multiply by 3 if it's 2D
N_xbins = unit_hist.GetNbinsX() + 2
N_ybins = unit_hist.GetNbinsY() if hasattr(unit_hist, "GetNbinsY") else 1
N_ybins = N_ybins + 2 if N_ybins > 1 else N_ybins
N_zbins = unit_hist.GetNbinsZ() if hasattr(unit_hist, "GetNbinsZ") else 1
N_zbins = N_zbins + 2 if N_zbins > 1 else N_zbins
N_bins = N_xbins * N_ybins * N_zbins
# If we use the THnD then we have 'GetNbins' function instead
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else N_bins
N_bins = unit_hist.GetNbins() if hasattr(unit_hist, "GetNbins") else unit_hist.GetNcells()

# This can be a loop over many bins, several times. Can be improved to be ran in c++ instead
for i in range(0, N_bins):
bin_content = unit_hist.GetBinContent(i)
bin_error = unit_hist.GetBinError(i)
merged_hist.SetBinContent(i, bin_content)
Expand All @@ -46,7 +58,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):
if len(hist_list) > 1:
for model, unit_hist in hist_list[1:]:
hist = model.GetHistogram()
for i in range(0, unit_hist.GetNbinsX() + 2):
for i in range(0, N_bins):
bin_content = unit_hist.GetBinContent(i)
bin_error = unit_hist.GetBinError(i)
hist.SetBinContent(i, bin_content)
Expand All @@ -61,10 +73,35 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0):


def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale):
    """Book a unit-binned histogram for *var* from the (filtered) RDataFrame.

    Parameters
    ----------
    rdf : ROOT.RDataFrame
        Input dataframe holding the pre-binned ``<var>_bin`` columns.
    var : str
        Variable name; looked up in the module-level ``hist_cfg_dict``.
    filter_to_apply : str
        Selection expression passed to ``rdf.Filter``.
    weight_name : str
        Name of the per-event weight column.
    unc, scale : str
        Currently unused here; kept for interface compatibility with callers.
        # NOTE(review): confirm whether unc/scale are meant to feed the model.

    Returns
    -------
    tuple
        ``(model, unit_hist)`` — the final binning model and the lazily
        booked unit-bin histogram result.

    Raises
    ------
    RuntimeError
        If the configured dimensionality is not 1, 2 or 3.
    """
    # Uses module-level globals: HistHelper, hist_cfg_dict, args.
    var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var)
    var_list = hist_cfg_dict[var_entry].get("var_list", False)
    dims = len(var_list) if var_list else 1

    model, unit_bin_model = HistHelper.GetModel(
        hist_cfg_dict, var, dims, return_unit_bin_model=True
    )
    # Use a distinct loop variable: the original comprehension shadowed the
    # `var` parameter, clobbering it for anything after this point.
    if dims > 1:
        var_bin_list = [f"{v}_bin" for v in hist_cfg_dict[var_entry]["var_list"]]
    else:
        var_bin_list = [f"{var}_bin"]

    if not 1 <= dims <= 3:
        raise RuntimeError("Only 1D, 2D and 3D histograms are supported")
    # Single dispatch instead of three duplicated if/elif branches: the RDF
    # booking methods only differ by name (Histo1D / Histo2D / Histo3D).
    rdf_filtered = rdf.Filter(filter_to_apply)
    mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D")
    unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name)
    return model, unit_hist


Expand Down Expand Up @@ -174,7 +211,7 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
for filter_key in key_filter_dict.keys():
print(filter_key)
for further_cut_name in [None] + list(further_cuts.keys()):
model, unit_bin_model = GetModel(
model, unit_bin_model = HistHelper.GetModel(
hist_cfg_dict, var, return_unit_bin_model=True
)
nbins = unit_bin_model.fNbinsX
Expand Down Expand Up @@ -276,7 +313,13 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts):
)

variables = setup.global_params["variables"]
vars_needed = set(variables)
vars_needed = set()
for var in variables:
if isinstance(var, dict) and "vars" in var:
for v in var["vars"]:
vars_needed.add(v)
else:
vars_needed.add(var)
for further_cut_name, (vars_for_cut, _) in further_cuts.items():
for var_for_cut in vars_for_cut:
if var_for_cut:
Expand Down
16 changes: 11 additions & 5 deletions Analysis/HistTupleProducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
start, stop = bin_range.split(":")
axis_definition = f"static const TAxis axis({n_bins}, {start}, {stop});"

ROOT.gInterpreter.Declare(
f"""
ROOT.gInterpreter.Declare(f"""
#include "ROOT/RVec.hxx"
#include "TAxis.h"

Expand All @@ -56,8 +55,7 @@ def DefineBinnedColumn(hist_cfg_dict, var):
}}
return out;
}}
"""
)
""")


def createHistTuple(
Expand Down Expand Up @@ -128,7 +126,15 @@ def createHistTuple(
print("Scale uncertainties to consider:", scale_uncertainties)

print("Defining binnings for variables")
flatten_vars = set()
for var in variables:
if isinstance(var, dict) and "vars" in var:
for v in var["vars"]:
flatten_vars.add(v)
else:
flatten_vars.add(var)

for var in flatten_vars:
DefineBinnedColumn(hist_cfg_dict, var)

snaps = []
Expand Down Expand Up @@ -197,7 +203,7 @@ def createHistTuple(
dfw.colToSave.append(desc["weight"])

print("Defining binned columns")
for var in variables:
for var in flatten_vars:
dfw.df = dfw.df.Define(f"{var}_bin", f"get_{var}_bin({var})")
dfw.colToSave.append(f"{var}_bin")

Expand Down
Loading