diff --git a/configs/nmssm_boosted/2018/preselection_et.yaml b/configs/nmssm_boosted/2018/preselection_et.yaml
index 32140ab..c5aac8b 100644
--- a/configs/nmssm_boosted/2018/preselection_et.yaml
+++ b/configs/nmssm_boosted/2018/preselection_et.yaml
@@ -69,6 +69,94 @@ processes:
- "EGamma_Run2018C-UL2018"
- "EGamma_Run2018D-UL2018"
+column_definitions:
+ njets:
+ expression: njets_boosted
+ allow_redefine: True
+ nbtag:
+ expression: nbtag_boosted
+ allow_redefine: True
+ metphi:
+ expression: metphi_boosted
+ allow_redefine: True
+ met:
+ expression: met_boosted
+ allow_redefine: True
+ pt_1:
+ expression: boosted_pt_1
+ allow_redefine: True
+ q_1:
+ expression: boosted_q_1
+ allow_redefine: True
+ pt_2:
+ expression: boosted_pt_2
+ allow_redefine: True
+ q_2:
+ expression: boosted_q_2
+ allow_redefine: True
+ mt_1:
+ expression: boosted_mt_1
+ allow_redefine: True
+ iso_1:
+ expression: boosted_iso_1
+ allow_redefine: True
+ mass_2:
+ expression: boosted_mass_2
+ allow_redefine: True
+ tau_decaymode_2:
+ expression: boosted_tau_decaymode_2
+ allow_redefine: True
+ deltaR_ditaupair:
+ expression: boosted_deltaR_ditaupair
+ allow_redefine: True
+ m_vis:
+ expression: boosted_m_vis
+ allow_redefine: True
+ fj_Xbb_pt:
+ expression: fj_Xbb_pt_boosted
+ allow_redefine: True
+ fj_Xbb_eta:
+ expression: fj_Xbb_eta_boosted
+ allow_redefine: True
+ fj_Xbb_particleNet_XbbvsQCD:
+ expression: fj_Xbb_particleNet_XbbvsQCD_boosted
+ allow_redefine: True
+ bpair_pt_1:
+ expression: bpair_pt_1_boosted
+ allow_redefine: True
+ bpair_pt_2:
+ expression: bpair_pt_2_boosted
+ allow_redefine: True
+ bpair_btag_value_2:
+ expression: bpair_btag_value_2_boosted
+ allow_redefine: True
+ bpair_eta_2:
+ expression: bpair_eta_2_boosted
+ allow_redefine: True
+ extraelec_veto:
+ expression: extraelec_veto_boosted
+ allow_redefine: True
+ gen_match_1:
+ expression: boosted_gen_match_1
+ allow_redefine: True
+ exclude_processes:
+ - data
+ gen_match_2:
+ expression: boosted_gen_match_2
+ allow_redefine: True
+ exclude_processes:
+ - data
+ btag_weight:
+ expression: btag_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+ pNet_Xbb_weight:
+ expression: pNet_Xbb_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+
event_selection:
# lep_pt: "boosted_pt_1 > 120"
had_tau_pt: "boosted_pt_2 > 40"
@@ -130,4 +218,4 @@ output_features:
- "bpair_eta_2"
- "met"
- "mass_2"
- - "tau_decaymode_2"
\ No newline at end of file
+ - "tau_decaymode_2"
diff --git a/configs/nmssm_boosted/2018/preselection_mt.yaml b/configs/nmssm_boosted/2018/preselection_mt.yaml
index 23163f9..ffd5cef 100644
--- a/configs/nmssm_boosted/2018/preselection_mt.yaml
+++ b/configs/nmssm_boosted/2018/preselection_mt.yaml
@@ -69,6 +69,89 @@ processes:
- "SingleMuon_Run2018C-UL2018_GT36"
- "SingleMuon_Run2018D-UL2018_GT36"
+column_definitions:
+ njets:
+ expression: njets_boosted
+ allow_redefine: True
+ nbtag:
+ expression: nbtag_boosted
+ allow_redefine: True
+ metphi:
+ expression: metphi_boosted
+ allow_redefine: True
+ met:
+ expression: met_boosted
+ allow_redefine: True
+ pt_1:
+ expression: boosted_pt_1
+ allow_redefine: True
+ q_1:
+ expression: boosted_q_1
+ allow_redefine: True
+ pt_2:
+ expression: boosted_pt_2
+ allow_redefine: True
+ q_2:
+ expression: boosted_q_2
+ allow_redefine: True
+ mt_1:
+ expression: boosted_mt_1
+ allow_redefine: True
+ iso_1:
+ expression: boosted_iso_1
+ allow_redefine: True
+ mass_2:
+ expression: boosted_mass_2
+ allow_redefine: True
+ tau_decaymode_2:
+ expression: boosted_tau_decaymode_2
+ allow_redefine: True
+ deltaR_ditaupair:
+ expression: boosted_deltaR_ditaupair
+ allow_redefine: True
+ m_vis:
+ expression: boosted_m_vis
+ allow_redefine: True
+ fj_Xbb_pt:
+ expression: fj_Xbb_pt_boosted
+ allow_redefine: True
+ fj_Xbb_eta:
+ expression: fj_Xbb_eta_boosted
+ allow_redefine: True
+ fj_Xbb_particleNet_XbbvsQCD:
+ expression: fj_Xbb_particleNet_XbbvsQCD_boosted
+ allow_redefine: True
+ bpair_pt_1:
+ expression: bpair_pt_1_boosted
+ allow_redefine: True
+ bpair_pt_2:
+ expression: bpair_pt_2_boosted
+ allow_redefine: True
+ bpair_btag_value_2:
+ expression: bpair_btag_value_2_boosted
+ allow_redefine: True
+ bpair_eta_2:
+ expression: bpair_eta_2_boosted
+ allow_redefine: True
+ extramuon_veto:
+ expression: extramuon_veto_boosted
+ allow_redefine: True
+ gen_match_2:
+ expression: boosted_gen_match_2
+ allow_redefine: True
+ exclude_processes:
+ - data
+ btag_weight:
+ expression: btag_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+ pNet_Xbb_weight:
+ expression: pNet_Xbb_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+
event_selection:
# lep_pt: "boosted_pt_1 > 55"
had_tau_pt: "boosted_pt_2 > 40"
@@ -130,4 +213,4 @@ output_features:
- "bpair_eta_2"
- "met"
- "mass_2"
- - "tau_decaymode_2"
\ No newline at end of file
+ - "tau_decaymode_2"
diff --git a/configs/nmssm_boosted/2018/preselection_tt.yaml b/configs/nmssm_boosted/2018/preselection_tt.yaml
index 2322fe7..ad16246 100644
--- a/configs/nmssm_boosted/2018/preselection_tt.yaml
+++ b/configs/nmssm_boosted/2018/preselection_tt.yaml
@@ -69,6 +69,95 @@ processes:
- "JetHT_Run2018C-UL2018"
- "JetHT_Run2018D-UL2018"
+column_definitions:
+ njets:
+ expression: njets_boosted
+ allow_redefine: True
+ nbtag:
+ expression: nbtag_boosted
+ allow_redefine: True
+ metphi:
+ expression: metphi_boosted
+ allow_redefine: True
+ met:
+ expression: met_boosted
+ allow_redefine: True
+ pt_1:
+ expression: boosted_pt_1
+ allow_redefine: True
+ q_1:
+ expression: boosted_q_1
+ allow_redefine: True
+ pt_2:
+ expression: boosted_pt_2
+ allow_redefine: True
+ q_2:
+ expression: boosted_q_2
+ allow_redefine: True
+ mt_1:
+ expression: boosted_mt_1
+ allow_redefine: True
+ iso_1:
+ expression: boosted_iso_1
+ allow_redefine: True
+ mass_1:
+ expression: boosted_mass_1
+ allow_redefine: True
+ mass_2:
+ expression: boosted_mass_2
+ allow_redefine: True
+ tau_decaymode_1:
+ expression: boosted_tau_decaymode_1
+ allow_redefine: True
+ tau_decaymode_2:
+ expression: boosted_tau_decaymode_2
+ allow_redefine: True
+ deltaR_ditaupair:
+ expression: boosted_deltaR_ditaupair
+ allow_redefine: True
+ m_vis:
+ expression: boosted_m_vis
+ allow_redefine: True
+ fj_Xbb_pt:
+ expression: fj_Xbb_pt_boosted
+ allow_redefine: True
+ fj_Xbb_eta:
+ expression: fj_Xbb_eta_boosted
+ allow_redefine: True
+ fj_Xbb_particleNet_XbbvsQCD:
+ expression: fj_Xbb_particleNet_XbbvsQCD_boosted
+ allow_redefine: True
+ bpair_pt_1:
+ expression: bpair_pt_1_boosted
+ allow_redefine: True
+ bpair_pt_2:
+ expression: bpair_pt_2_boosted
+ allow_redefine: True
+ bpair_btag_value_2:
+ expression: bpair_btag_value_2_boosted
+ allow_redefine: True
+ bpair_eta_2:
+ expression: bpair_eta_2_boosted
+ allow_redefine: True
+ extramuon_veto:
+ expression: extramuon_veto_boosted
+ allow_redefine: True
+ gen_match_2:
+ expression: boosted_gen_match_2
+ allow_redefine: True
+ exclude_processes:
+ - data
+ btag_weight:
+ expression: btag_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+ pNet_Xbb_weight:
+ expression: pNet_Xbb_weight_boosted
+ allow_redefine: True
+ exclude_processes:
+ - data
+
event_selection:
# met: "(met_boosted > 120)"
had_tau_pt: "(boosted_pt_1 > 40) && (boosted_pt_2 > 40)"
@@ -132,4 +221,4 @@ output_features:
- "mass_1"
- "tau_decaymode_1"
- "mass_2"
- - "tau_decaymode_2"
\ No newline at end of file
+ - "tau_decaymode_2"
diff --git a/docs/preselection.md b/docs/preselection.md
index bbe1478..e16ca7f 100644
--- a/docs/preselection.md
+++ b/docs/preselection.md
@@ -8,6 +8,7 @@ The preselection config has the following parameters:
---|---|---
`channel` | `string` | tau pair decay channels ("et", "mt", "tt")
`processes` | `dict` | process parameters are explained below
+ `column_definitions` | `dict` | in this section, new columns can be defined based on a given `ROOT` expression.
The keys of the dictionary correspond to the names of the defined columns. The values are dictionaries themselves, with the `expression` key defining the `ROOT` expression for the column. The optional entries `processes` and `exclude_processes` allow targeting specific processes; the entry `allow_redefine` can be used to enable the use of the `ROOT.RDataFrame.Redefine` function for overwriting already existing columns. For a more detailed description, see below.
`event_selection` | `dict` | with this parameter all selections that should be applied are defined.
This is basically a dictionary of cuts where the key is the name of a cut and the value is the cut itself as a string e.g. `had_tau_pt: "pt_2 > 30"`. The name of a cut is not really important, it is only used as an output information in the terminal. A cut can only use variables which are in the ntuples.
`mc_weights` | `dict` | weight parameter are defined below
`emb_weights` | `dict` | all weights that should be applied for embedded samples are defined.
Like for `event_selection` a weight can directly be specified and is then applied to all samples the same way e.g. `single_trigger: "trg_wgt_single_mu24ormu27"`
@@ -31,6 +32,42 @@ The `tau_gen_modes` have following modes:
`L` | `string` | lepton misidentified as a tau
`all` | `string` | if no split should be performed
+In `column_definitions`, new columns can be added to the output `ntuples` by
+using `ROOT` expressions. An example entry could look like this:
+
+```yaml
+column_definitions:
+ nbtag:
+ expression: n_bjets
+ processes:
+ - ttbar
+ - DY
+ btag_weight:
+ expression: id_wgt_bjet_pnet_shape
+ exclude_processes:
+ - data
+ allow_redefine: True
+ jj_deltaR:
+ expression: ROOT::VecOps::DeltaR(jeta_1, jeta_2, jphi_1, jphi_2)
+```
+
+The key `expression` is required and can contain any valid `ROOT` expression.
+
+The entry `exclude_processes` is optional. Column definitions are performed for
+all processes except the ones given in this list. The entry `processes` is also
+optional. The column definition is performed only for processes in this list.
+The lists `processes` and `exclude_processes` can contain the names from the
+`processes` section of this configuration. By default, the new columns are
+defined for all processes. To write the new columns to the output file, you have
+to explicitly add the columns to the `output_features` list. Note that you can
+only set `processes` or `exclude_processes` for a column, but not both at the
+same time.
+
+If the key `allow_redefine` is set to `True`, the `ROOT.RDataFrame.Redefine`
+function is used if a column with the same name has been found in the
+`RDataFrame`. The values in this column are then overwritten by the expression
+given for the new column.
+
In `mc_weights` all weights that should be applied for simulated samples are defined.
There are two types of weights.
@@ -53,4 +90,4 @@ python preselection.py --config-file configs/PATH/CONFIG.yaml
Further there are additional optional parameters:
1. `--nthreads=SOME_INTEGER` to define the number of threads for the multiprocessing pool to run the sample processing in parallel. Default value is 8 (this should normally cover running all of the samples in parallel).
-2. `--ncores=SOME_INTEGER` to define the number of cores that should be used for each pool thread to speed up the ROOT dataframe calculation. Default value is 2.
\ No newline at end of file
+2. `--ncores=SOME_INTEGER` to define the number of cores that should be used for each pool thread to speed up the ROOT dataframe calculation. Default value is 2.
diff --git a/helper/functions.py b/helper/functions.py
index 3709073..baa8bd5 100644
--- a/helper/functions.py
+++ b/helper/functions.py
@@ -22,6 +22,9 @@
from XRootD import client
+TAU_FAKE_FACTORS_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
class CachingKeyHelper:
@staticmethod
def make_hashable(obj: Union[Dict, List, Tuple, Any]) -> Union[Dict, Tuple, bytes, Any]:
@@ -411,7 +414,21 @@ def load_config(config_file: str) -> Dict:
else:
print("No common config file found!")
- config = {}
+ # Container of the loaded configuration
+ #
+ # Some default values are pre-defined in the config dict that is going to contain the loaded
+ # configuration. These values are overwritten if they are explicitly set in the common config file.
+ #
+ # The variables, for which defaults are set, are:
+ #
+    # - `sample_database`: Path to the sample database directory. Usually, this path is set to the
+ # `datasets` submodule of the `TauFakeFactors` module. Users can set a custom path, e.g.,
+ # to an external path to a working version of their sample database.
+ config = {
+ "sample_database": os.path.join(TAU_FAKE_FACTORS_DIR, "datasets"),
+ }
+
+ # Update the config with common settings, applying to all steps
with open(common_config_file, "r") as file:
config.update(configured_yaml.load(file))
@@ -623,74 +640,75 @@ def get_output_name(
return os.path.join(path, f"{process}{tau_gen_mode}.root")
-def rename_boosted_variables(rdf: Any, channel: str) -> Any:
+def define_columns(rdf: Any, column_definitions: dict, process: str) -> Any:
     """
-    Function to redefine variables to the boosted tau pair information. Redefining only variables
-    which are written out for the fake factor measurement. Due to the hardcoded naming and redifinitions
-    this function needs to be adjusted if something changes in the list of output variables.
+    Customizer function to define additional columns in the ntuples.
+
+    The `column_definitions` dictionary is usually provided with the preselection configuration
+    file. The keys of the dictionary correspond to the columns to be created. The values are
+    dictionaries which contain the settings of the column definition. The keys of these inner
+    dictionaries have the following meaning:
+
+    - `expression`: The expression string which is used to define the new column.
+
+    - `processes` (_optional_): An exclusive list of process names for which the definition should be
+      performed. For all processes, that are not part of the list, the column definition is not
+      performed. If this entry is set, `exclude_processes` must not be set for the same column.
+
+    - `exclude_processes` (_optional_): A list of process names for which the definition should be
+      skipped. If `process` is in this list, the column definition is not processed. If this entry
+      is set, `processes` must not be set for the same column.
+
+    - `allow_redefine` (_optional_): If this flag is set to `True`, the `Redefine` method is used
+      to overwrite the value of an already existing column with the same name. Default: `False`.
+
+    The new column names must not exist in the ntuples, except for the case that `allow_redefine`
+    is set to true. Otherwise a `ValueError` is raised.

     Args:
         rdf: root DataFrame
-        channel: Analysis channel of the tau analysis e.g. "et", "mt" or "tt"
+        column_definitions: Dictionary mapping new column names (keys) to their definition dictionaries (values)
+        process: Name of the current process

     Return:
         root DataFrame with redefined variables
     """
-    rdf = rdf.Redefine("njets", "njets_boosted")
-    rdf = rdf.Redefine("nbtag", "nbtag_boosted")
-    rdf = rdf.Redefine("metphi", "metphi_boosted")
-    rdf = rdf.Redefine("met", "met_boosted")
-    rdf = rdf.Redefine("pt_1", "boosted_pt_1")
-    rdf = rdf.Redefine("q_1", "boosted_q_1")
-    rdf = rdf.Redefine("pt_2", "boosted_pt_2")
-    rdf = rdf.Redefine("q_2", "boosted_q_2")
-    rdf = rdf.Redefine("mt_1", "boosted_mt_1")
-    rdf = rdf.Redefine("iso_1", "boosted_iso_1")
-    rdf = rdf.Redefine("mass_2", "boosted_mass_2")
-    rdf = rdf.Redefine("tau_decaymode_2", "boosted_tau_decaymode_2")
-    rdf = rdf.Redefine("deltaR_ditaupair", "boosted_deltaR_ditaupair")
-    rdf = rdf.Redefine("m_vis", "boosted_m_vis")
-    rdf = rdf.Redefine("fj_Xbb_pt", "fj_Xbb_pt_boosted")
-    rdf = rdf.Redefine("fj_Xbb_eta", "fj_Xbb_eta_boosted")
-    rdf = rdf.Redefine(
-        "fj_Xbb_particleNet_XbbvsQCD", "fj_Xbb_particleNet_XbbvsQCD_boosted"
-    )
-    rdf = rdf.Redefine("bpair_pt_1", "bpair_pt_1_boosted")
-    rdf = rdf.Redefine("bpair_pt_2", "bpair_pt_2_boosted")
-    rdf = rdf.Redefine("bpair_btag_value_2", "bpair_btag_value_2_boosted")
-    rdf = rdf.Redefine("bpair_eta_2", "bpair_eta_2_boosted")
-    if "boosted_gen_match_2" in rdf.GetColumnNames():
-        rdf = rdf.Redefine("gen_match_2", "boosted_gen_match_2")
-    else:
-        rdf = rdf.Define("boosted_gen_match_2", "-1.")
-        rdf = rdf.Redefine("gen_match_2", "boosted_gen_match_2")
-
-    if "btag_weight_boosted" in rdf.GetColumnNames():
-        rdf = rdf.Redefine("btag_weight", "btag_weight_boosted")
-    else:
-        rdf = rdf.Define("btag_weight_boosted", "1.")
-        rdf = rdf.Redefine("btag_weight", "btag_weight_boosted")
+    # Ensure that the new column names are not already present in the ntuple
+    rdf_columns = set(rdf.GetColumnNames())
+    new_columns = set(k for k, v in column_definitions.items() if not v.get("allow_redefine", False))
+    intersection = rdf_columns.intersection(new_columns)
+    if intersection:
+        raise ValueError(
+            f"The following new column names already exist in the ntuple and allow_redefine is not set: {intersection}"
+        )

-    if "pNet_Xbb_weight_boosted" in rdf.GetColumnNames():
-        rdf = rdf.Redefine("pNet_Xbb_weight", "pNet_Xbb_weight_boosted")
-    else:
-        rdf = rdf.Define("pNet_Xbb_weight_boosted", "1.")
-        rdf = rdf.Redefine("pNet_Xbb_weight", "pNet_Xbb_weight_boosted")
-
-    if channel == "tt":
-        rdf = rdf.Redefine("mass_1", "boosted_mass_1")
-        rdf = rdf.Redefine("tau_decaymode_1", "boosted_tau_decaymode_1")
-        if "boosted_gen_match_1" in rdf.GetColumnNames():
-            rdf = rdf.Redefine("gen_match_1", "boosted_gen_match_1")
-        else:
-            rdf = rdf.Define("boosted_gen_match_1", "-1.")
-            rdf = rdf.Redefine("gen_match_1", "boosted_gen_match_1")
+    # Perform the define declarations on the RDataFrame object
+    for new_column, define_dict in column_definitions.items():
+        # Check that processes and exclude_processes are not set at the same time
+        if "processes" in define_dict and "exclude_processes" in define_dict:
+            raise ValueError(
+                f"Both processes and exclude_processes have been specified for column {new_column}. You can only set one of them for the same entry."
+            )

-    if channel == "et":
-        rdf = rdf.Redefine("extraelec_veto", "boosted_extraelec_veto")
-    if channel == "mt":
-        rdf = rdf.Redefine("extramuon_veto", "boosted_extramuon_veto")
+        # Check if the process should be skipped
+        if "processes" in define_dict and process not in define_dict["processes"]:
+            continue
+        if "exclude_processes" in define_dict and process in define_dict["exclude_processes"]:
+            continue
+
+        # Get the ROOT expression for defining the new column
+        expression = define_dict["expression"]
+
+        # Use
+        #   - `Redefine` if allow_redefine is `True` and the column is already present in the RDataFrame
+        #   - `Define` in all other cases
+        rdf_define_call = (
+            rdf.Redefine
+            if new_column in rdf_columns and define_dict.get("allow_redefine", False)
+            else rdf.Define
+        )
+        rdf = rdf_define_call(new_column, expression)

     return rdf
diff --git a/helper/weights.py b/helper/weights.py
index 7472cd3..5c61f0a 100644
--- a/helper/weights.py
+++ b/helper/weights.py
@@ -94,6 +94,14 @@ def lumi_weight(rdf: Any, era: str) -> Any:
rdf = rdf.Redefine("weight", "weight * 41.48 * 1000.")
elif era == "2018":
rdf = rdf.Redefine("weight", "weight * 59.83 * 1000.")
+ elif era == "2022preEE":
+ rdf = rdf.Redefine("weight", "weight * 7.9804 * 1000.")
+ elif era == "2022postEE":
+ rdf = rdf.Redefine("weight", "weight * 26.6717 * 1000.")
+ elif era == "2023preBPix":
+ rdf = rdf.Redefine("weight", "weight * 18.063 * 1000.")
+ elif era == "2023postBPix":
+ rdf = rdf.Redefine("weight", "weight * 9.693 * 1000.")
else:
raise ValueError(f"Weight calc: lumi: Era is not defined: {era}")
diff --git a/preselection.py b/preselection.py
index ce8be61..878dc35 100644
--- a/preselection.py
+++ b/preselection.py
@@ -80,11 +80,23 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]],
log.info(f"WARNING: Sample {sample} is empty. Skipping...")
return ()
+ # get column definitions from config and declare definitions on the RDataFrame
+ column_definitions = config.get("column_definitions", {})
+ if column_definitions:
+ rdf = func.define_columns(rdf, column_definitions, process)
+
# apply analysis specific event filters
selection_conf = config["event_selection"]
for cut in selection_conf:
rdf = rdf.Filter(f"({selection_conf[cut]})", f"cut on {cut}")
+ # For Run 3 DY samples, we need to collect the events from two samples, that need to be selected
+ # for different flavors
+ if sample.startswith("DYto2L"):
+ rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 11 || lhe_drell_yan_decay_flavor == 13", "DY e/mu selection")
+ if sample.startswith("DYto2Tau"):
+ rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 15", "DY tau selection")
+
if process == "embedding":
rdf = filters.emb_tau_gen_match(rdf=rdf, channel=config["channel"])
@@ -122,6 +134,15 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]],
rdf = rdf.Redefine(
"weight", f"weight * ({mc_weight_conf[weight]})"
)
+ elif weight == "ttbar_norm_weight":
+ if process == "ttbar" and tau_gen_mode in ["L", "J", "T"]:
+ # This function applies an additional normalization weight to tt backgrounds
+ # obtained from simulation. The factor corrects for a mismodelling of the
+ # normalization of tt compared to data and is extracted in an e mu control
+ # region.
+ rdf = rdf.Redefine(
+ "weight", f"weight * ({mc_weight_conf[weight]})"
+ )
else:
rdf = rdf.Redefine("weight", f"weight * ({mc_weight_conf[weight]})")
@@ -260,22 +281,28 @@ def run_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], str, in
# loading of the chosen config file
config = func.load_config(args.config_file)
- # loading general dataset info file for xsec and event number
- with open(f"datasets/{config['nanoAOD_version']}/datasets.json", "r") as file:
- datasets = json.load(file)
-
# define output path for the preselected samples
output_path = os.path.join(
config["output_path"], "preselection", config["era"], config["channel"]
)
func.check_path(path=output_path)
+ # Set up logger and retrieve logger instance for main routine
func.setup_logger(
log_file=output_path + "/preselection.log",
log_name="preselection",
log_level=logging.INFO,
subcategories=config["processes"],
)
+ log = logging.getLogger("preselection.main")
+
+ # Load general dataset info file for xsec and event number
+ datasets_file = os.path.join(
+ config["sample_database"], config["nanoAOD_version"], "datasets.json"
+ )
+ with open(datasets_file, "r") as file:
+ datasets = json.load(file)
+ log.info(f"Loading sample database from {datasets_file}")
# get needed features for fake factor calculation
output_features = config["output_features"]
diff --git a/preselection_boosted.py b/preselection_boosted.py
index 6b38216..9a10045 100644
--- a/preselection_boosted.py
+++ b/preselection_boosted.py
@@ -164,11 +164,6 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]],
log.debug(out.getvalue())
log.debug("-" * 50)
- # WARNING: cross check this function is something changes in the list of output features
- tmp_rdf = func.rename_boosted_variables(
- rdf=tmp_rdf, channel=config["channel"]
- )
-
tmp_file_name = func.get_output_name(
path=output_path, process=sample, tau_gen_mode=tau_gen_mode
)
@@ -265,22 +260,28 @@ def run_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], str, in
# loading of the chosen config file
config = func.load_config(args.config_file)
- # loading general dataset info file for xsec and event number
- with open(f"datasets/{config['nanoAOD_version']}/datasets.json", "r") as file:
- datasets = json.load(file)
-
# define output path for the preselected samples
output_path = os.path.join(
config["output_path"], "preselection", config["era"], config["channel"]
)
func.check_path(path=output_path)
+ # Set up logger and retrieve logger instance for main routine
func.setup_logger(
log_file=output_path + "/preselection.log",
log_name="preselection",
log_level=logging.INFO,
subcategories=config["processes"],
)
+ log = logging.getLogger("preselection.main")
+
+ # Load general dataset info file for xsec and event number
+ datasets_file = os.path.join(
+ config["sample_database"], config["nanoAOD_version"], "datasets.json"
+ )
+ with open(datasets_file, "r") as file:
+ datasets = json.load(file)
+ log.info(f"Loading sample database from {datasets_file}")
# get needed features for fake factor calculation
output_features = config["output_features"]