diff --git a/configs/nmssm_boosted/2018/preselection_et.yaml b/configs/nmssm_boosted/2018/preselection_et.yaml index 32140ab..c5aac8b 100644 --- a/configs/nmssm_boosted/2018/preselection_et.yaml +++ b/configs/nmssm_boosted/2018/preselection_et.yaml @@ -69,6 +69,94 @@ processes: - "EGamma_Run2018C-UL2018" - "EGamma_Run2018D-UL2018" +column_definitions: + njets: + expression: njets_boosted + allow_redefine: True + nbtag: + expression: nbtag_boosted + allow_redefine: True + metphi: + expression: metphi_boosted + allow_redefine: True + met: + expression: met_boosted + allow_redefine: True + pt_1: + expression: boosted_pt_1 + allow_redefine: True + q_1: + expression: boosted_q_1 + allow_redefine: True + pt_2: + expression: boosted_pt_2 + allow_redefine: True + q_2: + expression: boosted_q_2 + allow_redefine: True + mt_1: + expression: boosted_mt_1 + allow_redefine: True + iso_1: + expression: boosted_iso_1 + allow_redefine: True + mass_2: + expression: boosted_mass_2 + allow_redefine: True + tau_decaymode_2: + expression: boosted_tau_decaymode_2 + allow_redefine: True + deltaR_ditaupair: + expression: boosted_deltaR_ditaupair + allow_redefine: True + m_vis: + expression: boosted_m_vis + allow_redefine: True + fj_Xbb_pt: + expression: fj_Xbb_pt_boosted + allow_redefine: True + fj_Xbb_eta: + expression: fj_Xbb_eta_boosted + allow_redefine: True + fj_Xbb_particleNet_XbbvsQCD: + expression: fj_Xbb_particleNet_XbbvsQCD_boosted + allow_redefine: True + bpair_pt_1: + expression: bpair_pt_1_boosted + allow_redefine: True + bpair_pt_2: + expression: bpair_pt_2_boosted + allow_redefine: True + bpair_btag_value_2: + expression: bpair_btag_value_2_boosted + allow_redefine: True + bpair_eta_2: + expression: bpair_eta_2_boosted + allow_redefine: True + extraelec_veto: + expression: extraelec_veto_boosted + allow_redefine: True + gen_match_1: + expression: boosted_gen_match_1 + allow_redefine: True + exclude_processes: + - data + gen_match_2: + expression: boosted_gen_match_2 + 
allow_redefine: True + exclude_processes: + - data + btag_weight: + expression: btag_weight_boosted + allow_redefine: True + exclude_processes: + - data + pNet_Xbb_weight: + expression: pNet_Xbb_weight_boosted + allow_redefine: True + exclude_processes: + - data + event_selection: # lep_pt: "boosted_pt_1 > 120" had_tau_pt: "boosted_pt_2 > 40" @@ -130,4 +218,4 @@ output_features: - "bpair_eta_2" - "met" - "mass_2" - - "tau_decaymode_2" \ No newline at end of file + - "tau_decaymode_2" diff --git a/configs/nmssm_boosted/2018/preselection_mt.yaml b/configs/nmssm_boosted/2018/preselection_mt.yaml index 23163f9..ffd5cef 100644 --- a/configs/nmssm_boosted/2018/preselection_mt.yaml +++ b/configs/nmssm_boosted/2018/preselection_mt.yaml @@ -69,6 +69,89 @@ processes: - "SingleMuon_Run2018C-UL2018_GT36" - "SingleMuon_Run2018D-UL2018_GT36" +column_definitions: + njets: + expression: njets_boosted + allow_redefine: True + nbtag: + expression: nbtag_boosted + allow_redefine: True + metphi: + expression: metphi_boosted + allow_redefine: True + met: + expression: met_boosted + allow_redefine: True + pt_1: + expression: boosted_pt_1 + allow_redefine: True + q_1: + expression: boosted_q_1 + allow_redefine: True + pt_2: + expression: boosted_pt_2 + allow_redefine: True + q_2: + expression: boosted_q_2 + allow_redefine: True + mt_1: + expression: boosted_mt_1 + allow_redefine: True + iso_1: + expression: boosted_iso_1 + allow_redefine: True + mass_2: + expression: boosted_mass_2 + allow_redefine: True + tau_decaymode_2: + expression: boosted_tau_decaymode_2 + allow_redefine: True + deltaR_ditaupair: + expression: boosted_deltaR_ditaupair + allow_redefine: True + m_vis: + expression: boosted_m_vis + allow_redefine: True + fj_Xbb_pt: + expression: fj_Xbb_pt_boosted + allow_redefine: True + fj_Xbb_eta: + expression: fj_Xbb_eta_boosted + allow_redefine: True + fj_Xbb_particleNet_XbbvsQCD: + expression: fj_Xbb_particleNet_XbbvsQCD_boosted + allow_redefine: True + bpair_pt_1: + expression: 
bpair_pt_1_boosted + allow_redefine: True + bpair_pt_2: + expression: bpair_pt_2_boosted + allow_redefine: True + bpair_btag_value_2: + expression: bpair_btag_value_2_boosted + allow_redefine: True + bpair_eta_2: + expression: bpair_eta_2_boosted + allow_redefine: True + extramuon_veto: + expression: extramuon_veto_boosted + allow_redefine: True + gen_match_2: + expression: boosted_gen_match_2 + allow_redefine: True + exclude_processes: + - data + btag_weight: + expression: btag_weight_boosted + allow_redefine: True + exclude_processes: + - data + pNet_Xbb_weight: + expression: pNet_Xbb_weight_boosted + allow_redefine: True + exclude_processes: + - data + event_selection: # lep_pt: "boosted_pt_1 > 55" had_tau_pt: "boosted_pt_2 > 40" @@ -130,4 +213,4 @@ output_features: - "bpair_eta_2" - "met" - "mass_2" - - "tau_decaymode_2" \ No newline at end of file + - "tau_decaymode_2" diff --git a/configs/nmssm_boosted/2018/preselection_tt.yaml b/configs/nmssm_boosted/2018/preselection_tt.yaml index 2322fe7..ad16246 100644 --- a/configs/nmssm_boosted/2018/preselection_tt.yaml +++ b/configs/nmssm_boosted/2018/preselection_tt.yaml @@ -69,6 +69,95 @@ processes: - "JetHT_Run2018C-UL2018" - "JetHT_Run2018D-UL2018" +column_definitions: + njets: + expression: njets_boosted + allow_redefine: True + nbtag: + expression: nbtag_boosted + allow_redefine: True + metphi: + expression: metphi_boosted + allow_redefine: True + met: + expression: met_boosted + allow_redefine: True + pt_1: + expression: boosted_pt_1 + allow_redefine: True + q_1: + expression: boosted_q_1 + allow_redefine: True + pt_2: + expression: boosted_pt_2 + allow_redefine: True + q_2: + expression: boosted_q_2 + allow_redefine: True + mt_1: + expression: boosted_mt_1 + allow_redefine: True + iso_1: + expression: boosted_iso_1 + allow_redefine: True + mass_1: + expression: boosted_mass_1 + allow_redefine: True + mass_2: + expression: boosted_mass_2 + allow_redefine: True + tau_decaymode_1: + expression: 
boosted_tau_decaymode_1 + allow_redefine: True + tau_decaymode_2: + expression: boosted_tau_decaymode_2 + allow_redefine: True + deltaR_ditaupair: + expression: boosted_deltaR_ditaupair + allow_redefine: True + m_vis: + expression: boosted_m_vis + allow_redefine: True + fj_Xbb_pt: + expression: fj_Xbb_pt_boosted + allow_redefine: True + fj_Xbb_eta: + expression: fj_Xbb_eta_boosted + allow_redefine: True + fj_Xbb_particleNet_XbbvsQCD: + expression: fj_Xbb_particleNet_XbbvsQCD_boosted + allow_redefine: True + bpair_pt_1: + expression: bpair_pt_1_boosted + allow_redefine: True + bpair_pt_2: + expression: bpair_pt_2_boosted + allow_redefine: True + bpair_btag_value_2: + expression: bpair_btag_value_2_boosted + allow_redefine: True + bpair_eta_2: + expression: bpair_eta_2_boosted + allow_redefine: True + extramuon_veto: + expression: extramuon_veto_boosted + allow_redefine: True + gen_match_2: + expression: boosted_gen_match_2 + allow_redefine: True + exclude_processes: + - data + btag_weight: + expression: btag_weight_boosted + allow_redefine: True + exclude_processes: + - data + pNet_Xbb_weight: + expression: pNet_Xbb_weight_boosted + allow_redefine: True + exclude_processes: + - data + event_selection: # met: "(met_boosted > 120)" had_tau_pt: "(boosted_pt_1 > 40) && (boosted_pt_2 > 40)" @@ -132,4 +221,4 @@ output_features: - "mass_1" - "tau_decaymode_1" - "mass_2" - - "tau_decaymode_2" \ No newline at end of file + - "tau_decaymode_2" diff --git a/docs/preselection.md b/docs/preselection.md index bbe1478..e16ca7f 100644 --- a/docs/preselection.md +++ b/docs/preselection.md @@ -8,6 +8,7 @@ The preselection config has the following parameters: ---|---|--- `channel` | `string` | tau pair decay channels ("et", "mt", "tt") `processes` | `dict` | process parameters are explained below + `column_definitions` | `dict` | in this section, new columns can be defined based on a given `ROOT` expression.
The keys of the dictionary correspond to the names of the defined columns. The values are themselves dictionaries, with the `expression` key holding the `ROOT` expression used to define the column. The optional entries `processes` and `exclude_processes` restrict the definition to specific processes; the entry `allow_redefine` can be used to enable the use of the `ROOT.RDataFrame.Redefine` function for overwriting already existing columns. For a more detailed description, see below. `event_selection` | `dict` | with this parameter all selections that should be applied are defined.
This is basically a dictionary of cuts where the key is the name of a cut and the value is the cut itself as a string e.g. `had_tau_pt: "pt_2 > 30"`. The name of a cut is not really important, it is only used as an output information in the terminal. A cut can only use variables which are in the ntuples. `mc_weights` | `dict` | weight parameter are defined below `emb_weights` | `dict` | all weights that should be applied for embedded samples are defined.
Like for `event_selection` a weight can directly be specified and is then applied to all samples the same way e.g. `single_trigger: "trg_wgt_single_mu24ormu27"` @@ -31,6 +32,42 @@ The `tau_gen_modes` have following modes: `L` | `string` | lepton misidentified as a tau `all` | `string` | if no split should be performed +In `column_definitions`, new columns can be added to the output `ntuples` by +using `ROOT` expressions. An example entry could look like this: + +```yaml +column_definitions: + nbtag: + expression: n_bjets + processes: + - ttbar + - DY + btag_weight: + expression: id_wgt_bjet_pnet_shape + exclude_processes: + - data + allow_redefine: True + jj_deltaR: + expression: ROOT::VecOps::DeltaR(jeta_1, jeta_2, jphi_1, jphi_2) +``` + +The key `expression` is required and can contain any valid `ROOT` expression. + +The entry `exclude_processes` is optional. Column definitions are performed for +all processes except the ones given in this list. The entry `processes` is also +optional. The column definition is performed only for processes in this list. +The lists `processes` and `exclude_processes` can contain the names from the +`processes` section of this configuration. By default, the new columns are +defined for all processes. To write the new columns to the output file, you have +to explicitly add the columns to the `output_features` list. Note that you can +only set `processes` or `exclude_processes` for a column, but not both at the +same time. + +If the key `allow_redefine` is set to `True`, the `ROOT.RDataFrame.Redefine` +function is used if a column with the same name has been found in the +`RDataFrame`. The values in this column are then overwritten by the expression +given for the new column. + In `mc_weights` all weights that should be applied for simulated samples are defined.
There are two types of weights. @@ -53,4 +90,4 @@ python preselection.py --config-file configs/PATH/CONFIG.yaml Further there are additional optional parameters: 1. `--nthreads=SOME_INTEGER` to define the number of threads for the multiprocessing pool to run the sample processing in parallel. Default value is 8 (this should normally cover running all of the samples in parallel). -2. `--ncores=SOME_INTEGER` to define the number of cores that should be used for each pool thread to speed up the ROOT dataframe calculation. Default value is 2. \ No newline at end of file +2. `--ncores=SOME_INTEGER` to define the number of cores that should be used for each pool thread to speed up the ROOT dataframe calculation. Default value is 2. diff --git a/helper/functions.py b/helper/functions.py index 3709073..baa8bd5 100644 --- a/helper/functions.py +++ b/helper/functions.py @@ -22,6 +22,9 @@ from XRootD import client +TAU_FAKE_FACTORS_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + class CachingKeyHelper: @staticmethod def make_hashable(obj: Union[Dict, List, Tuple, Any]) -> Union[Dict, Tuple, bytes, Any]: @@ -411,7 +414,21 @@ def load_config(config_file: str) -> Dict: else: print("No common config file found!") - config = {} + # Container of the loaded configuration + # + # Some default values are pre-defined in the config dict that is going to contain the loaded + # configuration. These values are overwritten if they are explicitly set in the common config file. + # + # The variables, for which defaults are set, are: + # + # - 'sample_database`: Path to the sample database directory. Usually, this path is set to the + # `datasets` submodule of the `TauFakeFactors` module. Users can set a custom path, e.g., + # to an external path to a working version of their sample database. 
+ config = { + "sample_database": os.path.join(TAU_FAKE_FACTORS_DIR, "datasets"), + } + + # Update the config with common settings, applying to all steps with open(common_config_file, "r") as file: config.update(configured_yaml.load(file)) @@ -623,74 +640,75 @@ def get_output_name( return os.path.join(path, f"{process}{tau_gen_mode}.root") -def rename_boosted_variables(rdf: Any, channel: str) -> Any: +def define_columns(rdf: Any, column_definitions: dict, process: str) -> Any: """ - Function to redefine variables to the boosted tau pair information. Redefining only variables - which are written out for the fake factor measurement. Due to the hardcoded naming and redifinitions - this function needs to be adjusted if something changes in the list of output variables. + Customizer function to define additional columns in the ntuples. + + The `column_definitions` dictionary is usually provided with the preselection configuration + file. The keys of the dictionary correspond to the columns to be created. The values are + dictionaries which contain the settings for the column definition. The keys of these inner + dictionaries have the following meaning: + + - `expression`: The expression string which is used to define the new column. + + - `processes` (_optional_): An exclusive list of process names for which the definition should be + performed. For all processes that are not part of the list, the column definition is not + performed. If this entry is set, `exclude_processes` must not be set for the same column. + + - `exclude_processes` (_optional_): A list of process names for which the definition should be + skipped. If `process` is in this list, the column definition is not processed. If this entry + is set, `processes` must not be set for the same column. + + - `allow_redefine` (_optional_): If this flag is set to `True`, the `Redefine` method is used + to overwrite the value of an already existing column with the same name. Default: `False`.
+ + The new column names must not exist in the ntuples, except for the case that `allow_redefine` + is set to true. Otherwise an error is raised. Args: rdf: root DataFrame - channel: Analysis channel of the tau analysis e.g. "et", "mt" or "tt" + column_definitions: Dictionary mapping new column names (keys) to expressions (values) + process: Name of the current process Return: root DataFrame with redefined variables """ - rdf = rdf.Redefine("njets", "njets_boosted") - rdf = rdf.Redefine("nbtag", "nbtag_boosted") - rdf = rdf.Redefine("metphi", "metphi_boosted") - rdf = rdf.Redefine("met", "met_boosted") - rdf = rdf.Redefine("pt_1", "boosted_pt_1") - rdf = rdf.Redefine("q_1", "boosted_q_1") - rdf = rdf.Redefine("pt_2", "boosted_pt_2") - rdf = rdf.Redefine("q_2", "boosted_q_2") - rdf = rdf.Redefine("mt_1", "boosted_mt_1") - rdf = rdf.Redefine("iso_1", "boosted_iso_1") - rdf = rdf.Redefine("mass_2", "boosted_mass_2") - rdf = rdf.Redefine("tau_decaymode_2", "boosted_tau_decaymode_2") - rdf = rdf.Redefine("deltaR_ditaupair", "boosted_deltaR_ditaupair") - rdf = rdf.Redefine("m_vis", "boosted_m_vis") - rdf = rdf.Redefine("fj_Xbb_pt", "fj_Xbb_pt_boosted") - rdf = rdf.Redefine("fj_Xbb_eta", "fj_Xbb_eta_boosted") - rdf = rdf.Redefine( - "fj_Xbb_particleNet_XbbvsQCD", "fj_Xbb_particleNet_XbbvsQCD_boosted" - ) - rdf = rdf.Redefine("bpair_pt_1", "bpair_pt_1_boosted") - rdf = rdf.Redefine("bpair_pt_2", "bpair_pt_2_boosted") - rdf = rdf.Redefine("bpair_btag_value_2", "bpair_btag_value_2_boosted") - rdf = rdf.Redefine("bpair_eta_2", "bpair_eta_2_boosted") - if "boosted_gen_match_2" in rdf.GetColumnNames(): - rdf = rdf.Redefine("gen_match_2", "boosted_gen_match_2") - else: - rdf = rdf.Define("boosted_gen_match_2", "-1.") - rdf = rdf.Redefine("gen_match_2", "boosted_gen_match_2") - - if "btag_weight_boosted" in rdf.GetColumnNames(): - rdf = rdf.Redefine("btag_weight", "btag_weight_boosted") - else: - rdf = rdf.Define("btag_weight_boosted", "1.") - rdf = rdf.Redefine("btag_weight", 
"btag_weight_boosted") + # Ensure that the new column names are not already present in the ntuple + rdf_columns = set(rdf.GetColumnNames()) + new_columns = set(k for k, v in column_definitions.items() if not v.get("allow_redefine", False)) + intersection = rdf_columns.intersection(new_columns) + if intersection: + raise ValueError( + f"The following new column names already exist in the ntuple and allow_redefine is not set: {intersection}" + ) - if "pNet_Xbb_weight_boosted" in rdf.GetColumnNames(): - rdf = rdf.Redefine("pNet_Xbb_weight", "pNet_Xbb_weight_boosted") - else: - rdf = rdf.Define("pNet_Xbb_weight_boosted", "1.") - rdf = rdf.Redefine("pNet_Xbb_weight", "pNet_Xbb_weight_boosted") - - if channel == "tt": - rdf = rdf.Redefine("mass_1", "boosted_mass_1") - rdf = rdf.Redefine("tau_decaymode_1", "boosted_tau_decaymode_1") - if "boosted_gen_match_1" in rdf.GetColumnNames(): - rdf = rdf.Redefine("gen_match_1", "boosted_gen_match_1") - else: - rdf = rdf.Define("boosted_gen_match_1", "-1.") - rdf = rdf.Redefine("gen_match_1", "boosted_gen_match_1") + # Perform the define declarations on the RDataFrame object + for new_column, define_dict in column_definitions.items(): + # Check that processes and exclude_processes are not set at the same time + if "processes" in define_dict and "exclude_processes" in define_dict: + raise ValueError( + f"Both processes and exclude_processes have been specified for column {new_column}. You can only set one of them for the same entry." 
+ ) - if channel == "et": - rdf = rdf.Redefine("extraelec_veto", "boosted_extraelec_veto") - if channel == "mt": - rdf = rdf.Redefine("extramuon_veto", "boosted_extramuon_veto") + # Check if the process should be skipped + if "processes" in define_dict and process not in define_dict["processes"]: + continue + if "exclude_processes" in define_dict and process in define_dict["exclude_processes"]: + continue + + # Get the ROOT expression for defining the new column + expression = define_dict["expression"] + + # Use + # - `Redefine` if allow_redefine is `True` and the column is already present in the RDataFrame + # - `Define` in all other cases + rdf_define_call = ( + rdf.Redefine + if new_column in rdf_columns and define_dict.get("allow_redefine", False) + else rdf.Define + ) + rdf = rdf_define_call(new_column, expression) return rdf diff --git a/helper/weights.py b/helper/weights.py index 7472cd3..5c61f0a 100644 --- a/helper/weights.py +++ b/helper/weights.py @@ -94,6 +94,14 @@ def lumi_weight(rdf: Any, era: str) -> Any: rdf = rdf.Redefine("weight", "weight * 41.48 * 1000.") elif era == "2018": rdf = rdf.Redefine("weight", "weight * 59.83 * 1000.") + elif era == "2022preEE": + rdf = rdf.Redefine("weight", "weight * 7.9804 * 1000.") + elif era == "2022postEE": + rdf = rdf.Redefine("weight", "weight * 26.6717 * 1000.") + elif era == "2023preBPix": + rdf = rdf.Redefine("weight", "weight * 18.063 * 1000.") + elif era == "2023postBPix": + rdf = rdf.Redefine("weight", "weight * 9.693 * 1000.") else: raise ValueError(f"Weight calc: lumi: Era is not defined: {era}") diff --git a/preselection.py b/preselection.py index ce8be61..878dc35 100644 --- a/preselection.py +++ b/preselection.py @@ -80,11 +80,23 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], log.info(f"WARNING: Sample {sample} is empty. 
Skipping...") return () + # get column definitions from config and declare definitions on the RDataFrame + column_definitions = config.get("column_definitions", {}) + if column_definitions: + rdf = func.define_columns(rdf, column_definitions, process) + # apply analysis specific event filters selection_conf = config["event_selection"] for cut in selection_conf: rdf = rdf.Filter(f"({selection_conf[cut]})", f"cut on {cut}") + # For Run 3 DY samples, we need to collect the events from two samples, that need to be selected + # for different flavors + if sample.startswith("DYto2L"): + rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 11 || lhe_drell_yan_decay_flavor == 13", "DY e/mu selection") + if sample.startswith("DYto2Tau"): + rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 15", "DY tau selection") + if process == "embedding": rdf = filters.emb_tau_gen_match(rdf=rdf, channel=config["channel"]) @@ -122,6 +134,15 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], rdf = rdf.Redefine( "weight", f"weight * ({mc_weight_conf[weight]})" ) + elif weight == "ttbar_norm_weight": + if process == "ttbar" and tau_gen_mode in ["L", "J", "T"]: + # This function applies an additional normalization weight to tt backgrounds + # obtained from simulation. The factor corrects for a mismodelling of the + # normalization of tt compared to data and is extracted in an e mu control + # region. 
+ rdf = rdf.Redefine( + "weight", f"weight * ({mc_weight_conf[weight]})" + ) else: rdf = rdf.Redefine("weight", f"weight * ({mc_weight_conf[weight]})") @@ -260,22 +281,28 @@ def run_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], str, in # loading of the chosen config file config = func.load_config(args.config_file) - # loading general dataset info file for xsec and event number - with open(f"datasets/{config['nanoAOD_version']}/datasets.json", "r") as file: - datasets = json.load(file) - # define output path for the preselected samples output_path = os.path.join( config["output_path"], "preselection", config["era"], config["channel"] ) func.check_path(path=output_path) + # Set up logger and retrieve logger instance for main routine func.setup_logger( log_file=output_path + "/preselection.log", log_name="preselection", log_level=logging.INFO, subcategories=config["processes"], ) + log = logging.getLogger("preselection.main") + + # Load general dataset info file for xsec and event number + datasets_file = os.path.join( + config["sample_database"], config["nanoAOD_version"], "datasets.json" + ) + with open(datasets_file, "r") as file: + datasets = json.load(file) + log.info(f"Loading sample database from {datasets_file}") # get needed features for fake factor calculation output_features = config["output_features"] diff --git a/preselection_boosted.py b/preselection_boosted.py index 6b38216..9a10045 100644 --- a/preselection_boosted.py +++ b/preselection_boosted.py @@ -164,11 +164,6 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]], log.debug(out.getvalue()) log.debug("-" * 50) - # WARNING: cross check this function is something changes in the list of output features - tmp_rdf = func.rename_boosted_variables( - rdf=tmp_rdf, channel=config["channel"] - ) - tmp_file_name = func.get_output_name( path=output_path, process=sample, tau_gen_mode=tau_gen_mode ) @@ -265,22 +260,28 @@ def run_preselection(args: Tuple[str, 
Dict[str, Union[Dict, List, str]], str, in # loading of the chosen config file config = func.load_config(args.config_file) - # loading general dataset info file for xsec and event number - with open(f"datasets/{config['nanoAOD_version']}/datasets.json", "r") as file: - datasets = json.load(file) - # define output path for the preselected samples output_path = os.path.join( config["output_path"], "preselection", config["era"], config["channel"] ) func.check_path(path=output_path) + # Set up logger and retrieve logger instance for main routine func.setup_logger( log_file=output_path + "/preselection.log", log_name="preselection", log_level=logging.INFO, subcategories=config["processes"], ) + log = logging.getLogger("preselection.main") + + # Load general dataset info file for xsec and event number + datasets_file = os.path.join( + config["sample_database"], config["nanoAOD_version"], "datasets.json" + ) + with open(datasets_file, "r") as file: + datasets = json.load(file) + log.info(f"Loading sample database from {datasets_file}") # get needed features for fake factor calculation output_features = config["output_features"]