From 61a6551bd9b4040a51fbbc27efe29c0973f31502 Mon Sep 17 00:00:00 2001 From: Simon Daigler Date: Thu, 24 Apr 2025 16:16:34 +0200 Subject: [PATCH 1/3] feat: support multiple nanoAOD versions in sample database --- processor/tasks/CROWNBase.py | 16 +++++++++++----- processor/tasks/ConfigureDatasets.py | 21 +++++---------------- processor/tasks/helpers/NanoAODVersions.py | 7 +++++++ 3 files changed, 23 insertions(+), 21 deletions(-) create mode 100644 processor/tasks/helpers/NanoAODVersions.py diff --git a/processor/tasks/CROWNBase.py b/processor/tasks/CROWNBase.py index c6449c8..7dee3be 100644 --- a/processor/tasks/CROWNBase.py +++ b/processor/tasks/CROWNBase.py @@ -13,6 +13,8 @@ # import timeout_decorator import time +from processor.tasks.helpers.NanoAODVersions import NanoAODVersions + class ProduceBase(Task, WrapperTask): """ @@ -23,14 +25,18 @@ class ProduceBase(Task, WrapperTask): sample_list = luigi.Parameter() analysis = luigi.Parameter() config = luigi.Parameter() - dataset_database = luigi.Parameter( - default="sample_database/datasets.json", - significant=False, - ) + nanoAOD_version = luigi.Parameter(default=NanoAODVersions.v12.value) + dataset_database = luigi.Parameter(default=None, significant=False) shifts = luigi.Parameter() scopes = luigi.Parameter() silent = False + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Dynamically set the default value of dataset_database based on nanoAOD_version + if self.dataset_database is None: + self.dataset_database = f"sample_database/{self.nanoAOD_version}/datasets.json" + def parse_samplelist(self, sample_list): """ The function `parse_samplelist` takes a sample list as input and returns a list of samples, handling @@ -114,7 +120,7 @@ def set_sample_data(self, samples): console.log( "Sample {} not found in {}".format(nick, self.dataset_database) ) - raise Exception("Sample not found in DB") + raise Exception(f"Sample not found in DB: {nick}") sample_data = sample_db[nick] data["details"][nick]["era"] = str(sample_data["era"]) data["details"][nick]["sample_type"] = sample_data["sample_type"] diff --git a/processor/tasks/ConfigureDatasets.py b/processor/tasks/ConfigureDatasets.py index f488004..2dfb0bf 100644 --- a/processor/tasks/ConfigureDatasets.py +++ b/processor/tasks/ConfigureDatasets.py @@ -1,9 +1,9 @@ import luigi import os import json -import yaml from framework import Task from framework import console +from processor.tasks.helpers.NanoAODVersions import NanoAODVersions def ensure_dir(file_path): @@ -11,28 +11,25 @@ def ensure_dir(file_path): if not os.path.exists(directory): os.makedirs(directory) - class ConfigureDatasets(Task): """ Gather information on the selected datasets. """ nick = luigi.Parameter() + nanoAOD_version = luigi.Parameter(default=NanoAODVersions.v12.value) era = luigi.Parameter() sample_type = luigi.Parameter() silent = luigi.BoolParameter(default=False, significant=False) def output(self): - target = self.remote_target("sample_database/{}.json".format(self.nick)) + target = self.remote_target(f"sample_database/{self.nanoAOD_version}/{self.nick}.json") return target def load_filelist_config(self): # first check if a json exists, if not, check for a yaml - sample_configfile_json = "sample_database/{era}/{type}/{nick}.json".format( - era=self.era, type=self.sample_type, nick=self.nick - ) - sample_configfile_yaml = "sample_database/{era}/{type}/{nick}.yaml".format( - era=self.era, type=self.sample_type, nick=self.nick + sample_configfile_json = "sample_database/{nanoAOD_version}/{era}/{type}/{nick}.json".format( + nanoAOD_version=self.nanoAOD_version, era=self.era, type=self.sample_type, nick=self.nick ) if os.path.exists(sample_configfile_json): with open(sample_configfile_json, "r") as stream: @@ -41,14 +38,6 @@ def load_filelist_config(self): except json.JSONDecodeError as exc: print(exc) raise Exception("Failed to load sample information") - elif os.path.exists(sample_configfile_yaml): - console.log("[DEPRECATED] Loading from YAML") - with open(sample_configfile_yaml, "r") as stream: - try: - sample_data = yaml.safe_load(stream) - except yaml.YAMLError as exc: - print(exc) - raise Exception("Failed to load sample information") else: console.log("[DEPRECATED] Loading from DAS is not supported anymore") raise Exception("Failed to load sample information") diff --git a/processor/tasks/helpers/NanoAODVersions.py b/processor/tasks/helpers/NanoAODVersions.py new file mode 100644 index 0000000..ac330e0 --- /dev/null +++ b/processor/tasks/helpers/NanoAODVersions.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class NanoAODVersions(Enum): + v9 = "nanoAOD_v9" + v12 = "nanoAOD_v12" + v15 = "nanoAOD_v15" \ No newline at end of file From a4e5ec856e7a66760211b37db027bd1c433066b6 Mon Sep 17 00:00:00 2001 From: Simon Daigler Date: Tue, 29 Apr 2025 18:11:50 +0200 Subject: [PATCH 2/3] chore: change sample_configfile_json to f-string --- processor/tasks/ConfigureDatasets.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/processor/tasks/ConfigureDatasets.py b/processor/tasks/ConfigureDatasets.py index 2dfb0bf..3681401 100644 --- a/processor/tasks/ConfigureDatasets.py +++ b/processor/tasks/ConfigureDatasets.py @@ -28,9 +28,7 @@ def output(self): def load_filelist_config(self): # first check if a json exists, if not, check for a yaml - sample_configfile_json = "sample_database/{nanoAOD_version}/{era}/{type}/{nick}.json".format( - nanoAOD_version=self.nanoAOD_version, era=self.era, type=self.sample_type, nick=self.nick - ) + sample_configfile_json = f"sample_database/{self.nanoAOD_version}/{self.era}/{self.sample_type}/{self.nick}.json" if os.path.exists(sample_configfile_json): with open(sample_configfile_json, "r") as stream: try: From 581a2fba06847f804e6193cfe009e159ac0b3729 Mon Sep 17 00:00:00 2001 From: Simon Daigler Date: Mon, 5 May 2025 14:51:07 +0200 Subject: [PATCH 3/3] chore: fix formatting --- processor/tasks/CROWNBase.py | 4 +++- processor/tasks/ConfigureDatasets.py | 5 ++++- processor/tasks/MLTraining.py | 2 +- processor/tasks/helpers/NanoAODVersions.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/processor/tasks/CROWNBase.py b/processor/tasks/CROWNBase.py index 7dee3be..3d4db55 100644 --- a/processor/tasks/CROWNBase.py +++ b/processor/tasks/CROWNBase.py @@ -35,7 +35,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Dynamically set the default value of dataset_database based on nanoAOD_version if self.dataset_database is None: - self.dataset_database = f"sample_database/{self.nanoAOD_version}/datasets.json" + self.dataset_database = ( + f"sample_database/{self.nanoAOD_version}/datasets.json" + ) def parse_samplelist(self, sample_list): """ diff --git a/processor/tasks/ConfigureDatasets.py b/processor/tasks/ConfigureDatasets.py index 3681401..5209c3a 100644 --- a/processor/tasks/ConfigureDatasets.py +++ b/processor/tasks/ConfigureDatasets.py @@ -11,6 +11,7 @@ def ensure_dir(file_path): if not os.path.exists(directory): os.makedirs(directory) + class ConfigureDatasets(Task): """ Gather information on the selected datasets. @@ -23,7 +24,9 @@ class ConfigureDatasets(Task): silent = luigi.BoolParameter(default=False, significant=False) def output(self): - target = self.remote_target(f"sample_database/{self.nanoAOD_version}/{self.nick}.json") + target = self.remote_target( + f"sample_database/{self.nanoAOD_version}/{self.nick}.json" + ) return target def load_filelist_config(self): diff --git a/processor/tasks/MLTraining.py b/processor/tasks/MLTraining.py index b3e1164..89627de 100644 --- a/processor/tasks/MLTraining.py +++ b/processor/tasks/MLTraining.py @@ -2,7 +2,7 @@ """ Collection of tasks used to create training datasets and config files -for the NN trainings of the NMSSM analysis +for the NN trainings of the NMSSM analysis """ import yaml import os diff --git a/processor/tasks/helpers/NanoAODVersions.py b/processor/tasks/helpers/NanoAODVersions.py index ac330e0..ca01a96 100644 --- a/processor/tasks/helpers/NanoAODVersions.py +++ b/processor/tasks/helpers/NanoAODVersions.py @@ -4,4 +4,4 @@ class NanoAODVersions(Enum): v9 = "nanoAOD_v9" v12 = "nanoAOD_v12" - v15 = "nanoAOD_v15" \ No newline at end of file + v15 = "nanoAOD_v15"