1 change: 1 addition & 0 deletions Templates/config.dteague
@@ -2,6 +2,7 @@
 user = dteague
 analysis = 3top
 dataset_manager_path = /afs/cern.ch/work/d/%(user)s/
+dataset_manager_name = AnalysisDatasetManager
 combine_path = %(dataset_manager_path)s/HiggsCombine/CMSSW_8_1_0/src/HiggsAnalysis/CombinedLimit
 output_path = /afs/cern.ch/work/d/%(user)s/testHold
 fakeRate_output = %(output_path)s/FakeRates
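Note: the `%(user)s` and `%(dataset_manager_path)s` placeholders look like ConfigParser-style basic interpolation, so the new `dataset_manager_name` key can be referenced from other keys the same way. A minimal sketch of how these values would resolve, assuming the template is read with Python's `configparser` (the `[Setup]` section header is hypothetical; the template's real section name is outside the visible diff):

```python
from configparser import ConfigParser

# Hypothetical [Setup] section header; the template's real section
# name is not shown in this diff.
template = """
[Setup]
user = dteague
analysis = 3top
dataset_manager_path = /afs/cern.ch/work/d/%(user)s/
dataset_manager_name = AnalysisDatasetManager
"""

config = ConfigParser()
config.read_string(template)

# Basic interpolation expands %(user)s at lookup time:
print(config.get("Setup", "dataset_manager_path"))
# -> /afs/cern.ch/work/d/dteague/
print(config.get("Setup", "dataset_manager_name"))
# -> AnalysisDatasetManager
```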
7 changes: 4 additions & 3 deletions Utilities/python/CombineCardTools.py
@@ -80,7 +80,8 @@ def getVariations(self):

     def getVariationsForProcess(self, process):
         if process not in self.variations.keys():
-            raise ValueError("Variations not defined for process %s" % process)
+            # return []
+            raise ValueError("Variations not defined for process %s" % process)
         return self.variations[process]

     def setVariationsByProcess(self, process, variations):
@@ -204,7 +205,7 @@ def setCombineChannels(self, groups):
         self.channelsToCombine = groups

     def combineChannels(self, group, processName, central=True):
-        variations = self.variations[group.GetName()][:]
+        variations = self.variations[group.GetName()][:] if group.GetName() in self.variations else []
         fitVariable = self.getFitVariable(group.GetName())
         if central:
             variations.append("")
@@ -263,7 +264,7 @@ def loadHistsForProcess(self, processName, scaleNorm=1, expandedTheory=True):
         #TODO:Make optional
         processedHists = []
         for chan in self.channels:
-            histName = "_".join([fitVariable, chan]) if chan != "all" else fitVariable
+            histName = "_".join([fitVariable, chan])
             hist = group.FindObject(histName)
             if not hist:
                 logging.warning("Failed to produce hist %s for process %s" % (histName, processName))
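Note: the `combineChannels` change swaps a hard `KeyError` for an empty list when a group has no registered variations. A standalone sketch of the pattern (the dict contents and names are illustrative stand-ins, not the real `CombineCardTools` state):

```python
# Illustrative stand-in for self.variations in CombineCardTools.
variations = {"wz-powheg": ["scale", "pdf"]}

def variations_for(name):
    # [:] copies the list so later appends (e.g. the "" entry added
    # when central=True) don't mutate the shared dict value.
    return variations[name][:] if name in variations else []

print(variations_for("wz-powheg"))  # ['scale', 'pdf']
print(variations_for("zz4l"))       # [] rather than a KeyError
```

An equivalent spelling is `variations.get(name, [])[:]`, which keeps the copy semantics in a single expression.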
58 changes: 22 additions & 36 deletions Utilities/python/ConfigureJobs.py
Expand Up @@ -181,35 +181,24 @@ def getListOfHDFSFiles(file_path):

 # TODO: Would be good to switch the order of the last two arguments
 # completely deprecate manager_path without breaking things
-def getListOfFiles(filelist, selection, manager_path="", analysis=""):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFiles(filelist, analysis="", input_tier=""):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    group_path = "%s/AnalysisDatasetManager/PlotGroups" % manager_path
+    group_path = "%s/%s/PlotGroups" % (manager_path, getManagerName())
     data_info = UserInput.readAllInfo("/".join([data_path, "data/*"]))
     mc_info = UserInput.readAllInfo("/".join([data_path, "montecarlo/*"]))
-    analysis_info = UserInput.readInfo("/".join([data_path, analysis, selection])) \
-        if analysis != "" else []
-    valid_names = (data_info.keys() + mc_info.keys()) if not analysis_info else analysis_info.keys()
-    group_names = UserInput.readAllInfo("%s/%s.py" %(group_path, analysis)) if analysis else dict()
+
+    valid_names = UserInput.readInfo("/".join([data_path, analysis, input_tier])) if input_tier and analysis else (data_info.keys() + mc_info.keys())
+    group_names = UserInput.readAllInfo("%s/%s.py" %(group_path, analysis))
     names = []
     for name in filelist:
         if ".root" in name:
             names.append(name)
         # Allow negative contributions
         elif name[0] == "-" :
             names.append(name)
-        elif "WZxsec2016" in name:
-            dataset_file = manager_path + \
-                "%s/FileInfo/WZxsec2016/%s.json" % (getManagerPath(), selection)
-            allnames = json.load(open(dataset_file)).keys()
-            if "nodata" in name:
-                nodata = [x for x in allnames if "data" not in x]
-                names += nodata
-            elif "data" in name:
-                names += [x for x in allnames if "data" in x]
-            else:
-                names += allnames
+        elif "*" in name:
+            names += fnmatch.filter(valid_names, name)
         elif name in group_names:
             names += group_names[name]['Members']
         elif "*" in name:
@@ -247,11 +236,10 @@ def fillTemplatedFile(template_file_name, out_file_name, template_dict):
     with open(out_file_name, "w") as outFile:
         outFile.write(result)

-def getListOfFilesWithXSec(filelist, manager_path="", selection="ntuples"):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFilesWithXSec(filelist, analysis="", input_tier=""):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    files = getListOfFiles(filelist, selection, manager_path)
+    files = getListOfFiles(filelist, analysis, input_tier)
     mc_info = UserInput.readAllInfo("/".join([data_path, "montecarlo/*"]))
     info = {}
     for file_name in files:
@@ -269,21 +257,20 @@ def getListOfFilesWithXSec(filelist, manager_path="", selection="ntuples"):
             info.update({file_name : file_info["cross_section"]*kfac})
     return info

-def getListOfFilesWithPath(filelist, analysis, selection, das=True, manager_path=""):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFilesWithPath(filelist, analysis, input_tier, das=True):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    files = getListOfFiles(filelist, selection, manager_path, analysis)
-    selection_info = UserInput.readInfo("/".join([data_path, analysis, selection]))
+    files = getListOfFiles(filelist, analysis, input_tier)
+    input_tier_info = UserInput.readInfo("/".join([data_path, analysis, input_tier]))
     info = {}
     for file_name in files:
-        if das and "DAS" not in selection_info[file_name].keys():
-            logging.error("DAS path not defined for file %s in analysis %s/%s" % (file_name, analysis, selection))
+        if das and "DAS" not in input_tier_info[file_name].keys():
+            logging.error("DAS path not defined for file %s in analysis %s/%s" % (file_name, analysis, input_tier))
             continue
-        elif not das and "file_path" not in selection_info[file_name].keys():
-            logging.error("File_path not defined for file %s in analysis %s/%s" % (file_name, analysis, selection))
+        elif not das and "file_path" not in input_tier_info[file_name].keys():
+            logging.error("File_path not defined for file %s in analysis %s/%s" % (file_name, analysis, input_tier))
             continue
-        info.update({file_name : selection_info[file_name]["DAS" if das else "file_path"]})
+        info.update({file_name : input_tier_info[file_name]["DAS" if das else "file_path"]})
     return info

def getPreviousStep(selection, analysis):
@@ -331,9 +318,8 @@ def getConfigFileName(config_file_name):
raise ValueError("Invalid configuration file. Tried to read %s which does not exist" % \
config_file_name)

def getInputFilesPath(sample_name, selection, analysis, manager_path=""):
if manager_path is "":
manager_path = getManagerPath()
def getInputFilesPath(sample_name, selection, analysis):
manager_path = getManagerPath()
if ".root" in sample_name:
logging.info("Using simple file %s" % sample_name)
return sample_name
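Note: the refactor drops `manager_path`/`selection` in favor of `(filelist, analysis, input_tier)` and replaces the hard-coded `WZxsec2016` JSON branch with generic wildcard matching. A simplified, self-contained sketch of the new name dispatch (the dataset and group names are illustrative stand-ins for what `UserInput.readAllInfo` would return, and the trailing wildcard branch below the fold is omitted):

```python
import fnmatch

# Illustrative stand-ins for the dataset-manager lookups.
valid_names = ["wz3lnu-powheg", "wz3lnu-mg5amcnlo", "zz4l-powheg", "data_DoubleMuon"]
group_names = {"EW-WZ": {"Members": ["wz3lnu-powheg"]}}

def get_list_of_files(filelist):
    names = []
    for name in filelist:
        if ".root" in name:
            names.append(name)          # explicit ROOT file passes through
        elif name[0] == "-":
            names.append(name)          # negative (subtracted) contribution
        elif "*" in name:
            # Shell-style wildcard against the known dataset names
            names += fnmatch.filter(valid_names, name)
        elif name in group_names:
            names += group_names[name]["Members"]   # plot-group expansion
    return names

print(get_list_of_files(["wz3lnu-*", "EW-WZ", "extra.root"]))
# ['wz3lnu-powheg', 'wz3lnu-mg5amcnlo', 'wz3lnu-powheg', 'extra.root']
```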
13 changes: 8 additions & 5 deletions Utilities/python/SelectorTools.py
@@ -21,10 +21,10 @@ def __init__(self, analysis, selection, input_tier, year):
"ZZGen" : "ZZGenSelector",
"WGen" : "WGenSelector",
"ZGen" : "ZGenSelector",
"TTT" : "TTTSelector",
"ThreeLep" : "ThreeLepSelector",
"Eff" : "Efficiency",
"Efficiency" : "Efficiency",
"FR" : "FakeRateSelector",
}

self.subanalysis = None
@@ -94,7 +94,7 @@ def setInputs(self, inputs):
             self.inputs.Add(inp)
         self.addTNamed("ntupleType", self.ntupleType)
         self.addTNamed("selection", self.selection)
-        self.addTNamed("year", self.year)
+        #self.addTNamed("year", self.year)

def setSelection(self, selection):
self.selection = selection
@@ -158,7 +158,7 @@ def clearDatasets(self):

     def setDatasets(self, datalist):
         analysis = self.subanalysis if self.subanalysis else self.analysis
-        datasets = ConfigureJobs.getListOfFiles(datalist, self.input_tier, analysis=analysis)
+        datasets = ConfigureJobs.getListOfFiles(datalist, analysis, self.input_tier)

         for dataset in datasets:
             if "@" in dataset:
@@ -189,8 +189,10 @@ def isBackground(self):
         self.selector_name = self.selector_name.replace("Selector", "BackgroundSelector")

     def processDataset(self, dataset, file_path, chan):
+
         logging.info("Processing dataset %s" % dataset)
         select = getattr(ROOT, self.selector_name)()
+
         select.SetInputList(self.inputs)
         self.addTNamed("name", dataset)
         if dataset in self.regions:
@@ -200,6 +202,7 @@ def processDataset(self, dataset, file_path, chan):
                 select.addSubprocesses(vec)
         # Only add for one channel
         addSumweights = self.addSumweights and self.channels.index(chan) == 0 and "data" not in dataset
+
         if addSumweights:
             # Avoid accidentally combining sumweights across datasets
             currfile_name = self.current_file.GetName()
@@ -215,8 +218,8 @@ def processDataset(self, dataset, file_path, chan):
                 sumweights_hist = ROOT.TH1D("sumweights", "sumweights", 1000, 0, 1000)
                 sumweights_hist.SetDirectory(ROOT.gROOT)
                 self.current_file = ROOT.TFile.Open(currfile_name, "update")
+
         self.processLocalFiles(select, file_path, addSumweights, chan)
-
         output_list = select.GetOutputList()
         processes = [dataset] + (self.regions[dataset] if dataset in self.regions else [])
         self.writeOutput(output_list, chan, processes, dataset, addSumweights)
@@ -296,7 +299,7 @@ def processParallelByDataset(self, datasets, chan):
         # Store arrays in temp files, since it can get way too big to keep around in memory
         tempfiles = [self.tempfileName(d) for d in datasets]
         self.combineParallelFiles(tempfiles, chan)
-
+    # Pool.map can only take in one argument, so expand the array
     def processDatasetHelper(self, args):
         self.processDataset(*args)
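Note: the new comment above `processDatasetHelper` documents a real `multiprocessing` constraint: `Pool.map` hands each work item to the callable as a single argument, so multi-argument tasks travel as tuples and the helper unpacks them. A minimal standalone sketch (the dataset names and paths are made up):

```python
from multiprocessing import Pool

def process_dataset(dataset, file_path, chan):
    return "%s %s %s" % (dataset, file_path, chan)

# Pool.map can only pass one argument, so expand the tuple here.
def process_dataset_helper(args):
    return process_dataset(*args)

if __name__ == "__main__":
    work = [("wz3lnu-powheg", "/tmp/wz.root", "eee"),
            ("zz4l-powheg", "/tmp/zz.root", "mmm")]
    with Pool(2) as pool:
        print(pool.map(process_dataset_helper, work))
    # ['wz3lnu-powheg /tmp/wz.root eee', 'zz4l-powheg /tmp/zz.root mmm']
```

On Python 3, `pool.starmap(process_dataset, work)` performs the same unpacking without a helper; the helper pattern is what works on Python 2 as well.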
2 changes: 1 addition & 1 deletion Utilities/python/UserInput.py
Expand Up @@ -16,7 +16,7 @@ def getDefaultParser(allow_from_file=True):
                         required=False, help="Name of selection to make, "
                         " as defined in Cuts/<analysis>/<selection>.json")
     parser.add_argument("-a", "--analysis", type=str,
-                        required=False, default="WZxsec2016",
+                        required=False,
                         help="Analysis name, used in selecting the cut json")
     parser.add_argument("--selectorArgs", nargs='+', type=str,
                         help="List of additional configurations to send to selector")
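Note: dropping the hard-coded default means an omitted `-a`/`--analysis` now parses to `None`, so the analysis must be supplied explicitly (or via the config) instead of silently inheriting `WZxsec2016`. A minimal sketch of the parser behavior:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--analysis", type=str, required=False,
                    help="Analysis name, used in selecting the cut json")

# With no default, an omitted flag comes back as None:
print(parser.parse_args([]).analysis)              # None
print(parser.parse_args(["-a", "3top"]).analysis)  # 3top
```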
4 changes: 2 additions & 2 deletions Utilities/scripts/addUnrolledHistsToFile.py
@@ -23,7 +23,7 @@
             ['wzjj-vbfnlo-sf', 'wzjj-vbfnlo-of', ] + \
             ['wz3lnu-mg5amcnlo','wz3lnu-powheg', 'zz4l-mg5amcnlo'] + \
             ['AllData', 'WZxsec2016data', 'DataEWKCorrected'],
-        'Wselection'),
+        input_tier='Wselection'),
     ["mjj_etajj_2D_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_etajj_2D_Fakes_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_etajj_2D_%s_%s" % (var, c) for c in ConfigureJobs.getChannels()
@@ -39,7 +39,7 @@
             ['wzjj-vbfnlo-sf', 'wzjj-vbfnlo-of', ] + \
             ['wz3lnu-mg5amcnlo','wz3lnu-powheg', 'zz4l-mg5amcnlo'] + \
             ['AllData', 'WZxsec2016data', 'DataEWKCorrected'],
-        'Wselection'),
+        input_tier='Wselection'),
     ["mjj_dRjj_2D_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_dRjj_2D_Fakes_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_dRjj_2D_%s_%s" % (var, c) for c in ConfigureJobs.getChannels()
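Note: switching these call sites to `input_tier='Wselection'` matters because of the new `getListOfFiles` signature: passed positionally, the string would bind to `analysis` instead. A toy sketch of the pitfall (the function body is a stand-in, only the argument order mirrors `ConfigureJobs.getListOfFiles`):

```python
# Toy stand-in with the new argument order of ConfigureJobs.getListOfFiles.
def get_list_of_files(filelist, analysis="", input_tier=""):
    return analysis, input_tier

print(get_list_of_files([], "Wselection"))             # ('Wselection', '') -- wrong slot
print(get_list_of_files([], input_tier="Wselection"))  # ('', 'Wselection') -- intended
```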