1 change: 1 addition & 0 deletions Templates/config.dteague
@@ -2,6 +2,7 @@
 user = dteague
 analysis = 3top
 dataset_manager_path = /afs/cern.ch/work/d/%(user)s/
+dataset_manager_name = AnalysisDatasetManager
 combine_path = %(dataset_manager_path)s/HiggsCombine/CMSSW_8_1_0/src/HiggsAnalysis/CombinedLimit
 output_path = /afs/cern.ch/work/d/%(user)s/testHold
 fakeRate_output = %(output_path)s/FakeRates
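Note: the `%(user)s` and `%(dataset_manager_path)s` placeholders look like ConfigParser-style basic interpolation, so the new `dataset_manager_name` key can be referenced from other keys the same way. A minimal sketch of how these values would resolve, assuming the template is read with Python's `configparser` (the `[Setup]` section header is hypothetical; the template's real section name is outside the visible diff):

```python
from configparser import ConfigParser

# Hypothetical [Setup] section header; the template's real section
# name is not shown in this diff.
template = """
[Setup]
user = dteague
analysis = 3top
dataset_manager_path = /afs/cern.ch/work/d/%(user)s/
dataset_manager_name = AnalysisDatasetManager
"""

config = ConfigParser()
config.read_string(template)

# Basic interpolation expands %(user)s at lookup time:
print(config.get("Setup", "dataset_manager_path"))
# -> /afs/cern.ch/work/d/dteague/
print(config.get("Setup", "dataset_manager_name"))
# -> AnalysisDatasetManager
```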
7 changes: 4 additions & 3 deletions Utilities/python/CombineCardTools.py
@@ -80,7 +80,8 @@ def getVariations(self):

     def getVariationsForProcess(self, process):
         if process not in self.variations.keys():
-            raise ValueError("Variations not defined for process %s" % process)
+            # return []
+            raise ValueError("Variations not defined for process %s" % process)
         return self.variations[process]

     def setVariationsByProcess(self, process, variations):
@@ -204,7 +205,7 @@ def setCombineChannels(self, groups):
         self.channelsToCombine = groups

     def combineChannels(self, group, processName, central=True):
-        variations = self.variations[group.GetName()][:]
+        variations = self.variations[group.GetName()][:] if group.GetName() in self.variations else []
         fitVariable = self.getFitVariable(group.GetName())
         if central:
             variations.append("")
@@ -263,7 +264,7 @@ def loadHistsForProcess(self, processName, scaleNorm=1, expandedTheory=True):
         #TODO:Make optional
         processedHists = []
         for chan in self.channels:
-            histName = "_".join([fitVariable, chan]) if chan != "all" else fitVariable
+            histName = "_".join([fitVariable, chan])
             hist = group.FindObject(histName)
             if not hist:
                 logging.warning("Failed to produce hist %s for process %s" % (histName, processName))
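Note: the `combineChannels` change swaps a hard `KeyError` for an empty list when a group has no registered variations. A standalone sketch of the pattern (the dict contents and names are illustrative stand-ins, not the real `CombineCardTools` state):

```python
# Illustrative stand-in for self.variations in CombineCardTools.
variations = {"wz-powheg": ["scale", "pdf"]}

def variations_for(name):
    # [:] copies the list so later appends (e.g. the "" entry added
    # when central=True) don't mutate the shared dict value.
    return variations[name][:] if name in variations else []

print(variations_for("wz-powheg"))  # ['scale', 'pdf']
print(variations_for("zz4l"))       # [] rather than a KeyError
```

An equivalent spelling is `variations.get(name, [])[:]`, which keeps the copy semantics in a single expression.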
58 changes: 22 additions & 36 deletions Utilities/python/ConfigureJobs.py
Expand Up @@ -181,35 +181,24 @@ def getListOfHDFSFiles(file_path):

 # TODO: Would be good to switch the order of the last two arguments
 # completely deprecate manager_path without breaking things
-def getListOfFiles(filelist, selection, manager_path="", analysis=""):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFiles(filelist, analysis="", input_tier=""):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    group_path = "%s/AnalysisDatasetManager/PlotGroups" % manager_path
+    group_path = "%s/%s/PlotGroups" % (manager_path, getManagerName())
     data_info = UserInput.readAllInfo("/".join([data_path, "data/*"]))
     mc_info = UserInput.readAllInfo("/".join([data_path, "montecarlo/*"]))
-    analysis_info = UserInput.readInfo("/".join([data_path, analysis, selection])) \
-        if analysis != "" else []
-    valid_names = (data_info.keys() + mc_info.keys()) if not analysis_info else analysis_info.keys()
-    group_names = UserInput.readAllInfo("%s/%s.py" %(group_path, analysis)) if analysis else dict()
+
+    valid_names = UserInput.readInfo("/".join([data_path, analysis, input_tier])) if input_tier and analysis else (data_info.keys() + mc_info.keys())
+    group_names = UserInput.readAllInfo("%s/%s.py" %(group_path, analysis))
     names = []
     for name in filelist:
         if ".root" in name:
             names.append(name)
         # Allow negative contributions
         elif name[0] == "-" :
             names.append(name)
-        elif "WZxsec2016" in name:
-            dataset_file = manager_path + \
-                "%s/FileInfo/WZxsec2016/%s.json" % (getManagerPath(), selection)
-            allnames = json.load(open(dataset_file)).keys()
-            if "nodata" in name:
-                nodata = [x for x in allnames if "data" not in x]
-                names += nodata
-            elif "data" in name:
-                names += [x for x in allnames if "data" in x]
-            else:
-                names += allnames
+        elif "*" in name:
+            names += fnmatch.filter(valid_names, name)
         elif name in group_names:
             names += group_names[name]['Members']
         elif "*" in name:
@@ -247,11 +236,10 @@ def fillTemplatedFile(template_file_name, out_file_name, template_dict):
     with open(out_file_name, "w") as outFile:
         outFile.write(result)

-def getListOfFilesWithXSec(filelist, manager_path="", selection="ntuples"):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFilesWithXSec(filelist, analysis="", input_tier=""):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    files = getListOfFiles(filelist, selection, manager_path)
+    files = getListOfFiles(filelist, analysis, input_tier)
     mc_info = UserInput.readAllInfo("/".join([data_path, "montecarlo/*"]))
     info = {}
     for file_name in files:
@@ -269,21 +257,20 @@ def getListOfFilesWithXSec(filelist, manager_path="", selection="ntuples"):
             info.update({file_name : file_info["cross_section"]*kfac})
     return info

-def getListOfFilesWithPath(filelist, analysis, selection, das=True, manager_path=""):
-    if manager_path is "":
-        manager_path = getManagerPath()
+def getListOfFilesWithPath(filelist, analysis, input_tier, das=True):
+    manager_path = getManagerPath()
     data_path = "%s/%s/FileInfo" % (manager_path, getManagerName())
-    files = getListOfFiles(filelist, selection, manager_path, analysis)
-    selection_info = UserInput.readInfo("/".join([data_path, analysis, selection]))
+    files = getListOfFiles(filelist, analysis, input_tier)
+    input_tier_info = UserInput.readInfo("/".join([data_path, analysis, input_tier]))
     info = {}
     for file_name in files:
-        if das and "DAS" not in selection_info[file_name].keys():
-            logging.error("DAS path not defined for file %s in analysis %s/%s" % (file_name, analysis, selection))
+        if das and "DAS" not in input_tier_info[file_name].keys():
+            logging.error("DAS path not defined for file %s in analysis %s/%s" % (file_name, analysis, input_tier))
             continue
-        elif not das and "file_path" not in selection_info[file_name].keys():
-            logging.error("File_path not defined for file %s in analysis %s/%s" % (file_name, analysis, selection))
+        elif not das and "file_path" not in input_tier_info[file_name].keys():
+            logging.error("File_path not defined for file %s in analysis %s/%s" % (file_name, analysis, input_tier))
             continue
-        info.update({file_name : selection_info[file_name]["DAS" if das else "file_path"]})
+        info.update({file_name : input_tier_info[file_name]["DAS" if das else "file_path"]})
     return info

def getPreviousStep(selection, analysis):
@@ -331,9 +318,8 @@ def getConfigFileName(config_file_name):
raise ValueError("Invalid configuration file. Tried to read %s which does not exist" % \
config_file_name)

def getInputFilesPath(sample_name, selection, analysis, manager_path=""):
if manager_path is "":
manager_path = getManagerPath()
def getInputFilesPath(sample_name, selection, analysis):
manager_path = getManagerPath()
if ".root" in sample_name:
logging.info("Using simple file %s" % sample_name)
return sample_name
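Note: the refactor drops `manager_path`/`selection` in favor of `(filelist, analysis, input_tier)` and replaces the hard-coded `WZxsec2016` JSON branch with generic wildcard matching. A simplified, self-contained sketch of the new name dispatch (the dataset and group names are illustrative stand-ins for what `UserInput.readAllInfo` would return, and the trailing wildcard branch below the fold is omitted):

```python
import fnmatch

# Illustrative stand-ins for the dataset-manager lookups.
valid_names = ["wz3lnu-powheg", "wz3lnu-mg5amcnlo", "zz4l-powheg", "data_DoubleMuon"]
group_names = {"EW-WZ": {"Members": ["wz3lnu-powheg"]}}

def get_list_of_files(filelist):
    names = []
    for name in filelist:
        if ".root" in name:
            names.append(name)          # explicit ROOT file passes through
        elif name[0] == "-":
            names.append(name)          # negative (subtracted) contribution
        elif "*" in name:
            # Shell-style wildcard against the known dataset names
            names += fnmatch.filter(valid_names, name)
        elif name in group_names:
            names += group_names[name]["Members"]   # plot-group expansion
    return names

print(get_list_of_files(["wz3lnu-*", "EW-WZ", "extra.root"]))
# ['wz3lnu-powheg', 'wz3lnu-mg5amcnlo', 'wz3lnu-powheg', 'extra.root']
```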
13 changes: 8 additions & 5 deletions Utilities/python/SelectorTools.py
@@ -21,10 +21,10 @@ def __init__(self, analysis, selection, input_tier, year):
"ZZGen" : "ZZGenSelector",
"WGen" : "WGenSelector",
"ZGen" : "ZGenSelector",
"TTT" : "TTTSelector",
"ThreeLep" : "ThreeLepSelector",
"Eff" : "Efficiency",
"Efficiency" : "Efficiency",
"FR" : "FakeRateSelector",
}

self.subanalysis = None
@@ -94,7 +94,7 @@ def setInputs(self, inputs):
             self.inputs.Add(inp)
         self.addTNamed("ntupleType", self.ntupleType)
         self.addTNamed("selection", self.selection)
-        self.addTNamed("year", self.year)
+        #self.addTNamed("year", self.year)

def setSelection(self, selection):
self.selection = selection
@@ -158,7 +158,7 @@ def clearDatasets(self):

     def setDatasets(self, datalist):
         analysis = self.subanalysis if self.subanalysis else self.analysis
-        datasets = ConfigureJobs.getListOfFiles(datalist, self.input_tier, analysis=analysis)
+        datasets = ConfigureJobs.getListOfFiles(datalist, analysis, self.input_tier)

         for dataset in datasets:
             if "@" in dataset:
@@ -189,8 +189,10 @@ def isBackground(self):
         self.selector_name = self.selector_name.replace("Selector", "BackgroundSelector")

     def processDataset(self, dataset, file_path, chan):
+
         logging.info("Processing dataset %s" % dataset)
         select = getattr(ROOT, self.selector_name)()
+
         select.SetInputList(self.inputs)
         self.addTNamed("name", dataset)
         if dataset in self.regions:
@@ -200,6 +202,7 @@ def processDataset(self, dataset, file_path, chan):
                 select.addSubprocesses(vec)
         # Only add for one channel
         addSumweights = self.addSumweights and self.channels.index(chan) == 0 and "data" not in dataset
+
         if addSumweights:
             # Avoid accidentally combining sumweights across datasets
             currfile_name = self.current_file.GetName()
@@ -215,8 +218,8 @@ def processDataset(self, dataset, file_path, chan):
                 sumweights_hist = ROOT.TH1D("sumweights", "sumweights", 1000, 0, 1000)
                 sumweights_hist.SetDirectory(ROOT.gROOT)
                 self.current_file = ROOT.TFile.Open(currfile_name, "update")
+
         self.processLocalFiles(select, file_path, addSumweights, chan)
-
         output_list = select.GetOutputList()
         processes = [dataset] + (self.regions[dataset] if dataset in self.regions else [])
         self.writeOutput(output_list, chan, processes, dataset, addSumweights)
@@ -296,7 +299,7 @@ def processParallelByDataset(self, datasets, chan):
         # Store arrays in temp files, since it can get way too big to keep around in memory
         tempfiles = [self.tempfileName(d) for d in datasets]
         self.combineParallelFiles(tempfiles, chan)
-
+    # Pool.map can only take in one argument, so expand the array
     def processDatasetHelper(self, args):
         self.processDataset(*args)
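Note: the new comment above `processDatasetHelper` documents a real `multiprocessing` constraint: `Pool.map` hands each work item to the callable as a single argument, so multi-argument tasks travel as tuples and the helper unpacks them. A minimal standalone sketch (the dataset names and paths are made up):

```python
from multiprocessing import Pool

def process_dataset(dataset, file_path, chan):
    return "%s %s %s" % (dataset, file_path, chan)

# Pool.map can only pass one argument, so expand the tuple here.
def process_dataset_helper(args):
    return process_dataset(*args)

if __name__ == "__main__":
    work = [("wz3lnu-powheg", "/tmp/wz.root", "eee"),
            ("zz4l-powheg", "/tmp/zz.root", "mmm")]
    with Pool(2) as pool:
        print(pool.map(process_dataset_helper, work))
    # ['wz3lnu-powheg /tmp/wz.root eee', 'zz4l-powheg /tmp/zz.root mmm']
```

On Python 3, `pool.starmap(process_dataset, work)` performs the same unpacking without a helper; the helper pattern is what works on Python 2 as well.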
2 changes: 1 addition & 1 deletion Utilities/python/UserInput.py
Expand Up @@ -16,7 +16,7 @@ def getDefaultParser(allow_from_file=True):
                         required=False, help="Name of selection to make, "
                         " as defined in Cuts/<analysis>/<selection>.json")
     parser.add_argument("-a", "--analysis", type=str,
-                        required=False, default="WZxsec2016",
+                        required=False,
                         help="Analysis name, used in selecting the cut json")
     parser.add_argument("--selectorArgs", nargs='+', type=str,
                         help="List of additional configurations to send to selector")
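Note: dropping the hard-coded default means an omitted `-a`/`--analysis` now parses to `None`, so the analysis must be supplied explicitly (or via the config) instead of silently inheriting `WZxsec2016`. A minimal sketch of the parser behavior:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--analysis", type=str, required=False,
                    help="Analysis name, used in selecting the cut json")

# With no default, an omitted flag comes back as None:
print(parser.parse_args([]).analysis)              # None
print(parser.parse_args(["-a", "3top"]).analysis)  # 3top
```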
4 changes: 2 additions & 2 deletions Utilities/scripts/addUnrolledHistsToFile.py
@@ -23,7 +23,7 @@
             ['wzjj-vbfnlo-sf', 'wzjj-vbfnlo-of', ] + \
             ['wz3lnu-mg5amcnlo','wz3lnu-powheg', 'zz4l-mg5amcnlo'] + \
             ['AllData', 'WZxsec2016data', 'DataEWKCorrected'],
-        'Wselection'),
+        input_tier='Wselection'),
     ["mjj_etajj_2D_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_etajj_2D_Fakes_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_etajj_2D_%s_%s" % (var, c) for c in ConfigureJobs.getChannels()
@@ -39,7 +39,7 @@
             ['wzjj-vbfnlo-sf', 'wzjj-vbfnlo-of', ] + \
             ['wz3lnu-mg5amcnlo','wz3lnu-powheg', 'zz4l-mg5amcnlo'] + \
             ['AllData', 'WZxsec2016data', 'DataEWKCorrected'],
-        'Wselection'),
+        input_tier='Wselection'),
     ["mjj_dRjj_2D_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_dRjj_2D_Fakes_%s" % c for c in ConfigureJobs.getChannels()] + \
     ["mjj_dRjj_2D_%s_%s" % (var, c) for c in ConfigureJobs.getChannels()
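Note: switching these call sites to `input_tier='Wselection'` matters because of the new `getListOfFiles` signature: passed positionally, the string would bind to `analysis` instead. A toy sketch of the pitfall (the function body is a stand-in, only the argument order mirrors `ConfigureJobs.getListOfFiles`):

```python
# Toy stand-in with the new argument order of ConfigureJobs.getListOfFiles.
def get_list_of_files(filelist, analysis="", input_tier=""):
    return analysis, input_tier

print(get_list_of_files([], "Wselection"))             # ('Wselection', '') -- wrong slot
print(get_list_of_files([], input_tier="Wselection"))  # ('', 'Wselection') -- intended
```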