diff --git a/.gitignore b/.gitignore index dbc7fd1..0d20b64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ -python/__init__.py *.pyc diff --git a/common/include/IndexRangeIterator.h b/common/include/IndexRangeIterator.h new file mode 100644 index 0000000..8bacaba --- /dev/null +++ b/common/include/IndexRangeIterator.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +/* + * Provide an iterator interface over an index that runs from 0 to N + */ +template +class IndexRangeIterator + : public boost::iterator_facade, + IDX, // value + boost::random_access_traversal_tag, + IDX // ref + > +{ +public: + IndexRangeIterator(IDX idx, IDX max) + : m_idx{idx}, m_max{max} {} + + IDX dereference() const { return m_idx; } + bool equal( const IndexRangeIterator& other ) const { return ( m_max == other.m_max ) && ( m_idx == other.m_idx ); } + void increment() { ++m_idx; } + void decrement() { --m_idx; } + void advance(std::ptrdiff_t d) { m_idx += d; } + std::ptrdiff_t distance_to(const IndexRangeIterator& other) const { return m_idx-other.m_idx; } +private: + IDX m_idx, m_max; +}; diff --git a/common/include/ScaleFactors.h b/common/include/ScaleFactors.h new file mode 100644 index 0000000..46ff4ca --- /dev/null +++ b/common/include/ScaleFactors.h @@ -0,0 +1,203 @@ +#pragma once + +#include "BinnedValuesJSONParser.h" +#include + +class ILeptonScaleFactor { +public: + virtual ~ILeptonScaleFactor() {} + virtual float get(const Parameters& parameters, Variation variation) const = 0; +}; +class IDiLeptonScaleFactor { +public: + virtual ~IDiLeptonScaleFactor() {} + virtual float get(const Parameters& l1Param, const Parameters& l2Param, Variation variation) const = 0; +}; + +class IJetScaleFactor { +public: + virtual ~IJetScaleFactor() {} + + enum Flavour { + Light = 0, + Charm = 1, + Beauty = 2 + }; + static inline Flavour get_flavour(int hadron_flavour) { + switch(hadron_flavour) { + case 5: + return Flavour::Beauty; + case 4: + return Flavour::Charm; + default: + return Flavour::Light; + } + } + virtual float get(const Parameters& parameters, Flavour flavour, Variation variation) const = 0; +}; + +/** + * Simple case: one scale factor from file + */ +class ScaleFactor : public ILeptonScaleFactor { +public: + explicit ScaleFactor(const std::string& file) + : m_values{BinnedValuesJSONParser(file).get_values()} + {} + virtual ~ScaleFactor() {} + + virtual float get(const Parameters& parameters, Variation variation) const override final { + return m_values.get(parameters)[static_cast(variation)]; + } +private: + BinnedValues m_values; +}; + +/** + * Dilepton scalefactor from two lepton scalefactors (takes a parameter set for each) + */ +class DiLeptonFromLegsScaleFactor : public IDiLeptonScaleFactor { +public: + DiLeptonFromLegsScaleFactor( std::unique_ptr&& l1_leg1 + , std::unique_ptr&& l1_leg2 + , std::unique_ptr&& l2_leg1 + , std::unique_ptr&& l2_leg2 ) + : m_l1_leg1{std::move(l1_leg1)} + , m_l1_leg2{std::move(l1_leg2)} + , m_l2_leg1{std::move(l2_leg1)} + , m_l2_leg2{std::move(l2_leg2)} + {} + virtual ~DiLeptonFromLegsScaleFactor() {} + + virtual float get(const Parameters& l1Param, const Parameters& l2Param, Variation variation) const override final { + const float eff_lep1_leg1 = m_l1_leg1->get(l1Param, variation); + const float eff_lep1_leg2 = m_l1_leg2->get(l1Param, variation); + const float eff_lep2_leg1 = m_l2_leg1->get(l2Param, variation); + const float eff_lep2_leg2 = m_l2_leg2->get(l2Param, variation); + + if ( variation == Nominal ) { + return -(eff_lep1_leg1 * eff_lep2_leg1) + + (1 - (1 - eff_lep1_leg2)) * eff_lep2_leg1 + + eff_lep1_leg1 * (1 - (1 - eff_lep2_leg2)) ; + } else { + const float nom_eff_lep1_leg1 = m_l1_leg1->get(l1Param, Nominal); + const float nom_eff_lep1_leg2 = m_l1_leg2->get(l1Param, Nominal); + const float nom_eff_lep2_leg1 = m_l2_leg1->get(l2Param, Nominal); + const float nom_eff_lep2_leg2 = m_l2_leg2->get(l2Param, Nominal); + + const float nominal = -(eff_lep1_leg1 * eff_lep2_leg1) + + (1 - (1 - eff_lep1_leg2)) * eff_lep2_leg1 + + eff_lep1_leg1 * (1 - (1 - eff_lep2_leg2)) ; + + const float error_squared = + std::pow(1 - nom_eff_lep2_leg1 - (1 - nom_eff_lep2_leg2), 2) * + std::pow(eff_lep1_leg1, 2) + + std::pow(nom_eff_lep2_leg1, 2) * + std::pow(eff_lep1_leg2, 2) + + std::pow(1 - nom_eff_lep1_leg1 - (1 - nom_eff_lep1_leg2), 2) * + std::pow(eff_lep2_leg1, 2) + + std::pow(nom_eff_lep1_leg1, 2) * + std::pow(eff_lep2_leg2, 2); + + if ( variation == Up ) { + return std::min(nominal+std::sqrt(error_squared), 1.f); + } else if ( variation == Down ) { + return std::max(0.f, nominal-std::sqrt(error_squared)); + } + } + } +private: + std::unique_ptr m_l1_leg1; + std::unique_ptr m_l1_leg2; + std::unique_ptr m_l2_leg1; + std::unique_ptr m_l2_leg2; +}; + +/** + * B-tagging scale factor (depends on flavour) + */ +class BTaggingScaleFactor : public IJetScaleFactor { +public: + BTaggingScaleFactor(const std::string& lightFile, const std::string& charmFile, const std::string& beautyFile) + : m_lightValues {BinnedValuesJSONParser(lightFile ).get_values()} + , m_charmValues {BinnedValuesJSONParser(charmFile ).get_values()} + , m_beautyValues{BinnedValuesJSONParser(beautyFile).get_values()} + {} + virtual ~BTaggingScaleFactor() {} + + virtual float get(const Parameters& parameters, Flavour flavour, Variation variation) const override final { + switch(flavour) { + case Flavour::Beauty: + return m_beautyValues.get(parameters)[static_cast(variation)]; + case Flavour::Charm: + return m_charmValues .get(parameters)[static_cast(variation)]; + case Flavour::Light: + return m_lightValues .get(parameters)[static_cast(variation)]; + default: + throw std::invalid_argument("Unsupported flavour: "+flavour); + } + } +private: + BinnedValues m_lightValues; + BinnedValues m_charmValues; + BinnedValues m_beautyValues; +}; + +#include +#include +#include "IndexRangeIterator.h" +/** + * Weight between different scale factors + */ +template +class WeightedScaleFactor : public ISingle { +public: + WeightedScaleFactor( const std::vector& probs, std::vector>&& sfs ) + : m_terms{std::move(sfs)} + { + const double norm{1./std::accumulate(std::begin(probs), std::end(probs), 0.)}; + std::transform(std::begin(probs), std::end(probs), std::back_inserter(m_probs), [norm] ( float p ) -> float { return norm*p; } ); + } + virtual ~WeightedScaleFactor() {} + + virtual float get(Args&&... args, Variation variation) const override final { + return std::accumulate(IndexRangeIterator(0, m_terms.size()), IndexRangeIterator(m_terms.size(), m_terms.size()), 0., + [this,&args...,variation] ( float wsum, std::size_t i ) -> float { + return wsum + m_probs[i]*m_terms[i]->get(std::forward(args)..., variation); + } + ); + } +private: + std::vector m_probs; + std::vector> m_terms; +}; +using WScaleFactor = WeightedScaleFactor; +using WLLScaleFactor = WeightedScaleFactor; +using WBTaggingScaleFactor = WeightedScaleFactor; + +#include +/** + * Sample according to fractions from different scale factors + */ +template +class SampledScaleFactor : public ISingle +{ +public: + SampledScaleFactor( const std::vector probs, std::vector>&& sfs ) + : m_rg{42} + , m_probs{std::discrete_distribution(std::begin(probs), std::end(probs))} + , m_terms{std::move(sfs)} + {} + virtual ~SampledScaleFactor() {} + + virtual float get(Args... args, Variation variation) const override final { + return m_terms[m_probs(m_rg)]->get(std::forward(args)..., variation); + } +private: + mutable std::mt19937 m_rg; + mutable std::discrete_distribution m_probs; + std::vector> m_terms; +}; +using SmpScaleFactor = SampledScaleFactor; +using SmpLLScaleFactor = SampledScaleFactor; +using SmpBTaggingScaleFactor = SampledScaleFactor; diff --git a/common/include/kinematics.h b/common/include/kinematics.h new file mode 100644 index 0000000..97f9ab4 --- /dev/null +++ b/common/include/kinematics.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace Kinematics { + using LorentzVector = ROOT::Math::LorentzVector >; + + float deltaPhi( const LorentzVector& pa, const LorentzVector& pb ) + { + return std::acos(std::min(std::max(-1., + (pa.Px()*pb.Px()+pa.Py()*pb.Py())/(pa.Pt()*pb.Pt()) + ), 1.)); + } + float deltaR( const LorentzVector& pa, const LorentzVector& pb ) + { + return std::sqrt(std::pow(pb.Eta()-pa.Eta(), 2)+std::pow(deltaPhi(pa, pb), 2)); + } + + float signedDeltaPhi( const LorentzVector& pa, const LorentzVector& pb ) + { + return ( ( pb.Py()*pa.Px()-pb.Px()*pa.Py() > 0. ) ? 1. : -1. )*deltaPhi(pa, pb); + } + + float signedDeltaEta( const LorentzVector& pa, const LorentzVector& pb ) + { + const float etaA{pa.Eta()}; + const float etaB{pb.Eta()}; + return ( etaA > 0. ? 1. : -1. )*( etaB - etaA ); // (small) positive and negative = more and less forward + } +}; diff --git a/installpy_standalone.sh b/installpy_standalone.sh new file mode 100755 index 0000000..c7d8bc5 --- /dev/null +++ b/installpy_standalone.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Creates a scram-like (symlinked) python install directory for *Tools packages +# in the equivalent of ${CMSSW_BASE}/src/install_python (or ${PREFIX}/install_python, if PREFIX is defined) +# +thisscript="$(realpath ${0})" +commontoolspath="$(dirname ${thisscript})" +cp3llbbpath="$(dirname ${commontoolspath})" +if [[ "$(basename ${cp3llbbpath})" != "cp3_llbb" ]]; then + echo "Directory above CommonTools is not called cp3_llbb, aborting" + exit 1 +fi +basepath="$(dirname ${cp3llbbpath})" +if [[ -z "${PREFIX}" ]]; then + PREFIX="${basepath}" +fi +installpath="${PREFIX}/install_python" +if [[ -a "${installpath}" ]]; then + echo "Install path ${installpath} already exists, aborting" + exit 1 +fi +echo "Installing into ${installpath}, make sure to add it to PYTHONPATH as well" +mkdir -p "${installpath}" +pushd "${basepath}" > /dev/null +for pkgpy in */*Tools/python; do + pkgpath_py="${basepath}/${pkgpy}" + if [[ ! -d "${pkgpath_py}" ]]; then + echo "ASSERT FAILURE: ${pkgpath_py}" + exit 1 + fi + pkgname_full="$(dirname ${pkgpy})" + pkgname="$(basename ${pkgname_full})" + hatname="$(dirname ${pkgname_full})" + hatinstalldir="${installpath}/${hatname}" + mkdir -p "${hatinstalldir}" + hatinitpy="${hatinstalldir}/__init__.py" + if [[ ! -f "${hatinitpy}" ]]; then + echo "" > "${hatinitpy}" + fi + ln -s "${pkgpath_py}" "${hatinstalldir}/${pkgname}" + echo "Installed ${pkgname_full}" +done diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 0000000..3e30dd5 --- /dev/null +++ b/python/__init__.py @@ -0,0 +1,13 @@ +__all__ = ("pathCommonTools", "pathCP3llbb", "pathCMS") +import os.path +pathCommonTools = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) +_pathCMS_paths = os.path.dirname(os.path.dirname(pathCP3llbb)) +import os +_pathCMS_env = os.getenv("CMSSW_BASE") +pathCMS = _pathCMS_paths +if _pathCMS_env == "": + print("Warning: Could not get CMSSW_BASE variable from the environment") +elif _pathCMS_env != _pathCMS_paths: + print("Warning: CommonTools is not using the CMSSW release version from $CMSSW_BASE ({0} versus {1}), using the former".format(_pathCMS_paths, _pathCMS_env)) + pathCMS = _pathCMS_paths diff --git a/python/batchhelpers.py b/python/batchhelpers.py new file mode 100644 index 0000000..5617f0c --- /dev/null +++ b/python/batchhelpers.py @@ -0,0 +1,221 @@ +""" +Batch cluster tools (common part for HTCondor and slurm) +""" +__author__ = "Pieter David " +__date__ = "February-April 2017" + +import logging +logger = logging.getLogger(__name__) + +import os +import os.path + +def makeExecutable(path): + """ Set file permissions to executable """ + import stat + if os.path.exists(path) and os.path.isfile(path): + perm = os.stat(path) + os.chmod(path, perm.st_mode | stat.S_IEXEC) + +class CommandListJob(object): + """ Interface/base for 'backend' classes to create a job cluster/array from a list of commands (eah becoming a subjob) """ + def __init__(self, commandList, workDir=None, workdir_default_pattern="batch_work"): + self.commandList = commandList + self.workDir = self.init_dirtowrite(workDir, default_pattern=workdir_default_pattern) + self.workDirs = self.setupBatchDirs(self.workDir) + + ## interface methods + def submit(self): + """ Submit the job(s) """ + pass + def commandOutFiles(self, command): + """ Output files for a given command (when finished) """ + pass + ## for monitoring + @property + def status(self): + """ overall job status summary """ + pass + def statuses(self): + """ list of subjob statuses (numeric, using indices in CondorJobStatus) """ + pass + def updateStatuses(self): + """ Update the subjob status cache (if used in the implementation) """ + pass + def subjobStatus(self, i): + """ get status of subjob """ + pass + def commandStatus(self, command): + """ get the status of the jobs corresponding to one of the commands """ + pass + + ## helper methods + @staticmethod + def init_dirtowrite(given=None, default_pattern="batch"): + """ Initialisation helper: check that the directory does not exist, then create it + + If None is given, a default ("$(pwd)/{default_pattern}", "$(pwd)/default_pattern_0" etc.) is used + """ + if given is None: + test_dir = os.path.join(os.getcwd(), default_pattern) + if os.path.exists(test_dir): + i = 0 + while os.path.exists(test_dir) and i < 10: + test_dir = os.path.join(os.getcwd(), "{0}_{1:d}".format(default_pattern, i)) + i += 1 + else: + test_dir = given + if os.path.exists(test_dir): + raise Exception("Directory {0} exists".format(test_dir)) + + os.makedirs(test_dir) + + return os.path.abspath(test_dir) ## make sure we keep absolute paths + + @staticmethod + def setupBatchDirs(workDir): + """ Create up the working directories (input, output, logs) under workDir """ + dirs = { + "in" : os.path.join(workDir, "input") + , "out" : os.path.join(workDir, "output") + , "log" : os.path.join(workDir, "logs") + } + for subdir in dirs.itervalues(): + os.mkdir(subdir) + logger.info("Created working directories under {0}".format(workDir)) + return dirs + +class SplitAggregationTask(object): + """ Task split in commands, whose results can be merged """ + def __init__(self, commandList, finalizeAction=None): + self._jobCluster = None + self.commandList = commandList + self.finalizeAction = finalizeAction + @property + def jobCluster(self): + return self._jobCluster + @jobCluster.setter + def jobCluster(self, jobCluster): + if self._jobCluster: + raise Exception("SplitAggregationTask.jobCluster has already been set") + self._jobCluster = jobCluster + if self.finalizeAction: ## pass it on + self.finalizeAction.jobCluster = jobCluster + + def tryFinalize(self): + if self.jobCluster and all(self.jobCluster.commandStatus(cmd).upper() == "COMPLETED" for cmd in self.commandList): + if self.finalizeAction: + self.finalizeAction.perform() + return True + return False + +class Action(object): + """ interface for job finalization """ + def perform(self): + """ interface method """ + return False + +import subprocess + +class HaddAction(Action): + """ merge results with hadd + """ + def __init__(self, commandList, outDir, options=None): + self.jobCluster = None + self.commandList = commandList + self.outDir = outDir + self.options = options if options is not None else [] + super(HaddAction, self).__init__() + def perform(self): + if len(self.commandList) == 0: ## nothing + return + elif len(self.commandList) == 1: ## move + cmd = self.commandList[0] + for outf in self.jobCluster.commandOutFiles(cmd): + logger.info("finalization: moving output file {0} to {1}".format(outf, self.outDir)) + subprocess.check_call(["mv", outf, self.outDir]) + else: ## merge + ## collect for each output file name which jobs produced one + filesToMerge = dict() + for cmd in self.commandList: + for outf in self.jobCluster.commandOutFiles(cmd): + outfb = os.path.basename(outf) + if outfb not in filesToMerge: + filesToMerge[outfb] = [] + filesToMerge[outfb].append(outf) + + for outfb, outfin in filesToMerge.iteritems(): + fullout = os.path.join(self.outDir, outfb) + logger.info("finalization: merging {0} to {1}".format(", ".join(outfin), fullout)) + subprocess.check_call(["hadd"]+self.options+[fullout]+outfin) + +from itertools import izip, chain + +class TasksMonitor(object): + """ Monitor a number of tasks and the associated job clusters """ + def __init__(self, jobs=[], tasks=[], interval=120, allStatuses=None, activeStatuses=None, completedStatus=None): + """ Constructor + + jobs: job clusters to monitor + tasks: tasks to monitor (and finalize) + interval: number of seconds between status checks + """ + self.jobs = set() + self.tasks = set() + self.activetasks = set() + self.add(jobs, tasks) + self.interval = interval + self.allStatuses = list(allStatuses) if allStatuses else [] + self.activeStatuses = list(activeStatuses) if activeStatuses else [] + self.completedStatus = completedStatus + def add(self, jobs, tasks): + """ add jobs and tasks """ + self.jobs.update(jobs) + self.tasks.update(tasks) + assert sum(len(j.commandList) for j in self.jobs) == sum(len(t.commandList) for t in self.tasks) ## assumption: same set of commands + self.activetasks.update(tasks) + @staticmethod + def makeStats(statuses, allStatuses): + histo = [ 0 for st in allStatuses ] + for jSt in statuses: + histo[jSt] += 1 + return histo + @staticmethod + def formatStats(stats, allStatuses): + return "{0} / {1:d} Total".format(", ".join("{0:d} {1}".format(n,nm) for n,nm in izip(stats, allStatuses)), sum(stats)) + + def _tryFinalize(self): + """ try to finalize tasks """ + finalized = [] + for t in self.activetasks: + if t.tryFinalize(): + finalized.append(t) ## don't change while iterating + for ft in finalized: + self.activetasks.remove(ft) + if len(finalized) > 0: + logger.info("Finalized {0:d} tasks, {1:d} remaining".format(len(finalized), len(self.activetasks))) + + def collect(self, wait=None): + """ wait for the jobs to finish, then finalize tasks """ + for j in self.jobs: + j.updateStatuses() + self._tryFinalize() + if len(self.activetasks) > 0: + logger.info("Waiting for jobs to finish...") + from datetime import datetime + import time + nJobs = sum( len(j.commandList) for j in self.jobs ) + if wait: + time.sleep(wait) + for j in self.jobs: + j.updateStatuses() + stats = self.makeStats(chain.from_iterable(j.statuses() for j in self.jobs), self.allStatuses) + while len(self.activetasks) > 0 and sum(stats[sa] for sa in self.activeStatuses) > 0: + time.sleep(self.interval) + prevStats = stats + for j in self.jobs: + j.updateStatuses() + stats = self.makeStats(chain.from_iterable(j.statuses() for j in self.jobs), self.allStatuses) + logger.info("[ {0} :: {1} ]".format(datetime.now().strftime("%H:%M:%S"), self.formatStats(stats, self.allStatuses))) + if stats[self.completedStatus] > prevStats[self.completedStatus]: + self._tryFinalize() diff --git a/python/condorhelpers.py b/python/condorhelpers.py new file mode 100644 index 0000000..6f0123e --- /dev/null +++ b/python/condorhelpers.py @@ -0,0 +1,174 @@ +""" +HTCondor tools (based on cp3-llbb/CommonTools condorSubmitter) +""" +__author__ = "Pieter David 200)\n" + "executable = {indir}/condor.sh\n" + "arguments = $(Process)\n" + "output = {logdir_rel}/condor_$(Process).out\n" + "error = {logdir_rel}/condor_$(Process).err\n" + "log = {logdir_rel}/condor_$(Process).log\n" + "queue {nJobs:d}\n" + ) + MasterShell = ( + "#!/usr/bin/env bash\n" + "\n" + "{indir}/condor_$1.sh\n" + ) + + JobShell = ( + "#!/usr/bin/env bash\n" + "\n" + "{environment_setup}" + # "# Setup our CMS environment\n" + # "pushd {CMS_PATH}\n" + # "source /cvmfs/cms.cern.ch/cmsset_default.sh\n" + # "eval `scram runtime --sh`\n" + # "popd\n" + "\n" + "function move_files {{\n" + "{move_fragment}" + "\n}}\n" + "\n" + "{command} && move_files" + ) + + def _writeCondorFiles(self): + """ Create Condor .sh and .cmd files """ + masterCmdName = os.path.join(self.workDirs["in"], "condor.cmd") + with open(masterCmdName, "w") as masterCmd: + masterCmd.write(CommandListCondorJob.MasterCmd.format( + indir=self.workDirs["in"] + , logdir_rel=os.path.relpath(self.workDirs["log"]) + , nJobs=len(self.commandList) + )) + masterShName = os.path.join(self.workDirs["in"], "condor.sh") + with open(masterShName, "w") as masterSh: + masterSh.write(CommandListCondorJob.MasterShell.format( + indir=self.workDirs["in"] + )) + makeExecutable(masterShName) + + for i,command in izip(count(), self.commandList): + jobShName = os.path.join(self.workDirs["in"], "condor_{:d}.sh".format(i)) + job_outdir = os.path.join(self.workDirs["out"], str(i)) + os.makedirs(job_outdir) + with open(jobShName, "w") as jobSh: + jobSh.write(CommandListCondorJob.JobShell.format( + environment_setup="\n".join(self.envSetupLines) + , move_fragment="\n".join(( + " for file in {pattern}; do\n" + ' echo "Moving $file to {outdir}/"\n' + " mv $file {outdir}/\n" + " done" + ).format(pattern=ipatt, outdir=job_outdir) + for ipatt in self.outputPatterns) + , command=command + )) + makeExecutable(jobShName) + + return masterCmdName + + def _commandOutDir(self, command): + """ Output directory for a given command """ + return os.path.join(self.workDirs["out"], str(self.commandList.index(command))) + def commandOutFiles(self, command): + """ Output files for a given command """ + import fnmatch + cmdOutDir = self._commandOutDir(command) + return list( os.path.join(cmdOutDir, fn) for fn in os.listdir(cmdOutDir) + if any( fnmatch.fnmatch(fn, pat) for pat in self.outputPatterns) ) + + def submit(self): + """ Submit the jobs to condor """ + logger.info("Submitting {0:d} condor jobs.".format(len(self.commandList))) + result = subprocess.check_output(["condor_submit", self.masterCmd]) + + import re + pat = re.compile("\d+ job\(s\) submitted to cluster (\d+)\.") + g = pat.search(result) + self.clusterId = g.group(1) + + ## save to file in case + with open(os.path.join(self.workDirs["in"], "cluster_id"), "w") as f: + f.write(self.clusterId) + + logger.info("Submitted, job ID is {}".format(self.clusterId)) + + def statuses(self): + """ list of subjob statuses (numeric, using indices in CondorJobStatus) """ + if self.clusterId is None: + raise Exception("Cannot get status before submitting the jobs to condor") + return map(int, list(subprocess.check_output(["condor_q" , self.clusterId, "-format", '%d ', "JobStatus"]).strip().split()) + + list(subprocess.check_output(["condor_history", self.clusterId, "-format", '%d ', "JobStatus"]).strip().split()) ) + + @property + def status(self): + statuses = self.statuses() + if all(st == statuses[0] for st in statuses): + return CondorJobStatus[statuses[0]] + elif any(st == 2 for st in statuses): + return "Running" + elif any(st == 3 for st in statuses): + return "Removed" + else: + return "unknown" + + def subjobStatus(self, i): + subjobId = "{0}.{1:d}".format(self.clusterId, i) + ret = subprocess.check_output(["condor_q", subjobId, "-format", '%d', "JobStatus"]) + if len(ret) == 0: # search in the completed ones + ret = subprocess.check_output(["condor_history", subjobId, "-format", '%d', "JobStatus"]) + return CondorJobStatus[int(ret)] + def commandStatus(self, command): + return self.subjobStatus(self.commandList.index(command)) + +def makeCondorTasksMonitor(jobs=[], tasks=[], interval=120): + """ make a TasksMonitor for condor jobs """ + from .batchhelpers import TasksMonitor + return TasksMonitor(jobs=jobs, tasks=tasks, interval=interval + , allStasuses=CondorJobStatus + , activeStatuses=(1,2) + , completedStatus=4 + ) diff --git a/python/factoryhelpers.py b/python/factoryhelpers.py new file mode 100644 index 0000000..d9de511 --- /dev/null +++ b/python/factoryhelpers.py @@ -0,0 +1,83 @@ +""" +Helper methods shared between histFactory and treeFactory +""" + +import logging +logger = logging.getLogger(__name__) + +import os.path + +from . import pathCommonTools +TEMPLATE_DIR = os.path.join(pathCommonTools, "templates") +def getTemplate(templName): + return os.path.join(TEMPLATE_DIR, "{0}.tpl".format(templName)) +COMMON_DIR = os.path.join(pathCommonTools, "common") +COMMON_INCLUDE_DIR = os.path.join(COMMON_DIR, "include") +COMMON_SRC_DIR = os.path.join(COMMON_DIR, "src") + +################################################################################ +## TEMPLATE EXPANSION ## +################################################################################ + +def expandTemplateLines(inFileName, **replacements): + """ Generator: lines with replacements """ + fmtReplacements = dict(("{{{{{0}}}}}".format(fro), to) for fro,to in replacements.iteritems()) + with open(inFileName, "r") as inFile: + for line in inFile: + myLn = line + for fro,to in fmtReplacements.iteritems(): + myLn = myLn.replace(fro, to) + if "{{" not in myLn: + break ## stop searching + yield myLn ## could also split by "\n" again +def expandTemplate(inFileName, **replacements): + """ expand as a single string """ + return "".join(expandTemplateLines(inFileName, **replacements)) +def expandTemplateAndWrite(outFileName, inFileName, **replacements): + """ expand and write to file """ + with open(outFileName, "w") as outFile: + outFile.writelines(expandTemplateLines(inFileName, **replacements)) + + +## other helpers +def getBranchType(name, skeleton): + ## TODO plots should contain expressions not callables, and leafDeps should return the type as well + ## then this method disappears (code is in ttWdeco already), expressions compiled when creating the plot + ## and createPlotter does not even need a skeleton any more + ibr = next(br for br in skeleton.GetListOfBranches() if br.GetName() == name) + clNm = ibr.GetClassName() + if len(clNm) > 0: + return clNm + else: + lf = skeleton.GetLeaf(ibr.GetName()) + if not lf: + logger.error("Can't deduce type for branch '{0}'".format(ibr.GetName())) + else: + return lf.GetTypeName() + +import os +from contextlib import contextmanager + +@contextmanager +def insideOtherDirectory(path): + prevDir = os.getcwd() + os.chdir(path) + assert os.getcwd() != prevDir + yield + os.chdir(prevDir) + +def getARootFileName(jsonName): + """ + Get any file from the json files field + """ + return next(fn for sampDict in loadSamples(jsonName).itervalues() for fn in sampDict["files"]) + +def loadSamples(jsonName): + """ + Load all samples from the json file + + """ + import json + with open(jsonName, "r") as sampFile: + samples = json.load(sampFile) + return samples diff --git a/python/hist_opt_2v2.py b/python/hist_opt_2v2.py new file mode 100644 index 0000000..255dfda --- /dev/null +++ b/python/hist_opt_2v2.py @@ -0,0 +1,184 @@ +""" +histfactory opt2v2 (complete tree of cuts, precalculation etc.) helper code +""" + +from .treedecorators import allArgsOfOp, opDependsOn + +class NodeTreeNeedsOpCache(object): + """ Store nodes that depend on the operator to save time + """ + def __init__(self, op): + self.depends = set() + self.notDepends = set() + self.op = op + def __call__(self, aNode): + if aNode in self.depends: + return True + elif aNode in self.notDepends: + return False + else: + ret = aNode.treeNeedsOp_cache(self) + if ret: + self.depends.add(aNode) + else: + self.notDepends.add(aNode) + return ret + +class IDTNode(object): + """ Interface for a node in the dependency tree + """ + __slots__ = ("__weakref__", "_parent") + def __init__(self, parent=None): + self._parent = parent + @property + def depNodes(self): + pass + @property + def isTerminal(self): + return False + @property + def parent(self): + return self._parent + @parent.setter + def parent(self, parent): + self._parent = parent + def clone(self): + """ + Clone this node (without dependencies) + """ + pass + def ownDbgStr(self): + pass + def treeNeedsOp(self, op): + pass + def treeNeedsOp_cache(self, cache): + return self.treeNeedsOp(cache.op) ## default: don't use the cache + +class PlotNode(IDTNode): + """ Plot (terminal) node + """ + __slots__ = ("plot",) + def __init__(self, plot, parent=None): + self.plot = plot + super(PlotNode, self).__init__(parent=parent) + @property + def isTerminal(self): + return True + @property + def depNodes(self): + return tuple() + def clone(self): + return PlotNode(self.plot, parent=self.parent) + def ownDbgStr(self): ## for reasonably compact debug printing + return "PlotNode(( {0} ), {1})".format(", ".join(v.get_TTreeDrawStr() for v in self.plot.variables), self.plot.selection.weight.get_TTreeDrawStr()) + def treeNeedsOp(self, op): + return opDependsOn(self.plot.weight, op) or any(opDependsOn(var, op) for var in self.plot.variables) + +class DTNode(IDTNode): + """ Selection or reduce (non-terminal) node + """ + __slots__ = ("_deps",) + def __init__(self, parent=None, deps=None): + if deps is None: + deps = [] + self._deps = deps + super(DTNode, self).__init__(parent=parent) + def addDependentNode(self, nd): + self._deps.append(nd) + def removeDependentNode(self, nd): + self._deps.remove(nd) + @property + def depNodes(self): + return self._deps + @property + def isTerminal(self): + return False + def treeNeedsOp(self, op): + return any( nd.treeNeedsOp(op) for nd in self.depNodes ) + def treeNeedsOp_cache(self, cache): + return any( cache(nd) for nd in self.depNodes ) + +class RootNode(DTNode): + """ Special case: root node (no parent by definition, only holds deps list) + """ + __slots__ = () + def __init__(self, deps=None): + super(RootNode, self).__init__(parent=None, deps=deps) + def ownDbgStr(self): + return "/" + +class SelectionNode(DTNode): + """ Selection node (cuts list and dependencies) + """ + __slots__ = ("cuts",) + def __init__(self, cuts, parent=None, deps=None): + self.cuts = cuts + super(SelectionNode, self).__init__(parent=parent, deps=deps) + def clone(self): + return SelectionNode(self.cuts, parent=self.parent, deps=self._deps) + def aboveSel(self): + ## return the first SelectionNode in the ancestor tree (or None) + cand = self.parent + while cand and not isinstance(cand, SelectionNode): + cand = cand.parent + return cand + @property + def ownCuts(self): + parSel = self.aboveSel() + if not parSel: + return self.cuts + else: + return tuple(c for c in self.cuts if c not in parSel.cuts) + def ownDbgStr(self): ## for reasonably compact debug printing + from .treedecorators import makeExprAnd + return "SelectionNode({0}) with {1:d} dependent nodes".format(makeExprAnd(self.ownCuts).get_TTreeDrawStr(), len(self._deps)) + def treeNeedsOp(self, op): + return any( opDependsOn(ci, op) for ci in self.cuts ) or super(SelectionNode, self).treeNeedsOp(op) + def treeNeedsOp_cache(self, cache): + return any( opDependsOn(ci, cache.op) for ci in self.cuts ) or super(SelectionNode, self).treeNeedsOp_cache(cache) + +class PrecalcNode(DTNode): + """ Precalculation node (expression and dependencies) + """ + __slots__ = ("op",) + def __init__(self, op, parent=None, deps=None): + self.op = op + super(PrecalcNode, self).__init__(parent=parent, deps=deps) + def clone(self): + return PrecalcNode(self.op, parent=self.parent, deps=self._deps) + def ownDbgStr(self): ## for reasonably compact debug printing + return "PrecalcNode({0}={1}) with {2:d} dependent nodes".format(self.op.uname, self.op._get_TTreeDrawStr(), len(self._deps)) + def treeNeedsOp(self, op): + return opDependsOn(self.op, op) or super(PrecalcNode, self).treeNeedsOp(op) + def treeNeedsOp_cache(self, cache): + return opDependsOn(self.op, cache.op) or super(PrecalcNode, self).treeNeedsOp_cache(cache) + +class SkimmerFillBranchesNode(IDTNode): + __slots__ = ("branches", "fill_tree_lines") + def __init__(self, branches, parent=None, fill_tree_lines=None): + self.branches = branches + self.fill_tree_lines = fill_tree_lines + super(SkimmerFillBranchesNode, self).__init__(parent=parent) + @property + def isTerminal(self): + return True + @property + def depNodes(self): + return tuple() + def clone(self): + return SkimmerFillBranchesNode(self.branches, parent=self.parent, fill_tree_lines=self.fill_tree_lines) + def ownDbgStr(self): + return "SkimmerFillBranchesNode(...)" + def treeNeedsOp(self, op): + return any(opDependsOn(var, op) for nm,var,unm in self.branches) + + +def printTree(nd, indent=0): + ## helper to (recursively) print a tree + yield (indent*" ")+nd.ownDbgStr() + nPlots = sum( 1 for dep in nd.depNodes if isinstance(dep, PlotNode) ) + yield ((indent+1)*" ")+"{0:d} plots".format(nPlots) + for dep in nd.depNodes: + if not isinstance(dep, PlotNode): + for ln in printTree(dep, indent=indent+1): + yield ln diff --git a/python/histfactory.py b/python/histfactory.py new file mode 100644 index 0000000..36d4d0d --- /dev/null +++ b/python/histfactory.py @@ -0,0 +1,776 @@ +""" +Generate C++ code to fill the histos, with helpers to run it +""" +__all__ = ("createPlotter", "compilePlotter", "runPlotterOnSamples") + +import logging +logger = logging.getLogger(__name__) + +from itertools import izip, count, chain, groupby + +from .factoryhelpers import getTemplate, expandTemplate, expandTemplateAndWrite, getBranchType, insideOtherDirectory +from .plots import * + +def collectIdentifiersFromPlots(plots): + identifiers = set() + for i,plot in izip(count(1), plots): + if i % 200 == 0: + print "Parsing plot #{0:d}/{1:d}".format(i,len(plots)) + try: + identifiers.update(plot.cut.leafDeps) + except Exception, e: + logger.error("Problem parsing cut of plot {0} : {1}".format(plot.name, str(e))) + try: + identifiers.update(plot.weight.leafDeps) + except Exception, e: + logger.error("Problem parsing weight of plot {0} : {1}".format(plot.name, str(e))) + for j,sVar in izip(count(), plot.variables): + try: + identifiers.update(sVar.leafDeps) + except Exception, e: + logger.error("Problem parsing sVar of plot {0} : {1}".format(plot.name, str(e))) + return identifiers + + +def getHistType(plot): + assert len(plot.variables) == len(plot.binnings) and len(plot.variables) == len(plot.axisTitles) + if len(plot.variables) == 1: + return "TH1F" + elif len(plot.variables) == 2: + return "TH2F" + elif len(plot.variables) == 3: + return "TH3F" + else: + raise Exception("Invalid number of dimensions") + +def getBinningDefinitions(plot, uname): + decls = [] + for idx,bn in izip(count(), plot.binnings): + if isinstance(bn, EquidistantBinning): + pass + elif isinstance(bn, VariableBinning): + arrNm = "{name}_{idx}".format(name=uname, idx=idx) + decls.append(" double {name}[] = {{ {0} }};".format(", ".join("{0:e}".format(iEdg) for iEdg in bn.binEdges), name=arrNm)) + else: + raise Exception("Unsupported binning: {0!r}".format(bn)) + return decls + +def getBinningUses(plot, uname): + uses = [] + for idx,bn in izip(count(), plot.binnings): + if isinstance(bn, EquidistantBinning): + uses.append("{0:d}, {1:e}, {2:e}".format(bn.N, bn.mn, bn.mx)) + elif isinstance(bn, VariableBinning): + arrNm = "{name}_{idx}".format(name=uname, idx=idx) + uses.append("{0:d}, &({name}[0])".format(len(bn.binEdges)-1, name=arrNm)) + else: + raise Exception("Unsupported binning: {0!r}".format(bn)) + return ", ".join(uses) + +def getOtherHistSettings(plot, uname): + outLines = [] + ## set bin labels if necessary + ## x-axis + if len(plot.axisBinLabels) > 0 and plot.axisBinLabels[0] is not None: + axisBinLabels = plot.axisBinLabels[0] + if len(axisBinLabels) == plot.binnings[0].N: + outLines.append(" {0}".format(" ".join("{hn}->GetXaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of x bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[0].N, len(axisBinLabels))) + ## y-axis + if len(plot.axisBinLabels) > 1 and plot.axisBinLabels[1] is not None: + axisBinLabels = plot.axisBinLabels[1] + if len(axisBinLabels) == plot.binnings[1].N: + outLines.append(" {0}".format(" ".join("{hn}->GetYaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of y bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[1].N, len(axisBinLabels))) + ## z-axis + if len(plot.axisBinLabels) > 2 and plot.axisBinLabels[2] is not None: + axisBinLabels = plot.axisBinLabels[2] + if len(axisBinLabels) == plot.binnings[2].N: + outLines.append(" {0}".format(" ".join("{hn}->GetZaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of z bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[2].N, len(axisBinLabels))) + # + return outLines + +def createPlotsInnerLoop_ref(plots, uhNames): + """ + Original (reference) implementation: evaluate selection, weight and value for each and every histogram + """ + return "\n".join( + expandTemplate(getTemplate("Plot") + , CUT=plot.cut.get_TTreeDrawStr() + , WEIGHT=plot.weight.get_TTreeDrawStr() + , VAR=", ".join(v.get_TTreeDrawStr() for v in plot.variables) + , HIST=uhNames[plot] + ) for plot in plots + ) + +def greedyGroupBy(inputs, keyFunc): + """ + Greedy version of groupby + """ + items = list(inputs) + while len(items) > 0: + idxs = set() + val = keyFunc(items[0]) + idxs.add(0) + thisGroup = [ items[0] ] + for i,it in izip(count(1), items[1:]): + if keyFunc(it) == val: + idxs.add(i) + thisGroup.append(it) + yield val, thisGroup + nBefore = len(items) + items = [ it for i,it in izip(count(),items) if i not in idxs ] + assert len(items)+len(thisGroup) == nBefore + +def createPlotsInnerLoop_opt1(plots, uhNames, indent=2, alreadyReq=tuple()): + """ + More optimised plotting loop - take 1: group by cut and weight + """ + from .treedecorators import makeExprAnd + return "\n".join((indent*" ")+ln if len(ln) > 0 else ln for ln in chain.from_iterable( + [ "__cut = ({0});".format(makeExprAnd([ct for ct in selGrpPlots[0].selection.cuts if ct not in alreadyReq]).get_TTreeDrawStr()) + , "if (__cut) {" + ] + list(" {0}".format(ln) if len(ln) > 0 else ln for ln in chain.from_iterable( + [ "__weight = ({0});".format(weight.get_TTreeDrawStr()) + ] + [ "fill({HIST}.get(), {VAR}, __weight);".format(HIST=uhNames[plot], VAR=", ".join(v.get_TTreeDrawStr() for v in plot.variables)) + for plot in wgtGrpPlots + ] for weight,wgtGrpPlots in greedyGroupBy(selGrpPlots, lambda ip : ip.weight) )) + + [ "}" + , "" + ] for ct,selGrpPlots in greedyGroupBy(plots, lambda ip : ip.cut) + )) + +from .treedecorators import allArgsOfOp, opDependsOn + +def plotDependsOn(pt, other): + return any(opDependsOn(ct, other) for ct in pt.selection.cuts) or any(opDependsOn(wt, other) for wt in pt.selection.weights) or any(opDependsOn(var, other) for var in pt.variables) + +def iunique(otherGen): + """ Remove duplicates from another generator + """ + alreadyReturned = set() + for elm in otherGen: + if elm not in alreadyReturned: + yield elm + alreadyReturned.add(elm) + +def collectReduces(listOfPlots): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _addReduces(depDict, op, abovePlotOrReduce): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = abovePlotOrReduce + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + if op not in depDict: + depDict[op] = ("r_{0}".format(str(uuid.uuid4()).replace("-", "_")), set()) + assert op.uname is None + uname, deps = depDict[op] + op.uname = uname + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addReduces(depDict, ia, top) + else: + raise Exception("_addReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _addReduces(depDict, var, plot) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for reduce opts + +def removeReduceUnames(listOfPlots): + # mirror of the above: clean up afterwards + from .treedecorators import TupleOp, Reduce, Next + def _clearReduces(op): + ## recurse + if isinstance(op, TupleOp): + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + op.uname = None + for ia in op.args: + _clearReduces(ia) + else: + raise Exception("_clearReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _clearReduces(var) + +def collectPrecalc(listOfPlots): + from .treedecorators import TupleOp + def _addPrecalc(depDict, op, abovePlotOrPrecalc): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = abovePlotOrPrecalc + if op.uname is not None: + if op not in depDict: + depDict[op] = (op.uname, set()) + uname, deps = depDict[op] + op.uname = uname ## keep to make sure they get renamed to the same + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addPrecalc(depDict, ia, top) + else: + raise Exception("_addPrecalc called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _addPrecalc(depDict, var, plot) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for precalc ops + + +def commonConditionForAll(somePlots): + """ find common condition for all plots in the list (allows to move selection before reduce) + """ + inCommon = [] + notInAll = set() ## caching to avoid too much double work + for ap in somePlots: + for cdc in ap.selection.cuts: + if cdc not in inCommon and cdc not in notInAll: + if all( cdc in op.selection.cuts for op in somePlots ): + inCommon.append(cdc) + else: + notInAll.add(cdc) + return inCommon + +def createPlotsInnerLoop_opt2(plots, uhNames, indent=2): + """ + Second optimisation round: pre-calculate "expensive" reduce operations + """ + from .plots import Plot + reducesWithAllDeps = collectReduces(plots) + ## 1: not to be forgotten: plots that don't depend on these (keep this simple for now) + plotLines_noDeps = createPlotsInnerLoop_opt1([ p for p in plots if all( p not in depList for uN,depList in reducesWithAllDeps.itervalues() ) ], uhNames, indent=indent) + + ## make minimal (real reduce+plot-dependency tree) + reducesWithMinDeps = dict((redOp, (uname, + [ di for di in deps if not any( ( plotDependsOn(di, do) if isinstance(di, Plot) else opDependsOn(di, do) ) for do in deps if ( not isinstance(do, Plot) ) and ( do != di ) ) ])) + ## if you depend on another dependee, you need to be further downstream + ## top-down traversal strategy: always take the next key that is not in any list of dependees (shallow copy then pop) + for redOp,(uname, deps) in reducesWithAllDeps.iteritems()) + + ## temporary solution: put in the right order (leave out-of-bounds to selection) + def getLinesForRedsAndPlotsDepTreeTopDown(myMinDepDict, indent=0, alreadyReq=tuple()): + ##print (indent*" ")+"getLinesForRedsAndPlotsDepTreeTopDown called with {0:d} already required selections".format(len(alreadyReq)) + ##print (indent*" ")+"Treating plots: ", ", ".join(p.name for p in set(di for r,(u,ds) in myMinDepDict.iteritems() for di in ds if isinstance(di, Plot))) + ## + while len(myMinDepDict) > 0: + redsToTreat = sorted([ k for k,v in myMinDepDict.iteritems() if not any(k in od for ou,od in myMinDepDict.itervalues()) ], key=(lambda k : len(myMinDepDict[k][1])), reverse=True) + if len(redsToTreat) == 0: + print "ERROR: also the operators have multiple sibling dependencies -> not handled yet"; break + ##print (indent*" ")+"--> Adding reduces in this loop:" + ##for redOp in redsToTreat: + ## print (indent*" ")+" - ", redOp._get_TTreeDrawStr() + + for redOp in redsToTreat: + if redOp in myMinDepDict: + uName,deps = myMinDepDict[redOp] + # + commonCuts = [ ct for ct in commonConditionForAll(list(di for di in reducesWithAllDeps[redOp][1] if isinstance(di, Plot))) ] + commonCuts_apply = [ ct for ct in commonCuts if ct not in alreadyReq ] + commonCuts_applyBefore, commonCuts_applyAfter = [], [] + for ct in commonCuts_apply: + if opDependsOn(ct, redOp): + commonCuts_applyAfter.append(ct) + else: + commonCuts_applyBefore.append(ct) + # + if len(commonCuts_applyBefore) > 0: + from .treedecorators import makeExprAnd + yield (indent*" ")+"if ( {0} ) {{".format(makeExprAnd(commonCuts_applyBefore).get_TTreeDrawStr()) + else: + yield (indent*" ")+"{" + # + yield ((indent+1)*" ")+"const auto {0} = {1};".format(uName, redOp._get_TTreeDrawStr()) + # + if len(commonCuts_applyAfter) > 0: + from .treedecorators import makeExprAnd + yield ((indent+1)*" ")+"if ( {0} ) {{".format(makeExprAnd(commonCuts_applyAfter).get_TTreeDrawStr()) + else: + yield ((indent+1)*" ")+"{" + # + + req_uptohere = list(alreadyReq)+commonCuts_apply + ###print "Making block - already applied: ", req_uptohere + # + ### find those that depend on (not interdependent) reduce ops - this kind of reordering is not handled yet + otherDeps = set() + for di in deps: + if isinstance(di, Plot): + for oro,(ouN,odeps) in myMinDepDict.iteritems(): + if oro != redOp and di in odeps: + otherDeps.add(oro) + odeps.remove(di) + for oro in otherDeps: + (ouN,odeps) = myMinDepDict[oro] + yield ((indent+2)*" ")+"const auto {0} = {1};".format(ouN, oro._get_TTreeDrawStr()) + ## clean up: if we got all the dependencies, it's time to clean up + if len(odeps) == 0: + del myMinDepDict[oro] + ### + ## NOTE the bookkeeping of this part is actually done above + ##print (indent*" ")+"Passing plots to createPlotsInnerloop_opt1: ", ", ".join(di.name for di in deps if isinstance(di, Plot)) + for ln in createPlotsInnerLoop_opt1([ di for di in deps if isinstance(di, Plot) ], uhNames, indent=indent+2, alreadyReq=req_uptohere).split("\n"): + yield ln + ## can forget about dependant reduces (at this stage) they won't appear any more after this and can be handled next + ## more interesting case: C depends on A and B, D only on A, E only on B - how will they be ordered if you do everything nicely in blocks? + ## + ###del myMinDepDict[redOp] ## keep simple like this for now (other option: pass context to the same algo for the next layer) + # RECURSE + sub_depDict = dict((k,v) for k,v in myMinDepDict.iteritems() if k in reducesWithAllDeps[redOp][1]) ## -> straight down to the next level + cp_depDict = dict(sub_depDict) + for ln in getLinesForRedsAndPlotsDepTreeTopDown(cp_depDict, indent=indent+2, alreadyReq=req_uptohere): + yield ln + # + yield ((indent+1)*" ")+"}" ## inner block + # + yield (indent*" ")+"}" ## outer block + # + for k in sub_depDict: ## remove handled deps + if k not in cp_depDict: + del myMinDepDict[k] + # + for redOp in redsToTreat: + if redOp in myMinDepDict: + del myMinDepDict[redOp] + + ## 2. handle these as well + plotLines_redsWithPlots = list(getLinesForRedsAndPlotsDepTreeTopDown(dict(reducesWithMinDeps), indent=2)) + #import IPython;IPython.embed() + + ## clean up + removeReduceUnames(plots) + + return "\n".join((plotLines_noDeps, "\n".join(plotLines_redsWithPlots))) + +def createPlotsInnerLoop_opt2v2(plots, uhNames, indent=2): + """ + Second attempt: keep track of things in a bit more organised way + + Build up a tree of Plots and non-terminal nodes (selections and reduces) + """ + from .hist_opt_2v2 import RootNode, SelectionNode, PrecalcNode, PlotNode, printTree, NodeTreeNeedsOpCache + + reducesWithAllDeps = collectPrecalc(plots) + reducesWithAllDeps.update(collectReduces(plots)) # { op : (uname, depPlotsAndOps) } ## TODO to be changed - but without the unames the rest is unreadable + + def nCommonCuts_cons(listA, listB): + for i,iA,iB in izip(count(), listA, listB): + if iA != iB: + break + return i+(1 if iA == iB else 0) + def commonCuts_cons(listA, listB): + result = [] + for iA,iB in izip(listA, listB): + if iA != iB: + break + else: + result.append(iA) + return result + + logger.info("Ordering selections and plots") + from itertools import imap + rootNode = RootNode() + ## start with selections + for ap in plots: + #print "\n\nAdding plot {0} with selection of {1:d} cuts\n".format(ap.name, len(ap.selection.cuts)) + prevSelNode = rootNode + prevNCommon = 0 + selNode = rootNode + toSearch = rootNode.depNodes + #while sum(1 for sn in toSearch if isinstance(sn, SelectionNode)) > 0: + while True: + prevSelNode = selNode + if len(toSearch) > 0 and any(isinstance(nd, SelectionNode) for nd in toSearch): + selNode,nCommon = max(imap(lambda nd : ( (nd,nCommonCuts_cons(ap.selection.cuts, nd.cuts)) if isinstance(nd, SelectionNode) else (nd,0) ), toSearch), key=lambda (nd,n) : n) + else: + selNode,nCommon = None,0 + if selNode and nCommon == len(selNode.cuts): + if nCommon == len(ap.selection.cuts): + #print "Found fully matching selection node" + break + else: + if any(isinstance(sn, SelectionNode) for sn in toSearch): + #print "Found fully required selection node, going deeper..." + toSearch = selNode.depNodes ## next iteration + prevNCommon = nCommon + else: + #print "Found fully required selection node without selections, adding here" + newSelNode = SelectionNode(cuts=ap.selection.cuts, parent=selNode) + selNode.addDependentNode(newSelNode) + selNode = newSelNode + break + elif nCommon > prevNCommon: + #print "Not fully matching, but common part ({0:d} - versus {1:d} in our selection and {2:d} in best match) -> splitting".format(nCommon, len(ap.selection.cuts), len(selNode.cuts)) + newSel = SelectionNode(cuts=( ap.selection.cuts if nCommon == len(ap.selection.cuts) else commonCuts_cons(ap.selection.cuts, selNode.cuts)), parent=prevSelNode) + prevSelNode.addDependentNode(newSel) + prevSelNode.removeDependentNode(selNode) + selNode.parent = newSel + newSel.addDependentNode(selNode) + if nCommon == len(ap.selection.cuts): + #print "Adding plot to split node" + selNode = newSel + else: + #print "Adding one with our selection" + selNode = SelectionNode(cuts=ap.selection.cuts, parent=newSel) + newSel.addDependentNode(selNode) + break + else: + #print "Nothing more in common, adding one level higher" + if len(ap.selection.cuts) > prevNCommon: + #print "With selection" + selNode = SelectionNode(cuts=ap.selection.cuts, parent=prevSelNode) + prevSelNode.addDependentNode(selNode) + else: + #print "Without selection" + selNode = prevSelNode + break + selNode.addDependentNode(PlotNode(ap, parent=selNode)) + + # print "\n\nTree after adding all plot:" + # ## how does our tree look at this point? + # for ln in printTree(rootNode): + # print ln + + logger.info("Inserting pre-calculated variables") + ## make minimal (real reduce+plot-dependency tree) + reducesWithMinDeps = dict((redOp, (uname, + [ di for di in deps if not any( ( plotDependsOn(di, do) if isinstance(di, Plot) else opDependsOn(di, do) ) for do in deps if ( not isinstance(do, Plot) ) and ( do != di ) ) ])) + for redOp,(uname, deps) in reducesWithAllDeps.iteritems()) + + ## collect weights + import uuid + uwNames = dict() + for ap in plots: + wOp = ap.selection.weight + if wOp not in uwNames: + uname = "w_{0}".format(str(uuid.uuid4()).replace("-", "_")) + wOp.uname = uname + uwNames[wOp] = (uname, [ ap ]) + else: + uwNames[wOp][1].append(ap) + + reducesWithAllDeps.update(uwNames) ## yes that's a bit of a workaround... + reducesWithMinDeps.update(uwNames) + + logger.debug("Collected pre-calc expressions to insert, found {0:d}".format(len(reducesWithMinDeps))) + + while len(reducesWithMinDeps) > 0: + redsToTreat = sorted([ k for k,v in reducesWithMinDeps.iteritems() if not any(k in od for ou,od in reducesWithMinDeps.itervalues()) ], key=(lambda k : len(reducesWithMinDeps[k][1])), reverse=True) + logger.debug("Reduces to treat in this round: {0:d}".format(len(redsToTreat))) + for redOp in redsToTreat: + #nodeTreeNeedsOp = ( lambda op : ( lambda nd : nd.treeNeedsOp(redOp) ) )(redOp) + nodeTreeNeedsOp = NodeTreeNeedsOpCache(redOp) + logger.debug("Inserting reduce {0}".format(redOp._get_TTreeDrawStr())) + ## FIXME following line isn't used? + redCuts = [ ct for ct in commonConditionForAll(list(di for di in reducesWithAllDeps[redOp][1] if isinstance(di, Plot))) ] + cand = rootNode + while True: + if ( isinstance(cand, PrecalcNode) and opDependsOn(cand.op, redOp) ) or ( isinstance(cand, SelectionNode) and any( opDependsOn(ci, redOp) for ci in cand.cuts ) ): + ## TODO split the selection node if only some depend - will need that when treating precalc deps as well + logger.debug("Candidate is precalc or selection node that depends on us -> insert above") + myNode = PrecalcNode(redOp, parent=cand.parent) + cand.parent.addDependentNode(myNode) + cand.parent.removeDependentNode(cand) + cand.parent = myNode + myNode.addDependentNode(cand) + break + elif any(isinstance(nd, PlotNode) and nodeTreeNeedsOp(nd) for nd in cand.depNodes) or sum(1 for nd in cand.depNodes if nodeTreeNeedsOp(nd)) > 1: + ## TODO we should also check that our validity requirements are satisfied here, otherwise should go further + logger.debug("Candidate has a plot or several dependencies that depend on us -> insert below") + myNode = PrecalcNode(redOp, parent=cand) + for nd in cand.depNodes: + if nodeTreeNeedsOp(nd): + myNode.addDependentNode(nd) + nd.parent = myNode + for nd in myNode.depNodes: + cand.removeDependentNode(nd) + cand.addDependentNode(myNode) + break + else: ## here is where things get interesting + if not any( nodeTreeNeedsOp(nd) for nd in cand.depNodes ): + print "Nothing depends on this operation... that's most likely wrong" + break ## this is a bit problematic + else: + cand = next(nd for nd in cand.depNodes if nodeTreeNeedsOp(nd)) + logger.debug("Found one node that depends: {0}".format(cand.ownDbgStr())) + for trtd in redsToTreat: + del reducesWithMinDeps[trtd] + + # print "\n\nTree after adding all precalculated variables:" + # ## how does our tree look at this point? + # for ln in printTree(rootNode): + # print ln + + from .treedecorators import makeExprAnd + indentStr = " " ## 4 spaces + def cppLines(someNode, indent=0): + dindent = indent + closeBlock = False + if isinstance(someNode, SelectionNode): + yield (indent*indentStr)+"if ( {0} ) {{".format(makeExprAnd(someNode.ownCuts).get_TTreeDrawStr()) + dindent = indent+1 + closeBlock = True + elif isinstance(someNode, PrecalcNode): + yield (indent*indentStr)+"const auto {0} = {1};".format(someNode.op.uname, someNode.op._get_TTreeDrawStr()) + elif isinstance(someNode, PlotNode): + ## TODO check that all cuts have been applied here + yield (indent*indentStr)+"fill({HIST}.get(), {VAR}, {WEIGHT});".format( + HIST=uhNames[someNode.plot] + , VAR=", ".join(v.get_TTreeDrawStr() for v in someNode.plot.variables) + , WEIGHT=uwNames[someNode.plot.selection.weight][0] + ) + if not someNode.isTerminal: + ## first terminal nodes + for dn in someNode.depNodes: + if dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + ## then dependency tree of the others + for dn in someNode.depNodes: + if not dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + if closeBlock: + yield (indent*indentStr)+"}" + + logger.info("Generating output") + ret = "\n".join(ln for ln in cppLines(rootNode, indent=2)) + + ## clean up unames + removeReduceUnames(plots) + for wOp in uwNames.iterkeys(): + wOp.uname = None + + return ret + +################################################################################ +## MAIN METHOD ## +################################################################################ + +import os, os.path + +def createPlotter(plots, skeleton, outdir=None, **kwargs): + """ + Turned the config into the main script + + this is only a helper method that deals with creating a C++ plotter from it + """ + if not outdir: + outdir = os.getcwd() + logger.warning("No output directory set, writing plotter code to current directory {0}".format(outdir)) + else: + logger.info("Writing plotter code to output directory {0}".format(outdir)) + + ## common forced helpers + add_packages = list(kwargs.get("add_packages", [])) + include_dirs = list(kwargs.get("include_dirs", [])) + includes = list(kwargs.get("includes", [])) + sources = list(kwargs.get("sources", [])) + linked_libraries = list(kwargs.get("libraries", [])) + add_properties = [] + + # add some defaults + from .factoryhelpers import COMMON_INCLUDE_DIR + include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) + includes.append("kinematics.h") + includes.append("IndexRangeIterator.h") + + if kwargs.get("addScaleFactorsLib", False): + from . import pathCommonTools + pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) + linked_libraries.append(os.path.join(pathCP3llbb, "Framework", "libBinnedValues.so")) + include_dirs.append(os.path.join(pathCP3llbb, "Framework", "interface")) + add_properties.append('set_target_properties(plotter PROPERTIES COMPILE_FLAGS "-DSTANDALONE_SCALEFACTORS")') + + logger.info("List of requested plots: {0}".format(", ".join(plot.name for plot in plots))) + logger.info("List of requested include files: {0}".format(", ".join(inc for inc in includes))) + logger.info("List of requested source files: {0}".format(", ".join(src for src in sources))) + + ### add systematic variations histos + allPlots = list(plots)+list(chain.from_iterable( ( mplt.clone(name="__".join((mplt.name, varNm)), selection=varSel) for varNm,varSel in mplt.selection.systVars.iteritems() ) for mplt in plots )) + + import uuid + unames = dict((plot, "p_{0}".format(str(uuid.uuid4()).replace("-", "_"))) for plot in allPlots) + + expandTemplateAndWrite(os.path.join(outdir, "Plotter.h"), getTemplate("Plotter.h") + , BRANCHES="\n ".join( + 'const {typ}& {name} = tree["{name}"].read<{typ}>();'.format(typ=getBranchType(brName, skeleton._skeleton), name=brName) + for brName in collectIdentifiersFromPlots(allPlots)) + ) + + expandTemplateAndWrite(os.path.join(outdir, "Plotter.cc"), getTemplate("Plotter.cc") + , INCLUDES="\n".join('#include "{0}"'.format(incl) for incl in includes) + , BEFORE_LOOP="\n".join(chain( + ## declare histograms + chain.from_iterable(chain( + getBinningDefinitions(plot, unames[plot]) + , [ ' std::unique_ptr<{typ}> {uname}(new {typ}("{uname}", "{title}", {binning})); {uname}->SetDirectory(nullptr);'.format( + typ=getHistType(plot), uname=unames[plot], title=plot.longTitle, binning=getBinningUses(plot, unames[plot])) ] + , getOtherHistSettings(plot, unames[plot]) + ) for plot in allPlots) + , (" {0}".format(ln) for ln in list(kwargs.get("user_initialisation", []))) + )) + , IN_LOOP=createPlotsInnerLoop_opt2v2(allPlots, unames) + , AFTER_LOOP="\n".join( ## save plots + expandTemplate(getTemplate("SavePlot") + , UNIQUE_NAME=unames[plot] + , PLOT_NAME=plot.name + ) for plot in allPlots + ) + ) + + expandTemplateAndWrite(os.path.join(outdir, "CMakeLists.txt"), getTemplate("CMakeLists_plotter.txt") + , ADD_FIND="\n".join("find_package({0})".format(fnd) for fnd in add_packages) + , ADD_INCLUDE_DIRS=" ".join(include_dirs) + , ADD_SOURCES=" ".join(sources) + , ADD_PROPERTIES="\n".join(add_properties) + , ADD_LINK="\n".join("target_link_libraries(plotter {0})".format(lib) for lib in linked_libraries) + ) + +def prepareWorkdir(workdir): ## TODO rename to preparePlotsWorkdir + if os.path.exists(workdir): + raise Exception("Work directory {0} exists already!".format(workdir)) + os.mkdir(workdir) + plotterdir = os.path.join(workdir, "plotter") + histosdir = os.path.join(workdir, "histos") + plotsdir = os.path.join(workdir, "plots") + for aDir in (plotterdir, histosdir, plotsdir): + os.mkdir(aDir) + return plotterdir, histosdir, plotsdir + +################################################################################ +## HELPER: COMPILE PLOTTER DIRECTORY ## +################################################################################ + +import subprocess + +def compilePlotter(plotterdir): + """ + Copy the necessary files and compile the plotter + """ + try: + from .import pathCommonTools + subprocess.check_output(["cp", "-r", os.path.join(pathCommonTools, "cmake"), plotterdir], stderr=subprocess.STDOUT) + subprocess.check_output(["cp", os.path.join(pathCommonTools, "templates", "generateHeader.sh"), plotterdir], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + with insideOtherDirectory(plotterdir): + logger.info("Compiling plotter in {0}".format(plotterdir)) + try: + logger.debug("Calling CMake") + subprocess.check_output(["cmake", "."], stderr=subprocess.STDOUT) + logger.debug("Calling make") + subprocess.check_output(["make"], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + return os.path.join(plotterdir, "plotter.exe") + +def splitInChunks(theList, n): + from itertools import izip, islice + import math + N = len(theList) + chunkLength = int(math.ceil(1.*N/n)) + for iStart, iStop in izip(xrange(0, len(theList), chunkLength), xrange(chunkLength, len(theList)+chunkLength, chunkLength)): + yield islice(theList, iStart, min(iStop,N)) + +def _makeSplitTask(commonArgs, toSplitArgs, outDir=None, info=None): + splitSplitArgs = [ toSplitArgs ] ## no-splitting strategy + if info and "db_name" in info: + if info["db_name"].startswith("DYJetsToLL_M-10to50"): + splitSplitArgs = list(list(chunk) for chunk in splitInChunks(toSplitArgs, 4)) + elif info["db_name"].startswith("TT_Tune"): + splitSplitArgs = list(list(chunk) for chunk in splitInChunks(toSplitArgs, 3)) + cmds = [ " ".join(commonArgs+splitArgs) for splitArgs in splitSplitArgs ] + from .batchhelpers import SplitAggregationTask, HaddAction + return SplitAggregationTask(cmds, finalizeAction=HaddAction(cmds, outDir=outDir)) + +def _condorClusterFromTasks(taskList, workdir=None): + from . import pathCMS + from .condorhelpers import CommandListCondorJob + condorJob = CommandListCondorJob(list(chain.from_iterable(task.commandList for task in taskList)), + workDir=workdir, + envSetupLines=[ + "# Setup our CMS environment" + , "pushd {CMS_PATH}".format(CMS_PATH=pathCMS) + , "source /cvmfs/cms.cern.ch/cmsset_default.sh" + , "eval `scram runtime --sh`" + , "popd" + , 'export THEANO_FLAGS="base_compiledir=$(pwd)"' ## isolate compilation cache for each job + , "" + ]) + for task in taskList: + task.jobCluster = condorJob + return condorJob + +def _slurmArrayFromTasks(taskList, workdir=None): + from . import pathCMS + from .slurmhelpers import CommandListSlurmJob + cfg_opts = { + "environmentType" : "cms" + , "cmsswDir" : pathCMS + , "sbatch_time" : "0-02:00" + , "sbatch_mem" : "2048" + , "stageoutFiles" : ["*.root"] + } + slurmJob = CommandListSlurmJob(list(chain.from_iterable(task.commandList for task in taskList)), workDir=workdir, configOpts=cfg_opts) + for task in taskList: + task.jobCluster = slurmJob + return slurmJob + +def runPlotterOnSamples(plotterName, samples, outdir=None, suffix=None, useCluster=False, clusterworkdir=None, taskMon=None, verbose=False): + """ Run the plotter for a sample """ + import os.path + if useCluster: + if outdir: + if not ( os.path.exists(outdir) and os.path.isdir(outdir) ): + raise Exception("Output path {} is not a directory") + else: + import os + outdir = os.path.abspath(os.getcwd()) + taskList = [] + for sampName, sampDict in samples.iteritems(): + outFileName = ( "{0}.root".format(sampName) if suffix is None else "{0}{1}.root".format(sampName, suffix) ) + taskList.append(_makeSplitTask([plotterName, outFileName, sampDict["tree_name"], sampDict["sample_cut"]], sampDict["files"], outDir=outdir, info=sampDict)) + logger.info("{0:d} samples -> created {1:d} tasks with in total {2:d} commands".format(len(samples), len(taskList), sum(len(tsk.commandList) for tsk in taskList))) + + if False: + clusterJob = _condorClusterFromTasks(taskList, workdir=clusterworkdir) + else: + clusterJob = _slurmArrayFromTasks(taskList, workdir=clusterworkdir) + + clusterJob.submit() + if taskMon: + taskMon.add([clusterJob], taskList) + logger.info("Added slurm job to monitor") + else: + from .slurmhelpers import makeSlurmTasksMonitor as makeTasksMonitor + mon = makeTasksMonitor([clusterJob], taskList) + mon.collect() ## wait here + + else: ## run one after the other + for sampName, sampDict in samples.iteritems(): + outFileName = ( "{0}.root".format(sampName) if suffix is None else "{0}{1}.root".format(sampName, suffix) ) + if outdir: + outFileName = os.path.join(outdir, outFileName) + logger.info("Writing histograms for sample {0} to file {1}".format(sampName, outFileName)) + cmdTokens = [plotterName, outFileName, sampDict["tree_name"], sampDict["sample_cut"]]+sampDict["files"] + try: + allout = subprocess.check_output(cmdTokens, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(" ".join(e.cmd), e.returncode, e.output)) + raise e + if verbose: + logger.info("Output of {}:".format(" ".join(cmdTokens))) + print allout + logger.info("END of command output") diff --git a/python/lldeco.py b/python/lldeco.py new file mode 100644 index 0000000..ec228f9 --- /dev/null +++ b/python/lldeco.py @@ -0,0 +1,113 @@ +""" +Helper classes for llX tree decorators +""" +__all__ = ("leptonAdaptor", "onlyMuon", "onlyElectron", "LeptonScaleFactor", "DiLeptonScaleFactor") + +from .treedecorators import PODStub, SwitchOp + +class LeptonStub(PODStub): + """ Reference to a lepton (electron or muon), or anything operation on it """ + __slots__ = ("el", "mu") + def __init__(self, parent, el=None, mu=None): + self.el = el if el is not None else parent.record.electrons[parent.idx] + self.mu = mu if mu is not None else parent.record.muons[parent.idx] + tpNm = self.el._typeName + assert self.mu._typeName == tpNm + super(LeptonStub, self).__init__(parent, tpNm) + @property + def op(self): + return SwitchOp(self._parent.isEl, self.el, self.mu) + def _get(self, name): + try: + return LeptonStub(self._parent, el=getattr(self.el, name), mu=getattr(self.mu, name)) + except Exception, e: + raise AttributeError("Problem getting attribute '{0}' for LeptonStub with {1} and {2}: {3}".format(name, self.el, self.mu, str(e))) + def __call__(self, *args): + try: ## TODO the same as for _binaryOp + return LeptonStub(self._parent, el=self.el(*args), mu=self.mu(*args)) + except Exception, e: + raise AttributeError("Problem with __all__ on LeptonStub with {0} and {1}: {2}".format(self.el, self.mu, str(e))) + def __repr__(self): + return "LeptonStub({0!r}, el={1!r}, mu={2!r})".format(self._parent, self.el, self.mu) + + def _binaryOp(self, opName, other, outType="Double_t"): + if isinstance(other, LeptonStub) and ( self._parent == other._parent ): + return LeptonStub(self._parent, + el=self.el._binaryOp(opName, other.el, outType=outType), + mu=self.mu._binaryOp(opName, other.mu, outType=outType)) + else: + return super(LeptonStub, self)._binaryOp(opName, other, outType=outType) + +def onlyElectron(fun, muonValue=-1.): + """ apply fun to the electron (or fill the muon value) """ + return lambda l : op.switch(l.isEl, fun(l.L.el), muonValue) +def onlyMuon(fun, electronValue=-1.): + """ apply fun to the muon (or fill the electron value) """ + return lambda l : op.switch(op.NOT(l.isEl), fun(l.L.mu), electronValue) +def leptonAdaptor(elFun, muFun): + """ apply fun to the electron or the muon, as applicable """ + return lambda l : op.switch(l.isEl, elFun(l.L.el), muFun(l.L.mu)) + +from .treedecorators import SmartTupleStub + +class LeptonRef(SmartTupleStub._RefToOther): + __slots__ = tuple() + def __init__(self, name, parent=None): + super(LeptonRef, self).__init__(name, None, parent=parent) + def __getattr__(self, name): + if name == self._name: + return LeptonStub(self._parent) + else: + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __repr__(self): + return "LeptonRef({0!r}, parent={1!r})".format(self._name, self._parent) + +def prodIfIterable(sfArg, **kwargs): + return (( lambda x : op.product(*(iw(x, **kwargs) for iw in sfArg)) ) + if hasattr(sfArg, "__iter__") else + ( lambda x : sfArg(x, **kwargs) )) + +from .treedecorators import op, boolType, makeConst +class LeptonScaleFactor(object): + def __init__(self, elSF, muSF, cache=True): + self.elSF = elSF + self.muSF = muSF + self.cache = cache + def __call__(self, lepton, variation="Nominal"): + kwargs = dict(withMCCheck=False, precalc=False, variation=variation) + expr = op.switch(op.extVar(boolType, "runOnMC") + , leptonAdaptor(prodIfIterable(self.elSF, **kwargs), prodIfIterable(self.muSF, **kwargs))(lepton) + , makeConst(1., "Float_t") + ) + if self.cache: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr + +class DiLeptonScaleFactor(object): + class _LL(object): + def __init__(self, l1, l2): + self.l1 = l1 + self.l2 = l2 + + def __init__(self, elelSF=None, elmuSF=None, muelSF=None, mumuSF=None, cache=True): + assert all(arg is not None for arg in (elelSF, elmuSF, muelSF, mumuSF)) + self.elelSF = elelSF + self.elmuSF = elmuSF + self.muelSF = muelSF + self.mumuSF = mumuSF + self.cache = cache + def __call__(self, ll, variation="Nominal"): + kwargs = dict(withMCCheck=False, precalc=False, variation=variation) + expr = op.switch(op.extVar(boolType, "runOnMC") + , op.switch(ll.isElEl, prodIfIterable(self.elelSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.el, ll.l2.L.el)) + , op.switch(ll.isElMu, prodIfIterable(self.elmuSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.el, ll.l2.L.mu)) + , op.switch(ll.isMuEl, prodIfIterable(self.muelSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.mu, ll.l2.L.el)) + , op.switch(ll.isMuMu, prodIfIterable(self.mumuSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.mu, ll.l2.L.mu)) + , makeConst(1., "Float_t"))))) + , makeConst(1., "Float_t") + ) + if self.cache: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr diff --git a/python/nanoaoddeco.py b/python/nanoaoddeco.py new file mode 100644 index 0000000..dc85199 --- /dev/null +++ b/python/nanoaoddeco.py @@ -0,0 +1,62 @@ +from .treedecorators import levelsAbove, addIntoHierarchy, TreeStub, BranchGroupStub, SmartTupleStub + +def decoratedNanoAOD(nnAODTree): + allLvs = dict((lv.GetName(), lv) for lv in nnAODTree.GetListOfLeaves()) + noCountLvs = set() + collectionLvs = dict() + for lvNm, lv in allLvs.iteritems(): + cntLv = lv.GetLeafCount() + if cntLv: + if cntLv.GetName() not in collectionLvs: + collectionLvs[cntLv.GetName()] = [] + collectionLvs[cntLv.GetName()].append(lvNm) + else: + noCountLvs.add(lvNm) + + import re + refPat = re.compile("(?P\w+)Idx(?P\w+)?") + def prefix_to_collectionName(prefix): + return "{0}{1}s".format(prefix[0].lower(), prefix[1:].rstrip("_")) # un-capitalize + ## construct the collections + tree = TreeStub(nnAODTree) + evtHierarchy = levelsAbove([], "event") + # > event + for cntNm, collAttrs in collectionLvs.iteritems(): + prefix = "{}_".format(cntNm[1:]) ## from nJet to Jet_ + if not all(lvNm.startswith(prefix) for lvNm in collAttrs): + if all(lvNm.startswith(prefix[:-1]) for lvNm in collAttrs): + prefix = prefix[:-1] + else: + print "Warning: not all leafs countable with '{0}' have names starting with '{1}': {2}".format(cntNm, prefix, ", ".join(lvNm for lvNm in collAttrs if not lvNm.startswith(prefix))) + objs_smart = BranchGroupStub(prefix) + collName = prefix_to_collectionName(prefix) + addIntoHierarchy(collName, objs_smart, tree, evtHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(cntNm) ]) + for lvNm in collAttrs: + name_nopref = lvNm[len(prefix):] + m = refPat.match(name_nopref) + if m: + target = m.group("target") + nm = ( "{0}{1}".format(target, m.group("suff")) if m.group("suff") else target ) + targetColl = ("genParts" if target == "mcMatch" else prefix_to_collectionName(target)) + objs_smart._addCapability(SmartTupleStub._RefToOther(nm, ( lambda collN, idxN : ( lambda obj : getattr(obj.event, collN)[getattr(obj, idxN)] ) )(targetColl, name_nopref), repl=(name_nopref,))) + + addIntoHierarchy("PV", BranchGroupStub("PV_"), tree, evtHierarchy) + + addIntoHierarchy("HLT", BranchGroupStub("HLT_"), tree, evtHierarchy) + addIntoHierarchy("Flag", BranchGroupStub("Flag_"), tree, evtHierarchy) + + addIntoHierarchy("LHE", BranchGroupStub("LHE_"), tree, evtHierarchy) + + addIntoHierarchy("MET", BranchGroupStub("MET_"), tree, evtHierarchy) + addIntoHierarchy("TkMET", BranchGroupStub("TkMET_"), tree, evtHierarchy) + addIntoHierarchy("CaloMET", BranchGroupStub("CaloMET_"), tree, evtHierarchy) + addIntoHierarchy("RawMET", BranchGroupStub("RawMET_"), tree, evtHierarchy) + addIntoHierarchy("PuppiMET", BranchGroupStub("PuppiMET_"), tree, evtHierarchy) + + addIntoHierarchy("SoftActivityJet", BranchGroupStub("SoftActivityJet"), tree, evtHierarchy) + addIntoHierarchy("HLTrigger", BranchGroupStub("HLTrigger"), tree, evtHierarchy) + addIntoHierarchy("fixedGridRhoFastjet", BranchGroupStub("fixedGridRhoFastjet"), tree, evtHierarchy) + # > event + + return tree diff --git a/python/plotithelpers.py b/python/plotithelpers.py new file mode 100644 index 0000000..c186176 --- /dev/null +++ b/python/plotithelpers.py @@ -0,0 +1,26 @@ +""" +Automatically generate plotIt yml file from list of plots etc. +""" + +def writePlotIt(plotList, outfile): + for plot in plotList: + if len(plot.variables) == 1: + plotopts = { + "x-axis" : '"{0}"'.format(plot.axisTitles[0]) + , "y-axis" : "Evt" + , "y-axis-format" : '"%1% / %2$.0f"' + , "normalized" : "false" + , "x-axis-format" : "[{0:e}, {1:e}]".format(plot.binnings[0].minimum, plot.binnings[0].maximum) + , "log-y" : "both" + , "y-axis-show-zero" : "true" + , "save-extensions" : '["pdf"]' + , "show-ratio" : "true" + , "sort-by-yields" : "false" + } + plotopts.update(plot.plotopts) ## user can set everything, but the above defaults are always there + frag = "\n".join([ + "'{name}':".format(name=plot.name) + ]+[ + " {0}: {1}".format(k,v) for k,v in plotopts.iteritems() + ]+['']) + outfile.write(frag) diff --git a/python/plots.py b/python/plots.py new file mode 100644 index 0000000..55947ea --- /dev/null +++ b/python/plots.py @@ -0,0 +1,148 @@ +""" +Helper classes describing histograms, binnings etc. +""" +__all__ = ("Plot", "EquidistantBinning", "VariableBinning") + +import numpy as np +from itertools import chain + +class EquidistantBinning(object): + """ Equidistant binning + """ + __slots__ = ("__weakref__", "N", "mn", "mx") + def __init__(self, N, mn, mx): + self.N = N + self.mn = mn + self.mx = mx + @property + def minimum(self): + return self.mn + @property + def maximum(self): + return self.mx + +class VariableBinning(object): + """ Variable-sized bins + """ + __slots__ = ("__weakref__", "binEdges") + def __init__(self, binEdges): + self.binEdges = np.array(binEdges) + @property + def N(self): + return len(self.binEdges)-1 + @property + def minimum(self): + return self.binEdges[0] + @property + def maximum(self): + return self.binEdges[-1] + +from .treedecorators import adaptArg + +class Plot(object): + """ Plot representation (specifications for making a 1,2,3,...-dimensional histogram) + """ + __slots__ = ("__weakref__", "name", "variables", "selection", "binnings", "title", "axisTitles", "axisBinLabels", "plotopts") + def __init__(self, name, variables, selection, binnings, title="", axisTitles=tuple(), axisBinLabels=tuple(), plotopts=None): + self.name = name + self.variables = variables + self.selection = selection + self.binnings = binnings + self.title = title + self.axisTitles = axisTitles + self.axisBinLabels = axisBinLabels + self.plotopts = plotopts if plotopts else dict() + def clone(self, name=None, variables=None, selection=None, binnings=None, title=None, axisTitles=None, axisBinLabels=None, plotopts=None): + """ Low-level helper method: copy with optional re-setting of attributes + """ + return Plot( (name if name is not None else self.name) + , (variables if variables is not None else self.variables) + , (selection if selection is not None else self.selection) + , (binnings if binnings is not None else self.binnings) + , title=(title if title is not None else self.title) + , axisTitles=(axisTitles if axisTitles is not None else self.axisTitles) + , axisBinLabels=(axisBinLabels if axisBinLabels is not None else self.axisBinLabels) + , plotopts=(plotopts if plotopts is not None else self.plotopts) + ) + + @property + def cut(self): + return self.selection.cut + @property + def weight(self): + return self.selection.weight + + @property + def longTitle(self): + return ";".join(chain([self.title], self.axisTitles)) + + def __repr__(self): + return "Plot({0!r}, {1!r}, {2!r}, {3!r}, title={4!r}, axisTitles={5!r})".format(self.name, self.variables, self.selection, self.binnings, self.title, self.axisTitles) + + @staticmethod + def make1D(name, variable, selection, binning, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""),) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None),) + return Plot(name, (adaptArg(variable),), selection, (binning,), **kwargs) + @staticmethod + def make2D(name, variables, selection, binnings, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""), kwargs.pop("yTitle", "")) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None), kwargs.pop("yBinLabels", None)) + return Plot(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs) + @staticmethod + def make3D(name, variables, selection, binnings, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""), kwargs.pop("yTitle", ""), kwargs.pop("zTitle", "")) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None), kwargs.pop("yBinLabels", None), kwargs.pop("zBinLabels", None)) + return Plot(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs) + +class Selection(object): + """ Group of selection requirements, weight factors and a handle on the candidate (they logically belong together) + """ + __slots__ = ("__weakref__", "cuts", "weights", "candidate", "systVars") + def __init__(self, cuts, weights, candidate): + self.cuts = [ adaptArg(cut, "Bool_t") for cut in list(cuts) ] + self.weights = [ adaptArg(wgt, "Float_t") for wgt in list(weights) ] + self.candidate = candidate + self.systVars = dict() + def clone(self, cuts=None, weights=None, candidate=None): + """ Low-level helper method: copy with optional re-setting of attributes + """ + return Selection( (cuts if cuts is not None else self.cuts) + , (weights if weights is not None else self.weights) + , (candidate if candidate is not None else self.candidate) + ) + @property + def cut(self): + from .treedecorators import makeExprAnd + return makeExprAnd(self.cuts) + @property + def weight(self): + from .treedecorators import makeExprProduct + return makeExprProduct(self.weights) + def __repr__(self): + return "Selection({0!r}, {1!r}, {2!r})".format(self.cut, self.weight, self.candidate) + def __eq__(self, other): + return ( ( len(self.cuts) == len(other.cuts) ) and all( sc == oc for sc,oc in izip(self.cuts, other.cuts) ) + and ( len(self.weights) == len(other.weights) ) and all( sw == ow for sw,ow in izip(self.weights, other.weights) ) + and ( self.candidate == other.candidate ) ) + + @staticmethod + def addTo(prev, cut=None, weight=None, candidate=None): + """ + Create a new selection by adding a cut and/or weight, and possibly replacing the candidate + """ + newCuts = list(prev.cuts) + if cut is not None: + if hasattr(cut, "__len__"): + newCuts += list(adaptArg(ct, "Bool_t") for ct in cut) + else: + newCuts.append(adaptArg(cut, "Bool_t")) + newWeights = list(prev.weights) + if weight is not None: + if hasattr(weight, "__len__"): + newWeights += list(adaptArg(wgt, "Float_t") for wgt in weight) + else: + newWeights.append(adaptArg(weight, "Float_t")) + return Selection(newCuts, newWeights, candidate if candidate is not None else prev.candidate) + + ## TODO add more helpers for manipulation diff --git a/python/scalefactors.py b/python/scalefactors.py new file mode 100644 index 0000000..c76a1e8 --- /dev/null +++ b/python/scalefactors.py @@ -0,0 +1,278 @@ +""" +Helpers for configuring scale factors, fake rates etc. + +The basic configuration parameter is the json file path for a set of factors. +There two basic types are +- lepton scale factors (dependent on a number of object variables, e.g. PT and ETA), +- jet (b-tagging) scale factors (grouped set for different flavours, for convenience) + +Different values (depending on the data-taking period) +can be taken into account by weighting or by randomly sampling. +""" +""" +Common definitions of scale factors +""" +__all__ = ("get_scalefactors",) + +import os.path +from itertools import chain, ifilter + +from . import pathCP3llbb +def localize_llbbFwk(aPath, cp3llbbBase=pathCP3llbb): + return os.path.join(cp3llbbBase, "Framework", "data", "ScaleFactors", aPath) + + +lumiPerPeriod_2016 = { + "B" : 5785.152 ## averaged 5783.740 (DoubleMuon), 5787.976 (DoubleEG) and 5783.740 (MuonEG) - max dev. from average is 5.e-4 << lumi syst + , "C" : 2573.399 + , "D" : 4248.384 + , "E" : 4009.132 + , "F" : 3101.618 + , "G" : 7540.488 + , "H" : 8605.689 + ## + , "Run271036to275783" : 6274.191 + , "Run275784to276500" : 3426.131 + , "Run276501to276811" : 3191.207 + } + +hwwMuonPeriods_2016 = [ "Run271036to275783", "Run275784to276500", "Run276501to276811" ] +hwwElePeriods_2016 = [] ## TODO + +all_scalefactors = { + "electron_2015_76" : dict((k,localize_llbbFwk(v)) for k, v in chain( + dict(("id_{wp}".format(wp=wp.lower()), ("Electron_CutBasedID_{wp}WP_fromTemplates_withSyst_76X.json".format(wp=wp))) + for wp in ("Veto", "Loose", "Medium", "Tight")).iteritems() + , { "hww_wp" : "Electrons_HWW_CutBasedID_TightWP_76X_forHWW_Final.json" }.iteritems() + )) + , "muon_2015_76" : dict((k, localize_llbbFwk(v)) for k, v in chain( + dict(("id_{wp}".format(wp=wp.lower()), "Muon_{wp}ID_genTracks_id.json".format(wp=wp)) for wp in ("Soft", "Loose", "Medium")).iteritems() + , { "id_tight" : "Muon_TightIDandIPCut_genTracks_id.json" }.iteritems() + , dict(("iso_{isowp}_id_{idwp}".format(isowp=isowp.lower(), idwp=idwp.lower()), "Muon_{isowp}RelIso_{idwp}ID_iso.json".format(isowp=isowp, idwp=idwp)) + for (idwp,isowp) in (("Loose", "Loose"), ("Loose", "Medium"), ("Loose", "Tight"), ("Tight", "Medium"), ("Tight", "Tight")) ).iteritems() + , { "id_hww" : "Muon_MediumID_Data_MC_25ns_PTvsETA_HWW_76.json" + , "iso_tight_id_hww" : "Muon_ISOTight_Data_MC_25ns_PTvsETA_HWW.json" }.iteritems() + )) + , "btag_2015_76" : dict() ## TODO + ## 2016 CMSSW_8_0_... + , "muon_2016_80" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + ## https://twiki.cern.ch/twiki/bin/view/CMS/MuonReferenceEffsRun2#Tracking_efficiency_provided_by + { "tracking" : "Muon_tracking_BCDEFGH.json" }.iteritems() + ## https://twiki.cern.ch/twiki/bin/viewauth/CMS/MuonWorkInProgressAndPagResults + , dict(("id_{wp}".format(wp=wp.lower()), [ (eras, "Muon_{wp}ID_genTracks_id_{era}.json".format(wp=wp, era=eras)) for eras in ("BCDEF", "GH") ]) for wp in ("Loose", "Medium", "Tight")).iteritems() + , { "id_medium2016" : [ (eras, "Muon_MediumID2016_genTracks_id_{era}.json".format(era=eras)) for eras in ("BCDEF", "GH") ] }.iteritems() + , dict(("iso_{isowp}_id_{idwp}".format(isowp=isowp.lower(), idwp=idwp.lower()) + , [ (eras, "Muon_{isowp}ISO_{idwp}ID_iso_{era}.json".format(isowp=isowp, idwp=idwp, era=eras)) for eras in ("BCDEF", "GH") ]) + for (isowp, idwp) in [("Loose", "Loose"), ("Loose", "Medium"), ("Loose", "Tight"), ("Tight", "Medium"), ("Tight", "Tight")]).iteritems() + ## https://twiki.cern.ch/twiki/bin/view/CMS/HWW2016TriggerAndIdIsoScaleFactorsResults + , { "iso_tight_hww" : [ ((era,), "Muon_data_mc_ISOTight_Run2016_{era}_PTvsETA_HWW.json".format(era=era)) for era in hwwMuonPeriods_2016 ] }.iteritems() + , { "id_tight_hww" : [ ((era,), "Muon_data_mc_TightID_Run2016_{era}_PTvsETA_HWW.json".format(era=era)) for era in hwwMuonPeriods_2016 ] }.iteritems() + )) + ## https://twiki.cern.ch/twiki/bin/view/CMS/EgammaIDRecipesRun2#Efficiencies_and_scale_factors + , "electron_2016_ichep2016" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + { "gsf_tracking" : "Electron_EGamma_SF2D_gsfTracking.json" }.iteritems() + , dict(("id_{wp}".format(wp=wp), "Electron_EGamma_SF2D_{wp}.json".format(wp=wp)) for wp in ("veto", "loose", "medium", "tight")).iteritems() + , { "hww_wp" : [ ((era,), "Electron_Tight_Run2016_{era}_HWW.json".format(era=era)) for era in hwwElePeriods_2016 ] }.iteritems() + )) + , "electron_2016_moriond2017" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + { "gsf_tracking" : "Electron_EGamma_SF2D_reco_moriond17.json" }.iteritems() + , dict(("id_{wp}".format(wp=wp), "Electron_EGamma_SF2D_{wp}_moriond17.json".format(wp=wp)) for wp in ("veto", "loose", "medium", "tight")).iteritems() + )) + , "eleltrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Electron_IsoEle23Leg", "Electron_IsoEle12Leg", "Electron_IsoEle23Leg", "Electron_IsoEle12Leg")) + , "elmutrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Electron_IsoEle23Leg", "Electron_IsoEle12Leg", "Muon_XPathIsoMu23leg", "Muon_XPathIsoMu8leg")) + , "mueltrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Muon_XPathIsoMu23leg", "Muon_XPathIsoMu8leg", "Electron_IsoEle23Leg", "Electron_IsoEle12Leg")) + , "mumutrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Muon_DoubleIsoMu17Mu8_IsoMu17leg", "Muon_DoubleIsoMu17TkMu8_IsoMu8legORTkMu8leg", "Muon_DoubleIsoMu17Mu8_IsoMu17leg", "Muon_DoubleIsoMu17TkMu8_IsoMu8legORTkMu8leg")) + ## https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation80XReReco + , "btag_2016_moriond2017" : dict((k,( tuple(localize_llbbFwk(fv) for fv in v) if isinstance(v,tuple) and all(isinstance(fv, str) for fv in v) + else [ (eras, tuple(localize_llbbFwk(fpath) for fpath in paths)) for eras,paths in v ])) + for k, v in + dict(("cMVAv2_{wp}".format(wp=wp), tuple("BTagging_{wp}_{flav}_{calib}_cmvav2_BtoH_moriond17.json".format(wp=wp, flav=flav, calib=calib) for (flav, calib) in (("lightjets", "incl"), ("cjets", "ttbar"), ("bjets", "ttbar")))) for wp in ("loose", "medium", "tight")).iteritems() + ) + } + +from .treedecorators import op +binningVariablesByName = { + "Eta" : lambda obj : obj.p4.Eta() + , "ClusEta" : lambda obj : obj.clusterEta + , "AbsEta" : lambda obj : op.abs(obj.p4.Eta()) + , "AbsClusEta": lambda obj : op.abs(obj.clusterEta) + , "Pt" : lambda obj : obj.p4.Pt() + } ## TODO add more? + +from collections import OrderedDict as odict +def getBinningVarNames(jsonpath): + import json + with open(jsonpath, "r") as jsf: + cont = json.load(jsf) + return tuple(cont["variables"]) + +class BinningParameters(object): + def __init__(self, binningVars): + self.binningVars = binningVars + def __call__(self, obj): + return op.construct("Parameters", + (op.initList("std::initializer_list", "Parameters::value_type::value_type", ( + op.initList("Parameters::value_type::value_type", "float", (op.extVar("int", "BinningVariable::{0}".format(bvNm.replace("Clus", ""))), bv(obj))) + for bvNm,bv in self.binningVars.iteritems())),) + ) + +def getBinningParameters(bVarNames, isElectron=False, moreVars=dict()): + if isElectron: + bVarNames = [ k.replace("Eta", "ClusEta") for k in bVarNames ] + theDict = dict(binningVariablesByName) + theDict.update(moreVars) + return BinningParameters(odict((k,theDict[k]) for k in bVarNames)) + +import ROOT +ROOT.gROOT.ProcessLine(".I {0}".format(os.path.join(pathCP3llbb, "Framework", "interface"))) +ROOT.gROOT.ProcessLine("#define STANDALONE_SCALEFACTORS") +ROOT.gROOT.ProcessLine('#include "BinnedValues.h"') + +class ScaleFactor(object): + def __init__(self, name, initLines=None, args=None): + self._name = name + self._initLines = initLines if initLines is not None else [] + self._args = args if args is not None else [] + @property + def cpp_initCode(self): + return tuple(self._initLines) + def __call__(self, obj, variation="Nominal", withMCCheck=True, precalc=True, check=None): + from .treedecorators import makeConst, boolType + expr = op.extMethod("{0}.get".format(self._name))(*tuple(chain( + list(a(obj) for a in self._args) + , (op.extVar("int", variation),) + ))) + if check: + expr = op.switch(check, expr, makeConst(1., "Float_t")) + if withMCCheck: + expr = op.switch(op.extVar(boolType, "runOnMC"), expr, makeConst(1., "Float_t")) + if precalc: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr + +def lepton_scalefactor(elSF, muSF, obj): + pass + +def get_scalefactors(name, objType, key, periods=None, combine=None, additionalVariables=dict()): + ## get the right config + if isinstance(key, tuple): + # interpret key=("a", "b") as ...["a"]["b"] + mainKey = key[0] + config = all_scalefactors[key[0]] + for idx in xrange(1,len(key)): + config = config[key[idx]] + else: + mainKey = key + config = all_scalefactors[key] + + if periods is None: + if "2016" in mainKey: + periods = "BCDEFGH" + else: + periods = [] + periods = set(periods) + + combPrefix = "" + objStr = "" + initLines = tuple() + sfArgs = [] + + if combine is not None: + combPrefix = { "weight" : "W" + , "sample" : "Smp" }.get(combine, "W") + + def doubleQuote(aStr): + return '"{0}"'.format(aStr) + def initList(elms): + return "{{ {0} }}".format(", ".join(elms)) + def mapInitList(items): + return initList(initList((k,v)) for k,v in items) + + if objType == "lepton": + if isinstance(config, str): + initLines = ("{obj}ScaleFactor {nm}{{{pth}}};".format(nm=name, obj=objStr, pth=doubleQuote(config)),) + sfArgs = [ getBinningParameters(getBinningVarNames(config), isElectron=(key[0].split("_")[0] == "electron"), moreVars=additionalVariables) ] + else: + if combPrefix == "": + raise ValueError("A combination mode needs to be specified for this scale factor") + selConfigs = list(ifilter(lambda (w,v) : w != 0., + ((sum(lumiPerPeriod_2016[ier] for ier in eras if ier in periods),path) + for eras,path in config if any(ier in periods for ier in eras)))) + if len(selConfigs) > 1: + initLines = tuple(["std::vector> __tmp_{nm};".format(nm=name)] + +[ "__tmp_{nm}.emplace_back(std::unique_ptr{{new {obj}ScaleFactor{{{0}}}}});".format(doubleQuote(path), obj=objStr, nm=name) for wgt,path in selConfigs ] + +["{cmb}{obj}ScaleFactor {nm}{{{{ {0} }}, std::move(__tmp_{nm}) }};".format(", ".join("{0:e}".format(wgt) for wgt,path in selConfigs), nm=name, obj=objStr, cmb=combPrefix)] + ) + else: + initLines = ("{obj}ScaleFactor {nm}{{{pth}}};".format(nm=name, obj=objStr, pth=doubleQuote(selConfigs[0][1])),) + bVarNames = set(chain.from_iterable(getBinningVarNames(iPth) for iWgt,iPth in selConfigs)) + sfArgs = [ getBinningParameters(bVarNames, isElectron=(key[0].split("_")[0] == "electron"), moreVars=additionalVariables) ] + + elif objType == "dilepton": + objStr = "DiLeptonFromLegs" + if isinstance(config, tuple) and len(config) == 4: + if not all(isinstance(iCfg, str) for iCfg in config): + raise TypeError("Config for dilepton scale factor should be quadruplet of paths or list f weights and triplets, found {0}".format(config)) + else: + initLines = ("{obj}ScaleFactor {nm}{{{args}}};".format(nm=name, obj=objStr, args=", ".join("std::unique_ptr(new ScaleFactor({0}))".format(doubleQuote(leplegCfg)) for leplegCfg in config)),) + sfArgs = [ (lambda bp : (lambda ll : bp(ll.l1)))(getBinningParameters(set(chain(getBinningVarNames(config[0]), getBinningVarNames(config[1]))), moreVars=additionalVariables)) + , (lambda bp : (lambda ll : bp(ll.l2)))(getBinningParameters(set(chain(getBinningVarNames(config[2]), getBinningVarNames(config[3]))), moreVars=additionalVariables)) + ] + else: + raise NotImplementedError("Still to do this part") + + elif objType == "jet": + objStr = "BTagging" + if isinstance(config, tuple) and len(config) == 3: + if not all(isinstance(iCfg, str) for iCfg in config): + raise TypeError("Config for b-tagging should be triplet of paths or list of weights and triplets, found {0}".format(config)) + else: + initLines = ("{obj}ScaleFactor {nm}{{{0}}};".format(", ".join(doubleQuote(iCfg) for iCfg in config), obj=objStr, nm=name),) + + bVarNames = set(chain.from_iterable(getBinningVarNames(iCfg) for iCfg in config)) + sfArgs = [ getBinningParameters(bVarNames, moreVars=additionalVariables) ] + else: + if not ( all((isinstance(iCfg, tuple) and len(iCfg) == 3 and all(isinstance(iPth, str) for iPth in iCfg) ) for iCfg in config) ): + raise TypeError("Config for b-tagging should be triplet of paths or list of weights and triplets, found {0}".format(config)) + else: + if combPrefix == "": + raise ValueError("A combination mode needs to be specified for this scale factor") + selConfigs = list(ifilter(lambda (w,v) : w != 0., + ((sum(lumiPerPeriod_2016[ier] for ier in eras if ier in periods),paths) + for eras,paths in config if any(ier in periods for ier in eras)))) + if len(selConfigs) > 1: + initLines = tuple(["std::vector> __tmp_{nm};".format(obj=objStr, nm=name)] + +[ "__tmp_{nm}.emplace_back(std::unique_ptr{{new {obj}ScaleFactor{{{0}}}}});".format(", ".join(doubleQuote(iPth) for iPth in paths), obj=objStr, nm=name) for wgt,paths in selConfigs ] + +["{cmb}{obj}ScaleFactor {nm}{{ {{ {0} }}, std::move(__tmp_{nm}) }};".format(", ".join("{0:e}".format(wgt) for wgt,paths in selConfigs), nm=name, obj=objStr, cmb=combPrefix)] + ) + else: + initLines = ("{obj}ScaleFactor {nm}{{{0}}};".format(", ".join(doubleQuote(iCfg) for iCfg in config[0][1]), obj=objStr, nm=name),) + + bVarNames = set(chain.from_iterable(getBinningVarNames(iPth) for iWgt,paths in selConfigs for iPth in paths)) + sfArgs = [ getBinningParameters(bVarNames, moreVars=additionalVariables) ] + sfArgs.append(lambda j : op.extMethod("IJetScaleFactor::get_flavour")(j.hadronFlavor)) + else: + raise ValueError("Unknown object type: {0}".format(objType)) + + ### cpp_initCode + return ("{typ} {name}{{{0}}};".format( + ", ".join(self._initArgs) + , typ=self._cppType, name=self._name + ),) ## combPrefix objStr + ### + + return ScaleFactor(name, initLines, args=sfArgs) + + +if __name__ == "__main__": + #aSF = get_scalefactors("test_mu", "lepton", ("muon_2015_76", "iso_loose_id_loose")) + lSF = get_scalefactors("test", "lepton", ("muon_2016_80", "iso_loose_id_loose"), combine="weight") + jSF = get_scalefactors("testb", "jet", ("btag_2016_moriond2017", "cMVAv2_loose"), combine="weight") + llSF = get_scalefactors("testTrig", "dilepton", "eleltrig_2016_HHMoriond17") diff --git a/python/slurmhelpers.py b/python/slurmhelpers.py new file mode 100644 index 0000000..c66393a --- /dev/null +++ b/python/slurmhelpers.py @@ -0,0 +1,182 @@ +""" +Slurm tools (based on previous condorhelpers and cp3-llbb/CommonTools condorSubmitter and slurmSubmitter) +""" +__author__ = "Pieter David " +__date__ = "April 2017" + +import logging +logger = logging.getLogger(__name__) + +import os.path +import subprocess + +from .batchhelpers import CommandListJob + +SlurmJobStatus = ["PENDING", "RUNNING", "COMPLETED", "FAILED", "COMPLETING", "CONFIGURING", "CANCELLED", "BOOT_FAIL", "NODE_FAIL", "PREEMPTED", "RESIZING", "SUSPENDED", "TIMEOUT", "unknown"] + +from contextlib import contextmanager + +# take from contextlib when moving to python3 +@contextmanager +def redirect_stdout(whereto=None): + import sys + bk_out = sys.stdout + sys.stdout = whereto + yield + sys.stdout = bk_out + +class CommandListSlurmJob(CommandListJob): + """ + Helper class to create a slurm job array from a list of commands (each becoming a task in the array) + + Default work directory will be $(pwd)/slurm_work, default output pattern is "*.root" + """ + default_cfg_opts = { + "environmentType" : "cms" + , "sbatch_time" : "0-04:00" + , "sbatch_mem" : "2048" + , "stageoutFiles" : ["*.root"] + } + + def __init__(self, commandList, workDir=None, configOpts=None): + super(CommandListSlurmJob, self).__init__(commandList, workDir=workDir, workdir_default_pattern="slurm_work") + ## + from CP3SlurmUtils.Configuration import Configuration + self.cfg = Configuration() + ## apply user-specified + cfg_opts = dict(CommandListSlurmJob.default_cfg_opts) + if configOpts: + cfg_opts.update(configOpts) + for k, v in cfg_opts.iteritems(): + setattr(self.cfg, k, v) + + ## check working and output directory + self.cfg.sbatch_workdir = self.workDir + self.cfg.inputSandboxDir = self.workDirs["in"] + self.cfg.batchScriptsDir = self.workDir + self.cfg.batchScriptsFilename = "slurmSubmission.sh" + self.cfg.stageoutDir = os.path.join(self.workDirs["out"], "${SLURM_ARRAY_TASK_ID}") + self.cfg.stageoutLogsDir = self.workDirs["log"] + self.cfg.useJobArray = True + self.cfg.inputParamsNames = ["taskcmd"] + self.cfg.inputParams = list([cmd] for cmd in self.commandList) + self.cfg.payload = ("${taskcmd}") + + self.slurmScript = os.path.join(self.cfg.batchScriptsDir, self.cfg.batchScriptsFilename) + self.clusterId = None ## will be set by submit + self._finishedTasks = dict() + self._statuses = ["PENDING" for cmd in self.commandList] + + ## output + try: + from CP3SlurmUtils.SubmitWorker import SubmitWorker as slurmSubmitWorker + except ImportError as ex: + logger.info("Could not import slurmSubmitWorker from CP3SlurmUtils.SubmitUtils. Please run 'module load slurm/slurm_utils'") + try: + slurm_submit = slurmSubmitWorker(self.cfg, submit=False, debug=False, quiet=False) + except Exception as ex: + logger.error("Problem constructing slurm submit worker from CP3SlurmUtils: {}".format(str(ex))) + raise ex + else: + from StringIO import StringIO ## python3: from io import StringIO + workerout = StringIO() + submitLoggerFun = logger.debug + try: + with redirect_stdout(workerout): + slurm_submit() + except Exception as ex: + submitLoggerFun = logger.info + raise RuntimeError("Problem in slurm_submit: {}".format(str())) + finally: + submitLoggerFun("========== BEGIN slurm_submit output ==========") + for ln in workerout.getvalue().split("\n"): + submitLoggerFun(ln) + submitLoggerFun("========== END slurm_submit output ==========") + + def _commandOutDir(self, command): + """ Output directory for a given command """ + return os.path.join(self.workDirs["out"], str(self.commandList.index(command)+1)) + + def commandOutFiles(self, command): + """ Output files for a given command """ + import fnmatch + cmdOutDir = self._commandOutDir(command) + return list( os.path.join(cmdOutDir, fn) for fn in os.listdir(cmdOutDir) + if any( fnmatch.fnmatch(fn, pat) for pat in self.cfg.stageoutFiles) ) + + def submit(self): + """ Submit the job to slurm """ + logger.info("Submitting an array of {0:d} jobs to slurm".format(len(self.commandList))) + result = subprocess.check_output(["sbatch" + , "--partition={}".format(self.cfg.sbatch_partition) + , "--qos={}".format(self.cfg.sbatch_qos) + , "--wckey=cms" + , self.slurmScript]) + + self.clusterId = next(tok for tok in reversed(result.split()) if tok.isdigit()) + + ## save to file in case + with open(os.path.join(self.workDirs["in"], "cluster_id"), "w") as f: + f.write(self.clusterId) + + logger.info("Submitted, job ID is {}".format(self.clusterId)) + + def statuses(self, update=True): + """ list of subjob statuses (numeric, using indices in SlurmJobStatus) """ + if update: + self.updateStatuses() + return [ SlurmJobStatus.index(sjst) for sjst in self._statuses ] + + @property + def status(self): + if all(st == self._statuses[0] for st in self._statuses): + return self._statuses[0] + elif any(st == "RUNNING" for st in self._statuses): + return "RUNNING" + elif any(st == "CANCELLED" for st in self._statuses): + return "CANCELLED" + else: + return "unknown" + + def subjobStatus(self, i): + return self._statuses[i-1] + + def updateStatuses(self): + for i in xrange(len(self.commandList)): + subjobId = "{0}_{1:d}".format(self.clusterId, i+1) + status = "unknown" + if subjobId in self._finishedTasks: + status = self._finishedTasks[subjobId] + else: + sacctCmdArgs = ["sacct", "-n", "--format", "State", "-j", subjobId] + ret = subprocess.check_output(sacctCmdArgs).strip() + if "\n" in ret: ## if finished + if len(ret.split("\n")) == 2: + ret = subprocess.check_output(["sacct", "-n", "--format", "State", "-j", "{}.batch".format(subjobId)]).strip() + self._finishedTasks[subjobId] = ret + status = ret + else: + raise AssertionError("More than two lines in sacct... there's something wrong") + else: + if len(ret) != 0: + status = ret + else: + squeueCmdArgs = ["squeue", "-h", "-O", "state", "-j", subjobId] + ret = subprocess.check_output(squeueCmdArgs).strip() + if len(ret) != 0: + status = ret + else: # fall back to previous status (probably PENDING or RUNNING) + status = self._statuses[i] + self._statuses[i] = status + + def commandStatus(self, command): + return self.subjobStatus(self.commandList.index(command)+1) + +def makeSlurmTasksMonitor(jobs=[], tasks=[], interval=120): + """ make a TasksMonitor for slurm jobs """ + from .batchhelpers import TasksMonitor + return TasksMonitor(jobs=jobs, tasks=tasks, interval=interval + , allStatuses=SlurmJobStatus + , activeStatuses=[SlurmJobStatus.index(stNm) for stNm in ("CONFIGURING", "COMPLETING", "PENDING", "RUNNING", "RESIZING", "SUSPENDED")] + , completedStatus=SlurmJobStatus.index("COMPLETED") + ) diff --git a/python/treedecorators.py b/python/treedecorators.py new file mode 100644 index 0000000..3882a7a --- /dev/null +++ b/python/treedecorators.py @@ -0,0 +1,1435 @@ +import ROOT +from itertools import chain, izip +import copy + +boolType = "bool" +PODTypes = set(("Float_t", "Double_t", "Int_t", "UInt_t", "Bool_t", "Char_t", "UChar_t", "ULong64_t", "int", "unsigned", "unsigned short", "char", "signed char", "unsigned char", "float", "double", "bool", "Short_t", "size_t", "std::size_t", "unsigned short")) ## there are many more (at least unsigned) +SizeType = "Int_t" +## TODO implement a "type system" - only need numeric types (boolean, integral and floating point, different precisions, math operators -> largest, comparison operators always -> bool) +## -> Python data model (emulating numeric types) +## for iterables / non-zero length things: may need new "elementary" operations (to be converted into control flow) - adapt "streaming" style and use bit-shift operations for readabilty (filter, map, reduce, zip (should not even be necessary: just stream enough info and optimise through getters later) etc. - fine as long as it is transparent enough, PURE functions start to matter here) + +################################################################################ +## INTERFACES ## +################################################################################ + +class TupleStub(object): + """ + Interface & base class for stubs + + NOTE: to avoid conflicts with branch names, subclasses should not define + new "visible" attributes (not starting with "_") + """ + __slots__ = ("__weakref__", "_typeName") + def __init__(self, typeName): + self._typeName = typeName + @property + def op(self): + raise ValueError("Can only get operation for 'complete' stubs (with a defined type)") + def _get(self, name): + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __getattr__(self, name): + return self._get(name) + +class TupleOp(object): + """ + Interface for operations on leafs and resulting objects / numbers + """ + __slots__ = ("__weakref__", "uname", "_explValidDeps") + def __init__(self): + self.uname = None ## cache ID for optimisations + self._explValidDeps = [] + @property + def args(self): + # return iterable over other operations + return tuple() + @property + def leafDeps(self): + # return iterable over other all used fields from the TTree + return tuple() + @property + def validDeps(self): + # requirements for this expression to be valid (recursive by construction) + lst = list(self._explValidDeps) + for a in self.args: + lst += a.validDeps + return lst + def addValidDep(self, dependency): + self._explValidDeps.append(adaptArg(dependency, typeHint=boolType)) + @property + def extDeps(self): + # return iterable of (type, name) over externally-defined variables used (no-recursive) + return tuple() + @property + def result(self): + pass + + def __hash__(self): + return hash(repr(self)) + + ## Backends TODO: there should be a way to separate this a bit... + def get_TTreeDrawStr(self): + """ + Generate string for TTree::Draw (TTreeFormula etc.) + Mostly for debugging and making sure all features are there + """ + if self.uname is not None: + return self.uname + else: + return self._get_TTreeDrawStr() + + def __eq__(self, other): + raise Exception("TupleOp has no default __eq__") ## FIXME should be something like not implemented or so + def __ne__(self, other): + return not ( self == other ) + +# helpers +def allArgsOfOp(op, stopAtUName=False): + """ Recursively yield all TupleOps this one depends on + """ + for ia in op.args: + yield ia + if ( not stopAtUName ) or ( ia.uname is None ): + for aa in allArgsOfOp(ia): + yield aa + +def opDependsOn(arg, other): + """ Is other present anywhere in the dependency tree of arg? + """ + return ( arg == other ) or ( other in arg.validDeps ) or any(ia == other for ia in allArgsOfOp(arg)) + +################################################################################ +## STUBS ## +################################################################################ + +class PODStub(TupleStub): + """ + Just a number + """ + __slots__ = ("_parent",) + def __init__(self, parent, typeName): + self._parent = parent + super(PODStub, self).__init__(typeName) + @property + def op(self): + return self._parent + def __repr__(self): + return "PODStub({0!r}, {1!r})".format(self._parent, self._typeName) + def _binaryOp(self, opName, other, outType="Double_t"): + return MathOp(opName, self, other, outType=outType).result +## operator overloads +for nm,opNm in { + "__add__" : "add" + , "__sub__" : "subtract" + , "__mul__" : "multiply" + , "__truediv__" : " divide" + , "__div__" : "divide" + }.iteritems(): + setattr(PODStub, nm, (lambda oN : (lambda self, other : self._binaryOp(oN, other)))(opNm)) +for nm in ("__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"): + setattr(PODStub, nm, (lambda oN, oT : (lambda self, other : self._binaryOp(oN, other, outType=oT)))(nm.strip("_"), boolType)) + +class ObjectStub(PODStub): + """ + Imitate an object + """ + __slots__ = ("_typ",) + def __init__(self, parent, typeName): + self._typ = getattr(ROOT, typeName) ## NOTE could also use TClass machinery + super(ObjectStub, self).__init__(parent, typeName) + def _get(self, name): + if name not in dir(self._typ): + raise AttributeError("Type {0} has no member {1}".format(self._typeName, name)) + if hasattr(self._typ, name) and isinstance(getattr(self._typ, name), ROOT.MethodProxy): + return ObjectMethodStub(self, name) + else: + return GetDataMember(self, name).result + def __repr__(self): + return "ObjectStub({0!r}, {1!r})".format(self._parent, self._typeName) + ## for mappable types: operator[] and valueType + def __getitem__(self, ky): + if self._get("__getitem__"): + return GetItem(self, ky, indexType=self.indexType).result + else: + return NotImplemented + @property + def valueType(self): + if self._typ.__name__.startswith("map<"): + return self._typ.__name__[4:-1].split(",")[-1].strip() + else: + print "Name: ", self._typ.__name__ + return NotImplemented + @property + def indexType(self): + if self._typ.__name__.startswith("map<"): + return self._typ.__name__[4:-1].split(",")[0].strip() + else: + print "Name: ", self._typ.__name__ + return NotImplemented + + ## TODO implement more operators, if they are supported by the underlying class + +class ArrayStub(TupleStub): ## different from an array result of a calculation or not? Not really, in fact... + """ + (possibly var-sized) array of anything + """ + __slots__ = ("_parent", "_length", "valueType") + def __init__(self, parent, typeName, length): + self._parent = parent + self._length = length + self.valueType = typeName + super(ArrayStub, self).__init__("[{0}]".format(typeName)) + @property + def op(self): + return self._parent + def __getitem__(self, index): + return GetItem(self, index).result + def __len__(self): + return self._length + def __repr__(self): + return "ArrayStub({0!r}, {1!r}, {2!r})".format(self._parent, self._typeName, self._length) + + +class VectorStub(ObjectStub): + """ + Vector-as-array (to be eliminated with var-array branches / generalised into object) + """ + __slots__ = ("_parent", "valueType") + def __init__(self, parent, typeName): + self._parent = parent + ##self.valueType = getattr(ROOT, typeName).value_type ## usable from 6.04.something on + self.valueType = ">".join(tk.strip() for tk in "<".join(tok.strip() for tok in next(mem for mem in dir(getattr(ROOT, typeName)) if mem.startswith("_vector<") and mem.endswith("___M_range_check")).split("<")[1:]).split(">")[:-1]) + super(VectorStub, self).__init__(parent, typeName) + @property + def op(self): + return self._parent + def __getitem__(self, index): + return GetItem(self, index).result + def __len__(self): + return op.extMethod("Length$")(self) + def __repr__(self): + return "VectorStub({0!r}, {1!r})".format(self._parent, self._typeName) + +import re +vecPat = re.compile("vector\<(?P[a-zA-Z_0-9\<\>,\: ]+)\>") + +def makeStubForType(typeName, parent, length=None): + if length is not None: + return ArrayStub(parent, typeName, length) + if typeName in PODTypes: + return PODStub(parent, typeName) ## maybe this one can also take the type name + else: + m = vecPat.match(typeName) + if m: + return VectorStub(parent, typeName) + else: + return ObjectStub(parent, typeName) + +def adaptArg(arg, typeHint=None): + if isinstance(arg, TupleStub): + return arg.op + elif isinstance(arg, TupleOp): + return arg + elif typeHint is not None: + if str(arg) == arg: ## string, needs quote + return Const(typeHint, '"{}"'.format(arg)) + else: + return Const(typeHint, arg) + else: + raise ValueError("Should get some kind of type hint") + +def makeConst(value, typeHint): + return makeStubForType(typeHint, adaptArg(value, typeHint)) + +class MethodStub(TupleStub): + """ + Imitate a free-standing method + """ + __slots__ = ("_name",) + def __init__(self, name): + self._name = name + super(MethodStub, self).__init__("{0}(...)".format(self._name)) + def __call__(self, *args): + ## TODO maybe tihs is a good place to resolve the right overload? or do some arguments checking + return CallMethod(self._name, tuple(args)).result + def __repr__(self): + return "MethodStub({0!r})".format(self._name) + +class ObjectMethodStub(TupleStub): ## TODO data members? + """ + Imitate a member method of an object + """ + __slots__ = ("_objStb", "_name") + def __init__(self, objStb, name): + self._objStb = objStb + self._name = name + super(ObjectMethodStub, self).__init__("{0}.{1}(...)".format(objStb._typeName, self._name)) + def __call__(self, *args): + ## TODO maybe tihs is a good place to resolve the right overload? or do some arguments checking + return CallMemberMethod(self._objStb, self._name, tuple(args)).result + def __repr__(self): + return "ObjectMethodStub({0!r}, {1!r})".format(self._objStb, self._name) + +def allLeafs(branch, split=False): ## study splitlevel in tree as well + """ + Recursively collect TTree leaves (TLeaf and TBranchElement) + """ + if split: + raise NotImplementedError("Going into TBranchElement not implemented yet") + for lv in branch.GetListOfLeaves(): + yield lv + for br in branch.GetListOfBranches(): + if isinstance(br, ROOT.TBranchElement): + yield br + else: + for lv in allLeafs(br): + yield lv + +class SmartTupleStub(TupleStub): + """ + First try a fixed list of proxies (which should have a _parent attribute to refer up the tree) + """ + __slots__ = ("_parent", "_smartLeafs", "_extraCap") + ## TODO make a SmartLeafStub base class (problems with multiple inheritance, maybe, unless these can always have smart leafs as well) + def __init__(self, typeName, parent=None): + self._smartLeafs = dict() + self._extraCap = list() + self._registerParent(parent) + super(SmartTupleStub, self).__init__(typeName) + @property + def op(self): + return self._parent + + def _registerSmartLeaf(self, name, stub, extraCap=None): + stub._registerParent(self, extraCap=extraCap) + self._smartLeafs[name] = stub + + def _registerParent(self, parent, extraCap=None): + self._parent = parent + if extraCap: + for hlp in extraCap: + self._addCapability(hlp) + + def _addCapability(self, hlp): + hlp._parent = self + self._extraCap.append(hlp) + + def __getattr__(self, name): ## FIXME as all are registered, we could probably also do this with properties + # override to first check smart leafs + if name in self._smartLeafs: + return self._smartLeafs[name] + else: + att = self._getattrFromCap(name) + if att is not None: + return att + else: + return self._get(name) + + def _getCapWithAttr(self, name, exceptionIfAbsent=False): + for hlp in self._extraCap: + if name in hlp._dir: + return hlp + if exceptionIfAbsent: + raise AttributeError("Object {0!r} has no attribute with name {1!r}".format(self, name)) + + def _getattrFromCap(self, name, exceptionIfAbsent=False): + hlp = self._getCapWithAttr(name, exceptionIfAbsent=exceptionIfAbsent) + if hlp is not None: + return getattr(hlp, name) + + ## inspect + def __dir__(self): + return list(set(self._availLeafs()).difference(self._listedLeafsBelow())) + ## pass full list top-down + def _moreAvailLeafs(self): ## excluding own + if self._parent is not None: + return self._parent._availLeafs() + else: + return set() + def _availLeafs(self): ## including own + avail = self._moreAvailLeafs() + avail.update(self._smartLeafs.iterkeys()) + for hlp in self._extraCap: + avail.update(hlp._dir) + return avail + ## extra helper (split in two) + def _listedLeafsBelow(self): ## excluding own + lst = set() + for ch in self._smartLeafs.itervalues(): + lst.update(set(ch._listedLeafs())) + for dc in self._extraCap: + lst.update(dc._replLeafs()) + return lst + ## pass already-listed bottom up + def _listedLeafs(self): ## including own + lst = self._listedLeafsBelow() + availListed = set(self.__dir__()).intersection(self._moreAvailLeafs()) ## exclude those that have already been listed + ## add object members + lst.update(availListed) + return lst + + ## extra decorations + class _SmartLeafDecoration(object): + __slots__ = ("__weakref__", "_parent") + _dir = tuple() + def __init__(self, parent=None): + self._parent = parent + def _replLeafs(self): + return set() + @property + def _leafDeps(self): + return tuple() + + class _SmartIterable(_SmartLeafDecoration): + """ Approach a structure of arrays as an array of structures """ + __slots__ = ("_len",) + _dir = ("__getitem__", "__len__", "__nonzero__", "__iter__") + def __init__(self, _len, parent=None): + if str(_len) == _len: + self._len = ( lambda ilen : (lambda self : getattr(self._parent, ilen)) )(_len) ## one level up from parent + else: + self._len = _len + super(SmartTupleStub._SmartIterable, self).__init__(parent=parent) + def __getitem__(self, idx): + return SmartLeafItemStub(idx, parent=self._parent) + def __len__(self): + return self._len(self._parent) + def __iter__(self): + for i in xrange(len(self)): + yield self[i] + def __nonzero__(self): + return self.__len__() > 0 ## FIXME put what makes sense... + def _replLeafs(self): + return self[-1]._availLeafs() + @property + def _leafDeps(self): + return adaptArg(self.__len__()).leafDeps + + class _WrappedIterable(_SmartLeafDecoration): + """ Return wrapped elements of the array (attached to a LeafFacade) """ + __slots__ = ("_base", "_stubMap") + _dir = ("__getitem__", "__len__", "__nonzero__", "__iter__") + def __init__(self, base, stubMap, parent=None): + self._base = base + self._stubMap = stubMap + super(SmartTupleStub._WrappedIterable, self).__init__(parent=parent) + def _wrap(self, rawItm): + return self._stubMap[rawItm._typeName](rawItm._parent, pStub=self._parent) + def __getitem__(self, idx): + return self._wrap(self._base[idx]) + def __len__(self): + return self._base.__len__() + def __iter__(self): + for itm in self._base: + yield self_wrap(itm) + def __nonzero__(self): + return self.__len__() > 0 ## FIXME put what makes sense + def _replLeafs(self): + return self[-1]._availLeafs() + @property + def _leafDeps(self): + return self._base._parent.leafDeps + + class _BoolTestableFromLeaf(_SmartLeafDecoration): + __slots__ = ("_bool",) + _dir = ("__nonzero__",) + def __init__(self, _bool, parent=None): + self._bool = _bool + super(SmartTupleStub._BoolTestableFromLeaf, self).__init__(parent=parent) + def __nonzero__(self): + return self._bool(self._parent) + + class _RefToOther(_SmartLeafDecoration): + __slots__ = ("_name", "_refGetter", "_repl") + def __init__(self, name, refGetter, parent=None, repl=None): + self._name = name + self._refGetter = refGetter + self._repl = tuple(repl) if repl else tuple() + super(SmartTupleStub._RefToOther, self).__init__(parent=parent) + @property + def _dir(self): + return (self._name,) + def _replLeafs(self): + return self._repl + def __getattr__(self, name): + if name == self._name: + return self._refGetter(self._parent) + else: + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __repr__(self): + return "SmartTupleStub._RefToOther({0!r}, {1!r}, parent={2!r})".format(self._name, self._refGetter, self._parent) + + class _RefList(_SmartLeafDecoration): + __slots__ = ("_base", "_cont") + _dir = ("__getitem__", "__iter__", "__len__", "__nonzero__") + def __init__(self, base, cont, parent=None): + self._base = base + self._cont = cont + super(SmartTupleStub._RefList, self).__init__(parent=parent) + def __getitem__(self, idx): + return self._cont[self._base[idx]] + def __iter__(self): + for i in self._base: + yield self._cont[i] + def __len__(self): + return self._base.__len__() + def __nonzero__(self): + return self.__len__() != 0 ### FIXME + @property + def _leafDeps(self): + ###print "RefList with base {0!r} and container {1!r}".format(self._base, self._cont) + return chain(self._base._parent.leafDeps, self._cont._get_leafDeps()) ## FIXME this probably won't work for MC particle refs + + ## resolve iterable-like methods dynamically (could be implemented generically using properties) + def __getitem__(self, idx): + method = self._getattrFromCap("__getitem__") + if method: + return method(idx) + else: + return NotImplemented + def __len__(self): + method = self._getattrFromCap("__len__") + if method: + return method() + else: + return NotImplemented + def __iter__(self): + for elm in self._getattFromCap("__iter__", True)(): + yield elm + ## FIXME this should probably be done differently (use filter/map/reduce/... directly) + def __nonzero__(self): + ## NOTE important as soon as __len__ is implemented (see python reference on the data model) + method = self._getattrFromCap("__nonzero__") + if method: + return method() + else: + return NotImplemented + + def _get_leafDeps(self): + return chain.from_iterable(cp._leafDeps for cp in self._extraCap) + +class TreeStub(SmartTupleStub): + """ + Tree stub + """ + __slots__ = ("_skeleton", "_lvs") + def __init__(self, skeleton): + self._skeleton = skeleton + self._lvs = dict() + self._readLeafs() ## also fill self._lvs + super(TreeStub, self).__init__("struct") + + def _readLeafs(self): + self._lvs = dict( (br.GetName(), br) for br in allLeafs(self._skeleton) ) + + def _get(self, name): + if name in self._lvs: + lv = self._lvs[name] + if not lv: ## update in case we moved somewhere else or so + print "DBG: re-reading leafs" + self._readLeafs() + lv = self._lvs[name] + assert lv + if isinstance(lv, ROOT.TLeaf): + if lv.GetLeafCount(): + lenLv = lv.GetLeafCount() + return GetArrayLeaf(lv.GetTypeName(), name, GetLeaf(lenLv.GetTypeName(), lenLv.GetName())).result + elif lv.GetTitle().endswith("]"): + tp,ln = tuple(lv.GetTitle().strip("]").split("[")) + return GetArrayLeaf(lv.GetTypeName(), name, Const(SizeType, int(ln))).result + else: + return GetLeaf(lv.GetTypeName(), name).result + else: + return GetLeaf(lv.GetClassName() if lv.GetClassName() != '' else lv.GetTypeName(), name).result + else: + raise AttributeError("Tree has no branch with name {0}".format(repr(name))) + def __repr__(self): + return "TreeStub({0!r})".format(self._skeleton) + + def _availLeafs(self): + avail = super(TreeStub, self)._availLeafs() + avail.update(set(lv for lv in self._lvs.iterkeys() if "." not in lv and not lv.endswith("_"))) ## TODO let the classes tell us... + return avail + +class BranchGroupStub(SmartTupleStub): + """ + Stub for a group of related branches (with shared prefix) + """ + __slots__ = ("_prefix", "_nStrp") + def __init__(self, prefix, nStrp=0, **kwargs): + self._prefix = prefix + self._nStrp = nStrp + super(BranchGroupStub, self).__init__("struct", **kwargs) + def _get(self, name): + return getattr(self._parent, "".join((("".join(self._prefix.split("_")[:-self._nStrp]) if self._nStrp > 0 else self._prefix), name))) ## TODO could composition of join be made optional? + def __repr__(self): + return "BranchGroupStub({0!r}, parent={1!r})".format(self._prefix, self._parent) + + def _listedLeafs(self): + return set("".join((self._prefix, ky)) for ky in super(BranchGroupStub, self)._listedLeafs()) + + def _moreAvailLeafs(self): + return set(lf[len(self._prefix):] for lf in super(BranchGroupStub, self)._moreAvailLeafs() if lf.startswith(self._prefix) and lf != self._prefix) + +class SmartLeafItemStub(SmartTupleStub): + ### FIXME maybe need to make BranchGroupStub a base class, with the current behaviour for non-indexed and + """ + Stub for an item in a group of related branches + """ + __slots__ = ("_idx",) + ## NOTE special case: always has a parent, but is never registered - maybe we should change that and use the copy trick here + def __init__(self, idx, parent=None): + self._idx = adaptArg(idx, typeHint=SizeType) + super(SmartLeafItemStub, self).__init__("struct", parent=parent) + + @property + def i(self): + return makeStubForType(SizeType, self._idx) + + class _ItemSmartLeafStub(SmartTupleStub): + __slots__ = ("_orig",) + def __init__(self, orig, parent=None): + self._orig = orig + super(SmartLeafItemStub._ItemSmartLeafStub, self).__init__("struct", parent=parent) + def __getattr__(self, name): + return getattr(self._orig, name)[self._orig._idx] ## first attempt + + def __getattr__(self, name): + if name.startswith("_ipython"): + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + # override to first check smart leafs + if name in self._parent._smartLeafs: ## unless they are refs, then jump from parent + print "Getting smart leaf {0} of {1}".format(name, self) + return SmartLeafItemStub._ItemSmartLeafStub(self._parent._smartLeafs[name], self) + else: + att = self._getattrFromCap(name) + if att is not None: + return att + hlp = self._parent._getCapWithAttr(name) + if hlp is not None: + cp = copy.copy(hlp) + cp._parent = self + if isinstance(cp, SmartTupleStub._RefToOther) and isinstance(cp._refGetter, GoUpBy): + cp._refGetter = copy.copy(cp._refGetter) + cp._refGetter.nSteps += 1 + return getattr(cp, name) + else: + return self._parent._get(name)[self._idx] + def __repr__(self): + return "SmartLeafItemStub({0!r}, parent={1!r})".format(self._idx, self._parent) + + def _moreAvailLeafs(self): ## everything from parent except list inteface + avail = self._parent._availLeafs() + avail.discard("__len__") + avail.discard("__getitem__") + ## TODO discard a few more (references that are not up and not indexed like our array, see above - needs a bit of logic, may not be possible until arriving at the final branch) + return avail + + def __eq__(self, other): + if self._parent == other._parent: + return self.i == other.i + else: + raise Exception("Only objects from the same container can be compared for equality") + def __ne__(self, other): + return not ( self == other ) + +class GoUpBy(object): + """ + Construct .., ../.. etc. + """ + __slots__ = ("__weakref__", "nSteps",) + def __init__(self, nSteps): + self.nSteps = nSteps + def __call__(self, start): + i = 0 + res = start + while i < self.nSteps: + res = res._parent + i += 1 + return res + def __repr__(self): + return "GoUpBy({0:d})".format(self.nSteps) + +################################################################################ +## OPERATIONS ## +################################################################################ + +class Const(TupleOp): + """ + Hard-coded number + """ + __slots__ = ("typeName", "value") + def __init__(self, typeName, value): + self.typeName = typeName + self.value = value + super(Const, self).__init__() + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "Const({0!r})".format(self.value) + def __eq__(self, other): + return isinstance(other, Const) and ( self.typeName == other.typeName ) and ( self.value == other.value ) + # backends + def _get_TTreeDrawStr(self): + try: + if abs(self.value) == float("inf"): + return "std::numeric_limits<{0}>::{mnmx}()".format(self.typeName, mnmx=("min" if self.value < 0. else "max")) + except: + pass + return str(self.value) ## should maybe be type-aware... + +class Construct(TupleOp): + __slots__ = ("typeName", "_args") + def __init__(self, typeName, args): + self.typeName = typeName + self._args = tuple(adaptArg(a, typeHint="Double_t") for a in args) + super(Construct, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self._args ) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "Construct({0!r}, {1})".format(self.typeName, ", ".join(repr(a) for a in self._args)) + def __eq__(self, other): + return isinstance(other, Construct) and ( self.typeName == other.typeName ) and len(self._args) == len(other._args) and all( ( aa == ab ) for aa,ab in izip(self._args, other._args) ) + def _get_TTreeDrawStr(self): + return "{0}{{{1}}}".format(self.typeName, ", ".join(a.get_TTreeDrawStr() for a in self._args)) + +class InitList(TupleOp): + __slots__ = ("typeName", "elms") + def __init__(self, typeName, elmType, elms): + self.typeName = typeName + self.elms = tuple(adaptArg(e, typeHint=elmType) for e in elms) + super(InitList, self).__init__() + @property + def args(self): + return self.elms + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self.elms ) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "InitList<{0}>({1})".format(self.typeName, ", ".join(repr(elm) for elm in self.elms)) + def __eq__(self, other): + return isinstance(other, InitList) and ( self.typeName == other.typeName ) and len(self.elms) == len(other.elms) and all( ( ea == eb ) for ea, eb in izip(self.elms, other.elms) ) + def _get_TTreeDrawStr(self): + return "{{ {0} }}".format(", ".join(elm.get_TTreeDrawStr() for elm in self.elms)) + +class ExtVar(TupleOp): + """ + Externally-defined variable (used by name) + """ + __slots__ = ("typeName", "name") + def __init__(self, typeName, name): + self.typeName = typeName + self.name = name + super(ExtVar, self).__init__() + @property + def extDeps(self): + return ((self.typeName, self.name),) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "ExtVar({0!r}, {1!r})".format(self.typeName, self.name) + def __eq__(self, other): + return isinstance(other, ExtVar) and ( self.typeName == other.typeName ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetLeaf(TupleOp): + """ + Get the number from a leaf + """ + __slots__ = ("typeName", "name") + def __init__(self, typeName, name): + self.typeName = typeName + self.name = name + super(GetLeaf, self).__init__() + @property + def leafDeps(self): + return self.name, + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "GetLeaf({0!r}, {1!r})".format(self.typeName, self.name) + def __eq__(self, other): ## TODO maybe name is enough + return isinstance(other, GetLeaf) and ( self.typeName == other.typeName ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetArrayLeaf(TupleOp): + """ + Get the number from a leaf + """ + __slots__ = ("typeName", "name", "length") + def __init__(self, typeName, name, length): + self.typeName = typeName + self.name = name + self.length = length + super(GetArrayLeaf, self).__init__() + @property + def leafDeps(self): + return self.name, ## TODO add length (if applicable !!!). Maybe at this point need to allow for Const "stub" (with same operator overloads as POD stub) + @property + def result(self): + return makeStubForType(self.typeName, self, makeStubForType(SizeType, self.length)) + def __repr__(self): + return "GetArrayLeaf({0!r}, {1!r}, {2!r})".format(self.typeName, self.name, self.length) + def __eq__(self, other): ## TODO maybe name is enough + return isinstance(other, GetArrayLeaf) and ( self.typeName == other.typeName ) and ( self.name == other.name ) and ( self.length == other.length ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetItem(TupleOp): + """ + Get item from array (from function call or from array leaf) + """ + __slots__ = ("arg", "typeName", "index") + def __init__(self, arg, index, indexType=SizeType): + self.arg = adaptArg(arg) + self.typeName = arg.valueType + self.index = adaptArg(index, typeHint=SizeType) + super(GetItem, self).__init__() + ## add validity requirement + ## TODO in the case of a ref (MC, -1 or good) or index (< len), we *could* do some checks + ## TODO for the rng_min_element_by etc. case, the best would be to add (to GetItem) a validity requirement at construction time + if isinstance(self.index, CallMethod) and self.index.name == "*" and isinstance(self.index._args[0], Next): + self.addValidDep(self.index._args[0].isValid()) + @property + def args(self): + return [ self.arg ] + ([ self.index ] if not isinstance(self.index, Const) else []) + @property + def leafDeps(self): + return chain.from_iterable(dep.leafDeps for dep in (self.arg, self.index)) + @property + def result(self): + return makeStubForType(self.typeName, self) ## TODO make sure that any parent gives back the right type name (may have to call it itemtype or so) + def __repr__(self): + return "GetItem({0!r}, {1!r})".format(self.arg, self.index) + def __eq__(self, other): ## TODO maybe typeName is not needed + return isinstance(other, GetItem) and ( self.arg == other.arg ) and ( self.typeName == other.typeName ) and ( self.index == other.index ) + # backends + def _get_TTreeDrawStr(self): + return "{0}[{1}]".format(self.arg.get_TTreeDrawStr(), self.index.get_TTreeDrawStr()) + +class LocalVariablePlaceholder(TupleOp): + """ + Placeholder type for a local variable connected to an index (first step in a specific-to-general strategy) + """ + __slots__ = ("name", "typeHint") + def __init__(self, name, typeHint): + self.name = name + self.typeHint = typeHint + super(LocalVariablePlaceholder, self).__init__() + @property + def leafDeps(self): + return tuple() + @property + def result(self): + return makeStubForType(self.typeHint, self) + def __eq__(self, other): ## TODO ?? + return isinstance(other, LocalVariablePlaceholder) and ( self.name == other.name ) and ( self.typeHint == other.typeHint ) + def _get_TTreeDrawStr(self): + return str(self.name) + def __repr__(self): + return "LocalVariablePlaceholder({0!r}, {1!r})".format(self.name, self.typeHint) + +def isRefList(rng): + """ Test if the range is a list of references or a group of vectors + """ + iterCap = rng._getCapWithAttr("__getitem__") + return ( iterCap is not None ) and isinstance(iterCap, SmartTupleStub._RefList) + +def getCapturesForExprs(expressions): + captures = set() + for expr in expressions: + for a in allArgsOfOp(expr, stopAtUName=True): + if a.uname: + captures.add(a.uname) + for evT,evN in a.extDeps: + if True: ## TODO only if evT is small + captures.add(evN) + else: + captures.add("&{0}".format(evN)) + return captures + +class Next(TupleOp): + """ std::find_if + """ + __slots__ = ("rng", "predFun", "_itArg", "_predExpr", "_refList") + def __init__(self, rng, predFun): + self.rng = rng + self.predFun = predFun + self._init() + super(Next, self).__init__() + def _init(self): + if isRefList(self.rng): + refListCap = self.rng._getCapWithAttr("__getitem__") + self._itArg = adaptArg(refListCap._base) + self._predExpr = adaptArg(self.predFun(refListCap._cont[LocalVariablePlaceholder("item", "unsigned short")]), typeHint=boolType) + self._refList = True + else: + self._itArg = adaptArg(op.len(self.rng)) + self._predExpr = adaptArg(self.predFun(self.rng[LocalVariablePlaceholder("i", "unsigned short")]), typeHint=boolType) + self._refList = False + @property + def args(self): + return [ self._predExpr ] + @property + def leafDeps(self): + return chain(self.rng._get_leafDeps(), self._predExpr.leafDeps) + def isValid(self): + if self._refList: + return ( self.result != op.extMethod("std::end")(self._itArg) ) + else: + return ( self.result != op.extMethod("IndexRangeIterator")(self._itArg, self._itArg) ) + @property + def result(self): + return makeStubForType("unsigned short", self) + def __repr__(self): + if self._refList: + return "Next_RefList( {0!r}, {1!r} )".format(self._itArg, self._predExpr) + else: + return "Next_Generic( {0!r}, {1!r} )".format(self._itArg, self._predExpr) + def __eq__(self, other): + return isinstance(other, Next) and ( self.rng == other.rng ) and ( self._predExpr == other._predExpr ) and ( self._itArg == other._itArg ) and ( self._refList == other._refList ) + def _get_TTreeDrawStr(self): + if self._refList: + return "std::find_if(std::begin({arg}), std::end({arg}), [{captures}] ( unsigned short item ) {{ return {predExpr}; }} )".format( + arg=self._itArg.get_TTreeDrawStr() + , predExpr=self._predExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._predExpr,)))) + ) + else: + return "std::find_if({itTp}(0,{N}), {itTp}({N},{N}), [{captures}] ( unsigned short i ) {{ return {predExpr}; }} )".format( + itTp="IndexRangeIterator" + , N=self._itArg.get_TTreeDrawStr() + , predExpr=self._predExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._predExpr,)))) + ) + +class Reduce(TupleOp): + """ + reduce (std::accumulate) over an iterable + + accuFun: res, item -> nextResExpr + isAssociative: if not, items are guaranteed to be processed in order (otherwise could be distribute) + """ + __slots__ = ("rng", "start", "accuFun", "valueType", "isAssociative", "_funExpr", "_itArg", "_refList") + def __init__(self, rng, start, accuFun, valueType=None, isAssociative=False): + self.rng = rng + self.start = adaptArg(start) + self.accuFun = accuFun + self.valueType = valueType if valueType else self.start.valueType + self.isAssociative = isAssociative + self._init() + super(Reduce, self).__init__() + def _init(self): + if isRefList(self.rng): + refListCap = self.rng._getCapWithAttr("__getitem__") + self._funExpr = adaptArg(self.accuFun( + makeStubForType(self.valueType, LocalVariablePlaceholder("res", self.valueType)) + , refListCap._cont[LocalVariablePlaceholder("item", "unsigned short int")] + ), typeHint=self.valueType) + self._itArg = adaptArg(refListCap._base) + self._refList = True + else: + self._funExpr= adaptArg(self.accuFun( + makeStubForType(self.valueType, LocalVariablePlaceholder("res", self.valueType)) + , self.rng[LocalVariablePlaceholder("i", "unsigned short int")] + ), typeHint=self.valueType) + self._itArg = adaptArg(op.len(self.rng)) + self._refList = False + + @property + def args(self): + return [ self.start, self._funExpr ] + @property + def leafDeps(self): + return chain(self.rng._get_leafDeps(), self.start.leafDeps, self._funExpr.leafDeps) + @property + def result(self): + return makeStubForType(self.valueType, self) + + def __repr__(self): + if self._refList: + return "Reduce_RefList( {0!r}, {1!r}, {2!r}, valueType={3!r}, isAssociative={4} )".format(self._itArg, self.start, self._funExpr, self.valueType, self.isAssociative) + else: + return "Reduce_Generic( {0!r}, {1!r}, {2!r}, valueType={3!r}, isAssociative={4} )".format(self._itArg, self.start, self._funExpr, self.valueType, self.isAssociative) + def __eq__(self, other): + return isinstance(other, Reduce) and ( self.rng == other.rng ) and ( self.start == other.start ) and ( self._refList == other._refList ) and ( self._itArg == other._itArg ) and ( self._funExpr == other._funExpr ) and ( self.valueType == other.valueType ) and ( self.isAssociative == other.isAssociative ) + # backend + def _get_TTreeDrawStr(self): + if self._refList: + return "std::accumulate(std::begin({arg}), std::end({arg}), {valTp}({start}), [{captures}] ( {valTp} res, unsigned short item ) -> {valTp} {{ return {funExpr}; }} )".format( + valTp=self.valueType + , arg=self._itArg.get_TTreeDrawStr() + , start=self.start.get_TTreeDrawStr() + , funExpr=self._funExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._funExpr,)))) + ) + else: # generic case (typically virtual container (e.g. ttW_lepton_*) -> isinstance(iterCap, SmartTupleStub._SmartIterable)) + return "std::accumulate({itTp}(0,{N}), {itTp}({N},{N}), {valTp}({start}), [{captures}] ( {valTp} res, unsigned short i ) -> {valTp} {{ return {funExpr}; }} )".format( + valTp=self.valueType + , itTp="IndexRangeIterator" + , N=self._itArg.get_TTreeDrawStr() + , start=self.start.get_TTreeDrawStr() + , funExpr=self._funExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._funExpr,)))) + ) + ## what works now: op.rng_sum(tup.muons, lambda mu : mu.p4.Pt()) + op.rng_sum(tup.electrons, lambda el : el.p4.Pt()) - op.rng_sum(tup.ttW.l, lambda l : l.L.p4.Pt()) (sum of non-selected lepton PT's) + +class GetDataMember(TupleOp): + """ + Get a data member + """ + __slots__ = ("this", "name") + def __init__(self, this, name): + self.this = adaptArg(this) + self.name = name ## NOTE can only be a hardcoded string this way + super(GetDataMember, self).__init__() + @property + def args(self): + return [ self.this ] + @property + def leafDeps(self): + return self.this.leafDeps ## TODO and add something for the (possibly) new branches we ask for if splitlevel is high? + @property + def result(self): + if not self.name.startswith("_"): + try: + protoTp = self.this.result._typ + proto = protoTp() ## should *in principle* work for most ROOT objects + att = getattr(proto, self.name) + tpNm = type(att).__name__ + if protoTp.__name__.startswith("pair<") and self.name in ("first", "second"): + tpNms = tuple(tok.strip() for tok in protoTp.__name__[5:-1].split(",")) + return makeStubForType((tpNms[0] if self.name == "first" else tpNms[1]), self) + return makeStubForType(tpNm, self) + except Exception, e: + print "Problem getting type of data member {0} of {1!r}".format(self.name, self.this), e + return makeStubForType("void", self) + def __repr__(self): + return "GetDataMember({0!r}, {1!r})".format(self.this, self.name) + def __eq__(self, other): + return isinstance(other, GetDataMember) and ( self.this == other.this ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return "{0}.{1}".format(self.this.get_TTreeDrawStr(), self.name) + +class CallMethod(TupleOp): + """ + Call a method + """ + __slots__ = ("name", "_mp", "_args") + def __init__(self, name, args): + self.name = name ## NOTE can only be a hardcoded string this way + try: + self._mp = getattr(ROOT, name) + except: + self._mp = None + self._args = tuple(adaptArg(arg) for arg in args) + super(CallMethod, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self._args ) + @property + def result(self): + retTypeN = next( tok.strip("*&") for tok in self._mp.func_doc.split() if tok != "const" ) if self._mp else "Float_t" + return makeStubForType(retTypeN, self) + def __repr__(self): + return "CallMethod({0!r}, ({1}))".format(self.name, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, CallMethod) and ( self.name == other.name ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # backends + def _get_TTreeDrawStr(self): + return "{0}({1})".format(self.name, ", ".join(arg.get_TTreeDrawStr() for arg in self._args)) + +class CallMemberMethod(TupleOp): + #return CallMemberMethod(self._objStb.parent, self._name, tuple(args)).result + """ + Call a member method + """ + __slots__ = ("this", "name", "_mp", "_args") + def __init__(self, this, name, args): + self.this = adaptArg(this) + self.name = name ## NOTE can only be a hardcoded string this way + self._mp = getattr(this._typ, name) + self._args = tuple(adaptArg(arg) for arg in args) + super(CallMemberMethod, self).__init__() + @property + def args(self): + return chain([self.this], self._args) + @property + def leafDeps(self): + return chain( self.this.leafDeps, chain.from_iterable( arg.leafDeps for arg in self._args ) ) + @property + def result(self): + retTypeN = next( tok.strip("*&") for tok in self._mp.func_doc.split() if tok != "const" ) + return makeStubForType(retTypeN, self) + def __repr__(self): + return "CallMemberMethod({0!r}, {1!r}, ({2}))".format(self.this, self.name, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, CallMemberMethod) and ( self.this == other.this ) and ( self.name == other.name ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # backends + def _get_TTreeDrawStr(self): + return "{0}.{1}({2})".format(self.this.get_TTreeDrawStr(), self.name, ", ".join(arg.get_TTreeDrawStr() for arg in self._args)) + +mathOpFuns_TTreeDrawStr = { + "add" : lambda *args : "( {0} )".format(" + ".join(arg.get_TTreeDrawStr() for arg in args)) + , "multiply" : lambda *args : "( {0} )".format(" * ".join(arg.get_TTreeDrawStr() for arg in args)) + , "subtract" : lambda a1,a2 : "( {0} - {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "divide" : lambda a1,a2 : "( {0} / {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + # + , "lt" : lambda a1,a2 : "( {0} < {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "le" : lambda a1,a2 : "( {0} <= {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "eq" : lambda a1,a2 : "( {0} == {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "ne" : lambda a1,a2 : "( {0} != {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "gt" : lambda a1,a2 : "( {0} > {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "ge" : lambda a1,a2 : "( {0} >= {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "and" : lambda *args : "( {0} )".format(" && ".join(a.get_TTreeDrawStr() for a in args)) + , "or" : lambda *args : "( {0} )".format(" || ".join(a.get_TTreeDrawStr() for a in args)) + , "not" : lambda a : "( ! {0} )".format(a.get_TTreeDrawStr()) + # + , "abs" : lambda arg : "std::abs( {0} )".format(arg.get_TTreeDrawStr()) + , "log" : lambda arg : "std::log( {0} )".format(arg.get_TTreeDrawStr()) + , "log10" : lambda arg : "std::log10( {0} )".format(arg.get_TTreeDrawStr()) + , "max" : lambda a1,a2 : "std::max( {0}, {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "min" : lambda a1,a2 : "std::min( {0}, {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + } + +class MathOp(TupleOp): + """ + Mathematical function N->1, e.g. sin, abs, ( lambda x, y : x*y ) + """ + __slots__ = ("outType", "op", "_args") + def __init__(self, op, *args, **kwargs): + self.outType = kwargs.pop("outType", "Double_t") + assert len(kwargs) == 0 + self.op = op + self._args = tuple(adaptArg(a, typeHint="Double_t") for a in args) + super(MathOp, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable(arg.leafDeps for arg in self._args) + @property + def result(self): + return makeStubForType(self.outType, self) + def __repr__(self): + return "{0}({1})".format(self.op, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, MathOp) and ( self.outType == other.outType ) and ( self.op == other.op ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # no TTreeDrawStr backend, this is an abstract base class + # convention: wrap *result*, if all parts do that, we have not too many excess parentheses + def _get_TTreeDrawStr(self): + return mathOpFuns_TTreeDrawStr[self.op](*self._args) + +## FIXME this can be done by MathOp, as things are now +# also of this type, for now ( control flow -> may be lazy, dep. on implementation ) +class SwitchOp(MathOp): ## may need another layer of indirection around... + """ + Ternary operator if cond then trueBranch else falseBranch + """ + __slots__ = () + def __init__(self, cond, trueBr, falseBr): + if isinstance(trueBr, TupleStub) and isinstance(falseBr, TupleStub): + assert trueBr._typeName == falseBr._typeName + super(SwitchOp, self).__init__("switch", adaptArg(cond, typeHint=boolType), trueBr, falseBr, outType=(trueBr._typeName if isinstance(trueBr, TupleStub) else falseBr._typeName if isinstance(falseBr, TupleStub) else "Double_t")) + # backends + def _get_TTreeDrawStr(self): + return "( {0} ? {1} : {2} )".format(*(arg.get_TTreeDrawStr() for arg in self._args)) + #return "( {0}*{1} + ( ! {0} )*{2} )".format(*(arg.get_TTreeDrawStr() for arg in self._args)) + +class Adaptor(object): + """ + Helper class: compose two single-argument callables + """ + __slots__ = ("__weakref__", "getter", "worker") + def __init__(self, getter, worker): + self.getter = getter + self.worker = worker + def __call__(self, arg): + return self.worker(self.getter(arg)) + def __repr__(self): + return "Adaptor({0!r}, {1!r})".format(self.getter, self.worker) + +class op(object): + """ + pseudo-module + """ + @staticmethod + def NOT(sth): + return MathOp("not", sth, outType=boolType).result + @staticmethod + def AND(*args): + return MathOp("and", *args, outType=boolType).result + @staticmethod + def OR(*args): + return MathOp("or", *args, outType=boolType).result + @staticmethod + def switch(test, trueBranch, falseBranch): + return SwitchOp(test, trueBranch, falseBranch).result + @staticmethod + def extMethod(name): + return MethodStub(name) ## TODO somehow take care of includes as well + @staticmethod + def extVar(typeName, name): + return ExtVar(typeName, name).result + @staticmethod + def construct(typeName, args): + return Construct(typeName, args).result + @staticmethod + def initList(typeName, elmName, elms): + return InitList(typeName, elmName, elms).result + @staticmethod + def abs(sth): + return MathOp("abs", sth, outType="Float_t").result + @staticmethod + def sign(sth): + return op.switch(sth!=0., sth/op.abs(sth), 0.) + @staticmethod + def sum(*args, **kwargs): + return MathOp("add", *args, outType=kwargs.pop("outType", "Float_t")).result + @staticmethod + def product(*args): + return MathOp("multiply", *args, outType="Float_t").result + @staticmethod + def log(sth): + return MathOp("log", sth, outType="Float_t").result + @staticmethod + def log10(sth): + return MathOp("log10", sth, outType="Float_t").result + @staticmethod + def max(a1,a2): + return MathOp("max", a1, a2, outType="Float_t").result + @staticmethod + def min(a1,a2): + return MathOp("min", a1, a2, outType="Float_t").result + @staticmethod + def in_range(low, arg, up): + return op.AND(arg > low, arg < up) + + ## range operations + @staticmethod + def len(sth): + return sth.__len__() ## __builtins__.len check it is an integer + @staticmethod + def rng_sum(rng, fun, start=makeConst(0., "Float_t")): + return Reduce(rng, start, ( lambda fn : ( lambda res, elm : res+fn(elm) ) )(fun), valueType=start._typeName, isAssociative=True).result + @staticmethod + def rng_count(rng, pred): ## specialised version of sum, for convenience + return Reduce(rng, makeConst(0, "Int_t"), ( lambda prd : ( lambda res, elm : res+op.switch(prd(elm), makeConst(1, "Int_t"), makeConst(0, "Int_t")) ) )(pred), valueType="Int_t", isAssociative=True).result + @staticmethod + def rng_product(rng, fun): + return Reduce(rng, makeConst(1., "Float_t"), ( lambda fn : ( lambda res, elm : res*fn(elm) ) )(fun), valueType="Float_t", isAssociative=True).result + @staticmethod + def rng_max(rng, fun): + return Reduce(rng, makeConst(float("-inf"), "Float_t"), ( lambda fn : ( lambda res, elm : op.max(res, fn(elm)) ) )(fun), valueType="Float_t", isAssociative=True).result + @staticmethod + def rng_min(rng, fun): + return Reduce(rng, makeConst(float("+inf"), "Float_t"), ( lambda fn : ( lambda res, elm : op.min(res, fn(elm)) ) )(fun), valueType="Float_t", isAssociative=True).result + ## + @staticmethod + def rng_max_element_by(rng, fun=lambda elm : elm): + ## stick with the convention: only work with indices referring to the object containers (not to the position in the one with selected object indices) + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[Reduce(rng, makeConst(-1, "Int_t"), + ( lambda fn,lst : + ( lambda ires, elm : op.switch(fn(elm) > fn(lst[ires]), elm.i, ires) ) + )(fun, cont), + valueType="Int_t", isAssociative=True).result] ## FIXME if we want to automatically check the validity of this, probably need to cache it in the GetItem that is created here + @staticmethod + def rng_min_element_by(rng, fun=lambda elm : elm): + ## stick with the convention: only work with indices referring to the object containers (not to the position in the one with selected object indices) + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[Reduce(rng, makeConst(-1, "Int_t"), + ( lambda fn,lst : + ( lambda ires, elm : op.switch(op.OR(ires < 0, fn(elm) < fn(lst[ires])), elm.i, ires) ) + )(fun, cont), + valueType="Int_t", isAssociative=True).result] + ## early-exit algorithms + @staticmethod + def rng_any(rng, fun=lambda elm : elm): + return Next(rng, fun).isValid() + @staticmethod + def rng_find(rng, pred=lambda elm : makeConst("true", boolType)): + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[op.extMethod("*")(Next(rng, pred))] + + + ## Kinematics and helpers + @staticmethod + def invariant_mass(*args): + return op.extMethod("ROOT::Math::VectorUtil::InvariantMass")(*args) + @staticmethod + def invariant_mass_squared(*args): + return op.extMethod("ROOT::Math::VectorUtil::InvariantMass2")(*args) + @staticmethod + def deltaPhi(a1, a2): + return op.extMethod("Kinematics::deltaPhi")(a1, a2) + @staticmethod + def deltaR(a1, a2): + return op.extMethod("Kinematics::deltaR")(a1, a2) + @staticmethod + def signedDeltaPhi(a1, a2): + return op.extMethod("Kinematics::signedDeltaPhi")(a1, a2) + @staticmethod + def signedDeltaEta(a1, a2): + return op.extMethod("Kinematics::signedDeltaEta")(a1, a2) + +## related helpers +def makeExprAnd(listOfReqs): + """ op.AND for expressions (helper for histfactory etc.) + """ + if len(listOfReqs) > 1: + return adaptArg(op.AND(*listOfReqs)) + elif len(listOfReqs) == 1: + return listOfReqs[0] + else: + return adaptArg(makeConst("true", boolType), typeHint=boolType) +def makeExprProduct(listOfFactors): + """ op.product for expressions (helper for histfactory etc.) + """ + if len(listOfFactors) > 1: + return adaptArg(op.product(*listOfFactors)) + elif len(listOfFactors) == 1: + return listOfFactors[0] + else: + return adaptArg(makeConst(1., "float"), typeHint="float") + +def levelsAbove(prevLevels, thisLevel): + levels = [ SmartTupleStub._RefToOther(rf._name, GoUpBy(rf._refGetter.nSteps+1)) for rf in prevLevels ] + levels.append(SmartTupleStub._RefToOther(thisLevel, GoUpBy(1))) + return levels + +def addIntoHierarchy(name, smartLeaf, parent, hierarchy, capabilities=[]): + for cap in capabilities+list(copy.copy(cap) for cap in hierarchy): + smartLeaf._addCapability(cap) + parent._registerSmartLeaf(name, smartLeaf) + return smartLeaf + +def addRefListFacade(parent, name, collection): + fac = LeafFacade(name, parent) + base = getattr(parent, name) + #fac = SmartTupleStub(base._typeName, parent=parent) ## TODO may need to customise a little bit + fac._addCapability(SmartTupleStub._RefList(base, collection)) + parent._registerSmartLeaf(name, fac) + +def addWPSelGroup(name, prefix, collection, parent, hierarchy): + grp = BranchGroupStub(prefix) ## TODO __dir__ is not so happy with this, maybe need a (fairly simple) customisation of SmartTupleStub and/or BranchGroupStub + addIntoHierarchy(name, grp, parent, hierarchy) + for wp in grp._availLeafs(): + #if not wp.startswith("_"): ## TODO improve a bit on that + if wp.startswith("I") or wp.startswith("B"): + if hasattr(getattr(grp, wp), "__len__"): + addRefListFacade(grp, wp, collection) + else: + grp._addCapability(SmartTupleStub._RefToOther(wp, (lambda coll, iwp : ( lambda wpgrp : collection[wpgrp._get(iwp)] ))(collection, wp))) + return grp + +def addWPWPSelGroup(name, prefix, collectionGroup, parent, hierarchy): + ## FIXME can be removed when truth-matching gives jet index back (instead of index in selected-jets container) + grp = BranchGroupStub(prefix) ## TODO __dir__ is not so happy with this, maybe need a (fairly simple) customisation of SmartTupleStub and/or BranchGroupStub + addIntoHierarchy(name, grp, parent, hierarchy) + for wp in grp._availLeafs(): + #if not wp.startswith("_"): ## TODO improve a bit on that + if wp.startswith("I") or wp.startswith("B"): + if hasattr(getattr(grp, wp), "__len__"): + addRefListFacade(grp, wp, getattr(collectionGroup, wp)) + else: + grp._addCapability(SmartTupleStub._RefToOther(wp, (lambda collGrp, iwp : ( lambda wpgrp : getattr(collGrp, iwp)[wpgrp._get(iwp)] ))(collectionGroup, wp))) + return grp + +class LeafFacade(SmartTupleStub): + __slots__ = ("_name",) + def __init__(self, name, parent=None): + self._name = name + typeName = ( getattr(parent, name)._typeName if parent is not None else "struct" ) + super(LeafFacade, self).__init__(typeName, parent=parent) + ### main use: say we only hide one leaf. All other functionality is in the base class and works fine + def _listedLeafs(self): ## including own + lst = self._listedLeafsBelow() + lst.update(self._name) + return lst + ## FIXME this is wrong - do not want to inherit all branches from the parent + +class SmartObjectStub(ObjectStub): + """ + Imitate an object, with additional methods/properties + """ + def __init__(self, parent, typeName, caps=None, parentStub=None): + ## NOTE: inherits _typ (PyROOT type) + self._caps = caps + self._parentStub = parentStub + super(SmartObjectStub, self).__init__(parent, typeName) + def __getattr__(self, name): + for cap in self._caps: + if name in cap._dir: + cp = copy.copy(cap) + cp._parent = self + return getattr(cp, name) + + hlp = self._parentStub._getCapWithAttr(name) + if hlp: + return getattr(hlp, name) + + return self._get(name) ## fallback (which is the most common) + def __dir__(self): ## including own + return list(self._availLeafs()) + def _availLeafs(self): + avail = set() + for hlp in self._caps: + avail.update(hlp._dir) + for hlp in self._parentStub._extraCap: + avail.update(hlp._dir) + for att in dir(self._typ): + if not att.startswith("_"): + avail.add(att) + return avail + def __repr__(self): + return "SmartObjectStub({0!r}, {1!r})".format(self._parent, self._typeName) + + from collections import MutableMapping + class Map(MutableMapping): + """ + usage: instStub = myMap[typNm](parent) + """ + def __init__(self, items): + self._capDict = dict(items) + def __iter__(self): + for k in self._capDict: + yield k + def __len__(self): + return len(self._capDict) + def __setitem__(self, k, v): + self._capDict[k] = v + def __delitem__(self, k): + del self._capDict[k] + def get(self, k): ## without decoration + return self._capDict[k] + def __getitem__(self, typeName): + if typeName in self._capDict: + return ( lambda typNm, cap : + ( lambda p,pStub=None : SmartObjectStub(p, typNm, caps=cap, parentStub=pStub) ) + )(typeName, self._capDict[typeName]) + else: + raise KeyError("Type with name {} not found".format(typeName)) diff --git a/python/treefactory.py b/python/treefactory.py new file mode 100644 index 0000000..d04e423 --- /dev/null +++ b/python/treefactory.py @@ -0,0 +1,385 @@ +""" +Generate C++ to fill a skimmed tree, with helpers to run it +""" +__all__ = ("createSkimmer", "compileSkimmer", "runSkimmerOnSamples") + +import logging +logger = logging.getLogger(__name__) + +from itertools import chain + +from .factoryhelpers import getTemplate, expandTemplate, expandTemplateAndWrite, getBranchType, insideOtherDirectory + +def collectIdentifiersForSkimmer(selection, branchVars): + identifiers = set() + from .treedecorators import adaptArg + try: + identifiers.update(adaptArg(selection).leafDeps) + except Exception, e: + logger.error("Problem parsing selection for skimmer: {0}".format(str(e))) + for brNm,brVar in branchVars.iteritems(): + try: + identifiers.update(adaptArg(brVar).leafDeps) + except Exception, e: + logger.error("Problem parsing branch {0} : {1}".format(brNm, str(e))) + return identifiers + +def collectReducesFromCuts(cutsList, varList): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _addReduces(depDict, op, aboveCut): + if isinstance(op, TupleOp): + top = aboveCut + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + if op not in depDict: + depDict[op] = ("r_{0}".format(str(uuid.uuid4()).replace("-", "_")), set()) + assert op.uname is None + uname, deps = depDict[op] + op.uname = uname + deps.add(top) + top = op + for ia in op.args: + _addReduces(depDict, ia, top) + else: + raise Exception("_addReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for ct in cutsList: + _addReduces(depDict, ct, ct) + for v in varList: + _addReduces(depDict, v, v) + return depDict ## { op : (uname, depCutsAndOps) } + +def clearReducesFromCuts(cutsList, varList): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _clearReduces(op): + if isinstance(op, TupleOp): + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + op.uname = None + for ia in op.args: + _clearReduces(ia) + else: + raise Exception("_clearReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + for expr in chain(cutsList, varList): + _clearReduces(expr) + +def collectPrecalcFromCuts(cutsList, varList): + from .treedecorators import TupleOp + def _addPrecalc(depDict, op, aboveCut): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = aboveCut + if op.uname is not None: + if op not in depDict: + depDict[op] = (op.uname, set()) + uname, deps = depDict[op] + op.uname = uname ## keep to make sure they get renamed to the same + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addPrecalc(depDict, ia, top) + else: + raise Exception("_addPrecalc called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for ct in cutsList: + _addPrecalc(depDict, ct, ct) + for v in varList: + _addPrecalc(depDict, v, v) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for precalc ops + +def createSkimmerInnerLoop_opt1(selCuts, branchesWithUnameList, fill_tree_lines=None): + """ + """ + ## NOTE this is a bit more limited scope than the histfactory: only need to arrange the cuts and reduces properly + from .treedecorators import adaptArg, opDependsOn, boolType + varsList = [ adaptArg(var) for nm,var,unm in branchesWithUnameList ] + reducesWithAllDeps = collectPrecalcFromCuts(selCuts, varsList) + reducesWithAllDeps.update(collectReducesFromCuts(selCuts, varsList)) + + def selDependsOnSel(s1, s2): + # does the first arg depend on the second? (i.e. is s2 a validity requirement for s1) + vd1 = set(adaptArg(d, typeHint=boolType) for ct in s1.cuts for d in ct.validDeps) + return any( c2 in vd1 for c2 in s2.cuts ) + def selDependsOnRed(s1, r2): + return any( opDependsOn(ct, r2.op) for ct in s1.cuts ) + def redDependsOnSel(r1, s2): + vd1 = set(adaptArg(d, typeHint=boolType) for d in r1.op.validDeps) + return any( c2 in vd1 for c2 in s2.cuts ) + def redDependsOnRed(r1, r2): + vd1 = set(adaptArg(d, typeHint=boolType) for d in r1.op.validDeps) + return opDependsOn(r1.op, r2.op) or any( opDependsOn(si, r2.op) for si in vd1 ) + def cut_reduce_comparator(o1, o2): + ## sel dep on red : via opDependsOn + ## red dep on sel : only if in validDeps + ## sel dep on sel : only if in validDeps of any of the reduce ops it depends on + ## red dep on dep + ## neg if o1 should come first + if isinstance(o1, SelectionNode): + if isinstance(o2, SelectionNode): + if selDependsOnSel(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif selDependsOnSel(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + elif isinstance(o2, PrecalcNode): + if redDependsOnSel(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif selDependsOnRed(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + else: + print "Unexpected type for second argument: ", type(o2) + elif isinstance(o1, PrecalcNode): + if isinstance(o2, SelectionNode): + if selDependsOnRed(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif redDependsOnSel(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + return 0 + elif isinstance(o2, PrecalcNode): + if redDependsOnRed(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif redDependsOnRed(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + else: + print "Unexpected type for second argument: ", type(o2) + elif isinstance(o1, RootNode): + return -1 + else: + print "Unexpected type for first argument: ", type(o1) + + def treeDependsOn(ndWithTree, otherNode): + return cut_reduce_comparator(ndWithTree, otherNode) > 0 or any( treeDependsOn(dnd, otherNode) for dnd in ndWithTree.depNodes ) + def depOnTree(myNode, otherWithTree): + return cut_reduce_comparator(myNode, otherWithTree) > 0 or any( depOnTree(myNode, dnd) for dnd in otherWithTree.depNodes ) + + from .hist_opt_2v2 import RootNode, SelectionNode, PrecalcNode, SkimmerFillBranchesNode, printTree + + skimmerNode = SkimmerFillBranchesNode(branchesWithUnameList, fill_tree_lines=fill_tree_lines) + + unsortedNodes = [ SelectionNode((c,)) for c in selCuts ] + [ PrecalcNode(red) for red in reducesWithAllDeps.iterkeys() ] + + rootNode = RootNode() + for nd in unsortedNodes: + whereAmI = rootNode + #print "Inserting ", nd.ownDbgStr() + while True: + if any(depOnTree(nd, ond) for ond in whereAmI.depNodes): + #print "Stepping to ", whereAmI.ownDbgStr() + whereAmI = next(ond for ond in whereAmI.depNodes if depOnTree(nd, ond)) + else: ## you don't depend on anything here + if depOnTree(whereAmI, nd): ## insert above + #print "Inserting above ", whereAmI.ownDbgStr() + if whereAmI.parent: + nd.parent = whereAmI.parent + nd.parent.removeDependentNode(whereAmI) + nd.parent.addDependentNode(nd) + else: + print "Inserting above the root node - you must be kidding" + whereAmI.parent = nd + nd.addDependentNode(whereAmI) + break + elif len(whereAmI.depNodes) == 1: ## special case here: go as deep as possible to stay in the same order + whereAmI = whereAmI.depNodes[0] + elif len(whereAmI.depNodes) > 1: ## sanity check + print "There's something seriously wrong: more than one dependency for the same node" + else: ## at end, insert below + #print "Inserting below ", whereAmI.ownDbgStr() + nd.parent = whereAmI + whereAmI.addDependentNode(nd) + break + #print "Tree after inserting ", nd.ownDbgStr() + #for ln in printTree(rootNode): + # print ln + + ## collapse selection nodes + ndAbove = rootNode + while len(ndAbove.depNodes) > 0: + nd = ndAbove.depNodes[0] + if isinstance(ndAbove, SelectionNode) and isinstance(nd, SelectionNode): + deps = list(nd.depNodes) + ndAbove.removeDependentNode(nd) + ndAbove.cuts = tuple(chain(ndAbove.cuts, nd.cuts)) + for ad in deps: + nd.removeDependentNode(ad) + ad.parent = ndAbove + ndAbove.addDependentNode(ad) + elif len(ndAbove.depNodes) > 1: + print "ERROR more than one dependent node" + else: + ndAbove = nd + ## stopping criterium makes this the "tip" of the tree + ndAbove.addDependentNode(skimmerNode) + skimmerNode.parent = ndAbove + + from .treedecorators import makeExprAnd + indentStr = " " ## 4 spaces + def cppLines(someNode, indent=0): + dindent = indent + closeBlock = False + if isinstance(someNode, SelectionNode): + yield (indent*indentStr)+"if ( {0} ) {{".format(makeExprAnd(someNode.ownCuts).get_TTreeDrawStr()) + dindent = indent+1 + closeBlock = True + elif isinstance(someNode, PrecalcNode): + yield (indent*indentStr)+"const auto {0} = {1};".format(someNode.op.uname, someNode.op._get_TTreeDrawStr()) + elif isinstance(someNode, SkimmerFillBranchesNode): + for nm,var,unm in someNode.branches: + yield (indent*indentStr)+ "{uname} = ( {expr} );".format(uname=unm, expr=adaptArg(var).get_TTreeDrawStr()) + if someNode.fill_tree_lines: + yield "" + for ln in someNode.fill_tree_lines: + yield (indent*indentStr)+ln + if not someNode.isTerminal: + ## first terminal nodes + for dn in someNode.depNodes: + if dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + ## then dependency tree of the others + for dn in someNode.depNodes: + if not dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + if closeBlock: + yield (indent*indentStr)+"}" + + logger.info("Generating output") + retCode = "\n".join(ln for ln in cppLines(rootNode, indent=2)) + + clearReducesFromCuts(selCuts, varsList) + + return retCode + +################################################################################ +## MAIN METHOD ## +################################################################################ + +import os, os.path + +def createSkimmer(selection, branchVars, skeleton, outdir=None, treeName="t", **kwargs): ## includes, sources as kwargs + """ + Generate C++ code from selection and variable definitions + """ + if not outdir: + outdir = os.getcwd() + logger.warning("No output directory set, writing skimmer code to current directory {0}".format(outdir)) + else: + logger.info("Writing plotter code to output directory {0}".format(outdir)) + + ## common forced helpers + add_packages = list(kwargs.get("add_packages", [])) + include_dirs = list(kwargs.get("include_dirs", [])) + includes = list(kwargs.get("includes", [])) + sources = list(kwargs.get("sources", [])) + linked_libraries = list(kwargs.get("libraries", [])) + add_properties = [] + + # add some defaults + from .factoryhelpers import COMMON_INCLUDE_DIR + include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) + includes.append("kinematics.h") + includes.append("IndexRangeIterator.h") + # add jsoncpp (from external) + from . import pathCommonTools + external_dir = os.path.join(os.path.abspath(pathCommonTools), "external") + include_dirs.append(os.path.join(external_dir, "include")) + sources.append(os.path.join(external_dir, "src", "jsoncpp.cpp")) + + if kwargs.get("addScaleFactorsLib", False): + pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) + linked_libraries.append(os.path.join(pathCP3llbb, "Framework", "libBinnedValues.so")) + include_dirs.append(os.path.join(pathCP3llbb, "Framework", "interface")) + add_properties.append('set_target_properties(skimmer PROPERTIES COMPILE_FLAGS "-DSTANDALONE_SCALEFACTORS")') + + logger.info("List of requested branches: {0}".format(", ".join(nm for nm in branchVars.iterkeys()))) + logger.info("List of requested include files: {0}".format(", ".join(inc for inc in includes))) + logger.info("List of requested source files: {0}".format(", ".join(src for src in sources))) + + expandTemplateAndWrite(os.path.join(outdir, "Skimmer.h"), getTemplate("Skimmer.h") + , BRANCHES="\n ".join( + 'const {typ}& {name} = tree["{name}"].read<{typ}>();'.format(typ=getBranchType(brName, skeleton._skeleton), name=brName) + for brName in collectIdentifiersForSkimmer(selection.cut, branchVars)) + ) + + import uuid + branchesWithUnameList = list((nm, var, "b_{0}".format(str(uuid.uuid4()).replace("-", "_"))) for nm,var in branchVars.iteritems()) ## keep declarations and filling in the same order + expandTemplateAndWrite(os.path.join(outdir, "Skimmer.cc"), getTemplate("Skimmer.cc") + , INCLUDES="\n".join('#include "{0}"'.format(incl) for incl in includes) + ## initialisation + , OUTPUT_TREE_NAME=treeName + , BEFORE_LOOP="\n".join(chain( + + (" {typ}& {uname} = output_tree[\"{name}\"].write<{typ}>();".format( + typ=var._typeName, uname=unm, name=nm ## assumes stubs + ) for nm,var,unm in branchesWithUnameList) + , (" {0}".format(ln) for ln in list(kwargs.get("user_initialisation", []))) + )) + ## event loop + , GLOBAL_CUT_AND_OUTPUT_BRANCHES_FILLING=createSkimmerInnerLoop_opt1(selection.cuts, branchesWithUnameList, fill_tree_lines=("output_tree.fill();", "++selected_entries;")) + ) + + expandTemplateAndWrite(os.path.join(outdir, "CMakeLists.txt"), getTemplate("CMakeLists_skimmer.txt") + , ADD_FIND="\n".join("find_package({0})".format(fnd) for fnd in add_packages) + , ADD_INCLUDE_DIRS=" ".join(include_dirs) + , ADD_SOURCES =" ".join(sources) + , ADD_PROPERTIES="\n".join(add_properties) + , ADD_LINK="\n".join("target_link_libraries(skimmer {0})".format(lib) for lib in linked_libraries) + ) + +def prepareWorkdir(workdir): + # need much less than for plotter (output dir is passed as arg to skimmer, name generated from dataset) + if os.path.exists(workdir): + raise Exception("Work directory {0} exists already!".format(workdir)) + os.mkdir(workdir) + return workdir + +################################################################################ +## HELPER: COMPILE SKIMMER DIRECTORY ## +################################################################################ + +import subprocess + +def compileSkimmer(skimmerdir): + """ + Copy the necessary files and compile the skimmer + """ + try: + from .import pathCommonTools + subprocess.check_output(["cp", "-r", os.path.join(pathCommonTools, "cmake"), skimmerdir], stderr=subprocess.STDOUT) + subprocess.check_output(["cp", os.path.join(pathCommonTools, "templates", "generateHeader.sh"), skimmerdir], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + with insideOtherDirectory(skimmerdir): + logger.info("Compiling skimmer in {0}".format(skimmerdir)) + try: + logger.debug("Calling CMake") + subprocess.check_output(["cmake", "."], stderr=subprocess.STDOUT) + logger.debug("Calling make") + subprocess.check_output(["make"], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + return os.path.join(skimmerdir, "skimmer.exe") diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py new file mode 100755 index 0000000..6cdfc85 --- /dev/null +++ b/scripts/example_plotsHtoZA.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python2 +""" +Example: H->ZA tree decoration, interactive exploration based on that, and a plotter +""" +import ROOT +ROOT.PyConfig.IgnoreCommandLineOptions = True # let python do option parsing +## logging +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) +""" +za decoration + +This part would go into a zadeco.py module and used as 'from cp3_llbb.ZATools.zadeco import decoratedZATree +""" +from cp3_llbb.CommonTools.lldeco import LeptonRef +from cp3_llbb.CommonTools.treedecorators import levelsAbove, addIntoHierarchy, TreeStub, BranchGroupStub, SmartTupleStub, SmartObjectStub, LeafFacade + +import ROOT +## Load the library +ROOT.gROOT.ProcessLine('#include "cp3_llbb/ZAAnalysis/interface/Types.h"') + +hZAObjectStubMap = SmartObjectStub.Map({ + "HtoZA::Lepton" : [ + LeptonRef("L") + , SmartTupleStub._RefToOther("hlt", lambda l : l.record.hlt.object[l.hlt_idx]) + ] + , "HtoZA::Dilepton" : [ + SmartTupleStub._RefToOther("l1", lambda ll : ll.hZA.leptons[ll.ilep1]) + , SmartTupleStub._RefToOther("l2", lambda ll : ll.hZA.leptons[ll.ilep2]) + , SmartTupleStub._RefToOther("hlt1", lambda ll : ll.record.hlt.object[ll.hlt_idxs.first]) + , SmartTupleStub._RefToOther("hlt2", lambda ll : ll.record.hlt.object[ll.hlt_idxs.second]) + ] + , "HtoZA::Met" : [] + , "HtoZA::Jet" : [ + ## Framework jet ref + SmartTupleStub._RefToOther("J", lambda j : ( + getattr(j.record, "jets_{}".format(j.systVar._prefix.strip("_")))[j.idx] + if hasattr(j, "systVar") else j.record.jets[j.idx] + ) ) + ] + , "HtoZA::Dijet" : [ + ## HtoZA::Jet refs + SmartTupleStub._RefToOther("j1", lambda jj : (jj.systVar.jets[jj.ijet1] if hasattr(jj, "systVar") else jj.hZA.jets[jj.ijet1])) + , SmartTupleStub._RefToOther("j2", lambda jj : (jj.systVar.jets[jj.ijet2] if hasattr(jj, "systVar") else jj.hZA.jets[jj.ijet2])) + ## Framework jet refs + , SmartTupleStub._RefToOther("J1", lambda jj : jj.j1.J) + , SmartTupleStub._RefToOther("J2", lambda jj : jj.j2.J) + ] + , "HtoZA::MELAAngles" : [] + }) +hZAObjectStubMap["HtoZA::DileptonDijet"] = hZAObjectStubMap.get("HtoZA::Dilepton")+hZAObjectStubMap.get("HtoZA::Dijet") + +def decoratedZATree(someTree): + tree = TreeStub(someTree) + recHierarchy = levelsAbove([], "record") + # + evt = BranchGroupStub("event_") + addIntoHierarchy("event", evt, tree, recHierarchy) + evtHierarchy = levelsAbove(recHierarchy, "event") + # > event + addIntoHierarchy("pdf", BranchGroupStub("pdf_"), evt, evtHierarchy) + # < event + hlt = BranchGroupStub("hlt_") + addIntoHierarchy("hlt", hlt, tree, recHierarchy) + # > hlt + hltHierarchy = levelsAbove(recHierarchy, "hlt") + addIntoHierarchy("object", BranchGroupStub("object_"), hlt, hltHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda objs : objs.record.hlt.object_p4.size()) ] ) + # < hlt + ## objects + addIntoHierarchy("muons", BranchGroupStub("muon_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda muons : muons.record.muon_charge.size()) ] ) + addIntoHierarchy("electrons", BranchGroupStub("electron_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda electrons : electrons.record.electron_charge.size()) ] ) + addIntoHierarchy("jets", BranchGroupStub("jet_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda jets : jets.record.jet_gen_charge.size()) ] ) + for systVar in ("jecdown", "jecup", "jerdown", "jerup"): + cNm = "jets_{}".format(systVar) + addIntoHierarchy("jets_{}".format(systVar), BranchGroupStub("jet_{}_".format(systVar)), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(( lambda sv : ( lambda jets : getattr(jets.record, "jet_{}_gen_p4".format(sv)).size() ) )(systVar)) ]) + met = BranchGroupStub("met_") + addIntoHierarchy("met", met, tree, recHierarchy) + cand = BranchGroupStub("hZA_") + addIntoHierarchy("hZA", cand, tree, recHierarchy) + hZAHierarchy = levelsAbove(recHierarchy, "hZA") + # > hZA + gen = BranchGroupStub("gen_") + addIntoHierarchy("gen", gen, cand, hZAHierarchy) + # + for systVar in (None, "jecdown", "jecup", "jerdown", "jerup"): + if systVar: + cand_sv = BranchGroupStub("{}_".format(systVar)) + addIntoHierarchy(systVar, cand_sv, cand, hZAHierarchy) + svHierarchy = levelsAbove(hZAHierarchy, "systVar") ## used by jet refs + svJetColl = getattr(tree, "jets_{}".format(systVar)) + else: + cand_sv = cand + svHierarchy = hZAHierarchy + svJetColl = tree.jets + + addIntoHierarchy("leptons", LeafFacade("leptons", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.leptons, hZAObjectStubMap)]) + addIntoHierarchy("ll", LeafFacade("ll", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.ll, hZAObjectStubMap)]) + + addIntoHierarchy("jets", LeafFacade("jets", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.jets, hZAObjectStubMap)]) + addIntoHierarchy("jj", LeafFacade("jj", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.jj, hZAObjectStubMap)]) + + addIntoHierarchy("lljj_cmva", LeafFacade("lljj_cmva", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.lljj_cmva, hZAObjectStubMap)]) + addIntoHierarchy("lljj_deepCSV", LeafFacade("lljj_deepCSV", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.lljj_deepCSV, hZAObjectStubMap)]) + + return tree + +""" +Interactive +""" +def doInteractive(): + from cp3_llbb.CommonTools.treedecorators import adaptArg, op + + tChain = ROOT.TChain("t") + tChain.Add("/storage/data/cms/store/user/asaggio/DYToLL_2J_13TeV-amcatnloFXFX-pythia8/DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2/180219_144051/0000/output_mc_271.root") + tChain.GetEntries() ## force load + tup = decoratedZATree(tChain) + + def toDraw(sth): + return adaptArg(sth).get_TTreeDrawStr() + def leafDeps(expr): + return list(expr._parent.leafDeps) + + import IPython; IPython.embed() + +""" +Plotter demo: generate plots +""" +from cp3_llbb.CommonTools.plots import Plot, EquidistantBinning, Selection + +jetVars = { + "PT" : (lambda j : j.p4.Pt() , EquidistantBinning(50, 0., 350.), "transverse momentum", "p_{T} (GeV/c)") + , "ETA" : (lambda j : j.p4.Eta(), EquidistantBinning(50, -2.5, 2.5), "pseudorapidity" , "#eta") + ## flavour taggers + , "CSVv2" : (lambda j : j.pfCombinedInclusiveSecondaryVertexV2BJetTags, EquidistantBinning(50, 0., 1.), "CSVv2 classifier output", "CSVv2") + , "cMVAv2" : (lambda j : j.pfCombinedMVAV2BJetTags, EquidistantBinning(50, -1., 1.), "cMVAv2 classifier output", "cMVAv2") + , "deepCSV" : (lambda j : j.pfDeepCSVJetTags_probb+j.pfDeepCSVJetTags_probbb, EquidistantBinning(50, 0., 1.), "DeepCSV classifier output: probb+probbb", "probb+probbb") + } + +lepVars = { + "PT" : (lambda l : l.p4.Pt() , EquidistantBinning(50, 0., 300.), "transverse momentum", "p_{T} (GeV/c)") + , "ETA" : (lambda l : l.p4.Eta(), EquidistantBinning(50, -2.5, 2.5), "pseudorapidity" , "#eta") + } + +lepVars_split = { + "IsoEA" : (lambda l : l.relativeIsoR03_withEA, -1., EquidistantBinning(50, 0., 1.), "PFRelIsoR03 with EA", "RelIsoR03_withEA") + } + +## Get scale factors +scale_factors = set() +from cp3_llbb.CommonTools.scalefactors import get_scalefactors +from cp3_llbb.CommonTools.lldeco import LeptonScaleFactor, DiLeptonScaleFactor +sf_ele_trk = get_scalefactors("sf_ele_2016_trk" , "lepton", ("electron_2016_moriond2017", "gsf_tracking"), combine="weight") +sf_ele_IDL = get_scalefactors("sf_ele_2016_IDLtrk" , "lepton", ("electron_2016_moriond2017", "id_loose"), combine="weight") +sf_mu_trk = get_scalefactors("sf_muon_2016_trk", "lepton", ("muon_2016_80", "tracking"), combine="weight") +sf_mu_IDL = get_scalefactors("sf_muon_2016_IDLtrk", "lepton", ("muon_2016_80", "id_loose"), combine="weight") +lepton_sf_IDL = LeptonScaleFactor((sf_ele_trk, sf_ele_IDL), (sf_mu_trk, sf_mu_IDL)) +sf_elel_trig = get_scalefactors("sf_elel_2016_trig", "dilepton", "eleltrig_2016_HHMoriond17") +sf_elmu_trig = get_scalefactors("sf_elmu_2016_trig", "dilepton", "elmutrig_2016_HHMoriond17") +sf_muel_trig = get_scalefactors("sf_muel_2016_trig", "dilepton", "mueltrig_2016_HHMoriond17") +sf_mumu_trig = get_scalefactors("sf_mumu_2016_trig", "dilepton", "mumutrig_2016_HHMoriond17") +dilepton_sf_trig = DiLeptonScaleFactor(elelSF=sf_elel_trig, elmuSF=sf_elmu_trig, muelSF=sf_muel_trig, mumuSF=sf_mumu_trig) +cmva_discriVar = {"BTagDiscri":lambda j : j.pfCombinedMVAV2BJetTags} +sf_jet_btag_cmvaLoose = get_scalefactors("sf_jet_2016_btag_cmvaLoose", "jet", ("btag_2016_moriond2017", "cMVAv2_loose"), additionalVariables=cmva_discriVar) +sf_jet_btag_cmvaMedium = get_scalefactors("sf_jet_2016_btag_cmvaMedium", "jet", ("btag_2016_moriond2017", "cMVAv2_medium"), additionalVariables=cmva_discriVar) +for aSF in ( sf_ele_trk, sf_ele_IDL, sf_mu_trk, sf_mu_IDL + , sf_elel_trig, sf_elmu_trig, sf_muel_trig, sf_mumu_trig + , sf_jet_btag_cmvaLoose, sf_jet_btag_cmvaMedium + ): + scale_factors.add(aSF) +## FIXME temporary hack for ZA (no cluster eta in trees) +from cp3_llbb.CommonTools.scalefactors import BinningParameters, binningVariablesByName +for sf in scale_factors: + for bv in sf._args: + if isinstance(bv, BinningParameters): + new_bv = dict() + for bvNm in bv.binningVars.iterkeys(): + if bvNm.endswith("ClusEta"): + new_bv[bvNm] = binningVariablesByName[bvNm.replace("ClusEta", "Eta")] + for bvNm, bvFun in new_bv.iteritems(): + bv.binningVars[bvNm] = bvFun +## FIXME END + +def make1DPlot(name, variable, selection, binning, **kwargs): + """ 1D plot (allows to select categories, combined plot or both without duplication) """ + out = [] + doCombined = kwargs.pop("combined", True) + categories = kwargs.pop("categories", None) + if doCombined: + out.append(Plot.make1D(name, variable, selection, binning, **kwargs)) + if categories: + out += [ Plot.make1D("{0}_{cat}".format(name, cat=catNm), variable, Selection.addTo(selection, cut=catCut), binning, **kwargs) for catNm, catCut in categories.iteritems() ] + return out + +def makeControlPlots(record, hza): + from cp3_llbb.CommonTools.treedecorators import op, makeConst, boolType + from cp3_llbb.CommonTools.lldeco import onlyElectron, onlyMuon + + ## Define selections (this part would go into a shared python module in ZATools) + from collections import OrderedDict as odict + selDict = odict() + + evtSel_all = Selection([], [ record.event.weight, record.event.pu_weight ], None) + + llSel_loose = lambda ll : ( + ll.isOS + #, onlyElectron(lambda el : el.POGIDHLTPre, muonValue=makeConst("true", boolType))(ll.l1) + #, onlyElectron(lambda el : el.POGIDHLTPre, muonValue=makeConst("true", boolType))(ll.l2) + ) + llCand = op.rng_find(hza.ll, llSel_loose) + selDict["ll"] = Selection.addTo(evtSel_all, + weight=[ lepton_sf_IDL(llCand.l1), lepton_sf_IDL(llCand.l2), dilepton_sf_trig(llCand) ], + cut=[ ( op.rng_min(hza.ll, lambda ll : op.invariant_mass(ll.l1.L.p4, ll.l2.L.p4) ) > 12. ) + , op.rng_any(hza.ll, llSel_loose) ], + candidate=llCand) + lljjCand = op.rng_find(hza.lljj_deepCSV, lambda lljj : llSel_loose(lljj)) + selDict["lljj"] = Selection.addTo(evtSel_all, + weight=[ lepton_sf_IDL(lljjCand.l1), lepton_sf_IDL(lljjCand.l2), dilepton_sf_trig(lljjCand) ], + cut=[ ( op.rng_min(hza.ll, lambda ll : op.invariant_mass(ll.l1.L.p4, ll.l2.L.p4) ) > 12. ) + , op.rng_any(hza.lljj_deepCSV, lambda lljj : llSel_loose(lljj)) ], + candidate=lljjCand) + + from itertools import product, tee + allFlavCats = list("".join((f1,f2)) for f1,f2 in product(*tee(("El", "Mu"),2))) + flavCats_ll = dict((fc, getattr(llCand, "is{0}".format(fc))) for fc in allFlavCats) + flavCats_lljj = dict((fc, getattr(lljjCand, "is{0}".format(fc))) for fc in allFlavCats) + + from itertools import izip, count, chain + ## Define plots + plots = [] + for psName, presel in selDict.iteritems(): + cand = presel.candidate + flavCats = dict() + if psName.startswith("lljj") or psName.startswith("llbb"): + flavCats = flavCats_lljj + elif psName.startswith("ll"): + flavCats = flavCats_ll + plots += make1DPlot("{0}_llMZ".format(psName) + , op.invariant_mass(cand.l1.L.p4, cand.l2.L.p4), presel + , EquidistantBinning(50, 0., 200.), categories=flavCats + , title="Dilepton invariant mass", xTitle="Invariant Mass (GeV/c^{2})") + + plots += chain.from_iterable(make1DPlot("{0}_L{1:d}_{2}".format(psName, iL, varName) + , fun(lep.L), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, binning, title, xTitle) in lepVars.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + + plots += chain( + chain.from_iterable(make1DPlot("{0}_L{1:d}_mu_{2}".format(psName, iL, varName) + , onlyMuon(fun, electronValue=elVal)(lep), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, elVal, binning, title, xTitle) in lepVars_split.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + , chain.from_iterable(make1DPlot("{0}_L{1:d}_el_{2}".format(psName, iL, varName) + , onlyElectron(fun, muonValue=muVal)(lep), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, muVal, binning, title, xTitle) in lepVars_split.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + ) + + if psName.startswith("lljj"): + plots += chain.from_iterable( + make1DPlot("{0}_J{1:d}_{2}".format(psName, iJ, varName) + , fun(jet), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, binning, title, xTitle) in jetVars.iteritems() + for iJ, jet in izip(count(1), (cand.J1, cand.J2)) + ) + + return plots + +if __name__ == "__main__": + import argparse + argparser = argparse.ArgumentParser() + argparser.add_argument("-i", "--interactive", action="store_true", help="Launch an IPython shell to inspect the tree") + argparser.add_argument("--reuseplotter", action="store_true", help="Don't write and compile the plotter(s), but reuse from a previous run") + argparser.add_argument("--reusehistos", action="store_true", help="Don't create or run the plotters") + argparser.add_argument("--skipplotit", action="store_true", help="Don't produce PDF plots") + args = argparser.parse_args() + + if args.interactive: + doInteractive() + else: ## make and run a plotter + logger.info("Loading and decorating skeleton tree") + skelF = ROOT.TFile.Open("/storage/data/cms/store/user/asaggio/DYToLL_2J_13TeV-amcatnloFXFX-pythia8/DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2/180219_144051/0000/output_mc_271.root") + tup = decoratedZATree(skelF.Get("t")) + + from itertools import chain + controlPlots = makeControlPlots(tup, tup.hZA) + controlPlots_systVariations = [] + for svNm in ("jecup", "jecdown", "jerup", "jerdown"): + plots_sv = makeControlPlots(tup, getattr(tup.hZA, svNm)) + for ap in plots_sv: + ap.name = "{0}__{1}".format(ap.name, svNm) + controlPlots_systVariations += plots_sv + + import os.path + from cp3_llbb.SAMADhi.SAMADhi import DbStore, Sample + db = DbStore() + ctrl_samples = dict((smpName, { + "tree_name" : "t" + , "sample_cut" : "1." + , "files" : [ u"/storage/data/cms{}".format(f.lfn) for f in db.find(Sample, Sample.name.like(smpName)).one().files ] + }) for smpName in [ + u"DoubleEG_Run2016H-03Feb2017-v3_v6.2.0+80X_ZAAnalysis_2018-02-16" + , u"DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2_v6.2.0+80X_ZAAnalysis_2018-02-16" + ]) + + from cp3_llbb.CommonTools.histfactory import createPlotter, compilePlotter, runPlotterOnSamples, prepareWorkdir + import os + workdir = os.path.join(os.getcwd(), "latest_zaplots") + if not args.reusehistos: + if not args.reuseplotter: + plotterdir, histosdir, plotsdir = prepareWorkdir(workdir) + + includes = [ "Math/VectorUtil.h", "ScaleFactors.h" ] + sf_init_lines = list(chain.from_iterable(sf.cpp_initCode for sf in scale_factors)) + + createPlotter(controlPlots+controlPlots_systVariations, tup, includes=includes, outdir=plotterdir, + user_initialisation=sf_init_lines, addScaleFactorsLib=True) + plotterName = compilePlotter(plotterdir) + else: + plotterdir = os.path.join(workdir, "plotter") + histosdir = os.path.join(workdir, "histos") + plotterName = os.path.join(plotterdir, "plotter.exe") + if not os.path.exists(plotterName): + raise Exception("No such file {}".format(plotterName)) + logger.info("Reusing with-MC plotter {}".format(plotterName)) + plotsdir = os.path.join(workdir, "plots") + + ## run the plotter + from cp3_llbb.CommonTools.slurmhelpers import makeSlurmTasksMonitor as makeTasksMonitor + clusMon = makeTasksMonitor() + runPlotterOnSamples(plotterName, ctrl_samples, outdir=histosdir, useCluster=True, clusterworkdir=os.path.join(workdir, "slurm_histos"), taskMon=clusMon) + clusMon.collect(wait=60) ## wait for the jobs to finish + + ## prepare plotting + from cp3_llbb.CommonTools.plotithelpers import writePlotIt + with open(os.path.join(workdir, "plots.yml"), "w") as plotsFile: + writePlotIt(controlPlots, plotsFile) + + ### Example: run plotIt in one go + ##import subprocess + ##plotIt = os.path.join(os.path.dirname(pathCommonTools), "plotIt", "plotIt") + ##if not args.skipplotit: + ## logger.info("Running plotIt") + ## try: + ## with open(os.path.join(plotsdir, "out.log"), "w") as logFile: + ## subprocess.check_call([plotIt, "-i", workdir, "-o", plotsdir, os.path.join(pathCommonTools, "plotit", "")], stdout=logFile, cwd=pathCommonTools) + ## except subprocess.CalledProcessError, e: + ## logger.error("Command {0} failed with exit code {1}\n{2}".format(" ".join(e.cmd), e.returncode, e.output)) diff --git a/scripts/nanoAODInteractive.py b/scripts/nanoAODInteractive.py new file mode 100755 index 0000000..ce03d35 --- /dev/null +++ b/scripts/nanoAODInteractive.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python2 +import ROOT +from cp3_llbb.CommonTools.nanoaoddeco import decoratedNanoAOD +from cp3_llbb.CommonTools.treedecorators import adaptArg, op + +tChain = ROOT.TChain("Events") +tChain.Add("root://xrootd-cms.infn.it//store/data/Run2016E/SingleMuon/NANOAOD/05Jan2018-v1/00000/1CC58832-C5F5-E711-BBC2-0CC47A13D3B2.root") +tChain.GetEntries() ## force load +tup = decoratedNanoAOD(tChain) + +def toDraw(sth): + return adaptArg(sth).get_TTreeDrawStr() +def leafDeps(expr): + return list(expr._parent.leafDeps) + +import IPython; IPython.embed() diff --git a/templates/CMakeLists_plotter.txt.tpl b/templates/CMakeLists_plotter.txt.tpl new file mode 100644 index 0000000..14c7a82 --- /dev/null +++ b/templates/CMakeLists_plotter.txt.tpl @@ -0,0 +1,69 @@ +cmake_minimum_required (VERSION 2.6) +project (Plotter) + +# Configure paths +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/modules") + +# Detect if we are inside a CMSSW env +include(CMSSW) + +# Ensure C++11 is available +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O2") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + +# Find ROOT +find_package(ROOT REQUIRED) +include_directories(${ROOT_INCLUDE_DIR}) +find_library(ROOT_TMVA_LIBRARY TMVA ${ROOT_LIBRARY_DIR} NO_DEFAULT_PATH) + +find_library(ROOT_GENVECTOR_LIB GenVector PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) +find_library(ROOT_TREEPLAYER_LIB TreePlayer PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) + +{{ADD_FIND}} + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_BINARY_DIR}) +include_directories({{ADD_INCLUDE_DIRS}}) + +set(PLOTTER_SOURCES + Plotter.cc + {{ADD_SOURCES}} + ) + +if(IN_CMSSW) + set_property(DIRECTORY ${PROJECT_BINARY_DIR} PROPERTY COMPILE_DEFINITIONS CP3LLBB_PLOTTER) + include(CP3Dictionaries) + list(APPEND PLOTTER_SOURCES ${DICTIONARIES_SOURCES}) + add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/classes.h COMMAND + ${PROJECT_SOURCE_DIR}/generateHeader.sh + ${PROJECT_BINARY_DIR}/classes.h + COMMENT "Generating classes.h...") + STRING(REPLACE ":" ";" CMSSW_INCLUDES $ENV{CMSSW_SEARCH_PATH}) + STRING(REPLACE ":" ";" FWLITE_INCLUDES $ENV{CMSSW_FWLITE_INCLUDE_PATH}) + include_directories(${CMSSW_INCLUDES} ${FWLITE_INCLUDES}) + +endif() + +list(APPEND PLOTTER_SOURCES classes.h) + +add_executable(plotter ${PLOTTER_SOURCES}) +set_target_properties(plotter PROPERTIES OUTPUT_NAME "plotter.exe") +{{ADD_PROPERTIES}} + +# Link libraries +target_link_libraries(plotter ${ROOT_LIBRARIES}) +target_link_libraries(plotter ${ROOT_GENVECTOR_LIB}) +target_link_libraries(plotter ${ROOT_TMVA_LIBRARY}) +target_link_libraries(plotter ${ROOT_TREEPLAYER_LIB}) +{{ADD_LINK}} + +find_library(TREEWRAPPER_LIB cp3_llbbTreeWrapper PATHS + "$ENV{CMSSW_BASE}/lib/$ENV{SCRAM_ARCH}" NO_DEFAULT_PATH) +target_link_libraries(plotter ${TREEWRAPPER_LIB}) diff --git a/templates/CMakeLists_skimmer.txt.tpl b/templates/CMakeLists_skimmer.txt.tpl new file mode 100644 index 0000000..d319483 --- /dev/null +++ b/templates/CMakeLists_skimmer.txt.tpl @@ -0,0 +1,72 @@ +cmake_minimum_required (VERSION 2.6) +project (Skimmer) + +# Configure paths +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/modules") + +# Detect if we are inside a CMSSW env +include(CMSSW) + +# Ensure C++11 is available +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O2") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + +# Find ROOT +find_package(ROOT REQUIRED) +include_directories(${ROOT_INCLUDE_DIR}) +find_library(ROOT_TMVA_LIBRARY TMVA ${ROOT_LIBRARY_DIR} NO_DEFAULT_PATH) + +find_library(ROOT_GENVECTOR_LIB GenVector PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) +find_library(ROOT_TREEPLAYER_LIB TreePlayer PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) + +{{ADD_FIND}} + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_BINARY_DIR}) +include_directories({{ADD_INCLUDE_DIRS}}) + +## TODO add EXTERNAL_DIR/include to ADD_INCLUDES and EXTERNAL_DIR/src/jsoncpp.cpp to sources + + +set(SKIMMER_SOURCES + Skimmer.cc + {{ADD_SOURCES}} + ) + +if(IN_CMSSW) + set_property(DIRECTORY ${PROJECT_BINARY_DIR} PROPERTY COMPILE_DEFINITIONS CP3LLBB_PLOTTER) + include(CP3Dictionaries) + list(APPEND SKIMMER_SOURCES ${DICTIONARIES_SOURCES}) + add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/classes.h COMMAND + ${PROJECT_SOURCE_DIR}/generateHeader.sh + ${PROJECT_BINARY_DIR}/classes.h + COMMENT "Generating classes.h...") + STRING(REPLACE ":" ";" CMSSW_INCLUDES $ENV{CMSSW_SEARCH_PATH}) + STRING(REPLACE ":" ";" FWLITE_INCLUDES $ENV{CMSSW_FWLITE_INCLUDE_PATH}) + include_directories(${CMSSW_INCLUDES} ${FWLITE_INCLUDES}) + +endif() + +list(APPEND SKIMMER_SOURCES classes.h) + +add_executable(skimmer ${SKIMMER_SOURCES}) +set_target_properties(skimmer PROPERTIES OUTPUT_NAME "skimmer.exe") +{{ADD_PROPERTIES}} + +# Link libraries +target_link_libraries(skimmer ${ROOT_LIBRARIES}) +target_link_libraries(skimmer ${ROOT_GENVECTOR_LIB}) +target_link_libraries(skimmer ${ROOT_TMVA_LIBRARY}) +target_link_libraries(skimmer ${ROOT_TREEPLAYER_LIB}) +{{ADD_LINK}} + +find_library(TREEWRAPPER_LIB cp3_llbbTreeWrapper PATHS + "$ENV{CMSSW_BASE}/lib/$ENV{SCRAM_ARCH}" NO_DEFAULT_PATH) +target_link_libraries(skimmer ${TREEWRAPPER_LIB}) diff --git a/templates/Plot.tpl b/templates/Plot.tpl new file mode 100644 index 0000000..b432856 --- /dev/null +++ b/templates/Plot.tpl @@ -0,0 +1,6 @@ + __cut = ({{CUT}}); + if (__cut) { + __weight = ({{WEIGHT}}); + fill({{HIST}}.get(), {{VAR}}, __weight); + } + diff --git a/templates/Plotter.cc.tpl b/templates/Plotter.cc.tpl new file mode 100644 index 0000000..cb0d219 --- /dev/null +++ b/templates/Plotter.cc.tpl @@ -0,0 +1,104 @@ +// vim: syntax=cpp + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +{{INCLUDES}} + +volatile bool MUST_STOP = false; + +void Plotter::plot(const std::string& output_file) { + +{{BEFORE_LOOP}} + + size_t index = 1; + while (tree.next()) { + + if (MUST_STOP) { + break; + } + + if (m_sample_cut){ + if( !m_sample_cut->EvalInstance() ) + continue; + } + + const std::string filename = raw_tree->GetFile()->GetName(); + const bool runOnElEl = filename.find("DoubleEG") != std::string::npos; + const bool runOnMuMu = filename.find("DoubleMuon") != std::string::npos; + const bool runOnMuEl = filename.find("MuonEG") != std::string::npos; + const bool runOnElMu = runOnMuEl; + const bool runOnMC = !(runOnElEl || runOnMuMu || runOnMuEl); + + if ((index - 1) % 1000 == 0) + std::cout << "Processing entry " << index << " of " << tree.getEntries() << std::endl; + + bool __cut = false; + double __weight = 0; + +{{IN_LOOP}} + + index++; + } + + std::unique_ptr outfile(TFile::Open(output_file.c_str(), "recreate")); + +{{AFTER_LOOP}} +} + +int main(int argc, char** argv) { + + gErrorIgnoreLevel = kWarning + 1; + + // very basic command-line argument parsing + // 1: name of the output file + // 2: name of the input tree + // 3: selection cut for the TChain + // 4-...: names of input files + if ( argc < 5 ) { // FIXME check if the first one is special indeed + std::cout << "Not enough arguments to do anything meaningful -> exiting" << std::endl; + std::cout << "Command line arguments should be : outFileName, inTreeName, sampleCut, inFile1 [inFile2...]" << std::endl; + } else { + std::string output_file{argv[1]};// FIXME + TChain t{argv[2]}; // FIXME + std::string cut{argv[3]}; + std::size_t n_files{0}; + for ( std::size_t idx{4}; argc != idx; ++idx ) { // FIXME + n_files += t.Add(argv[idx], 0); + } + if ( n_files != (argc-4) ) { // FIXME + std::cerr << "Error: some files failed to open" << std::endl; + return 2; + } + + // Register CTRL+C signal handler + struct sigaction sigIntHandler; + sigIntHandler.sa_handler = [](int signal) { MUST_STOP = true; }; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + sigaction(SIGINT, &sigIntHandler, NULL); + + // Set cache size to 10 MB + t.SetCacheSize(10 * 1024 * 1024); + // Learn tree structure from the first 10 entries + t.SetCacheLearnEntries(10); + + TH1::SetDefaultSumw2(kTRUE); + + Plotter p(cut, &t); + p.plot(output_file); + std::cout << "Done. Output saved as '" << output_file << "'" << std::endl; + } + return 0; +} diff --git a/templates/Plotter.h.tpl b/templates/Plotter.h.tpl new file mode 100644 index 0000000..22c5d29 --- /dev/null +++ b/templates/Plotter.h.tpl @@ -0,0 +1,110 @@ +// vim: syntax=cpp + +#pragma once + +#include +#include +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include +#include + +// Generated automatically +#include + +#include + +// No other choices, as ROOT strips the 'std' namespace from types... +using namespace std; + +template +size_t Length$(const T& t) { + return t.size(); +} + +// Traits to check if a given typename is iterable + +template struct is_stl_container_like +{ + typedef typename std::remove_const::type>::type test_type; + + template static constexpr bool test( + A * pt, + A const * cpt = nullptr, + decltype(pt->begin()) * = nullptr, + decltype(pt->end()) * = nullptr, + decltype(cpt->begin()) * = nullptr, + decltype(cpt->end()) * = nullptr, + typename A::iterator * pi = nullptr, + typename A::const_iterator * pci = nullptr, + typename A::value_type * pv = nullptr) { + + typedef typename A::iterator iterator; + typedef typename A::const_iterator const_iterator; + typedef typename A::value_type value_type; + return std::is_samebegin()),iterator>::value && + std::is_sameend()),iterator>::value && + std::is_samebegin()),const_iterator>::value && + std::is_sameend()),const_iterator>::value && + std::is_same::value && + std::is_same::value; + + } + + template static constexpr bool test(...) { + return false; + } + + static const bool value = test(nullptr); +}; + +class Plotter { + public: + Plotter(const std::string& cut, TChain* ttree): + tree(ttree), m_sample_cut(nullptr), raw_tree(ttree) + { + if(cut != "" && cut != "1"){ + m_sample_cut = std::unique_ptr(new TTreeFormula("sample_cut", cut.c_str(), ttree)); + ttree->SetNotify(m_sample_cut.get()); + } + }; + + void plot(const std::string&); + + private: + + // Helper functions to fill an histogram + template::value, bool>::type = 0> void fill(TH1* h, const T& value, double weight) { + h->Fill(value, weight); + } + + template::value, bool>::type = 0> void fill(TH1* h, const T& value, double weight) { + for (const auto& v: value) + h->Fill(v, weight); + } + + // 2D + template void fill(TH2* h, const T& value_x, const U& value_y, double weight) { + h->Fill(value_x, value_y, weight); + } + + // 3D + template void fill(TH3* h, const T& value_x, const U& value_y, const V& value_z, double weight) { + h->Fill(value_x, value_y, value_z, weight); + } + + ROOT::TreeWrapper tree; + std::unique_ptr m_sample_cut; + TChain* raw_tree; + + // List of branches + {{BRANCHES}} +}; diff --git a/templates/SavePlot.tpl b/templates/SavePlot.tpl new file mode 100644 index 0000000..ae90e09 --- /dev/null +++ b/templates/SavePlot.tpl @@ -0,0 +1,3 @@ + {{UNIQUE_NAME}}->SetName("{{PLOT_NAME}}"); + {{UNIQUE_NAME}}->Write("{{PLOT_NAME}}", TObject::kOverwrite); + diff --git a/templates/Skimmer.cc.tpl b/templates/Skimmer.cc.tpl new file mode 100644 index 0000000..d3b8359 --- /dev/null +++ b/templates/Skimmer.cc.tpl @@ -0,0 +1,212 @@ +// vim: syntax=cpp + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +{{INCLUDES}} + +volatile bool MUST_STOP = false; + +void Skimmer::skim(const std::string& output_file) { + + std::unique_ptr outfile(TFile::Open(output_file.c_str(), "recreate")); + + TTree* output_tree_ = new TTree("{{OUTPUT_TREE_NAME}}", "Skimmed tree"); + ROOT::TreeWrapper output_tree(output_tree_); + +{{BEFORE_LOOP}} + + uint64_t selected_entries = 0; + uint64_t index = 1; + while (tree.next()) { + + if (MUST_STOP) { + break; + } + + if ((index - 1) % 1000 == 0) + std::cout << "Processing entry " << index << " of " << tree.getEntries() << std::endl; + index++; + + std::string filename = raw_tree->GetFile()->GetName(); + bool runOnElEl = filename.find("DoubleEG") != std::string::npos; + bool runOnMuMu = filename.find("DoubleMuon") != std::string::npos; + bool runOnMuEl = filename.find("MuonEG") != std::string::npos; + bool runOnElMu = runOnMuEl; + bool runOnMC = !(runOnElEl || runOnMuMu || runOnMuEl); + +{{GLOBAL_CUT_AND_OUTPUT_BRANCHES_FILLING}} + } + + outfile->cd(); + output_tree_->Write(); + outfile->Close(); + + if (index == 1) { + std::cout << "No entry selected" << std::endl; + } else { + std::cout << "Number of selected entries: " << selected_entries << " (" << selected_entries / (float) (index - 1) * 100 << " %)" << std::endl; + } +} + +bool parse_datasets(const std::string& json_file, std::vector& datasets) { + + Json::Value root; + { + std::ifstream f(json_file); + Json::Reader reader; + if (! reader.parse(f, root, false)) { + std::cerr << "Failed to parse '" << json_file << "'" << std::endl; + return false; + } + } + + datasets.clear(); + + // Looping over the different samples + Json::Value::Members samples_str = root.getMemberNames(); + for (size_t index = 0; index < root.size(); index++) { + + const Json::Value& sample = root[samples_str[index]]; + Dataset dataset; + + // Mandatory fields + dataset.name = samples_str[index]; + dataset.db_name = sample["db_name"].asString(); + dataset.cut = sample.get("sample_cut", "1").asString(); + dataset.tree_name = sample.get("tree_name", "t").asString(); + + //dataset.output_name + if (sample.isMember("output_name")) { + dataset.output_name = sample["output_name"].asString(); + } else { + dataset.output_name = dataset.db_name + "_skim"; + } + + // If a list of files is specified, only use those + if (sample.isMember("files")) { + Json::Value files = sample["files"]; + + for(auto it = files.begin(); it != files.end(); ++it) { + dataset.files.push_back((*it).asString()); + } + } else if (sample.isMember("path")) { + dataset.path = sample["path"].asString(); + dataset.files.push_back(dataset.path + "/*.root"); + } + + datasets.push_back(dataset); + } + + return true; +} + +int main(int argc, char** argv) { + + + try { + + TCLAP::CmdLine cmd("Create trees from trees", ' ', "0.2.0"); + + TCLAP::ValueArg datasetArg("d", "dataset", "JSON file describing the dataset to run on", true, "", "JSON file", cmd); + TCLAP::ValueArg outputArg("o", "output", "Output directory", false, "", "FOLDER", cmd); + + cmd.parse(argc, argv); + + std::vector datasets; + parse_datasets(datasetArg.getValue(), datasets); + + std::string output_dir = outputArg.getValue(); + if (output_dir.empty()) + output_dir = "."; + + // Register CTRL+C signal handler + struct sigaction sigIntHandler; + + sigIntHandler.sa_handler = [](int signal) { MUST_STOP = true; }; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + + sigaction(SIGINT, &sigIntHandler, NULL); + + // Random numbers + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> sleep_time(1 * 1000, 30 * 1000); + + auto addFileToChain = [&sleep_time, &gen](TChain* chain, const std::string& filename) -> bool { + if (! chain) + return false; + + // Try 5 times to open the file + size_t n = 5; + while (n--) { + size_t old_gErrorIgnoreLevel = gErrorIgnoreLevel; + gErrorIgnoreLevel = kError + 1; + bool success = chain->Add(filename.c_str(), 0) != 0; + gErrorIgnoreLevel = old_gErrorIgnoreLevel; + if (success) + return true; + + auto sleep_time_value = sleep_time(gen); + std::cout << "Error while opening '" << filename << "'. Sleeping for " << sleep_time_value << " ms and trying " << n << " more time." << std::endl; + + // Sleep a bit before trying to open the file again + // The sleep time is random to avoid all the jobs on the cluster + // to re-try to open the files at the same time + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_time_value)); + } + + std::cerr << "Error: cannot open '" << filename << "'" << std::endl; + return false; + }; + + for (const Dataset& d: datasets) { + if (MUST_STOP) + break; + + std::cout << "Creating skim for dataset '" << d.name << "'" << std::endl; + std::unique_ptr t(new TChain(d.tree_name.c_str())); + + for (const std::string& file: d.files) { + bool success = addFileToChain(t.get(), file); + if (!success) + return 2; + } + + std::string output_file = output_dir + "/" + d.output_name + ".root"; + + ROOT::TreeWrapper wrapped_tree(t.get()); + + // Set cache size to 10 MB + t->SetCacheSize(10 * 1024 * 1024); + // Learn tree structure from the first 10 entries + t->SetCacheLearnEntries(10); + + Skimmer s(d, wrapped_tree, t.get()); + s.skim(output_file); + std::cout << "Done. Output saved as '" << output_file << "'" << std::endl; + } + + } catch (TCLAP::ArgException &e) { + std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; + return 1; + } + + return 0; +} diff --git a/templates/Skimmer.h.tpl b/templates/Skimmer.h.tpl new file mode 100644 index 0000000..c2280f7 --- /dev/null +++ b/templates/Skimmer.h.tpl @@ -0,0 +1,56 @@ +// vim: syntax=cpp + +#pragma once + +#include +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include + +// Generated automatically +#include + +#include + +// No other choices, as ROOT strips the 'std' namespace from types... +using namespace std; + +template +size_t Length$(const T& t) { + return t.size(); +} + +struct Dataset { + std::string name; + std::string db_name; + std::string output_name; + std::string tree_name; + std::string path; + std::vector files; + std::string cut; +}; + +class Skimmer { + public: + Skimmer(const Dataset& dataset, ROOT::TreeWrapper& tree, TChain* raw_tree): + m_dataset(dataset), tree(tree), raw_tree(raw_tree) {}; + virtual ~Skimmer() {}; + + void skim(const std::string&); + + private: + Dataset m_dataset; + ROOT::TreeWrapper& tree; + TChain* raw_tree; + + // List of input branches + {{BRANCHES}} +}; diff --git a/templates/generateHeader.sh b/templates/generateHeader.sh new file mode 100755 index 0000000..35b33d8 --- /dev/null +++ b/templates/generateHeader.sh @@ -0,0 +1,13 @@ +#! /bin/sh + +OUTPUT=$1 + +echo "" > $OUTPUT + +find ${CMSSW_BASE}/src/cp3_llbb -maxdepth 1 -type d | +while read dir +do + if [ -e ${dir}/src/classes.h ]; then + echo "#include <${dir}/src/classes.h>" >> $OUTPUT + fi +done