From e927a88d87e7db6d2683effbc6287b3156b15721 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Fri, 26 Jan 2018 14:44:43 +0100 Subject: [PATCH 01/14] Copy plots (basic classes) and treedecorators, as a first step --- python/plots.py | 148 ++++ python/treedecorators.py | 1412 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 1560 insertions(+) create mode 100644 python/plots.py create mode 100644 python/treedecorators.py diff --git a/python/plots.py b/python/plots.py new file mode 100644 index 0000000..55947ea --- /dev/null +++ b/python/plots.py @@ -0,0 +1,148 @@ +""" +Helper classes describing histograms, binnings etc. +""" +__all__ = ("Plot", "EquidistantBinning", "VariableBinning") + +import numpy as np +from itertools import chain + +class EquidistantBinning(object): + """ Equidistant binning + """ + __slots__ = ("__weakref__", "N", "mn", "mx") + def __init__(self, N, mn, mx): + self.N = N + self.mn = mn + self.mx = mx + @property + def minimum(self): + return self.mn + @property + def maximum(self): + return self.mx + +class VariableBinning(object): + """ Variable-sized bins + """ + __slots__ = ("__weakref__", "binEdges") + def __init__(self, binEdges): + self.binEdges = np.array(binEdges) + @property + def N(self): + return len(self.binEdges)-1 + @property + def minimum(self): + return self.binEdges[0] + @property + def maximum(self): + return self.binEdges[-1] + +from .treedecorators import adaptArg + +class Plot(object): + """ Plot representation (specifications for making a 1,2,3,...-dimensional histogram) + """ + __slots__ = ("__weakref__", "name", "variables", "selection", "binnings", "title", "axisTitles", "axisBinLabels", "plotopts") + def __init__(self, name, variables, selection, binnings, title="", axisTitles=tuple(), axisBinLabels=tuple(), plotopts=None): + self.name = name + self.variables = variables + self.selection = selection + self.binnings = binnings + self.title = title + self.axisTitles = axisTitles + self.axisBinLabels = axisBinLabels + self.plotopts = plotopts if plotopts else dict() + def clone(self, name=None, variables=None, selection=None, binnings=None, title=None, axisTitles=None, axisBinLabels=None, plotopts=None): + """ Low-level helper method: copy with optional re-setting of attributes + """ + return Plot( (name if name is not None else self.name) + , (variables if variables is not None else self.variables) + , (selection if selection is not None else self.selection) + , (binnings if binnings is not None else self.binnings) + , title=(title if title is not None else self.title) + , axisTitles=(axisTitles if axisTitles is not None else self.axisTitles) + , axisBinLabels=(axisBinLabels if axisBinLabels is not None else self.axisBinLabels) + , plotopts=(plotopts if plotopts is not None else self.plotopts) + ) + + @property + def cut(self): + return self.selection.cut + @property + def weight(self): + return self.selection.weight + + @property + def longTitle(self): + return ";".join(chain([self.title], self.axisTitles)) + + def __repr__(self): + return "Plot({0!r}, {1!r}, {2!r}, {3!r}, title={4!r}, axisTitles={5!r})".format(self.name, self.variables, self.selection, self.binnings, self.title, self.axisTitles) + + @staticmethod + def make1D(name, variable, selection, binning, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""),) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None),) + return Plot(name, (adaptArg(variable),), selection, (binning,), **kwargs) + @staticmethod + def make2D(name, variables, selection, binnings, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""), kwargs.pop("yTitle", "")) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None), kwargs.pop("yBinLabels", None)) + return Plot(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs) + @staticmethod + def make3D(name, variables, selection, binnings, **kwargs): + kwargs["axisTitles"] = (kwargs.pop("xTitle", ""), kwargs.pop("yTitle", ""), kwargs.pop("zTitle", "")) + kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None), kwargs.pop("yBinLabels", None), kwargs.pop("zBinLabels", None)) + return Plot(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs) + +class Selection(object): + """ Group of selection requirements, weight factors and a handle on the candidate (they logically belong together) + """ + __slots__ = ("__weakref__", "cuts", "weights", "candidate", "systVars") + def __init__(self, cuts, weights, candidate): + self.cuts = [ adaptArg(cut, "Bool_t") for cut in list(cuts) ] + self.weights = [ adaptArg(wgt, "Float_t") for wgt in list(weights) ] + self.candidate = candidate + self.systVars = dict() + def clone(self, cuts=None, weights=None, candidate=None): + """ Low-level helper method: copy with optional re-setting of attributes + """ + return Selection( (cuts if cuts is not None else self.cuts) + , (weights if weights is not None else self.weights) + , (candidate if candidate is not None else self.candidate) + ) + @property + def cut(self): + from .treedecorators import makeExprAnd + return makeExprAnd(self.cuts) + @property + def weight(self): + from .treedecorators import makeExprProduct + return makeExprProduct(self.weights) + def __repr__(self): + return "Selection({0!r}, {1!r}, {2!r})".format(self.cut, self.weight, self.candidate) + def __eq__(self, other): + return ( ( len(self.cuts) == len(other.cuts) ) and all( sc == oc for sc,oc in izip(self.cuts, other.cuts) ) + and ( len(self.weights) == len(other.weights) ) and all( sw == ow for sw,ow in izip(self.weights, other.weights) ) + and ( self.candidate == other.candidate ) ) + + @staticmethod + def addTo(prev, cut=None, weight=None, candidate=None): + """ + Create a new selection by adding a cut and/or weight, and possibly replacing the candidate + """ + newCuts = list(prev.cuts) + if cut is not None: + if hasattr(cut, "__len__"): + newCuts += list(adaptArg(ct, "Bool_t") for ct in cut) + else: + newCuts.append(adaptArg(cut, "Bool_t")) + newWeights = list(prev.weights) + if weight is not None: + if hasattr(weight, "__len__"): + newWeights += list(adaptArg(wgt, "Float_t") for wgt in weight) + else: + newWeights.append(adaptArg(weight, "Float_t")) + return Selection(newCuts, newWeights, candidate if candidate is not None else prev.candidate) + + ## TODO add more helpers for manipulation diff --git a/python/treedecorators.py b/python/treedecorators.py new file mode 100644 index 0000000..8a2653f --- /dev/null +++ b/python/treedecorators.py @@ -0,0 +1,1412 @@ +import ROOT +from itertools import chain, izip +import copy + +boolType = "bool" +PODTypes = set(("Float_t", "Double_t", "Int_t", "UInt_t", "Bool_t", "Char_t", "UChar_t", "ULong64_t", "int", "unsigned", "unsigned short", "char", "signed char", "unsigned char", "float", "double", "bool", "Short_t", "size_t", "std::size_t", "unsigned short")) ## there are many more (at least unsigned) +SizeType = "Int_t" +## TODO implement a "type system" - only need numeric types (boolean, integral and floating point, different precisions, math operators -> largest, comparison operators always -> bool) +## -> Python data model (emulating numeric types) +## for iterables / non-zero length things: may need new "elementary" operations (to be converted into control flow) - adapt "streaming" style and use bit-shift operations for readabilty (filter, map, reduce, zip (should not even be necessary: just stream enough info and optimise through getters later) etc. - fine as long as it is transparent enough, PURE functions start to matter here) + +################################################################################ +## INTERFACES ## +################################################################################ + +class TupleStub(object): + """ + Interface & base class for stubs + + NOTE: to avoid conflicts with branch names, subclasses should not define + new "visible" attributes (not starting with "_") + """ + __slots__ = ("__weakref__", "_typeName") + def __init__(self, typeName): + self._typeName = typeName + @property + def op(self): + raise ValueError("Can only get operation for 'complete' stubs (with a defined type)") + def _get(self, name): + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __getattr__(self, name): + return self._get(name) + +class TupleOp(object): + """ + Interface for operations on leafs and resulting objects / numbers + """ + __slots__ = ("__weakref__", "uname", "_explValidDeps") + def __init__(self): + self.uname = None ## cache ID for optimisations + self._explValidDeps = [] + @property + def args(self): + # return iterable over other operations + return tuple() + @property + def leafDeps(self): + # return iterable over other all used fields from the TTree + return tuple() + @property + def validDeps(self): + # requirements for this expression to be valid (recursive by construction) + lst = list(self._explValidDeps) + for a in self.args: + lst += a.validDeps + return lst + def addValidDep(self, dependency): + self._explValidDeps.append(adaptArg(dependency, typeHint=boolType)) + @property + def extDeps(self): + # return iterable of (type, name) over externally-defined variables used (no-recursive) + return tuple() + @property + def result(self): + pass + + def __hash__(self): + return hash(repr(self)) + + ## Backends TODO: there should be a way to separate this a bit... + def get_TTreeDrawStr(self): + """ + Generate string for TTree::Draw (TTreeFormula etc.) + Mostly for debugging and making sure all features are there + """ + if self.uname is not None: + return self.uname + else: + return self._get_TTreeDrawStr() + + def __eq__(self, other): + raise Exception("TupleOp has no default __eq__") ## FIXME should be something like not implemented or so + def __ne__(self, other): + return not ( self == other ) + +# helpers +def allArgsOfOp(op, stopAtUName=False): + """ Recursively yield all TupleOps this one depends on + """ + for ia in op.args: + yield ia + if ( not stopAtUName ) or ( ia.uname is None ): + for aa in allArgsOfOp(ia): + yield aa + +def opDependsOn(arg, other): + """ Is other present anywhere in the dependency tree of arg? + """ + return ( arg == other ) or ( other in arg.validDeps ) or any(ia == other for ia in allArgsOfOp(arg)) + +################################################################################ +## STUBS ## +################################################################################ + +class PODStub(TupleStub): + """ + Just a number + """ + __slots__ = ("_parent",) + def __init__(self, parent, typeName): + self._parent = parent + super(PODStub, self).__init__(typeName) + @property + def op(self): + return self._parent + def __repr__(self): + return "PODStub({0!r}, {1!r})".format(self._parent, self._typeName) + def _binaryOp(self, opName, other, outType="Double_t"): + return MathOp(opName, self, other, outType=outType).result +## operator overloads +for nm,opNm in { + "__add__" : "add" + , "__sub__" : "subtract" + , "__mul__" : "multiply" + , "__truediv__" : " divide" + , "__div__" : "divide" + }.iteritems(): + setattr(PODStub, nm, (lambda oN : (lambda self, other : self._binaryOp(oN, other)))(opNm)) +for nm in ("__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"): + setattr(PODStub, nm, (lambda oN, oT : (lambda self, other : self._binaryOp(oN, other, outType=oT)))(nm.strip("_"), boolType)) + +class ObjectStub(PODStub): + """ + Imitate an object + """ + __slots__ = ("_typ",) + def __init__(self, parent, typeName): + self._typ = getattr(ROOT, typeName) ## NOTE could also use TClass machinery + super(ObjectStub, self).__init__(parent, typeName) + def _get(self, name): + if name not in dir(self._typ): + raise AttributeError("Type {0} has no member {1}".format(self._typeName, name)) + if hasattr(self._typ, name) and isinstance(getattr(self._typ, name), ROOT.MethodProxy): + return ObjectMethodStub(self, name) + else: + return GetDataMember(self, name).result + def __repr__(self): + return "ObjectStub({0!r}, {1!r})".format(self._parent, self._typeName) + + ## TODO operators as well, provided that they are supported by the underlying class - need to figure out how to do that efficiently + +class ArrayStub(TupleStub): ## different from an array result of a calculation or not? Not really, in fact... + """ + (possibly var-sized) array of anything + """ + __slots__ = ("_parent", "_length", "valueType") + def __init__(self, parent, typeName, length): + self._parent = parent + self._length = length + self.valueType = typeName + super(ArrayStub, self).__init__("[{0}]".format(typeName)) + @property + def op(self): + return self._parent + def __getitem__(self, index): + return GetItem(self, index).result + def __len__(self): + return self._length + def __repr__(self): + return "ArrayStub({0!r}, {1!r}, {2!r})".format(self._parent, self._typeName, self._length) + + +class VectorStub(ObjectStub): + """ + Vector-as-array (to be eliminated with var-array branches / generalised into object) + """ + __slots__ = ("_parent", "valueType") + def __init__(self, parent, typeName): + self._parent = parent + ##self.valueType = getattr(ROOT, typeName).value_type ## usable from 6.04.something on + self.valueType = ">".join("<".join(next(mem for mem in dir(getattr(ROOT, typeName)) if mem.startswith("_vector<") and mem.endswith("___M_range_check")).split("<")[1:]).split(">")[:-1]) + super(VectorStub, self).__init__(parent, typeName) + @property + def op(self): + return self._parent + def __getitem__(self, index): + return GetItem(self, index).result + def __len__(self): + return op.extMethod("Length$")(self) + def __repr__(self): + return "VectorStub({0!r}, {1!r})".format(self._parent, self._typeName) + +import re +vecPat = re.compile("vector\<(?P[a-zA-Z_0-9\<\>\: ]+)\>") + +def makeStubForType(typeName, parent, length=None): + if length is not None: + return ArrayStub(parent, typeName, length) + if typeName in PODTypes: + return PODStub(parent, typeName) ## maybe this one can also take the type name + else: + m = vecPat.match(typeName) + if m: + return VectorStub(parent, typeName) + else: + return ObjectStub(parent, typeName) + +def adaptArg(arg, typeHint=None): + if isinstance(arg, TupleStub): + return arg.op + elif isinstance(arg, TupleOp): + return arg + elif typeHint is not None: + return Const(typeHint, arg) + else: + raise ValueError("Should get some kind of type hint") + +def makeConst(value, typeHint): + return makeStubForType(typeHint, adaptArg(value, typeHint)) + +class MethodStub(TupleStub): + """ + Imitate a free-standing method + """ + __slots__ = ("_name",) + def __init__(self, name): + self._name = name + super(MethodStub, self).__init__("{0}(...)".format(self._name)) + def __call__(self, *args): + ## TODO maybe tihs is a good place to resolve the right overload? or do some arguments checking + return CallMethod(self._name, tuple(args)).result + def __repr__(self): + return "MethodStub({0!r})".format(self._name) + +class ObjectMethodStub(TupleStub): ## TODO data members? + """ + Imitate a member method of an object + """ + __slots__ = ("_objStb", "_name") + def __init__(self, objStb, name): + self._objStb = objStb + self._name = name + super(ObjectMethodStub, self).__init__("{0}.{1}(...)".format(objStb._typeName, self._name)) + def __call__(self, *args): + ## TODO maybe tihs is a good place to resolve the right overload? or do some arguments checking + return CallMemberMethod(self._objStb, self._name, tuple(args)).result + def __repr__(self): + return "ObjectMethodStub({0!r}, {1!r})".format(self._objStb, self._name) + +def allLeafs(branch, split=False): ## study splitlevel in tree as well + """ + Recursively collect TTree leaves (TLeaf and TBranchElement) + """ + if split: + raise NotImplementedError("Going into TBranchElement not implemented yet") + for lv in branch.GetListOfLeaves(): + yield lv + for br in branch.GetListOfBranches(): + if isinstance(br, ROOT.TBranchElement): + yield br + else: + for lv in allLeafs(br): + yield lv + +class SmartTupleStub(TupleStub): + """ + First try a fixed list of proxies (which should have a _parent attribute to refer up the tree) + """ + __slots__ = ("_parent", "_smartLeafs", "_extraCap") + ## TODO make a SmartLeafStub base class (problems with multiple inheritance, maybe, unless these can always have smart leafs as well) + def __init__(self, typeName, parent=None): + self._smartLeafs = dict() + self._extraCap = list() + self._registerParent(parent) + super(SmartTupleStub, self).__init__(typeName) + @property + def op(self): + return self._parent + + def _registerSmartLeaf(self, name, stub, extraCap=None): + stub._registerParent(self, extraCap=extraCap) + self._smartLeafs[name] = stub + + def _registerParent(self, parent, extraCap=None): + self._parent = parent + if extraCap: + for hlp in extraCap: + self._addCapability(hlp) + + def _addCapability(self, hlp): + hlp._parent = self + self._extraCap.append(hlp) + + def __getattr__(self, name): ## FIXME as all are registered, we could probably also do this with properties + # override to first check smart leafs + if name in self._smartLeafs: + return self._smartLeafs[name] + else: + att = self._getattrFromCap(name) + if att is not None: + return att + else: + return self._get(name) + + def _getCapWithAttr(self, name, exceptionIfAbsent=False): + for hlp in self._extraCap: + if name in hlp._dir: + return hlp + if exceptionIfAbsent: + raise AttributeError("Object {0!r} has no attribute with name {1!r}".format(self, name)) + + def _getattrFromCap(self, name, exceptionIfAbsent=False): + hlp = self._getCapWithAttr(name, exceptionIfAbsent=exceptionIfAbsent) + if hlp is not None: + return getattr(hlp, name) + + ## inspect + def __dir__(self): + return list(set(self._availLeafs()).difference(self._listedLeafsBelow())) + ## pass full list top-down + def _moreAvailLeafs(self): ## excluding own + if self._parent is not None: + return self._parent._availLeafs() + else: + return set() + def _availLeafs(self): ## including own + avail = self._moreAvailLeafs() + avail.update(self._smartLeafs.iterkeys()) + for hlp in self._extraCap: + avail.update(hlp._dir) + return avail + ## extra helper (split in two) + def _listedLeafsBelow(self): ## excluding own + lst = set() + for ch in self._smartLeafs.itervalues(): + lst.update(set(ch._listedLeafs())) + for dc in self._extraCap: + lst.update(dc._replLeafs()) + return lst + ## pass already-listed bottom up + def _listedLeafs(self): ## including own + lst = self._listedLeafsBelow() + availListed = set(self.__dir__()).intersection(self._moreAvailLeafs()) ## exclude those that have already been listed + ## add object members + lst.update(availListed) + return lst + + ## extra decorations + class _SmartLeafDecoration(object): + __slots__ = ("__weakref__", "_parent") + _dir = tuple() + def __init__(self, parent=None): + self._parent = parent + def _replLeafs(self): + return set() + @property + def _leafDeps(self): + return tuple() + + class _SmartIterable(_SmartLeafDecoration): + """ Approach a structure of arrays as an array of structures """ + __slots__ = ("_len",) + _dir = ("__getitem__", "__len__", "__nonzero__", "__iter__") + def __init__(self, _len, parent=None): + if str(_len) == _len: + self._len = ( lambda ilen : (lambda self : getattr(self._parent, ilen)) )(_len) ## one level up from parent + else: + self._len = _len + super(SmartTupleStub._SmartIterable, self).__init__(parent=parent) + def __getitem__(self, idx): + return SmartLeafItemStub(idx, parent=self._parent) + def __len__(self): + return self._len(self._parent) + def __iter__(self): + for i in xrange(len(self)): + yield self[i] + def __nonzero__(self): + return self.__len__() > 0 ## FIXME put what makes sense... + def _replLeafs(self): + return self[-1]._availLeafs() + @property + def _leafDeps(self): + return adaptArg(self.__len__()).leafDeps + + class _WrappedIterable(_SmartLeafDecoration): + """ Return wrapped elements of the array (attached to a LeafFacade) """ + __slots__ = ("_base", "_stubMap") + _dir = ("__getitem__", "__len__", "__nonzero__", "__iter__") + def __init__(self, base, stubMap, parent=None): + self._base = base + self._stubMap = stubMap + super(SmartTupleStub._WrappedIterable, self).__init__(parent=parent) + def _wrap(self, rawItm): + return self._stubMap[rawItm._typeName](rawItm._parent, pStub=self._parent) + def __getitem__(self, idx): + return self._wrap(self._base[idx]) + def __len__(self): + return self._base.__len__() + def __iter__(self): + for itm in self._base: + yield self_wrap(itm) + def __nonzero__(self): + return self.__len__() > 0 ## FIXME put what makes sense + def _replLeafs(self): + return self[-1]._availLeafs() + @property + def _leafDeps(self): + return self._base.leafDeps + + class _BoolTestableFromLeaf(_SmartLeafDecoration): + __slots__ = ("_bool",) + _dir = ("__nonzero__",) + def __init__(self, _bool, parent=None): + self._bool = _bool + super(SmartTupleStub._BoolTestableFromLeaf, self).__init__(parent=parent) + def __nonzero__(self): + return self._bool(self._parent) + + class _RefToOther(_SmartLeafDecoration): + __slots__ = ("_name", "_refGetter", "_repl") + def __init__(self, name, refGetter, parent=None, repl=None): + self._name = name + self._refGetter = refGetter + self._repl = tuple(repl) if repl else tuple() + super(SmartTupleStub._RefToOther, self).__init__(parent=parent) + @property + def _dir(self): + return (self._name,) + def _replLeafs(self): + return self._repl + def __getattr__(self, name): + if name == self._name: + return self._refGetter(self._parent) + else: + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __repr__(self): + return "SmartTupleStub._RefToOther({0!r}, {1!r}, parent={2!r})".format(self._name, self._refGetter, self._parent) + + class _RefList(_SmartLeafDecoration): + __slots__ = ("_base", "_cont") + _dir = ("__getitem__", "__iter__", "__len__", "__nonzero__") + def __init__(self, base, cont, parent=None): + self._base = base + self._cont = cont + super(SmartTupleStub._RefList, self).__init__(parent=parent) + def __getitem__(self, idx): + return self._cont[self._base[idx]] + def __iter__(self): + for i in self._base: + yield self._cont[i] + def __len__(self): + return self._base.__len__() + def __nonzero__(self): + return self.__len__() != 0 ### FIXME + @property + def _leafDeps(self): + ###print "RefList with base {0!r} and container {1!r}".format(self._base, self._cont) + return chain(self._base._parent.leafDeps, self._cont._get_leafDeps()) ## FIXME this probably won't work for MC particle refs + + ## resolve iterable-like methods dynamically (could be implemented generically using properties) + def __getitem__(self, idx): + method = self._getattrFromCap("__getitem__") + if method: + return method(idx) + else: + return NotImplemented + def __len__(self): + method = self._getattrFromCap("__len__") + if method: + return method() + else: + return NotImplemented + def __iter__(self): + for elm in self._getattFromCap("__iter__", True)(): + yield elm + ## FIXME this should probably be done differently (use filter/map/reduce/... directly) + def __nonzero__(self): + ## NOTE important as soon as __len__ is implemented (see python reference on the data model) + method = self._getattrFromCap("__nonzero__") + if method: + return method() + else: + return NotImplemented + + def _get_leafDeps(self): + return chain.from_iterable(cp._leafDeps for cp in self._extraCap) + +class TreeStub(SmartTupleStub): + """ + Tree stub + """ + __slots__ = ("_skeleton", "_lvs") + def __init__(self, skeleton): + self._skeleton = skeleton + self._lvs = dict() + self._readLeafs() ## also fill self._lvs + super(TreeStub, self).__init__("struct") + + def _readLeafs(self): + self._lvs = dict( (br.GetName(), br) for br in allLeafs(self._skeleton) ) + + def _get(self, name): + if name in self._lvs: + lv = self._lvs[name] + if not lv: ## update in case we moved somewhere else or so + print "DBG: re-reading leafs" + self._readLeafs() + lv = self._lvs[name] + assert lv + if isinstance(lv, ROOT.TLeaf): + if lv.GetLeafCount(): + lenLv = lv.GetLeafCount() + return GetArrayLeaf(lv.GetTypeName(), name, GetLeaf(lenLv.GetTypeName(), lenLv.GetName())).result + elif lv.GetTitle().endswith("]"): + tp,ln = tuple(lv.GetTitle().strip("]").split("[")) + return GetArrayLeaf(lv.GetTypeName(), name, Const(SizeType, int(ln))).result + else: + return GetLeaf(lv.GetTypeName(), name).result + else: + return GetLeaf(lv.GetClassName() if lv.GetClassName() != '' else lv.GetTypeName(), name).result + else: + raise AttributeError("Tree has no branch with name {0}".format(repr(name))) + def __repr__(self): + return "TreeStub({0!r})".format(self._skeleton) + + def _availLeafs(self): + avail = super(TreeStub, self)._availLeafs() + avail.update(set(lv for lv in self._lvs.iterkeys() if "." not in lv and not lv.endswith("_"))) ## TODO let the classes tell us... + return avail + +class BranchGroupStub(SmartTupleStub): + """ + Stub for a group of related branches (with shared prefix) + """ + __slots__ = ("_prefix", "_nStrp") + def __init__(self, prefix, nStrp=0, **kwargs): + self._prefix = prefix + self._nStrp = nStrp + super(BranchGroupStub, self).__init__("struct", **kwargs) + def _get(self, name): + return getattr(self._parent, "".join((("".join(self._prefix.split("_")[:-self._nStrp]) if self._nStrp > 0 else self._prefix), name))) ## TODO could composition of join be made optional? + def __repr__(self): + return "BranchGroupStub({0!r}, parent={1!r})".format(self._prefix, self._parent) + + def _listedLeafs(self): + return set("".join((self._prefix, ky)) for ky in super(BranchGroupStub, self)._listedLeafs()) + + def _moreAvailLeafs(self): + return set(lf[len(self._prefix):] for lf in super(BranchGroupStub, self)._moreAvailLeafs() if lf.startswith(self._prefix) and lf != self._prefix) + +class SmartLeafItemStub(SmartTupleStub): + ### FIXME maybe need to make BranchGroupStub a base class, with the current behaviour for non-indexed and + """ + Stub for an item in a group of related branches + """ + __slots__ = ("_idx",) + ## NOTE special case: always has a parent, but is never registered - maybe we should change that and use the copy trick here + def __init__(self, idx, parent=None): + self._idx = adaptArg(idx, typeHint=SizeType) + super(SmartLeafItemStub, self).__init__("struct", parent=parent) + + @property + def i(self): + return makeStubForType(SizeType, self._idx) + + class _ItemSmartLeafStub(SmartTupleStub): + __slots__ = ("_orig",) + def __init__(self, orig, parent=None): + self._orig = orig + super(SmartLeafItemStub._ItemSmartLeafStub, self).__init__("struct", parent=parent) + def __getattr__(self, name): + return getattr(self._orig, name)[self._orig._idx] ## first attempt + + def __getattr__(self, name): + if name.startswith("_ipython"): + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + # override to first check smart leafs + if name in self._parent._smartLeafs: ## unless they are refs, then jump from parent + print "Getting smart leaf {0} of {1}".format(name, self) + return SmartLeafItemStub._ItemSmartLeafStub(self._parent._smartLeafs[name], self) + else: + att = self._getattrFromCap(name) + if att is not None: + return att + hlp = self._parent._getCapWithAttr(name) + if hlp is not None: + cp = copy.copy(hlp) + cp._parent = self + if isinstance(cp, SmartTupleStub._RefToOther) and isinstance(cp._refGetter, GoUpBy): + cp._refGetter = copy.copy(cp._refGetter) + cp._refGetter.nSteps += 1 + return getattr(cp, name) + else: + return self._parent._get(name)[self._idx] + def __repr__(self): + return "SmartLeafItemStub({0!r}, parent={1!r})".format(self._idx, self._parent) + + def _moreAvailLeafs(self): ## everything from parent except list inteface + avail = self._parent._availLeafs() + avail.discard("__len__") + avail.discard("__getitem__") + ## TODO discard a few more (references that are not up and not indexed like our array, see above - needs a bit of logic, may not be possible until arriving at the final branch) + return avail + + def __eq__(self, other): + if self._parent == other._parent: + return self.i == other.i + else: + raise Exception("Only objects from the same container can be compared for equality") + def __ne__(self, other): + return not ( self == other ) + +class GoUpBy(object): + """ + Construct .., ../.. etc. + """ + __slots__ = ("__weakref__", "nSteps",) + def __init__(self, nSteps): + self.nSteps = nSteps + def __call__(self, start): + i = 0 + res = start + while i < self.nSteps: + res = res._parent + i += 1 + return res + def __repr__(self): + return "GoUpBy({0:d})".format(self.nSteps) + +################################################################################ +## OPERATIONS ## +################################################################################ + +class Const(TupleOp): + """ + Hard-coded number + """ + __slots__ = ("typeName", "value") + def __init__(self, typeName, value): + self.typeName = typeName + self.value = value + super(Const, self).__init__() + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "Const({0!r})".format(self.value) + def __eq__(self, other): + return isinstance(other, Const) and ( self.typeName == other.typeName ) and ( self.value == other.value ) + # backends + def _get_TTreeDrawStr(self): + try: + if abs(self.value) == float("inf"): + return "std::numeric_limits<{0}>::{mnmx}()".format(self.typeName, mnmx=("min" if self.value < 0. else "max")) + except: + pass + return str(self.value) ## should maybe be type-aware... + +class Construct(TupleOp): + __slots__ = ("typeName", "_args") + def __init__(self, typeName, args): + self.typeName = typeName + self._args = tuple(adaptArg(a, typeHint="Double_t") for a in args) + super(Construct, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self._args ) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "Construct({0!r}, {1})".format(self.typeName, ", ".join(repr(a) for a in self._args)) + def __eq__(self, other): + return isinstance(other, Construct) and ( self.typeName == other.typeName ) and len(self._args) == len(other._args) and all( ( aa == ab ) for aa,ab in izip(self._args, other._args) ) + def _get_TTreeDrawStr(self): + return "{0}{{{1}}}".format(self.typeName, ", ".join(a.get_TTreeDrawStr() for a in self._args)) + +class InitList(TupleOp): + __slots__ = ("typeName", "elms") + def __init__(self, typeName, elmType, elms): + self.typeName = typeName + self.elms = tuple(adaptArg(e, typeHint=elmType) for e in elms) + super(InitList, self).__init__() + @property + def args(self): + return self.elms + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self.elms ) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "InitList<{0}>({1})".format(self.typeName, ", ".join(repr(elm) for elm in self.elms)) + def __eq__(self, other): + return isinstance(other, InitList) and ( self.typeName == other.typeName ) and len(self.elms) == len(other.elms) and all( ( ea == eb ) for ea, eb in izip(self.elms, other.elms) ) + def _get_TTreeDrawStr(self): + return "{{ {0} }}".format(", ".join(elm.get_TTreeDrawStr() for elm in self.elms)) + +class ExtVar(TupleOp): + """ + Externally-defined variable (used by name) + """ + __slots__ = ("typeName", "name") + def __init__(self, typeName, name): + self.typeName = typeName + self.name = name + super(ExtVar, self).__init__() + @property + def extDeps(self): + return ((self.typeName, self.name),) + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "ExtVar({0!r}, {1!r})".format(self.typeName, self.name) + def __eq__(self, other): + return isinstance(other, ExtVar) and ( self.typeName == other.typeName ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetLeaf(TupleOp): + """ + Get the number from a leaf + """ + __slots__ = ("typeName", "name") + def __init__(self, typeName, name): + self.typeName = typeName + self.name = name + super(GetLeaf, self).__init__() + @property + def leafDeps(self): + return self.name, + @property + def result(self): + return makeStubForType(self.typeName, self) + def __repr__(self): + return "GetLeaf({0!r}, {1!r})".format(self.typeName, self.name) + def __eq__(self, other): ## TODO maybe name is enough + return isinstance(other, GetLeaf) and ( self.typeName == other.typeName ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetArrayLeaf(TupleOp): + """ + Get the number from a leaf + """ + __slots__ = ("typeName", "name", "length") + def __init__(self, typeName, name, length): + self.typeName = typeName + self.name = name + self.length = length + super(GetArrayLeaf, self).__init__() + @property + def leafDeps(self): + return self.name, ## TODO add length (if applicable !!!). Maybe at this point need to allow for Const "stub" (with same operator overloads as POD stub) + @property + def result(self): + return makeStubForType(self.typeName, self, makeStubForType(SizeType, self.length)) + def __repr__(self): + return "GetArrayLeaf({0!r}, {1!r}, {2!r})".format(self.typeName, self.name, self.length) + def __eq__(self, other): ## TODO maybe name is enough + return isinstance(other, GetArrayLeaf) and ( self.typeName == other.typeName ) and ( self.name == other.name ) and ( self.length == other.length ) + # backends + def _get_TTreeDrawStr(self): + return self.name + +class GetItem(TupleOp): + """ + Get item from array (from function call or from array leaf) + """ + __slots__ = ("arg", "typeName", "index") + def __init__(self, arg, index): + self.arg = adaptArg(arg) + self.typeName = arg.valueType + self.index = adaptArg(index, typeHint=SizeType) + super(GetItem, self).__init__() + ## add validity requirement + ## TODO in the case of a ref (MC, -1 or good) or index (< len), we *could* do some checks + ## TODO for the rng_min_element_by etc. case, the best would be to add (to GetItem) a validity requirement at construction time + if isinstance(self.index, CallMethod) and self.index.name == "*" and isinstance(self.index._args[0], Next): + self.addValidDep(self.index._args[0].isValid()) + @property + def args(self): + return [ self.arg ] + ([ self.index ] if not isinstance(self.index, Const) else []) + @property + def leafDeps(self): + return chain.from_iterable(dep.leafDeps for dep in (self.arg, self.index)) + @property + def result(self): + return makeStubForType(self.typeName, self) ## TODO make sure that any parent gives back the right type name (may have to call it itemtype or so) + def __repr__(self): + return "GetItem({0!r}, {1!r})".format(self.arg, self.index) + def __eq__(self, other): ## TODO maybe typeName is not needed + return isinstance(other, GetItem) and ( self.arg == other.arg ) and ( self.typeName == other.typeName ) and ( self.index == other.index ) + # backends + def _get_TTreeDrawStr(self): + return "{0}[{1}]".format(self.arg.get_TTreeDrawStr(), self.index.get_TTreeDrawStr()) + +class LocalVariablePlaceholder(TupleOp): + """ + Placeholder type for a local variable connected to an index (first step in a specific-to-general strategy) + """ + __slots__ = ("name", "typeHint") + def __init__(self, name, typeHint): + self.name = name + self.typeHint = typeHint + super(LocalVariablePlaceholder, self).__init__() + @property + def leafDeps(self): + return tuple() + @property + def result(self): + return makeStubForType(self.typeHint, self) + def __eq__(self, other): ## TODO ?? + return isinstance(other, LocalVariablePlaceholder) and ( self.name == other.name ) and ( self.typeHint == other.typeHint ) + def _get_TTreeDrawStr(self): + return str(self.name) + def __repr__(self): + return "LocalVariablePlaceholder({0!r}, {1!r})".format(self.name, self.typeHint) + +def isRefList(rng): + """ Test if the range is a list of references or a group of vectors + """ + iterCap = rng._getCapWithAttr("__getitem__") + return ( iterCap is not None ) and isinstance(iterCap, SmartTupleStub._RefList) + +def getCapturesForExprs(expressions): + captures = set() + for expr in expressions: + for a in allArgsOfOp(expr, stopAtUName=True): + if a.uname: + captures.add(a.uname) + for evT,evN in a.extDeps: + if True: ## TODO only if evT is small + captures.add(evN) + else: + captures.add("&{0}".format(evN)) + return captures + +class Next(TupleOp): + """ std::find_if + """ + __slots__ = ("rng", "predFun", "_itArg", "_predExpr", "_refList") + def __init__(self, rng, predFun): + self.rng = rng + self.predFun = predFun + self._init() + super(Next, self).__init__() + def _init(self): + if isRefList(self.rng): + refListCap = self.rng._getCapWithAttr("__getitem__") + self._itArg = adaptArg(refListCap._base) + self._predExpr = adaptArg(self.predFun(refListCap._cont[LocalVariablePlaceholder("item", "unsigned short")]), typeHint=boolType) + self._refList = True + else: + self._itArg = adaptArg(op.len(self.rng)) + self._predExpr = adaptArg(self.predFun(self.rng[LocalVariablePlaceholder("i", "unsigned short")]), typeHint=boolType) + self._refList = False + @property + def args(self): + return [ self._predExpr ] + @property + def leafDeps(self): + return chain(self.rng._get_leafDeps(), self._predExpr.leafDeps) + def isValid(self): + if self._refList: + return ( self.result != op.extMethod("std::end")(self._itArg) ) + else: + return ( self.result != op.extMethod("IndexRangeIterator")(self._itArg, self._itArg) ) + @property + def result(self): + return makeStubForType("unsigned short", self) + def __repr__(self): + if self._refList: + return "Next_RefList( {0!r}, {1!r} )".format(self._itArg, self._predExpr) + else: + return "Next_Generic( {0!r}, {1!r} )".format(self._itArg, self._predExpr) + def __eq__(self, other): + return isinstance(other, Next) and ( self.rng == other.rng ) and ( self._predExpr == other._predExpr ) and ( self._itArg == other._itArg ) and ( self._refList == other._refList ) + def _get_TTreeDrawStr(self): + if self._refList: + return "std::find_if(std::begin({arg}), std::end({arg}), [{captures}] ( unsigned short item ) {{ return {predExpr}; }} )".format( + arg=self._itArg.get_TTreeDrawStr() + , predExpr=self._predExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._predExpr,)))) + ) + else: + return "std::find_if({itTp}(0,{N}), {itTp}({N},{N}), [{captures}] ( unsigned short i ) {{ return {predExpr}; }} )".format( + itTp="IndexRangeIterator" + , N=self._itArg.get_TTreeDrawStr() + , predExpr=self._predExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._predExpr,)))) + ) + +class Reduce(TupleOp): + """ + reduce (std::accumulate) over an iterable + + accuFun: res, item -> nextResExpr + isAssociative: if not, items are guaranteed to be processed in order (otherwise could be distribute) + """ + __slots__ = ("rng", "start", "accuFun", "valueType", "isAssociative", "_funExpr", "_itArg", "_refList") + def __init__(self, rng, start, accuFun, valueType=None, isAssociative=False): + self.rng = rng + self.start = adaptArg(start) + self.accuFun = accuFun + self.valueType = valueType if valueType else self.start.valueType + self.isAssociative = isAssociative + self._init() + super(Reduce, self).__init__() + def _init(self): + if isRefList(self.rng): + refListCap = self.rng._getCapWithAttr("__getitem__") + self._funExpr = adaptArg(self.accuFun( + makeStubForType(self.valueType, LocalVariablePlaceholder("res", self.valueType)) + , refListCap._cont[LocalVariablePlaceholder("item", "unsigned short int")] + ), typeHint=self.valueType) + self._itArg = adaptArg(refListCap._base) + self._refList = True + else: + self._funExpr= adaptArg(self.accuFun( + makeStubForType(self.valueType, LocalVariablePlaceholder("res", self.valueType)) + , self.rng[LocalVariablePlaceholder("i", "unsigned short int")] + ), typeHint=self.valueType) + self._itArg = adaptArg(op.len(self.rng)) + self._refList = False + + @property + def args(self): + return [ self.start, self._funExpr ] + @property + def leafDeps(self): + return chain(self.rng._get_leafDeps(), self.start.leafDeps, self._funExpr.leafDeps) + @property + def result(self): + return makeStubForType(self.valueType, self) + + def __repr__(self): + if self._refList: + return "Reduce_RefList( {0!r}, {1!r}, {2!r}, valueType={3!r}, isAssociative={4} )".format(self._itArg, self.start, self._funExpr, self.valueType, self.isAssociative) + else: + return "Reduce_Generic( {0!r}, {1!r}, {2!r}, valueType={3!r}, isAssociative={4} )".format(self._itArg, self.start, self._funExpr, self.valueType, self.isAssociative) + def __eq__(self, other): + return isinstance(other, Reduce) and ( self.rng == other.rng ) and ( self.start == other.start ) and ( self._refList == other._refList ) and ( self._itArg == other._itArg ) and ( self._funExpr == other._funExpr ) and ( self.valueType == other.valueType ) and ( self.isAssociative == other.isAssociative ) + # backend + def _get_TTreeDrawStr(self): + if self._refList: + return "std::accumulate(std::begin({arg}), std::end({arg}), {valTp}({start}), [{captures}] ( {valTp} res, unsigned short item ) -> {valTp} {{ return {funExpr}; }} )".format( + valTp=self.valueType + , arg=self._itArg.get_TTreeDrawStr() + , start=self.start.get_TTreeDrawStr() + , funExpr=self._funExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._funExpr,)))) + ) + else: # generic case (typically virtual container (e.g. ttW_lepton_*) -> isinstance(iterCap, SmartTupleStub._SmartIterable)) + return "std::accumulate({itTp}(0,{N}), {itTp}({N},{N}), {valTp}({start}), [{captures}] ( {valTp} res, unsigned short i ) -> {valTp} {{ return {funExpr}; }} )".format( + valTp=self.valueType + , itTp="IndexRangeIterator" + , N=self._itArg.get_TTreeDrawStr() + , start=self.start.get_TTreeDrawStr() + , funExpr=self._funExpr.get_TTreeDrawStr() + , captures=",".join(chain(["this"], getCapturesForExprs((self._funExpr,)))) + ) + ## what works now: op.rng_sum(tup.muons, lambda mu : mu.p4.Pt()) + op.rng_sum(tup.electrons, lambda el : el.p4.Pt()) - op.rng_sum(tup.ttW.l, lambda l : l.L.p4.Pt()) (sum of non-selected lepton PT's) + +class GetDataMember(TupleOp): + """ + Get a data member + """ + __slots__ = ("this", "name") + def __init__(self, this, name): + self.this = adaptArg(this) + self.name = name ## NOTE can only be a hardcoded string this way + super(GetDataMember, self).__init__() + @property + def args(self): + return [ self.this ] + @property + def leafDeps(self): + return self.this.leafDeps ## TODO and add something for the (possibly) new branches we ask for if splitlevel is high? + @property + def result(self): + if not self.name.startswith("_"): + try: + protoTp = self.this.result._typ + proto = protoTp() ## should *in principle* work for most ROOT objects + att = getattr(proto, self.name) + tpNm = type(att).__name__ + if protoTp.__name__.startswith("pair<") and self.name in ("first", "second"): + tpNms = tuple(tok.strip() for tok in protoTp.__name__[5:-1].split(",")) + return makeStubForType((tpNms[0] if self.name == "first" else tpNms[1]), self) + return makeStubForType(tpNm, self) + except Exception, e: + print "Problem getting type of data member {0} of {1!r}".format(self.name, self.this), e + return makeStubForType("void", self) + def __repr__(self): + return "GetDataMember({0!r}, {1!r})".format(self.this, self.name) + def __eq__(self, other): + return isinstance(other, GetDataMember) and ( self.this == other.this ) and ( self.name == other.name ) + # backends + def _get_TTreeDrawStr(self): + return "{0}.{1}".format(self.this.get_TTreeDrawStr(), self.name) + +class CallMethod(TupleOp): + """ + Call a method + """ + __slots__ = ("name", "_mp", "_args") + def __init__(self, name, args): + self.name = name ## NOTE can only be a hardcoded string this way + try: + self._mp = getattr(ROOT, name) + except: + self._mp = None + self._args = tuple(adaptArg(arg) for arg in args) + super(CallMethod, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable( arg.leafDeps for arg in self._args ) + @property + def result(self): + retTypeN = next( tok.strip("*&") for tok in self._mp.func_doc.split() if tok != "const" ) if self._mp else "Float_t" + return makeStubForType(retTypeN, self) + def __repr__(self): + return "CallMethod({0!r}, ({1}))".format(self.name, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, CallMethod) and ( self.name == other.name ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # backends + def _get_TTreeDrawStr(self): + return "{0}({1})".format(self.name, ", ".join(arg.get_TTreeDrawStr() for arg in self._args)) + +class CallMemberMethod(TupleOp): + #return CallMemberMethod(self._objStb.parent, self._name, tuple(args)).result + """ + Call a member method + """ + __slots__ = ("this", "name", "_mp", "_args") + def __init__(self, this, name, args): + self.this = adaptArg(this) + self.name = name ## NOTE can only be a hardcoded string this way + self._mp = getattr(this._typ, name) + self._args = tuple(adaptArg(arg) for arg in args) + super(CallMemberMethod, self).__init__() + @property + def args(self): + return chain([self.this], self._args) + @property + def leafDeps(self): + return chain( self.this.leafDeps, chain.from_iterable( arg.leafDeps for arg in self._args ) ) + @property + def result(self): + retTypeN = next( tok.strip("*&") for tok in self._mp.func_doc.split() if tok != "const" ) + return makeStubForType(retTypeN, self) + def __repr__(self): + return "CallMemberMethod({0!r}, {1!r}, ({2}))".format(self.this, self.name, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, CallMemberMethod) and ( self.this == other.this ) and ( self.name == other.name ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # backends + def _get_TTreeDrawStr(self): + return "{0}.{1}({2})".format(self.this.get_TTreeDrawStr(), self.name, ", ".join(arg.get_TTreeDrawStr() for arg in self._args)) + +mathOpFuns_TTreeDrawStr = { + "add" : lambda *args : "( {0} )".format(" + ".join(arg.get_TTreeDrawStr() for arg in args)) + , "multiply" : lambda *args : "( {0} )".format(" * ".join(arg.get_TTreeDrawStr() for arg in args)) + , "subtract" : lambda a1,a2 : "( {0} - {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "divide" : lambda a1,a2 : "( {0} / {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + # + , "lt" : lambda a1,a2 : "( {0} < {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "le" : lambda a1,a2 : "( {0} <= {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "eq" : lambda a1,a2 : "( {0} == {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "ne" : lambda a1,a2 : "( {0} != {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "gt" : lambda a1,a2 : "( {0} > {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "ge" : lambda a1,a2 : "( {0} >= {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "and" : lambda *args : "( {0} )".format(" && ".join(a.get_TTreeDrawStr() for a in args)) + , "or" : lambda *args : "( {0} )".format(" || ".join(a.get_TTreeDrawStr() for a in args)) + , "not" : lambda a : "( ! {0} )".format(a.get_TTreeDrawStr()) + # + , "abs" : lambda arg : "std::abs( {0} )".format(arg.get_TTreeDrawStr()) + , "log" : lambda arg : "std::log( {0} )".format(arg.get_TTreeDrawStr()) + , "log10" : lambda arg : "std::log10( {0} )".format(arg.get_TTreeDrawStr()) + , "max" : lambda a1,a2 : "std::max( {0}, {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + , "min" : lambda a1,a2 : "std::min( {0}, {1} )".format(a1.get_TTreeDrawStr(), a2.get_TTreeDrawStr()) + } + +class MathOp(TupleOp): + """ + Mathematical function N->1, e.g. sin, abs, ( lambda x, y : x*y ) + """ + __slots__ = ("outType", "op", "_args") + def __init__(self, op, *args, **kwargs): + self.outType = kwargs.pop("outType", "Double_t") + assert len(kwargs) == 0 + self.op = op + self._args = tuple(adaptArg(a, typeHint="Double_t") for a in args) + super(MathOp, self).__init__() + @property + def args(self): + return self._args + @property + def leafDeps(self): + return chain.from_iterable(arg.leafDeps for arg in self._args) + @property + def result(self): + return makeStubForType(self.outType, self) + def __repr__(self): + return "{0}({1})".format(self.op, ", ".join(repr(arg) for arg in self._args)) + def __eq__(self, other): + return isinstance(other, MathOp) and ( self.outType == other.outType ) and ( self.op == other.op ) and ( len(self._args) == len(other._args) ) and all( ( sa == oa ) for sa, oa in izip(self._args, other._args)) + # no TTreeDrawStr backend, this is an abstract base class + # convention: wrap *result*, if all parts do that, we have not too many excess parentheses + def _get_TTreeDrawStr(self): + return mathOpFuns_TTreeDrawStr[self.op](*self._args) + +## FIXME this can be done by MathOp, as things are now +# also of this type, for now ( control flow -> may be lazy, dep. on implementation ) +class SwitchOp(MathOp): ## may need another layer of indirection around... + """ + Ternary operator if cond then trueBranch else falseBranch + """ + __slots__ = () + def __init__(self, cond, trueBr, falseBr): + if isinstance(trueBr, TupleStub) and isinstance(falseBr, TupleStub): + assert trueBr._typeName == falseBr._typeName + super(SwitchOp, self).__init__("switch", adaptArg(cond, typeHint=boolType), trueBr, falseBr, outType=(trueBr._typeName if isinstance(trueBr, TupleStub) else falseBr._typeName if isinstance(falseBr, TupleStub) else "Double_t")) + # backends + def _get_TTreeDrawStr(self): + return "( {0} ? {1} : {2} )".format(*(arg.get_TTreeDrawStr() for arg in self._args)) + #return "( {0}*{1} + ( ! {0} )*{2} )".format(*(arg.get_TTreeDrawStr() for arg in self._args)) + +class Adaptor(object): + """ + Helper class: compose two single-argument callables + """ + __slots__ = ("__weakref__", "getter", "worker") + def __init__(self, getter, worker): + self.getter = getter + self.worker = worker + def __call__(self, arg): + return self.worker(self.getter(arg)) + def __repr__(self): + return "Adaptor({0!r}, {1!r})".format(self.getter, self.worker) + +class op(object): + """ + pseudo-module + """ + @staticmethod + def NOT(sth): + return MathOp("not", sth, outType=boolType).result + @staticmethod + def AND(*args): + return MathOp("and", *args, outType=boolType).result + @staticmethod + def OR(*args): + return MathOp("or", *args, outType=boolType).result + @staticmethod + def switch(test, trueBranch, falseBranch): + return SwitchOp(test, trueBranch, falseBranch).result + @staticmethod + def extMethod(name): + return MethodStub(name) ## TODO somehow take care of includes as well + @staticmethod + def extVar(typeName, name): + return ExtVar(typeName, name).result + @staticmethod + def construct(typeName, args): + return Construct(typeName, args).result + @staticmethod + def initList(typeName, elmName, elms): + return InitList(typeName, elmName, elms).result + @staticmethod + def abs(sth): + return MathOp("abs", sth, outType="Float_t").result + @staticmethod + def sign(sth): + return op.switch(sth!=0., sth/op.abs(sth), 0.) + @staticmethod + def sum(*args, **kwargs): + return MathOp("add", *args, outType=kwargs.pop("outType", "Float_t")).result + @staticmethod + def product(*args): + return MathOp("multiply", *args, outType="Float_t").result + @staticmethod + def log(sth): + return MathOp("log", sth, outType="Float_t").result + @staticmethod + def log10(sth): + return MathOp("log10", sth, outType="Float_t").result + @staticmethod + def max(a1,a2): + return MathOp("max", a1, a2, outType="Float_t").result + @staticmethod + def min(a1,a2): + return MathOp("min", a1, a2, outType="Float_t").result + @staticmethod + def in_range(low, arg, up): + return op.AND(arg > low, arg < up) + + ## range operations + @staticmethod + def len(sth): + return sth.__len__() ## __builtins__.len check it is an integer + @staticmethod + def rng_sum(rng, fun, start=makeConst(0., "Float_t")): + return Reduce(rng, start, ( lambda fn : ( lambda res, elm : res+fn(elm) ) )(fun), valueType=start._typeName, isAssociative=True).result + @staticmethod + def rng_count(rng, pred): ## specialised version of sum, for convenience + return Reduce(rng, makeConst(0, "Int_t"), ( lambda prd : ( lambda res, elm : res+op.switch(prd(elm), makeConst(1, "Int_t"), makeConst(0, "Int_t")) ) )(pred), valueType="Int_t", isAssociative=True).result + @staticmethod + def rng_product(rng, fun): + return Reduce(rng, makeConst(1., "Float_t"), ( lambda fn : ( lambda res, elm : res*fn(elm) ) )(fun), valueType="Float_t", isAssociative=True).result + @staticmethod + def rng_max(rng, fun): + return Reduce(rng, makeConst(float("-inf"), "Float_t"), ( lambda fn : ( lambda res, elm : op.max(res, fn(elm)) ) )(fun), valueType="Float_t", isAssociative=True).result + @staticmethod + def rng_min(rng, fun): + return Reduce(rng, makeConst(float("+inf"), "Float_t"), ( lambda fn : ( lambda res, elm : op.min(res, fn(elm)) ) )(fun), valueType="Float_t", isAssociative=True).result + ## + @staticmethod + def rng_max_element_by(rng, fun=lambda elm : elm): + ## stick with the convention: only work with indices referring to the object containers (not to the position in the one with selected object indices) + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[Reduce(rng, makeConst(-1, "Int_t"), + ( lambda fn,lst : + ( lambda ires, elm : op.switch(fn(elm) > fn(lst[ires]), elm.i, ires) ) + )(fun, cont), + valueType="Int_t", isAssociative=True).result] ## FIXME if we want to automatically check the validity of this, probably need to cache it in the GetItem that is created here + @staticmethod + def rng_min_element_by(rng, fun=lambda elm : elm): + ## stick with the convention: only work with indices referring to the object containers (not to the position in the one with selected object indices) + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[Reduce(rng, makeConst(-1, "Int_t"), + ( lambda fn,lst : + ( lambda ires, elm : op.switch(op.OR(ires < 0, fn(elm) < fn(lst[ires])), elm.i, ires) ) + )(fun, cont), + valueType="Int_t", isAssociative=True).result] + ## early-exit algorithms + @staticmethod + def rng_any(rng, fun=lambda elm : elm): + return Next(rng, fun).isValid() + @staticmethod + def rng_find(rng, pred=lambda elm : makeConst("true", boolType)): + cont = rng._getCapWithAttr("__getitem__")._cont if isRefList(rng) else rng + return cont[op.extMethod("*")(Next(rng, pred))] + + + ## Kinematics and helpers + @staticmethod + def invariant_mass(*args): + return op.extMethod("ROOT::Math::VectorUtil::InvariantMass")(*args) + @staticmethod + def invariant_mass_squared(*args): + return op.extMethod("ROOT::Math::VectorUtil::InvariantMass2")(*args) + @staticmethod + def deltaPhi(a1, a2): + return op.extMethod("Kinematics::deltaPhi")(a1, a2) + @staticmethod + def deltaR(a1, a2): + return op.extMethod("Kinematics::deltaR")(a1, a2) + @staticmethod + def signedDeltaPhi(a1, a2): + return op.extMethod("Kinematics::signedDeltaPhi")(a1, a2) + @staticmethod + def signedDeltaEta(a1, a2): + return op.extMethod("Kinematics::signedDeltaEta")(a1, a2) + +## related helpers +def makeExprAnd(listOfReqs): + """ op.AND for expressions (helper for histfactory etc.) + """ + if len(listOfReqs) > 1: + return adaptArg(op.AND(*listOfReqs)) + elif len(listOfReqs) == 1: + return listOfReqs[0] + else: + return adaptArg(makeConst("true", boolType), typeHint=boolType) +def makeExprProduct(listOfFactors): + """ op.product for expressions (helper for histfactory etc.) + """ + if len(listOfFactors) > 1: + return adaptArg(op.product(*listOfFactors)) + elif len(listOfFactors) == 1: + return listOfFactors[0] + else: + return adaptArg(makeConst(1., "float"), typeHint="float") + +def levelsAbove(prevLevels, thisLevel): + levels = [ SmartTupleStub._RefToOther(rf._name, GoUpBy(rf._refGetter.nSteps+1)) for rf in prevLevels ] + levels.append(SmartTupleStub._RefToOther(thisLevel, GoUpBy(1))) + return levels + +def addIntoHierarchy(name, smartLeaf, parent, hierarchy, capabilities=[]): + for cap in capabilities+list(copy.copy(cap) for cap in hierarchy): + smartLeaf._addCapability(cap) + parent._registerSmartLeaf(name, smartLeaf) + return smartLeaf + +def addRefListFacade(parent, name, collection): + fac = LeafFacade(name, parent) + base = getattr(parent, name) + #fac = SmartTupleStub(base._typeName, parent=parent) ## TODO may need to customise a little bit + fac._addCapability(SmartTupleStub._RefList(base, collection)) + parent._registerSmartLeaf(name, fac) + +def addWPSelGroup(name, prefix, collection, parent, hierarchy): + grp = BranchGroupStub(prefix) ## TODO __dir__ is not so happy with this, maybe need a (fairly simple) customisation of SmartTupleStub and/or BranchGroupStub + addIntoHierarchy(name, grp, parent, hierarchy) + for wp in grp._availLeafs(): + #if not wp.startswith("_"): ## TODO improve a bit on that + if wp.startswith("I") or wp.startswith("B"): + if hasattr(getattr(grp, wp), "__len__"): + addRefListFacade(grp, wp, collection) + else: + grp._addCapability(SmartTupleStub._RefToOther(wp, (lambda coll, iwp : ( lambda wpgrp : collection[wpgrp._get(iwp)] ))(collection, wp))) + return grp + +def addWPWPSelGroup(name, prefix, collectionGroup, parent, hierarchy): + ## FIXME can be removed when truth-matching gives jet index back (instead of index in selected-jets container) + grp = BranchGroupStub(prefix) ## TODO __dir__ is not so happy with this, maybe need a (fairly simple) customisation of SmartTupleStub and/or BranchGroupStub + addIntoHierarchy(name, grp, parent, hierarchy) + for wp in grp._availLeafs(): + #if not wp.startswith("_"): ## TODO improve a bit on that + if wp.startswith("I") or wp.startswith("B"): + if hasattr(getattr(grp, wp), "__len__"): + addRefListFacade(grp, wp, getattr(collectionGroup, wp)) + else: + grp._addCapability(SmartTupleStub._RefToOther(wp, (lambda collGrp, iwp : ( lambda wpgrp : getattr(collGrp, iwp)[wpgrp._get(iwp)] ))(collectionGroup, wp))) + return grp + +class LeafFacade(SmartTupleStub): + __slots__ = ("_name",) + def __init__(self, name, parent=None): + self._name = name + typeName = ( getattr(parent, name)._typeName if parent is not None else "struct" ) + super(LeafFacade, self).__init__(typeName, parent=parent) + ### main use: say we only hide one leaf. All other functionality is in the base class and works fine + def _listedLeafs(self): ## including own + lst = self._listedLeafsBelow() + lst.update(self._name) + return lst + ## FIXME this is wrong - do not want to inherit all branches from the parent + +class SmartObjectStub(ObjectStub): + """ + Imitate an object, with additional methods/properties + """ + def __init__(self, parent, typeName, caps=None, parentStub=None): + ## NOTE: inherits _typ (PyROOT type) + self._caps = caps + self._parentStub = parentStub + super(SmartObjectStub, self).__init__(parent, typeName) + def __getattr__(self, name): + for cap in self._caps: + if name in cap._dir: + cp = copy.copy(cap) + cp._parent = self + return getattr(cp, name) + + hlp = self._parentStub._getCapWithAttr(name) + if hlp: + return getattr(hlp, name) + + return self._get(name) ## fallback (which is the most common) + def __dir__(self): ## including own + return list(self._availLeafs()) + def _availLeafs(self): + avail = set() + for hlp in self._caps: + avail.update(hlp._dir) + for hlp in self._parentStub._extraCap: + avail.update(hlp._dir) + for att in dir(self._typ): + if not att.startswith("_"): + avail.add(att) + return avail + def __repr__(self): + return "SmartObjectStub({0!r}, {1!r})".format(self._parent, self._typeName) + + from collections import MutableMapping + class Map(MutableMapping): + """ + usage: instStub = myMap[typNm](parent) + """ + def __init__(self, items): + self._capDict = dict(items) + def __iter__(self): + for k in self._capDict: + yield k + def __len__(self): + return len(self._capDict) + def __setitem__(self, k, v): + self._capDict[k] = v + def __delitem__(self, k): + del self._capDict[k] + def get(self, k): ## without decoration + return self._capDict[k] + def __getitem__(self, typeName): + if typeName in self._capDict: + return ( lambda typNm, cap : + ( lambda p,pStub=None : SmartObjectStub(p, typNm, caps=cap, parentStub=pStub) ) + )(typeName, self._capDict[typeName]) + else: + raise KeyError("Type with name {} not found".format(typeName)) From 93ee6c737c7ddaedf54e8882cde067c8ad68eaf7 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Fri, 2 Mar 2018 18:45:49 +0100 Subject: [PATCH 02/14] a few fixes for vector> (electron IDs in ZA) --- python/treedecorators.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/python/treedecorators.py b/python/treedecorators.py index 8a2653f..5e257f7 100644 --- a/python/treedecorators.py +++ b/python/treedecorators.py @@ -146,8 +146,28 @@ def _get(self, name): return GetDataMember(self, name).result def __repr__(self): return "ObjectStub({0!r}, {1!r})".format(self._parent, self._typeName) + ## for mappable types: operator[] and valueType + def __getitem__(self, ky): + if self._get("__getitem__"): + return GetItem(self, ky, indexType=self.indexType).result + else: + return NotImplemented + @property + def valueType(self): + if self._typ.__name__.startswith("map<"): + return self._typ.__name__[4:-1].split(",")[-1].strip() + else: + print "Name: ", self._typ.__name__ + return NotImplemented + @property + def indexType(self): + if self._typ.__name__.startswith("map<"): + return self._typ.__name__[4:-1].split(",")[0].strip() + else: + print "Name: ", self._typ.__name__ + return NotImplemented - ## TODO operators as well, provided that they are supported by the underlying class - need to figure out how to do that efficiently + ## TODO implement more operators, if they are supported by the underlying class class ArrayStub(TupleStub): ## different from an array result of a calculation or not? Not really, in fact... """ @@ -178,7 +198,7 @@ class VectorStub(ObjectStub): def __init__(self, parent, typeName): self._parent = parent ##self.valueType = getattr(ROOT, typeName).value_type ## usable from 6.04.something on - self.valueType = ">".join("<".join(next(mem for mem in dir(getattr(ROOT, typeName)) if mem.startswith("_vector<") and mem.endswith("___M_range_check")).split("<")[1:]).split(">")[:-1]) + self.valueType = ">".join(tk.strip() for tk in "<".join(tok.strip() for tok in next(mem for mem in dir(getattr(ROOT, typeName)) if mem.startswith("_vector<") and mem.endswith("___M_range_check")).split("<")[1:]).split(">")[:-1]) super(VectorStub, self).__init__(parent, typeName) @property def op(self): @@ -191,7 +211,7 @@ def __repr__(self): return "VectorStub({0!r}, {1!r})".format(self._parent, self._typeName) import re -vecPat = re.compile("vector\<(?P[a-zA-Z_0-9\<\>\: ]+)\>") +vecPat = re.compile("vector\<(?P[a-zA-Z_0-9\<\>,\: ]+)\>") def makeStubForType(typeName, parent, length=None): if length is not None: @@ -211,7 +231,10 @@ def adaptArg(arg, typeHint=None): elif isinstance(arg, TupleOp): return arg elif typeHint is not None: - return Const(typeHint, arg) + if str(arg) == arg: ## string, needs quote + return Const(typeHint, '"{}"'.format(arg)) + else: + return Const(typeHint, arg) else: raise ValueError("Should get some kind of type hint") @@ -775,7 +798,7 @@ class GetItem(TupleOp): Get item from array (from function call or from array leaf) """ __slots__ = ("arg", "typeName", "index") - def __init__(self, arg, index): + def __init__(self, arg, index, indexType=SizeType): self.arg = adaptArg(arg) self.typeName = arg.valueType self.index = adaptArg(index, typeHint=SizeType) From 91525359ed24bb342bf3da5a344f198c00099e27 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Sun, 4 Mar 2018 13:02:45 +0100 Subject: [PATCH 03/14] Add helper decorators for leptons in Framework-style trees, and nanoAOD demo --- python/lldeco.py | 113 ++++++++++++++++++++++++++++++++++ python/nanoaoddeco.py | 62 +++++++++++++++++++ scripts/nanoAODInteractive.py | 16 +++++ 3 files changed, 191 insertions(+) create mode 100644 python/lldeco.py create mode 100644 python/nanoaoddeco.py create mode 100755 scripts/nanoAODInteractive.py diff --git a/python/lldeco.py b/python/lldeco.py new file mode 100644 index 0000000..9c1e7ac --- /dev/null +++ b/python/lldeco.py @@ -0,0 +1,113 @@ +""" +Helper classes for llX tree decorators +""" +__all__ = ("leptonAdaptor", "onlyMuon", "onlyElectron", "LeptonScaleFactor", "DiLeptonScaleFactor") + +from .treedecorators import PODStub, SwitchOp + +class LeptonStub(PODStub): + """ Reference to a lepton (electron or muon), or anything operation on it """ + __slots__ = ("el", "mu") + def __init__(self, parent, el=None, mu=None): + self.el = el if el is not None else parent.record.electrons[parent.idx] + self.mu = mu if mu is not None else parent.record.muons[parent.idx] + tpNm = self.el._typeName + assert self.mu._typeName == tpNm + super(LeptonStub, self).__init__(parent, tpNm) + @property + def op(self): + return SwitchOp(self._parent.isEl, self.el, self.mu) + def _get(self, name): + try: + return LeptonStub(self._parent, el=getattr(self.el, name), mu=getattr(self.mu, name)) + except Exception, e: + raise AttributeError("Problem getting attribute '{0}' for LeptonStub with {1} and {2}: {3}".format(name, self.el, self.mu, str(e))) + def __call__(self, *args): + try: ## TODO the same as for _binaryOp + return LeptonStub(self._parent, el=self.el(*args), mu=self.mu(*args)) + except Exception, e: + raise AttributeError("Problem with __all__ on LeptonStub with {0} and {1}: {2}".format(self.el, self.mu, str(e))) + def __repr__(self): + return "LeptonStub({0!r}, el={1!r}, mu={2!r})".format(self._parent, self.el, self.mu) + + def _binaryOp(self, opName, other, outType="Double_t"): + if isinstance(other, LeptonStub) and ( self._parent == other._parent ): + return LeptonStub(self._parent, + el=self.el._binaryOp(opName, other.el, outType=outType), + mu=self.mu._binaryOp(opName, other.mu, outType=outType)) + else: + return super(LeptonStub, self)._binaryOp(opName, other, outType=outType) + +def onlyElectron(fun, muonValue=-1.): + """ apply fun to the electron (or fill the muon value) """ + return lambda l : op.switch(l.isEl, fun(l.L.el), muonValue) +def onlyMuon(fun, electronValue=-1.): + """ apply fun to the muon (or fill the electron value) """ + return lambda l : op.switch(op.NOT(l.isEl), fun(l.L.mu), electronValue) +def leptonAdaptor(elFun, muFun): + """ apply fun to the electron or the muon, as applicable """ + return lambda l : op.switch(l.isEl, elFun(l.L.el), muFun(l.L.mu)) + +from .treedecorators import SmartTupleStub + +class LeptonRef(SmartTupleStub._RefToOther): + __slots__ = tuple() + def __init__(self, name, parent=None): + super(LeptonRef, self).__init__(name, None, parent=parent) + def __getattr__(self, name): + if name == self._name: + return LeptonStub(self._parent) + else: + raise AttributeError("No attribute called {0!r} for object {1!r}".format(name, self)) + def __repr__(self): + return "LeptonRef({0!r}, parent={1!r})".format(self._name, self._parent) + +def prodIfIterable(sfArg, **kwargs): + return (( lambda x : op.product(*(iw(x, **kwargs) for iw in sfArg)) ) + if hasattr(sfArg, "__iter__") else + ( lambda x : sfArg(x, **kwargs) )) + +from .treedecorators import op, boolType +class LeptonScaleFactor(object): + def __init__(self, elSF, muSF, cache=True): + self.elSF = elSF + self.muSF = muSF + self.cache = cache + def __call__(self, lepton, variation="Nominal"): + kwargs = dict(withMCCheck=False, precalc=False, variation=variation) + expr = op.switch(op.extVar(boolType, "runOnMC") + , leptonAdaptor(prodIfIterable(self.elSF, **kwargs), prodIfIterable(self.muSF, **kwargs))(lepton) + , makeConst(1., "Float_t") + ) + if self.cache: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr + +class DiLeptonScaleFactor(object): + class _LL(object): + def __init__(self, l1, l2): + self.l1 = l1 + self.l2 = l2 + + def __init__(self, elelSF=None, elmuSF=None, muelSF=None, mumuSF=None, cache=True): + assert all(arg is not None for arg in (elelSF, elmuSF, muelSF, mumuSF)) + self.elelSF = elelSF + self.elmuSF = elmuSF + self.muelSF = muelSF + self.mumuSF = mumuSF + self.cache = cache + def __call__(self, ll, variation="Nominal"): + kwargs = dict(withMCCheck=False, precalc=False, variation=variation) + expr = op.switch(op.extVar(boolType, "runOnMC") + , op.switch(ll.isElEl, prodIfIterable(self.elelSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.el, ll.l2.L.el)) + , op.switch(ll.isElMu, prodIfIterable(self.elmuSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.el, ll.l2.L.mu)) + , op.switch(ll.isMuEl, prodIfIterable(self.muelSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.mu, ll.l2.L.el)) + , op.switch(ll.isMuMu, prodIfIterable(self.mumuSF, **kwargs)(DiLeptonScaleFactor._LL(ll.l1.L.mu, ll.l2.L.mu)) + , makeConst(1., "Float_t"))))) + , makeConst(1., "Float_t") + ) + if self.cache: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr diff --git a/python/nanoaoddeco.py b/python/nanoaoddeco.py new file mode 100644 index 0000000..94d453f --- /dev/null +++ b/python/nanoaoddeco.py @@ -0,0 +1,62 @@ +from .treedecorators import levelsAbove, addIntoHierarchy, TreeStub, BranchGroupStub, SmartTupleStub + +def decoratedNanoAOD(nnAODTree): + allLvs = dict((lv.GetName(), lv) for lv in nnAODTree.GetListOfLeaves()) + noCountLvs = set() + collectionLvs = dict() + for lvNm, lv in allLvs.iteritems(): + cntLv = lv.GetLeafCount() + if cntLv: + if cntLv.GetName() not in collectionLvs: + collectionLvs[cntLv.GetName()] = [] + collectionLvs[cntLv.GetName()].append(lvNm) + else: + noCountLvs.add(lvNm) + + import re + refPat = re.compile("(?P\w+)Idx(?P\w+)?") + def prefix_to_collectionName(prefix): + return "{0}{1}s".format(prefix[0].lower(), prefix[1:].rstrip("_")) # un-capitalize + ## construct the collections + tree = TreeStub(nnAODTree) + evtHierarchy = levelsAbove([], "event") + # > event + for cntNm, collAttrs in collectionLvs.iteritems(): + prefix = "{}_".format(cntNm[1:]) ## from nJet to Jet_ + if not all(lvNm.startswith(prefix) for lvNm in collAttrs): + if all(lvNm.startswith(prefix[:-1]) for lvNm in collAttrs): + prefix = prefix[:-1] + else: + print "Warning: not all leafs countable with '{0}' have names starting with '{1}': {2}".format(cntNm, prefix, ", ".join(lvNm for lvNm in collAttrs if not lvNm.startswith(prefix))) + objs_smart = BranchGroupStub(prefix) + collName = prefix_to_collectionName(prefix) + addIntoHierarchy(collName, objs_smart, tree, evtHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(cntNm) ]) + for lvNm in collAttrs: + name_nopref = lvNm[len(prefix):] + m = refPat.match(name_nopref) + if m: + target = m.group("target") + nm = ( "{0}{1}".format(target, m.group("suff")) if m.group("suff") else target ) + targetColl = ("genParts" if target == "mcMatch" else prefix_to_collectionName(target)) + objs_smart._addCapability(SmartTupleStub._RefToOther(nm, ( lambda collN, idxN : ( lambda obj : getattr(obj.event, collN)[getattr(obj, idxN)] ) )(targetColl, name_nopref), repl=(name_nopref,))) + + addIntoHierarchy("PV", BranchGroupStub("PV_"), tree, evtHierarchy) + + addIntoHierarchy("HLT", BranchGroupStub("HLT_"), tree, evtHierarchy) + addIntoHierarchy("Flag", BranchGroupStub("Flag_"), tree, evtHierarchy) + + addIntoHierarchy("LHE", BranchGroupStub("LHE_"), tree, evtHierarchy) + + addIntoHierarchy("MET", BranchGroupStub("MET_"), tree, evtHierarchy) + addIntoHierarchy("TkMET", BranchGroupStub("TkMET_"), tree, evtHierarchy) + addIntoHierarchy("CaloMET", BranchGroupStub("CaloMET_"), tree, evtHierarchy) + addIntoHierarchy("RawMET", BranchGroupStub("RawMET_"), tree, evtHierarchy) + addIntoHierarchy("PuppiMET", BranchGroupStub("PuppiMET_"), tree, evtHierarchy) + + addIntoHierarchy("SoftActivityJet", BranchGroupStub("SoftActivityJet"), tree, evtHierarchy) + addIntoHierarchy("HLTrigger", BranchGroupStub("HLTrigger"), tree, evtHierarchy) + addIntoHierarchy("fixedGridRhoFastjet", BranchGroupStub("fixedGridRhoFastjet"), tree, evtHierarchy) + # > event + + return tree diff --git a/scripts/nanoAODInteractive.py b/scripts/nanoAODInteractive.py new file mode 100755 index 0000000..ce03d35 --- /dev/null +++ b/scripts/nanoAODInteractive.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python2 +import ROOT +from cp3_llbb.CommonTools.nanoaoddeco import decoratedNanoAOD +from cp3_llbb.CommonTools.treedecorators import adaptArg, op + +tChain = ROOT.TChain("Events") +tChain.Add("root://xrootd-cms.infn.it//store/data/Run2016E/SingleMuon/NANOAOD/05Jan2018-v1/00000/1CC58832-C5F5-E711-BBC2-0CC47A13D3B2.root") +tChain.GetEntries() ## force load +tup = decoratedNanoAOD(tChain) + +def toDraw(sth): + return adaptArg(sth).get_TTreeDrawStr() +def leafDeps(expr): + return list(expr._parent.leafDeps) + +import IPython; IPython.embed() From f7f06b6a71699ee937b1b8e351915e35f71d5f7d Mon Sep 17 00:00:00 2001 From: Pieter David Date: Sun, 4 Mar 2018 13:24:16 +0100 Subject: [PATCH 04/14] Import templates and python modules from ttWTools (and change path resolutions) --- common/include/ScaleFactors.h | 203 +++++++ common/include/kinematics.h | 30 ++ python/batchhelpers.py | 221 ++++++++ python/condorhelpers.py | 174 ++++++ python/factoryhelpers.py | 83 +++ python/hist_opt_2v2.py | 184 +++++++ python/histfactory.py | 775 +++++++++++++++++++++++++++ python/slurmhelpers.py | 182 +++++++ python/treefactory.py | 384 +++++++++++++ templates/CMakeLists_plotter.txt.tpl | 69 +++ templates/CMakeLists_skimmer.txt.tpl | 72 +++ templates/Plot.tpl | 6 + templates/Plotter.cc.tpl | 104 ++++ templates/Plotter.h.tpl | 110 ++++ templates/SavePlot.tpl | 3 + templates/Skimmer.cc.tpl | 212 ++++++++ templates/Skimmer.h.tpl | 56 ++ templates/generateHeader.sh | 13 + 18 files changed, 2881 insertions(+) create mode 100644 common/include/ScaleFactors.h create mode 100644 common/include/kinematics.h create mode 100644 python/batchhelpers.py create mode 100644 python/condorhelpers.py create mode 100644 python/factoryhelpers.py create mode 100644 python/hist_opt_2v2.py create mode 100644 python/histfactory.py create mode 100644 python/slurmhelpers.py create mode 100644 python/treefactory.py create mode 100644 templates/CMakeLists_plotter.txt.tpl create mode 100644 templates/CMakeLists_skimmer.txt.tpl create mode 100644 templates/Plot.tpl create mode 100644 templates/Plotter.cc.tpl create mode 100644 templates/Plotter.h.tpl create mode 100644 templates/SavePlot.tpl create mode 100644 templates/Skimmer.cc.tpl create mode 100644 templates/Skimmer.h.tpl create mode 100755 templates/generateHeader.sh diff --git a/common/include/ScaleFactors.h b/common/include/ScaleFactors.h new file mode 100644 index 0000000..46ff4ca --- /dev/null +++ b/common/include/ScaleFactors.h @@ -0,0 +1,203 @@ +#pragma once + +#include "BinnedValuesJSONParser.h" +#include + +class ILeptonScaleFactor { +public: + virtual ~ILeptonScaleFactor() {} + virtual float get(const Parameters& parameters, Variation variation) const = 0; +}; +class IDiLeptonScaleFactor { +public: + virtual ~IDiLeptonScaleFactor() {} + virtual float get(const Parameters& l1Param, const Parameters& l2Param, Variation variation) const = 0; +}; + +class IJetScaleFactor { +public: + virtual ~IJetScaleFactor() {} + + enum Flavour { + Light = 0, + Charm = 1, + Beauty = 2 + }; + static inline Flavour get_flavour(int hadron_flavour) { + switch(hadron_flavour) { + case 5: + return Flavour::Beauty; + case 4: + return Flavour::Charm; + default: + return Flavour::Light; + } + } + virtual float get(const Parameters& parameters, Flavour flavour, Variation variation) const = 0; +}; + +/** + * Simple case: one scale factor from file + */ +class ScaleFactor : public ILeptonScaleFactor { +public: + explicit ScaleFactor(const std::string& file) + : m_values{BinnedValuesJSONParser(file).get_values()} + {} + virtual ~ScaleFactor() {} + + virtual float get(const Parameters& parameters, Variation variation) const override final { + return m_values.get(parameters)[static_cast(variation)]; + } +private: + BinnedValues m_values; +}; + +/** + * Dilepton scalefactor from two lepton scalefactors (takes a parameter set for each) + */ +class DiLeptonFromLegsScaleFactor : public IDiLeptonScaleFactor { +public: + DiLeptonFromLegsScaleFactor( std::unique_ptr&& l1_leg1 + , std::unique_ptr&& l1_leg2 + , std::unique_ptr&& l2_leg1 + , std::unique_ptr&& l2_leg2 ) + : m_l1_leg1{std::move(l1_leg1)} + , m_l1_leg2{std::move(l1_leg2)} + , m_l2_leg1{std::move(l2_leg1)} + , m_l2_leg2{std::move(l2_leg2)} + {} + virtual ~DiLeptonFromLegsScaleFactor() {} + + virtual float get(const Parameters& l1Param, const Parameters& l2Param, Variation variation) const override final { + const float eff_lep1_leg1 = m_l1_leg1->get(l1Param, variation); + const float eff_lep1_leg2 = m_l1_leg2->get(l1Param, variation); + const float eff_lep2_leg1 = m_l2_leg1->get(l2Param, variation); + const float eff_lep2_leg2 = m_l2_leg2->get(l2Param, variation); + + if ( variation == Nominal ) { + return -(eff_lep1_leg1 * eff_lep2_leg1) + + (1 - (1 - eff_lep1_leg2)) * eff_lep2_leg1 + + eff_lep1_leg1 * (1 - (1 - eff_lep2_leg2)) ; + } else { + const float nom_eff_lep1_leg1 = m_l1_leg1->get(l1Param, Nominal); + const float nom_eff_lep1_leg2 = m_l1_leg2->get(l1Param, Nominal); + const float nom_eff_lep2_leg1 = m_l2_leg1->get(l2Param, Nominal); + const float nom_eff_lep2_leg2 = m_l2_leg2->get(l2Param, Nominal); + + const float nominal = -(eff_lep1_leg1 * eff_lep2_leg1) + + (1 - (1 - eff_lep1_leg2)) * eff_lep2_leg1 + + eff_lep1_leg1 * (1 - (1 - eff_lep2_leg2)) ; + + const float error_squared = + std::pow(1 - nom_eff_lep2_leg1 - (1 - nom_eff_lep2_leg2), 2) * + std::pow(eff_lep1_leg1, 2) + + std::pow(nom_eff_lep2_leg1, 2) * + std::pow(eff_lep1_leg2, 2) + + std::pow(1 - nom_eff_lep1_leg1 - (1 - nom_eff_lep1_leg2), 2) * + std::pow(eff_lep2_leg1, 2) + + std::pow(nom_eff_lep1_leg1, 2) * + std::pow(eff_lep2_leg2, 2); + + if ( variation == Up ) { + return std::min(nominal+std::sqrt(error_squared), 1.f); + } else if ( variation == Down ) { + return std::max(0.f, nominal-std::sqrt(error_squared)); + } + } + } +private: + std::unique_ptr m_l1_leg1; + std::unique_ptr m_l1_leg2; + std::unique_ptr m_l2_leg1; + std::unique_ptr m_l2_leg2; +}; + +/** + * B-tagging scale factor (depends on flavour) + */ +class BTaggingScaleFactor : public IJetScaleFactor { +public: + BTaggingScaleFactor(const std::string& lightFile, const std::string& charmFile, const std::string& beautyFile) + : m_lightValues {BinnedValuesJSONParser(lightFile ).get_values()} + , m_charmValues {BinnedValuesJSONParser(charmFile ).get_values()} + , m_beautyValues{BinnedValuesJSONParser(beautyFile).get_values()} + {} + virtual ~BTaggingScaleFactor() {} + + virtual float get(const Parameters& parameters, Flavour flavour, Variation variation) const override final { + switch(flavour) { + case Flavour::Beauty: + return m_beautyValues.get(parameters)[static_cast(variation)]; + case Flavour::Charm: + return m_charmValues .get(parameters)[static_cast(variation)]; + case Flavour::Light: + return m_lightValues .get(parameters)[static_cast(variation)]; + default: + throw std::invalid_argument("Unsupported flavour: "+flavour); + } + } +private: + BinnedValues m_lightValues; + BinnedValues m_charmValues; + BinnedValues m_beautyValues; +}; + +#include +#include +#include "IndexRangeIterator.h" +/** + * Weight between different scale factors + */ +template +class WeightedScaleFactor : public ISingle { +public: + WeightedScaleFactor( const std::vector& probs, std::vector>&& sfs ) + : m_terms{std::move(sfs)} + { + const double norm{1./std::accumulate(std::begin(probs), std::end(probs), 0.)}; + std::transform(std::begin(probs), std::end(probs), std::back_inserter(m_probs), [norm] ( float p ) -> float { return norm*p; } ); + } + virtual ~WeightedScaleFactor() {} + + virtual float get(Args&&... args, Variation variation) const override final { + return std::accumulate(IndexRangeIterator(0, m_terms.size()), IndexRangeIterator(m_terms.size(), m_terms.size()), 0., + [this,&args...,variation] ( float wsum, std::size_t i ) -> float { + return wsum + m_probs[i]*m_terms[i]->get(std::forward(args)..., variation); + } + ); + } +private: + std::vector m_probs; + std::vector> m_terms; +}; +using WScaleFactor = WeightedScaleFactor; +using WLLScaleFactor = WeightedScaleFactor; +using WBTaggingScaleFactor = WeightedScaleFactor; + +#include +/** + * Sample according to fractions from different scale factors + */ +template +class SampledScaleFactor : public ISingle +{ +public: + SampledScaleFactor( const std::vector probs, std::vector>&& sfs ) + : m_rg{42} + , m_probs{std::discrete_distribution(std::begin(probs), std::end(probs))} + , m_terms{std::move(sfs)} + {} + virtual ~SampledScaleFactor() {} + + virtual float get(Args... args, Variation variation) const override final { + return m_terms[m_probs(m_rg)]->get(std::forward(args)..., variation); + } +private: + mutable std::mt19937 m_rg; + mutable std::discrete_distribution m_probs; + std::vector> m_terms; +}; +using SmpScaleFactor = SampledScaleFactor; +using SmpLLScaleFactor = SampledScaleFactor; +using SmpBTaggingScaleFactor = SampledScaleFactor; diff --git a/common/include/kinematics.h b/common/include/kinematics.h new file mode 100644 index 0000000..97f9ab4 --- /dev/null +++ b/common/include/kinematics.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace Kinematics { + using LorentzVector = ROOT::Math::LorentzVector >; + + float deltaPhi( const LorentzVector& pa, const LorentzVector& pb ) + { + return std::acos(std::min(std::max(-1., + (pa.Px()*pb.Px()+pa.Py()*pb.Py())/(pa.Pt()*pb.Pt()) + ), 1.)); + } + float deltaR( const LorentzVector& pa, const LorentzVector& pb ) + { + return std::sqrt(std::pow(pb.Eta()-pa.Eta(), 2)+std::pow(deltaPhi(pa, pb), 2)); + } + + float signedDeltaPhi( const LorentzVector& pa, const LorentzVector& pb ) + { + return ( ( pb.Py()*pa.Px()-pb.Px()*pa.Py() > 0. ) ? 1. : -1. )*deltaPhi(pa, pb); + } + + float signedDeltaEta( const LorentzVector& pa, const LorentzVector& pb ) + { + const float etaA{pa.Eta()}; + const float etaB{pb.Eta()}; + return ( etaA > 0. ? 1. : -1. )*( etaB - etaA ); // (small) positive and negative = more and less forward + } +}; diff --git a/python/batchhelpers.py b/python/batchhelpers.py new file mode 100644 index 0000000..5617f0c --- /dev/null +++ b/python/batchhelpers.py @@ -0,0 +1,221 @@ +""" +Batch cluster tools (common part for HTCondor and slurm) +""" +__author__ = "Pieter David " +__date__ = "February-April 2017" + +import logging +logger = logging.getLogger(__name__) + +import os +import os.path + +def makeExecutable(path): + """ Set file permissions to executable """ + import stat + if os.path.exists(path) and os.path.isfile(path): + perm = os.stat(path) + os.chmod(path, perm.st_mode | stat.S_IEXEC) + +class CommandListJob(object): + """ Interface/base for 'backend' classes to create a job cluster/array from a list of commands (eah becoming a subjob) """ + def __init__(self, commandList, workDir=None, workdir_default_pattern="batch_work"): + self.commandList = commandList + self.workDir = self.init_dirtowrite(workDir, default_pattern=workdir_default_pattern) + self.workDirs = self.setupBatchDirs(self.workDir) + + ## interface methods + def submit(self): + """ Submit the job(s) """ + pass + def commandOutFiles(self, command): + """ Output files for a given command (when finished) """ + pass + ## for monitoring + @property + def status(self): + """ overall job status summary """ + pass + def statuses(self): + """ list of subjob statuses (numeric, using indices in CondorJobStatus) """ + pass + def updateStatuses(self): + """ Update the subjob status cache (if used in the implementation) """ + pass + def subjobStatus(self, i): + """ get status of subjob """ + pass + def commandStatus(self, command): + """ get the status of the jobs corresponding to one of the commands """ + pass + + ## helper methods + @staticmethod + def init_dirtowrite(given=None, default_pattern="batch"): + """ Initialisation helper: check that the directory does not exist, then create it + + If None is given, a default ("$(pwd)/{default_pattern}", "$(pwd)/default_pattern_0" etc.) is used + """ + if given is None: + test_dir = os.path.join(os.getcwd(), default_pattern) + if os.path.exists(test_dir): + i = 0 + while os.path.exists(test_dir) and i < 10: + test_dir = os.path.join(os.getcwd(), "{0}_{1:d}".format(default_pattern, i)) + i += 1 + else: + test_dir = given + if os.path.exists(test_dir): + raise Exception("Directory {0} exists".format(test_dir)) + + os.makedirs(test_dir) + + return os.path.abspath(test_dir) ## make sure we keep absolute paths + + @staticmethod + def setupBatchDirs(workDir): + """ Create up the working directories (input, output, logs) under workDir """ + dirs = { + "in" : os.path.join(workDir, "input") + , "out" : os.path.join(workDir, "output") + , "log" : os.path.join(workDir, "logs") + } + for subdir in dirs.itervalues(): + os.mkdir(subdir) + logger.info("Created working directories under {0}".format(workDir)) + return dirs + +class SplitAggregationTask(object): + """ Task split in commands, whose results can be merged """ + def __init__(self, commandList, finalizeAction=None): + self._jobCluster = None + self.commandList = commandList + self.finalizeAction = finalizeAction + @property + def jobCluster(self): + return self._jobCluster + @jobCluster.setter + def jobCluster(self, jobCluster): + if self._jobCluster: + raise Exception("SplitAggregationTask.jobCluster has already been set") + self._jobCluster = jobCluster + if self.finalizeAction: ## pass it on + self.finalizeAction.jobCluster = jobCluster + + def tryFinalize(self): + if self.jobCluster and all(self.jobCluster.commandStatus(cmd).upper() == "COMPLETED" for cmd in self.commandList): + if self.finalizeAction: + self.finalizeAction.perform() + return True + return False + +class Action(object): + """ interface for job finalization """ + def perform(self): + """ interface method """ + return False + +import subprocess + +class HaddAction(Action): + """ merge results with hadd + """ + def __init__(self, commandList, outDir, options=None): + self.jobCluster = None + self.commandList = commandList + self.outDir = outDir + self.options = options if options is not None else [] + super(HaddAction, self).__init__() + def perform(self): + if len(self.commandList) == 0: ## nothing + return + elif len(self.commandList) == 1: ## move + cmd = self.commandList[0] + for outf in self.jobCluster.commandOutFiles(cmd): + logger.info("finalization: moving output file {0} to {1}".format(outf, self.outDir)) + subprocess.check_call(["mv", outf, self.outDir]) + else: ## merge + ## collect for each output file name which jobs produced one + filesToMerge = dict() + for cmd in self.commandList: + for outf in self.jobCluster.commandOutFiles(cmd): + outfb = os.path.basename(outf) + if outfb not in filesToMerge: + filesToMerge[outfb] = [] + filesToMerge[outfb].append(outf) + + for outfb, outfin in filesToMerge.iteritems(): + fullout = os.path.join(self.outDir, outfb) + logger.info("finalization: merging {0} to {1}".format(", ".join(outfin), fullout)) + subprocess.check_call(["hadd"]+self.options+[fullout]+outfin) + +from itertools import izip, chain + +class TasksMonitor(object): + """ Monitor a number of tasks and the associated job clusters """ + def __init__(self, jobs=[], tasks=[], interval=120, allStatuses=None, activeStatuses=None, completedStatus=None): + """ Constructor + + jobs: job clusters to monitor + tasks: tasks to monitor (and finalize) + interval: number of seconds between status checks + """ + self.jobs = set() + self.tasks = set() + self.activetasks = set() + self.add(jobs, tasks) + self.interval = interval + self.allStatuses = list(allStatuses) if allStatuses else [] + self.activeStatuses = list(activeStatuses) if activeStatuses else [] + self.completedStatus = completedStatus + def add(self, jobs, tasks): + """ add jobs and tasks """ + self.jobs.update(jobs) + self.tasks.update(tasks) + assert sum(len(j.commandList) for j in self.jobs) == sum(len(t.commandList) for t in self.tasks) ## assumption: same set of commands + self.activetasks.update(tasks) + @staticmethod + def makeStats(statuses, allStatuses): + histo = [ 0 for st in allStatuses ] + for jSt in statuses: + histo[jSt] += 1 + return histo + @staticmethod + def formatStats(stats, allStatuses): + return "{0} / {1:d} Total".format(", ".join("{0:d} {1}".format(n,nm) for n,nm in izip(stats, allStatuses)), sum(stats)) + + def _tryFinalize(self): + """ try to finalize tasks """ + finalized = [] + for t in self.activetasks: + if t.tryFinalize(): + finalized.append(t) ## don't change while iterating + for ft in finalized: + self.activetasks.remove(ft) + if len(finalized) > 0: + logger.info("Finalized {0:d} tasks, {1:d} remaining".format(len(finalized), len(self.activetasks))) + + def collect(self, wait=None): + """ wait for the jobs to finish, then finalize tasks """ + for j in self.jobs: + j.updateStatuses() + self._tryFinalize() + if len(self.activetasks) > 0: + logger.info("Waiting for jobs to finish...") + from datetime import datetime + import time + nJobs = sum( len(j.commandList) for j in self.jobs ) + if wait: + time.sleep(wait) + for j in self.jobs: + j.updateStatuses() + stats = self.makeStats(chain.from_iterable(j.statuses() for j in self.jobs), self.allStatuses) + while len(self.activetasks) > 0 and sum(stats[sa] for sa in self.activeStatuses) > 0: + time.sleep(self.interval) + prevStats = stats + for j in self.jobs: + j.updateStatuses() + stats = self.makeStats(chain.from_iterable(j.statuses() for j in self.jobs), self.allStatuses) + logger.info("[ {0} :: {1} ]".format(datetime.now().strftime("%H:%M:%S"), self.formatStats(stats, self.allStatuses))) + if stats[self.completedStatus] > prevStats[self.completedStatus]: + self._tryFinalize() diff --git a/python/condorhelpers.py b/python/condorhelpers.py new file mode 100644 index 0000000..3c54790 --- /dev/null +++ b/python/condorhelpers.py @@ -0,0 +1,174 @@ +""" +HTCondor tools (based on cp3-llbb/CommonTools condorSubmitter) +""" +__author__ = "Pieter David 200)\n" + "executable = {indir}/condor.sh\n" + "arguments = $(Process)\n" + "output = {logdir_rel}/condor_$(Process).out\n" + "error = {logdir_rel}/condor_$(Process).err\n" + "log = {logdir_rel}/condor_$(Process).log\n" + "queue {nJobs:d}\n" + ) + MasterShell = ( + "#!/usr/bin/env bash\n" + "\n" + "{indir}/condor_$1.sh\n" + ) + + JobShell = ( + "#!/usr/bin/env bash\n" + "\n" + "{environment_setup}" + # "# Setup our CMS environment\n" + # "pushd {CMS_PATH}\n" + # "source /cvmfs/cms.cern.ch/cmsset_default.sh\n" + # "eval `scram runtime --sh`\n" + # "popd\n" + "\n" + "function move_files {{\n" + "{move_fragment}" + "\n}}\n" + "\n" + "{command} && move_files" + ) + + def _writeCondorFiles(self): + """ Create Condor .sh and .cmd files """ + masterCmdName = os.path.join(self.workDirs["in"], "condor.cmd") + with open(masterCmdName, "w") as masterCmd: + masterCmd.write(CommandListCondorJob.MasterCmd.format( + indir=self.workDirs["in"] + , logdir_rel=os.path.relpath(self.workDirs["log"]) + , nJobs=len(self.commandList) + )) + masterShName = os.path.join(self.workDirs["in"], "condor.sh") + with open(masterShName, "w") as masterSh: + masterSh.write(CommandListCondorJob.MasterShell.format( + indir=self.workDirs["in"] + )) + makeExecutable(masterShName) + + for i,command in izip(count(), self.commandList): + jobShName = os.path.join(self.workDirs["in"], "condor_{:d}.sh".format(i)) + job_outdir = os.path.join(self.workDirs["out"], str(i)) + os.makedirs(job_outdir) + with open(jobShName, "w") as jobSh: + jobSh.write(CommandListCondorJob.JobShell.format( + environment_setup="\n".join(self.envSetupLines) + , move_fragment="\n".join(( + " for file in {pattern}; do\n" + ' echo "Moving $file to {outdir}/"\n' + " mv $file {outdir}/\n" + " done" + ).format(pattern=ipatt, outdir=job_outdir) + for ipatt in self.outputPatterns) + , command=command + )) + makeExecutable(jobShName) + + return masterCmdName + + def _commandOutDir(self, command): + """ Output directory for a given command """ + return os.path.join(self.workDirs["out"], str(self.commandList.index(command))) + def commandOutFiles(self, command): + """ Output files for a given command """ + import fnmatch + cmdOutDir = self._commandOutDir(command) + return list( os.path.join(cmdOutDir, fn) for fn in os.listdir(cmdOutDir) + if any( fnmatch.fnmatch(fn, pat) for pat in self.outputPatterns) ) + + def submit(self): + """ Submit the jobs to condor """ + logger.info("Submitting {0:d} condor jobs.".format(len(self.commandList))) + result = subprocess.check_output(["condor_submit", self.masterCmd]) + + import re + pat = re.compile("\d+ job\(s\) submitted to cluster (\d+)\.") + g = pat.search(result) + self.clusterId = g.group(1) + + ## save to file in case + with open(os.path.join(self.workDirs["in"], "cluster_id"), "w") as f: + f.write(self.clusterId) + + logger.info("Submitted, job ID is {}".format(self.clusterId)) + + def statuses(self): + """ list of subjob statuses (numeric, using indices in CondorJobStatus) """ + if self.clusterId is None: + raise Exception("Cannot get status before submitting the jobs to condor") + return map(int, list(subprocess.check_output(["condor_q" , self.clusterId, "-format", '%d ', "JobStatus"]).strip().split()) + + list(subprocess.check_output(["condor_history", self.clusterId, "-format", '%d ', "JobStatus"]).strip().split()) ) + + @property + def status(self): + statuses = self.statuses() + if all(st == statuses[0] for st in statuses): + return CondorJobStatus[statuses[0]] + elif any(st == 2 for st in statuses): + return "Running" + elif any(st == 3 for st in statuses): + return "Removed" + else: + return "unknown" + + def subjobStatus(self, i): + subjobId = "{0}.{1:d}".format(self.clusterId, i) + ret = subprocess.check_output(["condor_q", subjobId, "-format", '%d', "JobStatus"]) + if len(ret) == 0: # search in the completed ones + ret = subprocess.check_output(["condor_history", subjobId, "-format", '%d', "JobStatus"]) + return CondorJobStatus[int(ret)] + def commandStatus(self, command): + return self.subjobStatus(self.commandList.index(command)) + +def makeCondorTasksMonitor(jobs=[], tasks=[], interval=120): + """ make a TasksMonitor for condor jobs """ + from .batchhelpers import TasksMonitor + return TasksMonitor(jobs=jobs, tasks=tasks, interval=interval + , allStasuses=CondorJobStatus + , activeStatuses=(1,2) + , completedStatus=4 + ) diff --git a/python/factoryhelpers.py b/python/factoryhelpers.py new file mode 100644 index 0000000..d9de511 --- /dev/null +++ b/python/factoryhelpers.py @@ -0,0 +1,83 @@ +""" +Helper methods shared between histFactory and treeFactory +""" + +import logging +logger = logging.getLogger(__name__) + +import os.path + +from . import pathCommonTools +TEMPLATE_DIR = os.path.join(pathCommonTools, "templates") +def getTemplate(templName): + return os.path.join(TEMPLATE_DIR, "{0}.tpl".format(templName)) +COMMON_DIR = os.path.join(pathCommonTools, "common") +COMMON_INCLUDE_DIR = os.path.join(COMMON_DIR, "include") +COMMON_SRC_DIR = os.path.join(COMMON_DIR, "src") + +################################################################################ +## TEMPLATE EXPANSION ## +################################################################################ + +def expandTemplateLines(inFileName, **replacements): + """ Generator: lines with replacements """ + fmtReplacements = dict(("{{{{{0}}}}}".format(fro), to) for fro,to in replacements.iteritems()) + with open(inFileName, "r") as inFile: + for line in inFile: + myLn = line + for fro,to in fmtReplacements.iteritems(): + myLn = myLn.replace(fro, to) + if "{{" not in myLn: + break ## stop searching + yield myLn ## could also split by "\n" again +def expandTemplate(inFileName, **replacements): + """ expand as a single string """ + return "".join(expandTemplateLines(inFileName, **replacements)) +def expandTemplateAndWrite(outFileName, inFileName, **replacements): + """ expand and write to file """ + with open(outFileName, "w") as outFile: + outFile.writelines(expandTemplateLines(inFileName, **replacements)) + + +## other helpers +def getBranchType(name, skeleton): + ## TODO plots should contain expressions not callables, and leafDeps should return the type as well + ## then this method disappears (code is in ttWdeco already), expressions compiled when creating the plot + ## and createPlotter does not even need a skeleton any more + ibr = next(br for br in skeleton.GetListOfBranches() if br.GetName() == name) + clNm = ibr.GetClassName() + if len(clNm) > 0: + return clNm + else: + lf = skeleton.GetLeaf(ibr.GetName()) + if not lf: + logger.error("Can't deduce type for branch '{0}'".format(ibr.GetName())) + else: + return lf.GetTypeName() + +import os +from contextlib import contextmanager + +@contextmanager +def insideOtherDirectory(path): + prevDir = os.getcwd() + os.chdir(path) + assert os.getcwd() != prevDir + yield + os.chdir(prevDir) + +def getARootFileName(jsonName): + """ + Get any file from the json files field + """ + return next(fn for sampDict in loadSamples(jsonName).itervalues() for fn in sampDict["files"]) + +def loadSamples(jsonName): + """ + Load all samples from the json file + + """ + import json + with open(jsonName, "r") as sampFile: + samples = json.load(sampFile) + return samples diff --git a/python/hist_opt_2v2.py b/python/hist_opt_2v2.py new file mode 100644 index 0000000..255dfda --- /dev/null +++ b/python/hist_opt_2v2.py @@ -0,0 +1,184 @@ +""" +histfactory opt2v2 (complete tree of cuts, precalculation etc.) helper code +""" + +from .treedecorators import allArgsOfOp, opDependsOn + +class NodeTreeNeedsOpCache(object): + """ Store nodes that depend on the operator to save time + """ + def __init__(self, op): + self.depends = set() + self.notDepends = set() + self.op = op + def __call__(self, aNode): + if aNode in self.depends: + return True + elif aNode in self.notDepends: + return False + else: + ret = aNode.treeNeedsOp_cache(self) + if ret: + self.depends.add(aNode) + else: + self.notDepends.add(aNode) + return ret + +class IDTNode(object): + """ Interface for a node in the dependency tree + """ + __slots__ = ("__weakref__", "_parent") + def __init__(self, parent=None): + self._parent = parent + @property + def depNodes(self): + pass + @property + def isTerminal(self): + return False + @property + def parent(self): + return self._parent + @parent.setter + def parent(self, parent): + self._parent = parent + def clone(self): + """ + Clone this node (without dependencies) + """ + pass + def ownDbgStr(self): + pass + def treeNeedsOp(self, op): + pass + def treeNeedsOp_cache(self, cache): + return self.treeNeedsOp(cache.op) ## default: don't use the cache + +class PlotNode(IDTNode): + """ Plot (terminal) node + """ + __slots__ = ("plot",) + def __init__(self, plot, parent=None): + self.plot = plot + super(PlotNode, self).__init__(parent=parent) + @property + def isTerminal(self): + return True + @property + def depNodes(self): + return tuple() + def clone(self): + return PlotNode(self.plot, parent=self.parent) + def ownDbgStr(self): ## for reasonably compact debug printing + return "PlotNode(( {0} ), {1})".format(", ".join(v.get_TTreeDrawStr() for v in self.plot.variables), self.plot.selection.weight.get_TTreeDrawStr()) + def treeNeedsOp(self, op): + return opDependsOn(self.plot.weight, op) or any(opDependsOn(var, op) for var in self.plot.variables) + +class DTNode(IDTNode): + """ Selection or reduce (non-terminal) node + """ + __slots__ = ("_deps",) + def __init__(self, parent=None, deps=None): + if deps is None: + deps = [] + self._deps = deps + super(DTNode, self).__init__(parent=parent) + def addDependentNode(self, nd): + self._deps.append(nd) + def removeDependentNode(self, nd): + self._deps.remove(nd) + @property + def depNodes(self): + return self._deps + @property + def isTerminal(self): + return False + def treeNeedsOp(self, op): + return any( nd.treeNeedsOp(op) for nd in self.depNodes ) + def treeNeedsOp_cache(self, cache): + return any( cache(nd) for nd in self.depNodes ) + +class RootNode(DTNode): + """ Special case: root node (no parent by definition, only holds deps list) + """ + __slots__ = () + def __init__(self, deps=None): + super(RootNode, self).__init__(parent=None, deps=deps) + def ownDbgStr(self): + return "/" + +class SelectionNode(DTNode): + """ Selection node (cuts list and dependencies) + """ + __slots__ = ("cuts",) + def __init__(self, cuts, parent=None, deps=None): + self.cuts = cuts + super(SelectionNode, self).__init__(parent=parent, deps=deps) + def clone(self): + return SelectionNode(self.cuts, parent=self.parent, deps=self._deps) + def aboveSel(self): + ## return the first SelectionNode in the ancestor tree (or None) + cand = self.parent + while cand and not isinstance(cand, SelectionNode): + cand = cand.parent + return cand + @property + def ownCuts(self): + parSel = self.aboveSel() + if not parSel: + return self.cuts + else: + return tuple(c for c in self.cuts if c not in parSel.cuts) + def ownDbgStr(self): ## for reasonably compact debug printing + from .treedecorators import makeExprAnd + return "SelectionNode({0}) with {1:d} dependent nodes".format(makeExprAnd(self.ownCuts).get_TTreeDrawStr(), len(self._deps)) + def treeNeedsOp(self, op): + return any( opDependsOn(ci, op) for ci in self.cuts ) or super(SelectionNode, self).treeNeedsOp(op) + def treeNeedsOp_cache(self, cache): + return any( opDependsOn(ci, cache.op) for ci in self.cuts ) or super(SelectionNode, self).treeNeedsOp_cache(cache) + +class PrecalcNode(DTNode): + """ Precalculation node (expression and dependencies) + """ + __slots__ = ("op",) + def __init__(self, op, parent=None, deps=None): + self.op = op + super(PrecalcNode, self).__init__(parent=parent, deps=deps) + def clone(self): + return PrecalcNode(self.op, parent=self.parent, deps=self._deps) + def ownDbgStr(self): ## for reasonably compact debug printing + return "PrecalcNode({0}={1}) with {2:d} dependent nodes".format(self.op.uname, self.op._get_TTreeDrawStr(), len(self._deps)) + def treeNeedsOp(self, op): + return opDependsOn(self.op, op) or super(PrecalcNode, self).treeNeedsOp(op) + def treeNeedsOp_cache(self, cache): + return opDependsOn(self.op, cache.op) or super(PrecalcNode, self).treeNeedsOp_cache(cache) + +class SkimmerFillBranchesNode(IDTNode): + __slots__ = ("branches", "fill_tree_lines") + def __init__(self, branches, parent=None, fill_tree_lines=None): + self.branches = branches + self.fill_tree_lines = fill_tree_lines + super(SkimmerFillBranchesNode, self).__init__(parent=parent) + @property + def isTerminal(self): + return True + @property + def depNodes(self): + return tuple() + def clone(self): + return SkimmerFillBranchesNode(self.branches, parent=self.parent, fill_tree_lines=self.fill_tree_lines) + def ownDbgStr(self): + return "SkimmerFillBranchesNode(...)" + def treeNeedsOp(self, op): + return any(opDependsOn(var, op) for nm,var,unm in self.branches) + + +def printTree(nd, indent=0): + ## helper to (recursively) print a tree + yield (indent*" ")+nd.ownDbgStr() + nPlots = sum( 1 for dep in nd.depNodes if isinstance(dep, PlotNode) ) + yield ((indent+1)*" ")+"{0:d} plots".format(nPlots) + for dep in nd.depNodes: + if not isinstance(dep, PlotNode): + for ln in printTree(dep, indent=indent+1): + yield ln diff --git a/python/histfactory.py b/python/histfactory.py new file mode 100644 index 0000000..7183c92 --- /dev/null +++ b/python/histfactory.py @@ -0,0 +1,775 @@ +""" +Generate C++ code to fill the histos, with helpers to run it +""" +__all__ = ("createPlotter", "compilePlotter", "runPlotterOnSamples") + +import logging +logger = logging.getLogger(__name__) + +from itertools import izip, count, chain, groupby + +from .factoryhelpers import getTemplate, expandTemplate, expandTemplateAndWrite, getBranchType, insideOtherDirectory +from .plots import * + +def collectIdentifiersFromPlots(plots): + identifiers = set() + for i,plot in izip(count(1), plots): + if i % 200 == 0: + print "Parsing plot #{0:d}/{1:d}".format(i,len(plots)) + try: + identifiers.update(plot.cut.leafDeps) + except Exception, e: + logger.error("Problem parsing cut of plot {0} : {1}".format(plot.name, str(e))) + try: + identifiers.update(plot.weight.leafDeps) + except Exception, e: + logger.error("Problem parsing weight of plot {0} : {1}".format(plot.name, str(e))) + for j,sVar in izip(count(), plot.variables): + try: + identifiers.update(sVar.leafDeps) + except Exception, e: + logger.error("Problem parsing sVar of plot {0} : {1}".format(plot.name, str(e))) + return identifiers + + +def getHistType(plot): + assert len(plot.variables) == len(plot.binnings) and len(plot.variables) == len(plot.axisTitles) + if len(plot.variables) == 1: + return "TH1F" + elif len(plot.variables) == 2: + return "TH2F" + elif len(plot.variables) == 3: + return "TH3F" + else: + raise Exception("Invalid number of dimensions") + +def getBinningDefinitions(plot, uname): + decls = [] + for idx,bn in izip(count(), plot.binnings): + if isinstance(bn, EquidistantBinning): + pass + elif isinstance(bn, VariableBinning): + arrNm = "{name}_{idx}".format(name=uname, idx=idx) + decls.append(" double {name}[] = {{ {0} }};".format(", ".join("{0:e}".format(iEdg) for iEdg in bn.binEdges), name=arrNm)) + else: + raise Exception("Unsupported binning: {0!r}".format(bn)) + return decls + +def getBinningUses(plot, uname): + uses = [] + for idx,bn in izip(count(), plot.binnings): + if isinstance(bn, EquidistantBinning): + uses.append("{0:d}, {1:e}, {2:e}".format(bn.N, bn.mn, bn.mx)) + elif isinstance(bn, VariableBinning): + arrNm = "{name}_{idx}".format(name=uname, idx=idx) + uses.append("{0:d}, &({name}[0])".format(len(bn.binEdges)-1, name=arrNm)) + else: + raise Exception("Unsupported binning: {0!r}".format(bn)) + return ", ".join(uses) + +def getOtherHistSettings(plot, uname): + outLines = [] + ## set bin labels if necessary + ## x-axis + if len(plot.axisBinLabels) > 0 and plot.axisBinLabels[0] is not None: + axisBinLabels = plot.axisBinLabels[0] + if len(axisBinLabels) == plot.binnings[0].N: + outLines.append(" {0}".format(" ".join("{hn}->GetXaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of x bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[0].N, len(axisBinLabels))) + ## y-axis + if len(plot.axisBinLabels) > 1 and plot.axisBinLabels[1] is not None: + axisBinLabels = plot.axisBinLabels[1] + if len(axisBinLabels) == plot.binnings[1].N: + outLines.append(" {0}".format(" ".join("{hn}->GetYaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of y bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[1].N, len(axisBinLabels))) + ## z-axis + if len(plot.axisBinLabels) > 2 and plot.axisBinLabels[2] is not None: + axisBinLabels = plot.axisBinLabels[2] + if len(axisBinLabels) == plot.binnings[2].N: + outLines.append(" {0}".format(" ".join("{hn}->GetZaxis()->SetBinLabel({idx:d}, \"{iLabel}\");".format(hn=uname, idx=i, iLabel=lbl) for i,lbl in izip(count(1), axisBinLabels)))) + else: + logger.error("Not the same number of z bins ({0:d}) and labels ({1:d}) -> not setting them".format(plot.binnings[2].N, len(axisBinLabels))) + # + return outLines + +def createPlotsInnerLoop_ref(plots, uhNames): + """ + Original (reference) implementation: evaluate selection, weight and value for each and every histogram + """ + return "\n".join( + expandTemplate(getTemplate("Plot") + , CUT=plot.cut.get_TTreeDrawStr() + , WEIGHT=plot.weight.get_TTreeDrawStr() + , VAR=", ".join(v.get_TTreeDrawStr() for v in plot.variables) + , HIST=uhNames[plot] + ) for plot in plots + ) + +def greedyGroupBy(inputs, keyFunc): + """ + Greedy version of groupby + """ + items = list(inputs) + while len(items) > 0: + idxs = set() + val = keyFunc(items[0]) + idxs.add(0) + thisGroup = [ items[0] ] + for i,it in izip(count(1), items[1:]): + if keyFunc(it) == val: + idxs.add(i) + thisGroup.append(it) + yield val, thisGroup + nBefore = len(items) + items = [ it for i,it in izip(count(),items) if i not in idxs ] + assert len(items)+len(thisGroup) == nBefore + +def createPlotsInnerLoop_opt1(plots, uhNames, indent=2, alreadyReq=tuple()): + """ + More optimised plotting loop - take 1: group by cut and weight + """ + from .treedecorators import makeExprAnd + return "\n".join((indent*" ")+ln if len(ln) > 0 else ln for ln in chain.from_iterable( + [ "__cut = ({0});".format(makeExprAnd([ct for ct in selGrpPlots[0].selection.cuts if ct not in alreadyReq]).get_TTreeDrawStr()) + , "if (__cut) {" + ] + list(" {0}".format(ln) if len(ln) > 0 else ln for ln in chain.from_iterable( + [ "__weight = ({0});".format(weight.get_TTreeDrawStr()) + ] + [ "fill({HIST}.get(), {VAR}, __weight);".format(HIST=uhNames[plot], VAR=", ".join(v.get_TTreeDrawStr() for v in plot.variables)) + for plot in wgtGrpPlots + ] for weight,wgtGrpPlots in greedyGroupBy(selGrpPlots, lambda ip : ip.weight) )) + + [ "}" + , "" + ] for ct,selGrpPlots in greedyGroupBy(plots, lambda ip : ip.cut) + )) + +from .treedecorators import allArgsOfOp, opDependsOn + +def plotDependsOn(pt, other): + return any(opDependsOn(ct, other) for ct in pt.selection.cuts) or any(opDependsOn(wt, other) for wt in pt.selection.weights) or any(opDependsOn(var, other) for var in pt.variables) + +def iunique(otherGen): + """ Remove duplicates from another generator + """ + alreadyReturned = set() + for elm in otherGen: + if elm not in alreadyReturned: + yield elm + alreadyReturned.add(elm) + +def collectReduces(listOfPlots): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _addReduces(depDict, op, abovePlotOrReduce): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = abovePlotOrReduce + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + if op not in depDict: + depDict[op] = ("r_{0}".format(str(uuid.uuid4()).replace("-", "_")), set()) + assert op.uname is None + uname, deps = depDict[op] + op.uname = uname + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addReduces(depDict, ia, top) + else: + raise Exception("_addReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _addReduces(depDict, var, plot) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for reduce opts + +def removeReduceUnames(listOfPlots): + # mirror of the above: clean up afterwards + from .treedecorators import TupleOp, Reduce, Next + def _clearReduces(op): + ## recurse + if isinstance(op, TupleOp): + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + op.uname = None + for ia in op.args: + _clearReduces(ia) + else: + raise Exception("_clearReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _clearReduces(var) + +def collectPrecalc(listOfPlots): + from .treedecorators import TupleOp + def _addPrecalc(depDict, op, abovePlotOrPrecalc): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = abovePlotOrPrecalc + if op.uname is not None: + if op not in depDict: + depDict[op] = (op.uname, set()) + uname, deps = depDict[op] + op.uname = uname ## keep to make sure they get renamed to the same + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addPrecalc(depDict, ia, top) + else: + raise Exception("_addPrecalc called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for plot in listOfPlots: + for var in chain((plot.cut, plot.weight), plot.variables): + _addPrecalc(depDict, var, plot) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for precalc ops + + +def commonConditionForAll(somePlots): + """ find common condition for all plots in the list (allows to move selection before reduce) + """ + inCommon = [] + notInAll = set() ## caching to avoid too much double work + for ap in somePlots: + for cdc in ap.selection.cuts: + if cdc not in inCommon and cdc not in notInAll: + if all( cdc in op.selection.cuts for op in somePlots ): + inCommon.append(cdc) + else: + notInAll.add(cdc) + return inCommon + +def createPlotsInnerLoop_opt2(plots, uhNames, indent=2): + """ + Second optimisation round: pre-calculate "expensive" reduce operations + """ + from .plots import Plot + reducesWithAllDeps = collectReduces(plots) + ## 1: not to be forgotten: plots that don't depend on these (keep this simple for now) + plotLines_noDeps = createPlotsInnerLoop_opt1([ p for p in plots if all( p not in depList for uN,depList in reducesWithAllDeps.itervalues() ) ], uhNames, indent=indent) + + ## make minimal (real reduce+plot-dependency tree) + reducesWithMinDeps = dict((redOp, (uname, + [ di for di in deps if not any( ( plotDependsOn(di, do) if isinstance(di, Plot) else opDependsOn(di, do) ) for do in deps if ( not isinstance(do, Plot) ) and ( do != di ) ) ])) + ## if you depend on another dependee, you need to be further downstream + ## top-down traversal strategy: always take the next key that is not in any list of dependees (shallow copy then pop) + for redOp,(uname, deps) in reducesWithAllDeps.iteritems()) + + ## temporary solution: put in the right order (leave out-of-bounds to selection) + def getLinesForRedsAndPlotsDepTreeTopDown(myMinDepDict, indent=0, alreadyReq=tuple()): + ##print (indent*" ")+"getLinesForRedsAndPlotsDepTreeTopDown called with {0:d} already required selections".format(len(alreadyReq)) + ##print (indent*" ")+"Treating plots: ", ", ".join(p.name for p in set(di for r,(u,ds) in myMinDepDict.iteritems() for di in ds if isinstance(di, Plot))) + ## + while len(myMinDepDict) > 0: + redsToTreat = sorted([ k for k,v in myMinDepDict.iteritems() if not any(k in od for ou,od in myMinDepDict.itervalues()) ], key=(lambda k : len(myMinDepDict[k][1])), reverse=True) + if len(redsToTreat) == 0: + print "ERROR: also the operators have multiple sibling dependencies -> not handled yet"; break + ##print (indent*" ")+"--> Adding reduces in this loop:" + ##for redOp in redsToTreat: + ## print (indent*" ")+" - ", redOp._get_TTreeDrawStr() + + for redOp in redsToTreat: + if redOp in myMinDepDict: + uName,deps = myMinDepDict[redOp] + # + commonCuts = [ ct for ct in commonConditionForAll(list(di for di in reducesWithAllDeps[redOp][1] if isinstance(di, Plot))) ] + commonCuts_apply = [ ct for ct in commonCuts if ct not in alreadyReq ] + commonCuts_applyBefore, commonCuts_applyAfter = [], [] + for ct in commonCuts_apply: + if opDependsOn(ct, redOp): + commonCuts_applyAfter.append(ct) + else: + commonCuts_applyBefore.append(ct) + # + if len(commonCuts_applyBefore) > 0: + from .treedecorators import makeExprAnd + yield (indent*" ")+"if ( {0} ) {{".format(makeExprAnd(commonCuts_applyBefore).get_TTreeDrawStr()) + else: + yield (indent*" ")+"{" + # + yield ((indent+1)*" ")+"const auto {0} = {1};".format(uName, redOp._get_TTreeDrawStr()) + # + if len(commonCuts_applyAfter) > 0: + from .treedecorators import makeExprAnd + yield ((indent+1)*" ")+"if ( {0} ) {{".format(makeExprAnd(commonCuts_applyAfter).get_TTreeDrawStr()) + else: + yield ((indent+1)*" ")+"{" + # + + req_uptohere = list(alreadyReq)+commonCuts_apply + ###print "Making block - already applied: ", req_uptohere + # + ### find those that depend on (not interdependent) reduce ops - this kind of reordering is not handled yet + otherDeps = set() + for di in deps: + if isinstance(di, Plot): + for oro,(ouN,odeps) in myMinDepDict.iteritems(): + if oro != redOp and di in odeps: + otherDeps.add(oro) + odeps.remove(di) + for oro in otherDeps: + (ouN,odeps) = myMinDepDict[oro] + yield ((indent+2)*" ")+"const auto {0} = {1};".format(ouN, oro._get_TTreeDrawStr()) + ## clean up: if we got all the dependencies, it's time to clean up + if len(odeps) == 0: + del myMinDepDict[oro] + ### + ## NOTE the bookkeeping of this part is actually done above + ##print (indent*" ")+"Passing plots to createPlotsInnerloop_opt1: ", ", ".join(di.name for di in deps if isinstance(di, Plot)) + for ln in createPlotsInnerLoop_opt1([ di for di in deps if isinstance(di, Plot) ], uhNames, indent=indent+2, alreadyReq=req_uptohere).split("\n"): + yield ln + ## can forget about dependant reduces (at this stage) they won't appear any more after this and can be handled next + ## more interesting case: C depends on A and B, D only on A, E only on B - how will they be ordered if you do everything nicely in blocks? + ## + ###del myMinDepDict[redOp] ## keep simple like this for now (other option: pass context to the same algo for the next layer) + # RECURSE + sub_depDict = dict((k,v) for k,v in myMinDepDict.iteritems() if k in reducesWithAllDeps[redOp][1]) ## -> straight down to the next level + cp_depDict = dict(sub_depDict) + for ln in getLinesForRedsAndPlotsDepTreeTopDown(cp_depDict, indent=indent+2, alreadyReq=req_uptohere): + yield ln + # + yield ((indent+1)*" ")+"}" ## inner block + # + yield (indent*" ")+"}" ## outer block + # + for k in sub_depDict: ## remove handled deps + if k not in cp_depDict: + del myMinDepDict[k] + # + for redOp in redsToTreat: + if redOp in myMinDepDict: + del myMinDepDict[redOp] + + ## 2. handle these as well + plotLines_redsWithPlots = list(getLinesForRedsAndPlotsDepTreeTopDown(dict(reducesWithMinDeps), indent=2)) + #import IPython;IPython.embed() + + ## clean up + removeReduceUnames(plots) + + return "\n".join((plotLines_noDeps, "\n".join(plotLines_redsWithPlots))) + +def createPlotsInnerLoop_opt2v2(plots, uhNames, indent=2): + """ + Second attempt: keep track of things in a bit more organised way + + Build up a tree of Plots and non-terminal nodes (selections and reduces) + """ + from .hist_opt_2v2 import RootNode, SelectionNode, PrecalcNode, PlotNode, printTree, NodeTreeNeedsOpCache + + reducesWithAllDeps = collectPrecalc(plots) + reducesWithAllDeps.update(collectReduces(plots)) # { op : (uname, depPlotsAndOps) } ## TODO to be changed - but without the unames the rest is unreadable + + def nCommonCuts_cons(listA, listB): + for i,iA,iB in izip(count(), listA, listB): + if iA != iB: + break + return i+(1 if iA == iB else 0) + def commonCuts_cons(listA, listB): + result = [] + for iA,iB in izip(listA, listB): + if iA != iB: + break + else: + result.append(iA) + return result + + logger.info("Ordering selections and plots") + from itertools import imap + rootNode = RootNode() + ## start with selections + for ap in plots: + #print "\n\nAdding plot {0} with selection of {1:d} cuts\n".format(ap.name, len(ap.selection.cuts)) + prevSelNode = rootNode + prevNCommon = 0 + selNode = rootNode + toSearch = rootNode.depNodes + #while sum(1 for sn in toSearch if isinstance(sn, SelectionNode)) > 0: + while True: + prevSelNode = selNode + if len(toSearch) > 0 and any(isinstance(nd, SelectionNode) for nd in toSearch): + selNode,nCommon = max(imap(lambda nd : ( (nd,nCommonCuts_cons(ap.selection.cuts, nd.cuts)) if isinstance(nd, SelectionNode) else (nd,0) ), toSearch), key=lambda (nd,n) : n) + else: + selNode,nCommon = None,0 + if selNode and nCommon == len(selNode.cuts): + if nCommon == len(ap.selection.cuts): + #print "Found fully matching selection node" + break + else: + if any(isinstance(sn, SelectionNode) for sn in toSearch): + #print "Found fully required selection node, going deeper..." + toSearch = selNode.depNodes ## next iteration + prevNCommon = nCommon + else: + #print "Found fully required selection node without selections, adding here" + newSelNode = SelectionNode(cuts=ap.selection.cuts, parent=selNode) + selNode.addDependentNode(newSelNode) + selNode = newSelNode + break + elif nCommon > prevNCommon: + #print "Not fully matching, but common part ({0:d} - versus {1:d} in our selection and {2:d} in best match) -> splitting".format(nCommon, len(ap.selection.cuts), len(selNode.cuts)) + newSel = SelectionNode(cuts=( ap.selection.cuts if nCommon == len(ap.selection.cuts) else commonCuts_cons(ap.selection.cuts, selNode.cuts)), parent=prevSelNode) + prevSelNode.addDependentNode(newSel) + prevSelNode.removeDependentNode(selNode) + selNode.parent = newSel + newSel.addDependentNode(selNode) + if nCommon == len(ap.selection.cuts): + #print "Adding plot to split node" + selNode = newSel + else: + #print "Adding one with our selection" + selNode = SelectionNode(cuts=ap.selection.cuts, parent=newSel) + newSel.addDependentNode(selNode) + break + else: + #print "Nothing more in common, adding one level higher" + if len(ap.selection.cuts) > prevNCommon: + #print "With selection" + selNode = SelectionNode(cuts=ap.selection.cuts, parent=prevSelNode) + prevSelNode.addDependentNode(selNode) + else: + #print "Without selection" + selNode = prevSelNode + break + selNode.addDependentNode(PlotNode(ap, parent=selNode)) + + # print "\n\nTree after adding all plot:" + # ## how does our tree look at this point? + # for ln in printTree(rootNode): + # print ln + + logger.info("Inserting pre-calculated variables") + ## make minimal (real reduce+plot-dependency tree) + reducesWithMinDeps = dict((redOp, (uname, + [ di for di in deps if not any( ( plotDependsOn(di, do) if isinstance(di, Plot) else opDependsOn(di, do) ) for do in deps if ( not isinstance(do, Plot) ) and ( do != di ) ) ])) + for redOp,(uname, deps) in reducesWithAllDeps.iteritems()) + + ## collect weights + import uuid + uwNames = dict() + for ap in plots: + wOp = ap.selection.weight + if wOp not in uwNames: + uname = "w_{0}".format(str(uuid.uuid4()).replace("-", "_")) + wOp.uname = uname + uwNames[wOp] = (uname, [ ap ]) + else: + uwNames[wOp][1].append(ap) + + reducesWithAllDeps.update(uwNames) ## yes that's a bit of a workaround... + reducesWithMinDeps.update(uwNames) + + logger.debug("Collected pre-calc expressions to insert, found {0:d}".format(len(reducesWithMinDeps))) + + while len(reducesWithMinDeps) > 0: + redsToTreat = sorted([ k for k,v in reducesWithMinDeps.iteritems() if not any(k in od for ou,od in reducesWithMinDeps.itervalues()) ], key=(lambda k : len(reducesWithMinDeps[k][1])), reverse=True) + logger.debug("Reduces to treat in this round: {0:d}".format(len(redsToTreat))) + for redOp in redsToTreat: + #nodeTreeNeedsOp = ( lambda op : ( lambda nd : nd.treeNeedsOp(redOp) ) )(redOp) + nodeTreeNeedsOp = NodeTreeNeedsOpCache(redOp) + logger.debug("Inserting reduce {0}".format(redOp._get_TTreeDrawStr())) + ## FIXME following line isn't used? + redCuts = [ ct for ct in commonConditionForAll(list(di for di in reducesWithAllDeps[redOp][1] if isinstance(di, Plot))) ] + cand = rootNode + while True: + if ( isinstance(cand, PrecalcNode) and opDependsOn(cand.op, redOp) ) or ( isinstance(cand, SelectionNode) and any( opDependsOn(ci, redOp) for ci in cand.cuts ) ): + ## TODO split the selection node if only some depend - will need that when treating precalc deps as well + logger.debug("Candidate is precalc or selection node that depends on us -> insert above") + myNode = PrecalcNode(redOp, parent=cand.parent) + cand.parent.addDependentNode(myNode) + cand.parent.removeDependentNode(cand) + cand.parent = myNode + myNode.addDependentNode(cand) + break + elif any(isinstance(nd, PlotNode) and nodeTreeNeedsOp(nd) for nd in cand.depNodes) or sum(1 for nd in cand.depNodes if nodeTreeNeedsOp(nd)) > 1: + ## TODO we should also check that our validity requirements are satisfied here, otherwise should go further + logger.debug("Candidate has a plot or several dependencies that depend on us -> insert below") + myNode = PrecalcNode(redOp, parent=cand) + for nd in cand.depNodes: + if nodeTreeNeedsOp(nd): + myNode.addDependentNode(nd) + nd.parent = myNode + for nd in myNode.depNodes: + cand.removeDependentNode(nd) + cand.addDependentNode(myNode) + break + else: ## here is where things get interesting + if not any( nodeTreeNeedsOp(nd) for nd in cand.depNodes ): + print "Nothing depends on this operation... that's most likely wrong" + break ## this is a bit problematic + else: + cand = next(nd for nd in cand.depNodes if nodeTreeNeedsOp(nd)) + logger.debug("Found one node that depends: {0}".format(cand.ownDbgStr())) + for trtd in redsToTreat: + del reducesWithMinDeps[trtd] + + # print "\n\nTree after adding all precalculated variables:" + # ## how does our tree look at this point? + # for ln in printTree(rootNode): + # print ln + + from .treedecorators import makeExprAnd + indentStr = " " ## 4 spaces + def cppLines(someNode, indent=0): + dindent = indent + closeBlock = False + if isinstance(someNode, SelectionNode): + yield (indent*indentStr)+"if ( {0} ) {{".format(makeExprAnd(someNode.ownCuts).get_TTreeDrawStr()) + dindent = indent+1 + closeBlock = True + elif isinstance(someNode, PrecalcNode): + yield (indent*indentStr)+"const auto {0} = {1};".format(someNode.op.uname, someNode.op._get_TTreeDrawStr()) + elif isinstance(someNode, PlotNode): + ## TODO check that all cuts have been applied here + yield (indent*indentStr)+"fill({HIST}.get(), {VAR}, {WEIGHT});".format( + HIST=uhNames[someNode.plot] + , VAR=", ".join(v.get_TTreeDrawStr() for v in someNode.plot.variables) + , WEIGHT=uwNames[someNode.plot.selection.weight][0] + ) + if not someNode.isTerminal: + ## first terminal nodes + for dn in someNode.depNodes: + if dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + ## then dependency tree of the others + for dn in someNode.depNodes: + if not dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + if closeBlock: + yield (indent*indentStr)+"}" + + logger.info("Generating output") + ret = "\n".join(ln for ln in cppLines(rootNode, indent=2)) + + ## clean up unames + removeReduceUnames(plots) + for wOp in uwNames.iterkeys(): + wOp.uname = None + + return ret + +################################################################################ +## MAIN METHOD ## +################################################################################ + +import os, os.path + +def createPlotter(plots, skeleton, outdir=None, **kwargs): + """ + Turned the config into the main script + + this is only a helper method that deals with creating a C++ plotter from it + """ + if not outdir: + outdir = os.getcwd() + logger.warning("No output directory set, writing plotter code to current directory {0}".format(outdir)) + else: + logger.info("Writing plotter code to output directory {0}".format(outdir)) + + ## common forced helpers + add_packages = list(kwargs.get("add_packages", [])) + include_dirs = list(kwargs.get("include_dirs", [])) + includes = list(kwargs.get("includes", [])) + sources = list(kwargs.get("sources", [])) + linked_libraries = list(kwargs.get("libraries", [])) + add_properties = [] + + # add some defaults + from .factoryhelpers import COMMON_INCLUDE_DIR + include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) + includes.append("kinematics.h") + + if kwargs.get("addScaleFactorsLib", False): + from . import pathCommonTools + pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) + linked_libraries.append(os.path.join(pathCP3llbb, "Framework", "libBinnedValues.so")) + include_dirs.append(os.path.join(pathCP3llbb, "Framework", "interface")) + add_properties.append('set_target_properties(plotter PROPERTIES COMPILE_FLAGS "-DSTANDALONE_SCALEFACTORS")') + + logger.info("List of requested plots: {0}".format(", ".join(plot.name for plot in plots))) + logger.info("List of requested include files: {0}".format(", ".join(inc for inc in includes))) + logger.info("List of requested source files: {0}".format(", ".join(src for src in sources))) + + ### add systematic variations histos + allPlots = list(plots)+list(chain.from_iterable( ( mplt.clone(name="__".join((mplt.name, varNm)), selection=varSel) for varNm,varSel in mplt.selection.systVars.iteritems() ) for mplt in plots )) + + import uuid + unames = dict((plot, "p_{0}".format(str(uuid.uuid4()).replace("-", "_"))) for plot in allPlots) + + expandTemplateAndWrite(os.path.join(outdir, "Plotter.h"), getTemplate("Plotter.h") + , BRANCHES="\n ".join( + 'const {typ}& {name} = tree["{name}"].read<{typ}>();'.format(typ=getBranchType(brName, skeleton._skeleton), name=brName) + for brName in collectIdentifiersFromPlots(allPlots)) + ) + + expandTemplateAndWrite(os.path.join(outdir, "Plotter.cc"), getTemplate("Plotter.cc") + , INCLUDES="\n".join('#include "{0}"'.format(incl) for incl in includes) + , BEFORE_LOOP="\n".join(chain( + ## declare histograms + chain.from_iterable(chain( + getBinningDefinitions(plot, unames[plot]) + , [ ' std::unique_ptr<{typ}> {uname}(new {typ}("{uname}", "{title}", {binning})); {uname}->SetDirectory(nullptr);'.format( + typ=getHistType(plot), uname=unames[plot], title=plot.longTitle, binning=getBinningUses(plot, unames[plot])) ] + , getOtherHistSettings(plot, unames[plot]) + ) for plot in allPlots) + , (" {0}".format(ln) for ln in list(kwargs.get("user_initialisation", []))) + )) + , IN_LOOP=createPlotsInnerLoop_opt2v2(allPlots, unames) + , AFTER_LOOP="\n".join( ## save plots + expandTemplate(getTemplate("SavePlot") + , UNIQUE_NAME=unames[plot] + , PLOT_NAME=plot.name + ) for plot in allPlots + ) + ) + + expandTemplateAndWrite(os.path.join(outdir, "CMakeLists.txt"), getTemplate("CMakeLists_plotter.txt") + , ADD_FIND="\n".join("find_package({0})".format(fnd) for fnd in add_packages) + , ADD_INCLUDE_DIRS=" ".join(include_dirs) + , ADD_SOURCES=" ".join(sources) + , ADD_PROPERTIES="\n".join(add_properties) + , ADD_LINK="\n".join("target_link_libraries(plotter {0})".format(lib) for lib in linked_libraries) + ) + +def prepareWorkdir(workdir): ## TODO rename to preparePlotsWorkdir + if os.path.exists(workdir): + raise Exception("Work directory {0} exists already!".format(workdir)) + os.mkdir(workdir) + plotterdir = os.path.join(workdir, "plotter") + histosdir = os.path.join(workdir, "histos") + plotsdir = os.path.join(workdir, "plots") + for aDir in (plotterdir, histosdir, plotsdir): + os.mkdir(aDir) + return plotterdir, histosdir, plotsdir + +################################################################################ +## HELPER: COMPILE PLOTTER DIRECTORY ## +################################################################################ + +import subprocess + +def compilePlotter(plotterdir): + """ + Copy the necessary files and compile the plotter + """ + try: + from .import pathCommonTools + subprocess.check_output(["cp", "-r", os.path.join(pathCommonTools, "cmake"), plotterdir], stderr=subprocess.STDOUT) + subprocess.check_output(["cp", os.path.join(pathCommonTools, "templates", "generateHeader.sh"), plotterdir], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + with insideOtherDirectory(plotterdir): + logger.info("Compiling plotter in {0}".format(plotterdir)) + try: + logger.debug("Calling CMake") + subprocess.check_output(["cmake", "."], stderr=subprocess.STDOUT) + logger.debug("Calling make") + subprocess.check_output(["make"], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + return os.path.join(plotterdir, "plotter.exe") + +def splitInChunks(theList, n): + from itertools import izip, islice + import math + N = len(theList) + chunkLength = int(math.ceil(1.*N/n)) + for iStart, iStop in izip(xrange(0, len(theList), chunkLength), xrange(chunkLength, len(theList)+chunkLength, chunkLength)): + yield islice(theList, iStart, min(iStop,N)) + +def _makeSplitTask(commonArgs, toSplitArgs, outDir=None, info=None): + splitSplitArgs = [ toSplitArgs ] ## no-splitting strategy + if info and "db_name" in info: + if info["db_name"].startswith("DYJetsToLL_M-10to50"): + splitSplitArgs = list(list(chunk) for chunk in splitInChunks(toSplitArgs, 4)) + elif info["db_name"].startswith("TT_Tune"): + splitSplitArgs = list(list(chunk) for chunk in splitInChunks(toSplitArgs, 3)) + cmds = [ " ".join(commonArgs+splitArgs) for splitArgs in splitSplitArgs ] + from .batchhelpers import SplitAggregationTask, HaddAction + return SplitAggregationTask(cmds, finalizeAction=HaddAction(cmds, outDir=outDir)) + +def _condorClusterFromTasks(taskList, workdir=None): + from . import pathCMS + from .condorhelpers import CommandListCondorJob + condorJob = CommandListCondorJob(list(chain.from_iterable(task.commandList for task in taskList)), + workDir=workdir, + envSetupLines=[ + "# Setup our CMS environment" + , "pushd {CMS_PATH}".format(CMS_PATH=pathCMS) + , "source /cvmfs/cms.cern.ch/cmsset_default.sh" + , "eval `scram runtime --sh`" + , "popd" + , 'export THEANO_FLAGS="base_compiledir=$(pwd)"' ## isolate compilation cache for each job + , "" + ]) + for task in taskList: + task.jobCluster = condorJob + return condorJob + +def _slurmArrayFromTasks(taskList, workdir=None): + from . import pathCMS + from .slurmhelpers import CommandListSlurmJob + cfg_opts = { + "environmentType" : "cms" + , "cmsswDir" : pathCMS + , "sbatch_time" : "0-02:00" + , "sbatch_mem" : "2048" + , "stageoutFiles" : ["*.root"] + } + slurmJob = CommandListSlurmJob(list(chain.from_iterable(task.commandList for task in taskList)), workDir=workdir, configOpts=cfg_opts) + for task in taskList: + task.jobCluster = slurmJob + return slurmJob + +def runPlotterOnSamples(plotterName, samples, outdir=None, suffix=None, useCluster=False, clusterworkdir=None, taskMon=None, verbose=False): + """ Run the plotter for a sample """ + import os.path + if useCluster: + if outdir: + if not ( os.path.exists(outdir) and os.path.isdir(outdir) ): + raise Exception("Output path {} is not a directory") + else: + import os + outdir = os.path.abspath(os.getcwd()) + taskList = [] + for sampName, sampDict in samples.iteritems(): + outFileName = ( "{0}.root".format(sampName) if suffix is None else "{0}{1}.root".format(sampName, suffix) ) + taskList.append(_makeSplitTask([plotterName, outFileName, sampDict["tree_name"], sampDict["sample_cut"]], sampDict["files"], outDir=outdir, info=sampDict)) + logger.info("{0:d} samples -> created {1:d} tasks with in total {2:d} commands".format(len(samples), len(taskList), sum(len(tsk.commandList) for tsk in taskList))) + + if False: + clusterJob = _condorClusterFromTasks(taskList, workdir=clusterworkdir) + else: + clusterJob = _slurmArrayFromTasks(taskList, workdir=clusterworkdir) + + clusterJob.submit() + if taskMon: + taskMon.add([clusterJob], taskList) + logger.info("Added slurm job to monitor") + else: + from .slurmhelpers import makeSlurmTasksMonitor as makeTasksMonitor + mon = makeTasksMonitor([clusterJob], taskList) + mon.collect() ## wait here + + else: ## run one after the other + for sampName, sampDict in samples.iteritems(): + outFileName = ( "{0}.root".format(sampName) if suffix is None else "{0}{1}.root".format(sampName, suffix) ) + if outdir: + outFileName = os.path.join(outdir, outFileName) + logger.info("Writing histograms for sample {0} to file {1}".format(sampName, outFileName)) + cmdTokens = [plotterName, outFileName, sampDict["tree_name"], sampDict["sample_cut"]]+sampDict["files"] + try: + allout = subprocess.check_output(cmdTokens, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(" ".join(e.cmd), e.returncode, e.output)) + raise e + if verbose: + logger.info("Output of {}:".format(" ".join(cmdTokens))) + print allout + logger.info("END of command output") diff --git a/python/slurmhelpers.py b/python/slurmhelpers.py new file mode 100644 index 0000000..8e8ab53 --- /dev/null +++ b/python/slurmhelpers.py @@ -0,0 +1,182 @@ +""" +Slurm tools (based on previous condorhelpers and cp3-llbb/CommonTools condorSubmitter and slurmSubmitter) +""" +__author__ = "Pieter David " +__date__ = "April 2017" + +import logging +logger = logging.getLogger(__name__) + +import os.path +import subprocess + +from .batchhelpers import CommandListJob + +SlurmJobStatus = ["PENDING", "RUNNING", "COMPLETED", "FAILED", "COMPLETING", "CONFIGURING", "CANCELLED", "BOOT_FAIL", "NODE_FAIL", "PREEMPTED", "RESIZING", "SUSPENDED", "TIMEOUT", "unknown"] + +from contextlib import contextmanager + +# take from contextlib when moving to python3 +@contextmanager +def redirect_stdout(whereto=None): + import sys + bk_out = sys.stdout + sys.stdout = whereto + yield + sys.stdout = bk_out + +class CommandListSlurmJob(CommandListJob): + """ + Helper class to create a slurm job array from a list of commands (each becoming a task in the array) + + Default work directory will be $(pwd)/slurm_work, default output pattern is "*.root" + """ + default_cfg_opts = { + "environmentType" : "cms" + , "sbatch_time" : "0-04:00" + , "sbatch_mem" : "2048" + , "stageoutFiles" : ["*.root"] + } + + def __init__(self, commandList, workDir=None, configOpts=None): + super(CommandListSlurmJob, self).__init__(commandList, workDir=workDir, workdir_default_pattern="slurm_work") + ## + from CP3SlurmUtils.Configuration import Configuration + self.cfg = Configuration() + ## apply user-specified + cfg_opts = dict(CommandListSlurmJob.default_cfg_opts) + if configOpts: + cfg_opts.update(configOpts) + for k, v in cfg_opts.iteritems(): + setattr(self.cfg, k, v) + + ## check working and output directory + self.cfg.sbatch_workdir = self.workDir + self.cfg.inputSandboxDir = self.workDirs["in"] + self.cfg.batchScriptsDir = self.workDir + self.cfg.batchScriptsFilename = "slurmSubmission.sh" + self.cfg.stageoutDir = os.path.join(self.workDirs["out"], "${SLURM_ARRAY_TASK_ID}") + self.cfg.stageoutLogsDir = self.workDirs["log"] + self.cfg.useJobArray = True + self.cfg.inputParamsNames = ["taskcmd"] + self.cfg.inputParams = list([cmd] for cmd in self.commandList) + self.cfg.payload = ("${taskcmd}") + + self.slurmScript = os.path.join(self.cfg.batchScriptsDir, self.cfg.batchScriptsFilename) + self.clusterId = None ## will be set by submit + self._finishedTasks = dict() + self._statuses = ["PENDING" for cmd in self.commandList] + + ## output + try: + from CP3SlurmUtils.SubmitWorker import SubmitWorker as slurmSubmitWorker + except ImportError as ex: + logger.info("Could not import slurmSubmitWorker from CP3SlurmUtils.SubmitUtils. Please run 'module load slurm/slurm_utils'") + try: + slurm_submit = slurmSubmitWorker(self.cfg, submit=False, debug=False, quiet=False) + except Exception as ex: + logger.error("Problem constructing slurm submit worker from CP3SlurmUtils: {}".format(str(ex))) + raise ex + else: + from StringIO import StringIO ## python3: from io import StringIO + workerout = StringIO() + submitLoggerFun = logger.debug + try: + with redirect_stdout(workerout): + slurm_submit() + except Exception as ex: + submitLoggerFun = logger.info + raise RuntimeError("Problem in slurm_submit: {}".format(str())) + finally: + submitLoggerFun("========== BEGIN slurm_submit output ==========") + for ln in workerout.getvalue().split("\n"): + submitLoggerFun(ln) + submitLoggerFun("========== END slurm_submit output ==========") + + def _commandOutDir(self, command): + """ Output directory for a given command """ + return os.path.join(self.workDirs["out"], str(self.commandList.index(command)+1)) + + def commandOutFiles(self, command): + """ Output files for a given command """ + import fnmatch + cmdOutDir = self._commandOutDir(command) + return list( os.path.join(cmdOutDir, fn) for fn in os.listdir(cmdOutDir) + if any( fnmatch.fnmatch(fn, pat) for pat in self.cfg.stageoutFiles) ) + + def submit(self): + """ Submit the job to slurm """ + logger.info("Submitting an array of {0:d} jobs to slurm".format(len(self.commandList))) + result = subprocess.check_output(["sbatch" + , "--partition={}".format(self.cfg.sbatch_partition) + , "--qos={}".format(self.cfg.sbatch_qos) + , "--wckey=cms" + , self.slurmScript]) + + self.clusterId = next(tok for tok in reversed(result.split()) if tok.isdigit()) + + ## save to file in case + with open(os.path.join(self.workDirs["in"], "cluster_id"), "w") as f: + f.write(self.clusterId) + + logger.info("Submitted, job ID is {}".format(self.clusterId)) + + def statuses(self, update=True): + """ list of subjob statuses (numeric, using indices in SlurmJobStatus) """ + if update: + self.updateStatuses() + return [ SlurmJobStatus.index(sjst) for sjst in self._statuses ] + + @property + def status(self): + if all(st == self._statuses[0] for st in self._statuses): + return self._statuses[0] + elif any(st == "RUNNING" for st in self._statuses): + return "RUNNING" + elif any(st == "CANCELLED" for st in self._statuses): + return "CANCELLED" + else: + return "unknown" + + def subjobStatus(self, i): + return self._statuses[i-1] + + def updateStatuses(self): + for i in xrange(len(self.commandList)): + subjobId = "{0}_{1:d}".format(self.clusterId, i+1) + status = "unknown" + if subjobId in self._finishedTasks: + status = self._finishedTasks[subjobId] + else: + sacctCmdArgs = ["sacct", "-n", "--format", "State", "-j", subjobId] + ret = subprocess.check_output(sacctCmdArgs).strip() + if "\n" in ret: ## if finished + if len(ret.split("\n")) == 2: + ret = subprocess.check_output(["sacct", "-n", "--format", "State", "-j", "{}.batch".format(subjobId)]).strip() + self._finishedTasks[subjobId] = ret + status = ret + else: + raise AssertionError("More than two lines in sacct... there's something wrong") + else: + if len(ret) != 0: + status = ret + else: + squeueCmdArgs = ["squeue", "-h", "-O", "state", "-j", subjobId] + ret = subprocess.check_output(squeueCmdArgs).strip() + if len(ret) != 0: + status = ret + else: # fall back to previous status (probably PENDING or RUNNING) + status = self._statuses[i] + self._statuses[i] = status + + def commandStatus(self, command): + return self.subjobStatus(self.commandList.index(command)+1) + +def makeSlurmTasksMonitor(jobs=[], tasks=[], interval=120): + """ make a TasksMonitor for slurm jobs """ + from .batchhelpers import TasksMonitor + return TasksMonitor(jobs=jobs, tasks=tasks, interval=interval + , allStatuses=SlurmJobStatus + , activeStatuses=[SlurmJobStatus.index(stNm) for stNm in ("CONFIGURING", "COMPLETING", "PENDING", "RUNNING", "RESIZING", "SUSPENDED")] + , completedStatus=SlurmJobStatus.index("COMPLETED") + ) diff --git a/python/treefactory.py b/python/treefactory.py new file mode 100644 index 0000000..cca9794 --- /dev/null +++ b/python/treefactory.py @@ -0,0 +1,384 @@ +""" +Generate C++ to fill a skimmed tree, with helpers to run it +""" +__all__ = ("createSkimmer", "compileSkimmer", "runSkimmerOnSamples") + +import logging +logger = logging.getLogger(__name__) + +from itertools import chain + +from .factoryhelpers import getTemplate, expandTemplate, expandTemplateAndWrite, getBranchType, insideOtherDirectory + +def collectIdentifiersForSkimmer(selection, branchVars): + identifiers = set() + from .treedecorators import adaptArg + try: + identifiers.update(adaptArg(selection).leafDeps) + except Exception, e: + logger.error("Problem parsing selection for skimmer: {0}".format(str(e))) + for brNm,brVar in branchVars.iteritems(): + try: + identifiers.update(adaptArg(brVar).leafDeps) + except Exception, e: + logger.error("Problem parsing branch {0} : {1}".format(brNm, str(e))) + return identifiers + +def collectReducesFromCuts(cutsList, varList): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _addReduces(depDict, op, aboveCut): + if isinstance(op, TupleOp): + top = aboveCut + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + if op not in depDict: + depDict[op] = ("r_{0}".format(str(uuid.uuid4()).replace("-", "_")), set()) + assert op.uname is None + uname, deps = depDict[op] + op.uname = uname + deps.add(top) + top = op + for ia in op.args: + _addReduces(depDict, ia, top) + else: + raise Exception("_addReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for ct in cutsList: + _addReduces(depDict, ct, ct) + for v in varList: + _addReduces(depDict, v, v) + return depDict ## { op : (uname, depCutsAndOps) } + +def clearReducesFromCuts(cutsList, varList): + from .treedecorators import TupleOp, Reduce, Next + import uuid + def _clearReduces(op): + if isinstance(op, TupleOp): + if ( isinstance(op, Reduce) or isinstance(op, Next) ): + op.uname = None + for ia in op.args: + _clearReduces(ia) + else: + raise Exception("_clearReduces called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + for expr in chain(cutsList, varList): + _clearReduces(expr) + +def collectPrecalcFromCuts(cutsList, varList): + from .treedecorators import TupleOp + def _addPrecalc(depDict, op, aboveCut): + ## the one to be used for recursion + if isinstance(op, TupleOp): + top = aboveCut + if op.uname is not None: + if op not in depDict: + depDict[op] = (op.uname, set()) + uname, deps = depDict[op] + op.uname = uname ## keep to make sure they get renamed to the same + deps.add(top) + top = op ## build a dependency tree + for ia in op.args: + _addPrecalc(depDict, ia, top) + else: + raise Exception("_addPrecalc called on {0!r}, which is not a TupleOp but a {1}".format(op, op.__class__.__name__)) + + depDict = dict() + for ct in cutsList: + _addPrecalc(depDict, ct, ct) + for v in varList: + _addPrecalc(depDict, v, v) + return depDict ## { op : (uname, depPlotsAndOps) } ## only for precalc ops + +def createSkimmerInnerLoop_opt1(selCuts, branchesWithUnameList, fill_tree_lines=None): + """ + """ + ## NOTE this is a bit more limited scope than the histfactory: only need to arrange the cuts and reduces properly + from .treedecorators import adaptArg, opDependsOn, boolType + varsList = [ adaptArg(var) for nm,var,unm in branchesWithUnameList ] + reducesWithAllDeps = collectPrecalcFromCuts(selCuts, varsList) + reducesWithAllDeps.update(collectReducesFromCuts(selCuts, varsList)) + + def selDependsOnSel(s1, s2): + # does the first arg depend on the second? (i.e. is s2 a validity requirement for s1) + vd1 = set(adaptArg(d, typeHint=boolType) for ct in s1.cuts for d in ct.validDeps) + return any( c2 in vd1 for c2 in s2.cuts ) + def selDependsOnRed(s1, r2): + return any( opDependsOn(ct, r2.op) for ct in s1.cuts ) + def redDependsOnSel(r1, s2): + vd1 = set(adaptArg(d, typeHint=boolType) for d in r1.op.validDeps) + return any( c2 in vd1 for c2 in s2.cuts ) + def redDependsOnRed(r1, r2): + vd1 = set(adaptArg(d, typeHint=boolType) for d in r1.op.validDeps) + return opDependsOn(r1.op, r2.op) or any( opDependsOn(si, r2.op) for si in vd1 ) + def cut_reduce_comparator(o1, o2): + ## sel dep on red : via opDependsOn + ## red dep on sel : only if in validDeps + ## sel dep on sel : only if in validDeps of any of the reduce ops it depends on + ## red dep on dep + ## neg if o1 should come first + if isinstance(o1, SelectionNode): + if isinstance(o2, SelectionNode): + if selDependsOnSel(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif selDependsOnSel(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + elif isinstance(o2, PrecalcNode): + if redDependsOnSel(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif selDependsOnRed(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + else: + print "Unexpected type for second argument: ", type(o2) + elif isinstance(o1, PrecalcNode): + if isinstance(o2, SelectionNode): + if selDependsOnRed(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif redDependsOnSel(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + return 0 + elif isinstance(o2, PrecalcNode): + if redDependsOnRed(o2, o1): + #print "{0} depends on {1}".format(o2.ownDbgStr(), o1.ownDbgStr()) + return -1 + elif redDependsOnRed(o1, o2): + #print "{0} depends on {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 1 + else: + #print "no dependency between {0} and {1}".format(o1.ownDbgStr(), o2.ownDbgStr()) + return 0 + else: + print "Unexpected type for second argument: ", type(o2) + elif isinstance(o1, RootNode): + return -1 + else: + print "Unexpected type for first argument: ", type(o1) + + def treeDependsOn(ndWithTree, otherNode): + return cut_reduce_comparator(ndWithTree, otherNode) > 0 or any( treeDependsOn(dnd, otherNode) for dnd in ndWithTree.depNodes ) + def depOnTree(myNode, otherWithTree): + return cut_reduce_comparator(myNode, otherWithTree) > 0 or any( depOnTree(myNode, dnd) for dnd in otherWithTree.depNodes ) + + from .hist_opt_2v2 import RootNode, SelectionNode, PrecalcNode, SkimmerFillBranchesNode, printTree + + skimmerNode = SkimmerFillBranchesNode(branchesWithUnameList, fill_tree_lines=fill_tree_lines) + + unsortedNodes = [ SelectionNode((c,)) for c in selCuts ] + [ PrecalcNode(red) for red in reducesWithAllDeps.iterkeys() ] + + rootNode = RootNode() + for nd in unsortedNodes: + whereAmI = rootNode + #print "Inserting ", nd.ownDbgStr() + while True: + if any(depOnTree(nd, ond) for ond in whereAmI.depNodes): + #print "Stepping to ", whereAmI.ownDbgStr() + whereAmI = next(ond for ond in whereAmI.depNodes if depOnTree(nd, ond)) + else: ## you don't depend on anything here + if depOnTree(whereAmI, nd): ## insert above + #print "Inserting above ", whereAmI.ownDbgStr() + if whereAmI.parent: + nd.parent = whereAmI.parent + nd.parent.removeDependentNode(whereAmI) + nd.parent.addDependentNode(nd) + else: + print "Inserting above the root node - you must be kidding" + whereAmI.parent = nd + nd.addDependentNode(whereAmI) + break + elif len(whereAmI.depNodes) == 1: ## special case here: go as deep as possible to stay in the same order + whereAmI = whereAmI.depNodes[0] + elif len(whereAmI.depNodes) > 1: ## sanity check + print "There's something seriously wrong: more than one dependency for the same node" + else: ## at end, insert below + #print "Inserting below ", whereAmI.ownDbgStr() + nd.parent = whereAmI + whereAmI.addDependentNode(nd) + break + #print "Tree after inserting ", nd.ownDbgStr() + #for ln in printTree(rootNode): + # print ln + + ## collapse selection nodes + ndAbove = rootNode + while len(ndAbove.depNodes) > 0: + nd = ndAbove.depNodes[0] + if isinstance(ndAbove, SelectionNode) and isinstance(nd, SelectionNode): + deps = list(nd.depNodes) + ndAbove.removeDependentNode(nd) + ndAbove.cuts = tuple(chain(ndAbove.cuts, nd.cuts)) + for ad in deps: + nd.removeDependentNode(ad) + ad.parent = ndAbove + ndAbove.addDependentNode(ad) + elif len(ndAbove.depNodes) > 1: + print "ERROR more than one dependent node" + else: + ndAbove = nd + ## stopping criterium makes this the "tip" of the tree + ndAbove.addDependentNode(skimmerNode) + skimmerNode.parent = ndAbove + + from .treedecorators import makeExprAnd + indentStr = " " ## 4 spaces + def cppLines(someNode, indent=0): + dindent = indent + closeBlock = False + if isinstance(someNode, SelectionNode): + yield (indent*indentStr)+"if ( {0} ) {{".format(makeExprAnd(someNode.ownCuts).get_TTreeDrawStr()) + dindent = indent+1 + closeBlock = True + elif isinstance(someNode, PrecalcNode): + yield (indent*indentStr)+"const auto {0} = {1};".format(someNode.op.uname, someNode.op._get_TTreeDrawStr()) + elif isinstance(someNode, SkimmerFillBranchesNode): + for nm,var,unm in someNode.branches: + yield (indent*indentStr)+ "{uname} = ( {expr} );".format(uname=unm, expr=adaptArg(var).get_TTreeDrawStr()) + if someNode.fill_tree_lines: + yield "" + for ln in someNode.fill_tree_lines: + yield (indent*indentStr)+ln + if not someNode.isTerminal: + ## first terminal nodes + for dn in someNode.depNodes: + if dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + ## then dependency tree of the others + for dn in someNode.depNodes: + if not dn.isTerminal: + for ln in cppLines(dn, indent=dindent): + yield ln + if closeBlock: + yield (indent*indentStr)+"}" + + logger.info("Generating output") + retCode = "\n".join(ln for ln in cppLines(rootNode, indent=2)) + + clearReducesFromCuts(selCuts, varsList) + + return retCode + +################################################################################ +## MAIN METHOD ## +################################################################################ + +import os, os.path + +def createSkimmer(selection, branchVars, skeleton, outdir=None, treeName="t", **kwargs): ## includes, sources as kwargs + """ + Generate C++ code from selection and variable definitions + """ + if not outdir: + outdir = os.getcwd() + logger.warning("No output directory set, writing skimmer code to current directory {0}".format(outdir)) + else: + logger.info("Writing plotter code to output directory {0}".format(outdir)) + + ## common forced helpers + add_packages = list(kwargs.get("add_packages", [])) + include_dirs = list(kwargs.get("include_dirs", [])) + includes = list(kwargs.get("includes", [])) + sources = list(kwargs.get("sources", [])) + linked_libraries = list(kwargs.get("libraries", [])) + add_properties = [] + + # add some defaults + from .factoryhelpers import COMMON_INCLUDE_DIR + include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) + includes.append("kinematics.h") + # add jsoncpp (from external) + from . import pathCommonTools + external_dir = os.path.join(os.path.abspath(pathCommonTools), "external") + include_dirs.append(os.path.join(external_dir, "include")) + sources.append(os.path.join(external_dir, "src", "jsoncpp.cpp")) + + if kwargs.get("addScaleFactorsLib", False): + pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) + linked_libraries.append(os.path.join(pathCP3llbb, "Framework", "libBinnedValues.so")) + include_dirs.append(os.path.join(pathCP3llbb, "Framework", "interface")) + add_properties.append('set_target_properties(skimmer PROPERTIES COMPILE_FLAGS "-DSTANDALONE_SCALEFACTORS")') + + logger.info("List of requested branches: {0}".format(", ".join(nm for nm in branchVars.iterkeys()))) + logger.info("List of requested include files: {0}".format(", ".join(inc for inc in includes))) + logger.info("List of requested source files: {0}".format(", ".join(src for src in sources))) + + expandTemplateAndWrite(os.path.join(outdir, "Skimmer.h"), getTemplate("Skimmer.h") + , BRANCHES="\n ".join( + 'const {typ}& {name} = tree["{name}"].read<{typ}>();'.format(typ=getBranchType(brName, skeleton._skeleton), name=brName) + for brName in collectIdentifiersForSkimmer(selection.cut, branchVars)) + ) + + import uuid + branchesWithUnameList = list((nm, var, "b_{0}".format(str(uuid.uuid4()).replace("-", "_"))) for nm,var in branchVars.iteritems()) ## keep declarations and filling in the same order + expandTemplateAndWrite(os.path.join(outdir, "Skimmer.cc"), getTemplate("Skimmer.cc") + , INCLUDES="\n".join('#include "{0}"'.format(incl) for incl in includes) + ## initialisation + , OUTPUT_TREE_NAME=treeName + , BEFORE_LOOP="\n".join(chain( + + (" {typ}& {uname} = output_tree[\"{name}\"].write<{typ}>();".format( + typ=var._typeName, uname=unm, name=nm ## assumes stubs + ) for nm,var,unm in branchesWithUnameList) + , (" {0}".format(ln) for ln in list(kwargs.get("user_initialisation", []))) + )) + ## event loop + , GLOBAL_CUT_AND_OUTPUT_BRANCHES_FILLING=createSkimmerInnerLoop_opt1(selection.cuts, branchesWithUnameList, fill_tree_lines=("output_tree.fill();", "++selected_entries;")) + ) + + expandTemplateAndWrite(os.path.join(outdir, "CMakeLists.txt"), getTemplate("CMakeLists_skimmer.txt") + , ADD_FIND="\n".join("find_package({0})".format(fnd) for fnd in add_packages) + , ADD_INCLUDE_DIRS=" ".join(include_dirs) + , ADD_SOURCES =" ".join(sources) + , ADD_PROPERTIES="\n".join(add_properties) + , ADD_LINK="\n".join("target_link_libraries(skimmer {0})".format(lib) for lib in linked_libraries) + ) + +def prepareWorkdir(workdir): + # need much less than for plotter (output dir is passed as arg to skimmer, name generated from dataset) + if os.path.exists(workdir): + raise Exception("Work directory {0} exists already!".format(workdir)) + os.mkdir(workdir) + return workdir + +################################################################################ +## HELPER: COMPILE SKIMMER DIRECTORY ## +################################################################################ + +import subprocess + +def compileSkimmer(skimmerdir): + """ + Copy the necessary files and compile the skimmer + """ + try: + from .import pathCommonTools + subprocess.check_output(["cp", "-r", os.path.join(pathCommonTools, "cmake"), skimmerdir], stderr=subprocess.STDOUT) + subprocess.check_output(["cp", os.path.join(pathCommonTools, "templates", "generateHeader.sh"), skimmerdir], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + with insideOtherDirectory(skimmerdir): + logger.info("Compiling skimmer in {0}".format(skimmerdir)) + try: + logger.debug("Calling CMake") + subprocess.check_output(["cmake", "."], stderr=subprocess.STDOUT) + logger.debug("Calling make") + subprocess.check_output(["make"], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError, e: + logger.error("Command {0} failed with exit code {1}\n{2}".format(e.cmd, e.returncode, e.output)) + raise e + + return os.path.join(skimmerdir, "skimmer.exe") diff --git a/templates/CMakeLists_plotter.txt.tpl b/templates/CMakeLists_plotter.txt.tpl new file mode 100644 index 0000000..14c7a82 --- /dev/null +++ b/templates/CMakeLists_plotter.txt.tpl @@ -0,0 +1,69 @@ +cmake_minimum_required (VERSION 2.6) +project (Plotter) + +# Configure paths +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/modules") + +# Detect if we are inside a CMSSW env +include(CMSSW) + +# Ensure C++11 is available +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O2") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + +# Find ROOT +find_package(ROOT REQUIRED) +include_directories(${ROOT_INCLUDE_DIR}) +find_library(ROOT_TMVA_LIBRARY TMVA ${ROOT_LIBRARY_DIR} NO_DEFAULT_PATH) + +find_library(ROOT_GENVECTOR_LIB GenVector PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) +find_library(ROOT_TREEPLAYER_LIB TreePlayer PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) + +{{ADD_FIND}} + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_BINARY_DIR}) +include_directories({{ADD_INCLUDE_DIRS}}) + +set(PLOTTER_SOURCES + Plotter.cc + {{ADD_SOURCES}} + ) + +if(IN_CMSSW) + set_property(DIRECTORY ${PROJECT_BINARY_DIR} PROPERTY COMPILE_DEFINITIONS CP3LLBB_PLOTTER) + include(CP3Dictionaries) + list(APPEND PLOTTER_SOURCES ${DICTIONARIES_SOURCES}) + add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/classes.h COMMAND + ${PROJECT_SOURCE_DIR}/generateHeader.sh + ${PROJECT_BINARY_DIR}/classes.h + COMMENT "Generating classes.h...") + STRING(REPLACE ":" ";" CMSSW_INCLUDES $ENV{CMSSW_SEARCH_PATH}) + STRING(REPLACE ":" ";" FWLITE_INCLUDES $ENV{CMSSW_FWLITE_INCLUDE_PATH}) + include_directories(${CMSSW_INCLUDES} ${FWLITE_INCLUDES}) + +endif() + +list(APPEND PLOTTER_SOURCES classes.h) + +add_executable(plotter ${PLOTTER_SOURCES}) +set_target_properties(plotter PROPERTIES OUTPUT_NAME "plotter.exe") +{{ADD_PROPERTIES}} + +# Link libraries +target_link_libraries(plotter ${ROOT_LIBRARIES}) +target_link_libraries(plotter ${ROOT_GENVECTOR_LIB}) +target_link_libraries(plotter ${ROOT_TMVA_LIBRARY}) +target_link_libraries(plotter ${ROOT_TREEPLAYER_LIB}) +{{ADD_LINK}} + +find_library(TREEWRAPPER_LIB cp3_llbbTreeWrapper PATHS + "$ENV{CMSSW_BASE}/lib/$ENV{SCRAM_ARCH}" NO_DEFAULT_PATH) +target_link_libraries(plotter ${TREEWRAPPER_LIB}) diff --git a/templates/CMakeLists_skimmer.txt.tpl b/templates/CMakeLists_skimmer.txt.tpl new file mode 100644 index 0000000..d319483 --- /dev/null +++ b/templates/CMakeLists_skimmer.txt.tpl @@ -0,0 +1,72 @@ +cmake_minimum_required (VERSION 2.6) +project (Skimmer) + +# Configure paths +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/modules") + +# Detect if we are inside a CMSSW env +include(CMSSW) + +# Ensure C++11 is available +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O2") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + +# Find ROOT +find_package(ROOT REQUIRED) +include_directories(${ROOT_INCLUDE_DIR}) +find_library(ROOT_TMVA_LIBRARY TMVA ${ROOT_LIBRARY_DIR} NO_DEFAULT_PATH) + +find_library(ROOT_GENVECTOR_LIB GenVector PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) +find_library(ROOT_TREEPLAYER_LIB TreePlayer PATHS ${ROOT_LIBRARY_DIR} + NO_DEFAULT_PATH) + +{{ADD_FIND}} + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_BINARY_DIR}) +include_directories({{ADD_INCLUDE_DIRS}}) + +## TODO add EXTERNAL_DIR/include to ADD_INCLUDES and EXTERNAL_DIR/src/jsoncpp.cpp to sources + + +set(SKIMMER_SOURCES + Skimmer.cc + {{ADD_SOURCES}} + ) + +if(IN_CMSSW) + set_property(DIRECTORY ${PROJECT_BINARY_DIR} PROPERTY COMPILE_DEFINITIONS CP3LLBB_PLOTTER) + include(CP3Dictionaries) + list(APPEND SKIMMER_SOURCES ${DICTIONARIES_SOURCES}) + add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/classes.h COMMAND + ${PROJECT_SOURCE_DIR}/generateHeader.sh + ${PROJECT_BINARY_DIR}/classes.h + COMMENT "Generating classes.h...") + STRING(REPLACE ":" ";" CMSSW_INCLUDES $ENV{CMSSW_SEARCH_PATH}) + STRING(REPLACE ":" ";" FWLITE_INCLUDES $ENV{CMSSW_FWLITE_INCLUDE_PATH}) + include_directories(${CMSSW_INCLUDES} ${FWLITE_INCLUDES}) + +endif() + +list(APPEND SKIMMER_SOURCES classes.h) + +add_executable(skimmer ${SKIMMER_SOURCES}) +set_target_properties(skimmer PROPERTIES OUTPUT_NAME "skimmer.exe") +{{ADD_PROPERTIES}} + +# Link libraries +target_link_libraries(skimmer ${ROOT_LIBRARIES}) +target_link_libraries(skimmer ${ROOT_GENVECTOR_LIB}) +target_link_libraries(skimmer ${ROOT_TMVA_LIBRARY}) +target_link_libraries(skimmer ${ROOT_TREEPLAYER_LIB}) +{{ADD_LINK}} + +find_library(TREEWRAPPER_LIB cp3_llbbTreeWrapper PATHS + "$ENV{CMSSW_BASE}/lib/$ENV{SCRAM_ARCH}" NO_DEFAULT_PATH) +target_link_libraries(skimmer ${TREEWRAPPER_LIB}) diff --git a/templates/Plot.tpl b/templates/Plot.tpl new file mode 100644 index 0000000..b432856 --- /dev/null +++ b/templates/Plot.tpl @@ -0,0 +1,6 @@ + __cut = ({{CUT}}); + if (__cut) { + __weight = ({{WEIGHT}}); + fill({{HIST}}.get(), {{VAR}}, __weight); + } + diff --git a/templates/Plotter.cc.tpl b/templates/Plotter.cc.tpl new file mode 100644 index 0000000..cb0d219 --- /dev/null +++ b/templates/Plotter.cc.tpl @@ -0,0 +1,104 @@ +// vim: syntax=cpp + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +{{INCLUDES}} + +volatile bool MUST_STOP = false; + +void Plotter::plot(const std::string& output_file) { + +{{BEFORE_LOOP}} + + size_t index = 1; + while (tree.next()) { + + if (MUST_STOP) { + break; + } + + if (m_sample_cut){ + if( !m_sample_cut->EvalInstance() ) + continue; + } + + const std::string filename = raw_tree->GetFile()->GetName(); + const bool runOnElEl = filename.find("DoubleEG") != std::string::npos; + const bool runOnMuMu = filename.find("DoubleMuon") != std::string::npos; + const bool runOnMuEl = filename.find("MuonEG") != std::string::npos; + const bool runOnElMu = runOnMuEl; + const bool runOnMC = !(runOnElEl || runOnMuMu || runOnMuEl); + + if ((index - 1) % 1000 == 0) + std::cout << "Processing entry " << index << " of " << tree.getEntries() << std::endl; + + bool __cut = false; + double __weight = 0; + +{{IN_LOOP}} + + index++; + } + + std::unique_ptr outfile(TFile::Open(output_file.c_str(), "recreate")); + +{{AFTER_LOOP}} +} + +int main(int argc, char** argv) { + + gErrorIgnoreLevel = kWarning + 1; + + // very basic command-line argument parsing + // 1: name of the output file + // 2: name of the input tree + // 3: selection cut for the TChain + // 4-...: names of input files + if ( argc < 5 ) { // FIXME check if the first one is special indeed + std::cout << "Not enough arguments to do anything meaningful -> exiting" << std::endl; + std::cout << "Command line arguments should be : outFileName, inTreeName, sampleCut, inFile1 [inFile2...]" << std::endl; + } else { + std::string output_file{argv[1]};// FIXME + TChain t{argv[2]}; // FIXME + std::string cut{argv[3]}; + std::size_t n_files{0}; + for ( std::size_t idx{4}; argc != idx; ++idx ) { // FIXME + n_files += t.Add(argv[idx], 0); + } + if ( n_files != (argc-4) ) { // FIXME + std::cerr << "Error: some files failed to open" << std::endl; + return 2; + } + + // Register CTRL+C signal handler + struct sigaction sigIntHandler; + sigIntHandler.sa_handler = [](int signal) { MUST_STOP = true; }; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + sigaction(SIGINT, &sigIntHandler, NULL); + + // Set cache size to 10 MB + t.SetCacheSize(10 * 1024 * 1024); + // Learn tree structure from the first 10 entries + t.SetCacheLearnEntries(10); + + TH1::SetDefaultSumw2(kTRUE); + + Plotter p(cut, &t); + p.plot(output_file); + std::cout << "Done. Output saved as '" << output_file << "'" << std::endl; + } + return 0; +} diff --git a/templates/Plotter.h.tpl b/templates/Plotter.h.tpl new file mode 100644 index 0000000..22c5d29 --- /dev/null +++ b/templates/Plotter.h.tpl @@ -0,0 +1,110 @@ +// vim: syntax=cpp + +#pragma once + +#include +#include +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include +#include + +// Generated automatically +#include + +#include + +// No other choices, as ROOT strips the 'std' namespace from types... +using namespace std; + +template +size_t Length$(const T& t) { + return t.size(); +} + +// Traits to check if a given typename is iterable + +template struct is_stl_container_like +{ + typedef typename std::remove_const::type>::type test_type; + + template static constexpr bool test( + A * pt, + A const * cpt = nullptr, + decltype(pt->begin()) * = nullptr, + decltype(pt->end()) * = nullptr, + decltype(cpt->begin()) * = nullptr, + decltype(cpt->end()) * = nullptr, + typename A::iterator * pi = nullptr, + typename A::const_iterator * pci = nullptr, + typename A::value_type * pv = nullptr) { + + typedef typename A::iterator iterator; + typedef typename A::const_iterator const_iterator; + typedef typename A::value_type value_type; + return std::is_samebegin()),iterator>::value && + std::is_sameend()),iterator>::value && + std::is_samebegin()),const_iterator>::value && + std::is_sameend()),const_iterator>::value && + std::is_same::value && + std::is_same::value; + + } + + template static constexpr bool test(...) { + return false; + } + + static const bool value = test(nullptr); +}; + +class Plotter { + public: + Plotter(const std::string& cut, TChain* ttree): + tree(ttree), m_sample_cut(nullptr), raw_tree(ttree) + { + if(cut != "" && cut != "1"){ + m_sample_cut = std::unique_ptr(new TTreeFormula("sample_cut", cut.c_str(), ttree)); + ttree->SetNotify(m_sample_cut.get()); + } + }; + + void plot(const std::string&); + + private: + + // Helper functions to fill an histogram + template::value, bool>::type = 0> void fill(TH1* h, const T& value, double weight) { + h->Fill(value, weight); + } + + template::value, bool>::type = 0> void fill(TH1* h, const T& value, double weight) { + for (const auto& v: value) + h->Fill(v, weight); + } + + // 2D + template void fill(TH2* h, const T& value_x, const U& value_y, double weight) { + h->Fill(value_x, value_y, weight); + } + + // 3D + template void fill(TH3* h, const T& value_x, const U& value_y, const V& value_z, double weight) { + h->Fill(value_x, value_y, value_z, weight); + } + + ROOT::TreeWrapper tree; + std::unique_ptr m_sample_cut; + TChain* raw_tree; + + // List of branches + {{BRANCHES}} +}; diff --git a/templates/SavePlot.tpl b/templates/SavePlot.tpl new file mode 100644 index 0000000..ae90e09 --- /dev/null +++ b/templates/SavePlot.tpl @@ -0,0 +1,3 @@ + {{UNIQUE_NAME}}->SetName("{{PLOT_NAME}}"); + {{UNIQUE_NAME}}->Write("{{PLOT_NAME}}", TObject::kOverwrite); + diff --git a/templates/Skimmer.cc.tpl b/templates/Skimmer.cc.tpl new file mode 100644 index 0000000..d3b8359 --- /dev/null +++ b/templates/Skimmer.cc.tpl @@ -0,0 +1,212 @@ +// vim: syntax=cpp + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +{{INCLUDES}} + +volatile bool MUST_STOP = false; + +void Skimmer::skim(const std::string& output_file) { + + std::unique_ptr outfile(TFile::Open(output_file.c_str(), "recreate")); + + TTree* output_tree_ = new TTree("{{OUTPUT_TREE_NAME}}", "Skimmed tree"); + ROOT::TreeWrapper output_tree(output_tree_); + +{{BEFORE_LOOP}} + + uint64_t selected_entries = 0; + uint64_t index = 1; + while (tree.next()) { + + if (MUST_STOP) { + break; + } + + if ((index - 1) % 1000 == 0) + std::cout << "Processing entry " << index << " of " << tree.getEntries() << std::endl; + index++; + + std::string filename = raw_tree->GetFile()->GetName(); + bool runOnElEl = filename.find("DoubleEG") != std::string::npos; + bool runOnMuMu = filename.find("DoubleMuon") != std::string::npos; + bool runOnMuEl = filename.find("MuonEG") != std::string::npos; + bool runOnElMu = runOnMuEl; + bool runOnMC = !(runOnElEl || runOnMuMu || runOnMuEl); + +{{GLOBAL_CUT_AND_OUTPUT_BRANCHES_FILLING}} + } + + outfile->cd(); + output_tree_->Write(); + outfile->Close(); + + if (index == 1) { + std::cout << "No entry selected" << std::endl; + } else { + std::cout << "Number of selected entries: " << selected_entries << " (" << selected_entries / (float) (index - 1) * 100 << " %)" << std::endl; + } +} + +bool parse_datasets(const std::string& json_file, std::vector& datasets) { + + Json::Value root; + { + std::ifstream f(json_file); + Json::Reader reader; + if (! reader.parse(f, root, false)) { + std::cerr << "Failed to parse '" << json_file << "'" << std::endl; + return false; + } + } + + datasets.clear(); + + // Looping over the different samples + Json::Value::Members samples_str = root.getMemberNames(); + for (size_t index = 0; index < root.size(); index++) { + + const Json::Value& sample = root[samples_str[index]]; + Dataset dataset; + + // Mandatory fields + dataset.name = samples_str[index]; + dataset.db_name = sample["db_name"].asString(); + dataset.cut = sample.get("sample_cut", "1").asString(); + dataset.tree_name = sample.get("tree_name", "t").asString(); + + //dataset.output_name + if (sample.isMember("output_name")) { + dataset.output_name = sample["output_name"].asString(); + } else { + dataset.output_name = dataset.db_name + "_skim"; + } + + // If a list of files is specified, only use those + if (sample.isMember("files")) { + Json::Value files = sample["files"]; + + for(auto it = files.begin(); it != files.end(); ++it) { + dataset.files.push_back((*it).asString()); + } + } else if (sample.isMember("path")) { + dataset.path = sample["path"].asString(); + dataset.files.push_back(dataset.path + "/*.root"); + } + + datasets.push_back(dataset); + } + + return true; +} + +int main(int argc, char** argv) { + + + try { + + TCLAP::CmdLine cmd("Create trees from trees", ' ', "0.2.0"); + + TCLAP::ValueArg datasetArg("d", "dataset", "JSON file describing the dataset to run on", true, "", "JSON file", cmd); + TCLAP::ValueArg outputArg("o", "output", "Output directory", false, "", "FOLDER", cmd); + + cmd.parse(argc, argv); + + std::vector datasets; + parse_datasets(datasetArg.getValue(), datasets); + + std::string output_dir = outputArg.getValue(); + if (output_dir.empty()) + output_dir = "."; + + // Register CTRL+C signal handler + struct sigaction sigIntHandler; + + sigIntHandler.sa_handler = [](int signal) { MUST_STOP = true; }; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + + sigaction(SIGINT, &sigIntHandler, NULL); + + // Random numbers + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> sleep_time(1 * 1000, 30 * 1000); + + auto addFileToChain = [&sleep_time, &gen](TChain* chain, const std::string& filename) -> bool { + if (! chain) + return false; + + // Try 5 times to open the file + size_t n = 5; + while (n--) { + size_t old_gErrorIgnoreLevel = gErrorIgnoreLevel; + gErrorIgnoreLevel = kError + 1; + bool success = chain->Add(filename.c_str(), 0) != 0; + gErrorIgnoreLevel = old_gErrorIgnoreLevel; + if (success) + return true; + + auto sleep_time_value = sleep_time(gen); + std::cout << "Error while opening '" << filename << "'. Sleeping for " << sleep_time_value << " ms and trying " << n << " more time." << std::endl; + + // Sleep a bit before trying to open the file again + // The sleep time is random to avoid all the jobs on the cluster + // to re-try to open the files at the same time + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_time_value)); + } + + std::cerr << "Error: cannot open '" << filename << "'" << std::endl; + return false; + }; + + for (const Dataset& d: datasets) { + if (MUST_STOP) + break; + + std::cout << "Creating skim for dataset '" << d.name << "'" << std::endl; + std::unique_ptr t(new TChain(d.tree_name.c_str())); + + for (const std::string& file: d.files) { + bool success = addFileToChain(t.get(), file); + if (!success) + return 2; + } + + std::string output_file = output_dir + "/" + d.output_name + ".root"; + + ROOT::TreeWrapper wrapped_tree(t.get()); + + // Set cache size to 10 MB + t->SetCacheSize(10 * 1024 * 1024); + // Learn tree structure from the first 10 entries + t->SetCacheLearnEntries(10); + + Skimmer s(d, wrapped_tree, t.get()); + s.skim(output_file); + std::cout << "Done. Output saved as '" << output_file << "'" << std::endl; + } + + } catch (TCLAP::ArgException &e) { + std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; + return 1; + } + + return 0; +} diff --git a/templates/Skimmer.h.tpl b/templates/Skimmer.h.tpl new file mode 100644 index 0000000..c2280f7 --- /dev/null +++ b/templates/Skimmer.h.tpl @@ -0,0 +1,56 @@ +// vim: syntax=cpp + +#pragma once + +#include +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include + +// Generated automatically +#include + +#include + +// No other choices, as ROOT strips the 'std' namespace from types... +using namespace std; + +template +size_t Length$(const T& t) { + return t.size(); +} + +struct Dataset { + std::string name; + std::string db_name; + std::string output_name; + std::string tree_name; + std::string path; + std::vector files; + std::string cut; +}; + +class Skimmer { + public: + Skimmer(const Dataset& dataset, ROOT::TreeWrapper& tree, TChain* raw_tree): + m_dataset(dataset), tree(tree), raw_tree(raw_tree) {}; + virtual ~Skimmer() {}; + + void skim(const std::string&); + + private: + Dataset m_dataset; + ROOT::TreeWrapper& tree; + TChain* raw_tree; + + // List of input branches + {{BRANCHES}} +}; diff --git a/templates/generateHeader.sh b/templates/generateHeader.sh new file mode 100755 index 0000000..35b33d8 --- /dev/null +++ b/templates/generateHeader.sh @@ -0,0 +1,13 @@ +#! /bin/sh + +OUTPUT=$1 + +echo "" > $OUTPUT + +find ${CMSSW_BASE}/src/cp3_llbb -maxdepth 1 -type d | +while read dir +do + if [ -e ${dir}/src/classes.h ]; then + echo "#include <${dir}/src/classes.h>" >> $OUTPUT + fi +done From 38428524e9be0521cfd0ae1996cc0a6d429c51f6 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Sun, 4 Mar 2018 13:39:05 +0100 Subject: [PATCH 05/14] Example H->ZA script, part 1 (interactive) --- scripts/example_plotsHtoZA.py | 139 ++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100755 scripts/example_plotsHtoZA.py diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py new file mode 100755 index 0000000..a0205a6 --- /dev/null +++ b/scripts/example_plotsHtoZA.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python2 +""" +Example: H->ZA tree decoration, interactive exploration based on that, and a plotter +""" +import ROOT +ROOT.PyConfig.IgnoreCommandLineOptions = True # let python do option parsing + +""" +za decoration + +This part would go into a zadeco.py module and used as 'from cp3_llbb.ZATools.zadeco import decoratedZATree +""" +from cp3_llbb.CommonTools.lldeco import LeptonRef +from cp3_llbb.CommonTools.treedecorators import levelsAbove, addIntoHierarchy, TreeStub, BranchGroupStub, SmartTupleStub, SmartObjectStub, LeafFacade + +import ROOT +## Load the library +ROOT.gSystem.Load("libcp3_llbbZAAnalysis.so") + +hZAObjectStubMap = SmartObjectStub.Map({ + "HtoZA::Lepton" : [ + LeptonRef("L") + , SmartTupleStub._RefToOther("hlt", lambda l : l.record.hlt.object[l.hlt_idx]) + ] + , "HtoZA::Dilepton" : [ + SmartTupleStub._RefToOther("l1", lambda ll : ll.hZA.leptons[ll.ilep1]) + , SmartTupleStub._RefToOther("l2", lambda ll : ll.hZA.leptons[ll.ilep2]) + , SmartTupleStub._RefToOther("hlt1", lambda ll : ll.record.hlt.object[ll.hlt_idxs.first]) + , SmartTupleStub._RefToOther("hlt2", lambda ll : ll.record.hlt.object[ll.hlt_idxs.second]) + ] + , "HtoZA::Met" : [] + , "HtoZA::Jet" : [ + ## Framework jet ref + SmartTupleStub._RefToOther("J", lambda j : ( + getattr(j.record, "jets_{}".format(j.systVar._prefix.strip("_")))[j.idx] + if hasattr(j, "systVar") else j.record.jets[j.idx] + ) ) + ] + , "HtoZA::Dijet" : [ + ## HtoZA::Jet refs + SmartTupleStub._RefToOther("j1", lambda jj : (jj.systVar.jets[jj.ijet1] if hasattr(jj, "systVar") else jj.hZA.jets[jj.ijet1])) + , SmartTupleStub._RefToOther("j2", lambda jj : (jj.systVar.jets[jj.ijet2] if hasattr(jj, "systVar") else jj.hZA.jets[jj.ijet2])) + ## Framework jet refs + , SmartTupleStub._RefToOther("J1", lambda jj : jj.j1.J) + , SmartTupleStub._RefToOther("J2", lambda jj : jj.j2.J) + ] + , "HtoZA::MELAAngles" : [] + }) +hZAObjectStubMap["HtoZA::DileptonDijet"] = hZAObjectStubMap.get("HtoZA::Dilepton")+hZAObjectStubMap.get("HtoZA::Dijet") + +def decoratedZATree(someTree): + tree = TreeStub(someTree) + recHierarchy = levelsAbove([], "record") + # + evt = BranchGroupStub("event_") + addIntoHierarchy("event", evt, tree, recHierarchy) + evtHierarchy = levelsAbove(recHierarchy, "event") + # > event + addIntoHierarchy("pdf", BranchGroupStub("pdf_"), evt, evtHierarchy) + # < event + hlt = BranchGroupStub("hlt_") + addIntoHierarchy("hlt", hlt, tree, recHierarchy) + # > hlt + hltHierarchy = levelsAbove(recHierarchy, "hlt") + addIntoHierarchy("object", BranchGroupStub("object_"), hlt, hltHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda objs : objs.record.hlt.object_p4.size()) ] ) + # < hlt + ## objects + addIntoHierarchy("muons", BranchGroupStub("muon_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda muons : muons.record.muon_charge.size()) ] ) + addIntoHierarchy("electrons", BranchGroupStub("electron_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda electrons : electrons.record.electron_charge.size()) ] ) + addIntoHierarchy("jets", BranchGroupStub("jet_"), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(lambda jets : jets.record.jet_gen_charge.size()) ] ) + for systVar in ("jecdown", "jecup", "jerdown", "jerup"): + cNm = "jets_{}".format(systVar) + addIntoHierarchy("jets_{}".format(systVar), BranchGroupStub("jet_{}_".format(systVar)), tree, recHierarchy, + capabilities=[ SmartTupleStub._SmartIterable(( lambda sv : ( lambda jets : getattr(jets.record, "jet_{}_gen_p4".format(sv)).size() ) )(systVar)) ]) + met = BranchGroupStub("met_") + addIntoHierarchy("met", met, tree, recHierarchy) + cand = BranchGroupStub("hZA_") + addIntoHierarchy("hZA", cand, tree, recHierarchy) + hZAHierarchy = levelsAbove(recHierarchy, "hZA") + # > hZA + gen = BranchGroupStub("gen_") + addIntoHierarchy("gen", gen, cand, hZAHierarchy) + # + for systVar in (None, "jecdown", "jecup", "jerdown", "jerup"): + if systVar: + cand_sv = BranchGroupStub("{}_".format(systVar)) + addIntoHierarchy(systVar, cand_sv, cand, hZAHierarchy) + svHierarchy = levelsAbove(hZAHierarchy, "systVar") ## used by jet refs + svJetColl = getattr(tree, "jets_{}".format(systVar)) + else: + cand_sv = cand + svHierarchy = hZAHierarchy + svJetColl = tree.jets + + addIntoHierarchy("leptons", LeafFacade("leptons", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.leptons, hZAObjectStubMap)]) + addIntoHierarchy("ll", LeafFacade("ll", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.ll, hZAObjectStubMap)]) + + addIntoHierarchy("jets", LeafFacade("jets", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.jets, hZAObjectStubMap)]) + addIntoHierarchy("jj", LeafFacade("jj", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.jj, hZAObjectStubMap)]) + + addIntoHierarchy("lljj_cmva", LeafFacade("lljj_cmva", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.lljj_cmva, hZAObjectStubMap)]) + addIntoHierarchy("lljj_deepCSV", LeafFacade("lljj_deepCSV", cand_sv), cand_sv, svHierarchy, + capabilities=[SmartTupleStub._WrappedIterable(cand_sv.lljj_deepCSV, hZAObjectStubMap)]) + + return tree + +""" +Interactive +""" +def doInteractive(): + from cp3_llbb.CommonTools.treedecorators import adaptArg, op + + tChain = ROOT.TChain("t") + tChain.Add("/storage/data/cms/store/user/asaggio/DYToLL_2J_13TeV-amcatnloFXFX-pythia8/DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2/180219_144051/0000/output_mc_271.root") + tChain.GetEntries() ## force load + tup = decoratedZATree(tChain) + + def toDraw(sth): + return adaptArg(sth).get_TTreeDrawStr() + def leafDeps(expr): + return list(expr._parent.leafDeps) + + import IPython; IPython.embed() + +""" +Plotter demo: a method to generate plots +""" + +if __name__ == "__main__": + doInteractive() ## FIXME only with -i From 152780100d73de320dcf62fa29c2fd27f0ab866f Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 5 Mar 2018 15:28:17 +0100 Subject: [PATCH 06/14] small fixes after moving code around --- python/lldeco.py | 2 +- python/treedecorators.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/lldeco.py b/python/lldeco.py index 9c1e7ac..ec228f9 100644 --- a/python/lldeco.py +++ b/python/lldeco.py @@ -67,7 +67,7 @@ def prodIfIterable(sfArg, **kwargs): if hasattr(sfArg, "__iter__") else ( lambda x : sfArg(x, **kwargs) )) -from .treedecorators import op, boolType +from .treedecorators import op, boolType, makeConst class LeptonScaleFactor(object): def __init__(self, elSF, muSF, cache=True): self.elSF = elSF diff --git a/python/treedecorators.py b/python/treedecorators.py index 5e257f7..3f8b54b 100644 --- a/python/treedecorators.py +++ b/python/treedecorators.py @@ -428,7 +428,7 @@ def _replLeafs(self): return self[-1]._availLeafs() @property def _leafDeps(self): - return self._base.leafDeps + return self._base._parent.leafDeps class _BoolTestableFromLeaf(_SmartLeafDecoration): __slots__ = ("_bool",) From 5d2dd6ed600aa0ded35516f3689a35c1e7cfdac2 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 5 Mar 2018 15:28:43 +0100 Subject: [PATCH 07/14] add IndexRangeIterator (C++ helper) --- common/include/IndexRangeIterator.h | 28 ++++++++++++++++++++++++++++ python/histfactory.py | 1 + python/treefactory.py | 1 + 3 files changed, 30 insertions(+) create mode 100644 common/include/IndexRangeIterator.h diff --git a/common/include/IndexRangeIterator.h b/common/include/IndexRangeIterator.h new file mode 100644 index 0000000..8bacaba --- /dev/null +++ b/common/include/IndexRangeIterator.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +/* + * Provide an iterator interface over an index that runs from 0 to N + */ +template +class IndexRangeIterator + : public boost::iterator_facade, + IDX, // value + boost::random_access_traversal_tag, + IDX // ref + > +{ +public: + IndexRangeIterator(IDX idx, IDX max) + : m_idx{idx}, m_max{max} {} + + IDX dereference() const { return m_idx; } + bool equal( const IndexRangeIterator& other ) const { return ( m_max == other.m_max ) && ( m_idx == other.m_idx ); } + void increment() { ++m_idx; } + void decrement() { --m_idx; } + void advance(std::ptrdiff_t d) { m_idx += d; } + std::ptrdiff_t distance_to(const IndexRangeIterator& other) const { return m_idx-other.m_idx; } +private: + IDX m_idx, m_max; +}; diff --git a/python/histfactory.py b/python/histfactory.py index 7183c92..43ea7f3 100644 --- a/python/histfactory.py +++ b/python/histfactory.py @@ -580,6 +580,7 @@ def createPlotter(plots, skeleton, outdir=None, **kwargs): from .factoryhelpers import COMMON_INCLUDE_DIR include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) includes.append("kinematics.h") + includes.append("IndexRangeIterator.h") if kwargs.get("addScaleFactorsLib", False): from . import pathCommonTools diff --git a/python/treefactory.py b/python/treefactory.py index cca9794..d04e423 100644 --- a/python/treefactory.py +++ b/python/treefactory.py @@ -298,6 +298,7 @@ def createSkimmer(selection, branchVars, skeleton, outdir=None, treeName="t", ** from .factoryhelpers import COMMON_INCLUDE_DIR include_dirs.append(os.path.join(COMMON_INCLUDE_DIR)) includes.append("kinematics.h") + includes.append("IndexRangeIterator.h") # add jsoncpp (from external) from . import pathCommonTools external_dir = os.path.join(os.path.abspath(pathCommonTools), "external") From 4a7976f9c4a967a560c9a392a09267c02ccb4312 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 5 Mar 2018 16:00:04 +0100 Subject: [PATCH 08/14] Include scalefactors and plotit helpers and complete the example using those --- python/plotithelpers.py | 26 ++++ python/scalefactors.py | 278 ++++++++++++++++++++++++++++++++++ scripts/example_plotsHtoZA.py | 195 +++++++++++++++++++++++- 3 files changed, 496 insertions(+), 3 deletions(-) create mode 100644 python/plotithelpers.py create mode 100644 python/scalefactors.py diff --git a/python/plotithelpers.py b/python/plotithelpers.py new file mode 100644 index 0000000..c186176 --- /dev/null +++ b/python/plotithelpers.py @@ -0,0 +1,26 @@ +""" +Automatically generate plotIt yml file from list of plots etc. +""" + +def writePlotIt(plotList, outfile): + for plot in plotList: + if len(plot.variables) == 1: + plotopts = { + "x-axis" : '"{0}"'.format(plot.axisTitles[0]) + , "y-axis" : "Evt" + , "y-axis-format" : '"%1% / %2$.0f"' + , "normalized" : "false" + , "x-axis-format" : "[{0:e}, {1:e}]".format(plot.binnings[0].minimum, plot.binnings[0].maximum) + , "log-y" : "both" + , "y-axis-show-zero" : "true" + , "save-extensions" : '["pdf"]' + , "show-ratio" : "true" + , "sort-by-yields" : "false" + } + plotopts.update(plot.plotopts) ## user can set everything, but the above defaults are always there + frag = "\n".join([ + "'{name}':".format(name=plot.name) + ]+[ + " {0}: {1}".format(k,v) for k,v in plotopts.iteritems() + ]+['']) + outfile.write(frag) diff --git a/python/scalefactors.py b/python/scalefactors.py new file mode 100644 index 0000000..1df324e --- /dev/null +++ b/python/scalefactors.py @@ -0,0 +1,278 @@ +""" +Helpers for configuring scale factors, fake rates etc. + +The basic configuration parameter is the json file path for a set of factors. +There two basic types are +- lepton scale factors (dependent on a number of object variables, e.g. PT and ETA), +- jet (b-tagging) scale factors (grouped set for different flavours, for convenience) + +Different values (depending on the data-taking period) +can be taken into account by weighting or by randomly sampling. +""" +""" +Common definitions of scale factors +""" +__all__ = ("get_scalefactors",) + +import os.path +from itertools import chain, ifilter + +from . import pathCP3llbb +def localize_llbbFwk(aPath, cp3llbbBase=pathCP3llbb): + return os.path.join(cp3llbbBase, "Framework", "data", "ScaleFactors", aPath) + + +lumiPerPeriod_2016 = { + "B" : 5785.152 ## averaged 5783.740 (DoubleMuon), 5787.976 (DoubleEG) and 5783.740 (MuonEG) - max dev. from average is 5.e-4 << lumi syst + , "C" : 2573.399 + , "D" : 4248.384 + , "E" : 4009.132 + , "F" : 3101.618 + , "G" : 7540.488 + , "H" : 8605.689 + ## + , "Run271036to275783" : 6274.191 + , "Run275784to276500" : 3426.131 + , "Run276501to276811" : 3191.207 + } + +hwwMuonPeriods_2016 = [ "Run271036to275783", "Run275784to276500", "Run276501to276811" ] +hwwElePeriods_2016 = [] ## TODO + +all_scalefactors = { + "electron_2015_76" : dict((k,localize_llbbFwk(v)) for k, v in chain( + dict(("id_{wp}".format(wp=wp.lower()), ("Electron_CutBasedID_{wp}WP_fromTemplates_withSyst_76X.json".format(wp=wp))) + for wp in ("Veto", "Loose", "Medium", "Tight")).iteritems() + , { "hww_wp" : "Electrons_HWW_CutBasedID_TightWP_76X_forHWW_Final.json" }.iteritems() + )) + , "muon_2015_76" : dict((k, localize_llbbFwk(v)) for k, v in chain( + dict(("id_{wp}".format(wp=wp.lower()), "Muon_{wp}ID_genTracks_id.json".format(wp=wp)) for wp in ("Soft", "Loose", "Medium")).iteritems() + , { "id_tight" : "Muon_TightIDandIPCut_genTracks_id.json" }.iteritems() + , dict(("iso_{isowp}_id_{idwp}".format(isowp=isowp.lower(), idwp=idwp.lower()), "Muon_{isowp}RelIso_{idwp}ID_iso.json".format(isowp=isowp, idwp=idwp)) + for (idwp,isowp) in (("Loose", "Loose"), ("Loose", "Medium"), ("Loose", "Tight"), ("Tight", "Medium"), ("Tight", "Tight")) ).iteritems() + , { "id_hww" : "Muon_MediumID_Data_MC_25ns_PTvsETA_HWW_76.json" + , "iso_tight_id_hww" : "Muon_ISOTight_Data_MC_25ns_PTvsETA_HWW.json" }.iteritems() + )) + , "btag_2015_76" : dict() ## TODO + ## 2016 CMSSW_8_0_... + , "muon_2016_80" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + ## https://twiki.cern.ch/twiki/bin/view/CMS/MuonReferenceEffsRun2#Tracking_efficiency_provided_by + { "tracking" : "Muon_tracking_BCDEFGH.json" }.iteritems() + ## https://twiki.cern.ch/twiki/bin/viewauth/CMS/MuonWorkInProgressAndPagResults + , dict(("id_{wp}".format(wp=wp.lower()), [ (eras, "Muon_{wp}ID_genTracks_id_{era}.json".format(wp=wp, era=eras)) for eras in ("BCDEF", "GH") ]) for wp in ("Loose", "Medium", "Tight")).iteritems() + , { "id_medium2016" : [ (eras, "Muon_MediumID2016_genTracks_id_{era}.json".format(era=eras)) for eras in ("BCDEF", "GH") ] }.iteritems() + , dict(("iso_{isowp}_id_{idwp}".format(isowp=isowp.lower(), idwp=idwp.lower()) + , [ (eras, "Muon_{isowp}ISO_{idwp}ID_iso_{era}.json".format(isowp=isowp, idwp=idwp, era=eras)) for eras in ("BCDEF", "GH") ]) + for (isowp, idwp) in [("Loose", "Loose"), ("Loose", "Medium"), ("Loose", "Tight"), ("Tight", "Medium"), ("Tight", "Tight")]).iteritems() + ## https://twiki.cern.ch/twiki/bin/view/CMS/HWW2016TriggerAndIdIsoScaleFactorsResults + , { "iso_tight_hww" : [ ((era,), "Muon_data_mc_ISOTight_Run2016_{era}_PTvsETA_HWW.json".format(era=era)) for era in hwwMuonPeriods_2016 ] }.iteritems() + , { "id_tight_hww" : [ ((era,), "Muon_data_mc_TightID_Run2016_{era}_PTvsETA_HWW.json".format(era=era)) for era in hwwMuonPeriods_2016 ] }.iteritems() + )) + ## https://twiki.cern.ch/twiki/bin/view/CMS/EgammaIDRecipesRun2#Efficiencies_and_scale_factors + , "electron_2016_ichep2016" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + { "gsf_tracking" : "Electron_EGamma_SF2D_gsfTracking.json" }.iteritems() + , dict(("id_{wp}".format(wp=wp), "Electron_EGamma_SF2D_{wp}.json".format(wp=wp)) for wp in ("veto", "loose", "medium", "tight")).iteritems() + , { "hww_wp" : [ ((era,), "Electron_Tight_Run2016_{era}_HWW.json".format(era=era)) for era in hwwElePeriods_2016 ] }.iteritems() + )) + , "electron_2016_moriond2017" : dict((k,( localize_llbbFwk(v) if isinstance(v, str) + else [ (eras, localize_llbbFwk(path)) for eras,path in v ])) + for k, v in chain( + { "gsf_tracking" : "Electron_EGamma_SF2D_reco_moriond17.json" }.iteritems() + , dict(("id_{wp}".format(wp=wp), "Electron_EGamma_SF2D_{wp}_moriond17.json".format(wp=wp)) for wp in ("veto", "loose", "medium", "tight")).iteritems() + )) + , "eleltrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Electron_IsoEle23Leg", "Electron_IsoEle12Leg", "Electron_IsoEle23Leg", "Electron_IsoEle12Leg")) + , "elmutrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Electron_IsoEle23Leg", "Electron_IsoEle12Leg", "Muon_XPathIsoMu23leg", "Muon_XPathIsoMu8leg")) + , "mueltrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Muon_XPathIsoMu23leg", "Muon_XPathIsoMu8leg", "Electron_IsoEle23Leg", "Electron_IsoEle12Leg")) + , "mumutrig_2016_HHMoriond17" : tuple(os.path.join(pathCP3llbb, "ZAAnalysis", "data", "Efficiencies", "{0}.json".format(nm)) for nm in ("Muon_DoubleIsoMu17Mu8_IsoMu17leg", "Muon_DoubleIsoMu17TkMu8_IsoMu8legORTkMu8leg", "Muon_DoubleIsoMu17Mu8_IsoMu17leg", "Muon_DoubleIsoMu17TkMu8_IsoMu8legORTkMu8leg")) + ## https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation80XReReco + , "btag_2016_moriond2017" : dict((k,( tuple(localize_llbbFwk(fv) for fv in v) if isinstance(v,tuple) and all(isinstance(fv, str) for fv in v) + else [ (eras, tuple(localize_llbbFwk(fpath) for fpath in paths)) for eras,paths in v ])) + for k, v in + dict(("cMVAv2_{wp}".format(wp=wp), tuple("BTagging_{wp}_{flav}_{calib}_cmvav2_BtoH_moriond17.json".format(wp=wp, flav=flav, calib=calib) for (flav, calib) in (("lightjets", "incl"), ("cjets", "ttbar"), ("bjets", "ttbar")))) for wp in ("loose", "medium", "tight")).iteritems() + ) + } + +from .treedecorators import op +binningVariablesByName = { + "Eta" : lambda obj : obj.p4.Eta() + , "ClusEta" : lambda obj : obj.clusterEta + , "AbsEta" : lambda obj : op.abs(obj.p4.Eta()) + , "AbsClusEta": lambda obj : op.abs(obj.clusterEta) + , "Pt" : lambda obj : obj.p4.Pt() + } ## TODO add more? + +from collections import OrderedDict as odict +def getBinningVarNames(jsonpath): + import json + with open(jsonpath, "r") as jsf: + cont = json.load(jsf) + return tuple(cont["variables"]) + +class BinningParameters(object): + def __init__(self, binningVars): + self.binningVars = binningVars + def __call__(self, obj): + return op.construct("Parameters", + (op.initList("std::initializer_list", "Parameters::value_type::value_type", ( + op.initList("Parameters::value_type::value_type", "float", (op.extVar("int", "BinningVariable::{0}".format(bvNm.replace("Clus", ""))), bv(obj))) + for bvNm,bv in self.binningVars.iteritems())),) + ) + +def getBinningParameters(bVarNames, isElectron=False, moreVars=dict()): + if isElectron: + bVarNames = [ k.replace("Eta", "ClusEta") for k in bVarNames ] + theDict = dict(binningVariablesByName) + theDict.update(moreVars) + return BinningParameters(odict((k,theDict[k]) for k in bVarNames)) + +import ROOT +ROOT.gROOT.ProcessLine(".I {0}".format(os.path.join(pathCP3llbb, "Framework", "interface"))) +ROOT.gROOT.ProcessLine("#define STANDALONE_SCALEFACTORS") +ROOT.gROOT.ProcessLine('#include "BinnedValues.h"') + +class ScaleFactor(object): + def __init__(self, name, initLines=None, args=None): + self._name = name + self._initLines = initLines if initLines is not None else [] + self._args = args if args is not None else [] + @property + def cpp_initCode(self): + return tuple(self._initLines) + def __call__(self, obj, variation="Nominal", withMCCheck=True, precalc=True, check=None): + from .treedecorators import makeConst, boolType + expr = op.extMethod("{0}.get".format(self._name))(*tuple(chain( + list(a(obj) for a in self._args) + , (op.extVar("int", variation),) + ))) + if check: + expr = op.switch(check, expr, makeConst(1., "Float_t")) + if withMCCheck: + expr = op.switch(op.extVar(boolType, "runOnMC"), expr, makeConst(1., "Float_t")) + if precalc: + import uuid ## caching + expr._parent.uname = "sf_{0}".format(str(uuid.uuid4()).replace("-", "_")) + return expr + +def lepton_scalefactor(elSF, muSF, obj): + pass + +def get_scalefactors(name, objType, key, periods=None, combine=None, additionalVariables=dict()): + ## get the right config + if isinstance(key, tuple): + # interpret key=("a", "b") as ...["a"]["b"] + mainKey = key[0] + config = all_scalefactors[key[0]] + for idx in xrange(1,len(key)): + config = config[key[idx]] + else: + mainKey = key + config = all_scalefactors[key] + + if periods is None: + if "2016" in mainKey: + periods = "BCDEFGH" + else: + periods = [] + periods = set(periods) + + combPrefix = "" + objStr = "" + initLines = tuple() + sfArgs = [] + + if combine is not None: + combPrefix = { "weight" : "W" + , "sample" : "Smp" }.get(combine, "W") + + def doubleQuote(aStr): + return '"{0}"'.format(aStr) + def initList(elms): + return "{{ {0} }}".format(", ".join(elms)) + def mapInitList(items): + return initList(initList((k,v)) for k,v in items) + + if objType == "lepton": + if isinstance(config, str): + initLines = ("{obj}ScaleFactor {nm}{{{pth}}};".format(nm=name, obj=objStr, pth=doubleQuote(config)),) + sfArgs = [ getBinningParameters(getBinningVarNames(config), isElectron=(key[0].split("_")[0] == "electron"), moreVars=additionalVariables) ] + else: + if combPrefix == "": + raise ValueError("A combination mode needs to be specified for this scale factor") + selConfigs = list(ifilter(lambda (w,v) : w != 0., + ((sum(lumiPerPeriod_2016[ier] for ier in eras if ier in periods),path) + for eras,path in config if any(ier in periods for ier in eras)))) + if len(selConfigs) > 1: + initLines = tuple(["std::vector> __tmp_{nm};".format(nm=name)] + +[ "__tmp_{nm}.emplace_back(std::unique_ptr{{new {obj}ScaleFactor{{{0}}}}});".format(doubleQuote(path), obj=objStr, nm=name) for wgt,path in selConfigs ] + +["{cmb}{obj}ScaleFactor {nm}{{{{ {0} }}, std::move(__tmp_{nm}) }};".format(", ".join("{0:e}".format(wgt) for wgt,path in selConfigs), nm=name, obj=objStr, cmb=combPrefix)] + ) + else: + initLines = ("{obj}ScaleFactor {nm}{{{pth}}};".format(nm=name, obj=objStr, pth=doubleQuote(selConfigs[0][1])),) + bVarNames = set(chain.from_iterable(getBinningVarNames(iPth) for iWgt,iPth in selConfigs)) + sfArgs = [ getBinningParameters(bVarNames, isElectron=(key[0].split("_")[0] == "electron"), moreVars=additionalVariables) ] + + elif objType == "dilepton": + objStr = "DiLeptonFromLegs" + if isinstance(config, tuple) and len(config) == 4: + if not all(isinstance(iCfg, str) for iCfg in config): + raise TypeError("Config for dilepton scale factor should be quadruplet of paths or list f weights and triplets, found {0}".format(config)) + else: + initLines = ("{obj}ScaleFactor {nm}{{{args}}};".format(nm=name, obj=objStr, args=", ".join("std::unique_ptr(new ScaleFactor({0}))".format(doubleQuote(leplegCfg)) for leplegCfg in config)),) + sfArgs = [ (lambda bp : (lambda ll : bp(ll.l1)))(getBinningParameters(set(chain(getBinningVarNames(config[0]), getBinningVarNames(config[1]))), moreVars=additionalVariables)) + , (lambda bp : (lambda ll : bp(ll.l2)))(getBinningParameters(set(chain(getBinningVarNames(config[2]), getBinningVarNames(config[3]))), moreVars=additionalVariables)) + ] + else: + raise NotImplementedError("Still to do this part") + + elif objType == "jet": + objStr = "BTagging" + if isinstance(config, tuple) and len(config) == 3: + if not all(isinstance(iCfg, str) for iCfg in config): + raise TypeError("Config for b-tagging should be triplet of paths or list of weights and triplets, found {0}".format(config)) + else: + initLines = ("{obj}ScaleFactor {nm}{{{0}}};".format(", ".join(doubleQuote(iCfg) for iCfg in config), obj=objStr, nm=name),) + + bVarNames = set(chain.from_iterable(getBinningVarNames(iCfg) for iCfg in config)) + sfArgs = [ getBinningParameters(bVarNames, moreVars=additionalVariables) ] + else: + if not ( all((isinstance(iCfg, tuple) and len(iCfg) == 3 and all(isinstance(iPth, str) for iPth in iCfg) ) for iCfg in config) ): + raise TypeError("Config for b-tagging should be triplet of paths or list of weights and triplets, found {0}".format(config)) + else: + if combPrefix == "": + raise ValueError("A combination mode needs to be specified for this scale factor") + selConfigs = list(ifilter(lambda (w,v) : w != 0., + ((sum(lumiPerPeriod_2016[ier] for ier in eras if ier in periods),paths) + for eras,paths in config if any(ier in periods for ier in eras)))) + if len(selConfigs) > 1: + initLines = tuple(["std::vector> __tmp_{nm};".format(obj=objStr, nm=name)] + +[ "__tmp_{nm}.emplace_back(std::unique_ptr{{new {obj}ScaleFactor{{{0}}}}});".format(", ".join(doubleQuote(iPth) for iPth in paths), obj=objStr, nm=name) for wgt,paths in selConfigs ] + +["{cmb}{obj}ScaleFactor {nm}{{ {{ {0} }}, std::move(__tmp_{nm}) }};".format(", ".join("{0:e}".format(wgt) for wgt,paths in selConfigs), nm=name, obj=objStr, cmb=combPrefix)] + ) + else: + initLines = ("{obj}ScaleFactor {nm}{{{0}}};".format(", ".join(doubleQuote(iCfg) for iCfg in config[0][1]), obj=objStr, nm=name),) + + bVarNames = set(chain.from_iterable(getBinningVarNames(iPth) for iWgt,paths in selConfigs for iPth in paths)) + sfArgs = [ getBinningParameters(bVarNames, moreVars=additionalVariables) ] + sfArgs.append(lambda j : op.extMethod("IJetScaleFactor::get_flavour")(j.hadronFlavor)) + else: + raise ValueError("Unknown object type: {0}".format(objType)) + + ### cpp_initCode + return ("{typ} {name}{{{0}}};".format( + ", ".join(self._initArgs) + , typ=self._cppType, name=self._name + ),) ## combPrefix objStr + ### + + return ScaleFactor(name, initLines, args=sfArgs) + + +if __name__ == "__main__": + #aSF = get_scalefactors("test_mu", "lepton", ("muon_2015_76", "iso_loose_id_loose")) + lSF = get_scalefactors("test", "lepton", ("muon_2016_80", "iso_loose_id_loose"), combine="weight") + jSF = get_scalefactors("testb", "jet", ("btag_2016_moriond2017", "cMVAv2_loose"), combine="weight") + llSF = get_scalefactors("testTrig", "dilepton", "eleltrig_2016_HHMoriond17") diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py index a0205a6..039fc83 100755 --- a/scripts/example_plotsHtoZA.py +++ b/scripts/example_plotsHtoZA.py @@ -4,7 +4,10 @@ """ import ROOT ROOT.PyConfig.IgnoreCommandLineOptions = True # let python do option parsing - +## logging +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) """ za decoration @@ -132,8 +135,194 @@ def leafDeps(expr): import IPython; IPython.embed() """ -Plotter demo: a method to generate plots +Plotter demo: generate plots """ +from cp3_llbb.CommonTools.plots import Plot, EquidistantBinning, Selection + +jetVars = { + "PT" : (lambda j : j.p4.Pt() , EquidistantBinning(50, 0., 350.), "transverse momentum", "p_{T} (GeV/c)") + , "ETA" : (lambda j : j.p4.Eta(), EquidistantBinning(50, -2.5, 2.5), "pseudorapidity" , "#eta") + ## flavour taggers + , "CSVv2" : (lambda j : j.pfCombinedInclusiveSecondaryVertexV2BJetTags, EquidistantBinning(50, 0., 1.), "CSVv2 classifier output", "CSVv2") + , "cMVAv2" : (lambda j : j.pfCombinedMVAV2BJetTags, EquidistantBinning(50, -1., 1.), "cMVAv2 classifier output", "cMVAv2") + , "deepCSV" : (lambda j : j.pfDeepCSVJetTags_probb+j.pfDeepCSVJetTags_probbb, EquidistantBinning(50, 0., 1.), "DeepCSV classifier output: probb+probbb", "probb+probbb") + } + +lepVars = { + "PT" : (lambda l : l.p4.Pt() , EquidistantBinning(50, 0., 300.), "transverse momentum", "p_{T} (GeV/c)") + , "ETA" : (lambda l : l.p4.Eta(), EquidistantBinning(50, -2.5, 2.5), "pseudorapidity" , "#eta") + } + +lepVars_split = { + "IsoEA" : (lambda l : l.relativeIsoR03_withEA, -1., EquidistantBinning(50, 0., 1.), "PFRelIsoR03 with EA", "RelIsoR03_withEA") + } + +## Get scale factors +scale_factors = set() +from cp3_llbb.CommonTools.scalefactors import get_scalefactors +from cp3_llbb.CommonTools.lldeco import LeptonScaleFactor, DiLeptonScaleFactor +sf_ele_trk = get_scalefactors("sf_ele_2016_trk" , "lepton", ("electron_2016_moriond2017", "gsf_tracking"), combine="weight") +sf_ele_IDL = get_scalefactors("sf_ele_2016_IDLtrk" , "lepton", ("electron_2016_moriond2017", "id_loose"), combine="weight") +sf_mu_trk = get_scalefactors("sf_muon_2016_trk", "lepton", ("muon_2016_80", "tracking"), combine="weight") +sf_mu_IDL = get_scalefactors("sf_muon_2016_IDLtrk", "lepton", ("muon_2016_80", "id_loose"), combine="weight") +lepton_sf_IDL = LeptonScaleFactor((sf_ele_trk, sf_ele_IDL), (sf_mu_trk, sf_mu_IDL)) +sf_elel_trig = get_scalefactors("sf_elel_2016_trig", "dilepton", "eleltrig_2016_HHMoriond17") +sf_elmu_trig = get_scalefactors("sf_elmu_2016_trig", "dilepton", "elmutrig_2016_HHMoriond17") +sf_muel_trig = get_scalefactors("sf_muel_2016_trig", "dilepton", "mueltrig_2016_HHMoriond17") +sf_mumu_trig = get_scalefactors("sf_mumu_2016_trig", "dilepton", "mumutrig_2016_HHMoriond17") +dilepton_sf_trig = DiLeptonScaleFactor(elelSF=sf_elel_trig, elmuSF=sf_elmu_trig, muelSF=sf_muel_trig, mumuSF=sf_mumu_trig) +cmva_discriVar = {"BTagDiscri":lambda j : j.pfCombinedMVAV2BJetTags} +sf_jet_btag_cmvaLoose = get_scalefactors("sf_jet_2016_btag_cmvaLoose", "jet", ("btag_2016_moriond2017", "cMVAv2_loose"), additionalVariables=cmva_discriVar) +sf_jet_btag_cmvaMedium = get_scalefactors("sf_jet_2016_btag_cmvaMedium", "jet", ("btag_2016_moriond2017", "cMVAv2_medium"), additionalVariables=cmva_discriVar) +for aSF in ( sf_ele_trk, sf_ele_IDL, sf_mu_trk, sf_mu_IDL + , sf_elel_trig, sf_elmu_trig, sf_muel_trig, sf_mumu_trig + , sf_jet_btag_cmvaLoose, sf_jet_btag_cmvaMedium + ): + scale_factors.add(aSF) +## FIXME temporary hack for ZA (no cluster eta in trees) +from cp3_llbb.CommonTools.scalefactors import BinningParameters, binningVariablesByName +for sf in scale_factors: + for bv in sf._args: + if isinstance(bv, BinningParameters): + new_bv = dict() + for bvNm in bv.binningVars.iterkeys(): + if bvNm.endswith("ClusEta"): + new_bv[bvNm] = binningVariablesByName[bvNm.replace("ClusEta", "Eta")] + for bvNm, bvFun in new_bv.iteritems(): + bv.binningVars[bvNm] = bvFun +## FIXME END + +def make1DPlot(name, variable, selection, binning, **kwargs): + """ 1D plot (allows to select categories, combined plot or both without duplication) """ + out = [] + doCombined = kwargs.pop("combined", True) + categories = kwargs.pop("categories", None) + if doCombined: + out.append(Plot.make1D(name, variable, selection, binning, **kwargs)) + if categories: + out += [ Plot.make1D("{0}_{cat}".format(name, cat=catNm), variable, Selection.addTo(selection, cut=catCut), binning, **kwargs) for catNm, catCut in categories.iteritems() ] + return out + +def makeControlPlots(record, hza): + from cp3_llbb.CommonTools.treedecorators import op, makeConst, boolType + from cp3_llbb.CommonTools.lldeco import onlyElectron, onlyMuon + + ## Define selections (this part would go into a shared python module in ZATools) + from collections import OrderedDict as odict + selDict = odict() + + evtSel_all = Selection([], [ record.event.weight, record.event.pu_weight ], None) + + llSel_loose = lambda ll : ( + ll.isOS + #, onlyElectron(lambda el : el.POGIDHLTPre, muonValue=makeConst("true", boolType))(ll.l1) + #, onlyElectron(lambda el : el.POGIDHLTPre, muonValue=makeConst("true", boolType))(ll.l2) + ) + llCand = op.rng_find(hza.ll, llSel_loose) + selDict["ll"] = Selection.addTo(evtSel_all, + weight=[ lepton_sf_IDL(llCand.l1), lepton_sf_IDL(llCand.l2), dilepton_sf_trig(llCand) ], + cut=[ ( op.rng_min(hza.ll, lambda ll : op.invariant_mass(ll.l1.L.p4, ll.l2.L.p4) ) > 12. ) + , op.rng_any(hza.ll, llSel_loose) ], + candidate=llCand) + lljjCand = op.rng_find(hza.lljj_deepCSV, lambda lljj : llSel_loose(lljj)) + selDict["lljj"] = Selection.addTo(evtSel_all, + weight=[ lepton_sf_IDL(lljjCand.l1), lepton_sf_IDL(lljjCand.l2), dilepton_sf_trig(lljjCand) ], + cut=[ ( op.rng_min(hza.ll, lambda ll : op.invariant_mass(ll.l1.L.p4, ll.l2.L.p4) ) > 12. ) + , op.rng_any(hza.lljj_deepCSV, lambda lljj : llSel_loose(lljj)) ], + candidate=lljjCand) + + from itertools import product, tee + allFlavCats = list("".join((f1,f2)) for f1,f2 in product(*tee(("El", "Mu"),2))) + flavCats_ll = dict((fc, getattr(llCand, "is{0}".format(fc))) for fc in allFlavCats) + flavCats_lljj = dict((fc, getattr(lljjCand, "is{0}".format(fc))) for fc in allFlavCats) + + ## Define plots + plots = [] + for psName, presel in selDict.iteritems(): + cand = presel.candidate + flavCats = dict() + if psName.startswith("lljj") or psName.startswith("llbb"): + flavCats = flavCats_lljj + elif psName.startswith("ll"): + flavCats = flavCats_ll + plots += make1DPlot("{0}_llMZ".format(psName) + , op.invariant_mass(cand.l1.L.p4, cand.l2.L.p4), presel + , EquidistantBinning(50, 0., 200.), categories=flavCats + , title="Dilepton invariant mass", xTitle="Invariant Mass (GeV/c^{2})") + + return plots if __name__ == "__main__": - doInteractive() ## FIXME only with -i + import argparse + argparser = argparse.ArgumentParser() + argparser.add_argument("-i", "--interactive", action="store_true", help="Launch an IPython shell to inspect the tree") + argparser.add_argument("--reuseplotter", action="store_true", help="Don't write and compile the plotter(s), but reuse from a previous run") + argparser.add_argument("--reusehistos", action="store_true", help="Don't create or run the plotters") + argparser.add_argument("--skipplotit", action="store_true", help="Don't produce PDF plots") + args = argparser.parse_args() + + if args.interactive: + doInteractive() + else: ## make and run a plotter + logger.info("Loading and decorating skeleton tree") + skelF = ROOT.TFile.Open("/storage/data/cms/store/user/asaggio/DYToLL_2J_13TeV-amcatnloFXFX-pythia8/DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2/180219_144051/0000/output_mc_271.root") + tup = decoratedZATree(skelF.Get("t")) + + controlPlots = makeControlPlots(tup, tup.hZA) + + import os.path + from cp3_llbb.SAMADhi.SAMADhi import DbStore, Sample + db = DbStore() + ctrl_samples = dict((smpName, { + "tree_name" : "t" + , "sample_cut" : "1." + , "files" : [ u"/storage/data/cms{}".format(f.lfn) for f in db.find(Sample, Sample.name.like(smpName)).one().files ] + }) for smpName in [ + u"DoubleEG_Run2016H-03Feb2017-v3_v6.2.0+80X_ZAAnalysis_2018-02-16" + , u"DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2_v6.2.0+80X_ZAAnalysis_2018-02-16" + ]) + + from cp3_llbb.CommonTools.histfactory import createPlotter, compilePlotter, runPlotterOnSamples, prepareWorkdir + import os + workdir = os.path.join(os.getcwd(), "latest_ratechecks") + if not args.reusehistos: + if not args.reuseplotter: + plotterdir, histosdir, plotsdir = prepareWorkdir(workdir) + + includes = [ "Math/VectorUtil.h", "ScaleFactors.h" ] + from itertools import chain + sf_init_lines = list(chain.from_iterable(sf.cpp_initCode for sf in scale_factors)) + + createPlotter(controlPlots, tup, includes=includes, outdir=plotterdir, + user_initialisation=sf_init_lines, addScaleFactorsLib=True) + plotterName = compilePlotter(plotterdir) + else: + plotterdir = os.path.join(workdir, "plotter") + histosdir = os.path.join(workdir, "histos") + plotterName = os.path.join(plotterdir, "plotter.exe") + if not os.path.exists(plotterName): + raise Exception("No such file {}".format(plotterName)) + logger.info("Reusing with-MC plotter {}".format(plotterName)) + plotsdir = os.path.join(workdir, "plots") + + ## run the plotter + from cp3_llbb.CommonTools.slurmhelpers import makeSlurmTasksMonitor as makeTasksMonitor + clusMon = makeTasksMonitor() + runPlotterOnSamples(plotterName, ctrl_samples, outdir=histosdir, useCluster=True, clusterworkdir=os.path.join(workdir, "slurm_histos"), taskMon=clusMon) + clusMon.collect(wait=60) ## wait for the jobs to finish + + ## prepare plotting + from cp3_llbb.CommonTools.plotithelpers import writePlotIt + with open(os.path.join(workdir, "plots.yml"), "w") as plotsFile: + writePlotIt(controlPlots, plotsFile) + + ### Example: run plotIt in one go + ##import subprocess + ##plotIt = os.path.join(os.path.dirname(pathCommonTools), "plotIt", "plotIt") + ##if not args.skipplotit: + ## logger.info("Running plotIt") + ## try: + ## with open(os.path.join(plotsdir, "out.log"), "w") as logFile: + ## subprocess.check_call([plotIt, "-i", workdir, "-o", plotsdir, os.path.join(pathCommonTools, "plotit", "")], stdout=logFile, cwd=pathCommonTools) + ## except subprocess.CalledProcessError, e: + ## logger.error("Command {0} failed with exit code {1}\n{2}".format(" ".join(e.cmd), e.returncode, e.output)) From 5ea90107859e581ff89635c8c43855e1e217bd42 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 5 Mar 2018 16:54:55 +0100 Subject: [PATCH 09/14] Whitespace cleanup --- python/condorhelpers.py | 4 ++-- python/histfactory.py | 2 +- python/nanoaoddeco.py | 2 +- python/scalefactors.py | 2 +- python/slurmhelpers.py | 4 ++-- python/treedecorators.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/condorhelpers.py b/python/condorhelpers.py index 3c54790..6f0123e 100644 --- a/python/condorhelpers.py +++ b/python/condorhelpers.py @@ -27,7 +27,7 @@ class CommandListCondorJob(CommandListJob): """ Helper class to create a condor master job from a list of commands (each becoming one subjob) - + Default work directory will be $(pwd)/condor_work, default output pattern is "*.root" """ def __init__(self, commandList, workDir=None, envSetupLines=None, outputPatterns=None): @@ -56,7 +56,7 @@ def __init__(self, commandList, workDir=None, envSetupLines=None, outputPatterns "\n" "{indir}/condor_$1.sh\n" ) - + JobShell = ( "#!/usr/bin/env bash\n" "\n" diff --git a/python/histfactory.py b/python/histfactory.py index 43ea7f3..36d4d0d 100644 --- a/python/histfactory.py +++ b/python/histfactory.py @@ -559,7 +559,7 @@ def cppLines(someNode, indent=0): def createPlotter(plots, skeleton, outdir=None, **kwargs): """ Turned the config into the main script - + this is only a helper method that deals with creating a C++ plotter from it """ if not outdir: diff --git a/python/nanoaoddeco.py b/python/nanoaoddeco.py index 94d453f..dc85199 100644 --- a/python/nanoaoddeco.py +++ b/python/nanoaoddeco.py @@ -3,7 +3,7 @@ def decoratedNanoAOD(nnAODTree): allLvs = dict((lv.GetName(), lv) for lv in nnAODTree.GetListOfLeaves()) noCountLvs = set() - collectionLvs = dict() + collectionLvs = dict() for lvNm, lv in allLvs.iteritems(): cntLv = lv.GetLeafCount() if cntLv: diff --git a/python/scalefactors.py b/python/scalefactors.py index 1df324e..c76a1e8 100644 --- a/python/scalefactors.py +++ b/python/scalefactors.py @@ -3,7 +3,7 @@ The basic configuration parameter is the json file path for a set of factors. There two basic types are -- lepton scale factors (dependent on a number of object variables, e.g. PT and ETA), +- lepton scale factors (dependent on a number of object variables, e.g. PT and ETA), - jet (b-tagging) scale factors (grouped set for different flavours, for convenience) Different values (depending on the data-taking period) diff --git a/python/slurmhelpers.py b/python/slurmhelpers.py index 8e8ab53..c66393a 100644 --- a/python/slurmhelpers.py +++ b/python/slurmhelpers.py @@ -28,7 +28,7 @@ def redirect_stdout(whereto=None): class CommandListSlurmJob(CommandListJob): """ Helper class to create a slurm job array from a list of commands (each becoming a task in the array) - + Default work directory will be $(pwd)/slurm_work, default output pattern is "*.root" """ default_cfg_opts = { @@ -114,7 +114,7 @@ def submit(self): , self.slurmScript]) self.clusterId = next(tok for tok in reversed(result.split()) if tok.isdigit()) - + ## save to file in case with open(os.path.join(self.workDirs["in"], "cluster_id"), "w") as f: f.write(self.clusterId) diff --git a/python/treedecorators.py b/python/treedecorators.py index 3f8b54b..3882a7a 100644 --- a/python/treedecorators.py +++ b/python/treedecorators.py @@ -793,7 +793,7 @@ def __eq__(self, other): ## TODO maybe name is enough def _get_TTreeDrawStr(self): return self.name -class GetItem(TupleOp): +class GetItem(TupleOp): """ Get item from array (from function call or from array leaf) """ From f42e432af137992836840bb14b5980f478f54ed8 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 5 Mar 2018 17:33:19 +0100 Subject: [PATCH 10/14] Add lepton and jet plots as well --- scripts/example_plotsHtoZA.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py index 039fc83..448076a 100755 --- a/scripts/example_plotsHtoZA.py +++ b/scripts/example_plotsHtoZA.py @@ -236,6 +236,7 @@ def makeControlPlots(record, hza): flavCats_ll = dict((fc, getattr(llCand, "is{0}".format(fc))) for fc in allFlavCats) flavCats_lljj = dict((fc, getattr(lljjCand, "is{0}".format(fc))) for fc in allFlavCats) + from itertools import izip, count, chain ## Define plots plots = [] for psName, presel in selDict.iteritems(): @@ -250,6 +251,34 @@ def makeControlPlots(record, hza): , EquidistantBinning(50, 0., 200.), categories=flavCats , title="Dilepton invariant mass", xTitle="Invariant Mass (GeV/c^{2})") + plots += chain.from_iterable(make1DPlot("{0}_L{1:d}_{2}".format(psName, iL, varName) + , fun(lep.L), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, binning, title, xTitle) in lepVars.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + + plots += chain( + chain.from_iterable(make1DPlot("{0}_L{1:d}_mu_{2}".format(psName, iL, varName) + , onlyMuon(fun, electronValue=elVal)(lep), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, elVal, binning, title, xTitle) in lepVars_split.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + , chain.from_iterable(make1DPlot("{0}_L{1:d}_el_{2}".format(psName, iL, varName) + , onlyElectron(fun, muonValue=muVal)(lep), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, muVal, binning, title, xTitle) in lepVars_split.iteritems() + for iL, lep in izip(count(1), (cand.l1, cand.l2))) + ) + + if psName.startswith("lljj"): + plots += chain.from_iterable( + make1DPlot("{0}_J{1:d}_{2}".format(psName, iJ, varName) + , fun(jet), presel, binning + , title=title, xTitle=xTitle) + for varName, (fun, binning, title, xTitle) in jetVars.iteritems() + for iJ, jet in izip(count(1), (cand.J1, cand.J2)) + ) + return plots if __name__ == "__main__": @@ -284,7 +313,7 @@ def makeControlPlots(record, hza): from cp3_llbb.CommonTools.histfactory import createPlotter, compilePlotter, runPlotterOnSamples, prepareWorkdir import os - workdir = os.path.join(os.getcwd(), "latest_ratechecks") + workdir = os.path.join(os.getcwd(), "latest_zaplots") if not args.reusehistos: if not args.reuseplotter: plotterdir, histosdir, plotsdir = prepareWorkdir(workdir) From b150fc15aac4f862d0f613a85871240189e19423 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Tue, 6 Mar 2018 09:19:05 +0100 Subject: [PATCH 11/14] Add JEC and JER systematic variations --- scripts/example_plotsHtoZA.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py index 448076a..166bab7 100755 --- a/scripts/example_plotsHtoZA.py +++ b/scripts/example_plotsHtoZA.py @@ -297,7 +297,14 @@ def makeControlPlots(record, hza): skelF = ROOT.TFile.Open("/storage/data/cms/store/user/asaggio/DYToLL_2J_13TeV-amcatnloFXFX-pythia8/DYToLL_2J_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8_Summer16MiniAODv2/180219_144051/0000/output_mc_271.root") tup = decoratedZATree(skelF.Get("t")) + from itertools import chain controlPlots = makeControlPlots(tup, tup.hZA) + controlPlots_systVariations = [] + for svNm in ("jecup", "jecdown", "jerup", "jerdown"): + plots_sv = makeControlPlots(tup, getattr(tup.hZA, svNm)) + for ap in plots_sv: + ap.name = "{0}__{1}".format(ap.name, svNm) + controlPlots_systVariations += plots_sv import os.path from cp3_llbb.SAMADhi.SAMADhi import DbStore, Sample @@ -319,10 +326,9 @@ def makeControlPlots(record, hza): plotterdir, histosdir, plotsdir = prepareWorkdir(workdir) includes = [ "Math/VectorUtil.h", "ScaleFactors.h" ] - from itertools import chain sf_init_lines = list(chain.from_iterable(sf.cpp_initCode for sf in scale_factors)) - createPlotter(controlPlots, tup, includes=includes, outdir=plotterdir, + createPlotter(controlPlots+controlPlots_systVariations, tup, includes=includes, outdir=plotterdir, user_initialisation=sf_init_lines, addScaleFactorsLib=True) plotterName = compilePlotter(plotterdir) else: From c07e2772ab36ef6d31664f38d341afb1be5db4de Mon Sep 17 00:00:00 2001 From: Pieter David Date: Tue, 6 Mar 2018 14:38:49 +0100 Subject: [PATCH 12/14] ZA types: compile the library on demand (by including the header) --- scripts/example_plotsHtoZA.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/example_plotsHtoZA.py b/scripts/example_plotsHtoZA.py index 166bab7..6cdfc85 100755 --- a/scripts/example_plotsHtoZA.py +++ b/scripts/example_plotsHtoZA.py @@ -18,7 +18,7 @@ import ROOT ## Load the library -ROOT.gSystem.Load("libcp3_llbbZAAnalysis.so") +ROOT.gROOT.ProcessLine('#include "cp3_llbb/ZAAnalysis/interface/Types.h"') hZAObjectStubMap = SmartObjectStub.Map({ "HtoZA::Lepton" : [ From bf5ccdd1eaef11d9193757c26305d1b38d274043 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Fri, 9 Mar 2018 17:23:11 +0100 Subject: [PATCH 13/14] Add python/__init__.py with some paths it was in .gitignore so far... also modified to default to using the files, so this should work with only PyROOT (with one warning on the first import) --- .gitignore | 1 - python/__init__.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 python/__init__.py diff --git a/.gitignore b/.gitignore index dbc7fd1..0d20b64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ -python/__init__.py *.pyc diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 0000000..6a82583 --- /dev/null +++ b/python/__init__.py @@ -0,0 +1,13 @@ +__all__ = ("pathCommonTools", "pathCP3llbb", "pathCMS") +import os.path +pathCommonTools = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +pathCP3llbb = os.path.dirname(os.path.abspath(pathCommonTools)) +_pathCMS_paths = os.path.dirname(os.path.dirname(pathCP3llbb)) +import os +_pathCMS_env = os.getenv("CMSSW_BASE") +pathCMS = _pathCMS_paths +if _pathCMS_env == "": + print("Warning: Could not get CMSSW_BASE variable from the environment") +elif _pathCMS_env != _pathCMS_paths: + print("Warning: CommonTools is not using the CMSSW release version from $CMSSW_BASE ({0} versus {1}), using the latter".format(_pathCMS_paths, _pathCMS_env)) + pathCMS = _pathCMS_paths From bf905053a12c30faaefcbce9922cc0d3fb7cfec8 Mon Sep 17 00:00:00 2001 From: Pieter David Date: Mon, 12 Mar 2018 08:50:15 +0100 Subject: [PATCH 14/14] Add script for standalone (only pyroot) install --- installpy_standalone.sh | 42 +++++++++++++++++++++++++++++++++++++++++ python/__init__.py | 2 +- 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100755 installpy_standalone.sh diff --git a/installpy_standalone.sh b/installpy_standalone.sh new file mode 100755 index 0000000..c7d8bc5 --- /dev/null +++ b/installpy_standalone.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Creates a scram-like (symlinked) python install directory for *Tools packages +# in the equivalent of ${CMSSW_BASE}/src/install_python (or ${PREFIX}/install_python, if PREFIX is defined) +# +thisscript="$(realpath ${0})" +commontoolspath="$(dirname ${thisscript})" +cp3llbbpath="$(dirname ${commontoolspath})" +if [[ "$(basename ${cp3llbbpath})" != "cp3_llbb" ]]; then + echo "Directory above CommonTools is not called cp3_llbb, aborting" + exit 1 +fi +basepath="$(dirname ${cp3llbbpath})" +if [[ -z "${PREFIX}" ]]; then + PREFIX="${basepath}" +fi +installpath="${PREFIX}/install_python" +if [[ -a "${installpath}" ]]; then + echo "Install path ${installpath} already exists, aborting" + exit 1 +fi +echo "Installing into ${installpath}, make sure to add it to PYTHONPATH as well" +mkdir -p "${installpath}" +pushd "${basepath}" > /dev/null +for pkgpy in */*Tools/python; do + pkgpath_py="${basepath}/${pkgpy}" + if [[ ! -d "${pkgpath_py}" ]]; then + echo "ASSERT FAILURE: ${pkgpath_py}" + exit 1 + fi + pkgname_full="$(dirname ${pkgpy})" + pkgname="$(basename ${pkgname_full})" + hatname="$(dirname ${pkgname_full})" + hatinstalldir="${installpath}/${hatname}" + mkdir -p "${hatinstalldir}" + hatinitpy="${hatinstalldir}/__init__.py" + if [[ ! -f "${hatinitpy}" ]]; then + echo "" > "${hatinitpy}" + fi + ln -s "${pkgpath_py}" "${hatinstalldir}/${pkgname}" + echo "Installed ${pkgname_full}" +done diff --git a/python/__init__.py b/python/__init__.py index 6a82583..3e30dd5 100644 --- a/python/__init__.py +++ b/python/__init__.py @@ -9,5 +9,5 @@ if _pathCMS_env == "": print("Warning: Could not get CMSSW_BASE variable from the environment") elif _pathCMS_env != _pathCMS_paths: - print("Warning: CommonTools is not using the CMSSW release version from $CMSSW_BASE ({0} versus {1}), using the latter".format(_pathCMS_paths, _pathCMS_env)) + print("Warning: CommonTools is not using the CMSSW release version from $CMSSW_BASE ({0} versus {1}), using the former".format(_pathCMS_paths, _pathCMS_env)) pathCMS = _pathCMS_paths