diff --git a/liberty/.clangd b/liberty/.clangd index 9e4ac78e..0f1417b8 100644 --- a/liberty/.clangd +++ b/liberty/.clangd @@ -1,5 +1,5 @@ CompileFlags: # Tweak the parse settings - Add: [-Wold-style-cast] # treat all files as C++, enable more warnings + Add: [-Wold-style-cast, -Wunused-variable, -Wsigned-compare] # treat all files as C++, enable more warnings Diagnostics: ClangTidy: Add: bugprone-* diff --git a/liberty/include/liberty/SLAMP/SLAMP.h b/liberty/include/liberty/SLAMP/SLAMP.h index 5a9c66bd..942ada53 100644 --- a/liberty/include/liberty/SLAMP/SLAMP.h +++ b/liberty/include/liberty/SLAMP/SLAMP.h @@ -49,6 +49,7 @@ class SLAMP: public ModulePass void instrumentDestructor(Module& m); void instrumentGlobalVars(Module& m, Function* ctor); void instrumentAllocas(Module& m); + void instrumentBasePointer(Module &m, Loop *l); // functions used in instrumentAllocas void findLifetimeMarkers(Value* i, set& already, std::vector& starts, std::vector& ends); @@ -58,7 +59,8 @@ class SLAMP: public ModulePass void instrumentNonStandards(Module& m, Function* ctor); void allocErrnoLocation(Module& m, Function* ctor); - + void instrumentLoopStartStopForAll(Module &m); + void instrumentFunctionStartStop(Module&m); void instrumentLoopStartStop(Module&m, Loop* l); void instrumentInstructions(Module& m, Loop* l); diff --git a/liberty/lib/Repl/ReplParse.hpp b/liberty/lib/Repl/ReplParse.hpp index 5c89af74..717142cc 100644 --- a/liberty/lib/Repl/ReplParse.hpp +++ b/liberty/lib/Repl/ReplParse.hpp @@ -18,6 +18,7 @@ enum ReplAction { RemoveAll, Parallelize, Modref, + Save, Unknown = -1 }; @@ -45,6 +46,7 @@ const map ReplActions = { {"parallelize", ReplAction::Parallelize}, {"p", ReplAction::Parallelize}, {"modref", ReplAction::Modref}, + {"save", ReplAction::Save} }; // a helper to get the vocabulary of Repl, to help the auto completion @@ -70,6 +72,7 @@ const map HelpText = { {RemoveAll, "removeAll/ra $inst_id: \tremove all dependences from and to a instruction from the loop"}, 
{Parallelize, "paralelize/p: \tparallelize the selected loop with current dependences"}, {Modref, "modref/mr $inst_id1, $inst_id2: \tquery the modref between two instructions"}, + {Save, "save $filename: \tsave the loop to a file"}, {Quit, "quit/q: quit the repl"}, }; diff --git a/liberty/lib/Repl/repl.cpp b/liberty/lib/Repl/repl.cpp index 29d8b1d5..e4d4a655 100644 --- a/liberty/lib/Repl/repl.cpp +++ b/liberty/lib/Repl/repl.cpp @@ -1,8 +1,8 @@ #include "liberty/LoopProf/Targets.h" #include "liberty/Orchestration/Orchestrator.h" #include "liberty/Orchestration/PSDSWPCritic.h" -#include "liberty/Speculation/PDGBuilder.hpp" #include "liberty/Strategy/ProfilePerformanceEstimator.h" +#include "scaf/Utilities/ModuleLoops.h" #include "scaf/Utilities/ReportDump.h" #include "scaf/Utilities/Metadata.h" #include "llvm/Pass.h" @@ -22,399 +22,182 @@ #include "noelle/core/PDG.hpp" #include "noelle/core/PDGPrinter.hpp" #include "noelle/core/SCCDAG.hpp" +#include "noelle/core/LoopDependenceInfo.hpp" #include "ReplParse.hpp" +#include "noelle/tools/Repl.hpp" + using namespace llvm; using namespace std; using namespace liberty; using namespace llvm::noelle; -class OptRepl : public ModulePass { +cl::opt HistoryFileName("cpf-repl-history", cl::desc("Specify command history file name"), cl::init("")); + +class CpfRepl : public ModulePass { public: static char ID; void getAnalysisUsage(AnalysisUsage &au) const; - StringRef getPassName() const { return "remed-selector"; } + StringRef getPassName() const { return "cpf-repl"; } bool runOnModule(Module &M); - OptRepl() : ModulePass(ID) {} + CpfRepl() : ModulePass(ID) {} }; -char OptRepl::ID = 0; -static RegisterPass rp("opt-repl", "Opt Repl"); +char CpfRepl::ID = 0; +static RegisterPass rp("cpf-repl", "CPF Repl"); -void OptRepl::getAnalysisUsage(AnalysisUsage &au) const { +void CpfRepl::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired(); au.addRequired(); au.addRequired(); - au.addRequired(); au.addRequired< LoopProfLoad >(); 
au.addRequired< ProfilePerformanceEstimator >(); - au.addRequired(); + au.addRequired< LoopInfoWrapperPass >(); au.setPreservesAll(); } -typedef map *> InstIdMap_t; -typedef map *, unsigned> InstIdReverseMap_t; -typedef map *> DepIdMap_t; -typedef map *, uint32_t> DepIdReverseMap_t; - -// helper function to generate -static unique_ptr createInstIdMap(PDG *pdg) { - auto instIdMap = std::make_unique(); - unsigned instId = 0; - for (auto &instNode : pdg->getNodes()) { - instIdMap->insert(make_pair(instId, instNode)); - instId++; - } - - return instIdMap; -} - -static unique_ptr createInstIdLookupMap(InstIdMap_t m) { - auto lookupMap = std::make_unique(); - for (auto &[instId, node] : m) { - lookupMap->insert(make_pair(node, instId)); - } - - return lookupMap; -} - -static shared_ptr createDepIdLookupMap(DepIdMap_t m) { - auto lookupMap = std::make_shared(); - for (auto &[instId, node] : m) { - lookupMap->insert(make_pair(node, instId)); - } - - return lookupMap; -} - -// a simple autocompletion generator -char* completion_generator(const char* text, int state) { - // This function is called with state=0 the first time; subsequent calls are - // with a nonzero state. state=0 can be used to perform one-time - // initialization for this completion session. - static std::vector matches; - static size_t match_index = 0; - - if (state == 0) { - // During initialization, compute the actual matches for 'text' and keep - // them in a static vector. - matches.clear(); - match_index = 0; - - // Collect a vector of matches: vocabulary words that begin with text. 
- std::string textstr = std::string(text); - for (auto word : ReplVocab) { - if (word.size() >= textstr.size() && - word.compare(0, textstr.size(), textstr) == 0) { - matches.push_back(word); - } +class CpfReplDriver: public Repl::ReplDriver { + private: + LoopAA* loopAA; + const Targets &targets; + ModuleLoops &mloops; + vector loopAAs; + Pass &pass; + vector loopAAEnabled; + + Loop *getSelectedLLVMLoop() { + auto loops = &pass.getAnalysis(*selectedLoop->getLoopStructure()->getFunction()).getLoopInfo(); + auto loop = loops->getLoopFor(selectedLoop->getLoopStructure()->getHeader()); + return loop; } - } - - if (match_index >= matches.size()) { - // We return nullptr to notify the caller no more matches are available. - return nullptr; - } else { - // Return a malloc'd char* for the match. The caller frees it. - return strdup(matches[match_index++].c_str()); - } -} - -char** completer(const char* text, int start, int end) { - // Don't do filename completion even if our generator finds no matches. - rl_attempted_completion_over = 1; - - // Note: returning nullptr here will make readline use the default filename - // completer. 
- return rl_completion_matches(text, completion_generator); -} + public: + CpfReplDriver(Noelle &noelle, Module &m, LoopAA* loopAA, const Targets &targets, ModuleLoops &mloops, Pass &pass) : ReplDriver(noelle, m), loopAA(loopAA), targets(targets), mloops(mloops), pass(pass) { + // initialize loop aa + auto aa = loopAA; + while (aa) { + loopAAs.push_back(aa); + aa = aa->getNextAA(); + } + unsigned numLoopAAs = loopAAs.size(); + loopAAEnabled = vector(numLoopAAs, true); -bool OptRepl::runOnModule(Module &M) { - bool modified = false; - - ModuleLoops &mloops = getAnalysis(); - const Targets &targets = getAnalysis(); - PDGBuilder &pdgbuilder = getAnalysis(); - - // store the loopID - map loopIdMap; - - // the selected information - llvm::Function *selectedFunction; // TODO: not used yet - Loop *selectedLoop; - unique_ptr selectedPDG; - unique_ptr selectedSCCDAG; - - unique_ptr instIdMap; - unique_ptr instIdLookupMap; - unique_ptr depIdMap; - shared_ptr depIdLookupMap; - - // prepare hot loops from the targets - { - unsigned loopId = 0; - for (Targets::iterator i = targets.begin(mloops), e = targets.end(mloops); - i != e; ++i) { - Loop *loop = *i; - loopIdMap[loopId++] = loop; - continue; + outs() << "LoopAA (" << numLoopAAs << "): "; + loopAA->dump(); } - } - rl_attempted_completion_function = completer; - // the main repl while loop - while (true) { - string query; - char *buf = readline("(opt-repl) "); - query = (const char *)(buf); - if (query.size() > 0) { - add_history(buf); - free(buf); // free the buf readline created + string prompt() override { + stringstream ss; + ss << "(cpf-repl"; + if (selectedLoopId != -1) + ss << " loop " << selectedLoopId; + ss << ") "; + return ss.str(); } - // check if it's quit or unknown - ReplParser parser(query); - if (parser.getAction() == ReplAction::Quit) - break; - - if (parser.getAction() == ReplAction::Unknown) { - outs() << "Unknown command!\n"; - continue; + void createLoopMap() override { + + outs() << "CPF create loop 
map\n"; + + // prepare hot loops from the targets + unsigned loopId = 0; + for (Targets::iterator i = targets.begin(mloops), e = targets.end(mloops); + i != e; ++i) { + Loop *loop = *i; + LoopStructure loopStructure(loop); + auto ldi = noelle.getLoop(&loopStructure); + loopIdMap[loopId++] = ldi; + continue; + } } - // print all loops - auto loopsFn = [&loopIdMap]() { + void loopsFn() override { + outs() << "List of hot loops:\n"; - for (auto &[loopId, loop] : loopIdMap) { - outs() << loopId << ": " << loop->getHeader()->getParent()->getName() - << "::" << loop->getHeader()->getName() << '\n'; - } - }; + auto &load = pass.getAnalysis< LoopProfLoad >(); - // select one loop - auto selectFn = [&loopIdMap, &parser, &selectedLoop, &selectedPDG, &selectedSCCDAG, &pdgbuilder, &instIdMap, &instIdLookupMap]() { - int loopId = parser.getActionId(); - if (loopId == -1) { - outs() << "No number specified\n"; - return; - } + for (auto &[loopId, loop] : loopIdMap) { + auto header = loop->getLoopStructure()->getHeader(); + // get loop id and print it + outs() << loopId; - if (loopIdMap.find(loopId) == loopIdMap.end()) { - outs() << "Loop " << loopId << " does not exist\n"; - return; - } + auto loopNamerId = Namer::getBlkId(header); + if (loopNamerId != -1) + outs() << " (" << loopNamerId << ")"; - Loop *loop = loopIdMap[loopId]; - outs() << "Selecting loop " << loopId << ": "; - outs() << loop->getHeader()->getParent()->getName() - << "::" << loop->getHeader()->getName() << '\n'; - selectedLoop = loop; - selectedPDG = pdgbuilder.getLoopPDG(loop); - selectedSCCDAG = std::make_unique(selectedPDG.get()); + outs() << ": " << header->getName() + << "::" << header->getName(); + Instruction *term = header->getTerminator(); + if (term) + liberty::printInstDebugInfo(term); - instIdMap = createInstIdMap(selectedPDG.get()); - instIdLookupMap = createInstIdLookupMap(*instIdMap); - }; + char percent[10]; + const unsigned long loop_time = load.getLoopTime(header); - // show help - auto helpFn 
= [&parser]() { - string action = parser.getStringAfterAction(); - if (ReplActions.find(action) != ReplActions.end()) { - outs() << HelpText.at(ReplActions.at(action)) << "\n"; - } - else { - for (auto &[action, explaination] : HelpText) { - outs() << explaination << "\n"; - } + snprintf(percent,10, "%.1f", 100.0 * loop_time / load.getTotTime()); + errs() << "\tTime " << loop_time << " / " << load.getTotTime() + << " Coverage: " << percent << "%\n"; } - }; - - // early checks for several actions that do not need the loop set - if (parser.getAction() == ReplAction::Loops) { - loopsFn(); - continue; } - if (parser.getAction() == ReplAction::Select) { - selectFn(); - continue; - } + void instsFn() override { - if (parser.getAction() == ReplAction::Help){ - helpFn(); - continue; - } - - // after this assume the loop has been selected - if (!selectedLoop) { - outs() << "No loops selected\n"; - continue; - } - - // dump information about the loop - auto dumpFn = [&parser, &selectedLoop, &selectedPDG, &selectedSCCDAG]() { - outs() << *selectedLoop; - outs() << "Number of instructions: " - << selectedPDG->getNumberOfInstructionsIncluded() << "\n"; - outs() << "Number of dependences: " - << selectedPDG->getNumberOfDependencesBetweenInstructions() - << "\n"; - outs() << "Number of SCCs: " << selectedSCCDAG->numNodes(); - - outs() << "\n"; - - if (parser.isVerbose()) { - for (auto block : selectedLoop->getBlocks()) { - outs() << *block; - } - } - outs() << "\n"; - }; - - // show instructions with id - auto instsFn = [&parser, &instIdMap]() { auto printDebug = parser.isVerbose(); - for (auto &[instId, node] : *instIdMap) { - auto *inst = dyn_cast(node->getT()); - // not an instruction - if (!inst) { - outs() << instId << "\t" << *node->getT() << "\n"; - continue; - } - - auto instNamerId = Namer::getInstrId(inst); - outs() << instId << " (" << instNamerId << ")\t" << *node->getT(); - - if (printDebug) { - liberty::printInstDebugInfo(inst); - } - - outs()<< "\n"; - } - }; - - 
// helper function for dumping edge - auto dumpEdge = [&instIdLookupMap](unsigned depId, DGEdge *edge) { - auto idA = instIdLookupMap->at(edge->getOutgoingNode()); - auto idB = instIdLookupMap->at(edge->getIncomingNode()); - outs() << depId << "\t" << idA << "->" << idB << ":\t" << edge->toString() << (edge->isLoopCarriedDependence() ? "(LC)" : "(LL)") - << "\n"; - }; - - // show all deps with id; also generate a currentPDG.dot file, the edge number is annotated on the PDG - auto depsFn = [&instIdLookupMap, &parser, &depIdMap, &depIdLookupMap, &selectedPDG, &dumpEdge, &instIdMap]() { - int fromId = parser.getFromId(); - int toId = parser.getToId(); - if (fromId != -1) { - if (instIdMap->find(fromId) == instIdMap->end()) { - outs() << "From InstId " << fromId<< " not found\n"; - return; - } - } - - if (toId != -1) { - if (instIdMap->find(toId) == instIdMap->end()) { - outs() << "To InstId " << toId<< " not found\n"; - return; + int queryInstId = parser.getActionId(); + + // print the selected instruction + if (queryInstId != -1) { + bool found = false; + for (auto &[instId, node] : *instIdMap) { + auto *inst = dyn_cast(node->getT()); + auto instNamerId = Namer::getInstrId(inst); + if (queryInstId == instNamerId) { + outs() << instId << " (" << queryInstId << ")\t" << *inst; + if (printDebug) { + liberty::printInstDebugInfo(inst); + } + outs() << "\n"; + found = true; + break; + } } - } - depIdMap = std::make_unique(); - unsigned id = 0; - if (fromId == -1 && toId == -1) { // both not specified - for (auto &edge : selectedPDG->getEdges()) { - dumpEdge(id, edge); - depIdMap->insert(make_pair(id++, edge)); + if (!found) { + outs() << "Instruction with NamerId " << queryInstId + << " not found\n"; } - } else if (fromId != -1 && toId != -1) { // both specified - auto fromNode = instIdMap->at(fromId); - auto toNode = instIdMap->at(toId); - for (auto &edge : fromNode->getOutgoingEdges()) { - if (edge->getIncomingNode() == toNode) { - dumpEdge(id, edge); - 
depIdMap->insert(make_pair(id++, edge)); + } else { // print all instructions + for (auto &[instId, node] : *instIdMap) { + auto *inst = dyn_cast(node->getT()); + // not an instruction + if (!inst) { + outs() << instId << "\t" << *node->getT() << "\n"; + continue; } - } - } else if (fromId != -1) { // from is specified - auto node = instIdMap->at(fromId); - for (auto &edge : node->getOutgoingEdges()) { - dumpEdge(id, edge); - depIdMap->insert(make_pair(id++, edge)); - } - } else if (toId != -1) { // to is specified - auto node = instIdMap->at(toId); - for (auto &edge : node->getIncomingEdges()) { - dumpEdge(id, edge); - depIdMap->insert(make_pair(id++, edge)); - } - } - - depIdLookupMap = createDepIdLookupMap(*depIdMap); - selectedPDG->setDepLookupMap(depIdLookupMap); - llvm::noelle::DGPrinter::writeClusteredGraph("currentPDG.dot", selectedPDG.get()); - }; - - // remove a dependence - auto removeFn = [&parser, &depIdMap, &selectedPDG, &selectedSCCDAG]() { - int depId = parser.getActionId(); - if (depId == -1) { - outs() << "No number specified\n"; - return; - } - if (depIdMap->find(depId) == depIdMap->end()) { - outs() << "DepId" << depId << " not found\n"; - return; - } + auto instNamerId = Namer::getInstrId(inst); + outs() << instId << " (" << instNamerId << ")\t" << *node->getT(); - auto dep = depIdMap->at(depId); - selectedPDG->removeEdge(dep); - // update SCCDAG - selectedSCCDAG = std::make_unique(selectedPDG.get()); - }; - - // remove all dependence from a instruction node - auto removeAllFromInstFn = [&parser, &instIdMap, &selectedPDG, &selectedSCCDAG]() { - int instId = parser.getActionId(); - if (instId == -1) { - outs() << "No number specified\n"; - return; - } - - if (instIdMap->find(instId) == instIdMap->end()) { - outs() << "InstId" << instId << " not found\n"; - return; - } - - auto node = instIdMap->at(instId); - list*> edgesToRemove; - for (auto &edge : node->getOutgoingEdges()) { - edgesToRemove.push_back(edge); - } - - for (auto &edge : 
node->getIncomingEdges()) { - edgesToRemove.push_back(edge); - } + if (printDebug) { + liberty::printInstDebugInfo(inst); + } - for (auto edge : edgesToRemove) { - selectedPDG->removeEdge(edge); + outs() << "\n"; + } } - // update SCCDAG - selectedSCCDAG = std::make_unique(selectedPDG.get()); - }; + } + void parallelizeFn() override { - // try to parallelize - auto parallelizeFn = [&parser, this, &selectedPDG, &selectedLoop]() { int threadBudget = parser.getActionId(); if (threadBudget == -1) { threadBudget = 28; } - LoopProfLoad *lpl = &getAnalysis(); - auto perf = &getAnalysis(); + LoopProfLoad *lpl = &pass.getAnalysis(); + auto perf = &pass.getAnalysis(); // initialize performance estimator auto psdswp = std::make_shared(perf, threadBudget, lpl); @@ -433,18 +216,23 @@ bool OptRepl::runOnModule(Module &M) { } }; - check(doall, "DOALL", *selectedPDG.get(), selectedLoop); - check(psdswp, "PSDSWPCritic", *selectedPDG.get(), selectedLoop); - }; + auto loop = getSelectedLLVMLoop(); + + check(doall, "DOALL", *selectedPDG.get(), loop); + check(psdswp, "PSDSWPCritic", *selectedPDG.get(), loop); + } + + + void modrefFn() override { - // modref: create a modref query and (optionally explore the loopaa stack) - auto modrefFn = [this, &parser, &instIdMap, &selectedLoop]() { int fromId = parser.getFromId(); int toId = parser.getToId(); + unsigned numLoopAAs = loopAAs.size(); + if (fromId == -1) { - outs() << "From InstId not set\n"; - return; + outs() << "From InstId not set\n"; + return; } else { if (instIdMap->find(fromId) == instIdMap->end()) { @@ -454,8 +242,8 @@ bool OptRepl::runOnModule(Module &M) { } if (toId == -1) { - outs() << "To InstId not set\n"; - return; + outs() << "To InstId not set\n"; + return; } else { if (instIdMap->find(toId) == instIdMap->end()) { @@ -472,48 +260,190 @@ bool OptRepl::runOnModule(Module &M) { return; } - LoopAA *aa = getAnalysis().getTopAA(); + LoopAA *aa = loopAA; Remedies remeds; + Loop *loop = getSelectedLLVMLoop(); if 
(parser.isVerbose()) { // TODO: try all combination of analysis and find a setting that the result is different + + // try all loopAA, from only the first one, to all of them, the last one is always NoLoopAA + liberty::LoopAA::ModRefResult lastRet[3] = {liberty::LoopAA::ModRef, liberty::LoopAA::ModRef, liberty::LoopAA::ModRef}; + for (auto i = 1; i <= numLoopAAs - 1; i++) { + // set the first i loopAA to be enabled(loopAAEnabled[i] = true) + // and the rest to be disabled (loopAAEnabled[i] = false) + // [0~i-1] + for (auto j = 0; j < i; j++) { + loopAAEnabled[j] = true; + } + + // [i~numLoopAAs-2] + for (auto j = i; j < numLoopAAs - 1; j++) { + loopAAEnabled[j] = false; + } + // the last one is always NoLoopAA and enabled + loopAAEnabled[numLoopAAs - 1] = true; + + // configure the loop AAs prev and next based on the enabled/disabled setting + LoopAA *prev, *cur, *next; + prev = nullptr; + cur = nullptr; + next = nullptr; + // cur is the first enabled, next is the second enabled + for (auto j = 0; j < numLoopAAs; j++) { + if (loopAAEnabled[j]) { + // the first one + if (!cur) { + cur = loopAAs[j]; + continue; + } else { + next = loopAAs[j]; + cur->configure(prev, next); + prev = cur; + cur = next; + } + } + } + + // NoLoopAA is always enabled + assert(cur->getLoopAAName() == "NoLoopAA"); + cur->configure(prev, nullptr); + + // aa->dump(); + auto ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Same, toInst, loop, remeds); + auto red = "\033[1;31m"; + auto green = "\033[1;32m"; + auto reset = "\033[0m"; + if (ret != lastRet[0]) { + outs() << "Modref (same) refine from " << red << lastRet[0] << reset + << " to " << red << ret << reset << " with " << green + << loopAAs[i - 1]->getLoopAAName() << reset << "\n"; + } + lastRet[0] = ret; + + ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Before, toInst, loop, remeds); + if (ret != lastRet[1]) { + outs() << "Modref (before) refine from " << red << lastRet[1] << reset + << " to " << red << ret << 
reset << " with " << green + << loopAAs[i - 1]->getLoopAAName() << reset << "\n"; + } + lastRet[1] = ret; + + ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::After, toInst, loop, remeds); + if (ret != lastRet[2]) { + outs() << "Modref (after) refine from " << red << lastRet[2] << reset + << " to " << red << ret << reset << " with " << green + << loopAAs[i - 1]->getLoopAAName() << reset << "\n"; + } + lastRet[2] = ret; + + // outs() << *fromInst << "->" << *toInst << ": (Same)" << ret << " with " << remeds.size() << " remedies\n"; + // ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Before, toInst, loop, remeds); + // outs() << *fromInst << "->" << *toInst << ": (Before)" << ret << " with " << remeds.size() << " remedies\n"; + // ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::After, toInst, loop, remeds); + // outs() << *fromInst << "->" << *toInst << ": (After)" << ret << " with " << remeds.size() << " remedies\n"; + } } else { - auto ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Same, toInst, selectedLoop, remeds); + auto ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Same, toInst, loop, remeds); outs() << *fromInst << "->" << *toInst << ": (Same)" << ret << " with " << remeds.size() << " remedies\n"; - ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Before, toInst, selectedLoop, remeds); + ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::Before, toInst, loop, remeds); outs() << *fromInst << "->" << *toInst << ": (Before)" << ret << " with " << remeds.size() << " remedies\n"; - ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::After, toInst, selectedLoop, remeds); + ret = aa->modref(fromInst, liberty::LoopAA::TemporalRelation::After, toInst, loop, remeds); outs() << *fromInst << "->" << *toInst << ": (After)" << ret << " with " << remeds.size() << " remedies\n"; } - }; + } +}; - switch (parser.getAction()) { - case ReplAction::Deps: - depsFn(); - break; 
- case ReplAction::Dump: - dumpFn(); - break; - case ReplAction::Insts: - instsFn(); - break; - case ReplAction::Remove: - removeFn(); - break; - case ReplAction::RemoveAll: - removeAllFromInstFn(); - break; - case ReplAction::Parallelize: - parallelizeFn(); - break; - case ReplAction::Modref: - modrefFn(); +// a simple autocompletion generator +char* completion_generator(const char* text, int state) { + // This function is called with state=0 the first time; subsequent calls are + // with a nonzero state. state=0 can be used to perform one-time + // initialization for this completion session. + static std::vector matches; + static size_t match_index = 0; + + if (state == 0) { + // During initialization, compute the actual matches for 'text' and keep + // them in a static vector. + matches.clear(); + match_index = 0; + + // Collect a vector of matches: vocabulary words that begin with text. + std::string textstr = std::string(text); + for (auto word : ReplVocab) { + if (word.size() >= textstr.size() && + word.compare(0, textstr.size(), textstr) == 0) { + matches.push_back(word); + } + } + } + + if (match_index >= matches.size()) { + // We return nullptr to notify the caller no more matches are available. + return nullptr; + } else { + // Return a malloc'd char* for the match. The caller frees it. + return strdup(matches[match_index++].c_str()); + } +} + +char** completer(const char* text, int start, int end) { + // Don't do filename completion even if our generator finds no matches. + rl_attempted_completion_over = 1; + + // Note: returning nullptr here will make readline use the default filename + // completer. 
+ return rl_completion_matches(text, completion_generator); +} + + +bool CpfRepl::runOnModule(Module &M) { + bool modified = false; + + auto &noelle = getAnalysis(); + + ModuleLoops &mloops = getAnalysis(); + const Targets &targets = getAnalysis(); + + // have a vector of all the loop aas + LoopAA* loopAA = (LoopAA*)getSCAFLoopAA(); + + CpfReplDriver driver(noelle, M, loopAA, targets, mloops, *this); + driver.createLoopMap(); + + rl_attempted_completion_function = completer; + // execute command history file if specified + string historyFileName = HistoryFileName; + if (historyFileName != "") { + read_history(historyFileName.c_str()); + // DISCUSSION: the last command won't get executed if using 'i < + // history_length' + for (int i = history_base; i <= history_length; i++) { + char *buf = history_get(i)->line; + string query = (const char *)(buf); + driver.run(query); + } + clear_history(); + } + + // the main repl while loop + while (true) { + if (driver.hasTerminated()) { break; - default: - outs() << "SHOULD NOT HAPPEN\n"; + } + + char *buf = readline(driver.prompt().c_str()); + if (!buf) { + outs() << "Quit\n"; break; } + string query = (const char *)(buf); + if (query.size() > 0) { + add_history(buf); + free(buf); // free the buf readline created + } + driver.run(query); } return modified; diff --git a/liberty/lib/SLAMP/CMakeLists.txt b/liberty/lib/SLAMP/CMakeLists.txt index 4bcfe78d..2ca86ba5 100644 --- a/liberty/lib/SLAMP/CMakeLists.txt +++ b/liberty/lib/SLAMP/CMakeLists.txt @@ -19,3 +19,9 @@ include_directories(./) add_llvm_library(${PassName} SHARED ${SRCS}) # This is to generate libxxx.so add_subdirectory(SLAMPlib/hooks) +add_subdirectory(SLAMPnng) +add_subdirectory(SLAMPboost) +add_subdirectory(SLAMPatomicq) +add_subdirectory(SLAMPstats) +add_subdirectory(SLAMPsmtxq) +add_subdirectory(SLAMPcustom) diff --git a/liberty/lib/SLAMP/SLAMP.cpp b/liberty/lib/SLAMP/SLAMP.cpp index 5189ee50..038ec622 100644 --- a/liberty/lib/SLAMP/SLAMP.cpp +++ 
b/liberty/lib/SLAMP/SLAMP.cpp @@ -3,9 +3,11 @@ // Single Loop Aware Memory Profiler. // -#include "llvm/IR/LLVMContext.h" #define DEBUG_TYPE "SLAMP" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/raw_ostream.h" + #define USE_PDG #ifdef USE_PDG @@ -14,6 +16,8 @@ #include "liberty/SLAMP/SLAMP.h" #include "liberty/SLAMP/externs.h" +#include "scaf/Utilities/CallSiteFactory.h" +#include "liberty/PointsToProfiler/Indeterminate.h" #include "llvm/IR/CFG.h" #include "llvm/ADT/Statistic.h" @@ -98,7 +102,7 @@ static cl::opt UseLinearValueModule("slamp-linear-value-module", cl::init( static cl::opt UseConstantAddressModule("slamp-constant-address-module", cl::init(false), cl::NotHidden, cl::desc("Use address module")); // linear address module -static cl::opt UseLinearAddressModule("slamp-linear-address-module", cl::init(false), cl::NotHidden, cl::desc("Use linear address module")); + static cl::opt UseLinearAddressModule("slamp-linear-address-module", cl::init(false), cl::NotHidden, cl::desc("Use linear address module")); // trace module static cl::opt UseTraceModule("slamp-trace-module", cl::init(false), cl::NotHidden, cl::desc("Use trace module")); @@ -142,6 +146,7 @@ SLAMP::~SLAMP() = default; void SLAMP::getAnalysisUsage(AnalysisUsage &au) const { // au.addRequired(); // use static ID (requires the bitcode to be exact the same) au.addRequired(); + au.addRequired(); #ifdef USE_PDG au.addRequired(); au.addRequired(); @@ -191,7 +196,25 @@ Instruction* updateDebugInfo(Instruction *inserted, Instruction *location, Modul return inserted; } +unordered_map instOffsetMap; + +void generateInstOffset(Module &m) { + // generate instruction offset + for (auto &f : m) { + if (f.isDeclaration()) continue; + for (auto &bb : f) { + unsigned offsetWithinBlock = 0; + for (auto &inst : bb) { + instOffsetMap[&inst] = offsetWithinBlock++; + } + } + } +} + bool SLAMP::runOnModule(Module &m) { + // generate instruction offset + generateInstOffset(m); + LLVMContext &ctxt = 
m.getContext(); // frequently used types @@ -287,21 +310,21 @@ bool SLAMP::runOnModule(Module &m) { // debug LLVM_DEBUG(if (Namer::getInstrId(&I) == 21182) { - Remedies remedies; - LoopAA::ModRefResult modrefIIFW = aa->modref( - target_inst, LoopAA::Same, &I, target_loop, remedies); - auto modrefIIBW = aa->modref(&I, LoopAA::Same, target_inst, - target_loop, remedies); - errs() << "Target inst: " << *target_inst << "\n"; - errs() << "I: " << I << "\n"; - // convert retLCFW to int and print - errs() << "retLCFW: " << (int)(retLCFW) << "\n"; - errs() << "retLCBW: " << (int)(retLCBW) << "\n"; - errs() << "retIIFW: " << (int)(retIIFW) << "\n"; - errs() << "retIIBW: " << (int)(retIIBW) << "\n"; - errs() << "modrefIIFW: " << (modrefIIFW) << "\n"; - errs() << "modrefIIBW: " << (modrefIIBW) << "\n"; - }); + Remedies remedies; + LoopAA::ModRefResult modrefIIFW = aa->modref( + target_inst, LoopAA::Same, &I, target_loop, remedies); + auto modrefIIBW = aa->modref(&I, LoopAA::Same, target_inst, + target_loop, remedies); + errs() << "Target inst: " << *target_inst << "\n"; + errs() << "I: " << I << "\n"; + // convert retLCFW to int and print + errs() << "retLCFW: " << (int)(retLCFW) << "\n"; + errs() << "retLCBW: " << (int)(retLCBW) << "\n"; + errs() << "retIIFW: " << (int)(retIIFW) << "\n"; + errs() << "retIIBW: " << (int)(retIIBW) << "\n"; + errs() << "modrefIIFW: " << (modrefIIFW) << "\n"; + errs() << "modrefIIBW: " << (modrefIIBW) << "\n"; + }); // RAW disproved for all deps if ((retLCFW & 0b001) && (retLCBW & 0b001) && (retIIFW & 0b001) && (retIIBW & 0b001)) { @@ -409,7 +432,7 @@ bool SLAMP::runOnModule(Module &m) { std::sort(elidedLoopInstsId.begin(), elidedLoopInstsId.end()); errs() << "Elided Hash: " << elidedHash(elidedLoopInstsId) << "\n"; - // replace external function calls to wrapper function calls + //// replace external function calls to wrapper function calls replaceExternalFunctionCalls(m); @@ -451,13 +474,18 @@ bool SLAMP::runOnModule(Module &m) { 
instrumentGlobalVars(m, ctor); } - if (UsePointsToModule){ + //// FIXME: temporarily instrument alloca and base pointer for all cases + // if (UsePointsToModule){ instrumentAllocas(m); - } + // instrument all base pointer creation + instrumentBasePointer(m, this->target_loop); + // } + instrumentFunctionStartStop(m); instrumentMainFunction(m); instrumentLoopStartStop(m, this->target_loop); + instrumentLoopStartStopForAll(m); instrumentInstructions(m, this->target_loop); @@ -517,7 +545,7 @@ bool SLAMP::mayCallSetjmpLongjmp(Loop *loop) { getCallableFunctions(loop, callables); return (find_if(callables.begin(), callables.end(), is_setjmp_or_longjmp) != - callables.end()); + callables.end()); } void SLAMP::getCallableFunctions(Loop *loop, set &callables) { @@ -558,7 +586,7 @@ void SLAMP::getCallableFunctions(CallInst *ci, set &callables) { void SLAMP::getCallableFunctions(Function *f, set &callables) { for (inst_iterator ii = inst_begin(f); ii != inst_end(f); ii++) { - // FIXME: not only callinst are callable + // FIXME: not only callinst are callable auto *ci = dyn_cast(&*ii); if (!ci) continue; @@ -587,7 +615,7 @@ void SLAMP::getFunctionsWithSign(CallInst *ci, set matched) { Function::arg_iterator fai; CallSite::arg_iterator cai; for (fai = func->arg_begin(), cai = cs.arg_begin(); - fai != func->arg_end(); fai++, cai++) { + fai != func->arg_end(); fai++, cai++) { Value *af = &*fai; Value *ac = *cai; if (af->getType() != ac->getType()) { @@ -602,6 +630,31 @@ void SLAMP::getFunctionsWithSign(CallInst *ci, set matched) { } } +std::string getInstructionName(Instruction *inst) { + auto fcn = inst->getParent()->getParent(); + auto bb = inst->getParent(); + + std::stringstream sout; + sout << fcn->getName().str() << ' ' << bb->getName().str() << ' '; + + if( inst->hasName() ) + sout << inst->getName().str(); + else { + // find the offset within the block + sout << '$' << instOffsetMap[inst]; + } + + return sout.str(); +} + +std::string getArgName(Argument *arg) { + Function 
*fcn = arg->getParent(); + + std::ostringstream name; + name << "argument " << fcn->getName().str() << " %" << arg->getArgNo(); + return name.str(); +} + // Replace external functions with SLAMP prefixed ones (SLAMP_xxx) // The list of SLAMP functions are given in `externs.h` void SLAMP::replaceExternalFunctionCalls(Module &m) { @@ -609,18 +662,18 @@ void SLAMP::replaceExternalFunctionCalls(Module &m) { auto *push = cast( m.getOrInsertFunction("SLAMP_ext_push", Void, I32).getCallee()); auto *pop = - cast(m.getOrInsertFunction("SLAMP_ext_pop", Void).getCallee()); + cast(m.getOrInsertFunction("SLAMP_ext_pop", Void).getCallee()); set externs; for (unsigned i = 0, e = sizeof(externs_str) / sizeof(externs_str[0]); i < e; - i++) + i++) externs.insert(externs_str[i]); // initialize a set of external functions not to be implemented set ignores; for (unsigned i = 0, - e = sizeof(ignore_externs_str) / sizeof(ignore_externs_str[0]); - i < e; i++) + e = sizeof(ignore_externs_str) / sizeof(ignore_externs_str[0]); + i < e; i++) ignores.insert(ignore_externs_str[i]); vector funcs; @@ -640,7 +693,7 @@ void SLAMP::replaceExternalFunctionCalls(Module &m) { if (func->isIntrinsic()) { // just confirm that all uses is an intrinsic instruction for (Value::user_iterator ui = func->user_begin(); ui != func->user_end(); - ui++) + ui++) assert(isa(*ui)); continue; } @@ -650,75 +703,92 @@ void SLAMP::replaceExternalFunctionCalls(Module &m) { bool hasUnrecognizedFunction = false; for (auto func : funcs) { - string name = func->getName(); + string name = func->getName(); + + // start with SLAMP_, ignore it + if (name.find("SLAMP_") == 0) { + continue; + } - if (externs.find(name) == externs.end()) { - // check if the function argument is `readnone`, then it's pure - if (func->hasFnAttribute(llvm::Attribute::AttrKind::ReadNone)) { + // find all usage of the function + // add a slamp_push and slamp_pop around it + for (auto user : func->users()) { + // get instruction + auto *inst = 
dyn_cast(user); + if (inst == nullptr) continue; - } - // start with SLAMP_, ignore it - if (name.find("SLAMP_") == 0) { + + // make sure it's a call to the function + if (!isa(inst)) + continue; + auto *cb = dyn_cast(inst); + if (cb->getCalledFunction() != func) continue; - } - errs() << "WARNING: Wrapper for external function " << name - << " not implemented.\n"; - hasUnrecognizedFunction = true; - // find all usage of the function - // add a slamp_push and slamp_pop around it - for (auto user : func->users()) { - // get instruction - auto *inst = dyn_cast(user); - if (inst == nullptr) - continue; - // FIXME: duplicated code as instrumentLoopInst - auto id = Namer:: getInstrId(inst); - if (id == -1) { - continue; - } - vector args; - args.push_back(ConstantInt::get(I32, id)); - InstInsertPt pt = InstInsertPt::Before(inst); - pt << updateDebugInfo(CallInst::Create(push, args), pt.getPosition(), m); + // FIXME: duplicated code as instrumentLoopInst + auto id = Namer:: getInstrId(inst); + if (id == -1) { + continue; + } + vector args; + args.push_back(ConstantInt::get(I32, id)); + InstInsertPt pt = InstInsertPt::Before(inst); + pt << updateDebugInfo(CallInst::Create(push, args), pt.getPosition(), m); + + errs() << "Malloc ID " << id << " : " + << getInstructionName(inst) << "\n"; + + + if (isa(inst)) { + pt = InstInsertPt::After(inst); + pt << updateDebugInfo(CallInst::Create(pop), pt.getPosition(), m); + } else if (auto *invokeI = dyn_cast(inst)) { + // for invoke, need to find the two paths and add pop + auto insertPop = [&pop, &m](BasicBlock* entry){ + InstInsertPt pt; + if (isa(entry->getFirstNonPHI())) + pt = InstInsertPt::After(entry->getFirstNonPHI()); + else + pt = InstInsertPt::Before(entry->getFirstNonPHI()); - if (isa(inst)) { - pt = InstInsertPt::After(inst); pt << updateDebugInfo(CallInst::Create(pop), pt.getPosition(), m); - } else if (auto *invokeI = dyn_cast(inst)) { - // for invoke, need to find the two paths and add pop - auto insertPop = [&pop, 
&m](BasicBlock* entry){ - InstInsertPt pt; - if (isa(entry->getFirstNonPHI())) - pt = InstInsertPt::After(entry->getFirstNonPHI()); - else - pt = InstInsertPt::Before(entry->getFirstNonPHI()); - - pt << updateDebugInfo(CallInst::Create(pop), pt.getPosition(), m); - }; - - insertPop(invokeI->getNormalDest()); - // FIXME: will generate mulitiple `slamp_pop` after the landing pad - // Fine for now because `slamp_pop` only set the context to 0 - insertPop(invokeI->getUnwindDest()); + }; - } else { - assert(false && "Call but not CallInst nor InvokeInst"); - } + insertPop(invokeI->getNormalDest()); + // FIXME: will generate mulitiple `slamp_pop` after the landing pad + // Fine for now because `slamp_pop` only set the context to 0 + insertPop(invokeI->getUnwindDest()); + + } else { + assert(false && "Call but not CallInst nor InvokeInst"); } + } - } else { - string wrapper_name = "SLAMP_" + name; + //// FIXME: temporarily turn off the replacement + // if (externs.find(name) == externs.end()) { + // // check if the function argument is `readnone`, then it's pure + // if (func->hasFnAttribute(llvm::Attribute::AttrKind::ReadNone)) { + // continue; + // } + + // errs() << "WARNING: Wrapper for external function " << name + // << " not implemented.\n"; + // hasUnrecognizedFunction = true; + + // } else { + // string wrapper_name = "SLAMP_" + name; /* Function* wrapper = cast( m.getOrInsertFunction(wrapper_name, * func->getFunctionType() ) ); */ - FunctionCallee wrapper = - m.getOrInsertFunction(wrapper_name, func->getFunctionType()); + // FunctionCallee wrapper = + // m.getOrInsertFunction(wrapper_name, func->getFunctionType()); + + // // replace 'func' to 'wrapper' in uses + // func->replaceAllUsesWith(wrapper.getCallee()); + // } + - // replace 'func' to 'wrapper' in uses - func->replaceAllUsesWith(wrapper.getCallee()); - } } if (hasUnrecognizedFunction) { @@ -891,11 +961,135 @@ void SLAMP::reportEndOfAllocaLifetime(AllocaInst *inst, Instruction *end, bool e else { 
//TODO:search for terminator block auto *F = inst->getFunction(); + + // find all return instructions //IRBuilder<> Builder(); } return; } +/// For each pointer use in the targeted loop +/// 1. find the base pointer +/// 2. Go to the creation time of the pointer +/// 3. Insert a call to SLAMP_report_base_pointer(instruction, address) +void SLAMP::instrumentBasePointer(Module &m, Loop* l) { + + const DataLayout &DL = m.getDataLayout(); + + auto *find_underlying_arg = cast( + m.getOrInsertFunction("SLAMP_report_base_pointer_arg", Void, I32, I32, I8Ptr) + .getCallee()); + auto *find_underlying_inst = cast( + m.getOrInsertFunction("SLAMP_report_base_pointer_inst", Void, I32, I8Ptr) + .getCallee()); + + // collect all pointer use by load, store, function argument in the targeted loop + std::set indeterminate_pointers, indeterminate_objects, already; + + for (auto &F : m) { + for (auto &BB : F) { + Indeterminate::findIndeterminateObjects(BB, indeterminate_pointers, indeterminate_objects); + } + } + + // for(auto *bb: l->getBlocks()) + // { + // Indeterminate::findIndeterminateObjects(*bb, indeterminate_pointers, indeterminate_objects); + // } + + for (auto &object : indeterminate_objects) { + if (const auto *const_arg = dyn_cast(object)) { + if (already.count(const_arg)) + continue; + already.insert(const_arg); + + LLVM_DEBUG( + errs() + << "Instrumenting indeterminate base object in function argument " + << *const_arg << "\n"); + + auto *arg = const_cast(const_arg); + Function *fcn = arg->getParent(); + + Instruction *cast = new BitCastInst(arg, I8Ptr); + auto fcnId = Namer::getFuncId(fcn); + auto argId = arg->getArgNo(); + + auto pt = InstInsertPt::Beginning(fcn); + Value *args[] = {ConstantInt::get(I32, fcnId), + ConstantInt::get(I32, argId), cast}; + pt << cast + << updateDebugInfo(CallInst::Create(find_underlying_arg, args), + pt.getPosition(), m); + + // errs() << "UO Arg (" << fcnId << "," << argId << ") : " + errs() << "UO Arg " << (fcnId << 5 | ((0x1f & (argId 
<< 4)) | 0x1)) << " : " + << getArgName(arg) << "\n"; + } + else if (const auto *const_inst = dyn_cast(object)) { + if (already.count(const_inst)) + continue; + already.insert(const_inst); + + if (isa(const_inst)) + continue; + + LLVM_DEBUG(errs() << "Instrumenting indeterminate base object: " + << *const_inst << '\n'); + auto *inst = const_cast(const_inst); + + Instruction *cast = new BitCastInst(inst, I8Ptr); + + InstInsertPt where; + if (auto *invoke = dyn_cast(inst)) { + auto entry = invoke->getNormalDest(); + if (isa(entry->getFirstNonPHI())) { + where = InstInsertPt::After(entry->getFirstNonPHI()); + } + else { + // iterate all the phi node in the entry block + // if inst is one of the incoming value of the phi node + // then convert the cast to the phi node + for (auto &phi : entry->phis()) { + bool found = false; + for (auto &incoming : phi.incoming_values()) { + if (incoming == inst) { + // replace the cast with the phi node + cast->deleteValue(); + cast = new BitCastInst(&phi, I8Ptr); + found = true; + break; + } + } + if (found) + break; + } + where = InstInsertPt::Before(entry->getFirstNonPHI()); + } + } else if (auto *phi = dyn_cast(inst)) { + // Don't accidentally insert instrumentation before + // later PHIs or landing pad instructions. 
+ where = InstInsertPt::Beginning(phi->getParent()); + } else + where = InstInsertPt::After(inst); + + auto instId = Namer::getInstrId(inst); + Value *args[] = { + ConstantInt::get(I32, instId), + cast + }; + where << cast + << updateDebugInfo(CallInst::Create(find_underlying_inst, args), where.getPosition(), m); + + errs() << "UO Inst " << (instId << 1 | 0x0) << " : " << getInstructionName(inst) << "\n"; + } + else { + errs() << "What is: " << *object << '\n'; + assert(false && "Unknown object type?!?!"); + } + } +} + // For each alloca, find the lifetime starts and ends // and insert calls to `SLAMP_callback_stack_alloca` and // `SLAMP_callback_stack_free` @@ -1055,6 +1249,155 @@ void SLAMP::instrumentMainFunction(Module &m) { } } +/// Pass in the loop and instrument enter/exit hooks +void SLAMP::instrumentLoopStartStopForAll(Module &m) { + + // for all functions + for (auto &f : m) { + if (f.isDeclaration()) + continue; + // get all loops + LoopInfo &li = getAnalysis(f).getLoopInfo(); + for (auto &loop : li.getLoopsInPreorder()) { + + // TODO: check setjmp/longjmp + BasicBlock *header = loop->getHeader(); + auto loopId = Namer::getBlkId(header); + if (loopId == -1) { + assert(false && "Loop header has no id"); + } + BasicBlock *latch = loop->getLoopLatch(); + vector args; + args.push_back(ConstantInt::get(I32, loopId)); + + errs() << "Loop ID " << loopId << " : " << f.getName() << " " << header->getName() << " " << li.getLoopDepth(header) << "\n"; + + // check if loop-simplify pass executed + assert(loop->getNumBackEdges() == 1 && + "Should be only 1 back edge, loop-simplify?"); + assert(latch && "Loop latch needs to exist, loop-simplify?"); + + // add instrumentation on loop header: + // if new invocation, call SLAMP_loop_invocation, else, call + // SLAMP_loop_iteration + auto *f_loop_invoke = cast( + m.getOrInsertFunction("SLAMP_enter_loop", Void, I32).getCallee()); + auto *f_loop_iter= cast( + m.getOrInsertFunction("SLAMP_loop_iter_ctx", Void, 
I32).getCallee()); + auto *f_loop_exit = cast( + m.getOrInsertFunction("SLAMP_exit_loop", Void, I32).getCallee()); + + PHINode *funcphi = PHINode::Create(f_loop_invoke->getType(), 2, "funcphi_loop_context"); + InstInsertPt pt; + + if (isa(header->getFirstNonPHI())) + pt = InstInsertPt::After(header->getFirstNonPHI()); + else + pt = InstInsertPt::Before(header->getFirstNonPHI()); + + pt << funcphi; + + // choose which function to execute (iter or invoke) + for (auto pred : predecessors(header)) { + if (pred == latch) + funcphi->addIncoming(f_loop_iter, pred); + else + funcphi->addIncoming(f_loop_invoke, pred); + } + + updateDebugInfo(CallInst::Create(funcphi, args, "", header->getFirstNonPHI()), header->getFirstNonPHI(), m); + + // Add `SLAMP_loop_exit` to all loop exits + SmallVector exits; + loop->getExitBlocks(exits); + + // one instrumentation per block + set s; + + for (unsigned i = 0; i < exits.size(); i++) { + if (s.count(exits[i])) + continue; + + CallInst *ci = CallInst::Create(f_loop_exit, args); + + InstInsertPt pt2; + if (isa(exits[i]->getFirstNonPHI())) + pt2 = InstInsertPt::After(exits[i]->getFirstNonPHI()); + else + pt2 = InstInsertPt::Before(exits[i]->getFirstNonPHI()); + + pt2 << updateDebugInfo(ci, pt2.getPosition(), m); + + s.insert(exits[i]); + } + } + } + +} + + +/// Instrumnent each function entry and exit with SLAMP function entry and exit calls +void SLAMP::instrumentFunctionStartStop(Module &m) { + // for each function body + for (auto &fi : m) { + Function *func = &fi; + if (func->isDeclaration()) + continue; + + // ignore all SLAMP calls + if (func->getName().startswith("SLAMP_")) + continue; + + + // find function ID + auto fcnID = Namer::getFuncId(func); + if (fcnID == -1) { + errs() << "Cannot find function ID for " << func->getName() << "\n"; + continue; + } + + errs() << "Function ID " << fcnID << " : " << func->getName() << "\n"; + + // set parameters + vector args; + args.push_back(ConstantInt::get(I32, fcnID)); + + // find the 
function entry + auto *f_function_entry = cast( + m.getOrInsertFunction("SLAMP_enter_fcn", Void, I32).getCallee()); + auto *f_function_exit = cast( + m.getOrInsertFunction("SLAMP_exit_fcn", Void, I32).getCallee()); + + // insert SLAMP_enter_fcn at the beginning of the function + BasicBlock *entry = &(func->getEntryBlock()); + InstInsertPt pt = InstInsertPt::Before(entry->getFirstNonPHI()); + pt << updateDebugInfo(CallInst::Create(f_function_entry, args), pt.getPosition(), m); + + // find all exits of the function + vector exits; + for (auto &bi : *func) { + BasicBlock *bb = &bi; + if (isa(bb->getTerminator())) + exits.push_back(bb->getTerminator()); + // else if (isa(bb->getTerminator())) + // exits.push_back(bb->getTerminator()); + // // FIXME: should be at the beginning of the block + // else if (isa(bb->getTerminator())) + // exits.push_back(bb->getTerminator()); + //// FIXME: invoke the exception end + // else if (isa(bb->getTerminator())) + // exits.push_back(bb->getTerminator()); + } + + // insert SLAMP_exit_fcn at the end of the function + for (auto &exit : exits) { + InstInsertPt pt = InstInsertPt::Before(exit); + pt << updateDebugInfo(CallInst::Create(f_function_exit, args), pt.getPosition(), m); + } + + } +} + /// Pass in the loop and instrument invocation/iteration/exit hooks void SLAMP::instrumentLoopStartStop(Module &m, Loop *loop) { // TODO: check setjmp/longjmp diff --git a/liberty/lib/SLAMP/SLAMPatomicq/AtomicQueueSend.cpp b/liberty/lib/SLAMP/SLAMPatomicq/AtomicQueueSend.cpp new file mode 100644 index 00000000..2b684d73 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPatomicq/AtomicQueueSend.cpp @@ -0,0 +1,376 @@ +#include "slamp_hooks.h" +#include // ring buffer + +#include +#include +#include +#include + +#include "atomic_queue/atomic_queue.h" + +namespace bip = boost::interprocess; +namespace shm +{ + + using Element = char; // Queue element type. 
+ using char_alloc = bip::allocator; + Element constexpr NIL = static_cast(-1); // Atomic elements require a special value that cannot be pushed/popped. + // using Queue = atomic_queue::AtomicQueueB, NIL>; // Use heap-allocated buffer. + // using Queue = atomic_queue::AtomicQueueB; // Use heap-allocated buffer. + using Queue = atomic_queue::AtomicQueueB, NIL, false, false, true>; + +} + +#include + + +// create segment and corresponding allocator +bip::managed_shared_memory *segment; +shm::char_alloc *char_alloc; + +shm::Queue *queue; +unsigned long counter4 = 0; +unsigned long counter8 = 0; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + segment = new bip::managed_shared_memory(bip::open_or_create, "MySharedMemory", 65536UL*1600); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + auto q = static_cast(segment->find_or_construct("atomic_queue")[65536]()); + queue = new shm::Queue(65536, q); + + + queue->push((char)fn_id); + // send a msg with "fn_id, loop_id" + // char msg[100]; + // sprintf(msg, "%d,%d", fn_id, loop_id); + // a_queue->push(msg); + // queue->push(shm::shared_string(msg, *char_alloc)); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + std::cout << counter4 << " " << counter8 << std::endl; +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){} +void SLAMP_exit_fcn(uint32_t id){} +void SLAMP_enter_loop(uint32_t id){} +void SLAMP_exit_loop(uint32_t id){} +void SLAMP_loop_iter_ctx(uint32_t id){} +void SLAMP_loop_invocation(){} +void SLAMP_loop_iteration(){} +void SLAMP_loop_exit(){} + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr){} +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr){} +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} 
+void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){} +void SLAMP_ext_pop(){} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + counter4++; + // char msg[] = "load4"; + // queue->push(shm::shared_string(msg, *char_alloc)); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + counter8++; + queue->push((char)instr); + // char msg[1] = {8}; + // // // sprintf(msg, "load8,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){} +void SLAMP_store2(uint32_t instr, const uint64_t addr){} +void SLAMP_store4(uint32_t instr, const uint64_t addr){} +void SLAMP_store8(uint32_t instr, const uint64_t addr){} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t 
bare_inst){} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){} +static void SLAMP_free_hook(void *ptr, const void *caller){} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char 
*src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE 
*stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, 
va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... ){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long 
double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... 
/*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPatomicq/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPatomicq/CMakeLists.txt new file mode 100644 index 00000000..69ff2d42 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPatomicq/CMakeLists.txt @@ -0,0 +1,34 @@ +file(GLOB SRCS + "*.cpp" +) +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") + +# add CMAKE_PREFIX_PATH +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") + +# Compilation flags +# set_source_files_properties(${SRCS} PROPERTIES COMPILE_FLAGS "-Wl,-save-temps -std=c++17 -Wno-inline -O3 -fexceptions")# -emit-llvm") +set_source_files_properties(${SRCS} PROPERTIES COMPILE_FLAGS "-flto -std=c++17 -Wno-inline -O3 -fexceptions")# -emit-llvm") +set(PassName "slamp_hooks_atomic_queue") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS}) + 
+ +# list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +#include(HandleLLVMOptions) +# include(AddLLVM) + + +include_directories(./ + /u/ziyangx/test/atomic_queue/atomic_queue/include) + +add_library(${PassName} STATIC ${SRCS}) +target_link_libraries(${PassName} ${Boost_LIBRARIES}) +# target_link_libraries(${PassName} nng::nng) +# add_llvm_library(${PassName}_shared SHARED ${SRCS}) +# set_target_properties(${PassName}_shared PROPERTIES OUTPUT_NAME ${PassName}) +# set_property(TARGET ${PassName} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +#add_llvm_library(${PassName} SHARED ${SRCS}) # This is to generate libxxx.so diff --git a/liberty/lib/SLAMP/SLAMPatomicq/consumer/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPatomicq/consumer/CMakeLists.txt new file mode 100644 index 00000000..65b55888 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPatomicq/consumer/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required (VERSION 3.6.2 FATAL_ERROR) + +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + + +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS} + /u/ziyangx/test/atomic_queue/atomic_queue/include) + +add_executable (consumer consumer.cpp) +target_link_libraries(consumer LINK_PUBLIC + ${Boost_LIBRARIES} + rt + Threads::Threads +) diff --git a/liberty/lib/SLAMP/SLAMPatomicq/consumer/consumer.cpp b/liberty/lib/SLAMP/SLAMPatomicq/consumer/consumer.cpp new file mode 100644 index 00000000..5cfca3fc --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPatomicq/consumer/consumer.cpp @@ -0,0 +1,40 @@ +#include // ring buffer + +#include +#include +#include + +#include "atomic_queue/atomic_queue.h" + +namespace bip = boost::interprocess; +namespace shm +{ + using Element = char; + Element constexpr NIL = static_cast(-1); + using Queue = atomic_queue::AtomicQueueB, NIL, false, false, true>; +} + +#include + +int main() +{ + // create 
segment and corresponding allocator + bip::managed_shared_memory segment(bip::open_or_create, "MySharedMemory", 104857600); + auto q = static_cast(segment.find_or_construct("atomic_queue")[65536]()); + std::cout << q << std::endl; + + shm::Queue *queue = new shm::Queue(65536, q); + + auto counter = 0; + // shm::shared_string v(char_alloc); + while (true) + { + if (shm::Element v = queue->pop()) { + counter++; + + if (counter % 1000000 == 0) { + std::cout << "counter: " << counter << std::endl; + } + } + } +} diff --git a/liberty/lib/SLAMP/SLAMPatomicq/slamp_hooks.h b/liberty/lib/SLAMP/SLAMPatomicq/slamp_hooks.h new file mode 100644 index 00000000..df546117 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPatomicq/slamp_hooks.h @@ -0,0 +1,371 @@ +#ifndef SLAMPLIB_HOOKS_SLAMP_HOOKS_H +#define SLAMPLIB_HOOKS_SLAMP_HOOKS_H + +// FIXME: inline tweak actually make things worse in sequential and better with +// 16x, so turn it on at all time before understanding why +#define ATTRIBUTE(x) __attribute__((x)) +// #ifdef ITO_ENABLE +// // #define ATTRIBUTE(x) +// #define ATTRIBUTE(x) __attribute__((x)) +// #else +// #define ATTRIBUTE(x) +// #endif + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void SLAMP_dbggv(int id); +void SLAMP_dbggvstr(char* str); + +// SLAMP measure functions +void SLAMP_measure_init(); +void SLAMP_measure_fini(); +void SLAMP_measure_load(uint32_t id, uint64_t size); +void SLAMP_measure_store(uint32_t id, uint64_t size); +static void* SLAMP_measure_malloc_hook(size_t size, const void *caller); +static void SLAMP_measure_free_hook(void *ptr, const void *caller); + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id); +void SLAMP_fini(const char* filename); + +void SLAMP_allocated(uint64_t addr); +void 
SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size); +void SLAMP_main_entry(uint32_t argc, char** argv, char** env); + +void SLAMP_enter_fcn(uint32_t id); +void SLAMP_exit_fcn(uint32_t id); +void SLAMP_enter_loop(uint32_t id); +void SLAMP_exit_loop(uint32_t id); +void SLAMP_loop_iter_ctx(uint32_t id); +void SLAMP_loop_invocation(); +void SLAMP_loop_iteration(); +void SLAMP_loop_exit(); + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr); +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr); +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t); +void SLAMP_callback_stack_free(void); + +void SLAMP_ext_push(const uint32_t instr); +void SLAMP_ext_pop(); + +void SLAMP_push(const uint32_t instr); +void SLAMP_pop(); + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_store1(uint32_t instr, const 
uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store2(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store4(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store8(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n) ATTRIBUTE(always_inline);; + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller); +static void SLAMP_free_hook(void *ptr, const void *caller); +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller); +void* SLAMP_malloc(size_t size, uint32_t instr=0, size_t alignment=16); + +void* SLAMP_calloc(size_t nelem, size_t elsize); +void* SLAMP_realloc(void* ptr, size_t size); +void* SLAMP__Znam(size_t size); +void* SLAMP__Znwm(size_t size); + +char* SLAMP_strdup(const char *s1); +char* SLAMP___strdup(const char *s1); +void SLAMP_free(void* ptr); +void SLAMP_cfree(void* ptr); +void SLAMP__ZdlPv(void* ptr); +void SLAMP__ZdaPv(void* ptr); +int SLAMP_brk(void *end_data_segment); +void* SLAMP_sbrk(intptr_t increment); + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes); +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes); +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes); +void 
SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes); +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len); +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len); + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr); +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr); + +/* String functions */ +size_t SLAMP_strlen(const char *str); +char* SLAMP_strchr(char *s, int c); +char* SLAMP_strrchr(char *s, int c); +int SLAMP_strcmp(const char *s1, const char *s2); +int SLAMP_strncmp(const char *s1, const char *s2, size_t n); +char* SLAMP_strcpy(char *dest, const char *src); +char* SLAMP_strncpy(char *dest, const char *src, size_t n); +char* SLAMP_strcat(char *s1, const char *s2); +char* SLAMP_strncat(char *s1, const char *s2, size_t n); +char* SLAMP_strstr(char *s1, char *s2); +size_t SLAMP_strspn(const char *s1, const char *s2); +size_t SLAMP_strcspn(const char *s1, const char *s2); +char* SLAMP_strtok(char *s, const char *delim); +double SLAMP_strtod(const char *nptr, char **endptr); +long int SLAMP_strtol(const char *nptr, char **endptr, int base); +char* SLAMP_strpbrk(char *s1, char *s2); + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n); +void *SLAMP_memcpy (void *dest, const void *src, size_t n); +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n); +void *SLAMP_memmove (void *dest, const void *src, size_t n); +int SLAMP_memcmp(const void *s1, const void *s2, size_t n); +void* SLAMP_memchr(void* ptr, int value, size_t num); +void* SLAMP___rawmemchr(void* ptr, int value); + +void SLAMP_bzero(void *s, size_t n); +void SLAMP_bcopy(const void *s1, void *s2, size_t n); + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count); +int SLAMP_open(const char *pathname, int flags, mode_t mode); +int SLAMP_close(int fd); +ssize_t SLAMP_write(int fd, const void *buf, size_t count); +off_t 
SLAMP_lseek(int fildes, off_t offset, int whence); + +FILE * SLAMP_fopen(const char *path, const char *mode); +FILE * SLAMP_fopen64(const char *path, const char *mode); +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream); +int SLAMP_fflush(FILE *stream); +int SLAMP_fclose(FILE *stream); +int SLAMP_ferror(FILE *stream); +int SLAMP_feof(FILE *stream); +long SLAMP_ftell(FILE *stream); +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream); +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream); +int SLAMP_fseek(FILE *stream, long offset, int whence); +void SLAMP_rewind(FILE *stream); + +int SLAMP_fgetc(FILE *stream); +int SLAMP_fputc(int c, FILE *stream); +char * SLAMP_fgets(char *s, int n, FILE *stream); +int SLAMP_fputs(const char *s, FILE *stream); + +int SLAMP_ungetc(int c, FILE *stream); +int SLAMP_putchar(int c); +int SLAMP_getchar(void); + +int SLAMP_fileno(FILE *stream); +char * SLAMP_gets(char *s); +int SLAMP_puts(const char *s); + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); +int SLAMP_remove(const char *path); + +void SLAMP_setbuf(FILE * stream, char * buf); +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size); +char * SLAMP_tmpnam(char *s); +FILE* SLAMP_tmpfile(void); +char * SLAMP_ttyname(int fildes); + +FILE * SLAMP_fdopen(int fildes, const char *mode); +void SLAMP_clearerr(FILE *stream); + +int SLAMP_truncate(const char *path, off_t length); +int SLAMP_ftruncate(int fildes, off_t length); + +int SLAMP_dup(int oldfd); +int SLAMP_dup2(int oldfd, int newfd); +int SLAMP_pipe(int filedes[2]); + +int SLAMP_chmod(const char *path, mode_t mode); +int SLAMP_fchmod(int fildes, mode_t mode); +int SLAMP_fchown(int fd, uid_t owner, gid_t group); +int SLAMP_access(const char *pathname, int mode); +long SLAMP_pathconf(char *path, int name); +int SLAMP_mkdir(const char *pathname, mode_t mode); +int SLAMP_rmdir(const char 
*pathname); +mode_t SLAMP_umask(mode_t mask); +int SLAMP_fcntl(int fd, int cmd, struct flock *lock); + +DIR* SLAMP_opendir(const char* name); +struct dirent* SLAMP_readdir(DIR *dirp); +struct dirent64* SLAMP_readdir64(DIR *dirp); +int SLAMP_closedir(DIR* dirp); + +/* Printf */ +int SLAMP_printf(const char *format, ...); +int SLAMP_fprintf(FILE *stream, const char *format, ...); +int SLAMP_sprintf(char *str, const char *format, ...); +int SLAMP_snprintf(char *str, size_t size, const char *format, ...); + +int SLAMP_vprintf(const char *format, va_list ap); +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap); +int SLAMP_vsprintf(char *str, const char *format, va_list ap); +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap); + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ); +int SLAMP_scanf(const char *format, ... ); +int SLAMP_sscanf(const char *s, const char *format, ... ); +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
); + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap); +int SLAMP_vscanf(const char *format, va_list ap); +int SLAMP_vsscanf(const char *s, const char *format, va_list ap); + +/* Time */ +time_t SLAMP_time(time_t *t); +struct tm *SLAMP_localtime(const time_t *timer); +struct lconv* SLAMP_localeconv(); +struct tm *SLAMP_gmtime(const time_t *timer); +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz); + +/* Math */ +double SLAMP_ldexp(double x, int exp); +float SLAMP_ldexpf(float x, int exp); +long double SLAMP_ldexpl(long double x, int exp); +double SLAMP_log10(double x); +float SLAMP_log10f(float x); +long double SLAMP_log10l(long double x); +double SLAMP_log(double x); +float SLAMP_logf(float x); +long double SLAMP_logl(long double x); + +double SLAMP_exp(double x); +float SLAMP_expf(float x); +long double SLAMP_expl(long double x); + +double SLAMP_cos(double x); +float SLAMP_cosf(float x); +long double SLAMP_cosl(long double x); +double SLAMP_sin(double x); +double SLAMP_tan(double x); +float SLAMP_sinf(float x); +long double SLAMP_sinl(long double x); + +double SLAMP_atan(double x); +float SLAMP_atanf(float x); +long double SLAMP_atanl(long double x); + +double SLAMP_floor(double x); +float SLAMP_floorf(float x); +long double SLAMP_floorl(long double x); +double SLAMP_ceil(double x); +float SLAMP_ceilf(float x); +long double SLAMP_ceill(long double x); + +double SLAMP_atan2(double y, double x); +float SLAMP_atan2f(float y, float x); +long double SLAMP_atan2l(long double y, long double x); + +double SLAMP_sqrt(double x); +float SLAMP_sqrtf(float x); +long double SLAMP_sqrtl(long double x); + +double SLAMP_pow(double x, double y); +float SLAMP_powf(float x, float y); +long double SLAMP_powl(long double x, long double y); + +double SLAMP_fabs(double x); +float SLAMP_fabsf(float x); +long double SLAMP_fabsl(long double x); + +double SLAMP_modf(double x, double *iptr); +float SLAMP_modff(float x, float *iptr); +long double 
SLAMP_modfl(long double x, long double *iptr); + +double SLAMP_fmod(double x, double y); + +double SLAMP_frexp(double num, int *exp); +float SLAMP_frexpf(float num, int *exp); +long double SLAMP_frexpl(long double num, int *exp); + +int SLAMP_isnan(); + +/* MISC */ +char *SLAMP_getenv(const char *name); +int SLAMP_putenv(char* string); +char *SLAMP_getcwd(char *buf, size_t size); +char* SLAMP_strerror(int errnum); +void SLAMP_exit(int status); +void SLAMP__exit(int status); +int SLAMP_link(const char *oldpath, const char *newpath); +int SLAMP_unlink(const char *pathname); +int SLAMP_isatty(int desc); +int SLAMP_setuid(uid_t uid); +uid_t SLAMP_getuid(void); +uid_t SLAMP_geteuid(void); +int SLAMP_setgid(gid_t gid); +gid_t SLAMP_getgid(void); +gid_t SLAMP_getegid(void); +pid_t SLAMP_getpid(void); +int SLAMP_chdir(const char *path); +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */); +int SLAMP_execv(const char *path, char *const argv[]); +int SLAMP_execvp(const char *file, char *const argv[]); +int SLAMP_kill(pid_t pid, int sig); +pid_t SLAMP_fork(void); +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler); +pid_t SLAMP_waitpid(pid_t pid, int* status, int options); +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)); +int SLAMP_ioctl(int d, int request, ...); +unsigned int SLAMP_sleep(unsigned int seconds); +char* SLAMP_gcvt(double number, size_t ndigit, char* buf); +char* SLAMP_nl_langinfo(nl_item item); + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function); +const unsigned short int **SLAMP___ctype_b_loc(void); +int SLAMP__IO_getc(_IO_FILE * __fp); +int SLAMP__IO_putc(int __c, _IO_FILE *__fp); +int* SLAMP___errno_location (void); + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf); +int SLAMP___xstat (int __ver, __const char *__filename, struct stat *__stat_buf); + 
+#ifdef __cplusplus +} +#endif + +#endif /* SLAMP_HOOKS_H */ diff --git a/liberty/lib/SLAMP/SLAMPboost/BoostSend.cpp b/liberty/lib/SLAMP/SLAMPboost/BoostSend.cpp new file mode 100644 index 00000000..98395d67 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/BoostSend.cpp @@ -0,0 +1,539 @@ +#include "slamp_hooks.h" +#include // ring buffer + +#include +#include +#include +#include +#include +#include "malloc.h" + +// 2MB +#define LOCAL_BUFFER_SIZE 2097152 + +namespace bip = boost::interprocess; +namespace shm +{ + // typedef bip::allocator char_alloc; + // typedef bip::basic_string, char_alloc > shared_string; + + // using ring_buffer = boost::lockfree::spsc_queue>; + // 4MB + using ring_buffer = boost::lockfree::spsc_queue>; +} + +#include + +static void *(*old_malloc_hook)(size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::managed_shared_memory *segment; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. 
+shm::ring_buffer *queue; +unsigned long counter_load = 0; +unsigned long counter_store = 0; +unsigned long counter_ctx = 0; +unsigned long counter_alloc = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +bool onProfiling = false; + +// template +struct LocalBuffer { + char buffer[LOCAL_BUFFER_SIZE]; + shm::ring_buffer *queue; + unsigned counter = 0; + + LocalBuffer(shm::ring_buffer *queue) : queue(queue) {} + + template + LocalBuffer *push(T value) ATTRIBUTE(always_inline) { + size_t size = sizeof(T); + if (counter + size > LOCAL_BUFFER_SIZE) { + flush(); + } + for (unsigned i = 0; i < size; i++) { + // lsb first + buffer[counter++] = (value >> (i << 3)) & 0xFF; + } + return this; + } + + void flush() { + // for (int i = 0; i < counter; i++) { + unsigned long pushed = 0; + while (pushed < counter) { + pushed += queue->push(buffer + pushed, counter - pushed); + } + // } + counter = 0; + } +}; + +LocalBuffer *local_buffer; + +enum DepModAction: char +{ + INIT = 0, + LOAD, + STORE, + ALLOC, + LOOP_INVOC, + LOOP_ITER, + FINISHED +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + segment = new bip::managed_shared_memory(bip::open_or_create, "MySharedMemory", 65536UL*1600); + queue = segment->find_or_construct("queue")(); + local_buffer = new LocalBuffer(queue); + + // send a msg with "fn_id, loop_id" + // char msg[100]; + // sprintf(msg, "%d,%d", fn_id, loop_id); + // queue->push(shm::shared_string(msg, *char_alloc)); + + local_buffer->push(INIT); + local_buffer->push(loop_id); + uint32_t pid = getpid(); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + local_buffer->push(pid); + + old_malloc_hook = __malloc_hook; + // old_free_hook = __free_hook; + old_memalign_hook = __memalign_hook; + __malloc_hook = SLAMP_malloc_hook; + __free_hook = nullptr; + __realloc_hook = nullptr; + // __free_hook = SLAMP_free_hook; + __memalign_hook = SLAMP_memalign_hook; +} + +void SLAMP_fini(const char* filename){ + // send a msg with 
"fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + local_buffer->push(FINISHED); + local_buffer->flush(); +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ + local_buffer->push(ALLOC)->push(addr)->push(size); +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){} +void SLAMP_exit_fcn(uint32_t id){} +void SLAMP_enter_loop(uint32_t id){} +void SLAMP_exit_loop(uint32_t id){} +void SLAMP_loop_iter_ctx(uint32_t id){} +void SLAMP_loop_invocation(){ + // send a msg with "loop_invocation" + local_buffer->push(LOOP_INVOC); + + counter_ctx++; + onProfiling = true; +} +void SLAMP_loop_iteration(){ + local_buffer->push(LOOP_ITER); + counter_ctx++; +} + +void SLAMP_loop_exit(){ + onProfiling = false; +} + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr){} +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr){} +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){} +void SLAMP_ext_pop(){} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline) { + // send a msg with "load, instr, addr, bare_instr, value" + // char msg[100]; + // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); + // + if (onProfiling) { + local_buffer->push(LOAD)->push(instr)->push(addr)->push(bare_instr); //->push(value); + counter_load++; + } +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const 
uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { + // send a msg with "store, instr, addr, bare_instr, value" + // char msg[100]; + // sprintf(msg, "store,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); + if (onProfiling) { + local_buffer->push(STORE)->push(instr)->push(bare_instr)->push(addr); + counter_store++; + } +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, 
const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + SLAMP_storen(bare_inst, addr, n); +} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){ + __malloc_hook = old_malloc_hook; + void* ptr = malloc(size); + local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // printf("malloc %lu at %p\n", size, ptr); + counter_alloc++; + __malloc_hook = SLAMP_malloc_hook; + return ptr; +} +static void SLAMP_free_hook(void *ptr, const void *caller){ + old_free_hook(ptr, caller); +} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){ + old_memalign_hook = __memalign_hook; + void* ptr = memalign(alignment, size); + local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // printf("memalign %lu at %p\n", size, ptr); + counter_alloc++; + __memalign_hook = SLAMP_memalign_hook; + return ptr; +} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} 
+void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} 
+char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + 
+void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float 
SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const 
char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPboost/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPboost/CMakeLists.txt new file mode 100644 index 00000000..604d7ffd --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/CMakeLists.txt @@ -0,0 +1,34 @@ +file(GLOB SRCS + "*.cpp" +) +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") + +# add CMAKE_PREFIX_PATH +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") + +# Compilation flags +# set_source_files_properties(${SRCS} PROPERTIES COMPILE_FLAGS "-Wl,-save-temps -std=c++17 -Wno-inline -O3 -fexceptions")# -emit-llvm") +set_source_files_properties(${SRCS} PROPERTIES COMPILE_FLAGS "-flto -std=c++17 -Wno-inline -O3 -fexceptions")# -emit-llvm") +set(PassName "slamp_hooks_boost") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS}) + + +# list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +#include(HandleLLVMOptions) +# include(AddLLVM) + + +include_directories(./ + /u/ziyangx/test/boost/boost_1_80_0/install/include) + +add_library(${PassName} STATIC ${SRCS}) +target_link_libraries(${PassName} ${Boost_LIBRARIES}) +# target_link_libraries(${PassName} nng::nng) +# add_llvm_library(${PassName}_shared SHARED ${SRCS}) +# set_target_properties(${PassName}_shared PROPERTIES OUTPUT_NAME ${PassName}) +# set_property(TARGET ${PassName} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +#add_llvm_library(${PassName} SHARED ${SRCS}) # This is to generate libxxx.so diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/.gitignore b/liberty/lib/SLAMP/SLAMPboost/consumer/.gitignore new file mode 100644 index 00000000..378eac25 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/.gitignore @@ -0,0 +1 @@ +build diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPboost/consumer/CMakeLists.txt new file mode 100644 index 00000000..9ad2d772 
--- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required (VERSION 3.6.2 FATAL_ERROR) + +# set(CMAKE_C_COMPILER "clang") +# set(CMAKE_CXX_COMPILER "clang++") +# set(CMAKE_LINKER "ld.gold") + +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto") +# set(LINK_FLAGS "${LINK_FLAGS} -flto") + + +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS}) + +add_executable (consumer consumer.cpp ./ProfilingModules/DependenceModule.cpp) +target_link_libraries(consumer LINK_PUBLIC + ${Boost_LIBRARIES} + rt + Threads::Threads +) diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.cpp b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.cpp new file mode 100644 index 00000000..a78b3487 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include + +#include "slamp_timestamp.h" +#include "slamp_logger.h" +#include "slamp_shadow_mem.h" +#include "DependenceModule.h" +#define SIZE_8M 0x800000 + + +static slamp::MemoryMap* smmap = nullptr; +static std::unordered_set *deplog_set; +static uint64_t slamp_iteration = 0; +static uint64_t slamp_invocation = 0; +static uint32_t target_loop_id = 0; + +namespace DepMod { +// init: setup the shadow memory +void init(uint32_t loop_id, uint32_t pid) { + + target_loop_id = loop_id; + smmap = new slamp::MemoryMap(TIMESTAMP_SIZE_IN_BYTES); + deplog_set = new std::unordered_set(); + + smmap->init_stack(SIZE_8M, pid); + smmap->allocate((void*)&errno, sizeof(errno)); + smmap->allocate((void*)&stdin, sizeof(stdin)); + smmap->allocate((void*)&stdout, sizeof(stdout)); + smmap->allocate((void*)&stderr, sizeof(stderr)); + 
smmap->allocate((void*)&sys_nerr, sizeof(sys_nerr)); + + { + const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + smmap->allocate((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + } + { + const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + smmap->allocate((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + } + { + const int32_t* itype_ptr = (*__ctype_toupper_loc()) - 128; + smmap->allocate((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + } +} + +void fini(const char *filename) { + std::ofstream of(filename); + of << target_loop_id << " " << 0 << " " << 0 << " " + << 0 << " " << 0 << " " << 0 << "\n"; + + std::set ordered(deplog_set->begin(), deplog_set->end()); + for (auto &k: ordered) { + of << target_loop_id << " " << k.src << " " << k.dst << " " << k.dst_bare << " " + << (k.cross ? 1 : 0) << " " << 1 << " "; + of << "\n"; + } + + delete smmap; + delete deplog_set; +} + +void allocate(void *addr, uint64_t size) { + smmap->allocate(addr, size); + // std::cout << "allocate " << addr << " " << size << std::endl; +} + +// void log(TS ts, const uint32_t dst_inst, TS *pts, const uint32_t bare_inst, + // uint64_t addr, uint64_t value, uint8_t size) { +void log(TS ts, const uint32_t dst_inst, const uint32_t bare_inst){ + + uint32_t src_inst = GET_INSTR(ts); + uint64_t src_iter = GET_ITER(ts); + + // uint64_t src_invoc = GET_INVOC(ts); + + slamp::KEY key(src_inst, dst_inst, bare_inst, src_iter != slamp_iteration); + + // std::cout << "src_inst: " << src_inst << " dst_inst: " << dst_inst << " bare_inst: " << bare_inst << " src_iter: " << src_iter << " slamp_iteration: " << slamp_iteration << " src_iter != slamp_iteration: " << (src_iter != slamp_iteration) << std::endl; + // deplog_set->insert(key); +} + +// template +void load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + TS* s = (TS*)GET_SHADOW(addr, TIMESTAMP_SIZE_IN_POWER_OF_TWO); + TS tss = s[0]; + if (tss != 0) { + log(tss, instr, bare_instr); + } +} + +// template 
+void store(uint32_t instr, uint32_t bare_instr, const uint64_t addr) { + TS *s = (TS *)GET_SHADOW(addr, TIMESTAMP_SIZE_IN_POWER_OF_TWO); + // if (!smmap->is_allocated(s)) { + // std::cout << "store not allocated: " << instr << " " << bare_instr << " " << addr << std::endl; + // } + TS ts = CREATE_TS(instr, slamp_iteration, slamp_invocation); + + // TODO: handle output dependence. ignore it as of now. + // if (ASSUME_ONE_ADDR) { + s[0] = ts; + // } else { + // for (auto i = 0; i < size; i++) + // s[i] = ts; + // } +} + +void loop_invoc() { + slamp_iteration = 0; + slamp_invocation++; +} + +void loop_iter() { + slamp_iteration++; +} +} // namespace DepMod diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.h b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.h new file mode 100644 index 00000000..a2664cdc --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/DependenceModule.h @@ -0,0 +1,23 @@ +#include + +namespace DepMod { +void init(uint32_t loop_id, uint32_t pid); +void fini(const char *filename); +void load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value); +void store(uint32_t instr, uint32_t bare_instr, const uint64_t addr); +void allocate(void *addr, uint64_t size); +void loop_invoc(); +void loop_iter(); + +enum DepModAction: char +{ + INIT = 0, + LOAD, + STORE, + ALLOC, + LOOP_INVOC, + LOOP_ITER, + FINISHED +}; + +} // namespace DepMod diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_bound_malloc.h b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_bound_malloc.h new file mode 100644 index 00000000..0d6b9410 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_bound_malloc.h @@ -0,0 +1,22 @@ +#ifndef SLAMPLIB_HOOKS_SLAMP_BOUND_MALLOC_H +#define SLAMPLIB_HOOKS_SLAMP_BOUND_MALLOC_H + +#include +#include +namespace slamp +{ + +void init_bound_malloc(void* heap_bound); +void 
fini_bound_malloc(); + +size_t get_object_size(void* ptr); + +void* bound_malloc(size_t size, size_t alignment=16); +bool bound_free(void *ptr, uint64_t &starting_page, unsigned &purge_cnt); +void* bound_calloc(size_t num, size_t size); +void* bound_realloc(void* ptr, size_t size); +void bound_discard_page(); + +} + +#endif diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_logger.h b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_logger.h new file mode 100644 index 00000000..89641a03 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_logger.h @@ -0,0 +1,110 @@ +#ifndef SLAMPLIB_HOOKS_SLAMP_LOGGER +#define SLAMPLIB_HOOKS_SLAMP_LOGGER + +#include +#include +#include +#include + +#include "slamp_timestamp.h" + +namespace slamp +{ + +struct KEY +{ + uint32_t src; + uint32_t dst; + uint32_t dst_bare; + uint32_t cross; + + KEY() {} + KEY(uint32_t s, uint32_t d, uint32_t b, uint32_t c) : src(s), dst(d), dst_bare(b), cross(c) {} +}; + +struct KEYHash +{ + size_t operator()(const KEY& key) const + { + static_assert(sizeof(size_t) == sizeof(uint64_t), "Should be 64bit address"); + std::hash hash_fn; + + // hash(32-32) ^ hash(32-1) + // FIXME: find a better hash function + return hash_fn(((uint64_t)key.src << 32) | key.dst) ^ hash_fn(((uint64_t)key.dst_bare << 1) | (key.cross & 0x1)); + } +}; + +struct KEYComp +{ + bool operator()(const KEY& key1, const KEY& key2) const + { + uint32_t src1 = key1.src; + uint32_t src2 = key2.src; + + if (src1 < src2) + return true; + else if (src1 > src2) + return false; + + uint32_t dst1 = key1.dst; + uint32_t dst2 = key2.dst; + + if (dst1 < dst2) + return true; + else if (dst1 > dst2) + return false; + + uint32_t dst_bare1 = key1.dst_bare; + uint32_t dst_bare2 = key2.dst_bare; + + if (dst_bare1 < dst_bare2) + return true; + else if (dst_bare1 > dst_bare2) + return false; + + uint32_t cross1 = key1.cross; + uint32_t cross2 = key2.cross; + + return cross1 < cross2; + } +}; + 
+struct KEYEqual +{ + bool operator()(const KEY& key1, const KEY& key2) const + { + uint32_t src1 = key1.src; + uint32_t src2 = key2.src; + + if ( src1 != src2 ) return false; + + uint32_t dst1 = key1.dst; + uint32_t dst2 = key2.dst; + + if ( dst1 != dst2 ) return false; + + uint32_t dst_bare1 = key1.dst_bare; + uint32_t dst_bare2 = key2.dst_bare; + + if ( dst_bare1 != dst_bare2 ) return false; + + uint32_t cross1 = key1.cross; + uint32_t cross2 = key2.cross; + + if ( cross1 != cross2 ) return false; + + return true; + } +}; + + +void init_logger(uint32_t fn_id, uint32_t loop_id); +void fini_logger(const char* filename); + +uint32_t log(TS ts, const uint32_t dst_instr, TS* pts, const uint32_t bare_inst, uint64_t addr, uint64_t value, uint8_t size); +void print_log(const char* filename); + +} + +#endif diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_shadow_mem.h b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_shadow_mem.h new file mode 100644 index 00000000..ef07201b --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_shadow_mem.h @@ -0,0 +1,197 @@ +#ifndef SLAMPLIB_HOOKS_SLAMP_SHADOW_MEM_H +#define SLAMPLIB_HOOKS_SLAMP_SHADOW_MEM_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +// higher half of canonical region cannot be used + +/// left shift by `shift`, mask 47 LSB, toggle #45 bit? 
+#define MASK1 0x00007fffffffffffL +#define MASK2 0x0000200000000000L +#define GET_SHADOW(addr, shift) \ + (((((uint64_t)(addr)) << (shift)) & MASK1) ^ MASK2) + +namespace slamp { + +class MemoryMap { +public: + uint64_t heapStart = 0; + MemoryMap(unsigned r) : ratio(r), ratio_shift(0) { + // ratio expected to be a power of 2 + assert((r & (r - 1)) == 0); + + // log(r) + unsigned n = r; + while ((n & 1) == 0) { + this->ratio_shift += 1; + n = n >> 1; + } + + // get the page size of the host system + pagesize = getpagesize(); + pagemask = ~(pagesize - 1); + } + + ~MemoryMap() { + // freeing all remaining shadow addresses + for (auto page : pages) { + uint64_t s = GET_SHADOW(page, ratio_shift); + munmap(reinterpret_cast(s), pagesize * ratio); + } + } + + unsigned get_ratio() { return ratio; } + + bool is_allocated(void *addr) { + auto a = reinterpret_cast(addr); + uint64_t page = a & pagemask; + if (pages.find(page) != pages.end()) + return true; + else + return false; + } + + /// allocate shadow page if not exist + void *allocate(void *addr, size_t size) { + auto a = reinterpret_cast(addr); + uint64_t pagebegin = a & pagemask; + uint64_t pageend = (a + size - 1) & pagemask; + + // try mmap + + std::set shadow_pages; + bool success = true; + + for (uint64_t page = pagebegin; page <= pageend; page += pagesize) { + if (pages.find(page) != pages.end()) + continue; + + uint64_t s = GET_SHADOW(page, ratio_shift); + // create a shadow page for the page + void *p = mmap(reinterpret_cast(s), pagesize * ratio, + PROT_WRITE | PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (p == MAP_FAILED) { + int err = errno; + printf("mmap failed: %lx errno: %d\n", s, err); + raise(SIGINT); + + success = false; + break; + } else { + shadow_pages.insert(p); + } + } + + if (success) { + for (uint64_t page = pagebegin; page <= pageend; page += pagesize) + pages.insert(page); + uint64_t pagebegin = a & pagemask; + uint64_t pageend = (a + size - 1) & pagemask; + + // return 
shadow_mem + auto *shadow_addr = (uint64_t *)GET_SHADOW(a, ratio_shift); + return (void *)(shadow_addr); + } else { + // cleanup + for (auto shadow_page : shadow_pages) + munmap(shadow_page, pagesize * ratio); + return nullptr; + } + } + + // free the shadow pages + void deallocate_pages(uint64_t page, unsigned cnt) { + munmap(reinterpret_cast(GET_SHADOW(page, ratio_shift)), + pagesize * ratio * cnt); + + // fprintf(stderr, "deallocate_pages: %lx %d\n", GET_SHADOW(page, ratio_shift), cnt); + + for (auto i = 0; i < cnt; i++, page += pagesize) + pages.erase(page); + } + + /// for realloc; the dependence carries over + void copy(void *dst, void *src, size_t size) { + size_t shadow_size = size * ratio; + void *shadow_dst = (void *)GET_SHADOW(dst, ratio_shift); + void *shadow_src = (void *)GET_SHADOW(src, ratio_shift); + memcpy(shadow_dst, shadow_src, shadow_size); + } + + void init_heap(void *addr) { + heapStart = reinterpret_cast(addr); + } + + // init stack size to be fixed 8MB + // the stack in /proc/$pid/maps changes in runtime, use it to find the end + void init_stack(uint64_t stack_size, uint64_t pid) { + char filename[256]; + char buf[5000]; + sprintf(filename, "/proc/%lu/maps", pid); + // sprintf(filename, "/proc/%u/maps", getpid()); + + FILE *fp = fopen(filename, "r"); + if (!fp) { + perror(filename); + exit(EXIT_FAILURE); + } + + bool allocated = false; + + while (fgets(buf, sizeof(buf), fp) != nullptr) { + uint64_t start, end; + char name[5000]; + + int n = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*llx %*x:%*x %*lu %s", &start, + &end, name); + + if (n != 3) { + continue; + } + + // FIXME: get heap start addr + if (!strcmp(name, "[heap]")) { + heapStart = start; + } + + + if (!strcmp(name, "[stack]")) { + // stack grow from end (big address) backwards + // print the stack start and end + printf("stack: %lx %lx\n", end - stack_size, end); + allocate(reinterpret_cast(end - stack_size), stack_size); + allocated = true; + break; + } + } + + if (!allocated) { + 
fprintf(stderr, "Error: failed to allocate shadow for stack"); + exit(EXIT_FAILURE); + } + } + +private: + std::set pages; // page table + + unsigned ratio; // (size of metadata) / (size of real data) + unsigned ratio_shift; + uint64_t pagesize; + uint64_t pagemask; +}; + +} // namespace slamp +#endif diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_timestamp.h b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_timestamp.h new file mode 100644 index 00000000..fc9c1428 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/ProfilingModules/slamp_timestamp.h @@ -0,0 +1,22 @@ +#ifndef SLAMPLIB_HOOKS_SLAMP_TIMESTAMP +#define SLAMPLIB_HOOKS_SLAMP_TIMESTAMP + +#include + +typedef uint64_t TS; // first 20 bits for instr and following 44 bits for iter +#define TIMESTAMP_SIZE_IN_BYTES 8 +#define TIMESTAMP_SIZE_IN_POWER_OF_TWO 3 +#define ITERATION_SIZE 40 +#define INVOCATION_SIZE 4 // 44-40 +#define CREATE_TS(instr, iter, invoc) ( ((TS)instr << 44) | (((TS)iter & (TS)0xffffffffff) << INVOCATION_SIZE) | ((TS)invoc & (TS)0xf)) +#define CREATE_TS_HASH(instr, hash, iter, invoc) \ + (((TS)instr << 44) | \ + (((TS)hash & (TS)0xfffffffff) << (INVOCATION_SIZE + 4)) | \ + (((TS)iter & (TS)0xf) << INVOCATION_SIZE) | ((TS)invoc & (TS)0xf)) +#define GET_INSTR(ts) ((ts >> 44) & 0xfffff) +#define GET_HASH(ts) ( (ts >> 8) & 0xfffffffff) +#define GET_ITER(ts) ( (ts >> INVOCATION_SIZE) & 0xffffffffff) +#define GET_INVOC(ts) ( ts & 0xf) + + +#endif diff --git a/liberty/lib/SLAMP/SLAMPboost/consumer/consumer.cpp b/liberty/lib/SLAMP/SLAMPboost/consumer/consumer.cpp new file mode 100644 index 00000000..c86dc291 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/consumer/consumer.cpp @@ -0,0 +1,200 @@ +#include // ring buffer + +#include +#include +#include +#include +#include "ProfilingModules/DependenceModule.h" +#include +#include + +#define DEBUG 0 +#define ACTION 0 + + +namespace bip = boost::interprocess; +namespace shm +{ + // typedef bip::allocator 
char_alloc; + // typedef bip::basic_string, char_alloc > shared_string; + + // shared_string, + using ring_buffer = boost::lockfree::spsc_queue>; +} + + +// 2MB +#define LOCAL_BUFFER_SIZE 4194304 + +struct LocalReceiveBuffer { + shm::ring_buffer *queue; + + char buffer[LOCAL_BUFFER_SIZE]; + unsigned counter = 0; + unsigned buffer_size = 0; + + LocalReceiveBuffer(shm::ring_buffer *queue) : queue(queue) {} + + template + void pop(T &t) { + size_t size = sizeof(T); + T value = 0; + uint8_t current_byte = 0; + + while (current_byte < size) { + // load the value from the buffer + while (counter < buffer_size && current_byte < size) { + // lsb first + // auto v = (uint64_t)(uint8_t)buffer[counter]; + // value |= (v << (8 * current_byte)); + counter++; + current_byte++; + } + + // load the buffer up + if (counter == buffer_size) { + buffer_size = queue->pop(buffer, LOCAL_BUFFER_SIZE); + counter = 0; + } + } + + // std::cout << "pop " << (uint64_t)value << std::endl; + + // set the value + t = value; + } +}; + + +int main() +{ + // create segment and corresponding allocator + bip::managed_shared_memory segment(bip::open_or_create, "MySharedMemory", 104857600); + // shm::char_alloc char_alloc(segment.get_segment_manager()); + + shm::ring_buffer *queue = segment.find_or_construct("queue")(); + LocalReceiveBuffer buffer(queue); + + uint64_t counter = 0; + // shm::shared_string v(char_alloc); + + using Action=DepMod::DepModAction; + + uint32_t loop_id; + // while (true) { + while (false) { + char v; + buffer.pop(v); + counter++; + + switch (v) { + case Action::INIT: { + uint32_t pid; + buffer.pop(loop_id); + buffer.pop(pid); + + if (DEBUG) { + std::cout << "INIT: " << loop_id << " " << pid << std::endl; + } +#if ACTION + DepMod::init(loop_id, pid); +#endif + break; + }; + case Action::LOAD: { + uint32_t instr; + uint64_t addr; + uint32_t bare_instr; + uint64_t value = 0; + buffer.pop(instr); + buffer.pop(addr); + buffer.pop(bare_instr); + // buffer.pop(value); + if (DEBUG) { + 
std::cout << "LOAD: " << instr << " " << addr << " " << bare_instr << " " << value << std::endl; + } +#if ACTION + DepMod::load(instr, addr, bare_instr, value); + // DepMod::load(instr, addr, bare_instr, value); +#endif + + break; + }; + case Action::STORE: { + uint32_t instr; + uint32_t bare_instr; + uint64_t addr; + buffer.pop(instr); + buffer.pop(bare_instr); + buffer.pop(addr); + if (DEBUG) { + std::cout << "STORE: " << instr << " " << bare_instr << " " << addr << std::endl; + } +#if ACTION + DepMod::store(instr, bare_instr, addr); +#endif + break; + }; + case Action::ALLOC: { + uint64_t addr; + uint64_t size; + buffer.pop(addr); + buffer.pop(size); + if (DEBUG) { + std::cout << "ALLOC: " << addr << " " << size << std::endl; + } +#if ACTION + DepMod::allocate(reinterpret_cast(addr), size); +#endif + break; + }; + case Action::LOOP_INVOC: { +#if ACTION + DepMod::loop_invoc(); +#endif + + if (DEBUG) { + std::cout << "LOOP_INVOC" << std::endl; + } + break; + }; + case Action::LOOP_ITER: { +#if ACTION + DepMod::loop_iter(); +#endif + + if (DEBUG) { + std::cout << "LOOP_ITER" << std::endl; + } + break; + }; + case Action::FINISHED: { + std::stringstream ss; + ss << "deplog-" << loop_id << ".txt"; +#if ACTION + DepMod::fini(ss.str().c_str()); +#endif + std::cout << "Finished loop: " << loop_id << " after " << counter + << " events" << std::endl; + break; + }; + default: + std::cout << "Unknown action: " << v << std::endl; + exit(-1); + } + + if (counter % 100000000 == 0) { + std::cout << "Processed " << counter / 1000000 << "M events" << std::endl; + } + } + + while (true) { + char v; + while (!queue->pop(v)); + counter++; + + if (counter % 100000000 == 0) { + std::cout << "Processed " << counter / 1000000 << "M events" << std::endl; + } + } +} diff --git a/liberty/lib/SLAMP/SLAMPboost/slamp_hooks.h b/liberty/lib/SLAMP/SLAMPboost/slamp_hooks.h new file mode 100644 index 00000000..df546117 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPboost/slamp_hooks.h @@ -0,0 +1,371 
@@ +#ifndef SLAMPLIB_HOOKS_SLAMP_HOOKS_H +#define SLAMPLIB_HOOKS_SLAMP_HOOKS_H + +// FIXME: inline tweak actually make things worse in sequential and better with +// 16x, so turn it on at all time before understanding why +#define ATTRIBUTE(x) __attribute__((x)) +// #ifdef ITO_ENABLE +// // #define ATTRIBUTE(x) +// #define ATTRIBUTE(x) __attribute__((x)) +// #else +// #define ATTRIBUTE(x) +// #endif + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void SLAMP_dbggv(int id); +void SLAMP_dbggvstr(char* str); + +// SLAMP measure functions +void SLAMP_measure_init(); +void SLAMP_measure_fini(); +void SLAMP_measure_load(uint32_t id, uint64_t size); +void SLAMP_measure_store(uint32_t id, uint64_t size); +static void* SLAMP_measure_malloc_hook(size_t size, const void *caller); +static void SLAMP_measure_free_hook(void *ptr, const void *caller); + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id); +void SLAMP_fini(const char* filename); + +void SLAMP_allocated(uint64_t addr); +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size); +void SLAMP_main_entry(uint32_t argc, char** argv, char** env); + +void SLAMP_enter_fcn(uint32_t id); +void SLAMP_exit_fcn(uint32_t id); +void SLAMP_enter_loop(uint32_t id); +void SLAMP_exit_loop(uint32_t id); +void SLAMP_loop_iter_ctx(uint32_t id); +void SLAMP_loop_invocation(); +void SLAMP_loop_iteration(); +void SLAMP_loop_exit(); + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr); +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr); +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t); +void SLAMP_callback_stack_free(void); + +void SLAMP_ext_push(const uint32_t instr); +void SLAMP_ext_pop(); + +void SLAMP_push(const 
uint32_t instr); +void SLAMP_pop(); + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline); +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_store1(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store2(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store4(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_store8(uint32_t instr, const uint64_t addr) ATTRIBUTE(always_inline); +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n) ATTRIBUTE(always_inline);; + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst) ATTRIBUTE(always_inline); +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst) 
ATTRIBUTE(always_inline); +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n) ATTRIBUTE(always_inline);; + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller); +static void SLAMP_free_hook(void *ptr, const void *caller); +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller); +void* SLAMP_malloc(size_t size, uint32_t instr=0, size_t alignment=16); + +void* SLAMP_calloc(size_t nelem, size_t elsize); +void* SLAMP_realloc(void* ptr, size_t size); +void* SLAMP__Znam(size_t size); +void* SLAMP__Znwm(size_t size); + +char* SLAMP_strdup(const char *s1); +char* SLAMP___strdup(const char *s1); +void SLAMP_free(void* ptr); +void SLAMP_cfree(void* ptr); +void SLAMP__ZdlPv(void* ptr); +void SLAMP__ZdaPv(void* ptr); +int SLAMP_brk(void *end_data_segment); +void* SLAMP_sbrk(intptr_t increment); + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes); +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes); +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes); +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes); +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len); +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len); + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr); +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr); + +/* String functions */ +size_t SLAMP_strlen(const char *str); +char* SLAMP_strchr(char *s, int c); +char* SLAMP_strrchr(char *s, int c); +int SLAMP_strcmp(const char *s1, const char *s2); +int SLAMP_strncmp(const char *s1, const char *s2, size_t n); +char* SLAMP_strcpy(char *dest, const char *src); +char* SLAMP_strncpy(char *dest, 
const char *src, size_t n); +char* SLAMP_strcat(char *s1, const char *s2); +char* SLAMP_strncat(char *s1, const char *s2, size_t n); +char* SLAMP_strstr(char *s1, char *s2); +size_t SLAMP_strspn(const char *s1, const char *s2); +size_t SLAMP_strcspn(const char *s1, const char *s2); +char* SLAMP_strtok(char *s, const char *delim); +double SLAMP_strtod(const char *nptr, char **endptr); +long int SLAMP_strtol(const char *nptr, char **endptr, int base); +char* SLAMP_strpbrk(char *s1, char *s2); + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n); +void *SLAMP_memcpy (void *dest, const void *src, size_t n); +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n); +void *SLAMP_memmove (void *dest, const void *src, size_t n); +int SLAMP_memcmp(const void *s1, const void *s2, size_t n); +void* SLAMP_memchr(void* ptr, int value, size_t num); +void* SLAMP___rawmemchr(void* ptr, int value); + +void SLAMP_bzero(void *s, size_t n); +void SLAMP_bcopy(const void *s1, void *s2, size_t n); + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count); +int SLAMP_open(const char *pathname, int flags, mode_t mode); +int SLAMP_close(int fd); +ssize_t SLAMP_write(int fd, const void *buf, size_t count); +off_t SLAMP_lseek(int fildes, off_t offset, int whence); + +FILE * SLAMP_fopen(const char *path, const char *mode); +FILE * SLAMP_fopen64(const char *path, const char *mode); +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream); +int SLAMP_fflush(FILE *stream); +int SLAMP_fclose(FILE *stream); +int SLAMP_ferror(FILE *stream); +int SLAMP_feof(FILE *stream); +long SLAMP_ftell(FILE *stream); +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream); +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream); +int SLAMP_fseek(FILE *stream, long offset, int whence); +void SLAMP_rewind(FILE *stream); + +int SLAMP_fgetc(FILE *stream); +int SLAMP_fputc(int c, FILE *stream); +char * 
SLAMP_fgets(char *s, int n, FILE *stream); +int SLAMP_fputs(const char *s, FILE *stream); + +int SLAMP_ungetc(int c, FILE *stream); +int SLAMP_putchar(int c); +int SLAMP_getchar(void); + +int SLAMP_fileno(FILE *stream); +char * SLAMP_gets(char *s); +int SLAMP_puts(const char *s); + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); +int SLAMP_remove(const char *path); + +void SLAMP_setbuf(FILE * stream, char * buf); +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size); +char * SLAMP_tmpnam(char *s); +FILE* SLAMP_tmpfile(void); +char * SLAMP_ttyname(int fildes); + +FILE * SLAMP_fdopen(int fildes, const char *mode); +void SLAMP_clearerr(FILE *stream); + +int SLAMP_truncate(const char *path, off_t length); +int SLAMP_ftruncate(int fildes, off_t length); + +int SLAMP_dup(int oldfd); +int SLAMP_dup2(int oldfd, int newfd); +int SLAMP_pipe(int filedes[2]); + +int SLAMP_chmod(const char *path, mode_t mode); +int SLAMP_fchmod(int fildes, mode_t mode); +int SLAMP_fchown(int fd, uid_t owner, gid_t group); +int SLAMP_access(const char *pathname, int mode); +long SLAMP_pathconf(char *path, int name); +int SLAMP_mkdir(const char *pathname, mode_t mode); +int SLAMP_rmdir(const char *pathname); +mode_t SLAMP_umask(mode_t mask); +int SLAMP_fcntl(int fd, int cmd, struct flock *lock); + +DIR* SLAMP_opendir(const char* name); +struct dirent* SLAMP_readdir(DIR *dirp); +struct dirent64* SLAMP_readdir64(DIR *dirp); +int SLAMP_closedir(DIR* dirp); + +/* Printf */ +int SLAMP_printf(const char *format, ...); +int SLAMP_fprintf(FILE *stream, const char *format, ...); +int SLAMP_sprintf(char *str, const char *format, ...); +int SLAMP_snprintf(char *str, size_t size, const char *format, ...); + +int SLAMP_vprintf(const char *format, va_list ap); +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap); +int SLAMP_vsprintf(char *str, const char *format, va_list ap); +int SLAMP_vsnprintf(char *str, size_t size, const 
char *format, va_list ap); + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ); +int SLAMP_scanf(const char *format, ... ); +int SLAMP_sscanf(const char *s, const char *format, ... ); +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... ); + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap); +int SLAMP_vscanf(const char *format, va_list ap); +int SLAMP_vsscanf(const char *s, const char *format, va_list ap); + +/* Time */ +time_t SLAMP_time(time_t *t); +struct tm *SLAMP_localtime(const time_t *timer); +struct lconv* SLAMP_localeconv(); +struct tm *SLAMP_gmtime(const time_t *timer); +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz); + +/* Math */ +double SLAMP_ldexp(double x, int exp); +float SLAMP_ldexpf(float x, int exp); +long double SLAMP_ldexpl(long double x, int exp); +double SLAMP_log10(double x); +float SLAMP_log10f(float x); +long double SLAMP_log10l(long double x); +double SLAMP_log(double x); +float SLAMP_logf(float x); +long double SLAMP_logl(long double x); + +double SLAMP_exp(double x); +float SLAMP_expf(float x); +long double SLAMP_expl(long double x); + +double SLAMP_cos(double x); +float SLAMP_cosf(float x); +long double SLAMP_cosl(long double x); +double SLAMP_sin(double x); +double SLAMP_tan(double x); +float SLAMP_sinf(float x); +long double SLAMP_sinl(long double x); + +double SLAMP_atan(double x); +float SLAMP_atanf(float x); +long double SLAMP_atanl(long double x); + +double SLAMP_floor(double x); +float SLAMP_floorf(float x); +long double SLAMP_floorl(long double x); +double SLAMP_ceil(double x); +float SLAMP_ceilf(float x); +long double SLAMP_ceill(long double x); + +double SLAMP_atan2(double y, double x); +float SLAMP_atan2f(float y, float x); +long double SLAMP_atan2l(long double y, long double x); + +double SLAMP_sqrt(double x); +float SLAMP_sqrtf(float x); +long double SLAMP_sqrtl(long double x); + +double SLAMP_pow(double x, double y); +float SLAMP_powf(float x, float y); 
+long double SLAMP_powl(long double x, long double y); + +double SLAMP_fabs(double x); +float SLAMP_fabsf(float x); +long double SLAMP_fabsl(long double x); + +double SLAMP_modf(double x, double *iptr); +float SLAMP_modff(float x, float *iptr); +long double SLAMP_modfl(long double x, long double *iptr); + +double SLAMP_fmod(double x, double y); + +double SLAMP_frexp(double num, int *exp); +float SLAMP_frexpf(float num, int *exp); +long double SLAMP_frexpl(long double num, int *exp); + +int SLAMP_isnan(); + +/* MISC */ +char *SLAMP_getenv(const char *name); +int SLAMP_putenv(char* string); +char *SLAMP_getcwd(char *buf, size_t size); +char* SLAMP_strerror(int errnum); +void SLAMP_exit(int status); +void SLAMP__exit(int status); +int SLAMP_link(const char *oldpath, const char *newpath); +int SLAMP_unlink(const char *pathname); +int SLAMP_isatty(int desc); +int SLAMP_setuid(uid_t uid); +uid_t SLAMP_getuid(void); +uid_t SLAMP_geteuid(void); +int SLAMP_setgid(gid_t gid); +gid_t SLAMP_getgid(void); +gid_t SLAMP_getegid(void); +pid_t SLAMP_getpid(void); +int SLAMP_chdir(const char *path); +int SLAMP_execl(const char *path, const char *arg0, ... 
/*, (char *)0 */); +int SLAMP_execv(const char *path, char *const argv[]); +int SLAMP_execvp(const char *file, char *const argv[]); +int SLAMP_kill(pid_t pid, int sig); +pid_t SLAMP_fork(void); +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler); +pid_t SLAMP_waitpid(pid_t pid, int* status, int options); +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)); +int SLAMP_ioctl(int d, int request, ...); +unsigned int SLAMP_sleep(unsigned int seconds); +char* SLAMP_gcvt(double number, size_t ndigit, char* buf); +char* SLAMP_nl_langinfo(nl_item item); + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function); +const unsigned short int **SLAMP___ctype_b_loc(void); +int SLAMP__IO_getc(_IO_FILE * __fp); +int SLAMP__IO_putc(int __c, _IO_FILE *__fp); +int* SLAMP___errno_location (void); + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf); +int SLAMP___xstat (int __ver, __const char *__filename, struct stat *__stat_buf); + +#ifdef __cplusplus +} +#endif + +#endif /* SLAMP_HOOKS_H */ diff --git a/liberty/lib/SLAMP/SLAMPcustom/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPcustom/CMakeLists.txt new file mode 100644 index 00000000..f270e779 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CMakeLists.txt @@ -0,0 +1,41 @@ +file(GLOB SRCS + # "CustomSend_all.cpp" + # "CustomSend_ol.cpp" + # "CustomSend_lv.cpp" + # "CustomSend_pt.cpp" + "CustomSend.cpp" + # "*.c" +) +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") + +# add CMAKE_PREFIX_PATH +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") + +# Compilation flags +# set_source_files_properties(${SRCS} PROPERTIES COMPILE_FLAGS "-Wl,-save-temps -std=c++17 -Wno-inline -O3 -fexceptions")# -emit-llvm") +# set C++ flags +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g") +set_source_files_properties(${SRCS} 
PROPERTIES COMPILE_FLAGS "-flto -Wno-inline -O3 -g -fexceptions")# -emit-llvm") +set(PassName "slamp_hooks_custom") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS}) + + +# list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +#include(HandleLLVMOptions) +# include(AddLLVM) + + +include_directories(./ + /u/ziyangx/test/boost/boost_1_80_0/install/include) + +add_library(${PassName} STATIC ${SRCS}) +target_link_libraries(${PassName} ${Boost_LIBRARIES}) +# target_link_libraries(${PassName} nng::nng) +# add_llvm_library(${PassName}_shared SHARED ${SRCS}) +# set_target_properties(${PassName}_shared PROPERTIES OUTPUT_NAME ${PassName}) +# set_property(TARGET ${PassName} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +#add_llvm_library(${PassName} SHARED ${SRCS}) # This is to generate libxxx.so diff --git a/liberty/lib/SLAMP/SLAMPcustom/CustomSend.cpp b/liberty/lib/SLAMP/SLAMPcustom/CustomSend.cpp new file mode 100644 index 00000000..40ea862a --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CustomSend.cpp @@ -0,0 +1,812 @@ +#include "slamp_hooks.h" +#include +#include +#include +#include +#include +#include +#include +#include "malloc.h" + +#include +#include +#include "sw_queue_astream.h" + +#include +#include + +#define TRACK_CONTEXT + +#define MM_STREAM +//#define SAMPLING_ITER + +namespace bip = boost::interprocess; + +static constexpr uint64_t QSIZE_GUARD = QSIZE - 60; + +// static unsigned long counter_load = 0; +// static unsigned long counter_store = 0; +// static unsigned long counter_ctx = 0; +// static unsigned long counter_alloc = 0; +// static unsigned long counter_invoc = 0; +// static unsigned long counter_iter = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +static int nested_level = 0; +static bool on_profiling = false; + +static unsigned int context = 0; + +static void *(*old_malloc_hook)(size_t, const void *); +static void 
*(*old_realloc_hook)(void *, size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::fixed_managed_shared_memory *segment; +bip::fixed_managed_shared_memory *segment2; +static Queue_p dqA, dqB, dq, dq_other; +static uint64_t dq_index = 0; +static uint32_t *dq_data; +// static uint64_t total_pushed = 0; +static uint64_t total_swapped = 0; +static void swap(){ + if(dq == dqA){ + dq = dqB; + dq_other = dqA; + }else{ + dq = dqA; + dq_other = dqB; + } + dq_data = dq->data; +} + +static void produce_wait() ATTRIBUTE(noinline){ + dq->size = dq_index; + dq->ready_to_read = true; + dq->ready_to_write = false; + while (!dq_other->ready_to_write){ + // spin + usleep(10); + } + swap(); + dq->ready_to_read = false; + dq_index = 0; + total_swapped++; +} + +// the packet is always 128bit, pad with 0 +static void produce_32(uint32_t x) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, 0, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); +#else + dq_data[dq_index] = x; + // dq_data[dq_index + 1] = 0; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32(uint32_t x, uint32_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); +#else + dq_data[dq_index] = x; + dq_data[dq_index + 1] = y; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_64_64(const uint64_t x, const uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + 
_mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi64x(y, x)); + // _mm_stream_si64((long long *) &dq_data[dq_index], x); + // _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + *((uint64_t *) &dq_data[dq_index]) = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +// static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) { +static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + // FIXME: set 32bit x, 32bit y, 64bit z, small endian + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], z); + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32( z >> 32, z & 0xFFFFFFFF, y, x)); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + *(uint64_t*)&dq_data[dq_index+2] = z; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_32(uint32_t x, uint32_t y, uint32_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, z, y, x)); + + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_si32((int *) &dq_data[dq_index+1], y); + // _mm_stream_si32((int *) &dq_data[dq_index+2], z); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + dq_data[dq_index+2] = z; + dq_data[dq_index+3] = 0; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +// static void produce_32_64_64(uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline) { +// #ifdef MM_STREAM + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_pi((__m64*)&dq_data[dq_index+1], (__m64)y); + // _mm_stream_pi((__m64*)&dq_data[dq_index+3], (__m64)z); +// #else + // dq_data[dq_index] = x; + // 
*(uint64_t*)&dq_data[dq_index+1] = y; + // *(uint64_t*)&dq_data[dq_index+3] = z; +// #endif + // dq_index += 5; + // if (dq_index >= QSIZE_GUARD) [[unlikely]] { + // produce_wait(); + // } +// } + + +// #define CONSUME sq_consume(the_queue); +// #define PRODUCE(x) sq_produce(the_queue,(uint64_t)x); +#define PRODUCE(x) produce_32((uint32_t)x); + +#define COMBINE_2_32(x,y) ((((uint64_t)y)<<32) | (uint32_t)(x)) +#define CONSUME_2(x,y) do { uint64_t tmp = CONSUME; x = (uint32_t)(tmp>>32); y = (uint32_t) tmp; } while(0) + +#define TURN_ON_HOOKS \ + __malloc_hook = SLAMP_malloc_hook;\ + __realloc_hook = SLAMP_realloc_hook;\ + __memalign_hook = SLAMP_memalign_hook; + + +#define TURN_OFF_HOOKS \ + __malloc_hook = old_malloc_hook; \ + __realloc_hook = old_realloc_hook; \ + __memalign_hook = old_memalign_hook; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. 
+ +enum DepModAction: char +{ + INIT = 0, + LOAD, + STORE, + ALLOC, + LOOP_INVOC, + LOOP_ITER, + LOOP_EXIT, + FINISHED, + FUNC_ENTRY, + FUNC_EXIT, +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + uint32_t pid = getpid(); + + // get QUEUE ID from env + char *env = getenv("SLAMP_QUEUE_ID"); + if (env == NULL) { + std::cerr << "SLAMP_QUEUE_ID not set" << std::endl; + exit(-1); + } + auto queue_name = std::string("slamp_queue_") + env; + segment = new bip::fixed_managed_shared_memory(bip::open_or_create, queue_name.c_str(), sizeof(uint32_t) *QSIZE *4, (void*)(1UL << 32)); + // segment2 = new bip::fixed_managed_shared_memory(bip::open_or_create, "MySharedMemory2", sizeof(uint64_t) *QSIZE *2, (void*)(1UL << 28)); + + dqA = segment->find("DQ_A").first; + dqB = segment->find("DQ_B").first; + dq = dqA; + dq_other = dqB; + dq_index = 0; + dq_data = dq->data; + + // managed_shared_memory(bip::open_or_create, "MySharedMemory", sizeof(uint64_t) *QSIZE *2); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + + // the_queue = static_cast(segment->find_or_construct("MyQueue")()); + // auto data = static_cast(segment2->find_or_construct("smtx_queue_data")[QSIZE]()); + // if (the_queue == nullptr) { + // std::cout << "Error: could not create queue" << std::endl; + // exit(-1); + // } + // the_queue->data = data; + // if (the_queue->data == nullptr) { + // std::cout << "Error: could not create queue data" << std::endl; + // exit(-1); + // } + + // [> Initialize the queue data structure <] + // the_queue->p_data = (uint64_t) the_queue->data; + // the_queue->c_inx = 0; + // the_queue->c_margin = 0; + // the_queue->p_glb_inx = 0; + // the_queue->c_glb_inx = 0; + // the_queue->ptr_c_glb_inx = &(the_queue->c_glb_inx); + // the_queue->ptr_p_glb_inx = &(the_queue->p_glb_inx); + // // local_buffer = new LocalBuffer(queue); + + // // send a msg with "fn_id, loop_id" + // // char msg[100]; + // // sprintf(msg, "%d,%d", fn_id, loop_id); + // // 
queue->push(shm::shared_string(msg, *char_alloc)); + + // local_buffer->push(INIT); + // local_buffer->push(loop_id); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + // local_buffer->push(pid); + produce_32_32_32(INIT, loop_id, pid); + + auto allocateLibcReqs = [](void *addr, size_t size) { + produce_32_32_64(ALLOC, size, (uint64_t)addr); + }; + + allocateLibcReqs((void*)&errno, sizeof(errno)); + allocateLibcReqs((void*)&stdin, sizeof(stdin)); + allocateLibcReqs((void*)&stdout, sizeof(stdout)); + allocateLibcReqs((void*)&stderr, sizeof(stderr)); + allocateLibcReqs((void*)&sys_nerr, sizeof(sys_nerr)); + + // const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + // allocateLibcReqs((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + // const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + // itype_ptr = (*__ctype_toupper_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + + // // FIXME: a dirty way to get xalancbmk to work + // auto locale = localeconv(); + // auto decimal = locale->decimal_point; + // allocateLibcReqs((void*)locale, sizeof(*locale)); + // allocateLibcReqs((void*)decimal, sizeof(*decimal)); + + old_malloc_hook = __malloc_hook; + // old_free_hook = __free_hook; + old_memalign_hook = __memalign_hook; + old_realloc_hook = __realloc_hook; + __malloc_hook = SLAMP_malloc_hook; + __free_hook = nullptr; + __realloc_hook = SLAMP_realloc_hook; + // __free_hook = SLAMP_free_hook; + __memalign_hook = SLAMP_memalign_hook; + + // flush + // produce_wait(); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + // std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + // local_buffer->push(FINISHED); + // local_buffer->flush(); + PRODUCE(FINISHED); + // sq_flushQueue(the_queue); + dq->size = dq_index; + dq->ready_to_read = true; + + 
if (nested_level != 0) { + std::cerr << "Error: nested_level != 0 on exit" << std::endl; + exit(-1); + } +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ + // local_buffer->push(ALLOC)->push(addr)->push(size); + + produce_32_32_64(ALLOC, size, addr); +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){} +void SLAMP_exit_fcn(uint32_t id){} +void SLAMP_enter_loop(uint32_t id){} +void SLAMP_exit_loop(uint32_t id){} +void SLAMP_loop_iter_ctx(uint32_t id){} + +void SLAMP_loop_invocation(){ + // send a msg with "loop_invocation" + // local_buffer->push(LOOP_INVOC); + PRODUCE(LOOP_INVOC); + + // counter_ctx++; + + nested_level++; + on_profiling = true; + + // if (counter_invoc % 1 == 0) { + // on_profiling= true; + // } + // counter_invoc++; +} + +void SLAMP_loop_iteration(){ + // local_buffer->push(LOOP_ITER); + PRODUCE(LOOP_ITER); + // counter_ctx++; + +#ifdef SAMPLING_ITER + if (counter_iter % 100 == 0) { + on_profiling = true; + } + if (counter_iter % 100 == 10) { // 0-10000 out of 100000, sampling 10% + on_profiling = false; + } + counter_iter++; +#endif +} + +void SLAMP_loop_exit(){ + nested_level--; + PRODUCE(LOOP_EXIT); + if (nested_level < 0) { + // huge problem + std::cerr << "Error: nested_level < 0" << std::endl; + exit(-1); + } + if (nested_level == 0) { + on_profiling = false; + } +} + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr){} +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr){} +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){} +void SLAMP_ext_pop(){} + +void SLAMP_push(const uint32_t instr){ +#ifdef TRACK_CONTEXT + if (nested_level == 1) { + context = instr; + produce_32_32(FUNC_ENTRY, instr); + } +#endif +} +void SLAMP_pop(){ +#ifdef TRACK_CONTEXT + if (nested_level == 1) { + 
produce_32_32(FUNC_EXIT, context); + context = 0; + } +#endif +} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline) { + // send a msg with "load, instr, addr, bare_instr, value" + // char msg[100]; + // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); + // + if (on_profiling) { + produce_32_32_64(LOAD, instr, addr); + // counter_load++; + } +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t 
addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { + // send a msg with "store, instr, addr, bare_instr, value" + // char msg[100]; + // sprintf(msg, "store,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); + if (on_profiling) { + // produce_3(STORE, COMBINE_2_32(instr, bare_instr), addr); + // produce_64_64(COMBINE_2_32(STORE, instr), addr); + produce_32_32_64(STORE, instr, addr); + // counter_store++; + } +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + SLAMP_storen(bare_inst, addr, n); +} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = malloc(size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // + produce_32_32_64(ALLOC, size, (uint64_t)ptr); + // printf("malloc %lu at %p\n", size, ptr); + // counter_alloc++; + TURN_ON_HOOKS + return ptr; +} + +static void* SLAMP_realloc_hook(void* ptr, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* new_ptr = realloc(ptr, size); 
+ // local_buffer->push(REALLOC)->push((uint64_t)ptr)->push((uint64_t)new_ptr)->push(size); + // produce_3(ALLOC, (uint64_t)new_ptr, size); + produce_32_32_64(ALLOC, size, (uint64_t)new_ptr); + // printf("realloc %p to %lu at %p", ptr, size, new_ptr); + // counter_alloc++; + TURN_ON_HOOKS + return new_ptr; +} + +static void SLAMP_free_hook(void *ptr, const void *caller){ + old_free_hook(ptr, caller); +} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = memalign(alignment, size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // produce_3(ALLOC, (uint64_t)ptr, size); + produce_32_32_64(ALLOC, size, (uint64_t)ptr); + + // printf("memalign %lu at %p\n", size, ptr); + // counter_alloc++; + TURN_ON_HOOKS + return ptr; +} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void 
SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int 
SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t 
group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float 
SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const 
char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPcustom/CustomSend_all.cpp b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_all.cpp new file mode 100644 index 00000000..82494607 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_all.cpp @@ -0,0 +1,894 @@ +#include "slamp_hooks.h" +#include +#include +#include +#include +#include +#include +#include +#include "malloc.h" + +#include +#include +#include "sw_queue_astream.h" + +#include +#include + +#define MM_STREAM +//#define SAMPLING_ITER + +namespace bip = boost::interprocess; + +static constexpr uint64_t QSIZE_GUARD = QSIZE - 60; + +// static unsigned long counter_load = 0; +// static unsigned long counter_store = 0; +// static unsigned long counter_ctx = 0; +// static unsigned long counter_alloc = 0; +// static unsigned long counter_invoc = 0; +// static unsigned long counter_iter = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +static int nested_level = 0; +static bool on_profiling = false; + +static uint32_t ext_fn_inst_id = 0; + +static void *(*old_malloc_hook)(size_t, const void *); +static void *(*old_realloc_hook)(void *, size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::fixed_managed_shared_memory *segment; +bip::fixed_managed_shared_memory *segment2; +static Queue_p dqA, dqB, dq, dq_other; +static uint64_t dq_index = 0; +static uint32_t *dq_data; +// static uint64_t total_pushed = 0; +static uint64_t total_swapped = 0; +static void swap(){ + if(dq == dqA){ + dq = dqB; + dq_other = dqA; + }else{ + dq = dqA; + dq_other = dqB; + } + dq_data = dq->data; +} + +static void produce_wait() ATTRIBUTE(noinline){ + dq->size = dq_index; + dq->ready_to_read = true; + dq->ready_to_write = false; + while (!dq_other->ready_to_write){ + // spin + usleep(10); + } + swap(); + dq->ready_to_read = false; + dq_index = 
0; + total_swapped++; +} + +// the packet is always 128bit, pad with 0 +static void produce_32(uint32_t x) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, 0, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); +#else + dq_data[dq_index] = x; + // dq_data[dq_index + 1] = 0; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32(uint32_t x, uint32_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); +#else + dq_data[dq_index] = x; + dq_data[dq_index + 1] = y; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_64(uint32_t x, uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + dq_data[dq_index] = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_64_64(uint32_t w, uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], w); + _mm_stream_si32((int *) &dq_data[dq_index+1], x); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 6], z); +#else + dq_data[dq_index] = w; + dq_data[dq_index+1] = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; 
+ dq_data[dq_index + 6] = z; +#endif + dq_index += 8; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_8_24_32_64(uint8_t x, uint32_t y, uint32_t z, uint64_t w) ATTRIBUTE(noinline){ + uint32_t xy = (y << 8) | x; +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(z, w, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], xy); + _mm_stream_si32((int *) &dq_data[dq_index + 1], z); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], w); +#else + dq_data[dq_index] = xy; + dq_data[dq_index + 1] = z; + *((uint64_t *) &dq_data[dq_index + 2]) = w; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +static void produce_64_64(const uint64_t x, const uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi64x(y, x)); + // _mm_stream_si64((long long *) &dq_data[dq_index], x); + // _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + *((uint64_t *) &dq_data[dq_index]) = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +// static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) { +static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], z); + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32( z >> 32, z & 0xFFFFFFFF, y, x)); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + *(uint64_t*)&dq_data[dq_index+2] = z; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_32(uint32_t x, uint32_t y, uint32_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + 
_mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, z, y, x)); + + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_si32((int *) &dq_data[dq_index+1], y); + // _mm_stream_si32((int *) &dq_data[dq_index+2], z); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + dq_data[dq_index+2] = z; + dq_data[dq_index+3] = 0; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +// static void produce_32_64_64(uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline) { +// #ifdef MM_STREAM + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_pi((__m64*)&dq_data[dq_index+1], (__m64)y); + // _mm_stream_pi((__m64*)&dq_data[dq_index+3], (__m64)z); +// #else + // dq_data[dq_index] = x; + // *(uint64_t*)&dq_data[dq_index+1] = y; + // *(uint64_t*)&dq_data[dq_index+3] = z; +// #endif + // dq_index += 5; + // if (dq_index >= QSIZE_GUARD) [[unlikely]] { + // produce_wait(); + // } +// } + + +// #define CONSUME sq_consume(the_queue); +// #define PRODUCE(x) sq_produce(the_queue,(uint64_t)x); +#define PRODUCE(x) produce_32((uint32_t)x); + +#define COMBINE_2_32(x,y) ((((uint64_t)y)<<32) | (uint32_t)(x)) +#define CONSUME_2(x,y) do { uint64_t tmp = CONSUME; x = (uint32_t)(tmp>>32); y = (uint32_t) tmp; } while(0) + +#define TURN_ON_HOOKS \ + __malloc_hook = SLAMP_malloc_hook;\ + __realloc_hook = SLAMP_realloc_hook;\ + __memalign_hook = SLAMP_memalign_hook; \ + __free_hook = SLAMP_free_hook; + + + +#define TURN_OFF_HOOKS \ + __malloc_hook = old_malloc_hook; \ + __realloc_hook = old_realloc_hook; \ + __memalign_hook = old_memalign_hook; \ + __free_hook = old_free_hook; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. 
+ +// enum DepModAction: char +// { +// INIT = 0, +// LOAD, +// STORE, +// ALLOC, +// LOOP_INVOC, +// LOOP_ITER, +// FINISHED +// }; +enum UnifiedAction : char { + INIT = 0, + LOAD, + STORE, + ALLOC, + FREE, + LOOP_INVOC, + LOOP_ITER, + LOOP_ENTRY, + LOOP_EXIT, + LOOP_ITER_CTX, + FUNC_ENTRY, + FUNC_EXIT, + POINTS_TO_INST, + POINTS_TO_ARG, + FINISHED +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + + // get QUEUE ID from env + char *env = getenv("SLAMP_QUEUE_ID"); + if (env == NULL) { + std::cerr << "SLAMP_QUEUE_ID not set" << std::endl; + exit(-1); + } + auto queue_name = std::string("slamp_queue_") + env; + segment = new bip::fixed_managed_shared_memory(bip::open_or_create, queue_name.c_str(), sizeof(uint32_t) *QSIZE *4, (void*)(1UL << 32)); + // segment2 = new bip::fixed_managed_shared_memory(bip::open_or_create, "MySharedMemory2", sizeof(uint64_t) *QSIZE *2, (void*)(1UL << 28)); + + dqA = segment->find("DQ_A").first; + dqB = segment->find("DQ_B").first; + dq = dqA; + dq_other = dqB; + dq_index = 0; + dq_data = dq->data; + + // managed_shared_memory(bip::open_or_create, "MySharedMemory", sizeof(uint64_t) *QSIZE *2); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + + // the_queue = static_cast(segment->find_or_construct("MyQueue")()); + // auto data = static_cast(segment2->find_or_construct("smtx_queue_data")[QSIZE]()); + // if (the_queue == nullptr) { + // std::cout << "Error: could not create queue" << std::endl; + // exit(-1); + // } + // the_queue->data = data; + // if (the_queue->data == nullptr) { + // std::cout << "Error: could not create queue data" << std::endl; + // exit(-1); + // } + + // [> Initialize the queue data structure <] + // the_queue->p_data = (uint64_t) the_queue->data; + // the_queue->c_inx = 0; + // the_queue->c_margin = 0; + // the_queue->p_glb_inx = 0; + // the_queue->c_glb_inx = 0; + // the_queue->ptr_c_glb_inx = &(the_queue->c_glb_inx); + // the_queue->ptr_p_glb_inx = &(the_queue->p_glb_inx); + // // 
local_buffer = new LocalBuffer(queue); + + // // send a msg with "fn_id, loop_id" + // // char msg[100]; + // // sprintf(msg, "%d,%d", fn_id, loop_id); + // // queue->push(shm::shared_string(msg, *char_alloc)); + + // local_buffer->push(INIT); + // local_buffer->push(loop_id); + uint32_t pid = getpid(); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + // local_buffer->push(pid); + produce_32_32_32(INIT, loop_id, pid); + + auto allocateLibcReqs = [](void *addr, size_t size) { + produce_8_24_32_64(ALLOC, 0, size, (uint64_t)addr); + // produce_32_32_64(ALLOC, size, (uint64_t)addr); + }; + + allocateLibcReqs((void*)&errno, sizeof(errno)); + allocateLibcReqs((void*)&stdin, sizeof(stdin)); + allocateLibcReqs((void*)&stdout, sizeof(stdout)); + allocateLibcReqs((void*)&stderr, sizeof(stderr)); + allocateLibcReqs((void*)&sys_nerr, sizeof(sys_nerr)); + + // const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + // allocateLibcReqs((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + // const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + // itype_ptr = (*__ctype_toupper_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + + + old_malloc_hook = __malloc_hook; + // old_free_hook = __free_hook; + old_memalign_hook = __memalign_hook; + old_realloc_hook = __realloc_hook; + __malloc_hook = SLAMP_malloc_hook; + __free_hook = nullptr; + __realloc_hook = SLAMP_realloc_hook; + __free_hook = SLAMP_free_hook; + __memalign_hook = SLAMP_memalign_hook; + // flush + produce_wait(); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + // std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + // local_buffer->push(FINISHED); + // local_buffer->flush(); + PRODUCE(FINISHED); + // sq_flushQueue(the_queue); + dq->size = dq_index; + dq->ready_to_read = true; + + if 
(nested_level != 0) { + std::cerr << "Error: nested_level != 0 on exit" << std::endl; + exit(-1); + } +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ + // local_buffer->push(ALLOC)->push(addr)->push(size); + + produce_32_32_64(ALLOC, size, addr); +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){ + produce_32_32(FUNC_ENTRY, id); +} + +void SLAMP_exit_fcn(uint32_t id){ + produce_32_32(FUNC_EXIT, id); +} + +void SLAMP_enter_loop(uint32_t id){ + produce_32_32(LOOP_ENTRY, id); +} + +void SLAMP_exit_loop(uint32_t id){ + produce_32_32(LOOP_EXIT, id); +} + +void SLAMP_loop_iter_ctx(uint32_t id){ + // produce_32_32(LOOP_ITER_CTX, id); +} + +void SLAMP_loop_invocation(){ + // send a msg with "loop_invocation" + // local_buffer->push(LOOP_INVOC); + PRODUCE(LOOP_INVOC); + + // counter_ctx++; + + nested_level++; + on_profiling = true; + + // if (counter_invoc % 1 == 0) { + // on_profiling= true; + // } + // counter_invoc++; +} + +void SLAMP_loop_iteration(){ + // local_buffer->push(LOOP_ITER); + PRODUCE(LOOP_ITER); + // counter_ctx++; + +#ifdef SAMPLING_ITER + if (counter_iter % 100 == 0) { + on_profiling = true; + } + if (counter_iter % 100 == 10) { // 0-10000 out of 100000, sampling 10% + on_profiling = false; + } + counter_iter++; +#endif +} + +void SLAMP_loop_exit(){ + nested_level--; + if (nested_level < 0) { + // huge problem + std::cerr << "Error: nested_level < 0" << std::endl; + exit(-1); + } + if (nested_level == 0) { + on_profiling = false; + } +} + +void SLAMP_report_base_pointer_arg(uint32_t fcnId, uint32_t argId, void *ptr){ + // FIXME: combine fcnid and argid to 32 bit + uint32_t id = (fcnId << 16) | (argId & 0xffff); + + produce_32_32_64(POINTS_TO_ARG, id, (uint64_t)ptr); +} +void SLAMP_report_base_pointer_inst(uint32_t instId, void *ptr){ + produce_32_32_64(POINTS_TO_INST, instId, (uint64_t)ptr); +} + +void 
SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){ + ext_fn_inst_id = instr; +} +void SLAMP_ext_pop(){ + ext_fn_inst_id = 0; +} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline) { + // // send a msg with "load, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // // + if (on_profiling) { + produce_32_32_64_64(LOAD, instr, addr, value); + // counter_load++; + } +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t 
value){ + SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { + // // send a msg with "store, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "store,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + if (on_profiling) { + // produce_3(STORE, COMBINE_2_32(instr, bare_instr), addr); + // produce_64_64(COMBINE_2_32(STORE, instr), addr); + produce_32_32_64(STORE, instr, addr); + // counter_store++; + } +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + SLAMP_storen(bare_inst, addr, n); +} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = malloc(size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // + 
produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)ptr); + // printf("malloc %lu at %p\n", size, ptr); + // counter_alloc++; + TURN_ON_HOOKS + return ptr; +} + +static void* SLAMP_realloc_hook(void* ptr, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* new_ptr = realloc(ptr, size); + // local_buffer->push(REALLOC)->push((uint64_t)ptr)->push((uint64_t)new_ptr)->push(size); + // produce_3(ALLOC, (uint64_t)new_ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)new_ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)new_ptr); + // printf("realloc %p to %lu at %p", ptr, size, new_ptr); + // counter_alloc++; + TURN_ON_HOOKS + return new_ptr; +} + +static void SLAMP_free_hook(void *ptr, const void *caller){ + TURN_OFF_HOOKS + free(ptr); + produce_32_64(FREE, (uint64_t)ptr); + TURN_ON_HOOKS +} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = memalign(alignment, size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // produce_3(ALLOC, (uint64_t)ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)ptr); + + // printf("memalign %lu at %p\n", size, ptr); + // counter_alloc++; + TURN_ON_HOOKS + return ptr; +} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* 
SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} 
+void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char 
* SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float 
SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const 
char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPcustom/CustomSend_lv.cpp b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_lv.cpp new file mode 100644 index 00000000..ead7b414 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_lv.cpp @@ -0,0 +1,675 @@ +#include "slamp_hooks.h" +#include +#include +#include +#include +#include +#include +#include +#include "malloc.h" + +#include +#include +#include "sw_queue_astream.h" + +#include +#include + +#define MM_STREAM +//#define SAMPLING_ITER + +namespace bip = boost::interprocess; + +static constexpr uint64_t QSIZE_GUARD = QSIZE - 60; + +static unsigned long counter_load = 0; +static unsigned long counter_store = 0; +static unsigned long counter_ctx = 0; +static unsigned long counter_alloc = 0; +static unsigned long counter_invoc = 0; +static unsigned long counter_iter = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +static int nested_level = 0; + +static void *(*old_malloc_hook)(size_t, const void *); +static void *(*old_realloc_hook)(void *, size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::fixed_managed_shared_memory *segment; +bip::fixed_managed_shared_memory *segment2; +static Queue_p dqA, dqB, dq, dq_other; +static uint64_t dq_index = 0; +static uint32_t *dq_data; +// static uint64_t total_pushed = 0; +static uint64_t total_swapped = 0; +static void swap(){ + if(dq == dqA){ + dq = dqB; + dq_other = dqA; + }else{ + dq = dqA; + dq_other = dqB; + } + dq_data = dq->data; +} + +static void produce_wait() ATTRIBUTE(noinline){ + dq->size = dq_index; + dq->ready_to_read = true; + dq->ready_to_write = false; + while (!dq_other->ready_to_write){ + // spin + usleep(10); + } + swap(); + dq->ready_to_read = false; + dq_index = 0; + total_swapped++; +} + +// the packet is always 128bit, pad with 0 +static void 
produce_32(uint32_t x) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, 0, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); +#else + dq_data[dq_index] = x; + // dq_data[dq_index + 1] = 0; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_64_64(const uint64_t x, const uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi64x(y, x)); + // _mm_stream_si64((long long *) &dq_data[dq_index], x); + // _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + *((uint64_t *) &dq_data[dq_index]) = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +// static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) { +static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + // FIXME: set 32bit x, 32bit y, 64bit z, small endian + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], z); + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32( z >> 32, z & 0xFFFFFFFF, y, x)); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + *(uint64_t*)&dq_data[dq_index+2] = z; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_32(uint32_t x, uint32_t y, uint32_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, z, y, x)); + + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_si32((int *) &dq_data[dq_index+1], y); + // _mm_stream_si32((int *) &dq_data[dq_index+2], z); +#else + dq_data[dq_index] = x; + 
dq_data[dq_index+1] = y; + dq_data[dq_index+2] = z; + dq_data[dq_index+3] = 0; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +// static void produce_32_64_64(uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline) { +// #ifdef MM_STREAM + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_pi((__m64*)&dq_data[dq_index+1], (__m64)y); + // _mm_stream_pi((__m64*)&dq_data[dq_index+3], (__m64)z); +// #else + // dq_data[dq_index] = x; + // *(uint64_t*)&dq_data[dq_index+1] = y; + // *(uint64_t*)&dq_data[dq_index+3] = z; +// #endif + // dq_index += 5; + // if (dq_index >= QSIZE_GUARD) [[unlikely]] { + // produce_wait(); + // } +// } + + +// #define CONSUME sq_consume(the_queue); +// #define PRODUCE(x) sq_produce(the_queue,(uint64_t)x); +#define PRODUCE(x) produce_32((uint32_t)x); + +#define COMBINE_2_32(x,y) ((((uint64_t)y)<<32) | (uint32_t)(x)) +#define CONSUME_2(x,y) do { uint64_t tmp = CONSUME; x = (uint32_t)(tmp>>32); y = (uint32_t) tmp; } while(0) + +#define TURN_ON_HOOKS \ + __malloc_hook = SLAMP_malloc_hook;\ + __realloc_hook = SLAMP_realloc_hook;\ + __memalign_hook = SLAMP_memalign_hook; + + +#define TURN_OFF_HOOKS \ + __malloc_hook = old_malloc_hook; \ + __realloc_hook = old_realloc_hook; \ + __memalign_hook = old_memalign_hook; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. 
+ +enum LoadeValueModAction: char +{ + INIT = 0, + LOAD, + FINISHED +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + + // get QUEUE ID from env + char *env = getenv("SLAMP_QUEUE_ID"); + if (env == NULL) { + std::cerr << "SLAMP_QUEUE_ID not set" << std::endl; + exit(-1); + } + auto queue_name = std::string("slamp_queue_") + env; + segment = new bip::fixed_managed_shared_memory(bip::open_or_create, queue_name.c_str(), sizeof(uint32_t) *QSIZE *4, (void*)(1UL << 32)); + // segment2 = new bip::fixed_managed_shared_memory(bip::open_or_create, "MySharedMemory2", sizeof(uint64_t) *QSIZE *2, (void*)(1UL << 28)); + + dqA = segment->find("DQ_A").first; + dqB = segment->find("DQ_B").first; + dq = dqA; + dq_other = dqB; + dq_index = 0; + dq_data = dq->data; + + // managed_shared_memory(bip::open_or_create, "MySharedMemory", sizeof(uint64_t) *QSIZE *2); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + + // the_queue = static_cast(segment->find_or_construct("MyQueue")()); + // auto data = static_cast(segment2->find_or_construct("smtx_queue_data")[QSIZE]()); + // if (the_queue == nullptr) { + // std::cout << "Error: could not create queue" << std::endl; + // exit(-1); + // } + // the_queue->data = data; + // if (the_queue->data == nullptr) { + // std::cout << "Error: could not create queue data" << std::endl; + // exit(-1); + // } + + // [> Initialize the queue data structure <] + // the_queue->p_data = (uint64_t) the_queue->data; + // the_queue->c_inx = 0; + // the_queue->c_margin = 0; + // the_queue->p_glb_inx = 0; + // the_queue->c_glb_inx = 0; + // the_queue->ptr_c_glb_inx = &(the_queue->c_glb_inx); + // the_queue->ptr_p_glb_inx = &(the_queue->p_glb_inx); + // // local_buffer = new LocalBuffer(queue); + + // // send a msg with "fn_id, loop_id" + // // char msg[100]; + // // sprintf(msg, "%d,%d", fn_id, loop_id); + // // queue->push(shm::shared_string(msg, *char_alloc)); + + // local_buffer->push(INIT); + // local_buffer->push(loop_id); + uint32_t pid = 
getpid(); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + // local_buffer->push(pid); + produce_32_32_32(INIT, loop_id, pid); + + // auto allocateLibcReqs = [](void *addr, size_t size) { + // produce_32_32_64(ALLOC, size, (uint64_t)addr); + // }; + + // allocateLibcReqs((void*)&errno, sizeof(errno)); + // allocateLibcReqs((void*)&stdin, sizeof(stdin)); + // allocateLibcReqs((void*)&stdout, sizeof(stdout)); + // allocateLibcReqs((void*)&stderr, sizeof(stderr)); + // allocateLibcReqs((void*)&sys_nerr, sizeof(sys_nerr)); + + // const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + // allocateLibcReqs((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + // const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + // itype_ptr = (*__ctype_toupper_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + + // // FIXME: a dirty way to get xalancbmk to work + // auto locale = localeconv(); + // auto decimal = locale->decimal_point; + // allocateLibcReqs((void*)locale, sizeof(*locale)); + // allocateLibcReqs((void*)decimal, sizeof(*decimal)); + + // old_malloc_hook = __malloc_hook; + // // old_free_hook = __free_hook; + // old_memalign_hook = __memalign_hook; + // old_realloc_hook = __realloc_hook; + // __malloc_hook = SLAMP_malloc_hook; + // __free_hook = nullptr; + // __realloc_hook = SLAMP_realloc_hook; + // // __free_hook = SLAMP_free_hook; + // __memalign_hook = SLAMP_memalign_hook; + + // flush + produce_wait(); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + // local_buffer->push(FINISHED); + // local_buffer->flush(); + PRODUCE(FINISHED); + // sq_flushQueue(the_queue); + dq->size = dq_index; + dq->ready_to_read = true; + + if (nested_level != 0) { + std::cerr << "Error: nested_level 
!= 0 on exit" << std::endl; + exit(-1); + } +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){} +void SLAMP_exit_fcn(uint32_t id){} +void SLAMP_enter_loop(uint32_t id){} +void SLAMP_exit_loop(uint32_t id){} +void SLAMP_loop_iter_ctx(uint32_t id){} + +void SLAMP_loop_invocation(){} + +void SLAMP_loop_iteration(){} +void SLAMP_loop_exit(){} + +void SLAMP_report_base_pointer_arg(uint32_t, uint32_t, void *ptr){} +void SLAMP_report_base_pointer_inst(uint32_t, void *ptr){} +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){} +void SLAMP_ext_pop(){} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value, uint32_t size) ATTRIBUTE(always_inline) { + // send a msg with "load, instr, addr, bare_instr, value" + // char msg[100]; + // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // queue->push(shm::shared_string(msg, *char_alloc)); + // + produce_32_32_64(LOAD, instr, value); + // produce_32_32_64(LOAD, instr, addr); + // produce_32_32_64(LOAD, size, value); + counter_load++; +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value, 1); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value, 2); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value, 4); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value, 
8); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0, n); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + // SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + // SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + // SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + // SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + // SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + 
SLAMP_storen(bare_inst, addr, n); +} + +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} 
+char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void 
SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE 
*stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... ){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double 
y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... 
/*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPcustom/CustomSend_ol.cpp b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_ol.cpp new file mode 100644 index 00000000..7438741b --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_ol.cpp @@ -0,0 +1,862 @@ +#include "slamp_hooks.h" +#include +#include +#include +#include +#include +#include +#include +#include "malloc.h" + +#include +#include +#include "sw_queue_astream.h" + +#include +#include + +#define MM_STREAM +//#define SAMPLING_ITER + +namespace bip = boost::interprocess; + +static constexpr uint64_t QSIZE_GUARD = QSIZE - 60; + +static unsigned long counter_load = 0; +static unsigned long counter_store = 0; +static unsigned long counter_ctx = 0; +static unsigned long counter_alloc = 0; +static unsigned long counter_invoc = 0; +static unsigned long counter_iter = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +static int nested_level = 0; 
+static bool on_profiling = false; + +static uint32_t ext_fn_inst_id = 0; + +static void *(*old_malloc_hook)(size_t, const void *); +static void *(*old_realloc_hook)(void *, size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::fixed_managed_shared_memory *segment; +bip::fixed_managed_shared_memory *segment2; +static Queue_p dqA, dqB, dq, dq_other; +static uint64_t dq_index = 0; +static uint32_t *dq_data; +// static uint64_t total_pushed = 0; +static uint64_t total_swapped = 0; +static void swap(){ + if(dq == dqA){ + dq = dqB; + dq_other = dqA; + }else{ + dq = dqA; + dq_other = dqB; + } + dq_data = dq->data; +} + +static void produce_wait() ATTRIBUTE(noinline){ + dq->size = dq_index; + dq->ready_to_read = true; + dq->ready_to_write = false; + while (!dq_other->ready_to_write){ + // spin + usleep(10); + } + swap(); + dq->ready_to_read = false; + dq_index = 0; + total_swapped++; +} + +// the packet is always 128bit, pad with 0 +static void produce_32(uint32_t x) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, 0, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); +#else + dq_data[dq_index] = x; + // dq_data[dq_index + 1] = 0; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32(uint32_t x, uint32_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); +#else + dq_data[dq_index] = x; + dq_data[dq_index + 1] = y; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + 
produce_wait(); + } +} + +static void produce_32_64(uint32_t x, uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + dq_data[dq_index] = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_8_24_32_64(uint8_t x, uint32_t y, uint32_t z, uint64_t w) ATTRIBUTE(noinline){ + uint32_t xy = (y << 8) | x; +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(z, w, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], xy); + _mm_stream_si32((int *) &dq_data[dq_index + 1], z); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], w); +#else + dq_data[dq_index] = xy; + dq_data[dq_index + 1] = z; + *((uint64_t *) &dq_data[dq_index + 2]) = w; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +static void produce_64_64(const uint64_t x, const uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi64x(y, x)); + // _mm_stream_si64((long long *) &dq_data[dq_index], x); + // _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + *((uint64_t *) &dq_data[dq_index]) = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +// static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) { +static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], z); + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32( z 
>> 32, z & 0xFFFFFFFF, y, x)); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + *(uint64_t*)&dq_data[dq_index+2] = z; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_32(uint32_t x, uint32_t y, uint32_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, z, y, x)); + + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_si32((int *) &dq_data[dq_index+1], y); + // _mm_stream_si32((int *) &dq_data[dq_index+2], z); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + dq_data[dq_index+2] = z; + dq_data[dq_index+3] = 0; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +// static void produce_32_64_64(uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline) { +// #ifdef MM_STREAM + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_pi((__m64*)&dq_data[dq_index+1], (__m64)y); + // _mm_stream_pi((__m64*)&dq_data[dq_index+3], (__m64)z); +// #else + // dq_data[dq_index] = x; + // *(uint64_t*)&dq_data[dq_index+1] = y; + // *(uint64_t*)&dq_data[dq_index+3] = z; +// #endif + // dq_index += 5; + // if (dq_index >= QSIZE_GUARD) [[unlikely]] { + // produce_wait(); + // } +// } + + +// #define CONSUME sq_consume(the_queue); +// #define PRODUCE(x) sq_produce(the_queue,(uint64_t)x); +#define PRODUCE(x) produce_32((uint32_t)x); + +#define COMBINE_2_32(x,y) ((((uint64_t)y)<<32) | (uint32_t)(x)) +#define CONSUME_2(x,y) do { uint64_t tmp = CONSUME; x = (uint32_t)(tmp>>32); y = (uint32_t) tmp; } while(0) + +#define TURN_ON_HOOKS \ + __malloc_hook = SLAMP_malloc_hook;\ + __realloc_hook = SLAMP_realloc_hook;\ + __memalign_hook = SLAMP_memalign_hook; \ + __free_hook = SLAMP_free_hook; + + + +#define TURN_OFF_HOOKS \ + __malloc_hook = old_malloc_hook; \ + __realloc_hook = old_realloc_hook; \ + __memalign_hook = old_memalign_hook; \ + __free_hook = 
old_free_hook; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. + +// enum DepModAction: char +// { +// INIT = 0, +// LOAD, +// STORE, +// ALLOC, +// LOOP_INVOC, +// LOOP_ITER, +// FINISHED +// }; +enum ObjectLifetimeModAction : uint32_t { + INIT = 0, + // LOAD, + // STORE, + ALLOC, + FREE, + LOOP_INVOC, + LOOP_ITER, + LOOP_EXIT, + FUNC_ENTRY, + FUNC_EXIT, + FINISHED +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + + // get QUEUE ID from env + char *env = getenv("SLAMP_QUEUE_ID"); + if (env == NULL) { + std::cerr << "SLAMP_QUEUE_ID not set" << std::endl; + exit(-1); + } + auto queue_name = std::string("slamp_queue_") + env; + segment = new bip::fixed_managed_shared_memory(bip::open_or_create, queue_name.c_str(), sizeof(uint32_t) *QSIZE *4, (void*)(1UL << 32)); + // segment2 = new bip::fixed_managed_shared_memory(bip::open_or_create, "MySharedMemory2", sizeof(uint64_t) *QSIZE *2, (void*)(1UL << 28)); + + dqA = segment->find("DQ_A").first; + dqB = segment->find("DQ_B").first; + dq = dqA; + dq_other = dqB; + dq_index = 0; + dq_data = dq->data; + + // managed_shared_memory(bip::open_or_create, "MySharedMemory", sizeof(uint64_t) *QSIZE *2); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + + // the_queue = static_cast(segment->find_or_construct("MyQueue")()); + // auto data = static_cast(segment2->find_or_construct("smtx_queue_data")[QSIZE]()); + // if (the_queue == nullptr) { + // std::cout << "Error: could not create queue" << std::endl; + // exit(-1); + // } + // the_queue->data = data; + // if (the_queue->data == nullptr) { + // std::cout << "Error: could not create queue data" << std::endl; + // exit(-1); + // } + + // [> Initialize the queue data structure <] + // the_queue->p_data = (uint64_t) the_queue->data; + // the_queue->c_inx = 0; + // the_queue->c_margin = 0; + // the_queue->p_glb_inx = 0; + 
// the_queue->c_glb_inx = 0; + // the_queue->ptr_c_glb_inx = &(the_queue->c_glb_inx); + // the_queue->ptr_p_glb_inx = &(the_queue->p_glb_inx); + // // local_buffer = new LocalBuffer(queue); + + // // send a msg with "fn_id, loop_id" + // // char msg[100]; + // // sprintf(msg, "%d,%d", fn_id, loop_id); + // // queue->push(shm::shared_string(msg, *char_alloc)); + + // local_buffer->push(INIT); + // local_buffer->push(loop_id); + uint32_t pid = getpid(); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + // local_buffer->push(pid); + produce_32_32_32(INIT, loop_id, pid); + + auto allocateLibcReqs = [](void *addr, size_t size) { + produce_8_24_32_64(ALLOC, 0, size, (uint64_t)addr); + // produce_32_32_64(ALLOC, size, (uint64_t)addr); + }; + + allocateLibcReqs((void*)&errno, sizeof(errno)); + allocateLibcReqs((void*)&stdin, sizeof(stdin)); + allocateLibcReqs((void*)&stdout, sizeof(stdout)); + allocateLibcReqs((void*)&stderr, sizeof(stderr)); + allocateLibcReqs((void*)&sys_nerr, sizeof(sys_nerr)); + + // const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + // allocateLibcReqs((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + // const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + // itype_ptr = (*__ctype_toupper_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + + + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + old_memalign_hook = __memalign_hook; + old_realloc_hook = __realloc_hook; + __malloc_hook = SLAMP_malloc_hook; + __free_hook = nullptr; + __realloc_hook = SLAMP_realloc_hook; + __free_hook = SLAMP_free_hook; + __memalign_hook = SLAMP_memalign_hook; + // flush + produce_wait(); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + // 
local_buffer->push(FINISHED); + // local_buffer->flush(); + PRODUCE(FINISHED); + // sq_flushQueue(the_queue); + dq->size = dq_index; + dq->ready_to_read = true; + + if (nested_level != 0) { + std::cerr << "Error: nested_level != 0 on exit" << std::endl; + exit(-1); + } +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ + // local_buffer->push(ALLOC)->push(addr)->push(size); + + produce_8_24_32_64(ALLOC, 0, size, addr); +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){ + produce_32_32(FUNC_ENTRY, id); +} + +void SLAMP_exit_fcn(uint32_t id){ + produce_32_32(FUNC_EXIT, id); +} + +void SLAMP_enter_loop(uint32_t id){ +} + +void SLAMP_exit_loop(uint32_t id){ +} + +void SLAMP_loop_iter_ctx(uint32_t id){} + +void SLAMP_loop_invocation(){ + // send a msg with "loop_invocation" + // local_buffer->push(LOOP_INVOC); + PRODUCE(LOOP_INVOC); + + counter_ctx++; + + nested_level++; + on_profiling = true; + + // if (counter_invoc % 1 == 0) { + // on_profiling= true; + // } + counter_invoc++; +} + +void SLAMP_loop_iteration(){ + // local_buffer->push(LOOP_ITER); + PRODUCE(LOOP_ITER); + counter_ctx++; + +#ifdef SAMPLING_ITER + if (counter_iter % 100 == 0) { + on_profiling = true; + } + if (counter_iter % 100 == 10) { // 0-10000 out of 100000, sampling 10% + on_profiling = false; + } + counter_iter++; +#endif +} + +void SLAMP_loop_exit(){ + nested_level--; + if (nested_level < 0) { + // huge problem + std::cerr << "Error: nested_level < 0" << std::endl; + exit(-1); + } + if (nested_level == 0) { + on_profiling = false; + } + PRODUCE(LOOP_EXIT); +} + +void SLAMP_report_base_pointer_arg(uint32_t fcnId, uint32_t argId, void *ptr){ +} +void SLAMP_report_base_pointer_inst(uint32_t instId, void *ptr){ +} + +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){ + 
ext_fn_inst_id = instr; +} +void SLAMP_ext_pop(){ + ext_fn_inst_id = 0; +} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline) { + // // send a msg with "load, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // // + // if (on_profiling) { + // produce_32_32_64(LOAD, instr, addr); + // counter_load++; + // } +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + 
SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { + // // send a msg with "store, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "store,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // if (on_profiling) { + // // produce_3(STORE, COMBINE_2_32(instr, bare_instr), addr); + // // produce_64_64(COMBINE_2_32(STORE, instr), addr); + // produce_32_32_64(STORE, instr, addr); + // counter_store++; + // } +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + SLAMP_storen(bare_inst, addr, n); +} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = malloc(size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)ptr); + // printf("malloc %lu at %p\n", 
size, ptr); + counter_alloc++; + TURN_ON_HOOKS + return ptr; +} + +static void* SLAMP_realloc_hook(void* ptr, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* new_ptr = realloc(ptr, size); + // local_buffer->push(REALLOC)->push((uint64_t)ptr)->push((uint64_t)new_ptr)->push(size); + // produce_3(ALLOC, (uint64_t)new_ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)new_ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)new_ptr); + // printf("realloc %p to %lu at %p", ptr, size, new_ptr); + counter_alloc++; + TURN_ON_HOOKS + return new_ptr; +} + +static void SLAMP_free_hook(void *ptr, const void *caller){ + TURN_OFF_HOOKS + free(ptr); + produce_32_64(FREE, (uint64_t)ptr); + TURN_ON_HOOKS +} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = memalign(alignment, size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // produce_3(ALLOC, (uint64_t)ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)ptr); + + // printf("memalign %lu at %p\n", size, ptr); + counter_alloc++; + TURN_ON_HOOKS + return ptr; +} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} 
+void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void 
*dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void 
SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float 
SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const 
char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPcustom/CustomSend_pt.cpp b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_pt.cpp new file mode 100644 index 00000000..8cc8fcb6 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/CustomSend_pt.cpp @@ -0,0 +1,871 @@ +#include "slamp_hooks.h" +#include +#include +#include +#include +#include +#include +#include +#include "malloc.h" + +#include +#include +#include "sw_queue_astream.h" + +#include +#include + +#define MM_STREAM +//#define SAMPLING_ITER + +namespace bip = boost::interprocess; + +static constexpr uint64_t QSIZE_GUARD = QSIZE - 60; + +static unsigned long counter_load = 0; +static unsigned long counter_store = 0; +static unsigned long counter_ctx = 0; +static unsigned long counter_alloc = 0; +static unsigned long counter_invoc = 0; +static unsigned long counter_iter = 0; +// char local_buffer[LOCAL_BUFFER_SIZE]; +// unsigned buffer_counter = 0; +static int nested_level = 0; +static bool on_profiling = false; + +static uint32_t ext_fn_inst_id = 0; + +static void *(*old_malloc_hook)(size_t, const void *); +static void *(*old_realloc_hook)(void *, size_t, const void *); +static void (*old_free_hook)(void *, const void *); +static void *(*old_memalign_hook)(size_t, size_t, const void *); +// create segment and corresponding allocator +bip::fixed_managed_shared_memory *segment; +bip::fixed_managed_shared_memory *segment2; +static Queue_p dqA, dqB, dq, dq_other; +static uint64_t dq_index = 0; +static uint32_t *dq_data; +// static uint64_t total_pushed = 0; +static uint64_t total_swapped = 0; +static void swap(){ + if(dq == dqA){ + dq = dqB; + dq_other = dqA; + }else{ + dq = dqA; + dq_other = dqB; + } + dq_data = dq->data; +} + +static void produce_wait() ATTRIBUTE(noinline){ + dq->size = dq_index; + dq->ready_to_read = true; + dq->ready_to_write = false; + while (!dq_other->ready_to_write){ + // spin + usleep(10); + } + swap(); + dq->ready_to_read = false; + dq_index = 0; + 
total_swapped++; +} + +// the packet is always 128bit, pad with 0 +static void produce_32(uint32_t x) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, 0, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); +#else + dq_data[dq_index] = x; + // dq_data[dq_index + 1] = 0; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32(uint32_t x, uint32_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); +#else + dq_data[dq_index] = x; + dq_data[dq_index + 1] = y; + // dq_data[dq_index + 2] = 0; + // dq_data[dq_index + 3] = 0; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_64(uint32_t x, uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, 0, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + dq_data[dq_index] = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_8_24_32_64(uint8_t x, uint32_t y, uint32_t z, uint64_t w) ATTRIBUTE(noinline){ + uint32_t xy = (y << 8) | x; +#ifdef MM_STREAM + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(z, w, y, x)); + _mm_stream_si32((int *) &dq_data[dq_index], xy); + _mm_stream_si32((int *) &dq_data[dq_index + 1], z); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], w); +#else + dq_data[dq_index] = xy; + dq_data[dq_index + 1] = z; + *((uint64_t *) &dq_data[dq_index + 2]) = w; +#endif + dq_index += 4; + + 
if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +static void produce_64_64(const uint64_t x, const uint64_t y) ATTRIBUTE(noinline){ +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi64x(y, x)); + // _mm_stream_si64((long long *) &dq_data[dq_index], x); + // _mm_stream_si64((long long *) &dq_data[dq_index + 2], y); +#else + *((uint64_t *) &dq_data[dq_index]) = x; + *((uint64_t *) &dq_data[dq_index + 2]) = y; +#endif + dq_index += 4; + + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +// static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) { +static void produce_32_32_64(uint32_t x, uint32_t y, uint64_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si32((int *) &dq_data[dq_index], x); + _mm_stream_si32((int *) &dq_data[dq_index + 1], y); + _mm_stream_si64((long long *) &dq_data[dq_index + 2], z); + // _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32( z >> 32, z & 0xFFFFFFFF, y, x)); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + *(uint64_t*)&dq_data[dq_index+2] = z; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + +static void produce_32_32_32(uint32_t x, uint32_t y, uint32_t z) ATTRIBUTE(noinline) { +#ifdef MM_STREAM + _mm_stream_si128((__m128i *)(dq_data + dq_index), _mm_set_epi32(0, z, y, x)); + + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_si32((int *) &dq_data[dq_index+1], y); + // _mm_stream_si32((int *) &dq_data[dq_index+2], z); +#else + dq_data[dq_index] = x; + dq_data[dq_index+1] = y; + dq_data[dq_index+2] = z; + dq_data[dq_index+3] = 0; +#endif + dq_index += 4; + if (dq_index >= QSIZE_GUARD) [[unlikely]] { + produce_wait(); + } +} + + +// static void produce_32_64_64(uint32_t x, uint64_t y, uint64_t z) ATTRIBUTE(noinline) { +// #ifdef MM_STREAM + // _mm_stream_si32((int *) &dq_data[dq_index], x); + // _mm_stream_pi((__m64*)&dq_data[dq_index+1], (__m64)y); + 
// _mm_stream_pi((__m64*)&dq_data[dq_index+3], (__m64)z); +// #else + // dq_data[dq_index] = x; + // *(uint64_t*)&dq_data[dq_index+1] = y; + // *(uint64_t*)&dq_data[dq_index+3] = z; +// #endif + // dq_index += 5; + // if (dq_index >= QSIZE_GUARD) [[unlikely]] { + // produce_wait(); + // } +// } + + +// #define CONSUME sq_consume(the_queue); +// #define PRODUCE(x) sq_produce(the_queue,(uint64_t)x); +#define PRODUCE(x) produce_32((uint32_t)x); + +#define COMBINE_2_32(x,y) ((((uint64_t)y)<<32) | (uint32_t)(x)) +#define CONSUME_2(x,y) do { uint64_t tmp = CONSUME; x = (uint32_t)(tmp>>32); y = (uint32_t) tmp; } while(0) + +#define TURN_ON_HOOKS \ + __malloc_hook = SLAMP_malloc_hook;\ + __realloc_hook = SLAMP_realloc_hook;\ + __memalign_hook = SLAMP_memalign_hook; \ + __free_hook = SLAMP_free_hook; + + + +#define TURN_OFF_HOOKS \ + __malloc_hook = old_malloc_hook; \ + __realloc_hook = old_realloc_hook; \ + __memalign_hook = old_memalign_hook; \ + __free_hook = old_free_hook; + +// Ringbuffer fully constructed in shared memory. The element strings are +// also allocated from the same shared memory segment. This vector can be +// safely accessed from other processes. 
+ +// enum DepModAction: char +// { +// INIT = 0, +// LOAD, +// STORE, +// ALLOC, +// LOOP_INVOC, +// LOOP_ITER, +// FINISHED +// }; +enum PointsToModAction : uint32_t { + INIT = 0, + // LOAD, + // STORE, + ALLOC, + FREE, + LOOP_INVOC, + LOOP_ITER, + LOOP_ENTRY, + LOOP_EXIT, + FUNC_ENTRY, + FUNC_EXIT, + POINTS_TO_INST, + POINTS_TO_ARG, + FINISHED +}; + +void SLAMP_init(uint32_t fn_id, uint32_t loop_id) { + + // get QUEUE ID from env + char *env = getenv("SLAMP_QUEUE_ID"); + if (env == NULL) { + std::cerr << "SLAMP_QUEUE_ID not set" << std::endl; + exit(-1); + } + auto queue_name = std::string("slamp_queue_") + env; + segment = new bip::fixed_managed_shared_memory(bip::open_or_create, queue_name.c_str(), sizeof(uint32_t) *QSIZE *4, (void*)(1UL << 32)); + // segment2 = new bip::fixed_managed_shared_memory(bip::open_or_create, "MySharedMemory2", sizeof(uint64_t) *QSIZE *2, (void*)(1UL << 28)); + + dqA = segment->find("DQ_A").first; + dqB = segment->find("DQ_B").first; + dq = dqA; + dq_other = dqB; + dq_index = 0; + dq_data = dq->data; + + // managed_shared_memory(bip::open_or_create, "MySharedMemory", sizeof(uint64_t) *QSIZE *2); + // auto a_queue = new atomic_queue::AtomicQueueB(65536); + + // the_queue = static_cast(segment->find_or_construct("MyQueue")()); + // auto data = static_cast(segment2->find_or_construct("smtx_queue_data")[QSIZE]()); + // if (the_queue == nullptr) { + // std::cout << "Error: could not create queue" << std::endl; + // exit(-1); + // } + // the_queue->data = data; + // if (the_queue->data == nullptr) { + // std::cout << "Error: could not create queue data" << std::endl; + // exit(-1); + // } + + // [> Initialize the queue data structure <] + // the_queue->p_data = (uint64_t) the_queue->data; + // the_queue->c_inx = 0; + // the_queue->c_margin = 0; + // the_queue->p_glb_inx = 0; + // the_queue->c_glb_inx = 0; + // the_queue->ptr_c_glb_inx = &(the_queue->c_glb_inx); + // the_queue->ptr_p_glb_inx = &(the_queue->p_glb_inx); + // // local_buffer = 
new LocalBuffer(queue); + + // // send a msg with "fn_id, loop_id" + // // char msg[100]; + // // sprintf(msg, "%d,%d", fn_id, loop_id); + // // queue->push(shm::shared_string(msg, *char_alloc)); + + // local_buffer->push(INIT); + // local_buffer->push(loop_id); + uint32_t pid = getpid(); + printf("SLAMP_init: %d, %d, %d\n", fn_id, loop_id, pid); + // local_buffer->push(pid); + produce_32_32_32(INIT, loop_id, pid); + + auto allocateLibcReqs = [](void *addr, size_t size) { + produce_8_24_32_64(ALLOC, 0, size, (uint64_t)addr); + // produce_32_32_64(ALLOC, size, (uint64_t)addr); + }; + + allocateLibcReqs((void*)&errno, sizeof(errno)); + allocateLibcReqs((void*)&stdin, sizeof(stdin)); + allocateLibcReqs((void*)&stdout, sizeof(stdout)); + allocateLibcReqs((void*)&stderr, sizeof(stderr)); + allocateLibcReqs((void*)&sys_nerr, sizeof(sys_nerr)); + + // const unsigned short int* ctype_ptr = (*__ctype_b_loc()) - 128; + // allocateLibcReqs((void*)ctype_ptr, 384 * sizeof(*ctype_ptr)); + // const int32_t* itype_ptr = (*__ctype_tolower_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + // itype_ptr = (*__ctype_toupper_loc()) - 128; + // allocateLibcReqs((void*)itype_ptr, 384 * sizeof(*itype_ptr)); + + + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + old_memalign_hook = __memalign_hook; + old_realloc_hook = __realloc_hook; + __malloc_hook = SLAMP_malloc_hook; + __free_hook = nullptr; + __realloc_hook = SLAMP_realloc_hook; + __free_hook = SLAMP_free_hook; + __memalign_hook = SLAMP_memalign_hook; + // flush + produce_wait(); +} + +void SLAMP_fini(const char* filename){ + // send a msg with "fini" + // queue->push(shm::shared_string("fini", *char_alloc)); + std::cout << counter_load << " " << counter_store << " " << counter_ctx << std::endl; + // local_buffer->push(FINISHED); + // local_buffer->flush(); + PRODUCE(FINISHED); + // sq_flushQueue(the_queue); + dq->size = dq_index; + dq->ready_to_read = true; + + if (nested_level != 
0) { + std::cerr << "Error: nested_level != 0 on exit" << std::endl; + exit(-1); + } +} + +void SLAMP_allocated(uint64_t addr){} +void SLAMP_init_global_vars(const char *name, uint64_t addr, size_t size){ + // local_buffer->push(ALLOC)->push(addr)->push(size); + + produce_32_32_64(ALLOC, size, addr); +} +void SLAMP_main_entry(uint32_t argc, char** argv, char** env){} + +void SLAMP_enter_fcn(uint32_t id){ + produce_32_32(FUNC_ENTRY, id); +} + +void SLAMP_exit_fcn(uint32_t id){ + produce_32_32(FUNC_EXIT, id); +} + +void SLAMP_enter_loop(uint32_t id){ + produce_32_32(LOOP_ENTRY, id); +} + +void SLAMP_exit_loop(uint32_t id){ + produce_32_32(LOOP_EXIT, id); +} + +void SLAMP_loop_iter_ctx(uint32_t id){} + +void SLAMP_loop_invocation(){ + // send a msg with "loop_invocation" + // local_buffer->push(LOOP_INVOC); + PRODUCE(LOOP_INVOC); + + counter_ctx++; + + nested_level++; + on_profiling = true; + + // if (counter_invoc % 1 == 0) { + // on_profiling= true; + // } + counter_invoc++; +} + +void SLAMP_loop_iteration(){ + // local_buffer->push(LOOP_ITER); + PRODUCE(LOOP_ITER); + counter_ctx++; + +#ifdef SAMPLING_ITER + if (counter_iter % 100 == 0) { + on_profiling = true; + } + if (counter_iter % 100 == 10) { // 0-10000 out of 100000, sampling 10% + on_profiling = false; + } + counter_iter++; +#endif +} + +void SLAMP_loop_exit(){ + nested_level--; + if (nested_level < 0) { + // huge problem + std::cerr << "Error: nested_level < 0" << std::endl; + exit(-1); + } + if (nested_level == 0) { + on_profiling = false; + } +} + +void SLAMP_report_base_pointer_arg(uint32_t fcnId, uint32_t argId, void *ptr){ + // FIXME: combine fcnid and argid to 32 bit + uint32_t id = (fcnId << 16) | (argId & 0xffff); + + produce_32_32_64(POINTS_TO_ARG, id, (uint64_t)ptr); +} +void SLAMP_report_base_pointer_inst(uint32_t instId, void *ptr){ + produce_32_32_64(POINTS_TO_INST, instId, (uint64_t)ptr); +} + +void SLAMP_callback_stack_alloca(uint64_t, uint64_t, uint32_t, uint64_t){} +void 
SLAMP_callback_stack_free(){} + +void SLAMP_ext_push(const uint32_t instr){ + ext_fn_inst_id = instr; +} +void SLAMP_ext_pop(){ + ext_fn_inst_id = 0; +} + +void SLAMP_push(const uint32_t instr){} +void SLAMP_pop(){} + +void SLAMP_load(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value) ATTRIBUTE(always_inline) { + // // send a msg with "load, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "load,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // // + // if (on_profiling) { + // produce_32_32_64(LOAD, instr, addr); + // counter_load++; + // } +} + +void SLAMP_load1(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load2(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} +void SLAMP_load4(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_load8(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load(instr, addr, bare_instr, value); +} + +void SLAMP_loadn(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_load(instr, addr, bare_instr, 0); +} + +void SLAMP_load1_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load1(bare_instr, addr, bare_instr, value); +} +void SLAMP_load2_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load2(bare_instr, addr, bare_instr, value); +} +void SLAMP_load4_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load4(bare_instr, addr, bare_instr, value); +} +void SLAMP_load8_ext(const uint64_t addr, const uint32_t bare_instr, uint64_t value){ + SLAMP_load8(bare_instr, addr, bare_instr, value); +} +void 
SLAMP_loadn_ext(const uint64_t addr, const uint32_t bare_instr, size_t n){ + SLAMP_loadn(bare_instr, addr, bare_instr, n); +} + +void SLAMP_store(const uint32_t instr, const uint64_t addr, const uint32_t bare_instr) ATTRIBUTE(always_inline) { + // // send a msg with "store, instr, addr, bare_instr, value" + // // char msg[100]; + // // sprintf(msg, "store,%d,%lu,%d,%lu", instr, addr, bare_instr, value); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // if (on_profiling) { + // // produce_3(STORE, COMBINE_2_32(instr, bare_instr), addr); + // // produce_64_64(COMBINE_2_32(STORE, instr), addr); + // produce_32_32_64(STORE, instr, addr); + // counter_store++; + // } +} + +void SLAMP_store1(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store2(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store4(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_store8(uint32_t instr, const uint64_t addr){ + SLAMP_store(instr, addr, instr); +} +void SLAMP_storen(uint32_t instr, const uint64_t addr, size_t n){ + SLAMP_store(instr, addr, instr); +} + +void SLAMP_store1_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store1(bare_inst, addr); +} +void SLAMP_store2_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store2(bare_inst, addr); +} +void SLAMP_store4_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store4(bare_inst, addr); +} +void SLAMP_store8_ext(const uint64_t addr, const uint32_t bare_inst){ + SLAMP_store8(bare_inst, addr); +} +void SLAMP_storen_ext(const uint64_t addr, const uint32_t bare_inst, size_t n){ + SLAMP_storen(bare_inst, addr, n); +} + +/* wrappers */ +static void* SLAMP_malloc_hook(size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = malloc(size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // 
produce_32_32_64(ALLOC, size, (uint64_t)ptr); + // printf("malloc %lu at %p\n", size, ptr); + counter_alloc++; + TURN_ON_HOOKS + return ptr; +} + +static void* SLAMP_realloc_hook(void* ptr, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* new_ptr = realloc(ptr, size); + // local_buffer->push(REALLOC)->push((uint64_t)ptr)->push((uint64_t)new_ptr)->push(size); + // produce_3(ALLOC, (uint64_t)new_ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)new_ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)new_ptr); + // printf("realloc %p to %lu at %p", ptr, size, new_ptr); + counter_alloc++; + TURN_ON_HOOKS + return new_ptr; +} + +static void SLAMP_free_hook(void *ptr, const void *caller){ + TURN_OFF_HOOKS + free(ptr); + produce_32_64(FREE, (uint64_t)ptr); + TURN_ON_HOOKS +} +static void* SLAMP_memalign_hook(size_t alignment, size_t size, const void *caller){ + TURN_OFF_HOOKS + void* ptr = memalign(alignment, size); + // local_buffer->push(ALLOC)->push((uint64_t)ptr)->push(size); + // produce_3(ALLOC, (uint64_t)ptr, size); + produce_8_24_32_64(ALLOC, ext_fn_inst_id, size, (uint64_t)ptr); + // produce_32_32_64(ALLOC, size, (uint64_t)ptr); + + // printf("memalign %lu at %p\n", size, ptr); + counter_alloc++; + TURN_ON_HOOKS + return ptr; +} +void* SLAMP_malloc(size_t size, uint32_t instr, size_t alignment){ + // void* ptr = malloc(size); + // // send a msg with "malloc, instr, ptr, size, alignment" + // // char msg[100]; + // // sprintf(msg, "malloc,%d,%lu,%lu,%lu", instr, ptr, size, alignment); + // // queue->push(shm::shared_string(msg, *char_alloc)); + // queue->push(ALLOC); + // queue->push(reinterpret_cast(ptr)); + // queue->push(size); + // counter_alloc++; + // return ptr; +} + +void* SLAMP_calloc(size_t nelem, size_t elsize){} +void* SLAMP_realloc(void* ptr, size_t size){} +void* SLAMP__Znam(size_t size){} +void* SLAMP__Znwm(size_t size){} + + + +char* SLAMP_strdup(const char *s1){} +char* SLAMP___strdup(const char *s1){} +void 
SLAMP_free(void* ptr){} +void SLAMP_cfree(void* ptr){} +void SLAMP__ZdlPv(void* ptr){} +void SLAMP__ZdaPv(void* ptr){} +int SLAMP_brk(void *end_data_segment){} +void* SLAMP_sbrk(intptr_t increment){} + +/* llvm memory intrinsics */ +void SLAMP_llvm_memcpy_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memcpy_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i32(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint32_t sizeBytes){} +void SLAMP_llvm_memmove_p0i8_p0i8_i64(const uint8_t* dstAddr, const uint8_t* srcAddr, const uint64_t sizeBytes){} +void SLAMP_llvm_memset_p0i8_i32(const uint8_t* dstAddr, const uint32_t len){} +void SLAMP_llvm_memset_p0i8_i64(const uint8_t* dstAddr, const uint64_t len){} + +// void SLAMP_llvm_lifetime_start_p0i8(uint64_t size, uint8_t* ptr){} +// void SLAMP_llvm_lifetime_end_p0i8(uint64_t size, uint8_t* ptr){} + +/* String functions */ +size_t SLAMP_strlen(const char *str){} +char* SLAMP_strchr(char *s, int c){} +char* SLAMP_strrchr(char *s, int c){} +int SLAMP_strcmp(const char *s1, const char *s2){} +int SLAMP_strncmp(const char *s1, const char *s2, size_t n){} +char* SLAMP_strcpy(char *dest, const char *src){} +char* SLAMP_strncpy(char *dest, const char *src, size_t n){} +char* SLAMP_strcat(char *s1, const char *s2){} +char* SLAMP_strncat(char *s1, const char *s2, size_t n){} +char* SLAMP_strstr(char *s1, char *s2){} +size_t SLAMP_strspn(const char *s1, const char *s2){} +size_t SLAMP_strcspn(const char *s1, const char *s2){} +char* SLAMP_strtok(char *s, const char *delim){} +double SLAMP_strtod(const char *nptr, char **endptr){} +long int SLAMP_strtol(const char *nptr, char **endptr, int base){} +char* SLAMP_strpbrk(char *s1, char *s2){} + +/* Mem* and b* functions */ +void *SLAMP_memset (void *dest, int c, size_t n){} +void *SLAMP_memcpy (void *dest, const void *src, size_t n){} +void 
*SLAMP___builtin_memcpy (void *dest, const void *src, size_t n){} +void *SLAMP_memmove (void *dest, const void *src, size_t n){} +int SLAMP_memcmp(const void *s1, const void *s2, size_t n){} +void* SLAMP_memchr(void* ptr, int value, size_t num){} +void* SLAMP___rawmemchr(void* ptr, int value){} + +void SLAMP_bzero(void *s, size_t n){} +void SLAMP_bcopy(const void *s1, void *s2, size_t n){} + +/* IO */ +ssize_t SLAMP_read(int fd, void *buf, size_t count){} +int SLAMP_open(const char *pathname, int flags, mode_t mode){} +int SLAMP_close(int fd){} +ssize_t SLAMP_write(int fd, const void *buf, size_t count){} +off_t SLAMP_lseek(int fildes, off_t offset, int whence){} + +FILE * SLAMP_fopen(const char *path, const char *mode){} +FILE * SLAMP_fopen64(const char *path, const char *mode){} +FILE * SLAMP_freopen(const char *path, const char *mode, FILE* stream){} +int SLAMP_fflush(FILE *stream){} +int SLAMP_fclose(FILE *stream){} +int SLAMP_ferror(FILE *stream){} +int SLAMP_feof(FILE *stream){} +long SLAMP_ftell(FILE *stream){} +size_t SLAMP_fread(void * ptr, size_t size, size_t nitems, FILE *stream){} +size_t SLAMP_fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream){} +int SLAMP_fseek(FILE *stream, long offset, int whence){} +void SLAMP_rewind(FILE *stream){} + +int SLAMP_fgetc(FILE *stream){} +int SLAMP_fputc(int c, FILE *stream){} +char * SLAMP_fgets(char *s, int n, FILE *stream){} +int SLAMP_fputs(const char *s, FILE *stream){} + +int SLAMP_ungetc(int c, FILE *stream){} +int SLAMP_putchar(int c){} +int SLAMP_getchar(void){} + +int SLAMP_fileno(FILE *stream){} +char * SLAMP_gets(char *s){} +int SLAMP_puts(const char *s){} + +int SLAMP_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout){} +int SLAMP_remove(const char *path){} + +void SLAMP_setbuf(FILE * stream, char * buf){} +void SLAMP_setvbuf(FILE * stream, char * buf, int mode, size_t size){} +char * SLAMP_tmpnam(char *s){} +FILE* SLAMP_tmpfile(void){} +char * 
SLAMP_ttyname(int fildes){} + +FILE * SLAMP_fdopen(int fildes, const char *mode){} +void SLAMP_clearerr(FILE *stream){} + +int SLAMP_truncate(const char *path, off_t length){} +int SLAMP_ftruncate(int fildes, off_t length){} + +int SLAMP_dup(int oldfd){} +int SLAMP_dup2(int oldfd, int newfd){} +int SLAMP_pipe(int filedes[2]){} + +int SLAMP_chmod(const char *path, mode_t mode){} +int SLAMP_fchmod(int fildes, mode_t mode){} +int SLAMP_fchown(int fd, uid_t owner, gid_t group){} +int SLAMP_access(const char *pathname, int mode){} +long SLAMP_pathconf(char *path, int name){} +int SLAMP_mkdir(const char *pathname, mode_t mode){} +int SLAMP_rmdir(const char *pathname){} +mode_t SLAMP_umask(mode_t mask){} +int SLAMP_fcntl(int fd, int cmd, struct flock *lock){} + +DIR* SLAMP_opendir(const char* name){} +struct dirent* SLAMP_readdir(DIR *dirp){} +struct dirent64* SLAMP_readdir64(DIR *dirp){} +int SLAMP_closedir(DIR* dirp){} + +/* Printf */ +int SLAMP_printf(const char *format, ...){} +int SLAMP_fprintf(FILE *stream, const char *format, ...){} +int SLAMP_sprintf(char *str, const char *format, ...){} +int SLAMP_snprintf(char *str, size_t size, const char *format, ...){} + +int SLAMP_vprintf(const char *format, va_list ap){} +int SLAMP_vfprintf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vsprintf(char *str, const char *format, va_list ap){} +int SLAMP_vsnprintf(char *str, size_t size, const char *format, va_list ap){} + +/* Scanf */ +int SLAMP_fscanf(FILE *stream, const char *format, ... ){} +int SLAMP_scanf(const char *format, ... ){} +int SLAMP_sscanf(const char *s, const char *format, ... ){} +int SLAMP___isoc99_sscanf(const char *s, const char *format, ... 
){} + +int SLAMP_vfscanf(FILE *stream, const char *format, va_list ap){} +int SLAMP_vscanf(const char *format, va_list ap){} +int SLAMP_vsscanf(const char *s, const char *format, va_list ap){} + +/* Time */ +time_t SLAMP_time(time_t *t){} +struct tm *SLAMP_localtime(const time_t *timer){} +struct lconv* SLAMP_localeconv(){} +struct tm *SLAMP_gmtime(const time_t *timer){} +int SLAMP_gettimeofday(struct timeval *tv, struct timezone *tz){} + +/* Math */ +double SLAMP_ldexp(double x, int exp){} +float SLAMP_ldexpf(float x, int exp){} +long double SLAMP_ldexpl(long double x, int exp){} +double SLAMP_log10(double x){} +float SLAMP_log10f(float x){} +long double SLAMP_log10l(long double x){} +double SLAMP_log(double x){} +float SLAMP_logf(float x){} +long double SLAMP_logl(long double x){} + +double SLAMP_exp(double x){} +float SLAMP_expf(float x){} +long double SLAMP_expl(long double x){} + +double SLAMP_cos(double x){} +float SLAMP_cosf(float x){} +long double SLAMP_cosl(long double x){} +double SLAMP_sin(double x){} +double SLAMP_tan(double x){} +float SLAMP_sinf(float x){} +long double SLAMP_sinl(long double x){} + +double SLAMP_atan(double x){} +float SLAMP_atanf(float x){} +long double SLAMP_atanl(long double x){} + +double SLAMP_floor(double x){} +float SLAMP_floorf(float x){} +long double SLAMP_floorl(long double x){} +double SLAMP_ceil(double x){} +float SLAMP_ceilf(float x){} +long double SLAMP_ceill(long double x){} + +double SLAMP_atan2(double y, double x){} +float SLAMP_atan2f(float y, float x){} +long double SLAMP_atan2l(long double y, long double x){} + +double SLAMP_sqrt(double x){} +float SLAMP_sqrtf(float x){} +long double SLAMP_sqrtl(long double x){} + +double SLAMP_pow(double x, double y){} +float SLAMP_powf(float x, float y){} +long double SLAMP_powl(long double x, long double y){} + +double SLAMP_fabs(double x){} +float SLAMP_fabsf(float x){} +long double SLAMP_fabsl(long double x){} + +double SLAMP_modf(double x, double *iptr){} +float 
SLAMP_modff(float x, float *iptr){} +long double SLAMP_modfl(long double x, long double *iptr){} + +double SLAMP_fmod(double x, double y){} + +double SLAMP_frexp(double num, int *exp){} +float SLAMP_frexpf(float num, int *exp){} +long double SLAMP_frexpl(long double num, int *exp){} + +int SLAMP_isnan(){} + +/* MISC */ +char *SLAMP_getenv(const char *name){} +int SLAMP_putenv(char* string){} +char *SLAMP_getcwd(char *buf, size_t size){} +char* SLAMP_strerror(int errnum){} +void SLAMP_exit(int status){} +void SLAMP__exit(int status){} +int SLAMP_link(const char *oldpath, const char *newpath){} +int SLAMP_unlink(const char *pathname){} +int SLAMP_isatty(int desc){} +int SLAMP_setuid(uid_t uid){} +uid_t SLAMP_getuid(void){} +uid_t SLAMP_geteuid(void){} +int SLAMP_setgid(gid_t gid){} +gid_t SLAMP_getgid(void){} +gid_t SLAMP_getegid(void){} +pid_t SLAMP_getpid(void){} +int SLAMP_chdir(const char *path){} +int SLAMP_execl(const char *path, const char *arg0, ... /*, (char *)0 */){} +int SLAMP_execv(const char *path, char *const argv[]){} +int SLAMP_execvp(const char *file, char *const argv[]){} +int SLAMP_kill(pid_t pid, int sig){} +pid_t SLAMP_fork(void){} +sighandler_t SLAMP___sysv_signal(int signum, sighandler_t handler){} +pid_t SLAMP_waitpid(pid_t pid, int* status, int options){} +void SLAMP_qsort(void* base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)){} +int SLAMP_ioctl(int d, int request, ...){} +unsigned int SLAMP_sleep(unsigned int seconds){} +char* SLAMP_gcvt(double number, size_t ndigit, char* buf){} +char* SLAMP_nl_langinfo(nl_item item){} + +/* Compiler/Glibc Internals */ +void SLAMP___assert_fail(const char * assertion, const char * file, unsigned int line, const char * function){} +const unsigned short int **SLAMP___ctype_b_loc(void){} +int SLAMP__IO_getc(_IO_FILE * __fp){} +int SLAMP__IO_putc(int __c, _IO_FILE *__fp){} + +int SLAMP___fxstat (int __ver, int __fildes, struct stat *__stat_buf){} +int SLAMP___xstat (int __ver, __const 
char *__filename, struct stat *__stat_buf){} diff --git a/liberty/lib/SLAMP/SLAMPcustom/bitcast.h b/liberty/lib/SLAMP/SLAMPcustom/bitcast.h new file mode 100644 index 00000000..404a9a99 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/bitcast.h @@ -0,0 +1,29 @@ +#ifndef BITCAST_H +#define BITCAST_H + +typedef union { double d; int64_t i; } SQ_DI; +typedef union { float f; int32_t i; } SQ_FI; + +Inline int64_t doubleToInt(double d) { + SQ_DI conv; + conv.d = d; + return conv.i; +} +Inline int64_t floatToInt(float f) { + SQ_FI conv; + conv.f = f; + return conv.i; +} +Inline double intToDouble(int64_t i) { + SQ_DI conv; + conv.i = i; + return conv.d; +} + +Inline float intToFloat(int64_t i) { + SQ_FI conv; + conv.i = (int32_t)i; + return conv.f; +} + +#endif /* BITCAST_H */ diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/.gitignore b/liberty/lib/SLAMP/SLAMPcustom/consumer/.gitignore new file mode 100644 index 00000000..378eac25 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/.gitignore @@ -0,0 +1 @@ +build diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/CMakeLists.txt b/liberty/lib/SLAMP/SLAMPcustom/consumer/CMakeLists.txt new file mode 100644 index 00000000..f84fe941 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required (VERSION 3.6.2 FATAL_ERROR) + +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") +set(CMAKE_LINKER "ld.gold") + +set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "/u/ziyangx/test/boost/boost_1_80_0/install/lib/cmake") +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +# set C++17 +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto") +set(LINK_FLAGS "${LINK_FLAGS} -flto") + +find_package(Boost 1.80.0 REQUIRED COMPONENTS system) +include_directories(${Boost_INCLUDE_DIRS}) +include_directories(./ + ../) + +add_executable (consumer consumer.cpp ProfilingModules/DependenceModule.cpp 
ProfilingModules/PointsToModule.cpp ProfilingModules/LoadedValueModule.cpp ProfilingModules/ObjectLifetimeModule.cpp) +target_link_libraries(consumer LINK_PUBLIC + ${Boost_LIBRARIES} + rt + Threads::Threads +) diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ContextManager.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ContextManager.h new file mode 100644 index 00000000..f91e0337 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ContextManager.h @@ -0,0 +1,142 @@ +#pragma once + +#include "ProfilingModules/context.h" +#include +#include +#include +#include +#include +#include +#include + + +// namespace SpecPrivLib { +// enum SpecPrivContextType { + // TopContext = 0, + // FunctionContext, + // LoopContext, +// }; + +#define CONTEXT_DEBUG 0 + +template +struct ContextId { + static_assert(TypeEnum::TopContext == 0, "TopContext must be 0"); + + TypeEnum type; + MetaIdType metaId; + ContextId(TypeEnum type, MetaIdType id) : type(type), metaId(id) {} + ContextId(std::pair p) + : type(p.first), metaId(p.second) {} + static ContextId getTopContextId() { return {TypeEnum::TopContext, 0}; } + // id left shift 2 bits and or with type + // ContextHash hash() { + // return (static_cast(metaId) << 2) | static_cast(type); + // } + + bool operator==(const ContextId &other) const { + return type == other.type && metaId == other.metaId; + } + + bool operator!=(const ContextId &other) const { + return !(*this == other); + } + + bool operator<(const ContextId &other) const { + return type < other.type || (type == other.type && metaId < other.metaId); + } + + void print(std::ostream &os) { + os << "(" << type << "," << metaId << ")"; + } +}; + +template +class NewContextManager { + using ContextId = ContextId; + std::vector contextStack; + + // FIXME: fix this hashing problem + std::map, HashType> contextToHashMap; + std::vector> hashToContextMap; + size_t contextIdHashCounter = 1; // avoid 0 + bool cached = false; + 
HashType cachedContextHash; + +public: + NewContextManager() { + contextStack.push_back(ContextId::getTopContextId()); + hashToContextMap.push_back(contextStack); + } + + ~NewContextManager() = default; + + void pushContext(ContextId contextId) { + cached = false; + contextStack.push_back(contextId); + } + + void popContext(ContextId contextId) { + cached = false; + if (contextStack.back() == contextId) { + contextStack.pop_back(); + } else { + if (CONTEXT_DEBUG) { + std::cerr << "ContextManager: popContext: context not found: "; + contextId.print(std::cerr); + std::cerr << "ContextManager: popContext: stack: "; + for (auto &c : contextStack) { + c.print(std::cerr); + } + std::cerr << "\n"; + } + // keep popping until we find the context + while (contextStack.back() != contextId && contextStack.back() != ContextId::getTopContextId()) { + contextStack.pop_back(); + } + + if (contextStack.back() == contextId) { + contextStack.pop_back(); + } else { + if (CONTEXT_DEBUG) { + std::cerr << "ContextManager: popContext: context not found: "; + } + } + } + } + + HashType encodeContext(std::vector context) { + if (contextToHashMap.count(context)) { + return contextToHashMap[context]; + } else { + contextToHashMap[context] = contextIdHashCounter; + hashToContextMap.push_back(context); + + return contextIdHashCounter++; + } + } + + HashType encodeActiveContext() { + if (cached) { + return cachedContextHash; + } else { + cached = true; + cachedContextHash = encodeContext(contextStack); + return cachedContextHash; + } + } + + std::vector decodeContext(HashType hash) { + assert(hash < hashToContextMap.size() && hash != 0 && "invalid hash"); + return hashToContextMap[hash]; + } + + void printContext(std::ostream &os, HashType hash) { + assert(hash < hashToContextMap.size() && hash != 0 && "invalid hash"); + auto &context = hashToContextMap[hash]; + for (auto it = context.rbegin(); it != context.rend(); it++) { + auto &c = *it; + c.print(os); + } + } +}; diff --git 
a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.cpp b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.cpp new file mode 100644 index 00000000..5cee1292 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "DependenceModule.h" +#include "slamp_logger.h" +#include "slamp_shadow_mem.h" +#include "slamp_timestamp.h" + +// static std::map *inst_count; +static inline uint64_t rdtsc() { + uint64_t a, d; + __asm__ volatile("rdtsc" : "=a"(a), "=d"(d)); + return (d << 32) | a; +} + +// init: setup the shadow memory +void DependenceModule::init(uint32_t loop_id, uint32_t pid) { + target_loop_id = loop_id; + +#define SIZE_8M 0x800000 + smmap->init_stack(SIZE_8M, pid); + // inst_count = new std::map(); + +} + +// static uint64_t log_time = 0; + + +void DependenceModule::fini(const char *filename) { + // std::cout << "Load count: " << load_count << std::endl; + // std::cout << "Store count: " << store_count << std::endl; + + std::ofstream of(filename); + of << target_loop_id << " " << 0 << " " << 0 << " " + << 0 << " " << 0 << " " << 0 << "\n"; + +#ifdef TRACK_COUNT + // get all the keys of a hash table to ordered set + std::set ordered; + for (auto & it : deps) { + ordered.insert(it.first); + } +#else + std::set ordered(deps.begin(), deps.end()); +#endif + + for (auto &k: ordered) { +#ifdef TRACK_COUNT + auto count = deps[k]; +#else + auto count = 1; +#endif + of << target_loop_id << " " << k.src << " " << k.dst << " " << k.dst_bare << " " + << (k.cross ? 
1 : 0) << " " << count << " "; +#ifdef TRACK_MIN_DISTANCE + auto dist = min_dist[k]; + of << dist; +#endif + of << "\n"; + } + of.close(); + + // std::cout << "Log time: " << log_time/ 2.6e9 << " s" << std::endl; + + // for (auto &i : *inst_count) { + // of << target_loop_id << " " << i.first << " " << i.second << "\n"; + // } + +#ifdef COLLECT_TRACE + // dump the dep trace to a binary file + std::ofstream trace_file("dep_trace.bin", std::ios::binary); + trace_file.write((char *)dep_trace.data(), dep_trace.size() * sizeof(slamp::KEY)); + trace_file.close(); +#endif +} + +void DependenceModule::allocate(void *addr, uint64_t size) { + smmap->allocate(addr, size); +} + +void DependenceModule::log(TS ts, const uint32_t dst_inst, const uint32_t context){ + + uint32_t src_inst = GET_INSTR(ts); + + uint64_t src_invoc = GET_INVOC(ts); + uint64_t src_iter = GET_ITER(ts); + + if (src_invoc != GET_INVOC(slamp_invocation)) { + return; + } + +#ifdef TRACK_CONTEXT + slamp::KEY key(src_inst, dst_inst, context, src_iter != slamp_iteration); +#else + slamp::KEY key(src_inst, dst_inst, 0, src_iter != slamp_iteration); +#endif + + +#ifdef TRACK_MIN_DISTANCE + auto dist = slamp_iteration - src_iter; + min_dist.emplace({key, dist}); +#endif + + deps.emplace(key); + +#ifdef COLLECT_TRACE + if (dep_trace_idx < dep_trace_size) { + dep_trace.emplace_back(key); + dep_trace_idx++; + } +#endif +} + +void DependenceModule::load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr) { + local_write(addr, [&]() { + // load_count++; + + // if tracking multiple loops + + TS *s = (TS *)GET_SHADOW(addr, DM_TIMESTAMP_SIZE_IN_BYTES_LOG2); + + TS tss = s[0]; + if (tss != 0) { + // uint64_t start = rdtsc(); + log(tss, instr, context); + // uint64_t end = rdtsc(); + // log_time += end - start; + } +#ifdef TRACK_WAR +#ifdef TRACK_CONTEXT + if (context != 0) + instr = context; +#endif + TS ts = CREATE_TS(instr, slamp_iteration, slamp_invocation); + s[1] = ts; +#endif + }); +} + +void 
DependenceModule::store(uint32_t instr, uint32_t bare_instr, const uint64_t addr) { + + local_write(addr, [&]() { + // store_count++; + TS *shadow_addr = (TS *)GET_SHADOW(addr, DM_TIMESTAMP_SIZE_IN_BYTES_LOG2); + +#ifdef TRACK_WAW + if (shadow_addr[0] != 0) { + // uint64_t start = rdtsc(); + log(shadow_addr[0], instr, context); + // uint64_t end = rdtsc(); + // log_time += end - start; + } +#endif + +#ifdef TRACK_WAR + if (shadow_addr[1] != 0) { + // uint64_t start = rdtsc(); + log(shadow_addr[1], instr, context); + // uint64_t end = rdtsc(); + // log_time += end - start; + } +#endif + +#ifdef TRACK_CONTEXT + if (context != 0) + instr = context; +#endif + TS ts = CREATE_TS(instr, slamp_iteration, slamp_invocation); + shadow_addr[0] = ts; + + + }); +} + +void DependenceModule::loop_invoc() { + slamp_iteration = 0; + slamp_invocation++; + nested_level++; +} + +void DependenceModule::loop_iter() { + slamp_iteration++; +} + +void DependenceModule::loop_exit() { + nested_level--; +} + +void DependenceModule::func_entry(uint32_t instr) { + if (nested_level == 1) { + context = instr; + } +} + +void DependenceModule::func_exit(uint32_t instr) { + if (nested_level == 1) { + context = 0; + } +} + +void DependenceModule::merge_dep(DependenceModule &other) { + deps.merge(other.deps); +#ifdef TRACK_MIN_DISTANCE + min_dist.merge(other.min_dist); +#endif +} diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.h new file mode 100644 index 00000000..efcab04f --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/DependenceModule.h @@ -0,0 +1,104 @@ +#include +#include +#include +#include + +#include "slamp_logger.h" +#include "slamp_shadow_mem.h" +#include "slamp_timestamp.h" + +#include "LocalWriteModule.h" +#include "HTContainer.h" + +// #define TRACK_COUNT +// #define TRACK_MIN_DISTANCE + +#define TRACK_CONTEXT + +// #define TRACK_WAW +// #define 
TRACK_WAR + +#ifdef TRACK_WAR +#define DM_TIMESTAMP_SIZE_IN_BYTES 16 +#define DM_TIMESTAMP_SIZE_IN_BYTES_LOG2 4 +#else +#define DM_TIMESTAMP_SIZE_IN_BYTES 8 +#define DM_TIMESTAMP_SIZE_IN_BYTES_LOG2 3 +#endif + +// #define COLLECT_TRACE + +enum class DepModAction : uint32_t{ + INIT = 0, + LOAD, + STORE, + ALLOC, + LOOP_INVOC, + LOOP_ITER, + LOOP_EXIT, + FINISHED, + FUNC_ENTRY, + FUNC_EXIT, +}; + +class DependenceModule : public LocalWriteModule { +private: + uint64_t slamp_iteration = 0; + uint64_t slamp_invocation = 0; + uint32_t target_loop_id = 0; + + // debugging stats + uint64_t load_count = 0; + uint64_t store_count = 0; + + unsigned int context = 0; + int nested_level = 0; + +#ifdef COLLECT_TRACE + // Collect trace + std::vector dep_trace; + static constexpr unsigned dep_trace_size = 10'000'000; + unsigned dep_trace_idx = 0; +#endif + + + slamp::MemoryMap *smmap = nullptr; + +#ifdef TRACK_COUNT + HTMap_Sum deps; +#else + // HTSet deps; + phmap::flat_hash_set deps; +#endif + +#ifdef TRACK_MIN_DISTANCE + HTMap_Min min_dist; +#endif + + void log(TS ts, const uint32_t dst_inst, const uint32_t bare_inst); + +public: + DependenceModule(uint32_t mask, uint32_t pattern) + : LocalWriteModule(mask, pattern) { + smmap = new slamp::MemoryMap(mask, pattern, DM_TIMESTAMP_SIZE_IN_BYTES); +#ifdef COLLECT_TRACE + dep_trace.reserve(dep_trace_size + 10); // 10M +#endif + } + + ~DependenceModule() override { delete smmap; } + + void init(uint32_t loop_id, uint32_t pid); + void fini(const char *filename); + // always_inline attribute + void load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr); + void store(uint32_t instr, uint32_t bare_instr, const uint64_t addr); + void allocate(void *addr, uint64_t size); + void loop_invoc(); + void loop_iter(); + void loop_exit(); + void func_entry(uint32_t context); + void func_exit(uint32_t context); + + void merge_dep(DependenceModule &other); +}; diff --git 
a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/HTContainer.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/HTContainer.h new file mode 100644 index 00000000..e8165711 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/HTContainer.h @@ -0,0 +1,1446 @@ +/* + * HT (High Throughput) Containers + * Author: Ziyang Xu + * + * Use vector as buffer and use parallelism to improve performance + * This can replace set and map in STL + * + */ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#define HT +// #define ADAPTIVE_HT +#define PB + +#ifdef PB +#include "parallel_hashmap/phmap.h" +#else +#include +#endif + + +#ifdef PB +#define hash_set phmap::flat_hash_set +#define hash_map phmap::flat_hash_map +#else +#define hash_set std::unordered_set +#define hash_map std::unordered_map +#endif + +#define HT_THREAD_POOL +#define CACHELINE_SIZE 64 + + +template , + typename KeyEqual = std::equal_to, + uint32_t MAX_THREAD = 56, + uint32_t BUFFER_SIZE = 1'000'000> +class HTSet { + + public: + void Start() { + } + + private: +#ifdef HT_THREAD_POOL + bool should_terminate = false; // Tells threads to stop looking for jobs + std::mutex queue_mutex; // Prevents data races to the job queue + std::condition_variable mutex_condition; // Allows threads to wait on new jobs or termination + std::vector threads; + volatile int pending_jobs = 0; // Number of jobs that have not been completed + // avoid false sharing + + std::vector ready; + + void ThreadLoop(const int id) { + auto set_chunk = std::make_unique>(); + + const auto thread_count = MAX_THREAD; + auto job = [&]() { + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + // take the chunk and convert to a set and return + // set_chunk->reserve(set_size); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + set_chunk->insert(buffer.begin() + begin, 
buffer.begin() + end); + + m.lock(); + // lock the global set and insert the chunk + set.insert(set_chunk->begin(), set_chunk->end()); + m.unlock(); + set_chunk->clear(); + }; + while (true) { + { + std::unique_lock lock(queue_mutex); + mutex_condition.wait(lock, [this, id] { return ready[id * CACHELINE_SIZE] || should_terminate; }); + // std::cout << "Thread " << id << " is running" << std::endl; + if (should_terminate) { + return; + } + } + job(); + ready[id * CACHELINE_SIZE] = false; + { + std::unique_lock lock(queue_mutex); + pending_jobs--; + } + } + } +#endif + + +private: + std::vector buffer; + std::mutex m; + using hash_set_t = hash_set; + +public: + hash_set_t set; + HTSet() { + buffer.reserve(BUFFER_SIZE); +#ifdef HT_THREAD_POOL + const uint32_t num_threads = MAX_THREAD; + threads.resize(num_threads); + + ready.resize(num_threads * CACHELINE_SIZE); + for (uint32_t i = 0; i < num_threads; i++) { + threads[i] = std::thread(&HTSet::ThreadLoop, this, i); + ready[i * CACHELINE_SIZE] = false; + // threads.at(i) = std::thread(ThreadLoop, i); + } +#endif + } + + ~HTSet() { +#ifdef HT_THREAD_POOL + should_terminate = true; + mutex_condition.notify_all(); + for (auto &thread : threads) { + thread.join(); + } +#endif + } + + bool count(const T &key) { + return set.count(key); + } + + void emplace_back(T &&t) { +#ifdef HT + buffer.emplace_back(std::move(t)); + checkBuffer(); +#else + set.emplace(std::move(t)); +#endif + } + + void emplace_back(const T &t) { +#ifdef HT + buffer.emplace_back(t); + checkBuffer(); +#else + set.emplace(t); +#endif + } + + void emplace(T &&t) { + emplace_back(t); + } + + // the same as emplace_back + void emplace(const T &t) { + emplace_back(t); + } + + // iterator begin + auto begin() { + convertVectorToSet(); + return set.begin(); + } + + // iterator end + auto end() { + convertVectorToSet(); + return set.end(); + } + + void merge(HTSet &other) { + merge(other.begin(), other.end()); + } + + // insert (begin, end) + void merge(typename 
hash_set_t::iterator begin, typename hash_set_t::iterator end) { + for (auto it = begin; it != end; ++it) { + set.insert(*it); + } + } + + +private: + const uint32_t getThreadCount() { +#ifdef ADAPTIVE_HT + // TODO: adaptive thread count, measure the performance benefit of this + // get current active number of threads from /proc/loadavg + std::ifstream loadavg("/proc/loadavg"); + + // get active threads count + // ignore the first three fp numbers, find the 4th int number (before "/") + std::string load; + for (int i = 0; i < 3; i++) { + loadavg >> load; + } + loadavg >> load; + loadavg.close(); + load = load.substr(0, load.find('/')); + int active_threads = std::stoi(load); + + uint32_t MAX_CORES = 56; + + int running_threads = MAX_CORES - active_threads; + // max(1, running_threads) + running_threads = running_threads > 0 ? running_threads : 1; + // min(MAX_THREAD, running_threads) + running_threads = running_threads < MAX_THREAD ? running_threads : MAX_THREAD; + + + // std::cout << "active threads: " << running_threads << std::endl; + + return running_threads; +#else + return MAX_THREAD; +#endif + } + + inline void checkBuffer() { + if (buffer.size() == BUFFER_SIZE) { + convertVectorToSet(); + buffer.resize(0); + buffer.reserve(BUFFER_SIZE); + } + } + + void convertVectorToSet() { + const uint32_t thread_count = getThreadCount(); + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + if (buffer_size == 0) { + return; + } + + if (thread_count == 1) { + set.insert(buffer.begin(), buffer.end()); + return; + } + +#ifdef HT_THREAD_POOL + pending_jobs = thread_count; + + for (uint32_t i = 0; i < thread_count; i++) { + ready[i * CACHELINE_SIZE] = true; + } + + // std::cout << "pending jobs: " << pending_jobs << std::endl; + mutex_condition.notify_all(); + + // std:: cout << "waiting for jobs to finish" << std::endl; + + // busy wait: check if all threads are done + while (true) { + // std::cout << "pending jobs: " << 
pending_jobs << std::endl; + if (pending_jobs == 0) { + break; + } + } +#endif + +#ifndef HT_THREAD_POOL + // launch N threads to convert the vector to set independently, chunking + std::thread t[thread_count]; + for (unsigned long i = 0; i < thread_count; i++) { + t[i] = std::thread( + [&](int id) { + // take the chunk and convert to a set and return + auto *set_chunk = new hash_set(); + set_chunk->reserve(set_size); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + set_chunk->insert(buffer.begin() + begin, buffer.begin() + end); + + m.lock(); + // lock the global set and insert the chunk + set.insert(set_chunk->begin(), set_chunk->end()); + m.unlock(); + delete set_chunk; + }, + i); + } + // join the threads + for (auto &i : t) { + i.join(); + } +#endif + } +}; + +// HTMap_Redux, HTMap_T_Set +template , + typename KeyEqual = std::equal_to, + uint32_t MAX_THREAD = 16, + uint32_t BUFFER_SIZE = 1'000'000> +class HTMap_Sum { + using MyType = HTMap_Sum; + + public: + void Start() { + } + + private: +#ifdef HT_THREAD_POOL + bool should_terminate = false; // Tells threads to stop looking for jobs + std::mutex queue_mutex; // Prevents data races to the job queue + std::condition_variable mutex_condition; // Allows threads to wait on new jobs or termination + std::vector threads; + volatile int pending_jobs = 0; // Number of jobs that have not been completed + // avoid false sharing + + std::vector ready; + + void ThreadLoop(const int id) { + auto map_chunk = std::make_unique>(); + + const auto thread_count = MAX_THREAD; + auto job = [&]() { + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + // for each element in the chunk, insert into the map, and increment the count + // from begin to end + for (auto it = buffer.begin() + begin; it != buffer.begin() + end; 
++it) { + auto key = *it; + auto search = map_chunk->find(key); + if (search != map_chunk->end()) { + search->second++; + } else { + map_chunk->insert({key, 1}); + } + } + + m.lock(); + // lock the global set and insert the chunk + // merge the map_chunk into the global map + for (auto it = map_chunk->begin(); it != map_chunk->end(); ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + global_it->second += it->second; + } + } + m.unlock(); + map_chunk->clear(); + }; + while (true) { + { + std::unique_lock lock(queue_mutex); + mutex_condition.wait(lock, [this, id] { return ready[id * CACHELINE_SIZE] || should_terminate; }); + // std::cout << "Thread " << id << " is running" << std::endl; + if (should_terminate) { + return; + } + } + job(); + ready[id * CACHELINE_SIZE] = false; + { + std::unique_lock lock(queue_mutex); + pending_jobs--; + } + } + } +#endif + + +private: + std::vector buffer; + std::mutex m; + using hash_map_t = hash_map; + +public: + hash_map_t map; + HTMap_Sum() { + buffer.reserve(BUFFER_SIZE); +#ifdef HT_THREAD_POOL + const uint32_t num_threads = MAX_THREAD; + threads.resize(num_threads); + + ready.resize(num_threads * CACHELINE_SIZE); + for (uint32_t i = 0; i < num_threads; i++) { + threads[i] = std::thread(&HTMap_Sum::ThreadLoop, this, i); + ready[i * CACHELINE_SIZE] = false; + // threads.at(i) = std::thread(ThreadLoop, i); + } +#endif + } + + ~HTMap_Sum() { +#ifdef HT_THREAD_POOL + should_terminate = true; + mutex_condition.notify_all(); + for (auto &thread : threads) { + thread.join(); + } +#endif + } + + void emplace_back(T &&t) { +#ifdef HT + buffer.emplace_back(std::move(t)); + checkBuffer(); +#else + set.emplace(std::move(t)); +#endif + } + + void emplace_back(const T &t) { +#ifdef HT + buffer.emplace_back(t); + checkBuffer(); +#else + set.emplace(t); +#endif + } + + void emplace(T &&t) { + emplace_back(t); + } + + // the same as emplace_back + void emplace(const 
T &t) { + emplace_back(t); + } + + // iterator begin + auto begin() { + convertVectorToSet(); + return map.begin(); + } + + // iterator end + auto end() { + convertVectorToSet(); + return map.end(); + } + + // provide [] operator + auto &operator[](const T &key) { + convertVectorToSet(); + return map[key]; + } + + void merge(MyType &other) { + merge(other.begin(), other.end()); + } + + // insert (begin, end) + void merge(typename hash_map_t::iterator begin, typename hash_map_t::iterator end) { + convertVectorToSet(); + for (auto it = begin; it != end; ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + global_it->second += it->second; + } + } + } + + +private: + const uint32_t getThreadCount() { +#ifdef ADAPTIVE_HT + // TODO: adaptive thread count, measure the performance benefit of this + // get current active number of threads from /proc/loadavg + std::ifstream loadavg("/proc/loadavg"); + + // get active threads count + // ignore the first three fp numbers, find the 4th int number (before "/") + std::string load; + for (int i = 0; i < 3; i++) { + loadavg >> load; + } + loadavg >> load; + loadavg.close(); + load = load.substr(0, load.find('/')); + int active_threads = std::stoi(load); + + uint32_t MAX_CORES = 56; + + int running_threads = MAX_CORES - active_threads; + // max(1, running_threads) + running_threads = running_threads > 0 ? running_threads : 1; + // min(MAX_THREAD, running_threads) + running_threads = running_threads < MAX_THREAD ? 
running_threads : MAX_THREAD; + + + // std::cout << "active threads: " << running_threads << std::endl; + + return running_threads; +#else + return MAX_THREAD; +#endif + } + + inline void checkBuffer() { + if (buffer.size() == BUFFER_SIZE) { + convertVectorToSet(); + buffer.resize(0); + buffer.reserve(BUFFER_SIZE); + } + } + + void convertVectorToSet() { + const uint32_t thread_count = getThreadCount(); + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + if (buffer_size == 0) { + return; + } + + if (thread_count == 1) { + // merge the buffer to the map + for (auto key : buffer) { + auto it = map.find(key); + if (it == map.end()) { + map.insert({key, 1}); + } else { + it->second++; + } + } + return; + } + +#ifdef HT_THREAD_POOL + pending_jobs = thread_count; + + for (uint32_t i = 0; i < thread_count; i++) { + ready[i * CACHELINE_SIZE] = true; + } + + // std::cout << "pending jobs: " << pending_jobs << std::endl; + mutex_condition.notify_all(); + + // std:: cout << "waiting for jobs to finish" << std::endl; + + // busy wait: check if all threads are done + while (true) { + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (pending_jobs == 0) { + break; + } + } +#endif + +#ifndef HT_THREAD_POOL + static_assert(false, "HT_THREAD_POOL is not defined, invalid for map"); +#endif + } +}; + +template , + typename KeyEqual = std::equal_to, uint32_t MAX_THREAD = 16, + uint32_t BUFFER_SIZE = 1'000'000> +class HTMap_Min { + using MyType = HTMap_Min; + using TV = uint32_t; + +public: + void Start() {} + +private: +#ifdef HT_THREAD_POOL + bool should_terminate = false; // Tells threads to stop looking for jobs + bool should_gather = false; + std::mutex queue_mutex; // Prevents data races to the job queue + std::condition_variable + mutex_condition; // Allows threads to wait on new jobs or termination + std::vector threads; + volatile int pending_jobs = 0; // Number of jobs that have not been completed + // avoid 
false sharing + + std::vector ready; + + void ThreadLoop(const int id) { + auto map_chunk = std::make_unique(); + + const auto thread_count = MAX_THREAD; + auto job = [&]() { + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + // for each element in the chunk, insert into the map, and take min + for (auto it_buffer = buffer.begin() + begin; it_buffer != buffer.begin() + end; ++it_buffer) { + auto &key = it_buffer->first; + auto &value = it_buffer->second; + + auto it = map_chunk->find(key); + if (it == map_chunk->end()) { + map_chunk->insert({key, value}); + } else { + // check and takes the min + if (it->second > value) { + it->second = value; + } + } + } + + if (should_gather) { + m.lock(); + // std::cout << "thread " << id << " is gathering" << std::endl; + // lock the global set and insert the chunk + // merge the map_chunk into the global map + for (auto it = map_chunk->begin(); it != map_chunk->end(); ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // check and takes the min + if (global_it->second > it->second) { + global_it->second = it->second; + } + } + } + m.unlock(); + map_chunk->clear(); + } + }; + while (true) { + { + std::unique_lock lock(queue_mutex); + mutex_condition.wait(lock, [this, id] { + return ready[id * CACHELINE_SIZE] || should_terminate; + }); + // std::cout << "Thread " << id << " is running" << std::endl; + if (should_terminate) { + return; + } + } + job(); + ready[id * CACHELINE_SIZE] = false; + { + std::unique_lock lock(queue_mutex); + pending_jobs--; + } + } + } +#endif + +private: + using buffer_item_t = std::pair; + std::vector buffer; + std::mutex m; + using hash_map_t = hash_map; + +public: + hash_map_t map; + HTMap_Min() { + buffer.reserve(BUFFER_SIZE); +#ifdef HT_THREAD_POOL + const uint32_t 
num_threads = MAX_THREAD; + threads.resize(num_threads); + + ready.resize(num_threads * CACHELINE_SIZE); + for (uint32_t i = 0; i < num_threads; i++) { + threads[i] = std::thread(&HTMap_Min::ThreadLoop, this, i); + ready[i * CACHELINE_SIZE] = false; + // threads.at(i) = std::thread(ThreadLoop, i); + } +#endif + } + + ~HTMap_Min() { +#ifdef HT_THREAD_POOL + should_terminate = true; + mutex_condition.notify_all(); + for (auto &thread : threads) { + thread.join(); + } +#endif + } + + void emplace_back(buffer_item_t &&t) { +#ifdef HT + buffer.emplace_back(std::move(t)); + checkBuffer(); +#else + set.emplace(std::move(t)); +#endif + } + + size_t size() { + convertVectorToSet(true); + return map.size(); + } + + void emplace_back(const buffer_item_t &t) { +#ifdef HT + buffer.emplace_back(t); + checkBuffer(); +#else + set.emplace(t); +#endif + } + + void emplace(buffer_item_t &&t) { emplace_back(t); } + + // the same as emplace_back + void emplace(const buffer_item_t &t) { emplace_back(t); } + + // iterator begin + auto begin() { + convertVectorToSet(true); + return map.begin(); + } + + // iterator end + auto end() { + convertVectorToSet(true); + return map.end(); + } + + auto count(const TK &key) { + convertVectorToSet(true); + return map.count(key); + } + + // provide [] operator + auto &operator[](const TK &key) { + convertVectorToSet(true); + return map[key]; + } + + void merge(MyType &other) { + merge(other.begin(), other.end()); + } + + // insert (begin, end) + void merge(typename hash_map_t::iterator begin, typename hash_map_t::iterator end) { + convertVectorToSet(true); + for (auto it = begin; it != end; ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // check and takes the min + if (global_it->second > it->second) { + global_it->second = it->second; + } + } + } + } + +private: + const uint32_t getThreadCount() { +#ifdef ADAPTIVE_HT + // TODO: adaptive thread count, measure the 
performance benefit of this + // get current active number of threads from /proc/loadavg + std::ifstream loadavg("/proc/loadavg"); + + // get active threads count + // ignore the first three fp numbers, find the 4th int number (before "/") + std::string load; + for (int i = 0; i < 3; i++) { + loadavg >> load; + } + loadavg >> load; + loadavg.close(); + load = load.substr(0, load.find('/')); + int active_threads = std::stoi(load); + + uint32_t MAX_CORES = 56; + + int running_threads = MAX_CORES - active_threads; + // max(1, running_threads) + running_threads = running_threads > 0 ? running_threads : 1; + // min(MAX_THREAD, running_threads) + running_threads = + running_threads < MAX_THREAD ? running_threads : MAX_THREAD; + + // std::cout << "active threads: " << running_threads << std::endl; + + return running_threads; +#else + return MAX_THREAD; +#endif + } + + inline void checkBuffer() { + if (buffer.size() == BUFFER_SIZE) { + convertVectorToSet(); + buffer.resize(0); + buffer.reserve(BUFFER_SIZE); + } + } + + void convertVectorToSet(bool gather = false) { + const uint32_t thread_count = getThreadCount(); + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + if (buffer_size == 0) { + return; + } + + if (thread_count == 1) { + // merge the buffer to the map + for (auto p: buffer) { + auto &key = p.first; + auto &value = p.second; + auto it = map.find(key); + + if (it == map.end()) { + map.insert({key, value}); + } else { + // check and takes the min + if (it->second > value) { + it->second = value; + } + } + } + return; + } + +#ifdef HT_THREAD_POOL + pending_jobs = thread_count; + + for (uint32_t i = 0; i < thread_count; i++) { + ready[i * CACHELINE_SIZE] = true; + } + + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (gather) { + should_gather = true; + } + mutex_condition.notify_all(); + + // std:: cout << "waiting for jobs to finish" << std::endl; + + // busy wait: check if all threads are done + 
while (true) { + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (pending_jobs == 0) { + break; + } + } + should_gather = false; +#endif + +#ifndef HT_THREAD_POOL + static_assert(false, "HT_THREAD_POOL is not defined, invalid for map"); +#endif + } +}; + +template , + typename KeyEqual = std::equal_to, uint32_t MAX_THREAD = 16, + uint32_t BUFFER_SIZE = 1'000'000> +class HTMap_IsConstant { + using MyType = HTMap_IsConstant; + using TV = uint64_t; + +public: + constexpr static TV MAGIC_UNITIALIZED = 0xdeadbeefdeadbeef; + constexpr static TV MAGIC_INVALID = 0xbeefdeadbeefdead; + void Start() {} + +private: +#ifdef HT_THREAD_POOL + bool should_terminate = false; // Tells threads to stop looking for jobs + bool should_gather = false; + std::mutex queue_mutex; // Prevents data races to the job queue + std::condition_variable + mutex_condition; // Allows threads to wait on new jobs or termination + std::vector threads; + volatile int pending_jobs = 0; // Number of jobs that have not been completed + // avoid false sharing + + std::vector ready; + + void ThreadLoop(const int id) { + auto map_chunk = std::make_unique(); + + const auto thread_count = MAX_THREAD; + auto job = [&]() { + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + // for each element in the chunk, insert into the map, and set invalid if not same + for (auto it_buffer = buffer.begin() + begin; it_buffer != buffer.begin() + end; ++it_buffer) { + auto &key = it_buffer->first; + auto &value = it_buffer->second; + + auto it = map_chunk->find(key); + if (it == map_chunk->end()) { + map_chunk->insert({key, value}); + } else { + // check if value is the same + if (it->second != MAGIC_INVALID && it->second != value) { + it->second = MAGIC_INVALID; + } + } + } + + if (should_gather) { + m.lock(); + // std::cout << "thread " << id << " is 
gathering" << std::endl; + // lock the global set and insert the chunk + // merge the map_chunk into the global map + for (auto it = map_chunk->begin(); it != map_chunk->end(); ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // check if value is the same + if (global_it->second != MAGIC_INVALID && global_it->second != it->second) { + global_it->second = MAGIC_INVALID; + } + } + } + m.unlock(); + map_chunk->clear(); + } + }; + while (true) { + { + std::unique_lock lock(queue_mutex); + mutex_condition.wait(lock, [this, id] { + return ready[id * CACHELINE_SIZE] || should_terminate; + }); + // std::cout << "Thread " << id << " is running" << std::endl; + if (should_terminate) { + return; + } + } + job(); + ready[id * CACHELINE_SIZE] = false; + { + std::unique_lock lock(queue_mutex); + pending_jobs--; + } + } + } +#endif + +private: + using buffer_item_t = std::pair; + std::vector buffer; + std::mutex m; + using hash_map_t = hash_map; + +public: + hash_map_t map; + HTMap_IsConstant() { + buffer.reserve(BUFFER_SIZE); +#ifdef HT_THREAD_POOL + const uint32_t num_threads = MAX_THREAD; + threads.resize(num_threads); + + ready.resize(num_threads * CACHELINE_SIZE); + for (uint32_t i = 0; i < num_threads; i++) { + threads[i] = std::thread(&HTMap_IsConstant::ThreadLoop, this, i); + ready[i * CACHELINE_SIZE] = false; + // threads.at(i) = std::thread(ThreadLoop, i); + } +#endif + } + + ~HTMap_IsConstant() { +#ifdef HT_THREAD_POOL + should_terminate = true; + mutex_condition.notify_all(); + for (auto &thread : threads) { + thread.join(); + } +#endif + } + + void emplace_back(buffer_item_t &&t) { +#ifdef HT + buffer.emplace_back(std::move(t)); + checkBuffer(); +#else + set.emplace(std::move(t)); +#endif + } + + size_t size() { + convertVectorToSet(true); + return map.size(); + } + + void emplace_back(const buffer_item_t &t) { +#ifdef HT + buffer.emplace_back(t); + checkBuffer(); +#else + 
set.emplace(t); +#endif + } + + void emplace(buffer_item_t &&t) { emplace_back(t); } + + // the same as emplace_back + void emplace(const buffer_item_t &t) { emplace_back(t); } + + // iterator begin + auto begin() { + convertVectorToSet(true); + return map.begin(); + } + + // iterator end + auto end() { + convertVectorToSet(true); + return map.end(); + } + + auto count(const TK &key) { + convertVectorToSet(true); + return map.count(key); + } + + // provide [] operator + auto &operator[](const TK &key) { + convertVectorToSet(true); + return map[key]; + } + + void merge(MyType &other) { + merge(other.begin(), other.end()); + } + + // insert (begin, end) + void merge(typename hash_map_t::iterator begin, typename hash_map_t::iterator end) { + convertVectorToSet(true); + for (auto it = begin; it != end; ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // check if value is the same + if (global_it->second != MAGIC_INVALID && global_it->second != it->second) { + global_it->second = MAGIC_INVALID; + } + } + } + } + +private: + const uint32_t getThreadCount() { +#ifdef ADAPTIVE_HT + // TODO: adaptive thread count, measure the performance benefit of this + // get current active number of threads from /proc/loadavg + std::ifstream loadavg("/proc/loadavg"); + + // get active threads count + // ignore the first three fp numbers, find the 4th int number (before "/") + std::string load; + for (int i = 0; i < 3; i++) { + loadavg >> load; + } + loadavg >> load; + loadavg.close(); + load = load.substr(0, load.find('/')); + int active_threads = std::stoi(load); + + uint32_t MAX_CORES = 56; + + int running_threads = MAX_CORES - active_threads; + // max(1, running_threads) + running_threads = running_threads > 0 ? running_threads : 1; + // min(MAX_THREAD, running_threads) + running_threads = + running_threads < MAX_THREAD ? 
running_threads : MAX_THREAD; + + // std::cout << "active threads: " << running_threads << std::endl; + + return running_threads; +#else + return MAX_THREAD; +#endif + } + + inline void checkBuffer() { + if (buffer.size() == BUFFER_SIZE) { + convertVectorToSet(); + buffer.resize(0); + buffer.reserve(BUFFER_SIZE); + } + } + + void convertVectorToSet(bool gather = false) { + const uint32_t thread_count = getThreadCount(); + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + if (buffer_size == 0) { + return; + } + + if (thread_count == 1) { + // merge the buffer to the map + for (auto p: buffer) { + auto &key = p.first; + auto &value = p.second; + auto it = map.find(key); + + if (it == map.end()) { + map.insert({key, value}); + } else { + // check if value is the same + if (it->second != MAGIC_INVALID && it->second != value) { + it->second = MAGIC_INVALID; + } + } + } + return; + } + +#ifdef HT_THREAD_POOL + pending_jobs = thread_count; + + for (uint32_t i = 0; i < thread_count; i++) { + ready[i * CACHELINE_SIZE] = true; + } + + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (gather) { + should_gather = true; + } + mutex_condition.notify_all(); + + // std:: cout << "waiting for jobs to finish" << std::endl; + + // busy wait: check if all threads are done + while (true) { + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (pending_jobs == 0) { + break; + } + } + should_gather = false; +#endif + +#ifndef HT_THREAD_POOL + static_assert(false, "HT_THREAD_POOL is not defined, invalid for map"); +#endif + } +}; + +template , + typename KeyEqual = std::equal_to, uint32_t MAX_THREAD = 16, + uint32_t BUFFER_SIZE = 1'000'000> +class HTMap_Set { + using MyType = HTMap_Set; + +public: + void Start() {} + +private: +#ifdef HT_THREAD_POOL + bool should_terminate = false; // Tells threads to stop looking for jobs + bool should_gather = false; + std::mutex queue_mutex; // Prevents data races to the 
job queue + std::condition_variable + mutex_condition; // Allows threads to wait on new jobs or termination + std::vector threads; + volatile int pending_jobs = 0; // Number of jobs that have not been completed + // avoid false sharing + + std::vector ready; + + void ThreadLoop(const int id) { + auto map_chunk = std::make_unique(); + + const auto thread_count = MAX_THREAD; + auto job = [&]() { + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + auto begin = id * (buffer_size / thread_count); + auto end = (id + 1) * (buffer_size / thread_count); + + // for each element in the chunk, insert into the map, and set invalid if not same + for (auto it = buffer.begin() + begin; it != buffer.begin() + end; ++it) { + auto &key = it->first; + auto &value = it->second; + + auto it_chunk = map_chunk->find(key); + + if (it_chunk == map_chunk->end()) { + // insert a new set as value + hash_set set = {value}; + map_chunk->insert({key, set}); + } else { + // check if value is the same + map_chunk->at(key).insert(value); + } + } + + if (should_gather) { + m.lock(); + // lock the global set and insert the chunk + // merge the map_chunk into the global map + for (auto it = map_chunk->begin(); it != map_chunk->end(); ++it) { + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // check if value is the same + global_it->second.insert(it->second.begin(), it->second.end()); + } + } + m.unlock(); + map_chunk->clear(); + } + }; + while (true) { + { + std::unique_lock lock(queue_mutex); + mutex_condition.wait(lock, [this, id] { + return ready[id * CACHELINE_SIZE] || should_terminate; + }); + // std::cout << "Thread " << id << " is running" << std::endl; + if (should_terminate) { + return; + } + } + job(); + ready[id * CACHELINE_SIZE] = false; + { + std::unique_lock lock(queue_mutex); + pending_jobs--; + } + } + } +#endif + +private: + using buffer_item_t = std::pair; + 
std::vector buffer; + std::mutex m; + using hash_map_t = hash_map, Hash, KeyEqual>; + +public: + hash_map_t map; + HTMap_Set() { + buffer.reserve(BUFFER_SIZE); +#ifdef HT_THREAD_POOL + const uint32_t num_threads = MAX_THREAD; + threads.resize(num_threads); + + ready.resize(num_threads * CACHELINE_SIZE); + for (uint32_t i = 0; i < num_threads; i++) { + threads[i] = std::thread(&HTMap_Set::ThreadLoop, this, i); + ready[i * CACHELINE_SIZE] = false; + // threads.at(i) = std::thread(ThreadLoop, i); + } +#endif + } + + ~HTMap_Set() { +#ifdef HT_THREAD_POOL + should_terminate = true; + mutex_condition.notify_all(); + for (auto &thread : threads) { + thread.join(); + } +#endif + } + + void emplace_back(buffer_item_t &&t) { +#ifdef HT + buffer.emplace_back(std::move(t)); + checkBuffer(); +#else + set.emplace(std::move(t)); +#endif + } + + void emplace_back(const buffer_item_t &t) { +#ifdef HT + buffer.emplace_back(t); + checkBuffer(); +#else + set.emplace(t); +#endif + } + + void emplace(buffer_item_t &&t) { emplace_back(t); } + + // the same as emplace_back + void emplace(const buffer_item_t &t) { emplace_back(t); } + + // iterator begin + auto begin() { + convertVectorToSet(true); + return map.begin(); + } + + // iterator end + auto end() { + convertVectorToSet(true); + return map.end(); + } + + auto count(const TK &key) { + convertVectorToSet(true); + return map.count(key); + } + + // provide [] operator + auto &operator[](const TK &key) { + convertVectorToSet(true); + return map[key]; + } + + void merge(MyType &other) { + merge(other.begin(), other.end()); + } + + // insert (begin, end) + void merge(typename hash_map_t::iterator begin, typename hash_map_t::iterator end) { + convertVectorToSet(true); + for (auto it = begin; it != end; ++it) { + // if the key is not in the map, insert it + auto global_it = map.find(it->first); + if (global_it == map.end()) { + map.insert({it->first, it->second}); + } else { + // merge the set + global_it->second.merge(it->second); + } + } 
+ } + +private: + const uint32_t getThreadCount() { +#ifdef ADAPTIVE_HT + // TODO: adaptive thread count, measure the performance benefit of this + // get current active number of threads from /proc/loadavg + std::ifstream loadavg("/proc/loadavg"); + + // get active threads count + // ignore the first three fp numbers, find the 4th int number (before "/") + std::string load; + for (int i = 0; i < 3; i++) { + loadavg >> load; + } + loadavg >> load; + loadavg.close(); + load = load.substr(0, load.find('/')); + int active_threads = std::stoi(load); + + uint32_t MAX_CORES = 56; + + int running_threads = MAX_CORES - active_threads; + // max(1, running_threads) + running_threads = running_threads > 0 ? running_threads : 1; + // min(MAX_THREAD, running_threads) + running_threads = + running_threads < MAX_THREAD ? running_threads : MAX_THREAD; + + // std::cout << "active threads: " << running_threads << std::endl; + + return running_threads; +#else + return MAX_THREAD; +#endif + } + + inline void checkBuffer() { + if (buffer.size() == BUFFER_SIZE) { + convertVectorToSet(); + buffer.resize(0); + buffer.reserve(BUFFER_SIZE); + } + } + + void convertVectorToSet(bool gather = false) { + const uint32_t thread_count = getThreadCount(); + const auto set_size = buffer.size() / thread_count; + const auto buffer_size = buffer.size(); + + if (buffer_size == 0) { + return; + } + + if (thread_count == 1) { + // merge the buffer to the map + for (auto p: buffer) { + auto &key = p.first; + auto &value = p.second; + + auto global_it = map.find(key); + if (global_it == map.end()) { + hash_set set = {value}; + map.insert({key, set}); + } else { + global_it->second.insert(value); + } + } + return; + } + +#ifdef HT_THREAD_POOL + pending_jobs = thread_count; + + for (uint32_t i = 0; i < thread_count; i++) { + ready[i * CACHELINE_SIZE] = true; + } + + if (gather) { + should_gather = true; + } + // std::cout << "pending jobs: " << pending_jobs << std::endl; + mutex_condition.notify_all(); + + // 
std:: cout << "waiting for jobs to finish" << std::endl; + + // busy wait: check if all threads are done + while (true) { + // std::cout << "pending jobs: " << pending_jobs << std::endl; + if (pending_jobs == 0) { + break; + } + } + should_gather = false; +#endif + +#ifndef HT_THREAD_POOL + static_assert(false, "HT_THREAD_POOL is not defined, invalid for map"); +#endif + } +}; diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.cpp b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.cpp new file mode 100644 index 00000000..2457224e --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.cpp @@ -0,0 +1,62 @@ +#include "LoadedValueModule.h" +#include + +void LoadedValueModule::init(uint32_t loop_id, uint32_t pid) { +} + +void LoadedValueModule::fini(const char *filename) { + std::ofstream specprivfs(filename); + specprivfs << "BEGIN SPEC PRIV PROFILE\n"; + for (auto &[key, cp] : constmap_value) { + if (cp != constmap_value.MAGIC_INVALID) { + // instr and value + auto instr = key.first; + // auto value = cp->value; + auto value = cp; + // later, we need to parse the instruction to see if it's a pointer or a + // regular integer + specprivfs << "PRED VAL " << instr << " " << value << " ; \n"; + } + } + + specprivfs << " END SPEC PRIV PROFILE\n"; + specprivfs.close(); +} + +void LoadedValueModule::load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value, uint8_t size) { + local_write(instr, [&]() { + AccessKey key(instr, bare_instr); + constmap_value.emplace(std::make_pair(key, value)); + }); + // if (constmap_value.count(key) != 0) { + // auto cp = constmap_value[key]; + + // if (cp->valid) { + // if (cp->value != value) { + // cp->valid = false; + // } + // } + // // // Remove check for constant need to have the same address + // // if (cp->valueinit && cp->addr != addr) + // // cp->valid = false; + // // if (cp->valid) { + // // if 
(cp->valueinit && cp->value != value) { + // // cp->valid = false; + // // } + // // else { + // // cp->valueinit = true; + // // cp->value = value; + // // cp->addr = addr; + // // } + // // } + // } else { + // auto cp = new Constant(true, value); + // constmap_value[key] = cp; + // // auto cp = new Constant(true, true, size, addr, value); + // // constmap_value->insert(std::make_pair(key, cp)); + // } +} + +void LoadedValueModule::merge_values(LoadedValueModule &other) { + constmap_value.merge(other.constmap_value); +} diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.h new file mode 100644 index 00000000..4c8d7331 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LoadedValueModule.h @@ -0,0 +1,67 @@ +#pragma once +#include +#include +#include + +#include "LocalWriteModule.h" +#include "HTContainer.h" +#include "context.h" + +enum class LoadedValueModAction : uint32_t { + INIT = 0, + LOAD, + FINISHED +}; + +struct PairHash +{ + std::size_t operator () (std::pair const &v) const + { + static_assert(sizeof(size_t) == sizeof(uint64_t), "Should be 64bit address"); + // std::hash hash_fn; + // need to make sure if the pair are the same, this doesn't generate 0 + return ((uint64_t)v.first << 32) | v.second; + } +}; + +// instr, bare_instr +using AccessKey = std::pair; +struct Constant { + bool valid; + // bool valueinit; + // uint8_t size; + // uint64_t addr; + uint64_t value; + // char pad[64 - sizeof(uint64_t) - sizeof(uint64_t) - sizeof(uint8_t) - + // sizeof(bool) - sizeof(bool)]; + + Constant(bool valid, uint64_t value) : valid(valid), value(value) {} + // Constant(bool va, bool vi, uint8_t s, uint64_t a, uint64_t v) + // : valid(va), valueinit(vi), size(s), addr(a), value(v) {} +}; + + +class LoadedValueModule: public GenericLocalWriteModule { + private: + uint64_t slamp_iteration = 0; + uint64_t slamp_invocation = 0; + 
uint32_t target_loop_id = 0; + + bool in_loop = false; + + // std::unordered_map *constmap_value; + HTMap_IsConstant constmap_value; + + public: + LoadedValueModule(uint32_t mask, uint32_t pattern) + : GenericLocalWriteModule(mask, pattern) { + } + + ~LoadedValueModule() override = default; + + void init(uint32_t loop_id, uint32_t pid); + void fini(const char *filename); + void load(uint32_t instr, const uint64_t addr, const uint32_t bare_instr, uint64_t value, uint8_t size); + + void merge_values(LoadedValueModule &other); +}; diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LocalWriteModule.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LocalWriteModule.h new file mode 100644 index 00000000..e41febf9 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/LocalWriteModule.h @@ -0,0 +1,47 @@ +#pragma once +#include + +class LocalWriteModule { +protected: + const uint32_t LOCALWRITE_MASK{}; + const uint32_t LOCALWRITE_PATTERN{}; + // PAGE SIZE 4096 = 2^12; FIXME: here we can make it finer because we + // allocate 8 bytes of metadata per one byte of data, but changes to + // shadow memory needed + static constexpr uint32_t LOCALWRITE_SHIFT = 12; + // takes in a lambda action and uint64_t addr + template + inline void local_write(uint64_t addr, const F &action) { + if (((addr >> LOCALWRITE_SHIFT) & LOCALWRITE_MASK) == LOCALWRITE_PATTERN) { + action(); + } + } + +public: + LocalWriteModule(uint32_t mask, uint32_t pattern) + : LOCALWRITE_MASK(mask), LOCALWRITE_PATTERN(pattern) {} + virtual ~LocalWriteModule() = default; +}; + +class GenericLocalWriteModule { +protected: + const uint32_t LOCALWRITE_MASK{}; + const uint32_t LOCALWRITE_PATTERN{}; + // PAGE SIZE 4096 = 2^12; FIXME: here we can make it finer because we + // allocate 8 bytes of metadata per one byte of data, but changes to + // shadow memory needed + static constexpr uint32_t LOCALWRITE_SHIFT = 0; + // takes in a lambda action and uint64_t addr + template 
+ inline void local_write(uint64_t value, const F &action) { + if (((value >> LOCALWRITE_SHIFT) & LOCALWRITE_MASK) == LOCALWRITE_PATTERN) { + action(); + } + } + +public: + GenericLocalWriteModule(uint32_t mask, uint32_t pattern) + : LOCALWRITE_MASK(mask), LOCALWRITE_PATTERN(pattern) {} + virtual ~GenericLocalWriteModule() = default; +}; + diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.cpp b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.cpp new file mode 100644 index 00000000..9444a727 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.cpp @@ -0,0 +1,105 @@ +#include "ObjectLifetimeModule.h" +#include "slamp_timestamp.h" + +void ObjectLifetimeModule::allocate(void *addr, uint32_t instr, uint64_t size) { + void* shadow = smmap->allocate(addr, size); + + TS *s = (TS *)shadow; + // log all data into sigle TS + // FIXME: static instruction and the dynamic context? + // context: static instr + function + loop + auto hash = contextManager.encodeActiveContext(); + // print active context + // contextManager->activeContext->print(std::cerr); + + // currentContext->print(std::cerr); + // std::cerr << "malloc hash: " << hash << "\n"; + + // TS ts = CREATE_TS(instr, hash, __slamp_invocation); + TS ts = CREATE_TS_HASH(instr, hash, slamp_iteration, slamp_iteration); + + //8 bytes per byte TODO: can we reduce this? 
+ for (auto i = 0; i < size; i++) + s[i] = ts; +} + +void ObjectLifetimeModule::free(void *addr) { + local_write((uint64_t)addr, [&](){ + if (addr == nullptr) + return; + // if we are still in the loop and the iteration is the same, mark it as + // local otherwise mark it as not local + TS *s = (TS *)GET_SHADOW_OL(addr, TIMESTAMP_SIZE_IN_POWER_OF_TWO); + // auto instr = GET_INSTR(s[0]); + // auto hash = GET_HASH(s[0]); + auto iteration = GET_INVOC(s[0]) & 0xF0 >> 4; + auto invocation = GET_INVOC(s[0]) & 0xF; + + auto instrAndHash = s[0] & 0xFFFFFFFFFFFFFF00; + + if (iteration == (0xf & slamp_iteration) && + invocation == (0xf & slamp_invocation) && in_loop == true) { + shortLivedObjects.emplace(instrAndHash); + } else { + // is not short-lived + longLivedObjects.emplace(instrAndHash); + } + }); +} + +// manage context +void ObjectLifetimeModule::func_entry(uint32_t fcnId) { + auto contextId = ContextId(FunctionContext, fcnId); + contextManager.pushContext(contextId); +} + +void ObjectLifetimeModule::func_exit(uint32_t fcnId) { + auto contextId = ContextId(FunctionContext, fcnId); + contextManager.popContext(contextId); +} + + +void ObjectLifetimeModule::loop_invoc() { + slamp_iteration = 0; + slamp_invocation++; + in_loop = true; +} + +void ObjectLifetimeModule::loop_iter() { slamp_iteration++; } + +void ObjectLifetimeModule::loop_exit() { + in_loop = false; +} + + +void ObjectLifetimeModule::init(uint32_t loop_id, uint32_t pid) { + target_loop_id = loop_id; +#define SIZE_8M 0x800000 + smmap->init_stack(SIZE_8M, pid); +} + +// TODO: implement dumping +void ObjectLifetimeModule::fini(const char *filename) { + // Dump compatible one as SpecPriv output + std::ofstream specprivfs(filename); + specprivfs << "BEGIN SPEC PRIV PROFILE\n"; + specprivfs << "COMPLETE ALLOCATION INFO ; \n"; + + // local objects + for (auto &obj : shortLivedObjects) { + // LOCAL OBJECT AU HEAP main if.else.i call.i4.i FROM CONTEXT { LOOP main + // for.cond15 1 WITHIN FUNCTION main WITHIN TOP } 
IS LOCAL TO CONTEXT { + // LOOP main for.cond15 1 WITHIN FUNCTION main WITHIN TOP } COUNT 300 ; + if (longLivedObjects.count(obj)) + continue; + + auto instr = GET_INSTR(obj); + auto hash = GET_HASH(obj); + specprivfs << "LOCAL OBJECT " << instr << " at context "; + contextManager.printContext(specprivfs, hash); + specprivfs << ";\n"; + } + + specprivfs << " END SPEC PRIV PROFILE\n"; +} + diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.h new file mode 100644 index 00000000..08cd2cbf --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/ObjectLifetimeModule.h @@ -0,0 +1,75 @@ +#include +#include +#include + +#include "slamp_logger.h" +#include "slamp_shadow_mem.h" +#include "slamp_timestamp.h" + +#include "LocalWriteModule.h" +#include "HTContainer.h" +#include "ContextManager.h" + +enum class ObjectLifetimeModAction : uint32_t { + INIT = 0, + // LOAD, + // STORE, + ALLOC, + FREE, + LOOP_INVOC, + LOOP_ITER, + LOOP_EXIT, + FUNC_ENTRY, + FUNC_EXIT, + FINISHED +}; + +// using namespace SLAMPLib; + +class ObjectLifetimeModule: public LocalWriteModule { + private: + uint64_t slamp_iteration = 0; + uint64_t slamp_invocation = 0; + slamp::MemoryMap *smmap = nullptr; + uint32_t target_loop_id = 0; + + bool in_loop = false; + + + enum SpecPrivContextType { + TopContext = 0, + FunctionContext, + LoopContext, + }; + + using SpecPrivContextManager = NewContextManager; + using ContextId = ContextId; + SpecPrivContextManager contextManager; + + HTSet, std::equal_to<>, 16> shortLivedObjects, longLivedObjects; + // std::unordered_set shortLivedObjects, longLivedObjects; + + public: + ObjectLifetimeModule(uint32_t mask, uint32_t pattern) + : LocalWriteModule(mask, pattern) { + smmap = new slamp::MemoryMap(LOCALWRITE_MASK, LOCALWRITE_PATTERN, TIMESTAMP_SIZE_IN_BYTES); + } + + ~ObjectLifetimeModule() override { + delete smmap; + } + + void 
init(uint32_t loop_id, uint32_t pid); + void fini(const char *filename); + + void allocate(void *addr, uint32_t instr, uint64_t size); + void free(void *addr); + + void func_entry(uint32_t fcnId); + void func_exit(uint32_t fcnId); + + void loop_invoc(); + void loop_iter(); + void loop_exit(); + +}; diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.cpp b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.cpp new file mode 100644 index 00000000..423870e2 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.cpp @@ -0,0 +1,365 @@ +#include "PointsToModule.h" +#include "context.h" +#include "slamp_timestamp.h" +#include + +// Points-to module +// Requires events: +// - Stack alloca +// - Stack free? +// - [x] Head allocation and free +// - [x] Target loop invocation, iteration +// - [x] Loop entry, exit +// - [x] Fcn entry, exit +// - Report base +// - init, fini +#define FORMAT_INST_ARG(fcnId, argId) (fcnId << 5 | (0x1f & (argId << 4) | 0x1)) +#define FORMAT_INST_INST(instId) (instId << 1 | 0x0) + +bool in_func5 = false; + +void PointsToModule::allocate(void *addr, uint32_t instr, uint64_t size) { + // FIXME: is it leagal + void* shadow = smmap->allocate(addr, size + 1); + // void* shadow = smmap->allocate(addr, size); + + TS *s = (TS *)shadow; + // log all data into sigle TS + // FIXME: static instruction and the dynamic context? + // context: static instr + function + loop + auto hash = contextManager.encodeActiveContext(); + // print active context + // contextManager->activeContext->print(std::cerr); + + // currentContext->print(std::cerr); + // std::cerr << "malloc hash: " << hash << "\n"; + + // TS ts = CREATE_TS(instr, hash, __slamp_invocation); + TS ts = CREATE_TS_HASH(instr, hash, slamp_iteration, slamp_iteration); + + //8 bytes per byte TODO: can we reduce this? 
+ for (auto i = 0; i < size; i++) { + // FIXME: make it more compact + local_write((uint64_t)addr + i, [&](){ + s[i] = ts; + }); + } + + // FIXME: is this legal? + // Guard the last byte out of the range + local_write((uint64_t)addr + size, [&](){ + s[size] = ts; + }); + +} + +void PointsToModule::free(void *addr) { +} + +// manage context +void PointsToModule::func_entry(uint32_t fcnId) { + if (fcnId == 5) { + in_func5 = true; + } + + auto contextId = ContextId(FunctionContext, fcnId); + contextManager.pushContext(contextId); +} + +void PointsToModule::func_exit(uint32_t fcnId) { + if (fcnId == 5) { + in_func5 = false; + } + auto contextId = ContextId(FunctionContext, fcnId); + contextManager.popContext(contextId); +} + +void PointsToModule::loop_entry(uint32_t loopId) { + auto contextId = ContextId(LoopContext, loopId); + contextManager.pushContext(contextId); + + if (loopId == target_loop_id) { + in_loop = true; + } +} + +void PointsToModule::loop_exit(uint32_t loopId) { + auto contextId = ContextId(LoopContext, loopId); + contextManager.popContext(contextId); + if (loopId == target_loop_id) { + in_loop = false; + } +} + + +void PointsToModule::loop_invoc() { + targetLoopContexts.emplace(contextManager.encodeActiveContext()); + slamp_iteration = 0; + slamp_invocation++; +} + +void PointsToModule::loop_iter() { slamp_iteration++; } + +void PointsToModule::points_to_arg(uint32_t fcnId, uint32_t argId, void *ptr) { + local_write((uint64_t)ptr, [&]() { + auto instr = FORMAT_INST_ARG(fcnId, argId); + auto contextHash = contextManager.encodeActiveContext(); + uint64_t instrAndHash = ((uint64_t)instr << 32) | contextHash; + if (ptr == nullptr) { + pointsToMap.emplace(std::make_pair(instrAndHash, 0)); + // pointsToMap[instrAndHash].(0); + // pointsToMap.emplace(std::make_pair(instrAndHash, 0)); + return; + } + if (ptr < (void *)0x1000) { + // Protect against null pointers + // If it gets dereferenced, it will be caught by segfault + return; + } + TS *s = (TS 
*)GET_SHADOW_PT(ptr, TIMESTAMP_SIZE_IN_POWER_OF_TWO); + TS ts; + // if (!smmap->is_allocated(ptr)) + // return; + ts = s[0]; + + if (ts != 0) { + // mask off the iteration count + ts = ts & 0xffffffffffffff00; + // ts = ts & 0xfffffffffffffff0; + // ts = ts & 0xfffff0000000000f; + // create set of objects for each load/store + pointsToMap.emplace(std::make_pair(instrAndHash, ts)); + // pointsToMap[instrAndHash].insert(ts); + // pointsToMap.emplace(std::make_pair(instrAndHash, ts)); + } + }); +} + +void PointsToModule::points_to_inst(uint32_t instId, void *ptr) { + local_write((uint64_t)ptr, [&]() { + auto instr = FORMAT_INST_INST(instId); + auto contextHash = contextManager.encodeActiveContext(); + uint64_t instrAndHash = ((uint64_t)instr << 32) | contextHash; + if (ptr == nullptr) { + pointsToMap.emplace(std::make_pair(instrAndHash, 0)); + // pointsToMap[instrAndHash].insert(0); + // pointsToMap.emplace(std::make_pair(instrAndHash, 0)); + return; + } + if (ptr < (void *)0x1000) { + // Protect against null pointers + // If it gets dereferenced, it will be caught by segfault + return; + } + TS *s = (TS *)GET_SHADOW_PT(ptr, TIMESTAMP_SIZE_IN_POWER_OF_TWO); + TS ts; + + // not all pointers are well-defined + // if (!smmap->is_allocated(ptr)) + // return; + ts = s[0]; + + if (ts != 0) { + // mask off the iteration count + ts = ts & 0xffffffffffffff00; + // ts = ts & 0xfffffffffffffff0; + // ts = ts & 0xfffff0000000000f; + // create set of objects for each load/store + pointsToMap.emplace(std::make_pair(instrAndHash, ts)); + // pointsToMap[instrAndHash].insert(ts); + // pointsToMap.emplace(std::make_pair(instrAndHash, ts)); + } + }); +} + +void PointsToModule::init(uint32_t loop_id, uint32_t pid) { + target_loop_id = loop_id; +#define SIZE_8M 0x800000 + smmap->init_stack(SIZE_8M, pid); +} + +void PointsToModule::fini(const char *filename) { + // Dump compatible one as SpecPriv output + std::ofstream specprivfs(filename); + specprivfs << "BEGIN SPEC PRIV PROFILE\n"; + 
specprivfs << "COMPLETE ALLOCATION INFO ; \n"; + + + // print all loop contexts + specprivfs << "LOOP CONTEXTS: " << targetLoopContexts.size() << "\n"; + for (auto contextHash : targetLoopContexts) { + contextManager.printContext(specprivfs, contextHash); + specprivfs << "\n"; + } + + // predict OBJ + // PRED OBJ main if.else.i $0 AT CONTEXT { LOOP main for.cond15 1 WITHIN + // FUNCTION main WITHIN TOP } AS PREDICTABLE 300 SAMPLES OVER 1 VALUES { ( + // OFFSET 0 BASE AU HEAP allocate_matrices for.end call7 FROM CONTEXT { + // FUNCTION allocate_matrices WITHIN FUNCTION main WITHIN TOP } COUNT 300 ) + // } ; + + // // get ordered key from the map + // std::vector keys; + // for (auto &kv : pointsToMap) { + // keys.push_back(kv.first); + // } + // std::sort(keys.begin(), keys.end()); + + // for (auto &key : keys) { + // auto v = pointsToMap[key]; + // auto instr = key >> 32; + // auto instrHash = key & 0xFFFFFFFF; + // std::vector instrContext = + // contextManager.decodeContext(instrHash); + // specprivfs << "PRED OBJ " << instr << " at " << instrHash << " "; + // printContext(instrContext); + // specprivfs << ": " << v.size() << "\n"; // instruction ID + // for (auto &it2 : v) { // the set of allocation units + // auto hash = GET_HASH(it2); + + // specprivfs << "AU "; + // if (it2 == 0xffffffffffffff00) { + // specprivfs << " UNMANAGED"; + // } else if (it2 == 0) { + // specprivfs << " NULL"; + // } else { + // std::vector context = + // contextManager.decodeContext(hash); + + // specprivfs << GET_INSTR(it2); + // specprivfs << " FROM CONTEXT " << instrHash << " "; + // printContext(context); + // } + // specprivfs << ";\n"; + // } + // } + + auto printContext = [&](std::vector context) { + for (auto it = context.rbegin(); it != context.rend(); it++) { + auto &c = *it; + c.print(specprivfs); + } + }; + + for (auto &kv : decodedContextMap) { + auto ptrAndContext = kv.first; + auto instr = ptrAndContext.first; + auto context = ptrAndContext.second; + + specprivfs << 
"PRED OBJ " << instr << " at "; + printContext(context); + // auto auSet = kv.second; + // specprivfs << ": " << auSet.size() << "\n"; // instruction ID + // for (auto &[au, context] : auSet) { + // specprivfs << "AU "; + // if (au == -2) { + // specprivfs << " UNMANAGED"; + // } else if (au == -1) { + // specprivfs << " NULL"; + // } else { + // specprivfs << au; + // specprivfs << " FROM CONTEXT "; + // printContext(context); + // } + // specprivfs << ";\n"; + // } + specprivfs << ": " << 1 << "\n"; // instruction ID + auto au = kv.second.first; + auto auContext = kv.second.second; + specprivfs << "AU "; + if (au == -3) { + specprivfs << " NOT CONSTANT"; + } + else if (au == -2) { + specprivfs << " UNMANAGED"; + } else if (au == -1) { + specprivfs << " NULL"; + } else { + specprivfs << au; + specprivfs << " FROM CONTEXT "; + printContext(auContext); + } + specprivfs << ";\n"; + + } + + specprivfs << " END SPEC PRIV PROFILE\n"; +} + +void PointsToModule::decode_all() { + // convert it to decodedContextMap + for (auto &it : pointsToMap) { + + auto instr = it.first >> 32; + auto instrHash = it.first & 0xFFFFFFFF; + + std::vector instrContext = + contextManager.decodeContext(instrHash); + InstrAndContext instrAndContext = {instr, instrContext}; + + SlampAllocationUnit au = it.second; + auto hash = GET_HASH(au); + + if (au == HTMap_IsConstant::MAGIC_INVALID) { + // Not constant + decodedContextMap[instrAndContext] = {-3, {}}; + } + else if (au == 0xffffffffffffff00) { + // unmanaged + decodedContextMap[instrAndContext] = {-2, {}}; + } else if (au == 0) { + // null + decodedContextMap[instrAndContext] = {-1, {}}; + } else { + std::vector context = contextManager.decodeContext(hash); + decodedContextMap[instrAndContext] = {GET_INSTR(au), context}; + } + // for (auto &it2 : it.second) { // the set of allocation units + // auto hash = GET_HASH(it2); + // if (hash == 0xfffffffff) { + // // insert -2, empty + // decodedContextMap[instrAndContext].insert({-2, {}}); + // } 
else if (hash == 0) { + // // insert -1, empty + // decodedContextMap[instrAndContext].insert({-1, {}}); + // } else { + // std::vector context = + // contextManager.decodeContext(hash); + // decodedContextMap[instrAndContext].insert({GET_INSTR(it2), context}); + // } + // } + + + } +} + +void PointsToModule::merge(PointsToModule &other) { + other.decode_all(); + + for (auto &it : other.decodedContextMap) { + auto instrAndContext = it.first; + if (decodedContextMap.find(instrAndContext) == decodedContextMap.end()) { + // not found, insert + decodedContextMap[instrAndContext] = it.second; + } else { + // found, if different, set to unmanaged + if (decodedContextMap[instrAndContext] != it.second) { + decodedContextMap[instrAndContext] = {-2, {}}; + } + + // for (auto &it2 : it.second) { + // decodedContextMap[instrAndContext].insert(it2); + // } + } + } + + // // FIXME: this won't work because the context map is different + // for (auto &it : other.pointsToMap) { + // if (pointsToMap.find(it.first) == pointsToMap.end()) { + // pointsToMap[it.first] = it.second; + // } else { + // pointsToMap[it.first].merge(it.second); + // } + // } +} diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.h new file mode 100644 index 00000000..02600831 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/PointsToModule.h @@ -0,0 +1,89 @@ +#include +#include +#include + +#include "ProfilingModules/parallel_hashmap/phmap.h" +#include "slamp_logger.h" +#include "slamp_shadow_mem.h" +#include "slamp_timestamp.h" + +#include "LocalWriteModule.h" +#include "HTContainer.h" +#include "ContextManager.h" + +enum class PointsToModAction : uint32_t { + INIT = 0, + // LOAD, + // STORE, + ALLOC, + FREE, + LOOP_INVOC, + LOOP_ITER, + LOOP_ENTRY, + LOOP_EXIT, + FUNC_ENTRY, + FUNC_EXIT, + POINTS_TO_INST, + POINTS_TO_ARG, + FINISHED +}; + +class PointsToModule : public 
LocalWriteModule { + private: + uint64_t slamp_iteration = 0; + uint64_t slamp_invocation = 0; + slamp::MemoryMap *smmap = nullptr; + uint32_t target_loop_id = 0; + + bool in_loop = false; + + enum SpecPrivContextType { + TopContext = 0, + FunctionContext, + LoopContext, + }; + using ContextHash = uint64_t; + using SpecPrivContextManager = NewContextManager; + using ContextId = ContextId; + std::unordered_set targetLoopContexts; + SpecPrivContextManager contextManager; + + using SlampAllocationUnit = TS; + // std::unordered_map> pointsToMap; + // HTMap_Set, std::equal_to<>, 32> pointsToMap; + // phmap::flat_hash_map> pointsToMap; + HTMap_IsConstant pointsToMap; + + using InstrAndContext = std::pair>; + std::map decodedContextMap; + // std::map> decodedContextMap; + + public: + PointsToModule(uint32_t mask, uint32_t pattern) + : LocalWriteModule(mask, pattern) { + smmap = new slamp::MemoryMap(mask, pattern, TIMESTAMP_SIZE_IN_BYTES); + } + + ~PointsToModule() override { + delete smmap; + } + + void init(uint32_t loop_id, uint32_t pid); + void fini(const char *filename); + + void allocate(void *addr, uint32_t instr, uint64_t size); + void free(void *addr); + void loop_invoc(); + void loop_iter(); + + void func_entry(uint32_t fcnId); + void func_exit(uint32_t fcnId); + void loop_entry(uint32_t loopId); + void loop_exit(uint32_t loopId); + + void points_to_inst(uint32_t instId, void *ptr); + + void points_to_arg(uint32_t fcnId, uint32_t argId, void *ptr); + void merge(PointsToModule &other); + void decode_all(); +}; diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/context.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/context.h new file mode 100644 index 00000000..ef67f377 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/context.h @@ -0,0 +1,233 @@ +/* The context management + * ====================== + * The definition of a context is very specific to the client. 
/* The context management
 * ======================
 * The definition of a context is very specific to the client. For SpecPriv,
 * the dynamic context is defined as the function and loop stack.
 *
 * For loop-aware memory dependence profiling, the context is the loop and
 * the invocation and iteration count.
 *
 * The context information usually needs to be stored in the shadow memory.
 *
 * The context management is responsible for keeping track of the current
 * context and creating a fixed-length representation of any context.
 */
#pragma once

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>

namespace SLAMPLib {

using ContextHash = size_t;

/// The context is a stack of ContextIds, stored as a singly linked list of
/// heap nodes (child -> parent).
template <typename ContextId>
struct Context {
  ContextId id;
  Context *parent;

  Context(ContextId id, Context *parent) : id(std::move(id)), parent(parent) {}

  /// Add a new context on top of this one.
  Context *chain(ContextId id) { return new Context(id, this); }

  static Context *getTopContext() {
    return new Context(ContextId::getTopContextId(), nullptr);
  }

  /// XOR of all ids' hashes on the stack. NOTE(review): XOR is
  /// order-insensitive and collision-prone; see the disabled collision
  /// assert in ContextManager::addContext.
  ContextHash hash() {
    ContextHash h = id.hash();
    if (parent)
      h ^= parent->hash();
    return h;
  }

  /// Remove the top context from the stack. Deletes this node; the caller
  /// must drop every pointer to it.
  Context *pop() {
    auto parent = this->parent;
    // FIXME: is this ok to use?
    delete this;
    return parent;
  }

  using FlattenContext = std::vector<ContextId>;
  using ContextIdType = ContextId;

  /// Flatten the stack into a vector of ids, innermost first.
  FlattenContext flatten() {
    std::vector<ContextId> result;
    Context *cur = this;
    while (cur) {
      result.push_back(cur->id);
      cur = cur->parent;
    }
    return result;
  }

  /// Print the stack, innermost first.
  void print(std::ostream &os) {
    os << "Context:\n";
    Context *cur = this;
    while (cur) {
      cur->id.print(os);
      cur = cur->parent;
    }
  }
};

/// Tracks the currently-active context. Note that we do not need to keep
/// track of all the Context objects that are alive.
template <typename Context>
struct ContextManager {
  Context *activeContext;
  bool contextChanged = true; // invalidates cached hashes in subclasses

  ContextManager() { activeContext = Context::getTopContext(); }

  // ContextHash -> flattened (innermost-first) context, for decoding.
  std::unordered_map<ContextHash, typename Context::FlattenContext> contextMap;

  /// Hash of the active stack (0 when empty).
  ContextHash activeId() {
    if (activeContext)
      return activeContext->hash();
    else
      return 0;
  }

  /// Push: enter a new function/loop context.
  void updateContext(typename Context::ContextIdType contextId) {
    assert(activeContext && "active context is null");
    activeContext = activeContext->chain(contextId);
    contextChanged = true;
  }

  /// Pop: leave a context. On a mismatched id (longjmp/setjmp or C++
  /// exception unwinding) pop until the id is found.
  /// NOTE(review): the recovery path stops with the matching context still
  /// active, whereas the normal path pops it -- confirm this asymmetry.
  void popContext(typename Context::ContextIdType contextId) {
    assert(activeContext && "active context is null");
    contextChanged = true;

    if (activeContext->id == contextId) {
      activeContext = activeContext->pop();
    } else {
      // The context is not matching, could be due to the longjmp/setjmp etc
      // or exception handling in C++
      std::cerr << "Context mismatch! Current context: ";
      auto tmp = activeContext;
      while (tmp->parent) {
        std::cerr << "(" << tmp->id.type << "," << tmp->id.metaId << ")->";
        tmp = tmp->parent;
      }
      std::cerr << "(" << tmp->id.type << "," << tmp->id.metaId << ")->";
      std::cerr << "Exiting context: (" << contextId.type << ","
                << contextId.metaId << ")" << std::endl;

      // Let's try to find the correct context
      bool foundInStack = false;
      while (activeContext->parent) {
        activeContext = activeContext->pop();
        if (activeContext->id == contextId) {
          foundInStack = true;
          break;
        }
      }

      assert(foundInStack && "Could not find the exiting context in the stack");
      (void)foundInStack; // silence unused-variable warning in NDEBUG builds
    }
  }

  /// Remember hash -> flattened context so decode can invert it later.
  void addContext(ContextHash hash, Context &context) {
    // If an entry exists but differs, we have a hash collision.
    if (contextMap.count(hash) && contextMap[hash] != context.flatten()) {
      // FIXME: need to turn this back on
      // assert(false && "Context hash collision");
    }
    contextMap[hash] = context.flatten();
  }
};

namespace SpecPrivLib {
enum SpecPrivContextType {
  TopContext = 0,
  FunctionContext,
  LoopContext,
};

/// (type, metadata-id) pair identifying one stack entry.
struct ContextId {
  SpecPrivContextType type;
  uint32_t metaId;

  ContextId(SpecPrivContextType type, uint32_t id) : type(type), metaId(id) {}
  ContextId(std::pair<SpecPrivContextType, uint32_t> p)
      : type(p.first), metaId(p.second) {}

  static ContextId getTopContextId() { return {TopContext, 0}; }

  /// metaId shifted left by 2, or'd with the 2-bit type.
  ContextHash hash() {
    return (static_cast<ContextHash>(metaId) << 2) |
           static_cast<ContextHash>(type);
  }

  bool operator==(const ContextId &other) const {
    return type == other.type && metaId == other.metaId;
  }

  bool operator<(const ContextId &other) const {
    return type < other.type || (type == other.type && metaId < other.metaId);
  }

  void print(std::ostream &os) {
    os << "ContextId: " << type << " " << metaId << "\n";
  }
};

using SpecPrivContext = Context<ContextId>;

/// Interns whole context stacks: every distinct flattened stack gets a small
/// sequential id (starting at 1), cached while the stack is unchanged.
struct SpecPrivContextManager : public ContextManager<SpecPrivContext> {
  std::map<std::vector<ContextId>, ContextHash> contextIdHashMap;
  size_t contextIdHashCounter = 1;

  // Valid only while contextChanged == false.
  ContextHash cachedContextHash = 0; // BUGFIX: was left uninitialized

  /// Return the interned id for this stack, creating one if unseen.
  ContextHash encodeContext(SpecPrivContext context) {
    std::vector<ContextId> flattenContext = context.flatten();
    // PERF: single lookup instead of count + operator[].
    auto it = contextIdHashMap.find(flattenContext);
    if (it != contextIdHashMap.end()) {
      return it->second;
    }
    contextIdHashMap[flattenContext] = contextIdHashCounter;
    contextMap[contextIdHashCounter] = flattenContext;
    return contextIdHashCounter++;
  }

  /// Interned id of the active stack, served from cache when unchanged.
  ContextHash encodeActiveContext() {
    if (!contextChanged) {
      return cachedContextHash;
    }
    cachedContextHash = encodeContext(*activeContext);
    contextChanged = false;
    return cachedContextHash;
  }

  /// Invert an interned id back into the flattened stack.
  std::vector<ContextId> decodeContext(ContextHash hash) {
    auto it = contextMap.find(hash);
    if (it != contextMap.end()) {
      return it->second;
    }
    assert(false && "Could not find the context");
    return {};
  }
};
} // namespace SpecPrivLib

} // namespace SLAMPLib

// NOTE(review): the original chunk ended with the start of
// parallel_hashmap/btree.h (Apache-2.0 license header, Gregory Popovitch);
// that file continues beyond this chunk and is not reproduced here.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) +// with modifications. +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// --------------------------------------------------------------------------- + +#ifndef PHMAP_BTREE_BTREE_CONTAINER_H_ +#define PHMAP_BTREE_BTREE_CONTAINER_H_ + +#ifdef _MSC_VER + #pragma warning(push) + + #pragma warning(disable : 4127) // conditional expression is constant + #pragma warning(disable : 4324) // structure was padded due to alignment specifier + #pragma warning(disable : 4355) // 'this': used in base member initializer list + #pragma warning(disable : 4365) // conversion from 'int' to 'const unsigned __int64', signed/unsigned mismatch + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4623) // default constructor was implicitly defined as deleted + #pragma warning(disable : 4625) // copy constructor was implicitly defined as deleted + #pragma warning(disable : 4626) // assignment operator was implicitly defined as deleted + #pragma warning(disable : 4710) // function not inlined + #pragma warning(disable : 4711) // selected for automatic inline expansion + #pragma warning(disable : 4820) // '6' bytes padding added after data member + #pragma warning(disable : 4868) // compiler may not 
enforce left-to-right evaluation order in braced initializer list + #pragma warning(disable : 5026) // move constructor was implicitly defined as deleted + #pragma warning(disable : 5027) // move assignment operator was implicitly defined as deleted + #pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#endif + + +#include +#include +#include +#include +#include + +#include "phmap_fwd_decl.h" +#include "phmap_base.h" + +#if PHMAP_HAVE_STD_STRING_VIEW + #include +#endif + +// MSVC constructibility traits do not detect destructor properties and so our +// implementations should not use them as a source-of-truth. +#if defined(_MSC_VER) && !defined(__clang__) && !defined(__GNUC__) + #define PHMAP_META_INTERNAL_STD_CONSTRUCTION_TRAITS_DONT_CHECK_DESTRUCTION 1 +#endif + +namespace phmap { + + // Defined and documented later on in this file. + template + struct is_trivially_destructible; + + // Defined and documented later on in this file. + template + struct is_trivially_move_assignable; + + namespace type_traits_internal { + + // Silence MSVC warnings about the destructor being defined as deleted. +#if defined(_MSC_VER) && !defined(__GNUC__) + #pragma warning(push) + #pragma warning(disable : 4624) +#endif // defined(_MSC_VER) && !defined(__GNUC__) + + template + union SingleMemberUnion { + T t; + }; + + // Restore the state of the destructor warning that was silenced above. 
+#if defined(_MSC_VER) && !defined(__GNUC__) + #pragma warning(pop) +#endif // defined(_MSC_VER) && !defined(__GNUC__) + + template + struct IsTriviallyMoveConstructibleObject + : std::integral_constant< + bool, std::is_move_constructible< + type_traits_internal::SingleMemberUnion>::value && + phmap::is_trivially_destructible::value> {}; + + template + struct IsTriviallyCopyConstructibleObject + : std::integral_constant< + bool, std::is_copy_constructible< + type_traits_internal::SingleMemberUnion>::value && + phmap::is_trivially_destructible::value> {}; + + template + struct IsTriviallyMoveAssignableReference : std::false_type {}; + + template + struct IsTriviallyMoveAssignableReference + : phmap::is_trivially_move_assignable::type {}; + + template + struct IsTriviallyMoveAssignableReference + : phmap::is_trivially_move_assignable::type {}; + + } // namespace type_traits_internal + + + template + using void_t = typename type_traits_internal::VoidTImpl::type; + + + template + struct is_function + : std::integral_constant< + bool, !(std::is_reference::value || + std::is_const::type>::value)> {}; + + + namespace type_traits_internal { + + template + class is_trivially_copyable_impl { + using ExtentsRemoved = typename std::remove_all_extents::type; + static constexpr bool kIsCopyOrMoveConstructible = + std::is_copy_constructible::value || + std::is_move_constructible::value; + static constexpr bool kIsCopyOrMoveAssignable = + phmap::is_copy_assignable::value || + phmap::is_move_assignable::value; + + public: + static constexpr bool kValue = + (__has_trivial_copy(ExtentsRemoved) || !kIsCopyOrMoveConstructible) && + (__has_trivial_assign(ExtentsRemoved) || !kIsCopyOrMoveAssignable) && + (kIsCopyOrMoveConstructible || kIsCopyOrMoveAssignable) && + is_trivially_destructible::value && + // We need to check for this explicitly because otherwise we'll say + // references are trivial copyable when compiled by MSVC. 
+ !std::is_reference::value; + }; + + template + struct is_trivially_copyable + : std::integral_constant< + bool, type_traits_internal::is_trivially_copyable_impl::kValue> {}; + } // namespace type_traits_internal + + namespace swap_internal { + + // Necessary for the traits. + using std::swap; + + // This declaration prevents global `swap` and `phmap::swap` overloads from being + // considered unless ADL picks them up. + void swap(); + + template + using IsSwappableImpl = decltype(swap(std::declval(), std::declval())); + + // NOTE: This dance with the default template parameter is for MSVC. + template (), std::declval()))>> + using IsNothrowSwappableImpl = typename std::enable_if::type; + + template + struct IsSwappable + : phmap::type_traits_internal::is_detected {}; + + template + struct IsNothrowSwappable + : phmap::type_traits_internal::is_detected {}; + + template ::value, int> = 0> + void Swap(T& lhs, T& rhs) noexcept(IsNothrowSwappable::value) { + swap(lhs, rhs); + } + + using StdSwapIsUnconstrained = IsSwappable; + + } // namespace swap_internal + + namespace type_traits_internal { + + // Make the swap-related traits/function accessible from this namespace. 
+ using swap_internal::IsNothrowSwappable; + using swap_internal::IsSwappable; + using swap_internal::Swap; + using swap_internal::StdSwapIsUnconstrained; + + } // namespace type_traits_internal + + namespace compare_internal { + + using value_type = int8_t; + + template + struct Fail { + static_assert(sizeof(T) < 0, "Only literal `0` is allowed."); + }; + + template + struct OnlyLiteralZero { + constexpr OnlyLiteralZero(NullPtrT) noexcept {} // NOLINT + + template < + typename T, + typename = typename std::enable_if< + std::is_same::value || + (std::is_integral::value && !std::is_same::value)>::type, + typename = typename Fail::type> + OnlyLiteralZero(T); // NOLINT + }; + + enum class eq : value_type { + equal = 0, + equivalent = equal, + nonequal = 1, + nonequivalent = nonequal, + }; + + enum class ord : value_type { less = -1, greater = 1 }; + + enum class ncmp : value_type { unordered = -127 }; + +#if defined(__cpp_inline_variables) && !defined(_MSC_VER) + +#define PHMAP_COMPARE_INLINE_BASECLASS_DECL(name) + +#define PHMAP_COMPARE_INLINE_SUBCLASS_DECL(type, name) \ + static const type name; + +#define PHMAP_COMPARE_INLINE_INIT(type, name, init) \ + inline constexpr type type::name(init) + +#else // __cpp_inline_variables + +#define PHMAP_COMPARE_INLINE_BASECLASS_DECL(name) \ + static const T name; + +#define PHMAP_COMPARE_INLINE_SUBCLASS_DECL(type, name) + +#define PHMAP_COMPARE_INLINE_INIT(type, name, init) \ + template \ + const T compare_internal::type##_base::name(init) + +#endif // __cpp_inline_variables + + // These template base classes allow for defining the values of the constants + // in the header file (for performance) without using inline variables (which + // aren't available in C++11). 
+ template + struct weak_equality_base { + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequivalent) + }; + + template + struct strong_equality_base { + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equal) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequal) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequivalent) + }; + + template + struct partial_ordering_base { + PHMAP_COMPARE_INLINE_BASECLASS_DECL(less) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(unordered) + }; + + template + struct weak_ordering_base { + PHMAP_COMPARE_INLINE_BASECLASS_DECL(less) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater) + }; + + template + struct strong_ordering_base { + PHMAP_COMPARE_INLINE_BASECLASS_DECL(less) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equal) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent) + PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater) + }; + + } // namespace compare_internal + + class weak_equality + : public compare_internal::weak_equality_base { + explicit constexpr weak_equality(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::weak_equality_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_equality, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_equality, nonequivalent) + + // Comparisons + friend constexpr bool operator==( + weak_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + weak_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + weak_equality v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + weak_equality v) noexcept { + return 0 != 
v.value_; + } + + private: + compare_internal::value_type value_; + }; + PHMAP_COMPARE_INLINE_INIT(weak_equality, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(weak_equality, nonequivalent, + compare_internal::eq::nonequivalent); + + class strong_equality + : public compare_internal::strong_equality_base { + explicit constexpr strong_equality(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::strong_equality_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, equal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, nonequal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, nonequivalent) + + // Conversion + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + // Comparisons + friend constexpr bool operator==( + strong_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + strong_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + strong_equality v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + strong_equality v) noexcept { + return 0 != v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(strong_equality, equal, compare_internal::eq::equal); + PHMAP_COMPARE_INLINE_INIT(strong_equality, nonequal, + compare_internal::eq::nonequal); + PHMAP_COMPARE_INLINE_INIT(strong_equality, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(strong_equality, nonequivalent, + compare_internal::eq::nonequivalent); + + class partial_ordering + : public compare_internal::partial_ordering_base { + explicit 
constexpr partial_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr partial_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + explicit constexpr partial_ordering(compare_internal::ncmp v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::partial_ordering_base; + + constexpr bool is_ordered() const noexcept { + return value_ != + compare_internal::value_type(compare_internal::ncmp::unordered); + } + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, greater) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, unordered) + + // Conversion + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + // Comparisons + friend constexpr bool operator==( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ == 0; + } + friend constexpr bool operator!=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return !v.is_ordered() || v.value_ != 0; + } + friend constexpr bool operator<( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ < 0; + } + friend constexpr bool operator<=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ <= 0; + } + friend constexpr bool operator>( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ > 0; + } + friend constexpr bool operator>=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 == 
v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return !v.is_ordered() || 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(partial_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, greater, + compare_internal::ord::greater); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, unordered, + compare_internal::ncmp::unordered); + + class weak_ordering + : public compare_internal::weak_ordering_base { + explicit constexpr weak_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr weak_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::weak_ordering_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, greater) + + // Conversions + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + constexpr operator partial_ordering() const noexcept { // NOLINT + return value_ == 0 ? 
partial_ordering::equivalent + : (value_ < 0 ? partial_ordering::less + : partial_ordering::greater); + } + // Comparisons + friend constexpr bool operator==( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator<( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ < 0; + } + friend constexpr bool operator<=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ <= 0; + } + friend constexpr bool operator>( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ > 0; + } + friend constexpr bool operator>=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(weak_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(weak_ordering, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(weak_ordering, greater, + compare_internal::ord::greater); + 
+ class strong_ordering + : public compare_internal::strong_ordering_base { + explicit constexpr strong_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr strong_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::strong_ordering_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, equal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, greater) + + // Conversions + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + constexpr operator strong_equality() const noexcept { // NOLINT + return value_ == 0 ? strong_equality::equal : strong_equality::nonequal; + } + constexpr operator partial_ordering() const noexcept { // NOLINT + return value_ == 0 ? partial_ordering::equivalent + : (value_ < 0 ? partial_ordering::less + : partial_ordering::greater); + } + constexpr operator weak_ordering() const noexcept { // NOLINT + return value_ == 0 + ? weak_ordering::equivalent + : (value_ < 0 ? 
weak_ordering::less : weak_ordering::greater); + } + // Comparisons + friend constexpr bool operator==( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator<( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ < 0; + } + friend constexpr bool operator<=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ <= 0; + } + friend constexpr bool operator>( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ > 0; + } + friend constexpr bool operator>=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + PHMAP_COMPARE_INLINE_INIT(strong_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(strong_ordering, equal, compare_internal::eq::equal); + PHMAP_COMPARE_INLINE_INIT(strong_ordering, equivalent, + compare_internal::eq::equivalent); + 
PHMAP_COMPARE_INLINE_INIT(strong_ordering, greater, + compare_internal::ord::greater); + +#undef PHMAP_COMPARE_INLINE_BASECLASS_DECL +#undef PHMAP_COMPARE_INLINE_SUBCLASS_DECL +#undef PHMAP_COMPARE_INLINE_INIT + + namespace compare_internal { + // We also provide these comparator adapter functions for internal phmap use. + + // Helper functions to do a boolean comparison of two keys given a boolean + // or three-way comparator. + // SFINAE prevents implicit conversions to bool (such as from int). + template ::value, int> = 0> + constexpr bool compare_result_as_less_than(const BoolType r) { return r; } + constexpr bool compare_result_as_less_than(const phmap::weak_ordering r) { + return r < 0; + } + + template + constexpr bool do_less_than_comparison(const Compare &compare, const K &x, + const LK &y) { + return compare_result_as_less_than(compare(x, y)); + } + + // Helper functions to do a three-way comparison of two keys given a boolean or + // three-way comparator. + // SFINAE prevents implicit conversions to int (such as from bool). + template ::value, int> = 0> + constexpr phmap::weak_ordering compare_result_as_ordering(const Int c) { + return c < 0 ? phmap::weak_ordering::less + : c == 0 ? phmap::weak_ordering::equivalent + : phmap::weak_ordering::greater; + } + constexpr phmap::weak_ordering compare_result_as_ordering( + const phmap::weak_ordering c) { + return c; + } + + template < + typename Compare, typename K, typename LK, + phmap::enable_if_t>::value, + int> = 0> + constexpr phmap::weak_ordering do_three_way_comparison(const Compare &compare, + const K &x, const LK &y) { + return compare_result_as_ordering(compare(x, y)); + } + template < + typename Compare, typename K, typename LK, + phmap::enable_if_t>::value, + int> = 0> + constexpr phmap::weak_ordering do_three_way_comparison(const Compare &compare, + const K &x, const LK &y) { + return compare(x, y) ? phmap::weak_ordering::less + : compare(y, x) ? 
phmap::weak_ordering::greater + : phmap::weak_ordering::equivalent; + } + + } // namespace compare_internal +} + + +namespace phmap { + +namespace priv { + + // A helper class that indicates if the Compare parameter is a key-compare-to + // comparator. + template + using btree_is_key_compare_to = + std::is_convertible, + phmap::weak_ordering>; + + struct StringBtreeDefaultLess { + using is_transparent = void; + + StringBtreeDefaultLess() = default; + + // Compatibility constructor. + StringBtreeDefaultLess(std::less) {} // NOLINT +#if PHMAP_HAVE_STD_STRING_VIEW + StringBtreeDefaultLess(std::less) {} // NOLINT + StringBtreeDefaultLess(phmap::Less) {} // NOLINT + + phmap::weak_ordering operator()(std::string_view lhs, + std::string_view rhs) const { + return compare_internal::compare_result_as_ordering(lhs.compare(rhs)); + } +#else + phmap::weak_ordering operator()(std::string lhs, + std::string rhs) const { + return compare_internal::compare_result_as_ordering(lhs.compare(rhs)); + } +#endif + }; + + struct StringBtreeDefaultGreater { + using is_transparent = void; + + StringBtreeDefaultGreater() = default; + + StringBtreeDefaultGreater(std::greater) {} // NOLINT +#if PHMAP_HAVE_STD_STRING_VIEW + StringBtreeDefaultGreater(std::greater) {} // NOLINT + + phmap::weak_ordering operator()(std::string_view lhs, + std::string_view rhs) const { + return compare_internal::compare_result_as_ordering(rhs.compare(lhs)); + } +#else + phmap::weak_ordering operator()(std::string lhs, + std::string rhs) const { + return compare_internal::compare_result_as_ordering(rhs.compare(lhs)); + } +#endif + }; + + // A helper class to convert a boolean comparison into a three-way "compare-to" + // comparison that returns a negative value to indicate less-than, zero to + // indicate equality and a positive value to indicate greater-than. This helper + // class is specialized for less, greater, + // less, and greater. 
+ // + // key_compare_to_adapter is provided so that btree users + // automatically get the more efficient compare-to code when using common + // google string types with common comparison functors. + // These string-like specializations also turn on heterogeneous lookup by + // default. + template + struct key_compare_to_adapter { + using type = Compare; + }; + + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultLess; + }; + + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultLess; + }; + + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultGreater; + }; + +#if PHMAP_HAVE_STD_STRING_VIEW + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultLess; + }; + + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultLess; + }; + + template <> + struct key_compare_to_adapter> { + using type = StringBtreeDefaultGreater; + }; +#endif + + template + struct common_params { + // If Compare is a common comparator for a std::string-like type, then we adapt it + // to use heterogeneous lookup and to be a key-compare-to comparator. + using key_compare = typename key_compare_to_adapter::type; + // A type which indicates if we have a key-compare-to functor or a plain old + // key-compare functor. + using is_key_compare_to = btree_is_key_compare_to; + + using allocator_type = Alloc; + using key_type = Key; + using size_type = std::size_t ; + using difference_type = ptrdiff_t; + + // True if this is a multiset or multimap. 
+ using is_multi_container = std::integral_constant; + + using slot_policy = SlotPolicy; + using slot_type = typename slot_policy::slot_type; + using value_type = typename slot_policy::value_type; + using init_type = typename slot_policy::mutable_value_type; + using pointer = value_type *; + using const_pointer = const value_type *; + using reference = value_type &; + using const_reference = const value_type &; + + enum { + kTargetNodeSize = TargetNodeSize, + + // Upper bound for the available space for values. This is largest for leaf + // nodes, which have overhead of at least a pointer + 4 bytes (for storing + // 3 field_types and an enum). + kNodeSlotSpace = + TargetNodeSize - /*minimum overhead=*/(sizeof(void *) + 4), + }; + + // This is an integral type large enough to hold as many + // ValueSize-values as will fit a node of TargetNodeSize bytes. + using node_count_type = + phmap::conditional_t<(kNodeSlotSpace / sizeof(slot_type) > + (std::numeric_limits::max)()), + uint16_t, uint8_t>; // NOLINT + + // The following methods are necessary for passing this struct as PolicyTraits + // for node_handle and/or are used within btree. + static value_type &element(slot_type *slot) { + return slot_policy::element(slot); + } + static const value_type &element(const slot_type *slot) { + return slot_policy::element(slot); + } + template + static void construct(Alloc *alloc, slot_type *slot, Args &&... 
args) { + slot_policy::construct(alloc, slot, std::forward(args)...); + } + static void construct(Alloc *alloc, slot_type *slot, slot_type *other) { + slot_policy::construct(alloc, slot, other); + } + static void destroy(Alloc *alloc, slot_type *slot) { + slot_policy::destroy(alloc, slot); + } + static void transfer(Alloc *alloc, slot_type *new_slot, slot_type *old_slot) { + construct(alloc, new_slot, old_slot); + destroy(alloc, old_slot); + } + static void swap(Alloc *alloc, slot_type *a, slot_type *b) { + slot_policy::swap(alloc, a, b); + } + static void move(Alloc *alloc, slot_type *src, slot_type *dest) { + slot_policy::move(alloc, src, dest); + } + static void move(Alloc *alloc, slot_type *first, slot_type *last, + slot_type *result) { + slot_policy::move(alloc, first, last, result); + } + }; + + // A parameters structure for holding the type parameters for a btree_map. + // Compare and Alloc should be nothrow copy-constructible. + template + struct map_params : common_params> { + using super_type = typename map_params::common_params; + using mapped_type = Data; + // This type allows us to move keys when it is safe to do so. It is safe + // for maps in which value_type and mutable_value_type are layout compatible. + using slot_policy = typename super_type::slot_policy; + using slot_type = typename super_type::slot_type; + using value_type = typename super_type::value_type; + using init_type = typename super_type::init_type; + + using key_compare = typename super_type::key_compare; + // Inherit from key_compare for empty base class optimization. 
+ struct value_compare : private key_compare { + value_compare() = default; + explicit value_compare(const key_compare &cmp) : key_compare(cmp) {} + + template + auto operator()(const T &left, const U &right) const + -> decltype(std::declval()(left.first, right.first)) { + return key_compare::operator()(left.first, right.first); + } + }; + using is_map_container = std::true_type; + + static const Key &key(const value_type &x) { return x.first; } + static const Key &key(const init_type &x) { return x.first; } + static const Key &key(const slot_type *x) { return slot_policy::key(x); } + static mapped_type &value(value_type *value) { return value->second; } + }; + + // This type implements the necessary functions from the + // btree::priv::slot_type interface. + template + struct set_slot_policy { + using slot_type = Key; + using value_type = Key; + using mutable_value_type = Key; + + static value_type &element(slot_type *slot) { return *slot; } + static const value_type &element(const slot_type *slot) { return *slot; } + + template + static void construct(Alloc *alloc, slot_type *slot, Args &&... 
args) { + phmap::allocator_traits::construct(*alloc, slot, + std::forward(args)...); + } + + template + static void construct(Alloc *alloc, slot_type *slot, slot_type *other) { + phmap::allocator_traits::construct(*alloc, slot, std::move(*other)); + } + + template + static void destroy(Alloc *alloc, slot_type *slot) { + phmap::allocator_traits::destroy(*alloc, slot); + } + + template + static void swap(Alloc * /*alloc*/, slot_type *a, slot_type *b) { + using std::swap; + swap(*a, *b); + } + + template + static void move(Alloc * /*alloc*/, slot_type *src, slot_type *dest) { + *dest = std::move(*src); + } + + template + static void move(Alloc *alloc, slot_type *first, slot_type *last, + slot_type *result) { + for (slot_type *src = first, *dest = result; src != last; ++src, ++dest) + move(alloc, src, dest); + } + }; + + // A parameters structure for holding the type parameters for a btree_set. + // Compare and Alloc should be nothrow copy-constructible. + template + struct set_params : common_params> { + using value_type = Key; + using slot_type = typename set_params::common_params::slot_type; + using value_compare = typename set_params::common_params::key_compare; + using is_map_container = std::false_type; + + static const Key &key(const value_type &x) { return x; } + static const Key &key(const slot_type *x) { return *x; } + }; + + // An adapter class that converts a lower-bound compare into an upper-bound + // compare. Note: there is no need to make a version of this adapter specialized + // for key-compare-to functors because the upper-bound (the first value greater + // than the input) is never an exact match. + template + struct upper_bound_adapter { + explicit upper_bound_adapter(const Compare &c) : comp(c) {} + template + bool operator()(const K &a, const LK &b) const { + // Returns true when a is not greater than b. 
+ return !phmap::compare_internal::compare_result_as_less_than(comp(b, a)); + } + + private: + Compare comp; + }; + + enum class MatchKind : uint8_t { kEq, kNe }; + + template + struct SearchResult { + V value; + MatchKind match; + + static constexpr bool HasMatch() { return true; } + bool IsEq() const { return match == MatchKind::kEq; } + }; + + // When we don't use CompareTo, `match` is not present. + // This ensures that callers can't use it accidentally when it provides no + // useful information. + template + struct SearchResult { + V value; + + static constexpr bool HasMatch() { return false; } + static constexpr bool IsEq() { return false; } + }; + + // A node in the btree holding. The same node type is used for both internal + // and leaf nodes in the btree, though the nodes are allocated in such a way + // that the children array is only valid in internal nodes. + template + class btree_node { + using is_key_compare_to = typename Params::is_key_compare_to; + using is_multi_container = typename Params::is_multi_container; + using field_type = typename Params::node_count_type; + using allocator_type = typename Params::allocator_type; + using slot_type = typename Params::slot_type; + + public: + using params_type = Params; + using key_type = typename Params::key_type; + using value_type = typename Params::value_type; + using pointer = typename Params::pointer; + using const_pointer = typename Params::const_pointer; + using reference = typename Params::reference; + using const_reference = typename Params::const_reference; + using key_compare = typename Params::key_compare; + using size_type = typename Params::size_type; + using difference_type = typename Params::difference_type; + + // Btree decides whether to use linear node search as follows: + // - If the key is arithmetic and the comparator is std::less or + // std::greater, choose linear. + // - Otherwise, choose binary. + // TODO(ezb): Might make sense to add condition(s) based on node-size. 
+ using use_linear_search = std::integral_constant< + bool, + std::is_arithmetic::value && + (std::is_same, key_compare>::value || + std::is_same, key_compare>::value || + std::is_same, key_compare>::value)>; + + + ~btree_node() = default; + btree_node(btree_node const &) = delete; + btree_node &operator=(btree_node const &) = delete; + + // Public for EmptyNodeType. + constexpr static size_type Alignment() { + static_assert(LeafLayout(1).Alignment() == InternalLayout().Alignment(), + "Alignment of all nodes must be equal."); + return (size_type)InternalLayout().Alignment(); + } + + protected: + btree_node() = default; + + private: + using layout_type = phmap::priv::Layout; + constexpr static size_type SizeWithNValues(size_type n) { + return (size_type)layout_type(/*parent*/ 1, + /*position, start, count, max_count*/ 4, + /*values*/ (size_t)n, + /*children*/ 0) + .AllocSize(); + } + // A lower bound for the overhead of fields other than values in a leaf node. + constexpr static size_type MinimumOverhead() { + return (size_type)(SizeWithNValues(1) - sizeof(value_type)); + } + + // Compute how many values we can fit onto a leaf node taking into account + // padding. + constexpr static size_type NodeTargetValues(const int begin, const int end) { + return begin == end ? begin + : SizeWithNValues((begin + end) / 2 + 1) > + params_type::kTargetNodeSize + ? NodeTargetValues(begin, (begin + end) / 2) + : NodeTargetValues((begin + end) / 2 + 1, end); + } + + enum { + kTargetNodeSize = params_type::kTargetNodeSize, + kNodeTargetValues = NodeTargetValues(0, params_type::kTargetNodeSize), + + // We need a minimum of 3 values per internal node in order to perform + // splitting (1 value for the two nodes involved in the split and 1 value + // propagated to the parent as the delimiter for the split). + kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3, + + // The node is internal (i.e. is not a leaf node) if and only if `max_count` + // has this value. 
+ kInternalNodeMaxCount = 0, + }; + + // Leaves can have less than kNodeValues values. + constexpr static layout_type LeafLayout(const int max_values = kNodeValues) { + return layout_type(/*parent*/ 1, + /*position, start, count, max_count*/ 4, + /*values*/ (size_t)max_values, + /*children*/ 0); + } + constexpr static layout_type InternalLayout() { + return layout_type(/*parent*/ 1, + /*position, start, count, max_count*/ 4, + /*values*/ kNodeValues, + /*children*/ kNodeValues + 1); + } + constexpr static size_type LeafSize(const int max_values = kNodeValues) { + return (size_type)LeafLayout(max_values).AllocSize(); + } + constexpr static size_type InternalSize() { + return (size_type)InternalLayout().AllocSize(); + } + + // N is the index of the type in the Layout definition. + // ElementType is the Nth type in the Layout definition. + template + inline typename layout_type::template ElementType *GetField() { + // We assert that we don't read from values that aren't there. + assert(N < 3 || !leaf()); + return InternalLayout().template Pointer(reinterpret_cast(this)); + } + + template + inline const typename layout_type::template ElementType *GetField() const { + assert(N < 3 || !leaf()); + return InternalLayout().template Pointer( + reinterpret_cast(this)); + } + + void set_parent(btree_node *p) { *GetField<0>() = p; } + field_type &mutable_count() { return GetField<1>()[2]; } + slot_type *slot(size_type i) { return &GetField<2>()[i]; } + const slot_type *slot(size_type i) const { return &GetField<2>()[i]; } + void set_position(field_type v) { GetField<1>()[0] = v; } + void set_start(field_type v) { GetField<1>()[1] = v; } + void set_count(field_type v) { GetField<1>()[2] = v; } + void set_max_count(field_type v) { GetField<1>()[3] = v; } + + public: + // Whether this is a leaf node or not. This value doesn't change after the + // node is created. 
+ bool leaf() const { return GetField<1>()[3] != kInternalNodeMaxCount; } + + // Getter for the position of this node in its parent. + field_type position() const { return GetField<1>()[0]; } + + // Getter for the offset of the first value in the `values` array. + field_type start() const { return GetField<1>()[1]; } + + // Getters for the number of values stored in this node. + field_type count() const { return GetField<1>()[2]; } + field_type max_count() const { + // Internal nodes have max_count==kInternalNodeMaxCount. + // Leaf nodes have max_count in [1, kNodeValues]. + const field_type max_cnt = GetField<1>()[3]; + return max_cnt == field_type{kInternalNodeMaxCount} + ? field_type{kNodeValues} + : max_cnt; + } + + // Getter for the parent of this node. + btree_node *parent() const { return *GetField<0>(); } + // Getter for whether the node is the root of the tree. The parent of the + // root of the tree is the leftmost node in the tree which is guaranteed to + // be a leaf. + bool is_root() const { return parent()->leaf(); } + void make_root() { + assert(parent()->is_root()); + set_parent(parent()->parent()); + } + + // Getters for the key/value at position i in the node. + const key_type &key(size_type i) const { return params_type::key(slot(i)); } + reference value(size_type i) { return params_type::element(slot(i)); } + const_reference value(size_type i) const { return params_type::element(slot(i)); } + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + // Getters/setter for the child at position i in the node. 
+ btree_node *child(size_type i) const { return GetField<3>()[i]; } + btree_node *&mutable_child(size_type i) { return GetField<3>()[i]; } + void clear_child(size_type i) { + phmap::priv::SanitizerPoisonObject(&mutable_child(i)); + } + void set_child(size_type i, btree_node *c) { + phmap::priv::SanitizerUnpoisonObject(&mutable_child(i)); + mutable_child(i) = c; + c->set_position((field_type)i); + } +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + void init_child(int i, btree_node *c) { + set_child(i, c); + c->set_parent(this); + } + + // Returns the position of the first value whose key is not less than k. + template + SearchResult lower_bound( + const K &k, const key_compare &comp) const { + return use_linear_search::value ? linear_search(k, comp) + : binary_search(k, comp); + } + // Returns the position of the first value whose key is greater than k. + template + int upper_bound(const K &k, const key_compare &comp) const { + auto upper_compare = upper_bound_adapter(comp); + return use_linear_search::value ? linear_search(k, upper_compare).value + : binary_search(k, upper_compare).value; + } + + template + SearchResult::value> + linear_search(const K &k, const Compare &comp) const { + return linear_search_impl(k, 0, count(), comp, + btree_is_key_compare_to()); + } + + template + SearchResult::value> + binary_search(const K &k, const Compare &comp) const { + return binary_search_impl(k, 0, count(), comp, + btree_is_key_compare_to()); + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using plain compare. + template + SearchResult linear_search_impl( + const K &k, int s, const int e, const Compare &comp, + std::false_type /* IsCompareTo */) const { + while (s < e) { + if (!comp(key(s), k)) { + break; + } + ++s; + } + return {s}; + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using compare-to. 
+ template + SearchResult linear_search_impl( + const K &k, int s, const int e, const Compare &comp, + std::true_type /* IsCompareTo */) const { + while (s < e) { + const phmap::weak_ordering c = comp(key(s), k); + if (c == 0) { + return {s, MatchKind::kEq}; + } else if (c > 0) { + break; + } + ++s; + } + return {s, MatchKind::kNe}; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using plain compare. + template + SearchResult binary_search_impl( + const K &k, int s, int e, const Compare &comp, + std::false_type /* IsCompareTo */) const { + while (s != e) { + const int mid = (s + e) >> 1; + if (comp(key(mid), k)) { + s = mid + 1; + } else { + e = mid; + } + } + return {s}; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using compare-to. + template + SearchResult binary_search_impl( + const K &k, int s, int e, const CompareTo &comp, + std::true_type /* IsCompareTo */) const { + if (is_multi_container::value) { + MatchKind exact_match = MatchKind::kNe; + while (s != e) { + const int mid = (s + e) >> 1; + const phmap::weak_ordering c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else { + e = mid; + if (c == 0) { + // Need to return the first value whose key is not less than k, + // which requires continuing the binary search if this is a + // multi-container. + exact_match = MatchKind::kEq; + } + } + } + return {s, exact_match}; + } else { // Not a multi-container. + while (s != e) { + const int mid = (s + e) >> 1; + const phmap::weak_ordering c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else if (c > 0) { + e = mid; + } else { + return {mid, MatchKind::kEq}; + } + } + return {s, MatchKind::kNe}; + } + } + + // Emplaces a value at position i, shifting all existing values and + // children at positions >= i to the right by 1. + template + void emplace_value(size_type i, allocator_type *alloc, Args &&... 
args); + + // Removes the value at position i, shifting all existing values and children + // at positions > i to the left by 1. + void remove_value(int i, allocator_type *alloc); + + // Removes the values at positions [i, i + to_erase), shifting all values + // after that range to the left by to_erase. Does not change children at all. + void remove_values_ignore_children(int i, size_type to_erase, + allocator_type *alloc); + + // Rebalances a node with its right sibling. + void rebalance_right_to_left(int to_move, btree_node *right, + allocator_type *alloc); + void rebalance_left_to_right(int to_move, btree_node *right, + allocator_type *alloc); + + // Splits a node, moving a portion of the node's values to its right sibling. + void split(int insert_position, btree_node *dest, allocator_type *alloc); + + // Merges a node with its right sibling, moving all of the values and the + // delimiting key in the parent node onto itself. + void merge(btree_node *sibling, allocator_type *alloc); + + // Swap the contents of "this" and "src". + void swap(btree_node *src, allocator_type *alloc); + + // Node allocation/deletion routines. + static btree_node *init_leaf(btree_node *n, btree_node *parent, + int max_cnt) { + n->set_parent(parent); + n->set_position(0); + n->set_start(0); + n->set_count(0); + n->set_max_count((field_type)max_cnt); + phmap::priv::SanitizerPoisonMemoryRegion( + n->slot(0), max_cnt * sizeof(slot_type)); + return n; + } + static btree_node *init_internal(btree_node *n, btree_node *parent) { + init_leaf(n, parent, kNodeValues); + // Set `max_count` to a sentinel value to indicate that this node is + // internal. + n->set_max_count(kInternalNodeMaxCount); + phmap::priv::SanitizerPoisonMemoryRegion( + &n->mutable_child(0), (kNodeValues + 1) * sizeof(btree_node *)); + return n; + } + void destroy(allocator_type *alloc) { + for (int i = 0; i < count(); ++i) { + value_destroy(i, alloc); + } + } + + public: + // Exposed only for tests. 
+ static bool testonly_uses_linear_node_search() { + return use_linear_search::value; + } + + private: + template + void value_init(const size_type i, allocator_type *alloc, Args &&... args) { + phmap::priv::SanitizerUnpoisonObject(slot(i)); + params_type::construct(alloc, slot(i), std::forward(args)...); + } + void value_destroy(const size_type i, allocator_type *alloc) { + params_type::destroy(alloc, slot(i)); + phmap::priv::SanitizerPoisonObject(slot(i)); + } + + // Move n values starting at value i in this node into the values starting at + // value j in node x. + void uninitialized_move_n(const size_type n, const size_type i, + const size_type j, btree_node *x, + allocator_type *alloc) { + phmap::priv::SanitizerUnpoisonMemoryRegion( + x->slot(j), n * sizeof(slot_type)); + for (slot_type *src = slot(i), *end = src + n, *dest = x->slot(j); + src != end; ++src, ++dest) { + params_type::construct(alloc, dest, src); + } + } + + // Destroys a range of n values, starting at index i. + void value_destroy_n(const size_type i, const size_type n, + allocator_type *alloc) { + for (size_type j = 0; j < n; ++j) { + value_destroy(i + j, alloc); + } + } + + template + friend class btree; + template + friend struct btree_iterator; + friend class BtreeNodePeer; + }; + + template + struct btree_iterator { + private: + using key_type = typename Node::key_type; + using size_type = typename Node::size_type; + using params_type = typename Node::params_type; + + using node_type = Node; + using normal_node = typename std::remove_const::type; + using const_node = const Node; + using normal_pointer = typename params_type::pointer; + using normal_reference = typename params_type::reference; + using const_pointer = typename params_type::const_pointer; + using const_reference = typename params_type::const_reference; + using slot_type = typename params_type::slot_type; + + using iterator = + btree_iterator; + using const_iterator = + btree_iterator; + + public: + // These aliases are public 
for std::iterator_traits. + using difference_type = typename Node::difference_type; + using value_type = typename params_type::value_type; + using pointer = Pointer; + using reference = Reference; + using iterator_category = std::bidirectional_iterator_tag; + + btree_iterator() : node(nullptr), position(-1) {} + btree_iterator(Node *n, int p) : node(n), position(p) {} + + // NOTE: this SFINAE allows for implicit conversions from iterator to + // const_iterator, but it specifically avoids defining copy constructors so + // that btree_iterator can be trivially copyable. This is for performance and + // binary size reasons. + template , iterator>::value && + std::is_same::value, + int> = 0> + btree_iterator(const btree_iterator &x) // NOLINT + : node(x.node), position(x.position) {} + + private: + // This SFINAE allows explicit conversions from const_iterator to + // iterator, but also avoids defining a copy constructor. + // NOTE: the const_cast is safe because this constructor is only called by + // non-const methods and the container owns the nodes. + template , const_iterator>::value && + std::is_same::value, + int> = 0> + explicit btree_iterator(const btree_iterator &x) + : node(const_cast(x.node)), position(x.position) {} + + // Increment/decrement the iterator. 
+ void increment() { + if (node->leaf() && ++position < node->count()) { + return; + } + increment_slow(); + } + void increment_slow(); + + void decrement() { + if (node->leaf() && --position >= 0) { + return; + } + decrement_slow(); + } + void decrement_slow(); + + public: + bool operator==(const const_iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const const_iterator &x) const { + return node != x.node || position != x.position; + } + bool operator==(const iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const iterator &x) const { + return node != x.node || position != x.position; + } + + // Accessors for the key/value the iterator is pointing at. + reference operator*() const { + return node->value(position); + } + pointer operator->() const { + return &node->value(position); + } + + btree_iterator& operator++() { + increment(); + return *this; + } + btree_iterator& operator--() { + decrement(); + return *this; + } + btree_iterator operator++(int) { + btree_iterator tmp = *this; + ++*this; + return tmp; + } + btree_iterator operator--(int) { + btree_iterator tmp = *this; + --*this; + return tmp; + } + + private: + template + friend class btree; + template + friend class btree_container; + template + friend class btree_set_container; + template + friend class btree_map_container; + template + friend class btree_multiset_container; + template + friend struct btree_iterator; + template + friend class base_checker; + + const key_type &key() const { return node->key(position); } + slot_type *slot() { return node->slot(position); } + + // The node in the tree the iterator is pointing at. + Node *node; + // The position within the node of the tree the iterator is pointing at. 
+ // TODO(ezb): make this a field_type + int position; + }; + + template + class btree { + using node_type = btree_node; + using is_key_compare_to = typename Params::is_key_compare_to; + + // We use a static empty node for the root/leftmost/rightmost of empty btrees + // in order to avoid branching in begin()/end(). + struct alignas(node_type::Alignment()) EmptyNodeType : node_type { + using field_type = typename node_type::field_type; + node_type *parent; + field_type position = 0; + field_type start = 0; + field_type count = 0; + // max_count must be != kInternalNodeMaxCount (so that this node is regarded + // as a leaf node). max_count() is never called when the tree is empty. + field_type max_count = node_type::kInternalNodeMaxCount + 1; + +#ifdef _MSC_VER + // MSVC has constexpr code generations bugs here. + EmptyNodeType() : parent(this) {} +#else + constexpr EmptyNodeType(node_type *p) : parent(p) {} +#endif + }; + + static node_type *EmptyNode() { +#ifdef _MSC_VER + static EmptyNodeType empty_node; + // This assert fails on some other construction methods. 
+ assert(empty_node.parent == &empty_node); + return &empty_node; +#else + static constexpr EmptyNodeType empty_node( + const_cast(&empty_node)); + return const_cast(&empty_node); +#endif + } + + enum { + kNodeValues = node_type::kNodeValues, + kMinNodeValues = kNodeValues / 2, + }; + + struct node_stats { + using size_type = typename Params::size_type; + + node_stats(size_type l, size_type i) + : leaf_nodes(l), + internal_nodes(i) { + } + + node_stats& operator+=(const node_stats &x) { + leaf_nodes += x.leaf_nodes; + internal_nodes += x.internal_nodes; + return *this; + } + + size_type leaf_nodes; + size_type internal_nodes; + }; + + public: + using key_type = typename Params::key_type; + using value_type = typename Params::value_type; + using size_type = typename Params::size_type; + using difference_type = typename Params::difference_type; + using key_compare = typename Params::key_compare; + using value_compare = typename Params::value_compare; + using allocator_type = typename Params::allocator_type; + using reference = typename Params::reference; + using const_reference = typename Params::const_reference; + using pointer = typename Params::pointer; + using const_pointer = typename Params::const_pointer; + using iterator = btree_iterator; + using const_iterator = typename iterator::const_iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using node_handle_type = node_handle; + + // Internal types made public for use by btree_container types. + using params_type = Params; + using slot_type = typename Params::slot_type; + + private: + // For use in copy_or_move_values_in_order. + const value_type &maybe_move_from_iterator(const_iterator x) { return *x; } + value_type &&maybe_move_from_iterator(iterator x) { return std::move(*x); } + + // Copies or moves (depending on the template parameter) the values in + // x into this btree in their order in x. 
This btree must be empty before this + // method is called. This method is used in copy construction, copy + // assignment, and move assignment. + template + void copy_or_move_values_in_order(Btree *x); + + // Validates that various assumptions/requirements are true at compile time. + constexpr static bool static_assert_validation(); + + public: + btree(const key_compare &comp, const allocator_type &alloc); + + btree(const btree &x); + btree(btree &&x) noexcept + : root_(std::move(x.root_)), + rightmost_(phmap::exchange(x.rightmost_, EmptyNode())), + size_(phmap::exchange(x.size_, 0)) { + x.mutable_root() = EmptyNode(); + } + + ~btree() { + // Put static_asserts in destructor to avoid triggering them before the type + // is complete. + static_assert(static_assert_validation(), "This call must be elided."); + clear(); + } + + // Assign the contents of x to *this. + btree &operator=(const btree &x); + btree &operator=(btree &&x) noexcept; + + iterator begin() { + return iterator(leftmost(), 0); + } + const_iterator begin() const { + return const_iterator(leftmost(), 0); + } + iterator end() { return iterator(rightmost_, rightmost_->count()); } + const_iterator end() const { + return const_iterator(rightmost_, rightmost_->count()); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Finds the first element whose key is not less than key. + template + iterator lower_bound(const K &key) { + return internal_end(internal_lower_bound(key)); + } + template + const_iterator lower_bound(const K &key) const { + return internal_end(internal_lower_bound(key)); + } + + // Finds the first element whose key is greater than key. 
+ template + iterator upper_bound(const K &key) { + return internal_end(internal_upper_bound(key)); + } + template + const_iterator upper_bound(const K &key) const { + return internal_end(internal_upper_bound(key)); + } + + // Finds the range of values which compare equal to key. The first member of + // the returned pair is equal to lower_bound(key). The second member pair of + // the pair is equal to upper_bound(key). + template + std::pair equal_range(const K &key) { + return {lower_bound(key), upper_bound(key)}; + } + template + std::pair equal_range(const K &key) const { + return {lower_bound(key), upper_bound(key)}; + } + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. + // Requirement: if `key` already exists in the btree, does not consume `args`. + // Requirement: `key` is never referenced after consuming `args`. + template + std::pair insert_unique(const key_type &key, Args &&... args); + + // Inserts with hint. Checks to see if the value should be placed immediately + // before `position` in the tree. If so, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_unique() were made. + // Requirement: if `key` already exists in the btree, does not consume `args`. + // Requirement: `key` is never referenced after consuming `args`. + template + std::pair insert_hint_unique(iterator position, + const key_type &key, + Args &&... args); + + // Insert a range of values into the btree. + template + void insert_iterator_unique(InputIterator b, InputIterator e); + + // Inserts a value into the btree. + template + iterator insert_multi(const key_type &key, ValueType &&v); + + // Inserts a value into the btree. + template + iterator insert_multi(ValueType &&v) { + return insert_multi(params_type::key(v), std::forward(v)); + } + + // Insert with hint. 
Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_multi(v) were made. + template + iterator insert_hint_multi(iterator position, ValueType &&v); + + // Insert a range of values into the btree. + template + void insert_iterator_multi(InputIterator b, InputIterator e); + + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + // Requirement: does not read the value at `*iter`. + iterator erase(iterator iter); + + // Erases range. Returns the number of keys erased and an iterator pointing + // to the element after the last erased element. + std::pair erase(iterator begin, iterator end); + + // Erases the specified key from the btree. Returns 1 if an element was + // erased and 0 otherwise. + template + size_type erase_unique(const K &key); + + // Erases all of the entries matching the specified key from the + // btree. Returns the number of elements erased. + template + size_type erase_multi(const K &key); + + // Finds the iterator corresponding to a key or returns end() if the key is + // not present. + template + iterator find(const K &key) { + return internal_end(internal_find(key)); + } + template + const_iterator find(const K &key) const { + return internal_end(internal_find(key)); + } + + // Returns a count of the number of times the key appears in the btree. + template + size_type count_unique(const K &key) const { + const iterator beg = internal_find(key); + if (beg.node == nullptr) { + // The key doesn't exist in the tree. + return 0; + } + return 1; + } + // Returns a count of the number of times the key appears in the btree. 
+ template + size_type count_multi(const K &key) const { + const auto range = equal_range(key); + return std::distance(range.first, range.second); + } + + // Clear the btree, deleting all of the values it contains. + void clear(); + + // Swap the contents of *this and x. + void swap(btree &x); + + const key_compare &key_comp() const noexcept { + return root_.template get<0>(); + } + template + bool compare_keys(const K &x, const LK &y) const { + return compare_internal::compare_result_as_less_than(key_comp()(x, y)); + } + + value_compare value_comp() const { return value_compare(key_comp()); } + + // Verifies the structure of the btree. + void verify() const; + + // Size routines. + size_type size() const { return size_; } + size_type max_size() const { return (std::numeric_limits::max)(); } + bool empty() const { return size_ == 0; } + + // The height of the btree. An empty tree will have height 0. + size_type height() const { + size_type h = 0; + if (!empty()) { + // Count the length of the chain from the leftmost node up to the + // root. We actually count from the root back around to the level below + // the root, but the calculation is the same because of the circularity + // of that traversal. + const node_type *n = root(); + do { + ++h; + n = n->parent(); + } while (n != root()); + } + return h; + } + + // The number of internal, leaf and total nodes used by the btree. + size_type leaf_nodes() const { + return internal_stats(root()).leaf_nodes; + } + size_type internal_nodes() const { + return internal_stats(root()).internal_nodes; + } + size_type nodes() const { + node_stats stats = internal_stats(root()); + return stats.leaf_nodes + stats.internal_nodes; + } + + // The total number of bytes used by the btree. 
+ size_type bytes_used() const { + node_stats stats = internal_stats(root()); + if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) { + return sizeof(*this) + + node_type::LeafSize(root()->max_count()); + } else { + return sizeof(*this) + + stats.leaf_nodes * node_type::LeafSize() + + stats.internal_nodes * node_type::InternalSize(); + } + } + + // The average number of bytes used per value stored in the btree. + static double average_bytes_per_value() { + // Returns the number of bytes per value on a leaf node that is 75% + // full. Experimentally, this matches up nicely with the computed number of + // bytes per value in trees that had their values inserted in random order. + return node_type::LeafSize() / (kNodeValues * 0.75); + } + + // The fullness of the btree. Computed as the number of elements in the btree + // divided by the maximum number of elements a tree with the current number + // of nodes could hold. A value of 1 indicates perfect space + // utilization. Smaller values indicate space wastage. + // Returns 0 for empty trees. + double fullness() const { + if (empty()) return 0.0; + return static_cast(size()) / (nodes() * kNodeValues); + } + // The overhead of the btree structure in bytes per node. Computed as the + // total number of bytes used by the btree minus the number of bytes used for + // storing elements divided by the number of elements. + // Returns 0 for empty trees. + double overhead() const { + if (empty()) return 0.0; + return (bytes_used() - size() * sizeof(value_type)) / + static_cast(size()); + } + + // The allocator used by the btree. + allocator_type get_allocator() const { + return allocator(); + } + + private: + // Internal accessor routines. 
+ node_type *root() { return root_.template get<2>(); } + const node_type *root() const { return root_.template get<2>(); } + node_type *&mutable_root() noexcept { return root_.template get<2>(); } + key_compare *mutable_key_comp() noexcept { return &root_.template get<0>(); } + + // The leftmost node is stored as the parent of the root node. + node_type *leftmost() { return root()->parent(); } + const node_type *leftmost() const { return root()->parent(); } + + // Allocator routines. + allocator_type *mutable_allocator() noexcept { + return &root_.template get<1>(); + } + const allocator_type &allocator() const noexcept { + return root_.template get<1>(); + } + + // Allocates a correctly aligned node of at least size bytes using the + // allocator. + node_type *allocate(const size_type sz) { + return reinterpret_cast( + phmap::priv::Allocate( + mutable_allocator(), (size_t)sz)); + } + + // Node creation/deletion routines. + node_type* new_internal_node(node_type *parent) { + node_type *p = allocate(node_type::InternalSize()); + return node_type::init_internal(p, parent); + } + node_type* new_leaf_node(node_type *parent) { + node_type *p = allocate(node_type::LeafSize()); + return node_type::init_leaf(p, parent, kNodeValues); + } + node_type *new_leaf_root_node(const int max_count) { + node_type *p = allocate(node_type::LeafSize(max_count)); + return node_type::init_leaf(p, p, max_count); + } + + // Deletion helper routines. + void erase_same_node(iterator begin, iterator end); + iterator erase_from_leaf_node(iterator begin, size_type to_erase); + iterator rebalance_after_delete(iterator iter); + + // Deallocates a node of a certain size in bytes using the allocator. 
+ void deallocate(const size_type sz, node_type *node) { + phmap::priv::Deallocate( + mutable_allocator(), node, (size_t)sz); + } + + void delete_internal_node(node_type *node) { + node->destroy(mutable_allocator()); + deallocate(node_type::InternalSize(), node); + } + void delete_leaf_node(node_type *node) { + node->destroy(mutable_allocator()); + deallocate(node_type::LeafSize(node->max_count()), node); + } + + // Rebalances or splits the node iter points to. + void rebalance_or_split(iterator *iter); + + // Merges the values of left, right and the delimiting key on their parent + // onto left, removing the delimiting key and deleting right. + void merge_nodes(node_type *left, node_type *right); + + // Tries to merge node with its left or right sibling, and failing that, + // rebalance with its left or right sibling. Returns true if a merge + // occurred, at which point it is no longer valid to access node. Returns + // false if no merging took place. + bool try_merge_or_rebalance(iterator *iter); + + // Tries to shrink the height of the tree by 1. + void try_shrink(); + + iterator internal_end(iterator iter) { + return iter.node != nullptr ? iter : end(); + } + const_iterator internal_end(const_iterator iter) const { + return iter.node != nullptr ? iter : end(); + } + + // Emplaces a value into the btree immediately before iter. Requires that + // key(v) <= iter.key() and (--iter).key() <= key(v). + template + iterator internal_emplace(iterator iter, Args &&... args); + + // Returns an iterator pointing to the first value >= the value "iter" is + // pointing at. Note that "iter" might be pointing to an invalid location as + // iter.position == iter.node->count(). This routine simply moves iter up in + // the tree to a valid location. + // Requires: iter.node is non-null. + template + static IterType internal_last(IterType iter); + + // Returns an iterator pointing to the leaf position at which key would + // reside in the tree. 
We provide 2 versions of internal_locate. The first + // version uses a less-than comparator and is incapable of distinguishing when + // there is an exact match. The second version is for the key-compare-to + // specialization and distinguishes exact matches. The key-compare-to + // specialization allows the caller to avoid a subsequent comparison to + // determine if an exact match was made, which is important for keys with + // expensive comparison, such as strings. + template + SearchResult internal_locate( + const K &key) const; + + template + SearchResult internal_locate_impl( + const K &key, std::false_type /* IsCompareTo */) const; + + template + SearchResult internal_locate_impl( + const K &key, std::true_type /* IsCompareTo */) const; + + // Internal routine which implements lower_bound(). + template + iterator internal_lower_bound(const K &key) const; + + // Internal routine which implements upper_bound(). + template + iterator internal_upper_bound(const K &key) const; + + // Internal routine which implements find(). + template + iterator internal_find(const K &key) const; + + // Deletes a node and all of its children. + void internal_clear(node_type *node); + + // Verifies the tree structure of node. + size_type internal_verify(const node_type *node, + const key_type *lo, const key_type *hi) const; + + node_stats internal_stats(const node_type *node) const { + // The root can be a static empty node. + if (node == nullptr || (node == root() && empty())) { + return node_stats(0, 0); + } + if (node->leaf()) { + return node_stats(1, 0); + } + node_stats res(0, 1); + for (int i = 0; i <= node->count(); ++i) { + res += internal_stats(node->child(i)); + } + return res; + } + + public: + // Exposed only for tests. + static bool testonly_uses_linear_node_search() { + return node_type::testonly_uses_linear_node_search(); + } + + private: + // We use compressed tuple in order to save space because key_compare and + // allocator_type are usually empty. 
+ phmap::priv::CompressedTuple + root_; + + // A pointer to the rightmost node. Note that the leftmost node is stored as + // the root's parent. + node_type *rightmost_; + + // Number of values. + size_type size_; + }; + + //// + // btree_node methods + template + template + inline void btree_node

::emplace_value(const size_type i, + allocator_type *alloc, + Args &&... args) { + assert(i <= count()); + // Shift old values to create space for new value and then construct it in + // place. + if (i < count()) { + value_init(count(), alloc, slot(count() - 1)); + for (size_type j = count() - 1; j > i; --j) + params_type::move(alloc, slot(j - 1), slot(j)); + value_destroy(i, alloc); + } + value_init(i, alloc, std::forward(args)...); + set_count((field_type)(count() + 1)); + + if (!leaf() && count() > i + 1) { + for (int j = count(); j > (int)(i + 1); --j) { + set_child(j, child(j - 1)); + } + clear_child(i + 1); + } + } + + template + inline void btree_node

::remove_value(const int i, allocator_type *alloc) { + if (!leaf() && count() > i + 1) { + assert(child(i + 1)->count() == 0); + for (size_type j = i + 1; j < count(); ++j) { + set_child(j, child(j + 1)); + } + clear_child(count()); + } + + remove_values_ignore_children(i, /*to_erase=*/1, alloc); + } + + template + inline void btree_node

::remove_values_ignore_children( + int i, size_type to_erase, allocator_type *alloc) { + params_type::move(alloc, slot(i + to_erase), slot(count()), slot(i)); + value_destroy_n(count() - to_erase, to_erase, alloc); + set_count((field_type)(count() - to_erase)); + } + + template + void btree_node

::rebalance_right_to_left(const int to_move, + btree_node *right, + allocator_type *alloc) { + assert(parent() == right->parent()); + assert(position() + 1 == right->position()); + assert(right->count() >= count()); + assert(to_move >= 1); + assert(to_move <= right->count()); + + // 1) Move the delimiting value in the parent to the left node. + value_init(count(), alloc, parent()->slot(position())); + + // 2) Move the (to_move - 1) values from the right node to the left node. + right->uninitialized_move_n(to_move - 1, 0, count() + 1, this, alloc); + + // 3) Move the new delimiting value to the parent from the right node. + params_type::move(alloc, right->slot(to_move - 1), + parent()->slot(position())); + + // 4) Shift the values in the right node to their correct position. + params_type::move(alloc, right->slot(to_move), right->slot(right->count()), + right->slot(0)); + + // 5) Destroy the now-empty to_move entries in the right node. + right->value_destroy_n(right->count() - to_move, to_move, alloc); + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i < to_move; ++i) { + init_child(count() + i + 1, right->child(i)); + } + for (int i = 0; i <= right->count() - to_move; ++i) { + assert(i + to_move <= right->max_count()); + right->init_child(i, right->child(i + to_move)); + right->clear_child(i + to_move); + } + } + + // Fixup the counts on the left and right nodes. + set_count((field_type)(count() + to_move)); + right->set_count((field_type)(right->count() - to_move)); + } + + template + void btree_node

::rebalance_left_to_right(const int to_move, + btree_node *right, + allocator_type *alloc) { + assert(parent() == right->parent()); + assert(position() + 1 == right->position()); + assert(count() >= right->count()); + assert(to_move >= 1); + assert(to_move <= count()); + + // Values in the right node are shifted to the right to make room for the + // new to_move values. Then, the delimiting value in the parent and the + // other (to_move - 1) values in the left node are moved into the right node. + // Lastly, a new delimiting value is moved from the left node into the + // parent, and the remaining empty left node entries are destroyed. + + if (right->count() >= to_move) { + // The original location of the right->count() values are sufficient to hold + // the new to_move entries from the parent and left node. + + // 1) Shift existing values in the right node to their correct positions. + right->uninitialized_move_n(to_move, right->count() - to_move, + right->count(), right, alloc); + if (right->count() > to_move) { + for (slot_type *src = right->slot(right->count() - to_move - 1), + *dest = right->slot(right->count() - 1), + *end = right->slot(0); + src >= end; --src, --dest) { + params_type::move(alloc, src, dest); + } + } + + // 2) Move the delimiting value in the parent to the right node. + params_type::move(alloc, parent()->slot(position()), + right->slot(to_move - 1)); + + // 3) Move the (to_move - 1) values from the left node to the right node. + params_type::move(alloc, slot(count() - (to_move - 1)), slot(count()), + right->slot(0)); + } else { + // The right node does not have enough initialized space to hold the new + // to_move entries, so part of them will move to uninitialized space. + + // 1) Shift existing values in the right node to their correct positions. + right->uninitialized_move_n(right->count(), 0, to_move, right, alloc); + + // 2) Move the delimiting value in the parent to the right node. 
+ right->value_init(to_move - 1, alloc, parent()->slot(position())); + + // 3) Move the (to_move - 1) values from the left node to the right node. + const size_type uninitialized_remaining = to_move - right->count() - 1; + uninitialized_move_n(uninitialized_remaining, + count() - uninitialized_remaining, right->count(), + right, alloc); + params_type::move(alloc, slot(count() - (to_move - 1)), + slot(count() - uninitialized_remaining), right->slot(0)); + } + + // 4) Move the new delimiting value to the parent from the left node. + params_type::move(alloc, slot(count() - to_move), parent()->slot(position())); + + // 5) Destroy the now-empty to_move entries in the left node. + value_destroy_n(count() - to_move, to_move, alloc); + + if (!leaf()) { + // Move the child pointers from the left to the right node. + for (int i = right->count(); i >= 0; --i) { + right->init_child(i + to_move, right->child(i)); + right->clear_child(i); + } + for (int i = 1; i <= to_move; ++i) { + right->init_child(i - 1, child(count() - to_move + i)); + clear_child(count() - to_move + i); + } + } + + // Fixup the counts on the left and right nodes. + set_count((field_type)(count() - to_move)); + right->set_count((field_type)(right->count() + to_move)); + } + + template + void btree_node

::split(const int insert_position, btree_node *dest, + allocator_type *alloc) { + assert(dest->count() == 0); + assert(max_count() == kNodeValues); + + // We bias the split based on the position being inserted. If we're + // inserting at the beginning of the left node then bias the split to put + // more values on the right node. If we're inserting at the end of the + // right node then bias the split to put more values on the left node. + if (insert_position == 0) { + dest->set_count((field_type)(count() - 1)); + } else if (insert_position == kNodeValues) { + dest->set_count(0); + } else { + dest->set_count((field_type)(count() / 2)); + } + set_count((field_type)(count() - dest->count())); + assert(count() >= 1); + + // Move values from the left sibling to the right sibling. + uninitialized_move_n(dest->count(), count(), 0, dest, alloc); + + // Destroy the now-empty entries in the left node. + value_destroy_n(count(), dest->count(), alloc); + + // The split key is the largest value in the left sibling. + set_count((field_type)(count() - 1)); + parent()->emplace_value(position(), alloc, slot(count())); + value_destroy(count(), alloc); + parent()->init_child(position() + 1, dest); + + if (!leaf()) { + for (int i = 0; i <= dest->count(); ++i) { + assert(child(count() + i + 1) != nullptr); + dest->init_child(i, child(count() + i + 1)); + clear_child(count() + i + 1); + } + } + } + + template + void btree_node

::merge(btree_node *src, allocator_type *alloc) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + + // Move the delimiting value to the left node. + value_init(count(), alloc, parent()->slot(position())); + + // Move the values from the right to the left node. + src->uninitialized_move_n(src->count(), 0, count() + 1, this, alloc); + + // Destroy the now-empty entries in the right node. + src->value_destroy_n(0, src->count(), alloc); + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i <= src->count(); ++i) { + init_child(count() + i + 1, src->child(i)); + src->clear_child(i); + } + } + + // Fixup the counts on the src and dest nodes. + set_count((field_type)(1 + count() + src->count())); + src->set_count(0); + + // Remove the value on the parent node. + parent()->remove_value(position(), alloc); + } + + template + void btree_node

::swap(btree_node *x, allocator_type *alloc) { + using std::swap; + assert(leaf() == x->leaf()); + + // Determine which is the smaller/larger node. + btree_node *smaller = this, *larger = x; + if (smaller->count() > larger->count()) { + swap(smaller, larger); + } + + // Swap the values. + for (slot_type *a = smaller->slot(0), *b = larger->slot(0), + *end = a + smaller->count(); + a != end; ++a, ++b) { + params_type::swap(alloc, a, b); + } + + // Move values that can't be swapped. + const size_type to_move = larger->count() - smaller->count(); + larger->uninitialized_move_n(to_move, smaller->count(), smaller->count(), + smaller, alloc); + larger->value_destroy_n(smaller->count(), to_move, alloc); + + if (!leaf()) { + // Swap the child pointers. + std::swap_ranges(&smaller->mutable_child(0), + &smaller->mutable_child(smaller->count() + 1), + &larger->mutable_child(0)); + // Update swapped children's parent pointers. + int i = 0; + for (; i <= smaller->count(); ++i) { + smaller->child(i)->set_parent(smaller); + larger->child(i)->set_parent(larger); + } + // Move the child pointers that couldn't be swapped. + for (; i <= larger->count(); ++i) { + smaller->init_child(i, larger->child(i)); + larger->clear_child(i); + } + } + + // Swap the counts. 
+ swap(mutable_count(), x->mutable_count()); + } + + //// + // btree_iterator methods + template + void btree_iterator::increment_slow() { + if (node->leaf()) { + assert(position >= node->count()); + btree_iterator save(*this); + while (position == node->count() && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position(); + node = node->parent(); + } + if (position == node->count()) { + *this = save; + } + } else { + assert(position < node->count()); + node = node->child(position + 1); + while (!node->leaf()) { + node = node->child(0); + } + position = 0; + } + } + + template + void btree_iterator::decrement_slow() { + if (node->leaf()) { + assert(position <= -1); + btree_iterator save(*this); + while (position < 0 && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position() - 1; + node = node->parent(); + } + if (position < 0) { + *this = save; + } + } else { + assert(position >= 0); + node = node->child(position); + while (!node->leaf()) { + node = node->child(node->count()); + } + position = node->count() - 1; + } + } + + //// + // btree methods + template + template + void btree

::copy_or_move_values_in_order(Btree *x) { + static_assert(std::is_same::value || + std::is_same::value, + "Btree type must be same or const."); + assert(empty()); + + // We can avoid key comparisons because we know the order of the + // values is the same order we'll store them in. + auto iter = x->begin(); + if (iter == x->end()) return; + insert_multi(maybe_move_from_iterator(iter)); + ++iter; + for (; iter != x->end(); ++iter) { + // If the btree is not empty, we can just insert the new value at the end + // of the tree. + internal_emplace(end(), maybe_move_from_iterator(iter)); + } + } + + template + constexpr bool btree

::static_assert_validation() { + static_assert(std::is_nothrow_copy_constructible::value, + "Key comparison must be nothrow copy constructible"); + static_assert(std::is_nothrow_copy_constructible::value, + "Allocator must be nothrow copy constructible"); + static_assert(type_traits_internal::is_trivially_copyable::value, + "iterator not trivially copyable."); + + // Note: We assert that kTargetValues, which is computed from + // Params::kTargetNodeSize, must fit the node_type::field_type. + static_assert( + kNodeValues < (1 << (8 * sizeof(typename node_type::field_type))), + "target node size too large"); + + // Verify that key_compare returns an phmap::{weak,strong}_ordering or bool. + using compare_result_type = + phmap::invoke_result_t; + static_assert( + std::is_same::value || + std::is_convertible::value, + "key comparison function must return phmap::{weak,strong}_ordering or " + "bool."); + + // Test the assumption made in setting kNodeSlotSpace. + static_assert(node_type::MinimumOverhead() >= sizeof(void *) + 4, + "node space assumption incorrect"); + + return true; + } + + template + btree

::btree(const key_compare &comp, const allocator_type &alloc) + : root_(comp, alloc, EmptyNode()), rightmost_(EmptyNode()), size_(0) {} + + template + btree

::btree(const btree &x) : btree(x.key_comp(), x.allocator()) { + copy_or_move_values_in_order(&x); + } + + template + template + auto btree

::insert_unique(const key_type &key, Args &&... args) + -> std::pair { + if (empty()) { + mutable_root() = rightmost_ = new_leaf_root_node(1); + } + + auto res = internal_locate(key); + iterator &iter = res.value; + + if (res.HasMatch()) { + if (res.IsEq()) { + // The key already exists in the tree, do nothing. + return {iter, false}; + } + } else { + iterator last = internal_last(iter); + if (last.node && !compare_keys(key, last.key())) { + // The key already exists in the tree, do nothing. + return {last, false}; + } + } + return {internal_emplace(iter, std::forward(args)...), true}; + } + + template + template + inline auto btree

::insert_hint_unique(iterator position, const key_type &key, + Args &&... args) + -> std::pair { + if (!empty()) { + if (position == end() || compare_keys(key, position.key())) { + iterator prev = position; + if (position == begin() || compare_keys((--prev).key(), key)) { + // prev.key() < key < position.key() + return {internal_emplace(position, std::forward(args)...), true}; + } + } else if (compare_keys(position.key(), key)) { + ++position; + if (position == end() || compare_keys(key, position.key())) { + // {original `position`}.key() < key < {current `position`}.key() + return {internal_emplace(position, std::forward(args)...), true}; + } + } else { + // position.key() == key + return {position, false}; + } + } + return insert_unique(key, std::forward(args)...); + } + + template + template + void btree

::insert_iterator_unique(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_hint_unique(end(), params_type::key(*b), *b); + } + } + + template + template + auto btree

::insert_multi(const key_type &key, ValueType &&v) -> iterator { + if (empty()) { + mutable_root() = rightmost_ = new_leaf_root_node(1); + } + + iterator iter = internal_upper_bound(key); + if (iter.node == nullptr) { + iter = end(); + } + return internal_emplace(iter, std::forward(v)); + } + + template + template + auto btree

::insert_hint_multi(iterator position, ValueType &&v) -> iterator { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || !compare_keys(position.key(), key)) { + iterator prev = position; + if (position == begin() || !compare_keys(key, (--prev).key())) { + // prev.key() <= key <= position.key() + return internal_emplace(position, std::forward(v)); + } + } else { + iterator next = position; + ++next; + if (next == end() || !compare_keys(next.key(), key)) { + // position.key() < key <= next.key() + return internal_emplace(next, std::forward(v)); + } + } + } + return insert_multi(std::forward(v)); + } + + template + template + void btree

::insert_iterator_multi(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_hint_multi(end(), *b); + } + } + + template + auto btree

::operator=(const btree &x) -> btree & { + if (this != &x) { + clear(); + + *mutable_key_comp() = x.key_comp(); + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_copy_assignment::value) { + *mutable_allocator() = x.allocator(); + } + + copy_or_move_values_in_order(&x); + } + return *this; + } + + template + auto btree

::operator=(btree &&x) noexcept -> btree & { + if (this != &x) { + clear(); + + using std::swap; + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_copy_assignment::value) { + // Note: `root_` also contains the allocator and the key comparator. + swap(root_, x.root_); + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } else { + if (allocator() == x.allocator()) { + swap(mutable_root(), x.mutable_root()); + swap(*mutable_key_comp(), *x.mutable_key_comp()); + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } else { + // We aren't allowed to propagate the allocator and the allocator is + // different so we can't take over its memory. We must move each element + // individually. We need both `x` and `this` to have `x`s key comparator + // while moving the values so we can't swap the key comparators. + *mutable_key_comp() = x.key_comp(); + copy_or_move_values_in_order(&x); + } + } + } + return *this; + } + + template + auto btree

::erase(iterator iter) -> iterator { + bool internal_delete = false; + if (!iter.node->leaf()) { + // Deletion of a value on an internal node. First, move the largest value + // from our left child here, then delete that position (in remove_value() + // below). We can get to the largest value from our left child by + // decrementing iter. + iterator internal_iter(iter); + --iter; + assert(iter.node->leaf()); + params_type::move(mutable_allocator(), iter.node->slot(iter.position), + internal_iter.node->slot(internal_iter.position)); + internal_delete = true; + } + + // Delete the key from the leaf. + iter.node->remove_value(iter.position, mutable_allocator()); + --size_; + + // We want to return the next value after the one we just erased. If we + // erased from an internal node (internal_delete == true), then the next + // value is ++(++iter). If we erased from a leaf node (internal_delete == + // false) then the next value is ++iter. Note that ++iter may point to an + // internal node and the value in the internal node may move to a leaf node + // (iter.node) when rebalancing is performed at the leaf level. + + iterator res = rebalance_after_delete(iter); + + // If we erased from an internal node, advance the iterator. + if (internal_delete) { + ++res; + } + return res; + } + + template + auto btree

::rebalance_after_delete(iterator iter) -> iterator { + // Merge/rebalance as we walk back up the tree. + iterator res(iter); + bool first_iteration = true; + for (;;) { + if (iter.node == root()) { + try_shrink(); + if (empty()) { + return end(); + } + break; + } + if (iter.node->count() >= kMinNodeValues) { + break; + } + bool merged = try_merge_or_rebalance(&iter); + // On the first iteration, we should update `res` with `iter` because `res` + // may have been invalidated. + if (first_iteration) { + res = iter; + first_iteration = false; + } + if (!merged) { + break; + } + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + } + + // Adjust our return value. If we're pointing at the end of a node, advance + // the iterator. + if (res.position == res.node->count()) { + res.position = res.node->count() - 1; + ++res; + } + + return res; + } + + template + auto btree

::erase(iterator _begin, iterator _end) + -> std::pair { + difference_type count = std::distance(_begin, _end); + assert(count >= 0); + + if (count == 0) { + return {0, _begin}; + } + + if (count == (difference_type)size_) { + clear(); + return {count, this->end()}; + } + + if (_begin.node == _end.node) { + erase_same_node(_begin, _end); + size_ -= count; + return {count, rebalance_after_delete(_begin)}; + } + + const size_type target_size = size_ - count; + while (size_ > target_size) { + if (_begin.node->leaf()) { + const size_type remaining_to_erase = size_ - target_size; + const size_type remaining_in_node = _begin.node->count() - _begin.position; + _begin = erase_from_leaf_node( + _begin, (std::min)(remaining_to_erase, remaining_in_node)); + } else { + _begin = erase(_begin); + } + } + return {count, _begin}; + } + + template + void btree

::erase_same_node(iterator _begin, iterator _end) { + assert(_begin.node == _end.node); + assert(_end.position > _begin.position); + + node_type *node = _begin.node; + size_type to_erase = _end.position - _begin.position; + if (!node->leaf()) { + // Delete all children between _begin and _end. + for (size_type i = 0; i < to_erase; ++i) { + internal_clear(node->child(_begin.position + i + 1)); + } + // Rotate children after _end into new positions. + for (size_type i = _begin.position + to_erase + 1; i <= node->count(); ++i) { + node->set_child(i - to_erase, node->child(i)); + node->clear_child(i); + } + } + node->remove_values_ignore_children(_begin.position, to_erase, + mutable_allocator()); + + // Do not need to update rightmost_, because + // * either _end == this->end(), and therefore node == rightmost_, and still + // exists + // * or _end != this->end(), and therefore rightmost_ hasn't been erased, since + // it wasn't covered in [_begin, _end) + } + + template + auto btree

::erase_from_leaf_node(iterator _begin, size_type to_erase) + -> iterator { + node_type *node = _begin.node; + assert(node->leaf()); + assert(node->count() > _begin.position); + assert(_begin.position + to_erase <= node->count()); + + node->remove_values_ignore_children(_begin.position, to_erase, + mutable_allocator()); + + size_ -= to_erase; + + return rebalance_after_delete(_begin); + } + + template + template + auto btree

::erase_unique(const K &key) -> size_type { + const iterator iter = internal_find(key); + if (iter.node == nullptr) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + erase(iter); + return 1; + } + + template + template + auto btree

::erase_multi(const K &key) -> size_type { + const iterator _begin = internal_lower_bound(key); + if (_begin.node == nullptr) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + // Delete all of the keys between _begin and upper_bound(key). + const iterator _end = internal_end(internal_upper_bound(key)); + return erase(_begin, _end).first; + } + + template + void btree

::clear() { + if (!empty()) { + internal_clear(root()); + } + mutable_root() = EmptyNode(); + rightmost_ = EmptyNode(); + size_ = 0; + } + + template + void btree

::swap(btree &x) { + using std::swap; + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_swap::value) { + // Note: `root_` also contains the allocator and the key comparator. + swap(root_, x.root_); + } else { + // It's undefined behavior if the allocators are unequal here. + assert(allocator() == x.allocator()); + swap(mutable_root(), x.mutable_root()); + swap(*mutable_key_comp(), *x.mutable_key_comp()); + } + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } + + template + void btree

::verify() const { + assert(root() != nullptr); + assert(leftmost() != nullptr); + assert(rightmost_ != nullptr); + assert(empty() || size() == internal_verify(root(), nullptr, nullptr)); + assert(leftmost() == (++const_iterator(root(), -1)).node); + assert(rightmost_ == (--const_iterator(root(), root()->count())).node); + assert(leftmost()->leaf()); + assert(rightmost_->leaf()); + } + + template + void btree

::rebalance_or_split(iterator *iter) { + node_type *&node = iter->node; + int &insert_position = iter->position; + assert(node->count() == node->max_count()); + assert(kNodeValues == node->max_count()); + + // First try to make room on the node by rebalancing. + node_type *parent = node->parent(); + if (node != root()) { + if (node->position() > 0) { + // Try rebalancing with our left sibling. + node_type *left = parent->child(node->position() - 1); + assert(left->max_count() == kNodeValues); + if (left->count() < kNodeValues) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the end of the right node then we bias rebalancing to + // fill up the left node. + int to_move = (kNodeValues - left->count()) / + (1 + (insert_position < kNodeValues)); + to_move = (std::max)(1, to_move); + + if (((insert_position - to_move) >= 0) || + ((left->count() + to_move) < kNodeValues)) { + left->rebalance_right_to_left(to_move, node, mutable_allocator()); + + assert(node->max_count() - node->count() == to_move); + insert_position = insert_position - to_move; + if (insert_position < 0) { + insert_position = insert_position + left->count() + 1; + node = left; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + if (node->position() < parent->count()) { + // Try rebalancing with our right sibling. + node_type *right = parent->child(node->position() + 1); + assert(right->max_count() == kNodeValues); + if (right->count() < kNodeValues) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the _beginning of the left node then we bias rebalancing + // to fill up the right node. 
+ int to_move = + (kNodeValues - right->count()) / (1 + (insert_position > 0)); + to_move = (std::max)(1, to_move); + + if ((insert_position <= (node->count() - to_move)) || + ((right->count() + to_move) < kNodeValues)) { + node->rebalance_left_to_right(to_move, right, mutable_allocator()); + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = right; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + // Rebalancing failed, make sure there is room on the parent node for a new + // value. + assert(parent->max_count() == kNodeValues); + if (parent->count() == kNodeValues) { + iterator parent_iter(node->parent(), node->position()); + rebalance_or_split(&parent_iter); + } + } else { + // Rebalancing not possible because this is the root node. + // Create a new root node and set the current root node as the child of the + // new root. + parent = new_internal_node(parent); + parent->init_child(0, root()); + mutable_root() = parent; + // If the former root was a leaf node, then it's now the rightmost node. + assert(!parent->child(0)->leaf() || parent->child(0) == rightmost_); + } + + // Split the node. + node_type *split_node; + if (node->leaf()) { + split_node = new_leaf_node(parent); + node->split(insert_position, split_node, mutable_allocator()); + if (rightmost_ == node) rightmost_ = split_node; + } else { + split_node = new_internal_node(parent); + node->split(insert_position, split_node, mutable_allocator()); + } + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = split_node; + } + } + + template + void btree

::merge_nodes(node_type *left, node_type *right) { + left->merge(right, mutable_allocator()); + if (right->leaf()) { + if (rightmost_ == right) rightmost_ = left; + delete_leaf_node(right); + } else { + delete_internal_node(right); + } + } + + template + bool btree

::try_merge_or_rebalance(iterator *iter) { + node_type *parent = iter->node->parent(); + if (iter->node->position() > 0) { + // Try merging with our left sibling. + node_type *left = parent->child(iter->node->position() - 1); + assert(left->max_count() == kNodeValues); + if ((1 + left->count() + iter->node->count()) <= kNodeValues) { + iter->position += 1 + left->count(); + merge_nodes(left, iter->node); + iter->node = left; + return true; + } + } + if (iter->node->position() < parent->count()) { + // Try merging with our right sibling. + node_type *right = parent->child(iter->node->position() + 1); + assert(right->max_count() == kNodeValues); + if ((1 + iter->node->count() + right->count()) <= kNodeValues) { + merge_nodes(iter->node, right); + return true; + } + // Try rebalancing with our right sibling. We don't perform rebalancing if + // we deleted the first element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the front of the tree. + if ((right->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position > 0))) { + int to_move = (right->count() - iter->node->count()) / 2; + to_move = (std::min)(to_move, right->count() - 1); + iter->node->rebalance_right_to_left(to_move, right, mutable_allocator()); + return false; + } + } + if (iter->node->position() > 0) { + // Try rebalancing with our left sibling. We don't perform rebalancing if + // we deleted the last element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the back of the tree. 
+ node_type *left = parent->child(iter->node->position() - 1); + if ((left->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position < iter->node->count()))) { + int to_move = (left->count() - iter->node->count()) / 2; + to_move = (std::min)(to_move, left->count() - 1); + left->rebalance_left_to_right(to_move, iter->node, mutable_allocator()); + iter->position += to_move; + return false; + } + } + return false; + } + + template + void btree

::try_shrink() { + if (root()->count() > 0) { + return; + } + // Deleted the last item on the root node, shrink the height of the tree. + if (root()->leaf()) { + assert(size() == 0); + delete_leaf_node(root()); + mutable_root() = EmptyNode(); + rightmost_ = EmptyNode(); + } else { + node_type *child = root()->child(0); + child->make_root(); + delete_internal_node(root()); + mutable_root() = child; + } + } + + template + template + inline IterType btree

::internal_last(IterType iter) { + assert(iter.node != nullptr); + while (iter.position == iter.node->count()) { + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + if (iter.node->leaf()) { + iter.node = nullptr; + break; + } + } + return iter; + } + + template + template + inline auto btree

::internal_emplace(iterator iter, Args &&... args) + -> iterator { + if (!iter.node->leaf()) { + // We can't insert on an internal node. Instead, we'll insert after the + // previous value which is guaranteed to be on a leaf node. + --iter; + ++iter.position; + } + const int max_count = iter.node->max_count(); + if (iter.node->count() == max_count) { + // Make room in the leaf for the new item. + if (max_count < kNodeValues) { + // Insertion into the root where the root is smaller than the full node + // size. Simply grow the size of the root node. + assert(iter.node == root()); + iter.node = + new_leaf_root_node((std::min)(kNodeValues, 2 * max_count)); + iter.node->swap(root(), mutable_allocator()); + delete_leaf_node(root()); + mutable_root() = iter.node; + rightmost_ = iter.node; + } else { + rebalance_or_split(&iter); + } + } + iter.node->emplace_value(iter.position, mutable_allocator(), + std::forward(args)...); + ++size_; + return iter; + } + + template + template + inline auto btree

::internal_locate(const K &key) const + -> SearchResult { + return internal_locate_impl(key, is_key_compare_to()); + } + + template + template + inline auto btree

::internal_locate_impl( + const K &key, std::false_type /* IsCompareTo */) const + -> SearchResult { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()).value; + // NOTE: we don't need to walk all the way down the tree if the keys are + // equal, but determining equality would require doing an extra comparison + // on each node on the way down, and we will need to go all the way to the + // leaf node in the expected case. + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return {iter}; + } + + template + template + inline auto btree

::internal_locate_impl( + const K &key, std::true_type /* IsCompareTo */) const + -> SearchResult { + iterator iter(const_cast(root()), 0); + for (;;) { + SearchResult res = iter.node->lower_bound(key, key_comp()); + iter.position = res.value; + if (res.match == MatchKind::kEq) { + return {iter, MatchKind::kEq}; + } + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return {iter, MatchKind::kNe}; + } + + template + template + auto btree

::internal_lower_bound(const K &key) const -> iterator { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()).value; + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return internal_last(iter); + } + + template + template + auto btree

::internal_upper_bound(const K &key) const -> iterator { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->upper_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return internal_last(iter); + } + + template + template + auto btree

::internal_find(const K &key) const -> iterator { + auto res = internal_locate(key); + if (res.HasMatch()) { + if (res.IsEq()) { + return res.value; + } + } else { + const iterator iter = internal_last(res.value); + if (iter.node != nullptr && !compare_keys(key, iter.key())) { + return iter; + } + } + return {nullptr, 0}; + } + + template + void btree

::internal_clear(node_type *node) { + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + internal_clear(node->child(i)); + } + delete_internal_node(node); + } else { + delete_leaf_node(node); + } + } + + template + typename btree

::size_type btree

::internal_verify( + const node_type *node, const key_type *lo, const key_type *hi) const { + assert(node->count() > 0); + assert(node->count() <= node->max_count()); + if (lo) { + assert(!compare_keys(node->key(0), *lo)); + } + if (hi) { + assert(!compare_keys(*hi, node->key(node->count() - 1))); + } + for (int i = 1; i < node->count(); ++i) { + assert(!compare_keys(node->key(i), node->key(i - 1))); + } + size_type count = node->count(); + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + assert(node->child(i) != nullptr); + assert(node->child(i)->parent() == node); + assert(node->child(i)->position() == i); + count += internal_verify( + node->child(i), + (i == 0) ? lo : &node->key(i - 1), + (i == node->count()) ? hi : &node->key(i)); + } + } + return count; + } + + // A common base class for btree_set, btree_map, btree_multiset, and btree_multimap. + // --------------------------------------------------------------------------------- + template + class btree_container { + using params_type = typename Tree::params_type; + + protected: + // Alias used for heterogeneous lookup functions. + // `key_arg` evaluates to `K` when the functors are transparent and to + // `key_type` otherwise. It permits template argument deduction on `K` for the + // transparent case. 
+ template + using key_arg = + typename KeyArg::value>:: + template type; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using difference_type = typename Tree::difference_type; + using key_compare = typename Tree::key_compare; + using value_compare = typename Tree::value_compare; + using allocator_type = typename Tree::allocator_type; + using reference = typename Tree::reference; + using const_reference = typename Tree::const_reference; + using pointer = typename Tree::pointer; + using const_pointer = typename Tree::const_pointer; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using reverse_iterator = typename Tree::reverse_iterator; + using const_reverse_iterator = typename Tree::const_reverse_iterator; + using node_type = typename Tree::node_handle_type; + + // Constructors/assignments. + btree_container() : tree_(key_compare(), allocator_type()) {} + explicit btree_container(const key_compare &comp, + const allocator_type &alloc = allocator_type()) + : tree_(comp, alloc) {} + btree_container(const btree_container &x) = default; + btree_container(btree_container &&x) noexcept = default; + btree_container &operator=(const btree_container &x) = default; + btree_container &operator=(btree_container &&x) noexcept( + std::is_nothrow_move_assignable::value) = default; + + // Iterator routines. 
+ iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + const_iterator cbegin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + const_iterator cend() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + const_reverse_iterator crbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + const_reverse_iterator crend() const { return tree_.rend(); } + + // Lookup routines. + // ---------------- + template + size_type count(const key_arg &key) const { + auto equal_range = this->equal_range(key); + return std::distance(equal_range.first, equal_range.second); + } + template + iterator find(const key_arg &key) { + return tree_.find(key); + } + template + const_iterator find(const key_arg &key) const { return tree_.find(key); } + + template + bool contains(const key_arg &key) const { return find(key) != end(); } + + template + iterator lower_bound(const key_arg &key) { return tree_.lower_bound(key); } + + template + const_iterator lower_bound(const key_arg &key) const { return tree_.lower_bound(key); } + + template + iterator upper_bound(const key_arg &key) { return tree_.upper_bound(key); } + + template + const_iterator upper_bound(const key_arg &key) const { return tree_.upper_bound(key); } + + template + std::pair equal_range(const key_arg &key) { return tree_.equal_range(key); } + + template + std::pair equal_range( + const key_arg &key) const { + return tree_.equal_range(key); + } + + iterator erase(const_iterator iter) { return tree_.erase(iterator(iter)); } + iterator erase(iterator iter) { return tree_.erase(iter); } + iterator erase(const_iterator first, const_iterator last) { + return tree_.erase(iterator(first), iterator(last)).second; + } + 
template + size_type erase(const key_arg &key) { + auto equal_range = this->equal_range(key); + return tree_.erase_range(equal_range.first, equal_range.second).first; + } + node_type extract(iterator position) { + // Use Move instead of Transfer, because the rebalancing code expects to + // have a valid object to scribble metadata bits on top of. + auto node = CommonAccess::Move(get_allocator(), position.slot()); + erase(position); + return node; + } + + node_type extract(const_iterator position) { + return extract(iterator(position)); + } + + public: + void clear() { tree_.clear(); } + void swap(btree_container &x) { tree_.swap(x.tree_); } + void verify() const { tree_.verify(); } + + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + + friend bool operator==(const btree_container &x, const btree_container &y) { + if (x.size() != y.size()) return false; + return std::equal(x.begin(), x.end(), y.begin()); + } + + friend bool operator!=(const btree_container &x, const btree_container &y) { return !(x == y); } + + friend bool operator<(const btree_container &x, const btree_container &y) { + return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end()); + } + + friend bool operator>(const btree_container &x, const btree_container &y) { return y < x; } + + friend bool operator<=(const btree_container &x, const btree_container &y) { return !(y < x); } + + friend bool operator>=(const btree_container &x, const btree_container &y) { return !(x < y); } + + // The allocator used by the btree. + allocator_type get_allocator() const { return tree_.get_allocator(); } + + // The key comparator used by the btree. + key_compare key_comp() const { return tree_.key_comp(); } + value_compare value_comp() const { return tree_.value_comp(); } + + // Support absl::Hash. 
+ template + friend State AbslHashValue(State h, const btree_container &b) { + for (const auto &v : b) { + h = State::combine(std::move(h), v); + } + return State::combine(std::move(h), b.size()); + } + + protected: + Tree tree_; + }; + + // A common base class for btree_set and btree_map. + // ----------------------------------------------- + template + class btree_set_container : public btree_container { + using super_type = btree_container; + using params_type = typename Tree::params_type; + using init_type = typename params_type::init_type; + using is_key_compare_to = typename params_type::is_key_compare_to; + friend class BtreeNodePeer; + + protected: + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using node_type = typename super_type::node_type; + using insert_return_type = InsertReturnType; + using super_type::super_type; + btree_set_container() {} + + template + btree_set_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + btree_set_container(std::initializer_list init, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : btree_set_container(init.begin(), init.end(), comp, alloc) {} + + btree_set_container(std::initializer_list init, + const allocator_type &alloc) + : btree_set_container(init.begin(), init.end(), alloc) {} + + // Lookup routines. + template + size_type count(const key_arg &key) const { + return this->tree_.count_unique(key); + } + + // Insertion routines. 
+ std::pair insert(const value_type &x) { + return this->tree_.insert_unique(params_type::key(x), x); + } + std::pair insert(value_type &&x) { + return this->tree_.insert_unique(params_type::key(x), std::move(x)); + } + template + std::pair emplace(Args &&... args) { + init_type v(std::forward(args)...); + return this->tree_.insert_unique(params_type::key(v), std::move(v)); + } + iterator insert(const_iterator hint, const value_type &x) { + return this->tree_ + .insert_hint_unique(iterator(hint), params_type::key(x), x) + .first; + } + iterator insert(const_iterator hint, value_type &&x) { + return this->tree_ + .insert_hint_unique(iterator(hint), params_type::key(x), + std::move(x)) + .first; + } + + template + iterator emplace_hint(const_iterator hint, Args &&... args) { + init_type v(std::forward(args)...); + return this->tree_ + .insert_hint_unique(iterator(hint), params_type::key(v), + std::move(v)) + .first; + } + + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_iterator_unique(b, e); + } + + void insert(std::initializer_list init) { + this->tree_.insert_iterator_unique(init.begin(), init.end()); + } + + insert_return_type insert(node_type &&node) { + if (!node) return {this->end(), false, node_type()}; + std::pair res = + this->tree_.insert_unique(params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + if (res.second) { + CommonAccess::Destroy(&node); + return {res.first, true, node_type()}; + } else { + return {res.first, false, std::move(node)}; + } + } + + iterator insert(const_iterator hint, node_type &&node) { + if (!node) return this->end(); + std::pair res = this->tree_.insert_hint_unique( + iterator(hint), params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + if (res.second) CommonAccess::Destroy(&node); + return res.first; + } + + template + size_type erase(const key_arg &key) { return this->tree_.erase_unique(key); } + using super_type::erase; + + template + 
node_type extract(const key_arg &key) { + auto it = this->find(key); + return it == this->end() ? node_type() : extract(it); + } + + using super_type::extract; + + // Merge routines. + // Moves elements from `src` into `this`. If the element already exists in + // `this`, it is left unmodified in `src`. + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &src) { // NOLINT + for (auto src_it = src.begin(); src_it != src.end();) { + if (insert(std::move(*src_it)).second) { + src_it = src.erase(src_it); + } else { + ++src_it; + } + } + } + + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &&src) { + merge(src); + } + }; + + // Base class for btree_map. + // ------------------------- + template + class btree_map_container : public btree_set_container { + using super_type = btree_set_container; + using params_type = typename Tree::params_type; + + protected: + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using mapped_type = typename params_type::mapped_type; + using value_type = typename Tree::value_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + + // Inherit constructors. + using super_type::super_type; + btree_map_container() {} + + // Insertion routines. + template + std::pair try_emplace(const key_type &k, Args &&... args) { + return this->tree_.insert_unique( + k, std::piecewise_construct, std::forward_as_tuple(k), + std::forward_as_tuple(std::forward(args)...)); + } + template + std::pair try_emplace(key_type &&k, Args &&... 
args) { + // Note: `key_ref` exists to avoid a ClangTidy warning about moving from `k` + // and then using `k` unsequenced. This is safe because the move is into a + // forwarding reference and insert_unique guarantees that `key` is never + // referenced after consuming `args`. + const key_type& key_ref = k; + return this->tree_.insert_unique( + key_ref, std::piecewise_construct, std::forward_as_tuple(std::move(k)), + std::forward_as_tuple(std::forward(args)...)); + } + template + iterator try_emplace(const_iterator hint, const key_type &k, + Args &&... args) { + return this->tree_ + .insert_hint_unique(iterator(hint), k, std::piecewise_construct, + std::forward_as_tuple(k), + std::forward_as_tuple(std::forward(args)...)) + .first; + } + template + iterator try_emplace(const_iterator hint, key_type &&k, Args &&... args) { + // Note: `key_ref` exists to avoid a ClangTidy warning about moving from `k` + // and then using `k` unsequenced. This is safe because the move is into a + // forwarding reference and insert_hint_unique guarantees that `key` is + // never referenced after consuming `args`. + const key_type& key_ref = k; + return this->tree_ + .insert_hint_unique(iterator(hint), key_ref, std::piecewise_construct, + std::forward_as_tuple(std::move(k)), + std::forward_as_tuple(std::forward(args)...)) + .first; + } + mapped_type &operator[](const key_type &k) { + return try_emplace(k).first->second; + } + mapped_type &operator[](key_type &&k) { + return try_emplace(std::move(k)).first->second; + } + + template + mapped_type &at(const key_arg &key) { + auto it = this->find(key); + if (it == this->end()) + base_internal::ThrowStdOutOfRange("phmap::btree_map::at"); + return it->second; + } + template + const mapped_type &at(const key_arg &key) const { + auto it = this->find(key); + if (it == this->end()) + base_internal::ThrowStdOutOfRange("phmap::btree_map::at"); + return it->second; + } + }; + + // A common base class for btree_multiset and btree_multimap. 
+ template + class btree_multiset_container : public btree_container { + using super_type = btree_container; + using params_type = typename Tree::params_type; + using init_type = typename params_type::init_type; + using is_key_compare_to = typename params_type::is_key_compare_to; + + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using node_type = typename super_type::node_type; + + // Inherit constructors. + using super_type::super_type; + btree_multiset_container() {} + + // Range constructor. + template + btree_multiset_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Initializer list constructor. + btree_multiset_container(std::initializer_list init, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : btree_multiset_container(init.begin(), init.end(), comp, alloc) {} + + // Lookup routines. + template + size_type count(const key_arg &key) const { + return this->tree_.count_multi(key); + } + + // Insertion routines. 
+ iterator insert(const value_type &x) { return this->tree_.insert_multi(x); } + iterator insert(value_type &&x) { + return this->tree_.insert_multi(std::move(x)); + } + iterator insert(const_iterator hint, const value_type &x) { + return this->tree_.insert_hint_multi(iterator(hint), x); + } + iterator insert(const_iterator hint, value_type &&x) { + return this->tree_.insert_hint_multi(iterator(hint), std::move(x)); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_iterator_multi(b, e); + } + void insert(std::initializer_list init) { + this->tree_.insert_iterator_multi(init.begin(), init.end()); + } + template + iterator emplace(Args &&... args) { + return this->tree_.insert_multi(init_type(std::forward(args)...)); + } + template + iterator emplace_hint(const_iterator hint, Args &&... args) { + return this->tree_.insert_hint_multi( + iterator(hint), init_type(std::forward(args)...)); + } + iterator insert(node_type &&node) { + if (!node) return this->end(); + iterator res = + this->tree_.insert_multi(params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + CommonAccess::Destroy(&node); + return res; + } + iterator insert(const_iterator hint, node_type &&node) { + if (!node) return this->end(); + iterator res = this->tree_.insert_hint_multi( + iterator(hint), + std::move(params_type::element(CommonAccess::GetSlot(node)))); + CommonAccess::Destroy(&node); + return res; + } + + // Deletion routines. + template + size_type erase(const key_arg &key) { + return this->tree_.erase_multi(key); + } + using super_type::erase; + + // Node extraction routines. + template + node_type extract(const key_arg &key) { + auto it = this->find(key); + return it == this->end() ? node_type() : extract(it); + } + using super_type::extract; + + // Merge routines. + // Moves all elements from `src` into `this`. 
+ template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &src) { // NOLINT + insert(std::make_move_iterator(src.begin()), + std::make_move_iterator(src.end())); + src.clear(); + } + + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &&src) { + merge(src); + } + }; + + // A base class for btree_multimap. + template + class btree_multimap_container : public btree_multiset_container { + using super_type = btree_multiset_container; + using params_type = typename Tree::params_type; + + public: + using mapped_type = typename params_type::mapped_type; + + // Inherit constructors. + using super_type::super_type; + btree_multimap_container() {} + }; + +} // namespace priv + + + + // ---------------------------------------------------------------------- + // btree_set - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_set : public priv::btree_set_container< + priv::btree>> + { + using Base = typename btree_set::btree_set_container; + + public: + btree_set() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::lower_bound; + using Base::upper_bound; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_set` containers. 
+ // ------------------------------------------------------- + template + void swap(btree_set &x, btree_set &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. + // ---------------------------------------------------------------------- + template + void erase_if(btree_set &set, Pred pred) { + for (auto it = set.begin(); it != set.end();) { + if (pred(*it)) { + it = set.erase(it); + } else { + ++it; + } + } + } + + // ---------------------------------------------------------------------- + // btree_multiset - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_multiset : public priv::btree_multiset_container< + priv::btree>> + { + using Base = typename btree_multiset::btree_multiset_container; + + public: + btree_multiset() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::lower_bound; + using Base::upper_bound; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_multiset` containers. + // ------------------------------------------------------------ + template + void swap(btree_multiset &x, btree_multiset &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. 
+ // ---------------------------------------------------------------------- + template + void erase_if(btree_multiset &set, Pred pred) { + for (auto it = set.begin(); it != set.end();) { + if (pred(*it)) { + it = set.erase(it); + } else { + ++it; + } + } + } + + + // ---------------------------------------------------------------------- + // btree_map - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_map : public priv::btree_map_container< + priv::btree>> + { + using Base = typename btree_map::btree_map_container; + + public: + btree_map() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::lower_bound; + using Base::upper_bound; + using Base::find; + using Base::operator[]; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_map` containers. 
+ // ------------------------------------------------------- + template + void swap(btree_map &x, btree_map &y) { + return x.swap(y); + } + + // ---------------------------------------------------------------------- + template + void erase_if(btree_map &map, Pred pred) { + for (auto it = map.begin(); it != map.end();) { + if (pred(*it)) { + it = map.erase(it); + } else { + ++it; + } + } + } + + // ---------------------------------------------------------------------- + // btree_multimap - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_multimap : public priv::btree_multimap_container< + priv::btree>> + { + using Base = typename btree_multimap::btree_multimap_container; + + public: + btree_multimap() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::lower_bound; + using Base::upper_bound; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_multimap` containers. + // ------------------------------------------------------------ + template + void swap(btree_multimap &x, btree_multimap &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. 
+ // ---------------------------------------------------------------------- + template + void erase_if(btree_multimap &map, Pred pred) { + for (auto it = map.begin(); it != map.end();) { + if (pred(*it)) { + it = map.erase(it); + } else { + ++it; + } + } + } + + +} // namespace btree + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + + +#endif // PHMAP_BTREE_BTREE_CONTAINER_H_ diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/conanfile.py b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/conanfile.py new file mode 100644 index 00000000..42112473 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/conanfile.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from conans import ConanFile, tools +import os + +class SparseppConan(ConanFile): + name = "parallel_hashmap" + version = "1.36" + description = "A header-only, very fast and memory-friendly hash map" + url = "https://github.com/greg7mdp/parallel-hashmap/blob/master/parallel_hashmap/conanfile.py" + + # Indicates License type of the packaged library + license = "https://github.com/greg7mdp/parallel-hashmap/blob/master/LICENSE" + + # Packages the license for the conanfile.py + exports = ["LICENSE"] + + # Custom attributes for Bincrafters recipe conventions + source_subfolder = "source_subfolder" + + def source(self): + source_url = "https://github.com/greg7mdp/parallel-hashmap" + tools.get("{0}/archive/{1}.tar.gz".format(source_url, self.version)) + extracted_dir = self.name + "-" + self.version + + #Rename to "source_folder" is a convention to simplify later steps + os.rename(extracted_dir, self.source_subfolder) + + + def package(self): + include_folder = os.path.join(self.source_subfolder, "parallel_hashmap") + self.copy(pattern="LICENSE") + self.copy(pattern="*", dst="include/parallel_hashmap", src=include_folder) + + def package_id(self): + self.info.header_only() diff --git 
a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/meminfo.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/meminfo.h new file mode 100644 index 00000000..872f3c69 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/meminfo.h @@ -0,0 +1,195 @@ +#if !defined(spp_memory_h_guard) +#define spp_memory_h_guard + +#include +#include +#include + +#if defined(_WIN32) || defined( __CYGWIN__) + #define SPP_WIN +#endif + +#ifdef SPP_WIN + #include + #include + #undef min + #undef max +#elif defined(__linux__) + #include + #include +#elif defined(__FreeBSD__) + #include + #include + #include + #include + #include + #include +#endif + +namespace spp +{ + uint64_t GetSystemMemory(); + uint64_t GetTotalMemoryUsed(); + uint64_t GetProcessMemoryUsed(); + uint64_t GetPhysicalMemory(); + + uint64_t GetSystemMemory() + { +#ifdef SPP_WIN + MEMORYSTATUSEX memInfo; + memInfo.dwLength = sizeof(MEMORYSTATUSEX); + GlobalMemoryStatusEx(&memInfo); + return static_cast(memInfo.ullTotalPageFile); +#elif defined(__linux__) + struct sysinfo memInfo; + sysinfo (&memInfo); + auto totalVirtualMem = memInfo.totalram; + + totalVirtualMem += memInfo.totalswap; + totalVirtualMem *= memInfo.mem_unit; + return static_cast(totalVirtualMem); +#elif defined(__FreeBSD__) + kvm_t *kd; + u_int pageCnt; + size_t pageCntLen = sizeof(pageCnt); + u_int pageSize; + struct kvm_swap kswap; + uint64_t totalVirtualMem; + + pageSize = static_cast(getpagesize()); + + sysctlbyname("vm.stats.vm.v_page_count", &pageCnt, &pageCntLen, NULL, 0); + totalVirtualMem = pageCnt * pageSize; + + kd = kvm_open(NULL, _PATH_DEVNULL, NULL, O_RDONLY, "kvm_open"); + kvm_getswapinfo(kd, &kswap, 1, 0); + kvm_close(kd); + totalVirtualMem += kswap.ksw_total * pageSize; + + return totalVirtualMem; +#else + return 0; +#endif + } + + uint64_t GetTotalMemoryUsed() + { +#ifdef SPP_WIN + MEMORYSTATUSEX memInfo; + memInfo.dwLength = sizeof(MEMORYSTATUSEX); + 
GlobalMemoryStatusEx(&memInfo); + return static_cast(memInfo.ullTotalPageFile - memInfo.ullAvailPageFile); +#elif defined(__linux__) + struct sysinfo memInfo; + sysinfo(&memInfo); + auto virtualMemUsed = memInfo.totalram - memInfo.freeram; + + virtualMemUsed += memInfo.totalswap - memInfo.freeswap; + virtualMemUsed *= memInfo.mem_unit; + + return static_cast(virtualMemUsed); +#elif defined(__FreeBSD__) + kvm_t *kd; + u_int pageSize; + u_int pageCnt, freeCnt; + size_t pageCntLen = sizeof(pageCnt); + size_t freeCntLen = sizeof(freeCnt); + struct kvm_swap kswap; + uint64_t virtualMemUsed; + + pageSize = static_cast(getpagesize()); + + sysctlbyname("vm.stats.vm.v_page_count", &pageCnt, &pageCntLen, NULL, 0); + sysctlbyname("vm.stats.vm.v_free_count", &freeCnt, &freeCntLen, NULL, 0); + virtualMemUsed = (pageCnt - freeCnt) * pageSize; + + kd = kvm_open(NULL, _PATH_DEVNULL, NULL, O_RDONLY, "kvm_open"); + kvm_getswapinfo(kd, &kswap, 1, 0); + kvm_close(kd); + virtualMemUsed += kswap.ksw_used * pageSize; + + return virtualMemUsed; +#else + return 0; +#endif + } + + uint64_t GetProcessMemoryUsed() + { +#ifdef SPP_WIN + PROCESS_MEMORY_COUNTERS_EX pmc; + GetProcessMemoryInfo(GetCurrentProcess(), reinterpret_cast(&pmc), sizeof(pmc)); + return static_cast(pmc.PrivateUsage); +#elif defined(__linux__) + auto parseLine = + [](char* line)->int + { + auto i = strlen(line); + + while(*line < '0' || *line > '9') + { + line++; + } + + line[i-3] = '\0'; + i = atoi(line); + return i; + }; + + auto file = fopen("/proc/self/status", "r"); + auto result = -1; + char line[128]; + + while(fgets(line, 128, file) != nullptr) + { + if(strncmp(line, "VmSize:", 7) == 0) + { + result = parseLine(line); + break; + } + } + + fclose(file); + return static_cast(result) * 1024; +#elif defined(__FreeBSD__) + struct kinfo_proc info; + size_t infoLen = sizeof(info); + int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid() }; + + sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &infoLen, NULL, 0); + return 
static_cast(info.ki_rssize * getpagesize()); +#else + return 0; +#endif + } + + uint64_t GetPhysicalMemory() + { +#ifdef SPP_WIN + MEMORYSTATUSEX memInfo; + memInfo.dwLength = sizeof(MEMORYSTATUSEX); + GlobalMemoryStatusEx(&memInfo); + return static_cast(memInfo.ullTotalPhys); +#elif defined(__linux__) + struct sysinfo memInfo; + sysinfo(&memInfo); + + auto totalPhysMem = memInfo.totalram; + + totalPhysMem *= memInfo.mem_unit; + return static_cast(totalPhysMem); +#elif defined(__FreeBSD__) + u_long physMem; + size_t physMemLen = sizeof(physMem); + int mib[] = { CTL_HW, HW_PHYSMEM }; + + sysctl(mib, sizeof(mib) / sizeof(*mib), &physMem, &physMemLen, NULL, 0); + return physMem; +#else + return 0; +#endif + } + +} + +#endif // spp_memory_h_guard diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap.h new file mode 100644 index 00000000..a4215875 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap.h @@ -0,0 +1,5118 @@ +#if !defined(phmap_h_guard_) +#define phmap_h_guard_ + +// --------------------------------------------------------------------------- +// Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) +// with modifications. 
+// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// IMPLEMENTATION DETAILS +// +// The table stores elements inline in a slot array. In addition to the slot +// array the table maintains some control state per slot. The extra state is one +// byte per slot and stores empty or deleted marks, or alternatively 7 bits from +// the hash of an occupied slot. The table is split into logical groups of +// slots, like so: +// +// Group 1 Group 2 Group 3 +// +---------------+---------------+---------------+ +// | | | | | | | | | | | | | | | | | | | | | | | | | +// +---------------+---------------+---------------+ +// +// On lookup the hash is split into two parts: +// - H2: 7 bits (those stored in the control bytes) +// - H1: the rest of the bits +// The groups are probed using H1. For each group the slots are matched to H2 in +// parallel. Because H2 is 7 bits (128 states) and the number of slots per group +// is low (8 or 16) in almost all cases a match in H2 is also a lookup hit. +// +// On insert, once the right group is found (as in lookup), its slots are +// filled in order. +// +// On erase a slot is cleared. In case the group did not have any empty slots +// before the erase, the erased slot is marked as deleted. 
+// +// Groups without empty slots (but maybe with deleted slots) extend the probe +// sequence. The probing algorithm is quadratic. Given N the number of groups, +// the probing function for the i'th probe is: +// +// P(0) = H1 % N +// +// P(i) = (P(i - 1) + i) % N +// +// This probing function guarantees that after N probes, all the groups of the +// table will be probed exactly once. +// +// The control state and slot array are stored contiguously in a shared heap +// allocation. The layout of this allocation is: `capacity()` control bytes, +// one sentinel control byte, `Group::kWidth - 1` cloned control bytes, +// , `capacity()` slots. The sentinel control byte is used in +// iteration so we know when we reach the end of the table. The cloned control +// bytes at the end of the table are cloned from the beginning of the table so +// groups that begin near the end of the table can see a full group. In cases in +// which there are more than `capacity()` cloned control bytes, the extra bytes +// are `kEmpty`, and these ensure that we always see at least one empty slot and +// can stop an unsuccessful search. 
+// --------------------------------------------------------------------------- + + + +#ifdef _MSC_VER + #pragma warning(push) + + #pragma warning(disable : 4127) // conditional expression is constant + #pragma warning(disable : 4324) // structure was padded due to alignment specifier + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4623) // default constructor was implicitly defined as deleted + #pragma warning(disable : 4625) // copy constructor was implicitly defined as deleted + #pragma warning(disable : 4626) // assignment operator was implicitly defined as deleted + #pragma warning(disable : 4710) // function not inlined + #pragma warning(disable : 4711) // selected for automatic inline expansion + #pragma warning(disable : 4820) // '6' bytes padding added after data member + #pragma warning(disable : 4868) // compiler may not enforce left-to-right evaluation order in braced initializer list + #pragma warning(disable : 5027) // move assignment operator was implicitly defined as deleted + #pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "phmap_fwd_decl.h" +#include "phmap_utils.h" +#include "phmap_base.h" + +#if PHMAP_HAVE_STD_STRING_VIEW + #include +#endif + +namespace phmap { + +namespace priv { + +// -------------------------------------------------------------------------- +template +void SwapAlloc(AllocType& lhs, AllocType& rhs, + std::true_type /* propagate_on_container_swap */) { + using std::swap; + swap(lhs, rhs); +} +template +void SwapAlloc(AllocType& /*lhs*/, AllocType& /*rhs*/, + std::false_type /* propagate_on_container_swap */) {} + +// -------------------------------------------------------------------------- +template +class probe_seq +{ +public: + probe_seq(size_t 
hashval, size_t mask) { + assert(((mask + 1) & mask) == 0 && "not a mask"); + mask_ = mask; + offset_ = hashval & mask_; + } + size_t offset() const { return offset_; } + size_t offset(size_t i) const { return (offset_ + i) & mask_; } + + void next() { + index_ += Width; + offset_ += index_; + offset_ &= mask_; + } + // 0-based probe index. The i-th probe in the probe sequence. + size_t getindex() const { return index_; } + +private: + size_t mask_; + size_t offset_; + size_t index_ = 0; +}; + +// -------------------------------------------------------------------------- +template +struct RequireUsableKey +{ + template + std::pair< + decltype(std::declval()(std::declval())), + decltype(std::declval()(std::declval(), + std::declval()))>* + operator()(const PassedKey&, const Args&...) const; +}; + +// -------------------------------------------------------------------------- +template +struct IsDecomposable : std::false_type {}; + +template +struct IsDecomposable< + phmap::void_t(), + std::declval()...))>, + Policy, Hash, Eq, Ts...> : std::true_type {}; + +// TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it. 
+// -------------------------------------------------------------------------- +template +constexpr bool IsNoThrowSwappable() { + using std::swap; + return noexcept(swap(std::declval(), std::declval())); +} + +// -------------------------------------------------------------------------- +template +int TrailingZeros(T x) { + PHMAP_IF_CONSTEXPR(sizeof(T) == 8) + return base_internal::CountTrailingZerosNonZero64(static_cast(x)); + else + return base_internal::CountTrailingZerosNonZero32(static_cast(x)); +} + +// -------------------------------------------------------------------------- +template +int LeadingZeros(T x) { + PHMAP_IF_CONSTEXPR(sizeof(T) == 8) + return base_internal::CountLeadingZeros64(static_cast(x)); + else + return base_internal::CountLeadingZeros32(static_cast(x)); +} + +// -------------------------------------------------------------------------- +// An abstraction over a bitmask. It provides an easy way to iterate through the +// indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE), +// this is a true bitmask. On non-SSE, platforms the arithematic used to +// emulate the SSE behavior works in bytes (Shift=3) and leaves each bytes as +// either 0x00 or 0x80. +// +// For example: +// for (int i : BitMask(0x5)) -> yields 0, 2 +// for (int i : BitMask(0x0000000080800000)) -> yields 2, 3 +// -------------------------------------------------------------------------- +template +class BitMask +{ + static_assert(std::is_unsigned::value, ""); + static_assert(Shift == 0 || Shift == 3, ""); + +public: + // These are useful for unit tests (gunit). 
+ using value_type = int; + using iterator = BitMask; + using const_iterator = BitMask; + + explicit BitMask(T mask) : mask_(mask) {} + + BitMask& operator++() { // ++iterator + mask_ &= (mask_ - 1); // clear the least significant bit set + return *this; + } + + explicit operator bool() const { return mask_ != 0; } + uint32_t operator*() const { return LowestBitSet(); } + + uint32_t LowestBitSet() const { + return priv::TrailingZeros(mask_) >> Shift; + } + + uint32_t HighestBitSet() const { + return (sizeof(T) * CHAR_BIT - priv::LeadingZeros(mask_) - 1) >> Shift; + } + + BitMask begin() const { return *this; } + BitMask end() const { return BitMask(0); } + + uint32_t TrailingZeros() const { + return priv::TrailingZeros(mask_) >> Shift; + } + + uint32_t LeadingZeros() const { + constexpr uint32_t total_significant_bits = SignificantBits << Shift; + constexpr uint32_t extra_bits = sizeof(T) * 8 - total_significant_bits; + return priv::LeadingZeros(mask_ << extra_bits) >> Shift; + } + +private: + friend bool operator==(const BitMask& a, const BitMask& b) { + return a.mask_ == b.mask_; + } + friend bool operator!=(const BitMask& a, const BitMask& b) { + return a.mask_ != b.mask_; + } + + T mask_; +}; + +// -------------------------------------------------------------------------- +using ctrl_t = signed char; +using h2_t = uint8_t; + +// -------------------------------------------------------------------------- +// The values here are selected for maximum performance. See the static asserts +// below for details. 
+// -------------------------------------------------------------------------- +enum Ctrl : ctrl_t +{ + kEmpty = -128, // 0b10000000 or 0x80 + kDeleted = -2, // 0b11111110 or 0xfe + kSentinel = -1, // 0b11111111 or 0xff +}; + +static_assert( + kEmpty & kDeleted & kSentinel & 0x80, + "Special markers need to have the MSB to make checking for them efficient"); +static_assert(kEmpty < kSentinel && kDeleted < kSentinel, + "kEmpty and kDeleted must be smaller than kSentinel to make the " + "SIMD test of IsEmptyOrDeleted() efficient"); +static_assert(kSentinel == -1, + "kSentinel must be -1 to elide loading it from memory into SIMD " + "registers (pcmpeqd xmm, xmm)"); +static_assert(kEmpty == -128, + "kEmpty must be -128 to make the SIMD check for its " + "existence efficient (psignb xmm, xmm)"); +static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F, + "kEmpty and kDeleted must share an unset bit that is not shared " + "by kSentinel to make the scalar test for MatchEmptyOrDeleted() " + "efficient"); +static_assert(kDeleted == -2, + "kDeleted must be -2 to make the implementation of " + "ConvertSpecialToEmptyAndFullToDeleted efficient"); + +// -------------------------------------------------------------------------- +// A single block of empty control bytes for tables without any slots allocated. +// This enables removing a branch in the hot path of find(). +// -------------------------------------------------------------------------- +inline ctrl_t* EmptyGroup() { + alignas(16) static constexpr ctrl_t empty_group[] = { + kSentinel, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, + kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty}; + return const_cast(empty_group); +} + +// -------------------------------------------------------------------------- +inline size_t HashSeed(const ctrl_t* ctrl) { + // The low bits of the pointer have little or no entropy because of + // alignment. We shift the pointer to try to use higher entropy bits. 
A + // good number seems to be 12 bits, because that aligns with page size. + return reinterpret_cast(ctrl) >> 12; +} + +#ifdef PHMAP_NON_DETERMINISTIC + +inline size_t H1(size_t hashval, const ctrl_t* ctrl) { + // use ctrl_ pointer to add entropy to ensure + // non-deterministic iteration order. + return (hashval >> 7) ^ HashSeed(ctrl); +} + +#else + +inline size_t H1(size_t hashval, const ctrl_t* ) { + return (hashval >> 7); +} + +#endif + + +inline h2_t H2(size_t hashval) { return (ctrl_t)(hashval & 0x7F); } + +inline bool IsEmpty(ctrl_t c) { return c == kEmpty; } +inline bool IsFull(ctrl_t c) { return c >= static_cast(0); } +inline bool IsDeleted(ctrl_t c) { return c == kDeleted; } +inline bool IsEmptyOrDeleted(ctrl_t c) { return c < kSentinel; } + +#if PHMAP_HAVE_SSE2 + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4365) // conversion from 'int' to 'T', signed/unsigned mismatch +#endif + +// -------------------------------------------------------------------------- +// https://github.com/abseil/abseil-cpp/issues/209 +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87853 +// _mm_cmpgt_epi8 is broken under GCC with -funsigned-char +// Work around this by using the portable implementation of Group +// when using -funsigned-char under GCC. 
+// -------------------------------------------------------------------------- +inline __m128i _mm_cmpgt_epi8_fixed(__m128i a, __m128i b) { +#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Woverflow" + + if (std::is_unsigned::value) { + const __m128i mask = _mm_set1_epi8(static_cast(0x80)); + const __m128i diff = _mm_subs_epi8(b, a); + return _mm_cmpeq_epi8(_mm_and_si128(diff, mask), mask); + } + + #pragma GCC diagnostic pop +#endif + return _mm_cmpgt_epi8(a, b); +} + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +struct GroupSse2Impl +{ + enum { kWidth = 16 }; // the number of slots per group + + explicit GroupSse2Impl(const ctrl_t* pos) { + ctrl = _mm_loadu_si128(reinterpret_cast(pos)); + } + + // Returns a bitmask representing the positions of slots that match hash. + // ---------------------------------------------------------------------- + BitMask Match(h2_t hash) const { + auto match = _mm_set1_epi8((char)hash); + return BitMask( + static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)))); + } + + // Returns a bitmask representing the positions of empty slots. + // ------------------------------------------------------------ + BitMask MatchEmpty() const { +#if PHMAP_HAVE_SSSE3 + // This only works because kEmpty is -128. + return BitMask( + static_cast(_mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl)))); +#else + return Match(static_cast(kEmpty)); +#endif + } + +#ifdef __INTEL_COMPILER +#pragma warning push +#pragma warning disable 68 +#endif + // Returns a bitmask representing the positions of empty or deleted slots. 
+ // ----------------------------------------------------------------------- + BitMask MatchEmptyOrDeleted() const { + auto special = _mm_set1_epi8(static_cast(kSentinel)); + return BitMask( + static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)))); + } + + // Returns the number of trailing empty or deleted elements in the group. + // ---------------------------------------------------------------------- + uint32_t CountLeadingEmptyOrDeleted() const { + auto special = _mm_set1_epi8(static_cast(kSentinel)); + return TrailingZeros( + static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1)); + } +#ifdef __INTEL_COMPILER +#pragma warning pop +#endif + + // ---------------------------------------------------------------------- + void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { + auto msbs = _mm_set1_epi8(static_cast(-128)); + auto x126 = _mm_set1_epi8(126); +#if PHMAP_HAVE_SSSE3 + auto res = _mm_or_si128(_mm_shuffle_epi8(x126, ctrl), msbs); +#else + auto zero = _mm_setzero_si128(); + auto special_mask = _mm_cmpgt_epi8_fixed(zero, ctrl); + auto res = _mm_or_si128(msbs, _mm_andnot_si128(special_mask, x126)); +#endif + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), res); + } + + __m128i ctrl; +}; + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#endif // PHMAP_HAVE_SSE2 + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +struct GroupPortableImpl +{ + enum { kWidth = 8 }; + + explicit GroupPortableImpl(const ctrl_t* pos) + : ctrl(little_endian::Load64(pos)) {} + + BitMask Match(h2_t hash) const { + // For the technique, see: + // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord + // (Determine if a word has a byte equal to n). 
+ // + // Caveat: there are false positives but: + // - they only occur if there is a real match + // - they never occur on kEmpty, kDeleted, kSentinel + // - they will be handled gracefully by subsequent checks in code + // + // Example: + // v = 0x1716151413121110 + // hash = 0x12 + // retval = (v - lsbs) & ~v & msbs = 0x0000000080800000 + constexpr uint64_t msbs = 0x8080808080808080ULL; + constexpr uint64_t lsbs = 0x0101010101010101ULL; + auto x = ctrl ^ (lsbs * hash); + return BitMask((x - lsbs) & ~x & msbs); + } + + BitMask MatchEmpty() const { // bit 1 of each byte is 0 for empty (but not for deleted) + constexpr uint64_t msbs = 0x8080808080808080ULL; + return BitMask((ctrl & (~ctrl << 6)) & msbs); + } + + BitMask MatchEmptyOrDeleted() const { // lsb of each byte is 0 for empty or deleted + constexpr uint64_t msbs = 0x8080808080808080ULL; + return BitMask((ctrl & (~ctrl << 7)) & msbs); + } + + uint32_t CountLeadingEmptyOrDeleted() const { + constexpr uint64_t gaps = 0x00FEFEFEFEFEFEFEULL; + return (uint32_t)((TrailingZeros(((~ctrl & (ctrl >> 7)) | gaps) + 1) + 7) >> 3); + } + + void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { + constexpr uint64_t msbs = 0x8080808080808080ULL; + constexpr uint64_t lsbs = 0x0101010101010101ULL; + auto x = ctrl & msbs; + auto res = (~x + (x >> 7)) & ~lsbs; + little_endian::Store64(dst, res); + } + + uint64_t ctrl; +}; + +#if PHMAP_HAVE_SSE2 + using Group = GroupSse2Impl; +#else + using Group = GroupPortableImpl; +#endif + +// The number of cloned control bytes that we copy from the beginning to the +// end of the control bytes array. 
+// ------------------------------------------------------------------------- +constexpr size_t NumClonedBytes() { return Group::kWidth - 1; } + +template +class raw_hash_set; + +inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } + +// -------------------------------------------------------------------------- +// PRECONDITION: +// IsValidCapacity(capacity) +// ctrl[capacity] == kSentinel +// ctrl[i] != kSentinel for all i < capacity +// Applies mapping for every byte in ctrl: +// DELETED -> EMPTY +// EMPTY -> EMPTY +// FULL -> DELETED +// -------------------------------------------------------------------------- +inline void ConvertDeletedToEmptyAndFullToDeleted( + ctrl_t* ctrl, size_t capacity) +{ + assert(ctrl[capacity] == kSentinel); + assert(IsValidCapacity(capacity)); + for (ctrl_t* pos = ctrl; pos != ctrl + capacity + 1; pos += Group::kWidth) { + Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos); + } + // Copy the cloned ctrl bytes. + std::memcpy(ctrl + capacity + 1, ctrl, Group::kWidth); + ctrl[capacity] = kSentinel; +} + +// -------------------------------------------------------------------------- +// Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1. +// -------------------------------------------------------------------------- +inline size_t NormalizeCapacity(size_t n) +{ + return n ? ~size_t{} >> LeadingZeros(n) : 1; +} + +// -------------------------------------------------------------------------- +// We use 7/8th as maximum load factor. +// For 16-wide groups, that gives an average of two empty slots per group. +// -------------------------------------------------------------------------- +inline size_t CapacityToGrowth(size_t capacity) +{ + assert(IsValidCapacity(capacity)); + // `capacity*7/8` + PHMAP_IF_CONSTEXPR (Group::kWidth == 8) { + if (capacity == 7) + { + // x-x/8 does not work when x==7. 
+ return 6; + } + } + return capacity - capacity / 8; +} + +// -------------------------------------------------------------------------- +// From desired "growth" to a lowerbound of the necessary capacity. +// Might not be a valid one and required NormalizeCapacity(). +// -------------------------------------------------------------------------- +inline size_t GrowthToLowerboundCapacity(size_t growth) +{ + // `growth*8/7` + PHMAP_IF_CONSTEXPR (Group::kWidth == 8) { + if (growth == 7) + { + // x+(x-1)/7 does not work when x==7. + return 8; + } + } + return growth + static_cast((static_cast(growth) - 1) / 7); +} + +namespace hashtable_debug_internal { + +// If it is a map, call get<0>(). +using std::get; +template +auto GetKey(const typename T::value_type& pair, int) -> decltype(get<0>(pair)) { + return get<0>(pair); +} + +// If it is not a map, return the value directly. +template +const typename T::key_type& GetKey(const typename T::key_type& key, char) { + return key; +} + +// -------------------------------------------------------------------------- +// Containers should specialize this to provide debug information for that +// container. +// -------------------------------------------------------------------------- +template +struct HashtableDebugAccess +{ + // Returns the number of probes required to find `key` in `c`. The "number of + // probes" is a concept that can vary by container. Implementations should + // return 0 when `key` was found in the minimum number of operations and + // should increment the result for each non-trivial operation required to find + // `key`. + // + // The default implementation uses the bucket api from the standard and thus + // works for `std::unordered_*` containers. 
+ // -------------------------------------------------------------------------- + static size_t GetNumProbes(const Container& c, + const typename Container::key_type& key) { + if (!c.bucket_count()) return {}; + size_t num_probes = 0; + size_t bucket = c.bucket(key); + for (auto it = c.begin(bucket), e = c.end(bucket);; ++it, ++num_probes) { + if (it == e) return num_probes; + if (c.key_eq()(key, GetKey(*it, 0))) return num_probes; + } + } +}; + +} // namespace hashtable_debug_internal + +// ---------------------------------------------------------------------------- +// I N F O Z S T U B S +// ---------------------------------------------------------------------------- +struct HashtablezInfo +{ + void PrepareForSampling() {} +}; + +inline void RecordRehashSlow(HashtablezInfo*, size_t ) {} + +static inline void RecordInsertSlow(HashtablezInfo* , size_t, size_t ) {} + +static inline void RecordEraseSlow(HashtablezInfo*) {} + +static inline HashtablezInfo* SampleSlow(int64_t*) { return nullptr; } +static inline void UnsampleSlow(HashtablezInfo* ) {} + +class HashtablezInfoHandle +{ +public: + inline void RecordStorageChanged(size_t , size_t ) {} + inline void RecordRehash(size_t ) {} + inline void RecordInsert(size_t , size_t ) {} + inline void RecordErase() {} + friend inline void swap(HashtablezInfoHandle& , + HashtablezInfoHandle& ) noexcept {} +}; + +static inline HashtablezInfoHandle Sample() { return HashtablezInfoHandle(); } + +class HashtablezSampler +{ +public: + // Returns a global Sampler. 
+ static HashtablezSampler& Global() { static HashtablezSampler hzs; return hzs; } + HashtablezInfo* Register() { static HashtablezInfo info; return &info; } + void Unregister(HashtablezInfo* ) {} + + using DisposeCallback = void (*)(const HashtablezInfo&); + DisposeCallback SetDisposeCallback(DisposeCallback ) { return nullptr; } + int64_t Iterate(const std::function& ) { return 0; } +}; + +static inline void SetHashtablezEnabled(bool ) {} +static inline void SetHashtablezSampleParameter(int32_t ) {} +static inline void SetHashtablezMaxSamples(int32_t ) {} + + +namespace memory_internal { + +// Constructs T into uninitialized storage pointed by `ptr` using the args +// specified in the tuple. +// ---------------------------------------------------------------------------- +template +void ConstructFromTupleImpl(Alloc* alloc, T* ptr, Tuple&& t, + phmap::index_sequence) { + phmap::allocator_traits::construct( + *alloc, ptr, std::get(std::forward(t))...); +} + +template +struct WithConstructedImplF { + template + decltype(std::declval()(std::declval())) operator()( + Args&&... args) const { + return std::forward(f)(T(std::forward(args)...)); + } + F&& f; +}; + +template +decltype(std::declval()(std::declval())) WithConstructedImpl( + Tuple&& t, phmap::index_sequence, F&& f) { + return WithConstructedImplF{std::forward(f)}( + std::get(std::forward(t))...); +} + +template +auto TupleRefImpl(T&& t, phmap::index_sequence) + -> decltype(std::forward_as_tuple(std::get(std::forward(t))...)) { + return std::forward_as_tuple(std::get(std::forward(t))...); +} + +// Returns a tuple of references to the elements of the input tuple. T must be a +// tuple. 
+// ---------------------------------------------------------------------------- +template +auto TupleRef(T&& t) -> decltype( + TupleRefImpl(std::forward(t), + phmap::make_index_sequence< + std::tuple_size::type>::value>())) { + return TupleRefImpl( + std::forward(t), + phmap::make_index_sequence< + std::tuple_size::type>::value>()); +} + +template +decltype(std::declval()(std::declval(), std::piecewise_construct, + std::declval>(), std::declval())) +DecomposePairImpl(F&& f, std::pair, V> p) { + const auto& key = std::get<0>(p.first); + return std::forward(f)(key, std::piecewise_construct, std::move(p.first), + std::move(p.second)); +} + +} // namespace memory_internal + + +// ---------------------------------------------------------------------------- +// R A W _ H A S H _ S E T +// ---------------------------------------------------------------------------- +// An open-addressing +// hashtable with quadratic probing. +// +// This is a low level hashtable on top of which different interfaces can be +// implemented, like flat_hash_set, node_hash_set, string_hash_set, etc. +// +// The table interface is similar to that of std::unordered_set. Notable +// differences are that most member functions support heterogeneous keys when +// BOTH the hash and eq functions are marked as transparent. They do so by +// providing a typedef called `is_transparent`. +// +// When heterogeneous lookup is enabled, functions that take key_type act as if +// they have an overload set like: +// +// iterator find(const key_type& key); +// template +// iterator find(const K& key); +// +// size_type erase(const key_type& key); +// template +// size_type erase(const K& key); +// +// std::pair equal_range(const key_type& key); +// template +// std::pair equal_range(const K& key); +// +// When heterogeneous lookup is disabled, only the explicit `key_type` overloads +// exist. 
+// +// find() also supports passing the hash explicitly: +// +// iterator find(const key_type& key, size_t hash); +// template +// iterator find(const U& key, size_t hash); +// +// In addition the pointer to element and iterator stability guarantees are +// weaker: all iterators and pointers are invalidated after a new element is +// inserted. +// +// IMPLEMENTATION DETAILS +// +// The table stores elements inline in a slot array. In addition to the slot +// array the table maintains some control state per slot. The extra state is one +// byte per slot and stores empty or deleted marks, or alternatively 7 bits from +// the hash of an occupied slot. The table is split into logical groups of +// slots, like so: +// +// Group 1 Group 2 Group 3 +// +---------------+---------------+---------------+ +// | | | | | | | | | | | | | | | | | | | | | | | | | +// +---------------+---------------+---------------+ +// +// On lookup the hash is split into two parts: +// - H2: 7 bits (those stored in the control bytes) +// - H1: the rest of the bits +// The groups are probed using H1. For each group the slots are matched to H2 in +// parallel. Because H2 is 7 bits (128 states) and the number of slots per group +// is low (8 or 16) in almost all cases a match in H2 is also a lookup hit. +// +// On insert, once the right group is found (as in lookup), its slots are +// filled in order. +// +// On erase a slot is cleared. In case the group did not have any empty slots +// before the erase, the erased slot is marked as deleted. +// +// Groups without empty slots (but maybe with deleted slots) extend the probe +// sequence. The probing algorithm is quadratic. Given N the number of groups, +// the probing function for the i'th probe is: +// +// P(0) = H1 % N +// +// P(i) = (P(i - 1) + i) % N +// +// This probing function guarantees that after N probes, all the groups of the +// table will be probed exactly once. 
+// ---------------------------------------------------------------------------- +template +class raw_hash_set +{ + using PolicyTraits = hash_policy_traits; + using KeyArgImpl = + KeyArg::value && IsTransparent::value>; + +public: + using init_type = typename PolicyTraits::init_type; + using key_type = typename PolicyTraits::key_type; + // TODO(sbenza): Hide slot_type as it is an implementation detail. Needs user + // code fixes! + using slot_type = typename PolicyTraits::slot_type; + using allocator_type = Alloc; + using size_type = size_t; + using difference_type = ptrdiff_t; + using hasher = Hash; + using key_equal = Eq; + using policy_type = Policy; + using value_type = typename PolicyTraits::value_type; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename phmap::allocator_traits< + allocator_type>::template rebind_traits::pointer; + using const_pointer = typename phmap::allocator_traits< + allocator_type>::template rebind_traits::const_pointer; + + // Alias used for heterogeneous lookup functions. + // `key_arg` evaluates to `K` when the functors are transparent and to + // `key_type` otherwise. It permits template argument deduction on `K` for the + // transparent case. + template + using key_arg = typename KeyArgImpl::template type; + +private: + // Give an early error when key_type is not hashable/eq. 
+ auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); + auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); + + using Layout = phmap::priv::Layout; + + static Layout MakeLayout(size_t capacity) { + assert(IsValidCapacity(capacity)); + return Layout(capacity + Group::kWidth + 1, capacity); + } + + using AllocTraits = phmap::allocator_traits; + using SlotAlloc = typename phmap::allocator_traits< + allocator_type>::template rebind_alloc; + using SlotAllocTraits = typename phmap::allocator_traits< + allocator_type>::template rebind_traits; + + static_assert(std::is_lvalue_reference::value, + "Policy::element() must return a reference"); + + template + struct SameAsElementReference + : std::is_same::type>::type, + typename std::remove_cv< + typename std::remove_reference::type>::type> {}; + + // An enabler for insert(T&&): T must be convertible to init_type or be the + // same as [cv] value_type [ref]. + // Note: we separate SameAsElementReference into its own type to avoid using + // reference unless we need to. MSVC doesn't seem to like it in some + // cases. + template + using RequiresInsertable = typename std::enable_if< + phmap::disjunction, + SameAsElementReference>::value, + int>::type; + + // RequiresNotInit is a workaround for gcc prior to 7.1. + // See https://godbolt.org/g/Y4xsUh. 
+ template + using RequiresNotInit = + typename std::enable_if::value, int>::type; + + template + using IsDecomposable = IsDecomposable; + +public: + static_assert(std::is_same::value, + "Allocators with custom pointer types are not supported"); + static_assert(std::is_same::value, + "Allocators with custom pointer types are not supported"); + + class iterator + { + friend class raw_hash_set; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename raw_hash_set::value_type; + using reference = + phmap::conditional_t; + using pointer = phmap::remove_reference_t*; + using difference_type = typename raw_hash_set::difference_type; + + iterator() {} + + // PRECONDITION: not an end() iterator. + reference operator*() const { return PolicyTraits::element(slot_); } + + // PRECONDITION: not an end() iterator. + pointer operator->() const { return &operator*(); } + + // PRECONDITION: not an end() iterator. + iterator& operator++() { + ++ctrl_; + ++slot_; + skip_empty_or_deleted(); + return *this; + } + // PRECONDITION: not an end() iterator. + iterator operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + +#if PHMAP_BIDIRECTIONAL + // PRECONDITION: not a begin() iterator. + iterator& operator--() { + assert(ctrl_); + do { + --ctrl_; + --slot_; + } while (IsEmptyOrDeleted(*ctrl_)); + return *this; + } + + // PRECONDITION: not a begin() iterator. + iterator operator--(int) { + auto tmp = *this; + --*this; + return tmp; + } +#endif + + friend bool operator==(const iterator& a, const iterator& b) { + return a.ctrl_ == b.ctrl_; + } + friend bool operator!=(const iterator& a, const iterator& b) { + return !(a == b); + } + + private: + iterator(ctrl_t* ctrl) : ctrl_(ctrl) {} // for end() + iterator(ctrl_t* ctrl, slot_type* slot) : ctrl_(ctrl), slot_(slot) {} + + void skip_empty_or_deleted() { + while (IsEmptyOrDeleted(*ctrl_)) { + // ctrl is not necessarily aligned to Group::kWidth. 
It is also likely + // to read past the space for ctrl bytes and into slots. This is ok + // because ctrl has sizeof() == 1 and slot has sizeof() >= 1 so there + // is no way to read outside the combined slot array. + uint32_t shift = Group{ctrl_}.CountLeadingEmptyOrDeleted(); + ctrl_ += shift; + slot_ += shift; + } + } + + ctrl_t* ctrl_ = nullptr; + // To avoid uninitialized member warnings, put slot_ in an anonymous union. + // The member is not initialized on singleton and end iterators. + union { + slot_type* slot_; + }; + }; + + class const_iterator + { + friend class raw_hash_set; + + public: + using iterator_category = typename iterator::iterator_category; + using value_type = typename raw_hash_set::value_type; + using reference = typename raw_hash_set::const_reference; + using pointer = typename raw_hash_set::const_pointer; + using difference_type = typename raw_hash_set::difference_type; + + const_iterator() {} + // Implicit construction from iterator. + const_iterator(iterator i) : inner_(std::move(i)) {} + + reference operator*() const { return *inner_; } + pointer operator->() const { return inner_.operator->(); } + + const_iterator& operator++() { + ++inner_; + return *this; + } + const_iterator operator++(int) { return inner_++; } + + friend bool operator==(const const_iterator& a, const const_iterator& b) { + return a.inner_ == b.inner_; + } + friend bool operator!=(const const_iterator& a, const const_iterator& b) { + return !(a == b); + } + + private: + const_iterator(const ctrl_t* ctrl, const slot_type* slot) + : inner_(const_cast(ctrl), const_cast(slot)) {} + + iterator inner_; + }; + + using node_type = node_handle, Alloc>; + using insert_return_type = InsertReturnType; + + raw_hash_set() noexcept( + std::is_nothrow_default_constructible::value&& + std::is_nothrow_default_constructible::value&& + std::is_nothrow_default_constructible::value) {} + + explicit raw_hash_set(size_t bucket_cnt, const hasher& hashfn = hasher(), + const key_equal& eq = 
key_equal(), + const allocator_type& alloc = allocator_type()) + : ctrl_(EmptyGroup()), settings_(0, hashfn, eq, alloc) { + if (bucket_cnt) { + size_t new_capacity = NormalizeCapacity(bucket_cnt); + reset_growth_left(new_capacity); + initialize_slots(new_capacity); + capacity_ = new_capacity; + } + } + + raw_hash_set(size_t bucket_cnt, const hasher& hashfn, + const allocator_type& alloc) + : raw_hash_set(bucket_cnt, hashfn, key_equal(), alloc) {} + + raw_hash_set(size_t bucket_cnt, const allocator_type& alloc) + : raw_hash_set(bucket_cnt, hasher(), key_equal(), alloc) {} + + explicit raw_hash_set(const allocator_type& alloc) + : raw_hash_set(0, hasher(), key_equal(), alloc) {} + + template + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : raw_hash_set(bucket_cnt, hashfn, eq, alloc) { + insert(first, last); + } + + template + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(first, last, bucket_cnt, hashfn, key_equal(), alloc) {} + + template + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt, + const allocator_type& alloc) + : raw_hash_set(first, last, bucket_cnt, hasher(), key_equal(), alloc) {} + + template + raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc) + : raw_hash_set(first, last, 0, hasher(), key_equal(), alloc) {} + + // Instead of accepting std::initializer_list as the first + // argument like std::unordered_set does, we have two overloads + // that accept std::initializer_list and std::initializer_list. + // This is advantageous for performance. + // + // // Turns {"abc", "def"} into std::initializer_list, then + // // copies the strings into the set. 
+ // std::unordered_set s = {"abc", "def"}; + // + // // Turns {"abc", "def"} into std::initializer_list, then + // // copies the strings into the set. + // phmap::flat_hash_set s = {"abc", "def"}; + // + // The same trick is used in insert(). + // + // The enabler is necessary to prevent this constructor from triggering where + // the copy constructor is meant to be called. + // + // phmap::flat_hash_set a, b{a}; + // + // RequiresNotInit is a workaround for gcc prior to 7.1. + template = 0, RequiresInsertable = 0> + raw_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : raw_hash_set(init.begin(), init.end(), bucket_cnt, hashfn, eq, alloc) {} + + raw_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : raw_hash_set(init.begin(), init.end(), bucket_cnt, hashfn, eq, alloc) {} + + template = 0, RequiresInsertable = 0> + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hashfn, key_equal(), alloc) {} + + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hashfn, key_equal(), alloc) {} + + template = 0, RequiresInsertable = 0> + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} + + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} + + template = 0, RequiresInsertable = 0> + raw_hash_set(std::initializer_list init, const allocator_type& alloc) + : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} + + 
raw_hash_set(std::initializer_list init, + const allocator_type& alloc) + : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} + + raw_hash_set(const raw_hash_set& that) + : raw_hash_set(that, AllocTraits::select_on_container_copy_construction( + that.alloc_ref())) {} + + raw_hash_set(const raw_hash_set& that, const allocator_type& a) + : raw_hash_set(0, that.hash_ref(), that.eq_ref(), a) { + rehash(that.capacity()); // operator=() should preserve load_factor + // Because the table is guaranteed to be empty, we can do something faster + // than a full `insert`. + for (const auto& v : that) { + const size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, v); + auto target = find_first_non_full(hashval); + set_ctrl(target.offset, H2(hashval)); + emplace_at(target.offset, v); + infoz_.RecordInsert(hashval, target.probe_length); + } + size_ = that.size(); + growth_left() -= that.size(); + } + + raw_hash_set(raw_hash_set&& that) noexcept( + std::is_nothrow_copy_constructible::value&& + std::is_nothrow_copy_constructible::value&& + std::is_nothrow_copy_constructible::value) + : ctrl_(phmap::exchange(that.ctrl_, EmptyGroup())), + slots_(phmap::exchange(that.slots_, nullptr)), + size_(phmap::exchange(that.size_, 0)), + capacity_(phmap::exchange(that.capacity_, 0)), + infoz_(phmap::exchange(that.infoz_, HashtablezInfoHandle())), + // Hash, equality and allocator are copied instead of moved because + // `that` must be left valid. If Hash is std::function, moving it + // would create a nullptr functor that cannot be called. + settings_(that.settings_) { + // growth_left was copied above, reset the one from `that`. 
+ that.growth_left() = 0; + } + + raw_hash_set(raw_hash_set&& that, const allocator_type& a) + : ctrl_(EmptyGroup()), + slots_(nullptr), + size_(0), + capacity_(0), + settings_(0, that.hash_ref(), that.eq_ref(), a) { + if (a == that.alloc_ref()) { + std::swap(ctrl_, that.ctrl_); + std::swap(slots_, that.slots_); + std::swap(size_, that.size_); + std::swap(capacity_, that.capacity_); + std::swap(growth_left(), that.growth_left()); + std::swap(infoz_, that.infoz_); + } else { + reserve(that.size()); + // Note: this will copy elements of dense_set and unordered_set instead of + // moving them. This can be fixed if it ever becomes an issue. + for (auto& elem : that) insert(std::move(elem)); + } + } + + raw_hash_set& operator=(const raw_hash_set& that) { + raw_hash_set tmp(that, + AllocTraits::propagate_on_container_copy_assignment::value + ? that.alloc_ref() + : alloc_ref()); + swap(tmp); + return *this; + } + + raw_hash_set& operator=(raw_hash_set&& that) noexcept( + phmap::allocator_traits::is_always_equal::value&& + std::is_nothrow_move_assignable::value&& + std::is_nothrow_move_assignable::value) { + // TODO(sbenza): We should only use the operations from the noexcept clause + // to make sure we actually adhere to that contract. 
+ return move_assign( + std::move(that), + typename AllocTraits::propagate_on_container_move_assignment()); + } + + ~raw_hash_set() { destroy_slots(); } + + iterator begin() { + auto it = iterator_at(0); + it.skip_empty_or_deleted(); + return it; + } + iterator end() + { +#if PHMAP_BIDIRECTIONAL + return iterator_at(capacity_); +#else + return {ctrl_ + capacity_}; +#endif + } + + const_iterator begin() const { + return const_cast(this)->begin(); + } + const_iterator end() const { return const_cast(this)->end(); } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + bool empty() const { return !size(); } + size_t size() const { return size_; } + size_t capacity() const { return capacity_; } + size_t max_size() const { return (std::numeric_limits::max)(); } + + PHMAP_ATTRIBUTE_REINITIALIZES void clear() { + // Iterating over this container is O(bucket_count()). When bucket_count() + // is much greater than size(), iteration becomes prohibitively expensive. + // For clear() it is more important to reuse the allocated array when the + // container is small because allocation takes comparatively long time + // compared to destruction of the elements of the container. So we pick the + // largest bucket_count() threshold for which iteration is still fast and + // past that we simply deallocate the array. + if (empty()) + return; + if (capacity_ > 127) { + destroy_slots(); + } else if (capacity_) { + for (size_t i = 0; i != capacity_; ++i) { + if (IsFull(ctrl_[i])) { + PolicyTraits::destroy(&alloc_ref(), slots_ + i); + } + } + size_ = 0; + reset_ctrl(capacity_); + reset_growth_left(capacity_); + } + assert(empty()); + infoz_.RecordStorageChanged(0, capacity_); + } + + // This overload kicks in when the argument is an rvalue of insertable and + // decomposable type other than init_type. 
+ // + // flat_hash_map m; + // m.insert(std::make_pair("abc", 42)); + template = 0, + typename std::enable_if::value, int>::type = 0, + T* = nullptr> + std::pair insert(T&& value) { + return emplace(std::forward(value)); + } + + // This overload kicks in when the argument is a bitfield or an lvalue of + // insertable and decomposable type. + // + // union { int n : 1; }; + // flat_hash_set s; + // s.insert(n); + // + // flat_hash_set s; + // const char* p = "hello"; + // s.insert(p); + // + // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace + // RequiresInsertable with RequiresInsertable. + // We are hitting this bug: https://godbolt.org/g/1Vht4f. + template = 0, + typename std::enable_if::value, int>::type = 0> + std::pair insert(const T& value) { + return emplace(value); + } + + // This overload kicks in when the argument is an rvalue of init_type. Its + // purpose is to handle brace-init-list arguments. + // + // flat_hash_set s; + // s.insert({"abc", 42}); + std::pair insert(init_type&& value) { + return emplace(std::move(value)); + } + + template = 0, + typename std::enable_if::value, int>::type = 0, + T* = nullptr> + iterator insert(const_iterator, T&& value) { + return insert(std::forward(value)).first; + } + + // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace + // RequiresInsertable with RequiresInsertable. + // We are hitting this bug: https://godbolt.org/g/1Vht4f. 
+ template = 0, + typename std::enable_if::value, int>::type = 0> + iterator insert(const_iterator, const T& value) { + return insert(value).first; + } + + iterator insert(const_iterator, init_type&& value) { + return insert(std::move(value)).first; + } + + template + using IsRandomAccess = std::is_same::iterator_category, + std::random_access_iterator_tag>; + + + template + struct has_difference_operator + { + private: + using yes = std::true_type; + using no = std::false_type; + + template static auto test(int) -> decltype(std::declval() - std::declval() == 1, yes()); + template static no test(...); + + public: + static constexpr bool value = std::is_same(0)), yes>::value; + }; + + template ::value, int> = 0> + void insert(InputIt first, InputIt last) { + this->reserve(this->size() + (last - first)); + for (; first != last; ++first) + emplace(*first); + } + + template ::value, int> = 0> + void insert(InputIt first, InputIt last) { + for (; first != last; ++first) + emplace(*first); + } + + template = 0, RequiresInsertable = 0> + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + insert_return_type insert(node_type&& node) { + if (!node) return {end(), false, node_type()}; + const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node)); + auto res = PolicyTraits::apply( + InsertSlot{*this, std::move(*CommonAccess::GetSlot(node))}, + elem); + if (res.second) { + CommonAccess::Reset(&node); + return {res.first, true, node_type()}; + } else { + return {res.first, false, std::move(node)}; + } + } + + insert_return_type insert(node_type&& node, size_t hashval) { + if (!node) return {end(), false, node_type()}; + const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node)); + auto res = PolicyTraits::apply( + InsertSlotWithHash{*this, std::move(*CommonAccess::GetSlot(node)), hashval}, + elem); + if (res.second) { + 
CommonAccess::Reset(&node); + return {res.first, true, node_type()}; + } else { + return {res.first, false, std::move(node)}; + } + } + + iterator insert(const_iterator, node_type&& node) { + auto res = insert(std::move(node)); + node = std::move(res.node); + return res.position; + } + + // This overload kicks in if we can deduce the key from args. This enables us + // to avoid constructing value_type if an entry with the same key already + // exists. + // + // For example: + // + // flat_hash_map m = {{"abc", "def"}}; + // // Creates no std::string copies and makes no heap allocations. + // m.emplace("abc", "xyz"); + template ::value, int>::type = 0> + std::pair emplace(Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposable{*this}, + std::forward(args)...); + } + + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposableHashval{*this, hashval}, std::forward(args)...); + } + + // This overload kicks in if we cannot deduce the key from args. It constructs + // value_type unconditionally and then either moves it into the table or + // destroys. + template ::value, int>::type = 0> + std::pair emplace(Args&&... args) { + typename phmap::aligned_storage::type + raw; + slot_type* slot = reinterpret_cast(&raw); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + return PolicyTraits::apply(InsertSlot{*this, std::move(*slot)}, elem); + } + + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + typename phmap::aligned_storage::type raw; + slot_type* slot = reinterpret_cast(&raw); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + return PolicyTraits::apply(InsertSlotWithHash{*this, std::move(*slot), hashval}, elem); + } + + template + iterator emplace_hint(const_iterator, Args&&... 
args) { + return emplace(std::forward(args)...).first; + } + + template + iterator emplace_hint_with_hash(size_t hashval, const_iterator, Args&&... args) { + return emplace_with_hash(hashval, std::forward(args)...).first; + } + + // Extension API: support for lazy emplace. + // + // Looks up key in the table. If found, returns the iterator to the element. + // Otherwise calls f with one argument of type raw_hash_set::constructor. f + // MUST call raw_hash_set::constructor with arguments as if a + // raw_hash_set::value_type is constructed, otherwise the behavior is + // undefined. + // + // For example: + // + // std::unordered_set s; + // // Makes ArenaStr even if "abc" is in the map. + // s.insert(ArenaString(&arena, "abc")); + // + // flat_hash_set s; + // // Makes ArenaStr only if "abc" is not in the map. + // s.lazy_emplace("abc", [&](const constructor& ctor) { + // ctor(&arena, "abc"); + // }); + // + // WARNING: This API is currently experimental. If there is a way to implement + // the same thing with the rest of the API, prefer that. + class constructor + { + friend class raw_hash_set; + + public: + template + void operator()(Args&&... 
args) const { + assert(*slot_); + PolicyTraits::construct(alloc_, *slot_, std::forward(args)...); + *slot_ = nullptr; + } + + private: + constructor(allocator_type* a, slot_type** slot) : alloc_(a), slot_(slot) {} + + allocator_type* alloc_; + slot_type** slot_; + }; + + template + iterator lazy_emplace(const key_arg& key, F&& f) { + auto res = find_or_prepare_insert(key); + if (res.second) { + lazy_emplace_at(res.first, std::forward(f)); + } + return iterator_at(res.first); + } + + template + iterator lazy_emplace_with_hash(const key_arg& key, size_t hashval, F&& f) { + auto res = find_or_prepare_insert(key, hashval); + if (res.second) { + lazy_emplace_at(res.first, std::forward(f)); + } + return iterator_at(res.first); + } + + template + void lazy_emplace_at(size_t& idx, F&& f) { + slot_type* slot = slots_ + idx; + std::forward(f)(constructor(&alloc_ref(), &slot)); + assert(!slot); + } + + template + void emplace_single_with_hash(const key_arg& key, size_t hashval, F&& f) { + auto res = find_or_prepare_insert(key, hashval); + if (res.second) + lazy_emplace_at(res.first, std::forward(f)); + else + _erase(iterator_at(res.first)); + } + + + // Extension API: support for heterogeneous keys. + // + // std::unordered_set s; + // // Turns "abc" into std::string. + // s.erase("abc"); + // + // flat_hash_set s; + // // Uses "abc" directly without copying it into std::string. + // s.erase("abc"); + template + size_type erase(const key_arg& key) { + auto it = find(key); + if (it == end()) return 0; + _erase(it); + return 1; + } + + + iterator erase(const_iterator cit) { return erase(cit.inner_); } + + // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`, + // this method returns void to reduce algorithmic complexity to O(1). 
In + // order to erase while iterating across a map, use the following idiom (which + // also works for standard containers): + // + // for (auto it = m.begin(), end = m.end(); it != end;) { + // if () { + // m._erase(it++); + // } else { + // ++it; + // } + // } + void _erase(iterator it) { + assert(it != end()); + PolicyTraits::destroy(&alloc_ref(), it.slot_); + erase_meta_only(it); + } + void _erase(const_iterator cit) { _erase(cit.inner_); } + + // This overload is necessary because otherwise erase(const K&) would be + // a better match if non-const iterator is passed as an argument. + iterator erase(iterator it) { + auto res = it; + ++res; + _erase(it); + return res; + } + + iterator erase(const_iterator first, const_iterator last) { + while (first != last) { + _erase(first++); + } + return last.inner_; + } + + // Moves elements from `src` into `this`. + // If the element already exists in `this`, it is left unmodified in `src`. + template + void merge(raw_hash_set& src) { // NOLINT + assert(this != &src); + for (auto it = src.begin(), e = src.end(); it != e; ++it) { + if (PolicyTraits::apply(InsertSlot{*this, std::move(*it.slot_)}, + PolicyTraits::element(it.slot_)) + .second) { + src.erase_meta_only(it); + } + } + } + + template + void merge(raw_hash_set&& src) { + merge(src); + } + + node_type extract(const_iterator position) { + auto node = + CommonAccess::Make(alloc_ref(), position.inner_.slot_); + erase_meta_only(position); + return node; + } + + template < + class K = key_type, + typename std::enable_if::value, int>::type = 0> + node_type extract(const key_arg& key) { + auto it = find(key); + return it == end() ? 
node_type() : extract(const_iterator{it}); + } + + void swap(raw_hash_set& that) noexcept( + IsNoThrowSwappable() && IsNoThrowSwappable() && + (!AllocTraits::propagate_on_container_swap::value || + IsNoThrowSwappable())) { + using std::swap; + swap(ctrl_, that.ctrl_); + swap(slots_, that.slots_); + swap(size_, that.size_); + swap(capacity_, that.capacity_); + swap(growth_left(), that.growth_left()); + swap(hash_ref(), that.hash_ref()); + swap(eq_ref(), that.eq_ref()); + swap(infoz_, that.infoz_); + if (AllocTraits::propagate_on_container_swap::value) { + swap(alloc_ref(), that.alloc_ref()); + } else { + // If the allocators do not compare equal it is officially undefined + // behavior. We choose to do nothing. + } + } + +#if !defined(PHMAP_NON_DETERMINISTIC) + template + bool phmap_dump(OutputArchive&) const; + + template + bool phmap_load(InputArchive&); +#endif + + void rehash(size_t n) { + if (n == 0 && capacity_ == 0) return; + if (n == 0 && size_ == 0) { + destroy_slots(); + infoz_.RecordStorageChanged(0, 0); + return; + } + // bitor is a faster way of doing `max` here. We will round up to the next + // power-of-2-minus-1, so bitor is good enough. + auto m = NormalizeCapacity((std::max)(n, size())); + // n == 0 unconditionally rehashes as per the standard. + if (n == 0 || m > capacity_) { + resize(m); + } + } + + void reserve(size_t n) { rehash(GrowthToLowerboundCapacity(n)); } + + // Extension API: support for heterogeneous keys. + // + // std::unordered_set s; + // // Turns "abc" into std::string. + // s.count("abc"); + // + // ch_set s; + // // Uses "abc" directly without copying it into std::string. + // s.count("abc"); + template + size_t count(const key_arg& key) const { + return find(key) == end() ? size_t(0) : size_t(1); + } + + // Issues CPU prefetch instructions for the memory needed to find or insert + // a key. Like all lookup functions, this support heterogeneous keys. 
+ // + // NOTE: This is a very low level operation and should not be used without + // specific benchmarks indicating its importance. + void prefetch_hash(size_t hashval) const { + (void)hashval; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + auto seq = probe(hashval); + _mm_prefetch((const char *)(ctrl_ + seq.offset()), _MM_HINT_NTA); + _mm_prefetch((const char *)(slots_ + seq.offset()), _MM_HINT_NTA); +#elif defined(__GNUC__) + auto seq = probe(hashval); + __builtin_prefetch(static_cast(ctrl_ + seq.offset())); + __builtin_prefetch(static_cast(slots_ + seq.offset())); +#endif // __GNUC__ + } + + template + void prefetch(const key_arg& key) const { + prefetch_hash(this->hash(key)); + } + + // The API of find() has two extensions. + // + // 1. The hash can be passed by the user. It must be equal to the hash of the + // key. + // + // 2. The type of the key argument doesn't have to be key_type. This is so + // called heterogeneous key support. + template + iterator find(const key_arg& key, size_t hashval) { + size_t offset; + if (find_impl(key, hashval, offset)) + return iterator_at(offset); + else + return end(); + } + + template + pointer find_ptr(const key_arg& key, size_t hashval) { + size_t offset; + if (find_impl(key, hashval, offset)) + return &PolicyTraits::element(slots_ + offset); + else + return nullptr; + } + + template + iterator find(const key_arg& key) { + return find(key, this->hash(key)); + } + + template + const_iterator find(const key_arg& key, size_t hashval) const { + return const_cast(this)->find(key, hashval); + } + template + const_iterator find(const key_arg& key) const { + return find(key, this->hash(key)); + } + + template + bool contains(const key_arg& key) const { + return find(key) != end(); + } + + template + bool contains(const key_arg& key, size_t hashval) const { + return find(key, hashval) != end(); + } + + template + std::pair equal_range(const key_arg& key) { + auto it = find(key); + if (it != end()) return {it, 
std::next(it)}; + return {it, it}; + } + template + std::pair equal_range( + const key_arg& key) const { + auto it = find(key); + if (it != end()) return {it, std::next(it)}; + return {it, it}; + } + + size_t bucket_count() const { return capacity_; } + float load_factor() const { + return capacity_ ? static_cast(size()) / capacity_ : 0.0; + } + float max_load_factor() const { return 1.0f; } + void max_load_factor(float) { + // Does nothing. + } + + hasher hash_function() const { return hash_ref(); } // warning: doesn't match internal hash - use hash() member function + key_equal key_eq() const { return eq_ref(); } + allocator_type get_allocator() const { return alloc_ref(); } + + friend bool operator==(const raw_hash_set& a, const raw_hash_set& b) { + if (a.size() != b.size()) return false; + const raw_hash_set* outer = &a; + const raw_hash_set* inner = &b; + if (outer->capacity() > inner->capacity()) + std::swap(outer, inner); + for (const value_type& elem : *outer) + if (!inner->has_element(elem)) return false; + return true; + } + + friend bool operator!=(const raw_hash_set& a, const raw_hash_set& b) { + return !(a == b); + } + + friend void swap(raw_hash_set& a, + raw_hash_set& b) noexcept(noexcept(a.swap(b))) { + a.swap(b); + } + + template + size_t hash(const K& key) const { + return HashElement{hash_ref()}(key); + } + +private: + template + friend struct phmap::priv::hashtable_debug_internal::HashtableDebugAccess; + + template + bool find_impl(const key_arg& key, size_t hashval, size_t& offset) { + auto seq = probe(hashval); + while (true) { + Group g{ ctrl_ + seq.offset() }; + for (uint32_t i : g.Match((h2_t)H2(hashval))) { + offset = seq.offset((size_t)i); + if (PHMAP_PREDICT_TRUE(PolicyTraits::apply( + EqualElement{key, eq_ref()}, + PolicyTraits::element(slots_ + offset)))) + return true; + } + if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) + return false; + seq.next(); + } + } + + struct FindElement + { + template + const_iterator operator()(const K& key, 
Args&&...) const { + return s.find(key); + } + const raw_hash_set& s; + }; + + struct HashElement + { + template + size_t operator()(const K& key, Args&&...) const { + return phmap_mix()(h(key)); + } + const hasher& h; + }; + + template + struct EqualElement + { + template + bool operator()(const K2& lhs, Args&&...) const { + return eq(lhs, rhs); + } + const K1& rhs; + const key_equal& eq; + }; + + template + std::pair emplace_decomposable(const K& key, size_t hashval, + Args&&... args) + { + auto res = find_or_prepare_insert(key, hashval); + if (res.second) { + emplace_at(res.first, std::forward(args)...); + } + return {iterator_at(res.first), res.second}; + } + + struct EmplaceDecomposable + { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable(key, s.hash(key), std::forward(args)...); + } + raw_hash_set& s; + }; + + struct EmplaceDecomposableHashval { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable(key, hashval, std::forward(args)...); + } + raw_hash_set& s; + size_t hashval; + }; + + template + struct InsertSlot + { + template + std::pair operator()(const K& key, Args&&...) && { + auto res = s.find_or_prepare_insert(key); + if (res.second) { + PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot); + } else if (do_destroy) { + PolicyTraits::destroy(&s.alloc_ref(), &slot); + } + return {s.iterator_at(res.first), res.second}; + } + raw_hash_set& s; + // Constructed slot. Either moved into place or destroyed. + slot_type&& slot; + }; + + template + struct InsertSlotWithHash + { + template + std::pair operator()(const K& key, Args&&...) 
&& { + auto res = s.find_or_prepare_insert(key, hashval); + if (res.second) { + PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot); + } else if (do_destroy) { + PolicyTraits::destroy(&s.alloc_ref(), &slot); + } + return {s.iterator_at(res.first), res.second}; + } + raw_hash_set& s; + // Constructed slot. Either moved into place or destroyed. + slot_type&& slot; + size_t &hashval; + }; + + // "erases" the object from the container, except that it doesn't actually + // destroy the object. It only updates all the metadata of the class. + // This can be used in conjunction with Policy::transfer to move the object to + // another place. + void erase_meta_only(const_iterator it) { + assert(IsFull(*it.inner_.ctrl_) && "erasing a dangling iterator"); + --size_; + const size_t index = (size_t)(it.inner_.ctrl_ - ctrl_); + const size_t index_before = (index - Group::kWidth) & capacity_; + const auto empty_after = Group(it.inner_.ctrl_).MatchEmpty(); + const auto empty_before = Group(ctrl_ + index_before).MatchEmpty(); + + // We count how many consecutive non empties we have to the right and to the + // left of `it`. If the sum is >= kWidth then there is at least one probe + // window that might have seen a full group. + bool was_never_full = + empty_before && empty_after && + static_cast(empty_after.TrailingZeros() + + empty_before.LeadingZeros()) < Group::kWidth; + + set_ctrl(index, was_never_full ? 
kEmpty : kDeleted); + growth_left() += was_never_full; + infoz_.RecordErase(); + } + + void initialize_slots(size_t new_capacity) { + assert(new_capacity); + if (std::is_same>::value && + slots_ == nullptr) { + infoz_ = Sample(); + } + + auto layout = MakeLayout(new_capacity); + char* mem = static_cast( + Allocate(&alloc_ref(), layout.AllocSize())); + ctrl_ = reinterpret_cast(layout.template Pointer<0>(mem)); + slots_ = layout.template Pointer<1>(mem); + reset_ctrl(new_capacity); + reset_growth_left(new_capacity); + infoz_.RecordStorageChanged(size_, new_capacity); + } + + void destroy_slots() { + if (!capacity_) return; + for (size_t i = 0; i != capacity_; ++i) { + if (IsFull(ctrl_[i])) { + PolicyTraits::destroy(&alloc_ref(), slots_ + i); + } + } + auto layout = MakeLayout(capacity_); + // Unpoison before returning the memory to the allocator. + SanitizerUnpoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_); + Deallocate(&alloc_ref(), ctrl_, layout.AllocSize()); + ctrl_ = EmptyGroup(); + slots_ = nullptr; + size_ = 0; + capacity_ = 0; + growth_left() = 0; + } + + void resize(size_t new_capacity) { + assert(IsValidCapacity(new_capacity)); + auto* old_ctrl = ctrl_; + auto* old_slots = slots_; + const size_t old_capacity = capacity_; + initialize_slots(new_capacity); + capacity_ = new_capacity; + + for (size_t i = 0; i != old_capacity; ++i) { + if (IsFull(old_ctrl[i])) { + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, + PolicyTraits::element(old_slots + i)); + auto target = find_first_non_full(hashval); + size_t new_i = target.offset; + set_ctrl(new_i, H2(hashval)); + PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i); + } + } + if (old_capacity) { + SanitizerUnpoisonMemoryRegion(old_slots, + sizeof(slot_type) * old_capacity); + auto layout = MakeLayout(old_capacity); + Deallocate(&alloc_ref(), old_ctrl, + layout.AllocSize()); + } + } + + void drop_deletes_without_resize() PHMAP_ATTRIBUTE_NOINLINE { + 
assert(IsValidCapacity(capacity_)); + assert(!is_small()); + // Algorithm: + // - mark all DELETED slots as EMPTY + // - mark all FULL slots as DELETED + // - for each slot marked as DELETED + // hash = Hash(element) + // target = find_first_non_full(hash) + // if target is in the same group + // mark slot as FULL + // else if target is EMPTY + // transfer element to target + // mark slot as EMPTY + // mark target as FULL + // else if target is DELETED + // swap current element with target element + // mark target as FULL + // repeat procedure for current slot with moved from element (target) + ConvertDeletedToEmptyAndFullToDeleted(ctrl_, capacity_); + typename phmap::aligned_storage::type + raw; + slot_type* slot = reinterpret_cast(&raw); + for (size_t i = 0; i != capacity_; ++i) { + if (!IsDeleted(ctrl_[i])) continue; + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, + PolicyTraits::element(slots_ + i)); + auto target = find_first_non_full(hashval); + size_t new_i = target.offset; + + // Verify if the old and new i fall within the same group wrt the hashval. + // If they do, we don't need to move the object as it falls already in the + // best probe we can. + const auto probe_index = [&](size_t pos) { + return ((pos - probe(hashval).offset()) & capacity_) / Group::kWidth; + }; + + // Element doesn't move. + if (PHMAP_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) { + set_ctrl(i, H2(hashval)); + continue; + } + if (IsEmpty(ctrl_[new_i])) { + // Transfer element to the empty spot. + // set_ctrl poisons/unpoisons the slots so we have to call it at the + // right time. + set_ctrl(new_i, H2(hashval)); + PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i); + set_ctrl(i, kEmpty); + } else { + assert(IsDeleted(ctrl_[new_i])); + set_ctrl(new_i, H2(hashval)); + // Until we are done rehashing, DELETED marks previously FULL slots. + // Swap i and new_i elements. 
+ PolicyTraits::transfer(&alloc_ref(), slot, slots_ + i); + PolicyTraits::transfer(&alloc_ref(), slots_ + i, slots_ + new_i); + PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slot); + --i; // repeat + } + } + reset_growth_left(capacity_); + } + + void rehash_and_grow_if_necessary() { + if (capacity_ == 0) { + resize(1); + } else if (size() <= CapacityToGrowth(capacity()) / 2) { + // Squash DELETED without growing if there is enough capacity. + drop_deletes_without_resize(); + } else { + // Otherwise grow the container. + resize(capacity_ * 2 + 1); + } + } + + bool has_element(const value_type& elem, size_t hashval) const { + auto seq = probe(hashval); + while (true) { + Group g{ctrl_ + seq.offset()}; + for (uint32_t i : g.Match((h2_t)H2(hashval))) { + if (PHMAP_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset((size_t)i)) == + elem)) + return true; + } + if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) return false; + seq.next(); + assert(seq.getindex() < capacity_ && "full table!"); + } + return false; + } + + bool has_element(const value_type& elem) const { + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, elem); + return has_element(elem, hashval); + } + + // Probes the raw_hash_set with the probe sequence for hash and returns the + // pointer to the first empty or deleted slot. + // NOTE: this function must work with tables having both kEmpty and kDelete + // in one group. Such tables appears during drop_deletes_without_resize. 
+ // + // This function is very useful when insertions happen and: + // - the input is already a set + // - there are enough slots + // - the element with the hash is not in the table + struct FindInfo + { + size_t offset; + size_t probe_length; + }; + FindInfo find_first_non_full(size_t hashval) { + auto seq = probe(hashval); + while (true) { + Group g{ctrl_ + seq.offset()}; + auto mask = g.MatchEmptyOrDeleted(); + if (mask) { + return {seq.offset((size_t)mask.LowestBitSet()), seq.getindex()}; + } + assert(seq.getindex() < capacity_ && "full table!"); + seq.next(); + } + } + + // TODO(alkis): Optimize this assuming *this and that don't overlap. + raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) { + raw_hash_set tmp(std::move(that)); + swap(tmp); + return *this; + } + raw_hash_set& move_assign(raw_hash_set&& that, std::false_type) { + raw_hash_set tmp(std::move(that), alloc_ref()); + swap(tmp); + return *this; + } + +protected: + template + std::pair find_or_prepare_insert(const K& key, size_t hashval) { + auto seq = probe(hashval); + while (true) { + Group g{ctrl_ + seq.offset()}; + for (uint32_t i : g.Match((h2_t)H2(hashval))) { + if (PHMAP_PREDICT_TRUE(PolicyTraits::apply( + EqualElement{key, eq_ref()}, + PolicyTraits::element(slots_ + seq.offset((size_t)i))))) + return {seq.offset((size_t)i), false}; + } + if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) break; + seq.next(); + } + return {prepare_insert(hashval), true}; + } + + template + std::pair find_or_prepare_insert(const K& key) { + return find_or_prepare_insert(key, this->hash(key)); + } + + size_t prepare_insert(size_t hashval) PHMAP_ATTRIBUTE_NOINLINE { + auto target = find_first_non_full(hashval); + if (PHMAP_PREDICT_FALSE(growth_left() == 0 && + !IsDeleted(ctrl_[target.offset]))) { + rehash_and_grow_if_necessary(); + target = find_first_non_full(hashval); + } + ++size_; + growth_left() -= IsEmpty(ctrl_[target.offset]); + set_ctrl(target.offset, H2(hashval)); + infoz_.RecordInsert(hashval, 
target.probe_length); + return target.offset; + } + + // Constructs the value in the space pointed by the iterator. This only works + // after an unsuccessful find_or_prepare_insert() and before any other + // modifications happen in the raw_hash_set. + // + // PRECONDITION: i is an index returned from find_or_prepare_insert(k), where + // k is the key decomposed from `forward(args)...`, and the bool + // returned by find_or_prepare_insert(k) was true. + // POSTCONDITION: *m.iterator_at(i) == value_type(forward(args)...). + template + void emplace_at(size_t i, Args&&... args) { + PolicyTraits::construct(&alloc_ref(), slots_ + i, + std::forward(args)...); + +#ifdef PHMAP_CHECK_CONSTRUCTED_VALUE + // this check can be costly, so do it only when requested + assert(PolicyTraits::apply(FindElement{*this}, *iterator_at(i)) == + iterator_at(i) && + "constructed value does not match the lookup key"); +#endif + } + + iterator iterator_at(size_t i) { return {ctrl_ + i, slots_ + i}; } + const_iterator iterator_at(size_t i) const { return {ctrl_ + i, slots_ + i}; } + +private: + friend struct RawHashSetTestOnlyAccess; + + probe_seq probe(size_t hashval) const { + return probe_seq(H1(hashval, ctrl_), capacity_); + } + + // Reset all ctrl bytes back to kEmpty, except the sentinel. + void reset_ctrl(size_t capacity) { + std::memset(ctrl_, kEmpty, capacity + Group::kWidth); + ctrl_[capacity] = kSentinel; + SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity); + } + + void reset_growth_left(size_t capacity) { + growth_left() = CapacityToGrowth(capacity) - size_; + } + + // Sets the control byte, and if `i < Group::kWidth`, set the cloned byte at + // the end too. 
+ void set_ctrl(size_t i, ctrl_t h) { + assert(i < capacity_); + + if (IsFull(h)) { + SanitizerUnpoisonObject(slots_ + i); + } else { + SanitizerPoisonObject(slots_ + i); + } + + ctrl_[i] = h; + ctrl_[((i - Group::kWidth) & capacity_) + 1 + + ((Group::kWidth - 1) & capacity_)] = h; + } + + size_t& growth_left() { return settings_.template get<0>(); } + + template class RefSet, + class M, class P, class H, class E, class A> + friend class parallel_hash_set; + + template class RefSet, + class M, class P, class H, class E, class A> + friend class parallel_hash_map; + + // The representation of the object has two modes: + // - small: For capacities < kWidth-1 + // - large: For the rest. + // + // Differences: + // - In small mode we are able to use the whole capacity. The extra control + // bytes give us at least one "empty" control byte to stop the iteration. + // This is important to make 1 a valid capacity. + // + // - In small mode only the first `capacity()` control bytes after the + // sentinel are valid. The rest contain dummy kEmpty values that do not + // represent a real slot. This is important to take into account on + // find_first_non_full(), where we never try ShouldInsertBackwards() for + // small tables. 
+ bool is_small() const { return capacity_ < Group::kWidth - 1; } + + hasher& hash_ref() { return settings_.template get<1>(); } + const hasher& hash_ref() const { return settings_.template get<1>(); } + key_equal& eq_ref() { return settings_.template get<2>(); } + const key_equal& eq_ref() const { return settings_.template get<2>(); } + allocator_type& alloc_ref() { return settings_.template get<3>(); } + const allocator_type& alloc_ref() const { + return settings_.template get<3>(); + } + + // TODO(alkis): Investigate removing some of these fields: + // - ctrl/slots can be derived from each other + // - size can be moved into the slot array + ctrl_t* ctrl_ = EmptyGroup(); // [(capacity + 1) * ctrl_t] + slot_type* slots_ = nullptr; // [capacity * slot_type] + size_t size_ = 0; // number of full slots + size_t capacity_ = 0; // total number of slots + HashtablezInfoHandle infoz_; + phmap::priv::CompressedTuple + settings_{0, hasher{}, key_equal{}, allocator_type{}}; +}; + + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +template +class raw_hash_map : public raw_hash_set +{ + // P is Policy. It's passed as a template argument to support maps that have + // incomplete types as values, as in unordered_map. + // MappedReference<> may be a non-reference type. + template + using MappedReference = decltype(P::value( + std::addressof(std::declval()))); + + // MappedConstReference<> may be a non-reference type. 
+ template + using MappedConstReference = decltype(P::value( + std::addressof(std::declval()))); + + using KeyArgImpl = + KeyArg::value && IsTransparent::value>; + + using Base = raw_hash_set; + +public: + using key_type = typename Policy::key_type; + using mapped_type = typename Policy::mapped_type; + template + using key_arg = typename KeyArgImpl::template type; + + static_assert(!std::is_reference::value, ""); + + // TODO(b/187807849): Evaluate whether to support reference mapped_type and + // remove this assertion if/when it is supported. + static_assert(!std::is_reference::value, ""); + + using iterator = typename raw_hash_map::raw_hash_set::iterator; + using const_iterator = typename raw_hash_map::raw_hash_set::const_iterator; + + raw_hash_map() {} + using Base::raw_hash_set; // use raw_hash_set constructor + + // The last two template parameters ensure that both arguments are rvalues + // (lvalue arguments are handled by the overloads below). This is necessary + // for supporting bitfield arguments. 
+ // + // union { int n : 1; }; + // flat_hash_map m; + // m.insert_or_assign(n, n); + template + std::pair insert_or_assign(key_arg&& k, V&& v) { + return insert_or_assign_impl(std::forward(k), std::forward(v)); + } + + template + std::pair insert_or_assign(key_arg&& k, const V& v) { + return insert_or_assign_impl(std::forward(k), v); + } + + template + std::pair insert_or_assign(const key_arg& k, V&& v) { + return insert_or_assign_impl(k, std::forward(v)); + } + + template + std::pair insert_or_assign(const key_arg& k, const V& v) { + return insert_or_assign_impl(k, v); + } + + template + iterator insert_or_assign(const_iterator, key_arg&& k, V&& v) { + return insert_or_assign(std::forward(k), std::forward(v)).first; + } + + template + iterator insert_or_assign(const_iterator, key_arg&& k, const V& v) { + return insert_or_assign(std::forward(k), v).first; + } + + template + iterator insert_or_assign(const_iterator, const key_arg& k, V&& v) { + return insert_or_assign(k, std::forward(v)).first; + } + + template + iterator insert_or_assign(const_iterator, const key_arg& k, const V& v) { + return insert_or_assign(k, v).first; + } + + template ::value, int>::type = 0, + K* = nullptr> + std::pair try_emplace(key_arg&& k, Args&&... args) { + return try_emplace_impl(std::forward(k), std::forward(args)...); + } + + template ::value, int>::type = 0> + std::pair try_emplace(const key_arg& k, Args&&... args) { + return try_emplace_impl(k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator, key_arg&& k, Args&&... args) { + return try_emplace(std::forward(k), std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator, const key_arg& k, Args&&... args) { + return try_emplace(k, std::forward(args)...).first; + } + + template + MappedReference

at(const key_arg& key) { + auto it = this->find(key); + if (it == this->end()) + phmap::base_internal::ThrowStdOutOfRange("phmap at(): lookup non-existent key"); + return Policy::value(&*it); + } + + template + MappedConstReference

at(const key_arg& key) const { + auto it = this->find(key); + if (it == this->end()) + phmap::base_internal::ThrowStdOutOfRange("phmap at(): lookup non-existent key"); + return Policy::value(&*it); + } + + template + MappedReference

operator[](key_arg&& key) { + return Policy::value(&*try_emplace(std::forward(key)).first); + } + + template + MappedReference

operator[](const key_arg& key) { + return Policy::value(&*try_emplace(key).first); + } + +private: + template + std::pair insert_or_assign_impl(K&& k, V&& v) { + auto res = this->find_or_prepare_insert(k); + if (res.second) + this->emplace_at(res.first, std::forward(k), std::forward(v)); + else + Policy::value(&*this->iterator_at(res.first)) = std::forward(v); + return {this->iterator_at(res.first), res.second}; + } + + template + std::pair try_emplace_impl(K&& k, Args&&... args) { + auto res = this->find_or_prepare_insert(k); + if (res.second) + this->emplace_at(res.first, std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + return {this->iterator_at(res.first), res.second}; + } +}; + +// ---------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- +// Returns "random" seed. +inline size_t RandomSeed() +{ +#if PHMAP_HAVE_THREAD_LOCAL + static thread_local size_t counter = 0; + size_t value = ++counter; +#else // PHMAP_HAVE_THREAD_LOCAL + static std::atomic counter(0); + size_t value = counter.fetch_add(1, std::memory_order_relaxed); +#endif // PHMAP_HAVE_THREAD_LOCAL + return value ^ static_cast(reinterpret_cast(&counter)); +} + +// ---------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- +template class RefSet, + class Mtx_, + class Policy, class Hash, class Eq, class Alloc> +class parallel_hash_set +{ + using PolicyTraits = hash_policy_traits; + using KeyArgImpl = + KeyArg::value && IsTransparent::value>; + + static_assert(N <= 12, "N = 12 means 4096 hash tables!"); + constexpr static size_t num_tables = 1 << N; + constexpr static size_t mask = num_tables - 1; + +public: + using EmbeddedSet = RefSet; + using EmbeddedIterator= typename EmbeddedSet::iterator; + using EmbeddedConstIterator= typename 
EmbeddedSet::const_iterator; + using constructor = typename EmbeddedSet::constructor; + using init_type = typename PolicyTraits::init_type; + using key_type = typename PolicyTraits::key_type; + using slot_type = typename PolicyTraits::slot_type; + using allocator_type = Alloc; + using size_type = size_t; + using difference_type = ptrdiff_t; + using hasher = Hash; + using key_equal = Eq; + using policy_type = Policy; + using value_type = typename PolicyTraits::value_type; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename phmap::allocator_traits< + allocator_type>::template rebind_traits::pointer; + using const_pointer = typename phmap::allocator_traits< + allocator_type>::template rebind_traits::const_pointer; + + // Alias used for heterogeneous lookup functions. + // `key_arg` evaluates to `K` when the functors are transparent and to + // `key_type` otherwise. It permits template argument deduction on `K` for the + // transparent case. + // -------------------------------------------------------------------- + template + using key_arg = typename KeyArgImpl::template type; + +protected: + using Lockable = phmap::LockableImpl; + + // -------------------------------------------------------------------- + struct Inner : public Lockable + { + struct Params + { + size_t bucket_cnt; + const hasher& hashfn; + const key_equal& eq; + const allocator_type& alloc; + }; + + Inner() {} + + Inner(Params const &p) : set_(p.bucket_cnt, p.hashfn, p.eq, p.alloc) + {} + + bool operator==(const Inner& o) const + { + typename Lockable::SharedLocks l(const_cast(*this), const_cast(o)); + return set_ == o.set_; + } + + EmbeddedSet set_; + }; + +private: + // Give an early error when key_type is not hashable/eq. 
+ // -------------------------------------------------------------------- + auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); + auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); + + using AllocTraits = phmap::allocator_traits; + + static_assert(std::is_lvalue_reference::value, + "Policy::element() must return a reference"); + + template + struct SameAsElementReference : std::is_same< + typename std::remove_cv::type>::type, + typename std::remove_cv::type>::type> {}; + + // An enabler for insert(T&&): T must be convertible to init_type or be the + // same as [cv] value_type [ref]. + // Note: we separate SameAsElementReference into its own type to avoid using + // reference unless we need to. MSVC doesn't seem to like it in some + // cases. + // -------------------------------------------------------------------- + template + using RequiresInsertable = typename std::enable_if< + phmap::disjunction, + SameAsElementReference>::value, + int>::type; + + // RequiresNotInit is a workaround for gcc prior to 7.1. + // See https://godbolt.org/g/Y4xsUh. 
+ template + using RequiresNotInit = + typename std::enable_if::value, int>::type; + + template + using IsDecomposable = IsDecomposable; + +public: + static_assert(std::is_same::value, + "Allocators with custom pointer types are not supported"); + static_assert(std::is_same::value, + "Allocators with custom pointer types are not supported"); + + // --------------------- i t e r a t o r ------------------------------ + class iterator + { + friend class parallel_hash_set; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename parallel_hash_set::value_type; + using reference = + phmap::conditional_t; + using pointer = phmap::remove_reference_t*; + using difference_type = typename parallel_hash_set::difference_type; + using Inner = typename parallel_hash_set::Inner; + using EmbeddedSet = typename parallel_hash_set::EmbeddedSet; + using EmbeddedIterator = typename EmbeddedSet::iterator; + + iterator() {} + + reference operator*() const { return *it_; } + pointer operator->() const { return &operator*(); } + + iterator& operator++() { + assert(inner_); // null inner means we are already at the end + ++it_; + skip_empty(); + return *this; + } + + iterator operator++(int) { + assert(inner_); // null inner means we are already at the end + auto tmp = *this; + ++*this; + return tmp; + } + + friend bool operator==(const iterator& a, const iterator& b) { + return a.inner_ == b.inner_ && (!a.inner_ || a.it_ == b.it_); + } + + friend bool operator!=(const iterator& a, const iterator& b) { + return !(a == b); + } + + private: + iterator(Inner *inner, Inner *inner_end, const EmbeddedIterator& it) : + inner_(inner), inner_end_(inner_end), it_(it) { // for begin() and end() + if (inner) + it_end_ = inner->set_.end(); + } + + void skip_empty() { + while (it_ == it_end_) { + ++inner_; + if (inner_ == inner_end_) { + inner_ = nullptr; // marks end() + break; + } + else { + it_ = inner_->set_.begin(); + it_end_ = inner_->set_.end(); + } + } + } 
+ + Inner *inner_ = nullptr; + Inner *inner_end_ = nullptr; + EmbeddedIterator it_, it_end_; + }; + + // --------------------- c o n s t i t e r a t o r ----------------- + class const_iterator + { + friend class parallel_hash_set; + + public: + using iterator_category = typename iterator::iterator_category; + using value_type = typename parallel_hash_set::value_type; + using reference = typename parallel_hash_set::const_reference; + using pointer = typename parallel_hash_set::const_pointer; + using difference_type = typename parallel_hash_set::difference_type; + using Inner = typename parallel_hash_set::Inner; + + const_iterator() {} + // Implicit construction from iterator. + const_iterator(iterator i) : iter_(std::move(i)) {} + + reference operator*() const { return *(iter_); } + pointer operator->() const { return iter_.operator->(); } + + const_iterator& operator++() { + ++iter_; + return *this; + } + const_iterator operator++(int) { return iter_++; } + + friend bool operator==(const const_iterator& a, const const_iterator& b) { + return a.iter_ == b.iter_; + } + friend bool operator!=(const const_iterator& a, const const_iterator& b) { + return !(a == b); + } + + private: + const_iterator(const Inner *inner, const Inner *inner_end, const EmbeddedIterator& it) + : iter_(const_cast(inner), + const_cast(inner_end), + const_cast(it)) {} + + iterator iter_; + }; + + using node_type = node_handle, Alloc>; + using insert_return_type = InsertReturnType; + + // ------------------------- c o n s t r u c t o r s ------------------ + + parallel_hash_set() noexcept( + std::is_nothrow_default_constructible::value&& + std::is_nothrow_default_constructible::value&& + std::is_nothrow_default_constructible::value) {} + +#if (__cplusplus >= 201703L || _MSVC_LANG >= 201402) && (defined(_MSC_VER) || defined(__clang__) || (defined(__GNUC__) && __GNUC__ > 6)) + explicit parallel_hash_set(size_t bucket_cnt, + const hasher& hash_param = hasher(), + const key_equal& eq = key_equal(), 
+ const allocator_type& alloc = allocator_type()) : + parallel_hash_set(typename Inner::Params{bucket_cnt, hash_param, eq, alloc}, + phmap::make_index_sequence{}) + {} + + template + parallel_hash_set(typename Inner::Params const &p, + phmap::index_sequence) : sets_{((void)i, p)...} + {} +#else + explicit parallel_hash_set(size_t bucket_cnt, + const hasher& hash_param = hasher(), + const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) { + for (auto& inner : sets_) + inner.set_ = EmbeddedSet(bucket_cnt / N, hash_param, eq, alloc); + } +#endif + + parallel_hash_set(size_t bucket_cnt, + const hasher& hash_param, + const allocator_type& alloc) + : parallel_hash_set(bucket_cnt, hash_param, key_equal(), alloc) {} + + parallel_hash_set(size_t bucket_cnt, const allocator_type& alloc) + : parallel_hash_set(bucket_cnt, hasher(), key_equal(), alloc) {} + + explicit parallel_hash_set(const allocator_type& alloc) + : parallel_hash_set(0, hasher(), key_equal(), alloc) {} + + template + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt = 0, + const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : parallel_hash_set(bucket_cnt, hash_param, eq, alloc) { + insert(first, last); + } + + template + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt, + const hasher& hash_param, const allocator_type& alloc) + : parallel_hash_set(first, last, bucket_cnt, hash_param, key_equal(), alloc) {} + + template + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt, + const allocator_type& alloc) + : parallel_hash_set(first, last, bucket_cnt, hasher(), key_equal(), alloc) {} + + template + parallel_hash_set(InputIter first, InputIter last, const allocator_type& alloc) + : parallel_hash_set(first, last, 0, hasher(), key_equal(), alloc) {} + + // Instead of accepting std::initializer_list as the first + // argument like std::unordered_set does, we 
have two overloads + // that accept std::initializer_list and std::initializer_list. + // This is advantageous for performance. + // + // // Turns {"abc", "def"} into std::initializer_list, then copies + // // the strings into the set. + // std::unordered_set s = {"abc", "def"}; + // + // // Turns {"abc", "def"} into std::initializer_list, then + // // copies the strings into the set. + // phmap::flat_hash_set s = {"abc", "def"}; + // + // The same trick is used in insert(). + // + // The enabler is necessary to prevent this constructor from triggering where + // the copy constructor is meant to be called. + // + // phmap::flat_hash_set a, b{a}; + // + // RequiresNotInit is a workaround for gcc prior to 7.1. + // -------------------------------------------------------------------- + template = 0, RequiresInsertable = 0> + parallel_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : parallel_hash_set(init.begin(), init.end(), bucket_cnt, hash_param, eq, alloc) {} + + parallel_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), + const allocator_type& alloc = allocator_type()) + : parallel_hash_set(init.begin(), init.end(), bucket_cnt, hash_param, eq, alloc) {} + + template = 0, RequiresInsertable = 0> + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hash_param, const allocator_type& alloc) + : parallel_hash_set(init, bucket_cnt, hash_param, key_equal(), alloc) {} + + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hash_param, const allocator_type& alloc) + : parallel_hash_set(init, bucket_cnt, hash_param, key_equal(), alloc) {} + + template = 0, RequiresInsertable = 0> + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, + const allocator_type& alloc) + : 
parallel_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} + + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, + const allocator_type& alloc) + : parallel_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} + + template = 0, RequiresInsertable = 0> + parallel_hash_set(std::initializer_list init, const allocator_type& alloc) + : parallel_hash_set(init, 0, hasher(), key_equal(), alloc) {} + + parallel_hash_set(std::initializer_list init, + const allocator_type& alloc) + : parallel_hash_set(init, 0, hasher(), key_equal(), alloc) {} + + parallel_hash_set(const parallel_hash_set& that) + : parallel_hash_set(that, AllocTraits::select_on_container_copy_construction( + that.alloc_ref())) {} + + parallel_hash_set(const parallel_hash_set& that, const allocator_type& a) + : parallel_hash_set(0, that.hash_ref(), that.eq_ref(), a) { + for (size_t i=0; i::value&& + std::is_nothrow_copy_constructible::value&& + std::is_nothrow_copy_constructible::value) + : parallel_hash_set(std::move(that), that.alloc_ref()) { + } + + parallel_hash_set(parallel_hash_set&& that, const allocator_type& a) + { + for (size_t i=0; i::is_always_equal::value && + std::is_nothrow_move_assignable::value && + std::is_nothrow_move_assignable::value) { + for (size_t i=0; i(this)->begin(); } + const_iterator end() const { return const_cast(this)->end(); } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + bool empty() const { return !size(); } + + size_t size() const { + size_t sz = 0; + for (const auto& inner : sets_) + sz += inner.set_.size(); + return sz; + } + + size_t capacity() const { + size_t c = 0; + for (const auto& inner : sets_) + c += inner.set_.capacity(); + return c; + } + + size_t max_size() const { return (std::numeric_limits::max)(); } + + PHMAP_ATTRIBUTE_REINITIALIZES void clear() { + for (auto& inner : sets_) + { + typename Lockable::UniqueLock m(inner); + inner.set_.clear(); + } + } + + // extension - 
clears only soecified submap + // ---------------------------------------- + void clear(std::size_t submap_index) { + Inner& inner = sets_[submap_index]; + typename Lockable::UniqueLock m(inner); + inner.set_.clear(); + } + + // This overload kicks in when the argument is an rvalue of insertable and + // decomposable type other than init_type. + // + // flat_hash_map m; + // m.insert(std::make_pair("abc", 42)); + // -------------------------------------------------------------------- + template = 0, + typename std::enable_if::value, int>::type = 0, + T* = nullptr> + std::pair insert(T&& value) { + return emplace(std::forward(value)); + } + + // This overload kicks in when the argument is a bitfield or an lvalue of + // insertable and decomposable type. + // + // union { int n : 1; }; + // flat_hash_set s; + // s.insert(n); + // + // flat_hash_set s; + // const char* p = "hello"; + // s.insert(p); + // + // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace + // RequiresInsertable with RequiresInsertable. + // We are hitting this bug: https://godbolt.org/g/1Vht4f. + // -------------------------------------------------------------------- + template < + class T, RequiresInsertable = 0, + typename std::enable_if::value, int>::type = 0> + std::pair insert(const T& value) { + return emplace(value); + } + + // This overload kicks in when the argument is an rvalue of init_type. Its + // purpose is to handle brace-init-list arguments. + // + // flat_hash_set> s; + // s.insert({"abc", 42}); + // -------------------------------------------------------------------- + std::pair insert(init_type&& value) { + return emplace(std::move(value)); + } + + template = 0, + typename std::enable_if::value, int>::type = 0, + T* = nullptr> + iterator insert(const_iterator, T&& value) { + return insert(std::forward(value)).first; + } + + // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace + // RequiresInsertable with RequiresInsertable. 
+ // We are hitting this bug: https://godbolt.org/g/1Vht4f. + // -------------------------------------------------------------------- + template < + class T, RequiresInsertable = 0, + typename std::enable_if::value, int>::type = 0> + iterator insert(const_iterator, const T& value) { + return insert(value).first; + } + + iterator insert(const_iterator, init_type&& value) { + return insert(std::move(value)).first; + } + + template + void insert(InputIt first, InputIt last) { + for (; first != last; ++first) insert(*first); + } + + template = 0, RequiresInsertable = 0> + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + insert_return_type insert(node_type&& node) { + if (!node) + return {end(), false, node_type()}; + auto& key = node.key(); + size_t hashval = this->hash(key); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + + typename Lockable::UniqueLock m(inner); + auto res = set.insert(std::move(node), hashval); + return { make_iterator(&inner, res.position), + res.inserted, + res.inserted ? node_type() : std::move(res.node) }; + } + + iterator insert(const_iterator, node_type&& node) { + return insert(std::move(node)).first; + } + + struct ReturnKey_ + { + template + Key operator()(Key&& k, const Args&...) const { + return std::forward(k); + } + }; + + // -------------------------------------------------------------------- + // phmap extension: emplace_with_hash + // ---------------------------------- + // same as emplace, but hashval is provided + // -------------------------------------------------------------------- + template + std::pair emplace_decomposable_with_hash(const K& key, size_t hashval, Args&&... 
args) + { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_rv(&inner, set.emplace_decomposable(key, hashval, std::forward(args)...)); + } + + struct EmplaceDecomposableHashval + { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable_with_hash(key, hashval, std::forward(args)...); + } + parallel_hash_set& s; + size_t hashval; + }; + + // This overload kicks in if we can deduce the key from args. This enables us + // to avoid constructing value_type if an entry with the same key already + // exists. + // + // For example: + // + // flat_hash_map m = {{"abc", "def"}}; + // // Creates no std::string copies and makes no heap allocations. + // m.emplace("abc", "xyz"); + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposableHashval{*this, hashval}, + std::forward(args)...); + } + + // This overload kicks in if we cannot deduce the key from args. It constructs + // value_type unconditionally and then either moves it into the table or + // destroys. + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + typename phmap::aligned_storage::type raw; + slot_type* slot = reinterpret_cast(&raw); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + typename EmbeddedSet::template InsertSlotWithHash f { + inner, std::move(*slot), hashval}; + return make_rv(PolicyTraits::apply(f, elem)); + } + + template + iterator emplace_hint_with_hash(size_t hashval, const_iterator, Args&&... 
args) { + return emplace_with_hash(hashval, std::forward(args)...).first; + } + + template + iterator lazy_emplace_with_hash(const key_arg& key, size_t hashval, F&& f) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_iterator(&inner, set.lazy_emplace_with_hash(key, hashval, std::forward(f))); + } + + // -------------------------------------------------------------------- + // end of phmap expension + // -------------------------------------------------------------------- + + template + std::pair emplace_decomposable(const K& key, Args&&... args) + { + size_t hashval = this->hash(key); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_rv(&inner, set.emplace_decomposable(key, hashval, std::forward(args)...)); + } + + struct EmplaceDecomposable + { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable(key, std::forward(args)...); + } + parallel_hash_set& s; + }; + + // This overload kicks in if we can deduce the key from args. This enables us + // to avoid constructing value_type if an entry with the same key already + // exists. + // + // For example: + // + // flat_hash_map m = {{"abc", "def"}}; + // // Creates no std::string copies and makes no heap allocations. + // m.emplace("abc", "xyz"); + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace(Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposable{*this}, + std::forward(args)...); + } + + // This overload kicks in if we cannot deduce the key from args. It constructs + // value_type unconditionally and then either moves it into the table or + // destroys. + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace(Args&&... 
args) { + typename phmap::aligned_storage::type raw; + slot_type* slot = reinterpret_cast(&raw); + size_t hashval = this->hash(PolicyTraits::key(slot)); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + typename EmbeddedSet::template InsertSlotWithHash f { + inner, std::move(*slot), hashval}; + return make_rv(PolicyTraits::apply(f, elem)); + } + + template + iterator emplace_hint(const_iterator, Args&&... args) { + return emplace(std::forward(args)...).first; + } + + iterator make_iterator(Inner* inner, const EmbeddedIterator it) + { + if (it == inner->set_.end()) + return iterator(); + return iterator(inner, &sets_[0] + num_tables, it); + } + + std::pair make_rv(Inner* inner, + const std::pair& res) + { + return {iterator(inner, &sets_[0] + num_tables, res.first), res.second}; + } + + // lazy_emplace + // ------------ + template + iterator lazy_emplace(const key_arg& key, F&& f) { + auto hashval = this->hash(key); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_iterator(&inner, set.lazy_emplace_with_hash(key, hashval, std::forward(f))); + } + + // emplace_single + // -------------- + template + void emplace_single_with_hash(const key_arg& key, size_t hashval, F&& f) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + set.emplace_single_with_hash(key, hashval, std::forward(f)); + } + + template + void emplace_single(const key_arg& key, F&& f) { + auto hashval = this->hash(key); + emplace_single_with_hash(key, hashval, std::forward(f)); + } + + // if set contains key, lambda is called with the value_type (under read lock protection), + // and if_contains returns true. 
This is a const API and lambda should not modify the value + // ----------------------------------------------------------------------------------------- + template + bool if_contains(const key_arg& key, F&& f) const { + return const_cast(this)->template + modify_if_impl(key, std::forward(f)); + } + + // if set contains key, lambda is called with the value_type without read lock protection, + // and if_contains_unsafe returns true. This is a const API and lambda should not modify the value + // This should be used only if we know that no other thread may be mutating the set at the time. + // ----------------------------------------------------------------------------------------- + template + bool if_contains_unsafe(const key_arg& key, F&& f) const { + return const_cast(this)->template + modify_if_impl::DoNothing>(key, std::forward(f)); + } + + // if map contains key, lambda is called with the value_type (under write lock protection), + // and modify_if returns true. This is a non-const API and lambda is allowed to modify the mapped value + // ---------------------------------------------------------------------------------------------------- + template + bool modify_if(const key_arg& key, F&& f) { + return modify_if_impl(key, std::forward(f)); + } + + // ----------------------------------------------------------------------------------------- + template + bool modify_if_impl(const key_arg& key, F&& f) { +#if __cplusplus >= 201703L + static_assert(std::is_invocable::value); +#endif + L m; + auto ptr = this->template find_ptr(key, this->hash(key), m); + if (ptr == nullptr) + return false; + std::forward(f)(*ptr); + return true; + } + + // if map contains key, lambda is called with the mapped value (under write lock protection). + // If the lambda returns true, the key is subsequently erased from the map (the write lock + // is only released after erase). + // returns true if key was erased, false otherwise. 
+ // ---------------------------------------------------------------------------------------------------- + template + bool erase_if(const key_arg& key, F&& f) { + return erase_if_impl(key, std::forward(f)); + } + + template + bool erase_if_impl(const key_arg& key, F&& f) { +#if __cplusplus >= 201703L + static_assert(std::is_invocable::value); +#endif + L m; + auto it = this->template find(key, this->hash(key), m); + if (it == this->end()) return false; + if (std::forward(f)(const_cast(*it))) + { + this->erase(it); + return true; + } + return false; + } + + // if map already contains key, the first lambda is called with the mapped value (under + // write lock protection) and can update the mapped value. + // if map does not contains key, the second lambda is called and it should invoke the + // passed constructor to construct the value + // returns true if key was not already present, false otherwise. + // --------------------------------------------------------------------------------------- + template + bool lazy_emplace_l(const key_arg& key, FExists&& fExists, FEmplace&& fEmplace) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(key, m); + Inner* inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.lazy_emplace_at(std::get<1>(res), std::forward(fEmplace)); + else { + auto it = this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))); + std::forward(fExists)(const_cast(*it)); // in case of the set, non "key" part of value_type can be changed + } + return std::get<2>(res); + } + + // Extension API: support iterating over all values + // + // flat_hash_set s; + // s.insert(...); + // s.for_each([](auto const & key) { + // // Safely iterates over all the keys + // }); + template + void for_each(F&& fCallback) const { + for (auto const& inner : sets_) { + typename Lockable::SharedLock m(const_cast(inner)); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + } + + // this version allows to modify 
the values + template + void for_each_m(F&& fCallback) { + for (auto& inner : sets_) { + typename Lockable::UniqueLock m(inner); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + } + +#if __cplusplus >= 201703L + template + void for_each(ExecutionPolicy&& policy, F&& fCallback) const { + std::for_each( + std::forward(policy), sets_.begin(), sets_.end(), + [&](auto const& inner) { + typename Lockable::SharedLock m(const_cast(inner)); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + ); + } + + template + void for_each_m(ExecutionPolicy&& policy, F&& fCallback) { + std::for_each( + std::forward(policy), sets_.begin(), sets_.end(), + [&](auto& inner) { + typename Lockable::UniqueLock m(inner); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + ); + } +#endif + + // Extension API: access internal submaps by index + // under lock protection + // ex: m.with_submap(i, [&](const Map::EmbeddedSet& set) { + // for (auto& p : set) { ...; }}); + // ------------------------------------------------- + template + void with_submap(size_t idx, F&& fCallback) const { + const Inner& inner = sets_[idx]; + const auto& set = inner.set_; + typename Lockable::SharedLock m(const_cast(inner)); + fCallback(set); + } + + template + void with_submap_m(size_t idx, F&& fCallback) { + Inner& inner = sets_[idx]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + fCallback(set); + } + + // unsafe, for internal use only + Inner& get_inner(size_t idx) { + return sets_[idx]; + } + + // Extension API: support for heterogeneous keys. + // + // std::unordered_set s; + // // Turns "abc" into std::string. + // s.erase("abc"); + // + // flat_hash_set s; + // // Uses "abc" directly without copying it into std::string. 
+ // s.erase("abc"); + // + // -------------------------------------------------------------------- + template + size_type erase(const key_arg& key) { + auto hashval = this->hash(key); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UpgradeLock m(inner); + auto it = set.find(key, hashval); + if (it == set.end()) + return 0; + + typename Lockable::UpgradeToUnique unique(m); + set._erase(it); + return 1; + } + + // -------------------------------------------------------------------- + iterator erase(const_iterator cit) { return erase(cit.iter_); } + + // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`, + // this method returns void to reduce algorithmic complexity to O(1). In + // order to erase while iterating across a map, use the following idiom (which + // also works for standard containers): + // + // for (auto it = m.begin(), end = m.end(); it != end;) { + // if () { + // m._erase(it++); + // } else { + // ++it; + // } + // } + // + // Do not use erase APIs taking iterators when accessing the map concurrently + // -------------------------------------------------------------------- + void _erase(iterator it) { + Inner* inner = it.inner_; + assert(inner != nullptr); + auto& set = inner->set_; + // typename Lockable::UniqueLock m(*inner); // don't lock here + + set._erase(it.it_); + } + void _erase(const_iterator cit) { _erase(cit.iter_); } + + // This overload is necessary because otherwise erase(const K&) would be + // a better match if non-const iterator is passed as an argument. + // Do not use erase APIs taking iterators when accessing the map concurrently + // -------------------------------------------------------------------- + iterator erase(iterator it) { _erase(it++); return it; } + + iterator erase(const_iterator first, const_iterator last) { + while (first != last) { + _erase(first++); + } + return last.iter_; + } + + // Moves elements from `src` into `this`. 
+ // If the element already exists in `this`, it is left unmodified in `src`. + // Do not use erase APIs taking iterators when accessing the map concurrently + // -------------------------------------------------------------------- + template + void merge(parallel_hash_set& src) { // NOLINT + assert(this != &src); + if (this != &src) + { + for (size_t i=0; i + void merge(parallel_hash_set&& src) { + merge(src); + } + + node_type extract(const_iterator position) { + return position.iter_.inner_->set_.extract(EmbeddedConstIterator(position.iter_.it_)); + } + + template < + class K = key_type, + typename std::enable_if::value, int>::type = 0> + node_type extract(const key_arg& key) { + auto it = find(key); + return it == end() ? node_type() : extract(const_iterator{it}); + } + + template + void swap(parallel_hash_set& that) + noexcept(IsNoThrowSwappable() && + (!AllocTraits::propagate_on_container_swap::value || + IsNoThrowSwappable())) + { + using std::swap; + using Lockable2 = phmap::LockableImpl; + + for (size_t i=0; i target ? normalized : target); + } + + // Extension API: support for heterogeneous keys. + // + // std::unordered_set s; + // // Turns "abc" into std::string. + // s.count("abc"); + // + // ch_set s; + // // Uses "abc" directly without copying it into std::string. + // s.count("abc"); + // -------------------------------------------------------------------- + template + size_t count(const key_arg& key) const { + return find(key) == end() ? 0 : 1; + } + + // Issues CPU prefetch instructions for the memory needed to find or insert + // a key. Like all lookup functions, this support heterogeneous keys. + // + // NOTE: This is a very low level operation and should not be used without + // specific benchmarks indicating its importance. 
+ // -------------------------------------------------------------------- + void prefetch_hash(size_t hashval) const { + const Inner& inner = sets_[subidx(hashval)]; + const auto& set = inner.set_; + typename Lockable::SharedLock m(const_cast(inner)); + set.prefetch_hash(hashval); + } + + template + void prefetch(const key_arg& key) const { + prefetch_hash(this->hash(key)); + } + + // The API of find() has two extensions. + // + // 1. The hash can be passed by the user. It must be equal to the hash of the + // key. + // + // 2. The type of the key argument doesn't have to be key_type. This is so + // called heterogeneous key support. + // -------------------------------------------------------------------- + template + iterator find(const key_arg& key, size_t hashval) { + typename Lockable::SharedLock m; + return find(key, hashval, m); + } + + template + iterator find(const key_arg& key) { + return find(key, this->hash(key)); + } + + template + const_iterator find(const key_arg& key, size_t hashval) const { + return const_cast(this)->find(key, hashval); + } + + template + const_iterator find(const key_arg& key) const { + return find(key, this->hash(key)); + } + + template + bool contains(const key_arg& key) const { + return find(key) != end(); + } + + template + bool contains(const key_arg& key, size_t hashval) const { + return find(key, hashval) != end(); + } + + template + std::pair equal_range(const key_arg& key) { + auto it = find(key); + if (it != end()) return {it, std::next(it)}; + return {it, it}; + } + + template + std::pair equal_range( + const key_arg& key) const { + auto it = find(key); + if (it != end()) return {it, std::next(it)}; + return {it, it}; + } + + size_t bucket_count() const { + size_t sz = 0; + for (const auto& inner : sets_) + { + typename Lockable::SharedLock m(const_cast(inner)); + sz += inner.set_.bucket_count(); + } + return sz; + } + + float load_factor() const { + size_t _capacity = bucket_count(); + return _capacity ? 
static_cast(static_cast(size()) / _capacity) : 0; + } + + float max_load_factor() const { return 1.0f; } + void max_load_factor(float) { + // Does nothing. + } + + hasher hash_function() const { return hash_ref(); } // warning: doesn't match internal hash - use hash() member function + key_equal key_eq() const { return eq_ref(); } + allocator_type get_allocator() const { return alloc_ref(); } + + friend bool operator==(const parallel_hash_set& a, const parallel_hash_set& b) { + return std::equal(a.sets_.begin(), a.sets_.end(), b.sets_.begin()); + } + + friend bool operator!=(const parallel_hash_set& a, const parallel_hash_set& b) { + return !(a == b); + } + + template + friend void swap(parallel_hash_set& a, + parallel_hash_set& b) + noexcept(noexcept(a.swap(b))) + { + a.swap(b); + } + + template + size_t hash(const K& key) const { + return HashElement{hash_ref()}(key); + } + +#if !defined(PHMAP_NON_DETERMINISTIC) + template + bool phmap_dump(OutputArchive& ar) const; + + template + bool phmap_load(InputArchive& ar); +#endif + +private: + template + friend struct phmap::priv::hashtable_debug_internal::HashtableDebugAccess; + + struct FindElement + { + template + const_iterator operator()(const K& key, Args&&...) const { + return s.find(key); + } + const parallel_hash_set& s; + }; + + struct HashElement + { + template + size_t operator()(const K& key, Args&&...) const { + return phmap_mix()(h(key)); + } + const hasher& h; + }; + + template + struct EqualElement + { + template + bool operator()(const K2& lhs, Args&&...) const { + return eq(lhs, rhs); + } + const K1& rhs; + const key_equal& eq; + }; + + // "erases" the object from the container, except that it doesn't actually + // destroy the object. It only updates all the metadata of the class. + // This can be used in conjunction with Policy::transfer to move the object to + // another place. 
+ // -------------------------------------------------------------------- + void erase_meta_only(const_iterator cit) { + auto &it = cit.iter_; + assert(it.set_ != nullptr); + it.set_.erase_meta_only(const_iterator(it.it_)); + } + + void drop_deletes_without_resize() PHMAP_ATTRIBUTE_NOINLINE { + for (auto& inner : sets_) + { + typename Lockable::UniqueLock m(inner); + inner.set_.drop_deletes_without_resize(); + } + } + + bool has_element(const value_type& elem) const { + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, elem); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::SharedLock m(const_cast(inner)); + return set.has_element(elem, hashval); + } + + // TODO(alkis): Optimize this assuming *this and that don't overlap. + // -------------------------------------------------------------------- + template + parallel_hash_set& move_assign(parallel_hash_set&& that, std::true_type) { + parallel_hash_set tmp(std::move(that)); + swap(tmp); + return *this; + } + + template + parallel_hash_set& move_assign(parallel_hash_set&& that, std::false_type) { + parallel_hash_set tmp(std::move(that), alloc_ref()); + swap(tmp); + return *this; + } + +protected: + template + pointer find_ptr(const key_arg& key, size_t hashval, L& mutexlock) + { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + mutexlock = std::move(L(inner)); + return set.find_ptr(key, hashval); + } + + template + iterator find(const key_arg& key, size_t hashval, L& mutexlock) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + mutexlock = std::move(L(inner)); + return make_iterator(&inner, set.find(key, hashval)); + } + + template + std::tuple + find_or_prepare_insert_with_hash(size_t hashval, const K& key, typename Lockable::UniqueLock &mutexlock) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + mutexlock = std::move(typename Lockable::UniqueLock(inner)); + auto p = set.find_or_prepare_insert(key, 
hashval); // std::pair + return std::make_tuple(&inner, p.first, p.second); + } + + template + std::tuple + find_or_prepare_insert(const K& key, typename Lockable::UniqueLock &mutexlock) { + return find_or_prepare_insert_with_hash(this->hash(key), key, mutexlock); + } + + iterator iterator_at(Inner *inner, + const EmbeddedIterator& it) { + return {inner, &sets_[0] + num_tables, it}; + } + const_iterator iterator_at(Inner *inner, + const EmbeddedIterator& it) const { + return {inner, &sets_[0] + num_tables, it}; + } + + static size_t subidx(size_t hashval) { + return ((hashval >> 8) ^ (hashval >> 16) ^ (hashval >> 24)) & mask; + } + + static size_t subcnt() { + return num_tables; + } + +private: + friend struct RawHashSetTestOnlyAccess; + + size_t growth_left() { + size_t sz = 0; + for (const auto& set : sets_) + sz += set.growth_left(); + return sz; + } + + hasher& hash_ref() { return sets_[0].set_.hash_ref(); } + const hasher& hash_ref() const { return sets_[0].set_.hash_ref(); } + key_equal& eq_ref() { return sets_[0].set_.eq_ref(); } + const key_equal& eq_ref() const { return sets_[0].set_.eq_ref(); } + allocator_type& alloc_ref() { return sets_[0].set_.alloc_ref(); } + const allocator_type& alloc_ref() const { + return sets_[0].set_.alloc_ref(); + } + +protected: // protected in case users want to derive fromm this + std::array sets_; +}; + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +template class RefSet, + class Mtx_, + class Policy, class Hash, class Eq, class Alloc> +class parallel_hash_map : public parallel_hash_set +{ + // P is Policy. It's passed as a template argument to support maps that have + // incomplete types as values, as in unordered_map. + // MappedReference<> may be a non-reference type. 
+ template + using MappedReference = decltype(P::value( + std::addressof(std::declval()))); + + // MappedConstReference<> may be a non-reference type. + template + using MappedConstReference = decltype(P::value( + std::addressof(std::declval()))); + + using KeyArgImpl = + KeyArg::value && IsTransparent::value>; + + using Base = typename parallel_hash_map::parallel_hash_set; + using Lockable = phmap::LockableImpl; + +public: + using key_type = typename Policy::key_type; + using mapped_type = typename Policy::mapped_type; + using value_type = typename Base::value_type; + template + using key_arg = typename KeyArgImpl::template type; + + static_assert(!std::is_reference::value, ""); + // TODO(alkis): remove this assertion and verify that reference mapped_type is + // supported. + static_assert(!std::is_reference::value, ""); + + using iterator = typename parallel_hash_map::parallel_hash_set::iterator; + using const_iterator = typename parallel_hash_map::parallel_hash_set::const_iterator; + + parallel_hash_map() {} + +#ifdef __INTEL_COMPILER + using Base::parallel_hash_set; +#else + using parallel_hash_map::parallel_hash_set::parallel_hash_set; +#endif + + // The last two template parameters ensure that both arguments are rvalues + // (lvalue arguments are handled by the overloads below). This is necessary + // for supporting bitfield arguments. 
+ // + // union { int n : 1; }; + // flat_hash_map m; + // m.insert_or_assign(n, n); + template + std::pair insert_or_assign(key_arg&& k, V&& v) { + return insert_or_assign_impl(std::forward(k), std::forward(v)); + } + + template + std::pair insert_or_assign(key_arg&& k, const V& v) { + return insert_or_assign_impl(std::forward(k), v); + } + + template + std::pair insert_or_assign(const key_arg& k, V&& v) { + return insert_or_assign_impl(k, std::forward(v)); + } + + template + std::pair insert_or_assign(const key_arg& k, const V& v) { + return insert_or_assign_impl(k, v); + } + + template + iterator insert_or_assign(const_iterator, key_arg&& k, V&& v) { + return insert_or_assign(std::forward(k), std::forward(v)).first; + } + + template + iterator insert_or_assign(const_iterator, key_arg&& k, const V& v) { + return insert_or_assign(std::forward(k), v).first; + } + + template + iterator insert_or_assign(const_iterator, const key_arg& k, V&& v) { + return insert_or_assign(k, std::forward(v)).first; + } + + template + iterator insert_or_assign(const_iterator, const key_arg& k, const V& v) { + return insert_or_assign(k, v).first; + } + + template ::value, int>::type = 0, + K* = nullptr> + std::pair try_emplace(key_arg&& k, Args&&... args) { + return try_emplace_impl(std::forward(k), std::forward(args)...); + } + + template ::value, int>::type = 0> + std::pair try_emplace(const key_arg& k, Args&&... args) { + return try_emplace_impl(k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator, key_arg&& k, Args&&... args) { + return try_emplace(std::forward(k), std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator, const key_arg& k, Args&&... args) { + return try_emplace(k, std::forward(args)...).first; + } + + template + MappedReference

at(const key_arg& key) { + auto it = this->find(key); + if (it == this->end()) + phmap::base_internal::ThrowStdOutOfRange("phmap at(): lookup non-existent key"); + return Policy::value(&*it); + } + + template + MappedConstReference

at(const key_arg& key) const { + auto it = this->find(key); + if (it == this->end()) + phmap::base_internal::ThrowStdOutOfRange("phmap at(): lookup non-existent key"); + return Policy::value(&*it); + } + + // ----------- phmap extensions -------------------------- + + template ::value, int>::type = 0, + K* = nullptr> + std::pair try_emplace_with_hash(size_t hashval, key_arg&& k, Args&&... args) { + return try_emplace_impl_with_hash(hashval, std::forward(k), std::forward(args)...); + } + + template ::value, int>::type = 0> + std::pair try_emplace_with_hash(size_t hashval, const key_arg& k, Args&&... args) { + return try_emplace_impl_with_hash(hashval, k, std::forward(args)...); + } + + template + iterator try_emplace_with_hash(size_t hashval, const_iterator, key_arg&& k, Args&&... args) { + return try_emplace_with_hash(hashval, std::forward(k), std::forward(args)...).first; + } + + template + iterator try_emplace_with_hash(size_t hashval, const_iterator, const key_arg& k, Args&&... args) { + return try_emplace_with_hash(hashval, k, std::forward(args)...).first; + } + + // if map does not contains key, it is inserted and the mapped value is value-constructed + // with the provided arguments (if any), as with try_emplace. + // if map already contains key, then the lambda is called with the mapped value (under + // write lock protection) and can update the mapped value. + // returns true if key was not already present, false otherwise. + // --------------------------------------------------------------------------------------- + template + bool try_emplace_l(K&& k, F&& f, Args&&... 
args) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + else { + auto it = this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))); + std::forward(f)(const_cast(*it)); // in case of the set, non "key" part of value_type can be changed + } + return std::get<2>(res); + } + + // ----------- end of phmap extensions -------------------------- + + template + MappedReference

operator[](key_arg&& key) { + return Policy::value(&*try_emplace(std::forward(key)).first); + } + + template + MappedReference

operator[](const key_arg& key) { + return Policy::value(&*try_emplace(key).first); + } + +private: + + template + std::pair insert_or_assign_impl(K&& k, V&& v) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::forward(k), std::forward(v)); + else + Policy::value(&*inner->set_.iterator_at(std::get<1>(res))) = std::forward(v); + return {this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))), + std::get<2>(res)}; + } + + template + std::pair try_emplace_impl(K&& k, Args&&... args) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + return {this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))), + std::get<2>(res)}; + } + + template + std::pair try_emplace_impl_with_hash(size_t hashval, K&& k, Args&&... args) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert_with_hash(hashval, k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + return {this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))), + std::get<2>(res)}; + } + + +}; + + +// Constructs T into uninitialized storage pointed by `ptr` using the args +// specified in the tuple. 
+// ---------------------------------------------------------------------------- +template +void ConstructFromTuple(Alloc* alloc, T* ptr, Tuple&& t) { + memory_internal::ConstructFromTupleImpl( + alloc, ptr, std::forward(t), + phmap::make_index_sequence< + std::tuple_size::type>::value>()); +} + +// Constructs T using the args specified in the tuple and calls F with the +// constructed value. +// ---------------------------------------------------------------------------- +template +decltype(std::declval()(std::declval())) WithConstructed( + Tuple&& t, F&& f) { + return memory_internal::WithConstructedImpl( + std::forward(t), + phmap::make_index_sequence< + std::tuple_size::type>::value>(), + std::forward(f)); +} + +// ---------------------------------------------------------------------------- +// Given arguments of an std::pair's consructor, PairArgs() returns a pair of +// tuples with references to the passed arguments. The tuples contain +// constructor arguments for the first and the second elements of the pair. +// +// The following two snippets are equivalent. +// +// 1. std::pair p(args...); +// +// 2. 
auto a = PairArgs(args...); +// std::pair p(std::piecewise_construct, +// std::move(p.first), std::move(p.second)); +// ---------------------------------------------------------------------------- +inline std::pair, std::tuple<>> PairArgs() { return {}; } + +template +std::pair, std::tuple> PairArgs(F&& f, S&& s) { + return {std::piecewise_construct, std::forward_as_tuple(std::forward(f)), + std::forward_as_tuple(std::forward(s))}; +} + +template +std::pair, std::tuple> PairArgs( + const std::pair& p) { + return PairArgs(p.first, p.second); +} + +template +std::pair, std::tuple> PairArgs(std::pair&& p) { + return PairArgs(std::forward(p.first), std::forward(p.second)); +} + +template +auto PairArgs(std::piecewise_construct_t, F&& f, S&& s) + -> decltype(std::make_pair(memory_internal::TupleRef(std::forward(f)), + memory_internal::TupleRef(std::forward(s)))) { + return std::make_pair(memory_internal::TupleRef(std::forward(f)), + memory_internal::TupleRef(std::forward(s))); +} + +// A helper function for implementing apply() in map policies. +// ---------------------------------------------------------------------------- +template +auto DecomposePair(F&& f, Args&&... args) + -> decltype(memory_internal::DecomposePairImpl( + std::forward(f), PairArgs(std::forward(args)...))) { + return memory_internal::DecomposePairImpl( + std::forward(f), PairArgs(std::forward(args)...)); +} + +// A helper function for implementing apply() in set policies. +// ---------------------------------------------------------------------------- +template +decltype(std::declval()(std::declval(), std::declval())) +DecomposeValue(F&& f, Arg&& arg) { + const auto& key = arg; + return std::forward(f)(key, std::forward(arg)); +} + + +// -------------------------------------------------------------------------- +// Policy: a policy defines how to perform different operations on +// the slots of the hashtable (see hash_policy_traits.h for the full interface +// of policy). 
+// +// Hash: a (possibly polymorphic) functor that hashes keys of the hashtable. The +// functor should accept a key and return size_t as hash. For best performance +// it is important that the hash function provides high entropy across all bits +// of the hash. +// +// Eq: a (possibly polymorphic) functor that compares two keys for equality. It +// should accept two (of possibly different type) keys and return a bool: true +// if they are equal, false if they are not. If two keys compare equal, then +// their hash values as defined by Hash MUST be equal. +// +// Allocator: an Allocator [https://devdocs.io/cpp/concept/allocator] with which +// the storage of the hashtable will be allocated and the elements will be +// constructed and destroyed. +// -------------------------------------------------------------------------- +template +struct FlatHashSetPolicy +{ + using slot_type = T; + using key_type = T; + using init_type = T; + using constant_iterators = std::true_type; + + template + static void construct(Allocator* alloc, slot_type* slot, Args&&... args) { + phmap::allocator_traits::construct(*alloc, slot, + std::forward(args)...); + } + + template + static void destroy(Allocator* alloc, slot_type* slot) { + phmap::allocator_traits::destroy(*alloc, slot); + } + + template + static void transfer(Allocator* alloc, slot_type* new_slot, + slot_type* old_slot) { + construct(alloc, new_slot, std::move(*old_slot)); + destroy(alloc, old_slot); + } + + static T& element(slot_type* slot) { return *slot; } + + template + static decltype(phmap::priv::DecomposeValue( + std::declval(), std::declval()...)) + apply(F&& f, Args&&... 
args) { + return phmap::priv::DecomposeValue( + std::forward(f), std::forward(args)...); + } + + static size_t space_used(const T*) { return 0; } +}; + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +template +struct FlatHashMapPolicy +{ + using slot_policy = priv::map_slot_policy; + using slot_type = typename slot_policy::slot_type; + using key_type = K; + using mapped_type = V; + using init_type = std::pair; + + template + static void construct(Allocator* alloc, slot_type* slot, Args&&... args) { + slot_policy::construct(alloc, slot, std::forward(args)...); + } + + template + static void destroy(Allocator* alloc, slot_type* slot) { + slot_policy::destroy(alloc, slot); + } + + template + static void transfer(Allocator* alloc, slot_type* new_slot, + slot_type* old_slot) { + slot_policy::transfer(alloc, new_slot, old_slot); + } + + template + static decltype(phmap::priv::DecomposePair( + std::declval(), std::declval()...)) + apply(F&& f, Args&&... args) { + return phmap::priv::DecomposePair(std::forward(f), + std::forward(args)...); + } + + static size_t space_used(const slot_type*) { return 0; } + + static std::pair& element(slot_type* slot) { return slot->value; } + + static V& value(std::pair* kv) { return kv->second; } + static const V& value(const std::pair* kv) { return kv->second; } +}; + +template +struct node_hash_policy { + static_assert(std::is_lvalue_reference::value, ""); + + using slot_type = typename std::remove_cv< + typename std::remove_reference::type>::type*; + + template + static void construct(Alloc* alloc, slot_type* slot, Args&&... 
args) { + *slot = Policy::new_element(alloc, std::forward(args)...); + } + + template + static void destroy(Alloc* alloc, slot_type* slot) { + Policy::delete_element(alloc, *slot); + } + + template + static void transfer(Alloc*, slot_type* new_slot, slot_type* old_slot) { + *new_slot = *old_slot; + } + + static size_t space_used(const slot_type* slot) { + if (slot == nullptr) return Policy::element_space_used(nullptr); + return Policy::element_space_used(*slot); + } + + static Reference element(slot_type* slot) { return **slot; } + + template + static auto value(T* elem) -> decltype(P::value(elem)) { + return P::value(elem); + } + + template + static auto apply(Ts&&... ts) -> decltype(P::apply(std::forward(ts)...)) { + return P::apply(std::forward(ts)...); + } +}; + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +template +struct NodeHashSetPolicy + : phmap::priv::node_hash_policy> +{ + using key_type = T; + using init_type = T; + using constant_iterators = std::true_type; + + template + static T* new_element(Allocator* alloc, Args&&... args) { + using ValueAlloc = + typename phmap::allocator_traits::template rebind_alloc; + ValueAlloc value_alloc(*alloc); + T* res = phmap::allocator_traits::allocate(value_alloc, 1); + phmap::allocator_traits::construct(value_alloc, res, + std::forward(args)...); + return res; + } + + template + static void delete_element(Allocator* alloc, T* elem) { + using ValueAlloc = + typename phmap::allocator_traits::template rebind_alloc; + ValueAlloc value_alloc(*alloc); + phmap::allocator_traits::destroy(value_alloc, elem); + phmap::allocator_traits::deallocate(value_alloc, elem, 1); + } + + template + static decltype(phmap::priv::DecomposeValue( + std::declval(), std::declval()...)) + apply(F&& f, Args&&... 
args) { + return phmap::priv::DecomposeValue( + std::forward(f), std::forward(args)...); + } + + static size_t element_space_used(const T*) { return sizeof(T); } +}; + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- +template +class NodeHashMapPolicy + : public phmap::priv::node_hash_policy< + std::pair&, NodeHashMapPolicy> +{ + using value_type = std::pair; + +public: + using key_type = Key; + using mapped_type = Value; + using init_type = std::pair; + + template + static value_type* new_element(Allocator* alloc, Args&&... args) { + using PairAlloc = typename phmap::allocator_traits< + Allocator>::template rebind_alloc; + PairAlloc pair_alloc(*alloc); + value_type* res = + phmap::allocator_traits::allocate(pair_alloc, 1); + phmap::allocator_traits::construct(pair_alloc, res, + std::forward(args)...); + return res; + } + + template + static void delete_element(Allocator* alloc, value_type* pair) { + using PairAlloc = typename phmap::allocator_traits< + Allocator>::template rebind_alloc; + PairAlloc pair_alloc(*alloc); + phmap::allocator_traits::destroy(pair_alloc, pair); + phmap::allocator_traits::deallocate(pair_alloc, pair, 1); + } + + template + static decltype(phmap::priv::DecomposePair( + std::declval(), std::declval()...)) + apply(F&& f, Args&&... args) { + return phmap::priv::DecomposePair(std::forward(f), + std::forward(args)...); + } + + static size_t element_space_used(const value_type*) { + return sizeof(value_type); + } + + static Value& value(value_type* elem) { return elem->second; } + static const Value& value(const value_type* elem) { return elem->second; } +}; + + +// -------------------------------------------------------------------------- +// hash_default +// -------------------------------------------------------------------------- + +#if PHMAP_HAVE_STD_STRING_VIEW + +// Supports heterogeneous lookup for basic_string-like elements. 
+template +struct StringHashEqT +{ + struct Hash + { + using is_transparent = void; + + size_t operator()(std::basic_string_view v) const { + std::string_view bv{ + reinterpret_cast(v.data()), v.size() * sizeof(CharT)}; + return std::hash()(bv); + } + }; + + struct Eq { + using is_transparent = void; + + bool operator()(std::basic_string_view lhs, + std::basic_string_view rhs) const { + return lhs == rhs; + } + }; +}; + +template <> +struct HashEq : StringHashEqT {}; + +template <> +struct HashEq : StringHashEqT {}; + +// char16_t +template <> +struct HashEq : StringHashEqT {}; + +template <> +struct HashEq : StringHashEqT {}; + +// wchar_t +template <> +struct HashEq : StringHashEqT {}; + +template <> +struct HashEq : StringHashEqT {}; + +#endif + +// Supports heterogeneous lookup for pointers and smart pointers. +// ------------------------------------------------------------- +template +struct HashEq +{ + struct Hash { + using is_transparent = void; + template + size_t operator()(const U& ptr) const { + // we want phmap::Hash and not phmap::Hash + // so "struct std::hash " override works + return phmap::Hash{}((T*)(uintptr_t)HashEq::ToPtr(ptr)); + } + }; + + struct Eq { + using is_transparent = void; + template + bool operator()(const A& a, const B& b) const { + return HashEq::ToPtr(a) == HashEq::ToPtr(b); + } + }; + +private: + static const T* ToPtr(const T* ptr) { return ptr; } + + template + static const T* ToPtr(const std::unique_ptr& ptr) { + return ptr.get(); + } + + template + static const T* ToPtr(const std::shared_ptr& ptr) { + return ptr.get(); + } +}; + +template +struct HashEq> : HashEq {}; + +template +struct HashEq> : HashEq {}; + +namespace hashtable_debug_internal { + +// -------------------------------------------------------------------------- +// -------------------------------------------------------------------------- + +template +struct has_member_type_raw_hash_set : std::false_type +{}; +template +struct has_member_type_raw_hash_set> : 
std::true_type +{}; + +template +struct HashtableDebugAccess::value>::type> +{ + using Traits = typename Set::PolicyTraits; + using Slot = typename Traits::slot_type; + + static size_t GetNumProbes(const Set& set, + const typename Set::key_type& key) { + size_t num_probes = 0; + size_t hashval = set.hash(key); + auto seq = set.probe(hashval); + while (true) { + priv::Group g{set.ctrl_ + seq.offset()}; + for (uint32_t i : g.Match(priv::H2(hashval))) { + if (Traits::apply( + typename Set::template EqualElement{ + key, set.eq_ref()}, + Traits::element(set.slots_ + seq.offset((size_t)i)))) + return num_probes; + ++num_probes; + } + if (g.MatchEmpty()) return num_probes; + seq.next(); + ++num_probes; + } + } + + static size_t AllocatedByteSize(const Set& c) { + size_t capacity = c.capacity_; + if (capacity == 0) return 0; + auto layout = Set::MakeLayout(capacity); + size_t m = layout.AllocSize(); + + size_t per_slot = Traits::space_used(static_cast(nullptr)); + if (per_slot != ~size_t{}) { + m += per_slot * c.size(); + } else { + for (size_t i = 0; i != capacity; ++i) { + if (priv::IsFull(c.ctrl_[i])) { + m += Traits::space_used(c.slots_ + i); + } + } + } + return m; + } + + static size_t LowerBoundAllocatedByteSize(size_t size) { + size_t capacity = GrowthToLowerboundCapacity(size); + if (capacity == 0) return 0; + auto layout = Set::MakeLayout(NormalizeCapacity(capacity)); + size_t m = layout.AllocSize(); + size_t per_slot = Traits::space_used(static_cast(nullptr)); + if (per_slot != ~size_t{}) { + m += per_slot * size; + } + return m; + } +}; + + +template +struct has_member_type_EmbeddedSet : std::false_type +{}; +template +struct has_member_type_EmbeddedSet> : std::true_type +{}; + +template +struct HashtableDebugAccess::value>::type> { + using Traits = typename Set::PolicyTraits; + using Slot = typename Traits::slot_type; + using EmbeddedSet = typename Set::EmbeddedSet; + + static size_t GetNumProbes(const Set& set, const typename Set::key_type& key) { + size_t 
hashval = set.hash(key); + auto& inner = set.sets_[set.subidx(hashval)]; + auto& inner_set = inner.set_; + return HashtableDebugAccess::GetNumProbes(inner_set, key); + } +}; + +} // namespace hashtable_debug_internal +} // namespace priv + +// ----------------------------------------------------------------------------- +// phmap::flat_hash_set +// ----------------------------------------------------------------------------- +// An `phmap::flat_hash_set` is an unordered associative container which has +// been optimized for both speed and memory footprint in most common use cases. +// Its interface is similar to that of `std::unordered_set` with the +// following notable differences: +// +// * Supports heterogeneous lookup, through `find()`, `operator[]()` and +// `insert()`, provided that the set is provided a compatible heterogeneous +// hashing function and equality operator. +// * Invalidates any references and pointers to elements within the table after +// `rehash()`. +// * Contains a `capacity()` member function indicating the number of element +// slots (open, deleted, and empty) within the hash set. +// * Returns `void` from the `_erase(iterator)` overload. 
+// ----------------------------------------------------------------------------- +template // default values in phmap_fwd_decl.h +class flat_hash_set + : public phmap::priv::raw_hash_set< + phmap::priv::FlatHashSetPolicy, Hash, Eq, Alloc> +{ + using Base = typename flat_hash_set::raw_hash_set; + +public: + flat_hash_set() {} +#ifdef __INTEL_COMPILER + using Base::raw_hash_set; +#else + using Base::Base; +#endif + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; // may shrink - To avoid shrinking `erase(begin(), end())` + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::hash; + using Base::key_eq; +}; + +// ----------------------------------------------------------------------------- +// phmap::flat_hash_map +// ----------------------------------------------------------------------------- +// +// An `phmap::flat_hash_map` is an unordered associative container which +// has been optimized for both speed and memory footprint in most common use +// cases. Its interface is similar to that of `std::unordered_map` with +// the following notable differences: +// +// * Supports heterogeneous lookup, through `find()`, `operator[]()` and +// `insert()`, provided that the map is provided a compatible heterogeneous +// hashing function and equality operator. +// * Invalidates any references and pointers to elements within the table after +// `rehash()`. 
+// * Contains a `capacity()` member function indicating the number of element +// slots (open, deleted, and empty) within the hash map. +// * Returns `void` from the `_erase(iterator)` overload. +// ----------------------------------------------------------------------------- +template // default values in phmap_fwd_decl.h +class flat_hash_map : public phmap::priv::raw_hash_map< + phmap::priv::FlatHashMapPolicy, + Hash, Eq, Alloc> { + using Base = typename flat_hash_map::raw_hash_map; + +public: + flat_hash_map() {} +#ifdef __INTEL_COMPILER + using Base::raw_hash_map; +#else + using Base::Base; +#endif + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::insert_or_assign; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::operator[]; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::hash; + using Base::key_eq; +}; + +// ----------------------------------------------------------------------------- +// phmap::node_hash_set +// ----------------------------------------------------------------------------- +// An `phmap::node_hash_set` is an unordered associative container which +// has been optimized for both speed and memory footprint in most common use +// cases. 
Its interface is similar to that of `std::unordered_set` with the +// following notable differences: +// +// * Supports heterogeneous lookup, through `find()`, `operator[]()` and +// `insert()`, provided that the map is provided a compatible heterogeneous +// hashing function and equality operator. +// * Contains a `capacity()` member function indicating the number of element +// slots (open, deleted, and empty) within the hash set. +// * Returns `void` from the `erase(iterator)` overload. +// ----------------------------------------------------------------------------- +template // default values in phmap_fwd_decl.h +class node_hash_set + : public phmap::priv::raw_hash_set< + phmap::priv::NodeHashSetPolicy, Hash, Eq, Alloc> +{ + using Base = typename node_hash_set::raw_hash_set; + +public: + node_hash_set() {} +#ifdef __INTEL_COMPILER + using Base::raw_hash_set; +#else + using Base::Base; +#endif + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::hash; + using Base::key_eq; + typename Base::hasher hash_funct() { return this->hash_function(); } + void resize(typename Base::size_type hint) { this->rehash(hint); } +}; + +// ----------------------------------------------------------------------------- +// phmap::node_hash_map +// ----------------------------------------------------------------------------- +// +// 
An `phmap::node_hash_map` is an unordered associative container which +// has been optimized for both speed and memory footprint in most common use +// cases. Its interface is similar to that of `std::unordered_map` with +// the following notable differences: +// +// * Supports heterogeneous lookup, through `find()`, `operator[]()` and +// `insert()`, provided that the map is provided a compatible heterogeneous +// hashing function and equality operator. +// * Contains a `capacity()` member function indicating the number of element +// slots (open, deleted, and empty) within the hash map. +// * Returns `void` from the `erase(iterator)` overload. +// ----------------------------------------------------------------------------- +template // default values in phmap_fwd_decl.h +class node_hash_map + : public phmap::priv::raw_hash_map< + phmap::priv::NodeHashMapPolicy, Hash, Eq, + Alloc> +{ + using Base = typename node_hash_map::raw_hash_map; + +public: + node_hash_map() {} +#ifdef __INTEL_COMPILER + using Base::raw_hash_map; +#else + using Base::Base; +#endif + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::insert_or_assign; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::operator[]; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::hash; + using Base::key_eq; + typename Base::hasher hash_funct() { return this->hash_function(); } + void resize(typename Base::size_type hint) { this->rehash(hint); } +}; + +// 
----------------------------------------------------------------------------- +// phmap::parallel_flat_hash_set +// ----------------------------------------------------------------------------- +template // default values in phmap_fwd_decl.h +class parallel_flat_hash_set + : public phmap::priv::parallel_hash_set< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::FlatHashSetPolicy, + Hash, Eq, Alloc> +{ + using Base = typename parallel_flat_hash_set::parallel_hash_set; + +public: + parallel_flat_hash_set() {} +#ifdef __INTEL_COMPILER + using Base::parallel_hash_set; +#else + using Base::Base; +#endif + using Base::hash; + using Base::subidx; + using Base::subcnt; + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::key_eq; +}; + +// ----------------------------------------------------------------------------- +// phmap::parallel_flat_hash_map - default values in phmap_fwd_decl.h +// ----------------------------------------------------------------------------- +template +class parallel_flat_hash_map : public phmap::priv::parallel_hash_map< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::FlatHashMapPolicy, + Hash, Eq, Alloc> +{ + using Base = typename parallel_flat_hash_map::parallel_hash_map; + +public: + parallel_flat_hash_map() {} +#ifdef __INTEL_COMPILER + using Base::parallel_hash_map; +#else + using 
Base::Base; +#endif + using Base::hash; + using Base::subidx; + using Base::subcnt; + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::insert_or_assign; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::try_emplace_with_hash; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::operator[]; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::key_eq; +}; + +// ----------------------------------------------------------------------------- +// phmap::parallel_node_hash_set +// ----------------------------------------------------------------------------- +template +class parallel_node_hash_set + : public phmap::priv::parallel_hash_set< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::NodeHashSetPolicy, Hash, Eq, Alloc> +{ + using Base = typename parallel_node_hash_set::parallel_hash_set; + +public: + parallel_node_hash_set() {} +#ifdef __INTEL_COMPILER + using Base::parallel_hash_set; +#else + using Base::Base; +#endif + using Base::hash; + using Base::subidx; + using Base::subcnt; + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::extract; + using Base::merge; + 
using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::key_eq; + typename Base::hasher hash_funct() { return this->hash_function(); } + void resize(typename Base::size_type hint) { this->rehash(hint); } +}; + +// ----------------------------------------------------------------------------- +// phmap::parallel_node_hash_map +// ----------------------------------------------------------------------------- +template +class parallel_node_hash_map + : public phmap::priv::parallel_hash_map< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::NodeHashMapPolicy, Hash, Eq, + Alloc> +{ + using Base = typename parallel_node_hash_map::parallel_hash_map; + +public: + parallel_node_hash_map() {} +#ifdef __INTEL_COMPILER + using Base::parallel_hash_map; +#else + using Base::Base; +#endif + using Base::hash; + using Base::subidx; + using Base::subcnt; + using Base::begin; + using Base::cbegin; + using Base::cend; + using Base::end; + using Base::capacity; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::insert_or_assign; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::try_emplace_with_hash; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::rehash; + using Base::reserve; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::operator[]; + using Base::bucket_count; + using Base::load_factor; + using Base::max_load_factor; + using Base::get_allocator; + using Base::hash_function; + using Base::key_eq; + typename Base::hasher 
hash_funct() { return this->hash_function(); } + void resize(typename Base::size_type hint) { this->rehash(hint); } +}; + +} // namespace phmap + + +namespace phmap { + namespace priv { + template + std::size_t erase_if(C &c, Pred pred) { + auto old_size = c.size(); + for (auto i = c.begin(), last = c.end(); i != last; ) { + if (pred(*i)) { + i = c.erase(i); + } else { + ++i; + } + } + return old_size - c.size(); + } + } // priv + + // ======== erase_if for phmap set containers ================================== + template + std::size_t erase_if(phmap::flat_hash_set& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::node_hash_set& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::parallel_flat_hash_set& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::parallel_node_hash_set& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + // ======== erase_if for phmap map containers ================================== + template + std::size_t erase_if(phmap::flat_hash_map& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::node_hash_map& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::parallel_flat_hash_map& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + + template + std::size_t erase_if(phmap::parallel_node_hash_map& c, Pred pred) { + return phmap::priv::erase_if(c, std::move(pred)); + } + +} // phmap + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + + +#endif // phmap_h_guard_ diff --git a/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap_base.h b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap_base.h new file mode 100644 index 
00000000..0cb89310 --- /dev/null +++ b/liberty/lib/SLAMP/SLAMPcustom/consumer/ProfilingModules/parallel_hashmap/phmap_base.h @@ -0,0 +1,5157 @@ +#if !defined(phmap_base_h_guard_) +#define phmap_base_h_guard_ + +// --------------------------------------------------------------------------- +// Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) +// with modifications. +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// --------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for std::lock + +#include "phmap_config.h" + +#ifdef PHMAP_HAVE_SHARED_MUTEX + #include // after "phmap_config.h" +#endif + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4582) // constructor is not implicitly called + #pragma warning(disable : 4625) // copy constructor was implicitly defined as deleted + #pragma warning(disable : 4626) // assignment operator was implicitly defined as deleted + #pragma warning(disable : 4710) // function not inlined + #pragma warning(disable : 4711) // selected for automatic inline expansion + #pragma warning(disable : 4820) // '6' bytes padding added after data member +#endif // _MSC_VER + +namespace phmap { + +template using Allocator = typename std::allocator; + +template using Pair = typename std::pair; + +template +struct EqualTo +{ + inline bool operator()(const T& a, const T& b) const + { + return std::equal_to()(a, b); + } +}; + +template +struct Less +{ + inline bool operator()(const T& a, const T& b) const + { + return std::less()(a, b); + } +}; + +namespace type_traits_internal { + +template +struct VoidTImpl { + using type = void; +}; + +// NOTE: The `is_detected` family of templates here differ from the library +// fundamentals specification in that for library fundamentals, `Op` is +// evaluated as soon as the type `is_detected` undergoes +// substitution, regardless of whether or not the `::value` is accessed. That +// is inconsistent with all other standard traits and prevents lazy evaluation +// in larger contexts (such as if the `is_detected` check is a trailing argument +// of a `conjunction`. 
This implementation opts to instead be lazy in the same +// way that the standard traits are (this "defect" of the detection idiom +// specifications has been reported). +// --------------------------------------------------------------------------- + +template class Op, class... Args> +struct is_detected_impl { + using type = std::false_type; +}; + +template