diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 8186bdba16..f4b73965cb 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -20,10 +20,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/Utils/MaybeUniquePtr.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { class DIBasedTypeHierarchy; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 044815e287..439afd2a72 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -12,10 +12,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { /// \brief A resolver that doesn't resolve indirect- and virtual calls diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 41b3f6e878..ee0265a38d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -20,22 +20,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include -#include -#include -#include - -namespace llvm { -class CallBase; -class Function; -class Type; -class Value; -} // namespace llvm - namespace psr { -class DIBasedTypeHierarchy; - /// \brief A resolver that uses alias information to resolve indirect and /// virtual calls class OTFResolver : public Resolver { @@ -54,18 +40,11 @@ class OTFResolver : public Resolver { void resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) override; - static std::set - 
getReachableTypes(const LLVMAliasInfo::AliasSetTy &Values); - - static std::vector> - getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget); - [[nodiscard]] std::string str() const override; [[nodiscard]] bool mutatesHelperAnalysisInformation() const noexcept override { - return true; + return !PT.isInterProcedural(); } protected: diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index 6e89063a9d..f9165a7d96 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -22,12 +22,10 @@ #include namespace llvm { -class CallBase; class DICompositeType; } // namespace llvm namespace psr { -class DIBasedTypeHierarchy; /// \brief A resolver that performs Rapid Type Analysis to resolve calls /// to C++ virtual functions. Requires debug information. diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 09b8147424..cc1e31100a 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -20,6 +20,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DerivedTypes.h" #include @@ -44,6 +45,10 @@ enum class CallGraphAnalysisType; [[nodiscard]] std::optional getVFTIndex(const llvm::CallBase *CallSite); +/// Similar to getVFTIndex(), but also returns a pointer to the vtable +[[nodiscard]] std::optional> +getVFTIndexAndVT(const llvm::CallBase *CallSite); + /// Assuming that `CallSite` is a call to a non-static member function, /// retrieves the type of the receiver. 
Returns nullptr, if the receiver-type /// could not be extracted @@ -67,25 +72,16 @@ getReceiverType(const llvm::CallBase *CallSite); [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); +/// A variant of F->hasAddressTaken() that is better suited for our use cases. +/// +/// Especially, it filters out global aliases. +[[nodiscard]] bool isAddressTakenFunction(const llvm::Function *F); + /// \brief A base class for call-target resolvers. Used to build call graphs. /// /// Create a specific resolver by making a new class, inheriting this resolver /// class and implementing the virtual functions as needed. class Resolver { -protected: - const LLVMProjectIRDB *IRDB; - const LLVMVFTableProvider *VTP; - - const llvm::Function * - getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, - const llvm::CallBase *CallSite, - const llvm::DIType *ReceiverType) { - if (!VTP) { - return nullptr; - } - return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP, ReceiverType); - } - public: using FunctionSetTy = llvm::SmallDenseSet; @@ -93,17 +89,23 @@ class Resolver { virtual ~Resolver() = default; - virtual void preCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + preCall(const llvm::Instruction *Inst); virtual void handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &PossibleTargets); - virtual void postCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + postCall(const llvm::Instruction *Inst); [[nodiscard]] FunctionSetTy resolveIndirectCall(const llvm::CallBase *CallSite); - virtual void otherInst(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + otherInst(const llvm::Instruction *Inst); [[nodiscard]] virtual std::string str() const = 0; @@ -115,11 +117,30 @@ class 
Resolver { // Conservatively returns true. Override if possible return true; } - static std::unique_ptr create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT = nullptr); + + [[nodiscard]] llvm::ArrayRef + getAddressTakenFunctions(); + + [[nodiscard]] static std::unique_ptr + create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT = nullptr); + +protected: + const llvm::Function * + getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, + const llvm::CallBase *CallSite, + const llvm::DIType *ReceiverType) { + if (!VTP) { + return nullptr; + } + return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP, ReceiverType); + } + + const LLVMProjectIRDB *IRDB{}; + const LLVMVFTableProvider *VTP{}; + std::optional> + AddressTakenFunctions{}; protected: virtual void resolveVirtualCall(FunctionSetTy &PossibleTargets, diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index e3f65c00ec..492176a510 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -153,13 +153,9 @@ bool Builder::processFunction(const llvm::Function *F) { for (const auto &I : llvm::instructions(F)) { const auto *CS = llvm::dyn_cast(&I); if (!CS) { - Res->otherInst(&I); continue; } - Res->preCall(&I); - scope_exit PostCall = [&] { Res->postCall(&I); }; - FixpointReached &= fillPossibleTargets(PossibleTargets, *Res, CS, IndirectCalls); @@ -203,9 +199,6 @@ bool Builder::constructDynamicCall(const llvm::Instruction *CS) { "Looking into dynamic call-site: "); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", " " << llvmIRToString(CS)); - Res->preCall(CallSite); - scope_exit PostCall = [&] { Res->postCall(CallSite); }; - // call the resolve routine auto 
PossibleTargets = Res->resolveIndirectCall(CallSite); @@ -275,7 +268,7 @@ auto psr::buildLLVMBasedCallGraph( PT = PTOwn.asRef(); } - auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH); + auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH, PT); return buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints, S); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index 393e810fb1..56b9565a80 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -21,15 +21,10 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Module.h" #include -using namespace std; using namespace psr; CHAResolver::CHAResolver(const LLVMProjectIRDB *IRDB, diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp index 9dc6a56c28..7b962135eb 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp @@ -16,12 +16,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" -#include - using namespace psr; -namespace psr { - NOResolver::NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : Resolver(IRDB, VTP) {} @@ -33,5 +29,3 @@ void NOResolver::resolveFunctionPointer(FunctionSetTy & /*PossibleTargets*/, const llvm::CallBase * /*CallSite*/) {} std::string NOResolver::str() const { return "NOResolver"; } - -} // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index c4168649c9..5adf72192c 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -22,7 +22,6 @@ #include 
"llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" @@ -34,6 +33,55 @@ OTFResolver::OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT) : Resolver(IRDB, VTP), PT(PT) {} +static std::vector> +getActualFormalPointerPairs(const llvm::CallBase *CallSite, + const llvm::Function *CalleeTarget) { + std::vector> Pairs; + Pairs.reserve(CallSite->arg_size()); + // ordinary case + + unsigned Idx = 0; + for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { + // only collect pointer typed pairs + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && + CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), + CalleeTarget->getArg(Idx)); + } + } + + if (CalleeTarget->isVarArg()) { + // in case of vararg, we can pair-up incoming pointer parameters with the + // vararg pack of the callee target. 
the vararg pack will alias + // (intra-procedurally) with any pointer values loaded from the pack + const llvm::AllocaInst *VarArgs = nullptr; + + for (const auto &I : llvm::instructions(CalleeTarget)) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (const auto *AT = + llvm::dyn_cast(Alloca->getAllocatedType())) { + if (const auto *ST = + llvm::dyn_cast(AT->getArrayElementType())) { + if (ST->hasName() && ST->getName() == "struct.__va_list_tag") { + VarArgs = Alloca; + break; + } + } + } + } + } + + if (VarArgs) { + for (; Idx < CallSite->arg_size(); ++Idx) { + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); + } + } + } + } + return Pairs; +} + void OTFResolver::handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &CalleeTargets) { // if we have no inter-procedural points-to information, use call-graph @@ -71,7 +119,7 @@ void OTFResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, PHASAR_LOG_LEVEL(DEBUG, "Call virtual function: " << llvmIRToString(CallSite)); - auto RetrievedVtableIndex = getVFTIndex(CallSite); + auto RetrievedVtableIndex = getVFTIndexAndVT(CallSite); if (!RetrievedVtableIndex.has_value()) { // An error occured PHASAR_LOG_LEVEL(DEBUG, @@ -81,11 +129,12 @@ void OTFResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, return; } - auto VtableIndex = RetrievedVtableIndex.value(); + auto [VtablePtr, VtableIndex] = RetrievedVtableIndex.value(); PHASAR_LOG_LEVEL(DEBUG, "Virtual function table entry is: " << VtableIndex); - auto PTS = PT.getAliasSet(CallSite->getCalledOperand(), CallSite); + auto PTS = PT.getAliasSet(VtablePtr, CallSite); + for (const auto *P : *PTS) { if (const auto *PGV = llvm::dyn_cast(P)) { if (PGV->hasName() && @@ -201,60 +250,4 @@ void OTFResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, } } -std::set -OTFResolver::getReachableTypes(const LLVMAliasInfo::AliasSetTy &Values) { - std::set Types; - // an 
allocation site can either be an AllocaInst or a call to an - // allocating function - for (const auto *V : Values) { - if (const auto *Alloc = llvm::dyn_cast(V)) { - Types.insert(Alloc->getAllocatedType()); - } else { - // usually if an allocating function is called, it is immediately - // bit-casted - // to the desired allocated value and hence we can determine it from - // the destination type of that cast instruction. - for (const auto *User : V->users()) { - if (const auto *Cast = llvm::dyn_cast(User)) { - Types.insert(Cast->getDestTy()); - } - } - } - } - return Types; -} - -std::vector> -OTFResolver::getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget) { - std::vector> Pairs; - Pairs.reserve(CallSite->arg_size()); - // ordinary case - - unsigned Idx = 0; - for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { - // only collect pointer typed pairs - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && - CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), - CalleeTarget->getArg(Idx)); - } - } - - if (CalleeTarget->isVarArg()) { - // in case of vararg, we can pair-up incoming pointer parameters with the - // vararg pack of the callee target. 
the vararg pack will alias - // (intra-procedurally) with any pointer values loaded from the pack - - if (const auto *VarArgs = getVaListTagOrNull(*CalleeTarget)) { - for (; Idx < CallSite->arg_size(); ++Idx) { - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); - } - } - } - } - return Pairs; -} - std::string OTFResolver::str() const { return "OTF"; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index f97fc8c1f1..a1d8bd5b1c 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -42,6 +43,8 @@ #include #include +using namespace psr; + std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { // deal with a virtual member function // retrieve the vtable entry that is called @@ -61,6 +64,29 @@ std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { return std::nullopt; } +std::optional> +psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { + // deal with a virtual member function + // retrieve the vtable entry that is called + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (Load == nullptr) { + return std::nullopt; + } + + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (GEP == nullptr) { + return std::nullopt; + } + + if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { + return {{GEP->getPointerOperand(), CI->getZExtValue()}}; + } + + return std::nullopt; +} + static const llvm::DIType *stripPointerTypes(const llvm::DIType *DITy) { while (const auto *DerivedTy = llvm::dyn_cast_if_present(DITy)) { @@ -155,7 +181,59 @@ bool 
psr::isVirtualCall(const llvm::Instruction *Inst, return getVFTIndex(CallSite) >= 0; } -namespace psr { +// Derived from LLVM's llvm::Function::hasAddressTaken() +static bool isAddressTakenImpl(const llvm::Value *F) { + if (!F) { + return false; + } + + for (const auto &Use : F->uses()) { + const auto *User = Use.getUser(); + + if (llvm::isa(User)) { + if (isAddressTakenImpl(User)) { + return true; + } + + continue; + } + + if (const auto *Glob = llvm::dyn_cast(User)) { + if (Glob->getName() == "llvm.compiler.used" || + Glob->getName() == "llvm.used") { + continue; + } + + return true; + } + + const auto *Call = llvm::dyn_cast(User); + if (!Call) { + return true; + } + + if (Call->isDebugOrPseudoInst()) { + continue; + } + + const auto *Intrinsic = llvm::dyn_cast(Call); + if (Intrinsic && Intrinsic->isAssumeLikeIntrinsic()) { + continue; + } + + if (Call->isCallee(&Use)) { + continue; + } + + return true; + } + + return false; +} + +bool psr::isAddressTakenFunction(const llvm::Function *F) { + return isAddressTakenImpl(F); +} Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : IRDB(IRDB), VTP(VTP) { @@ -183,6 +261,21 @@ auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) return PossibleTargets; } +llvm::ArrayRef Resolver::getAddressTakenFunctions() { + if (!AddressTakenFunctions) { + auto &ATF = AddressTakenFunctions.emplace(); + // XXX: Find better heuristic + ATF.reserve(IRDB->getNumFunctions() / 2); + for (const auto *F : IRDB->getAllFunctions()) { + if (isAddressTakenFunction(F)) { + ATF.push_back(F); + } + } + } + + return *AddressTakenFunctions; +} + void Resolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) { // we may wish to optimise this function @@ -191,8 +284,8 @@ void Resolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, PHASAR_LOG_LEVEL(DEBUG, "Call function pointer: " << llvmIRToString(CallSite)); - for (const auto *F : IRDB->getAllFunctions()) { - if 
(F->hasAddressTaken() && isConsistentCall(CallSite, F)) { + for (const auto *F : getAddressTakenFunctions()) { + if (isConsistentCall(CallSite, F)) { PossibleTargets.insert(F); } } @@ -230,5 +323,3 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, llvm_unreachable("All possible callgraph algorithms should be handled in the " "above switch"); } - -} // namespace psr