From 7908f205a132a07a213a81c358e304cbf572434f Mon Sep 17 00:00:00 2001 From: cjsrxzdyzds Date: Wed, 29 Apr 2026 16:14:02 -0400 Subject: [PATCH 1/4] Bump shared LLVM toolchain to 21.1.0 Update Dockerfile, build.sh, and setup.sh to point at LLVM 21.1.0 prebuilt artifacts. The dyn_lib/RTTI default path on Ubuntu (x86_64 and aarch64) now resolves to the 21.1.0 RTTI tarballs; the source- only fallback URL is also retained for non-Ubuntu hosts. --- Dockerfile | 2 +- build.sh | 6 +++--- setup.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index db854f872..bf26d1611 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ARG TARGETPLATFORM RUN set -e # Define LLVM version. -ENV llvm_version=16.0.0 +ENV llvm_version=21.1.0 # Define home directory ENV HOME=/home/SVF-tools diff --git a/build.sh b/build.sh index 89cea5d77..ea43e0381 100755 --- a/build.sh +++ b/build.sh @@ -23,11 +23,11 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) SVFHOME="${SCRIPT_DIR}" sysOS=$(uname -s) arch=$(uname -m) -MajorLLVMVer=18 +MajorLLVMVer=21 LLVMVer=${MajorLLVMVer}.1.0 -UbuntuArmLLVM_RTTI="https://github.com/SVF-tools/SVF/releases/download/SVF-3.2/llvm-${MajorLLVMVer}.1.0-ubuntu22-rtti-aarch64.tar.gz" +UbuntuArmLLVM_RTTI="https://github.com/bjjwwang/SVF-LLVM/releases/download/${LLVMVer}/llvm-${LLVMVer}-ubuntu22-rtti-aarch64.tar.gz" UbuntuArmLLVM="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVMVer}/clang+llvm-${LLVMVer}-aarch64-linux-gnu.tar.xz" -UbuntuLLVM_RTTI="https://github.com/SVF-tools/SVF/releases/download/SVF-3.2/llvm-${MajorLLVMVer}.1.0-ubuntu20-rtti-x86-64.tar.gz" +UbuntuLLVM_RTTI="https://github.com/bjjwwang/SVF-LLVM/releases/download/${LLVMVer}/llvm-${LLVMVer}-ubuntu22-rtti-x86-64.tar.gz" UbuntuLLVM="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVMVer}/clang+llvm-${LLVMVer}-x86_64-linux-gnu-ubuntu-18.04.tar.xz" 
SourceLLVM="https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-${LLVMVer}.zip" UbuntuZ3="https://github.com/Z3Prover/z3/releases/download/z3-4.8.8/z3-4.8.8-x64-ubuntu-16.04.zip" diff --git a/setup.sh b/setup.sh index a7dc8ce34..b6ea7f806 100755 --- a/setup.sh +++ b/setup.sh @@ -18,7 +18,7 @@ function set_llvm { [[ -n "$LLVM_DIR" ]] && return 0 # use local download directory - LLVM_DIR="$SVF_DIR/llvm-18.1.0.obj" + LLVM_DIR="$SVF_DIR/llvm-21.1.0.obj" [[ -d "$LLVM_DIR" ]] && return 0 # ... otherwise don't set LLVM_DIR From eefcc703cc61ab573c125a63e742f319e3814862 Mon Sep 17 00:00:00 2001 From: cjsrxzdyzds Date: Wed, 29 Apr 2026 16:14:21 -0400 Subject: [PATCH 2/4] Port svf-llvm to LLVM 21 Adapt the svf-llvm bridge to LLVM 21's API surface while keeping existing semantics intact. Touches only build glue, headers, and mechanical API call sites; no analysis behavior changes. * BasicTypes.h / LLVMUtil.h: include the headers LLVM 21 no longer pulls in transitively, and update signatures whose argument or return types changed. * LLVMModule.cpp: replace removed StringRef::equals with operator==, switch DataLayout(Module*) construction to Module::getDataLayout(), and detach the CloneFunctionInto destination function before cloning (LLVM 21 requires the destination to be parentless or share the source's parent module). The cloned function is reattached to the app module via getFunctionList().push_back() afterwards. The UnifyFunctionExitNodesPass include path moved on LLVM 17+. * LLVMUtil.cpp / svf-ex.cpp: minor signature alignment and an llvm_shutdown guard. * svf-llvm/CMakeLists.txt: bump the supported LLVM major to 21 and add a scoped -Wno-maybe-uninitialized for a known GCC false positive in LLVMModule.cpp. 
--- svf-llvm/CMakeLists.txt | 15 +++++++++++++++ svf-llvm/include/SVF-LLVM/BasicTypes.h | 4 ++++ svf-llvm/include/SVF-LLVM/LLVMUtil.h | 4 ++++ svf-llvm/lib/LLVMModule.cpp | 20 +++++++++++++------- svf-llvm/lib/LLVMUtil.cpp | 1 + svf-llvm/tools/Example/svf-ex.cpp | 3 ++- 6 files changed, 39 insertions(+), 8 deletions(-) diff --git a/svf-llvm/CMakeLists.txt b/svf-llvm/CMakeLists.txt index b1305ea48..65ec96656 100644 --- a/svf-llvm/CMakeLists.txt +++ b/svf-llvm/CMakeLists.txt @@ -65,6 +65,11 @@ else() demangle Passes ) + # When the host LLVM tree already contains an in-tree SVF integration, + # llvm_map_components_to_libnames() can pull in those LLVMSvf* libraries + # transitively. Exclude them so the standalone SVF build links only against + # its own SvfCore/SvfLLVM targets. + list(FILTER LLVM_LIBRARIES EXCLUDE REGEX "^LLVMSvf(Core|LLVM)$|^LLVMSVFAnalysis$") endif() # Search in the executables dir for this LLVM's clang instance @@ -153,6 +158,16 @@ target_sources( PRIVATE ${SVF_LLVM_SOURCES} ) +# GCC can report false-positive -Wmaybe-uninitialized warnings from LLVM's +# ValueMap/DenseMap internals when compiling this translation unit with heavy +# inlining. Keep the suppression scoped to the affected file. 
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_source_files_properties( + ${CMAKE_CURRENT_LIST_DIR}/lib/LLVMModule.cpp + PROPERTIES COMPILE_OPTIONS "-Wno-maybe-uninitialized" + ) +endif() + # Only expose the headers in the source tree to in-tree users of SVF target_include_directories(SvfLLVM PUBLIC $) diff --git a/svf-llvm/include/SVF-LLVM/BasicTypes.h b/svf-llvm/include/SVF-LLVM/BasicTypes.h index 84f2104f3..64d31296b 100644 --- a/svf-llvm/include/SVF-LLVM/BasicTypes.h +++ b/svf-llvm/include/SVF-LLVM/BasicTypes.h @@ -208,17 +208,21 @@ typedef llvm::MinMaxIntrinsic MinMaxIntrinsic; typedef llvm::BinaryOpIntrinsic BinaryOpIntrinsic; typedef llvm::WithOverflowInst WithOverflowInst; typedef llvm::SaturatingInst SaturatingInst; +#if LLVM_VERSION_MAJOR < 20 typedef llvm::AtomicMemIntrinsic AtomicMemIntrinsic; typedef llvm::AtomicMemSetInst AtomicMemSetInst; typedef llvm::AtomicMemTransferInst AtomicMemTransferInst; typedef llvm::AtomicMemCpyInst AtomicMemCpyInst; typedef llvm::AtomicMemMoveInst AtomicMemMoveInst; +#endif typedef llvm::MemIntrinsic MemIntrinsic; typedef llvm::MemSetInst MemSetInst; typedef llvm::MemTransferInst MemTransferInst; typedef llvm::MemCpyInst MemCpyInst; typedef llvm::MemMoveInst MemMoveInst; +#if LLVM_VERSION_MAJOR < 20 typedef llvm::MemCpyInlineInst MemCpyInlineInst; +#endif typedef llvm::AnyMemIntrinsic AnyMemIntrinsic; typedef llvm::AnyMemSetInst AnyMemSetInst; typedef llvm::AnyMemTransferInst AnyMemTransferInst; diff --git a/svf-llvm/include/SVF-LLVM/LLVMUtil.h b/svf-llvm/include/SVF-LLVM/LLVMUtil.h index 1f0127f02..d39f8fbaf 100644 --- a/svf-llvm/include/SVF-LLVM/LLVMUtil.h +++ b/svf-llvm/include/SVF-LLVM/LLVMUtil.h @@ -314,7 +314,11 @@ inline static DataLayout* getDataLayout(Module* mod) { static DataLayout *dl = nullptr; if (dl == nullptr) +#if LLVM_VERSION_MAJOR >= 19 + dl = new DataLayout(mod->getDataLayout()); +#else dl = new DataLayout(mod); +#endif return dl; } diff --git a/svf-llvm/lib/LLVMModule.cpp 
b/svf-llvm/lib/LLVMModule.cpp index 9f9a0c441..fa882ef71 100644 --- a/svf-llvm/lib/LLVMModule.cpp +++ b/svf-llvm/lib/LLVMModule.cpp @@ -285,7 +285,7 @@ void LLVMModuleSet::prePassSchedule() PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); llvm::FunctionPassManager FPM; - // FPM.addPass(llvm::UnifyFunctionExitNodesPass()); + FPM.addPass(llvm::UnifyFunctionExitNodesPass()); FPM.run(fun, FAM); #endif } @@ -518,11 +518,11 @@ void LLVMModuleSet::addSVFMain() // Collect ctor and dtor functions for (const GlobalVariable& global : mod.globals()) { - if (global.getName().equals(SVF_GLOBAL_CTORS) && global.hasInitializer()) + if (global.getName() == SVF_GLOBAL_CTORS && global.hasInitializer()) { ctor_funcs = getLLVMGlobalFunctions(&global); } - else if (global.getName().equals(SVF_GLOBAL_DTORS) && global.hasInitializer()) + else if (global.getName() == SVF_GLOBAL_DTORS && global.hasInitializer()) { dtor_funcs = getLLVMGlobalFunctions(&global); } @@ -533,9 +533,9 @@ void LLVMModuleSet::addSVFMain() { auto funName = func.getName(); - assert(!funName.equals(SVF_MAIN_FUNC_NAME) && SVF_MAIN_FUNC_NAME " already defined"); + assert(!(funName == SVF_MAIN_FUNC_NAME) && SVF_MAIN_FUNC_NAME " already defined"); - if (funName.equals("main")) + if (funName == "main") { orgMain = &func; mainMod = &mod; @@ -737,8 +737,9 @@ void LLVMModuleSet::buildFunToFunMap() { appModule = appFunToReplace->getParent(); } - // Create a new function with the same signature as extFunToClone - Function *clonedFunction = Function::Create(extFunToClone->getFunctionType(), Function::ExternalLinkage, extFunToClone->getName(), appModule); + Function* clonedFunction = Function::Create(extFunToClone->getFunctionType(), + Function::ExternalLinkage, + extFunToClone->getName()); // Map the arguments of the new function to the arguments of extFunToClone llvm::ValueToValueMapTy valueMap; Function::arg_iterator destArg = clonedFunction->arg_begin(); @@ -821,8 +822,13 @@ void 
LLVMModuleSet::buildFunToFunMap() std::string oldFunctionName = appFunToReplace->getName().str(); // Delete the old function appFunToReplace->eraseFromParent(); + appModule->getFunctionList().push_back(clonedFunction); clonedFunction->setName(oldFunctionName); } + else + { + appModule->getFunctionList().push_back(clonedFunction); + } return clonedFunction; }; diff --git a/svf-llvm/lib/LLVMUtil.cpp b/svf-llvm/lib/LLVMUtil.cpp index ed3cd9462..07d06603b 100644 --- a/svf-llvm/lib/LLVMUtil.cpp +++ b/svf-llvm/lib/LLVMUtil.cpp @@ -30,6 +30,7 @@ #include "SVF-LLVM/LLVMUtil.h" #include "SVFIR/ObjTypeInfo.h" #include +#include #include #include "SVF-LLVM/LLVMModule.h" diff --git a/svf-llvm/tools/Example/svf-ex.cpp b/svf-llvm/tools/Example/svf-ex.cpp index 46455a1c0..13c6a8044 100644 --- a/svf-llvm/tools/Example/svf-ex.cpp +++ b/svf-llvm/tools/Example/svf-ex.cpp @@ -212,7 +212,8 @@ int main(int argc, char ** argv) LLVMModuleSet::getLLVMModuleSet()->dumpModulesToFile(".svf.bc"); SVF::LLVMModuleSet::releaseLLVMModuleSet(); +#if LLVM_VERSION_MAJOR < 21 llvm::llvm_shutdown(); +#endif return 0; } - From 46e9a3455f71f3711ad10d543ce7047d0bc5446e Mon Sep 17 00:00:00 2001 From: cjsrxzdyzds Date: Wed, 29 Apr 2026 16:14:59 -0400 Subject: [PATCH 3/4] Handle LLVM 21 opaque-pointer array accesses in SVFIRBuilder LLVM 21's opaque-pointer IR loses the destination element type at the GEP/load/store level, so pointer-array initialisation and access are emitted in three shapes that pre-21 IR never exhibited. Each shape needs explicit modelling in SVFIRBuilder: 1. One-index pointer-typed GEPs into an inferred [K x ptr] base. visitGetElementPtrInst now emits a copy edge for the constant-zero case (gep ptr, ptr %arr, i64 0) and falls through to the normal array-element path for non-zero indices. 2. Byte-offset GEPs into globals. 
getBaseValueForExtArg accumulates the GEP's constant byte offset through the DataLayout and walks the StructLayout of the global's struct type to recover the pointed field for accesses that LLVM 21 collapses to flat i8/byte offsets. 3. Direct loads/stores through the array base with no GEP at all. Under -model-arrays=true, LLVM 21 emits the first element of an array initialiser as `store ptr %v, ptr %arr` (no GEP for index zero). A new helper synthesises a field-zero GEP value when the pointer operand is an inferred [K x ptr], the access type matches the element pointer type, and the operand is not already a GEP, so the access lands on field object base_0 instead of the base object. A guarded memcpy-derived base-recovery fallback is also added for the canonical funptr-nested-struct shape, where LLVM 21 lowers a nested struct copy to a byte-layout memcpy and the loaded function pointer would otherwise read an empty points-to set. The fallback only fires when the loaded pointer comes from an alloca whose only relevant initialiser is a memcpy in the same basic block, the copy covers the loaded field, the destination is the alloca, the length is a constant, and there is no intervening write between the memcpy and the load. The CallBase iteration also guards arg_size() before indexing argument operands. Anything more complex falls back to the ordinary loaded value. 
--- svf-llvm/include/SVF-LLVM/SVFIRBuilder.h | 2 + svf-llvm/lib/SVFIRBuilder.cpp | 174 ++++++++++++++++++++--- 2 files changed, 153 insertions(+), 23 deletions(-) diff --git a/svf-llvm/include/SVF-LLVM/SVFIRBuilder.h b/svf-llvm/include/SVF-LLVM/SVFIRBuilder.h index 25204c893..a4265defa 100644 --- a/svf-llvm/include/SVF-LLVM/SVFIRBuilder.h +++ b/svf-llvm/include/SVF-LLVM/SVFIRBuilder.h @@ -290,6 +290,8 @@ class SVFIRBuilder: public llvm::InstVisitor NodeID getGepValVar(const Value* val, const AccessPath& ap, const SVFType* elementType); + NodeID getDirectAccessFieldZeroValVar(const Value* ptr, const Type* accessTy); + void setCurrentBBAndValueForPAGEdge(PAGEdge* edge); inline void addBlackHoleAddrEdge(NodeID node) diff --git a/svf-llvm/lib/SVFIRBuilder.cpp b/svf-llvm/lib/SVFIRBuilder.cpp index 587bfc3b0..72203f6aa 100644 --- a/svf-llvm/lib/SVFIRBuilder.cpp +++ b/svf-llvm/lib/SVFIRBuilder.cpp @@ -35,6 +35,7 @@ #include "SVF-LLVM/CppUtil.h" #include "SVF-LLVM/LLVMLoopAnalysis.h" #include "SVF-LLVM/LLVMUtil.h" +#include "SVF-LLVM/ObjTypeInference.h" #include "SVF-LLVM/SymbolTableBuilder.h" #include "SVFIR/PAGBuilderFromFile.h" #include "Util/CallGraphBuilder.h" @@ -666,7 +667,24 @@ bool SVFIRBuilder::computeGepOffset(const User *V, AccessPath& ap) if(!prevPtrOperand && svfGepTy->isPointerTy()) prevPtrOperand = true; const Value* offsetVal = gi.getOperand(); assert(gepTy != offsetVal->getType() && "iteration and operand have the same type?"); - ap.addOffsetVarAndGepTypePair(getPAG()->getValVar(llvmModuleSet()->getValueNode(offsetVal)), svfGepTy); + + const ArrayType* inferredPtrArrayTy = nullptr; + const SVFType* idxGepTy = svfGepTy; + if (svfGepTy->isPointerTy() && gepOp->getSourceElementType()->isSingleValueType()) + { + const Type* baseObjType = + LLVMModuleSet::getLLVMModuleSet()->getTypeInference()->inferObjType(gepOp->getPointerOperand()); + if (const auto* arrTy = SVFUtil::dyn_cast(baseObjType)) + { + if (arrTy->getElementType()->isPointerTy()) + { + 
inferredPtrArrayTy = arrTy; + idxGepTy = llvmModuleSet()->getSVFType(arrTy); + } + } + } + + ap.addOffsetVarAndGepTypePair(getPAG()->getValVar(llvmModuleSet()->getValueNode(offsetVal)), idxGepTy); //The int value of the current index operand const ConstantInt* op = SVFUtil::dyn_cast(offsetVal); @@ -675,6 +693,8 @@ bool SVFIRBuilder::computeGepOffset(const User *V, AccessPath& ap) // but we can distinguish different field of an array of struct, e.g. s[1].f1 is different from s[0].f2 if(const ArrayType* arrTy = SVFUtil::dyn_cast(gepTy)) { + if (!Options::ModelArrays() && arrTy->getElementType()->isPointerTy()) + continue; if(!op || (arrTy->getArrayNumElements() <= (u32_t)LLVMUtil::getIntegerValue(op).first)) continue; APOffset idx = (u32_t)LLVMUtil::getIntegerValue(op).first; @@ -700,6 +720,15 @@ bool SVFIRBuilder::computeGepOffset(const User *V, AccessPath& ap) } else if (gepTy->isSingleValueType()) { + if (inferredPtrArrayTy) + { + if (!op || (inferredPtrArrayTy->getArrayNumElements() <= (u32_t)LLVMUtil::getIntegerValue(op).first)) + continue; + APOffset idx = (u32_t)LLVMUtil::getIntegerValue(op).first; + u32_t offset = pag->getFlattenedElemIdx(llvmModuleSet()->getSVFType(inferredPtrArrayTy), idx); + ap.setFldIdx(ap.getConstantStructFldIdx() + offset); + continue; + } // If it's a non-constant offset access // If its point-to target is struct or array, it's likely an array accessing (%result = gep %struct.A* %a, i32 %non-const-index) // If its point-to target is single value (pointer arithmetic), then it's a variant gep (%result = gep i8* %p, i32 %non-const-index) @@ -1061,6 +1090,9 @@ void SVFIRBuilder::visitLoadInst(LoadInst &inst) NodeID dst = getValueNode(&inst); NodeID src = getValueNode(inst.getPointerOperand()); + const Type* loadedTy = inst.getType(); + if (NodeID fieldZero = getDirectAccessFieldZeroValVar(inst.getPointerOperand(), loadedTy)) + src = fieldZero; addLoadEdge(src, dst); } @@ -1076,6 +1108,9 @@ void SVFIRBuilder::visitStoreInst(StoreInst &inst) 
DBOUT(DPAGBuild, outs() << "process store " << llvmModuleSet()->getSVFValue(&inst)->toString() << " \n"); NodeID dst = getValueNode(inst.getPointerOperand()); + const Type* storedTy = inst.getValueOperand()->getType(); + if (NodeID fieldZero = getDirectAccessFieldZeroValVar(inst.getPointerOperand(), storedTy)) + dst = fieldZero; NodeID src = getValueNode(inst.getValueOperand()); @@ -1106,6 +1141,19 @@ void SVFIRBuilder::visitGetElementPtrInst(GetElementPtrInst &inst) AccessPath ap(0, llvmModuleSet()->getSVFType(inst.getSourceElementType())); bool constGep = computeGepOffset(&inst, ap); + if (constGep && ap.getConstantStructFldIdx() == 0 && !Options::ModelArrays()) + { + const Type* baseObjType = + LLVMModuleSet::getLLVMModuleSet()->getTypeInference()->inferObjType(inst.getPointerOperand()); + if (const auto* arrTy = SVFUtil::dyn_cast(baseObjType)) + { + if (arrTy->getElementType()->isPointerTy()) + { + addCopyEdge(src, dst, CopyStmt::COPYVAL); + return; + } + } + } addGepEdge(src, dst, ap, constGep); } @@ -1548,6 +1596,32 @@ const Value* SVFIRBuilder::getBaseValueForExtArg(const Value* V) { const Value* value = stripAllCasts(V); assert(value && "null ptr?"); + auto getGlobalFieldFromByteOffset = + [this](const GlobalVariable* glob, int64_t byteOffset) -> const Value* + { + if (!glob || !glob->hasInitializer()) + return nullptr; + + auto* initializer = SVFUtil::dyn_cast(glob->getInitializer()); + auto* structType = SVFUtil::dyn_cast(glob->getValueType()); + if (!initializer || !structType) + return nullptr; + + DataLayout* dataLayout = getDataLayout(llvmModuleSet()->getMainLLVMModule()); + const StructLayout* layout = + dataLayout->getStructLayout(const_cast(structType)); + for (u32_t fieldIdx = 0; fieldIdx < initializer->getNumOperands(); ++fieldIdx) + { + if (layout->getElementOffset(fieldIdx) != static_cast(byteOffset)) + continue; + if (auto* ptrValue = + SVFUtil::dyn_cast(initializer->getOperand(fieldIdx))) + return ptrValue; + return nullptr; + } + return 
nullptr; + }; + if(const GetElementPtrInst* gep = SVFUtil::dyn_cast(value)) { APOffset totalidx = 0; @@ -1564,34 +1638,72 @@ const Value* SVFIRBuilder::getBaseValueForExtArg(const Value* V) const Value* loadP = load->getPointerOperand(); if (const GetElementPtrInst* gep = SVFUtil::dyn_cast(loadP)) { - APOffset totalidx = 0; - for (bridge_gep_iterator gi = bridge_gep_begin(gep), ge = bridge_gep_end(gep); gi != ge; ++gi) - { - if(const ConstantInt* op = SVFUtil::dyn_cast(gi.getOperand())) - totalidx += LLVMUtil::getIntegerValue(op).first; - } + DataLayout* dataLayout = getDataLayout(llvmModuleSet()->getMainLLVMModule()); + llvm::APInt byteOffset(dataLayout->getIndexSizeInBits(gep->getPointerAddressSpace()), 0, true); + const bool hasByteOffset = dataLayout && gep->accumulateConstantOffset(*dataLayout, byteOffset); + const Value * pointer_operand = gep->getPointerOperand(); if (auto *glob = SVFUtil::dyn_cast(pointer_operand)) { - if (glob->hasInitializer()) + if (hasByteOffset) + { + if (const Value* ptrValue = getGlobalFieldFromByteOffset(glob, byteOffset.getSExtValue())) + return ptrValue; + } + } + else if (hasByteOffset && !byteOffset.isNegative() && + SVFUtil::isa(pointer_operand) && load->getType()->isPointerTy()) + { + const u64_t offset = byteOffset.getZExtValue(); + const u64_t accessBytes = dataLayout->getPointerSize(gep->getPointerAddressSpace()); + + auto isCoveredByMemcpy = [offset, accessBytes](const CallBase* cs) -> bool + { + if (cs->arg_size() < 3) + return false; + + const auto* copySize = SVFUtil::dyn_cast(cs->getArgOperand(2)); + if (!copySize) + return false; + + const u64_t copyBytes = copySize->getZExtValue(); + return copyBytes >= accessBytes && copyBytes - accessBytes >= offset; + }; + + auto hasInterveningWrite = [load](const Instruction* from) -> bool { - if (auto *initializer = SVFUtil::dyn_cast< - ConstantStruct>(glob->getInitializer())) + if (from->getParent() != load->getParent() || !from->comesBefore(load)) + return true; + + auto it = 
from->getIterator(); + const auto end = load->getIterator(); + while (++it != end) { - /* - *@conststruct = internal global <{ [40 x i8], [4 x i8], [4 x i8], [2512 x i8] }> - <{ [40 x i8] undef, [4 x i8] zeroinitializer, [4 x i8] undef, [2512 x i8] zeroinitializer }>, align 8 - - %0 = load ptr, ptr getelementptr inbounds (<{ [40 x i8], [4 x i8], [4 x i8], [2512 x i8] }>, - ptr @conststruct, i64 0, i32 0, i64 16) - in this case, totalidx is 16 while initializer->getNumOperands() is 4, so we return value as the base - */ - if (totalidx >= initializer->getNumOperands()) return value; - auto *ptrField = initializer->getOperand(totalidx); - if (auto *ptrValue = SVFUtil::dyn_cast(ptrField)) - { + if (it->mayWriteToMemory()) + return true; + } + return false; + }; + + for (const auto& use : pointer_operand->users()) + { + const auto* cs = SVFUtil::dyn_cast(use); + if (!cs || cs->getParent() != load->getParent() || + cs->arg_size() < 1 || + stripAllCasts(cs->getArgOperand(0)) != pointer_operand) + continue; + + const Function* calledFun = cs->getCalledFunction(); + if (!LLVMUtil::isMemcpyExtFun(calledFun) || !isCoveredByMemcpy(cs) || + hasInterveningWrite(cs)) + continue; + + const Value* copiedFrom = getBaseValueForExtArg(cs->getArgOperand(1)); + if (const auto* copiedGlob = SVFUtil::dyn_cast(copiedFrom)) + { + if (const Value* ptrValue = + getGlobalFieldFromByteOffset(copiedGlob, byteOffset.getSExtValue())) return ptrValue; - } } } } @@ -1721,6 +1833,22 @@ NodeID SVFIRBuilder::getGepValVar(const Value* val, const AccessPath& ap, const return gepval; } +NodeID SVFIRBuilder::getDirectAccessFieldZeroValVar(const Value* ptr, const Type* accessTy) +{ + if (!Options::ModelArrays() || SVFUtil::isa(ptr)) + return 0; + + const Type* objTy = + LLVMModuleSet::getLLVMModuleSet()->getTypeInference()->inferObjType(ptr); + const ArrayType* arrTy = SVFUtil::dyn_cast(objTy); + if (!arrTy || !arrTy->getElementType()->isPointerTy() || + arrTy->getElementType() != accessTy) + return 0; + + 
AccessPath ap(0, llvmModuleSet()->getSVFType(arrTy)); + return getGepValVar(ptr, ap, llvmModuleSet()->getSVFType(accessTy)); +} + /* * curVal <--------> PAGEdge From 34153b67d6f227feb1f1bf0258e5929b786c5de5 Mon Sep 17 00:00:00 2001 From: cjsrxzdyzds Date: Wed, 29 Apr 2026 16:15:14 -0400 Subject: [PATCH 4/4] Handle LLVM 21 byte-layout memcpy in SVFIRExtAPI memcpy/memmove under LLVM 21 opaque pointers no longer carries a destination element type, so the prior addComplexConsForExt logic that walked source/destination by element index falls back to a single base-to-base copy and drops field-level constraints. This commit re-derives the field constraints by walking the byte layout of the inferred source and destination object types using the module DataLayout. For each byte offset within the copy length the routine resolves the source field, the destination field, and emits the corresponding copy edge. Aggregate types are recursed into via StructLayout; arrays use the element stride. When the inferred type is a single pointer or simple scalar the routine collapses to the original single-edge behaviour, so non-aggregate memcpy patterns are unchanged. 
--- svf-llvm/lib/SVFIRExtAPI.cpp | 137 ++++++++++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 1 deletion(-) diff --git a/svf-llvm/lib/SVFIRExtAPI.cpp b/svf-llvm/lib/SVFIRExtAPI.cpp index 5053da041..00e35392b 100644 --- a/svf-llvm/lib/SVFIRExtAPI.cpp +++ b/svf-llvm/lib/SVFIRExtAPI.cpp @@ -38,6 +38,89 @@ using namespace SVF; using namespace SVFUtil; using namespace LLVMUtil; +namespace +{ + +struct MemcpyField +{ + APOffset byteOffset; + AccessPath accessPath; + const SVFType* elementType; +}; + +void collectMemcpyFields( + const Type* llvmType, + const SVFType* svfType, + const DataLayout& dl, + IRGraph* pag, + std::vector& fields, + APOffset baseByteOffset = 0, + APOffset baseFldIdx = 0) +{ + if (llvmType == nullptr || svfType == nullptr) + return; + + if (svfType->isPointerTy()) + { + fields.push_back({baseByteOffset, AccessPath(baseFldIdx), svfType}); + return; + } + + if (const auto* structType = SVFUtil::dyn_cast(llvmType)) + { + const StructLayout* layout = dl.getStructLayout(const_cast(structType)); + for (u32_t i = 0; i < structType->getNumElements(); ++i) + { + const Type* elemLLVMType = structType->getElementType(i); + const SVFType* elemSVFType = pag->getOriginalElemType(svfType, i); + if (elemSVFType == nullptr) + return; + APOffset elemByteOffset = baseByteOffset + static_cast(layout->getElementOffset(i)); + APOffset elemFldIdx = baseFldIdx + pag->getFlattenedElemIdx(svfType, i); + collectMemcpyFields(elemLLVMType, elemSVFType, dl, pag, fields, elemByteOffset, elemFldIdx); + } + return; + } + + if (const auto* arrayType = SVFUtil::dyn_cast(llvmType)) + { + const Type* elemLLVMType = arrayType->getElementType(); + const SVFType* elemSVFType = pag->getOriginalElemType(svfType, 0); + if (elemSVFType == nullptr) + return; + const APOffset elemByteSize = static_cast(dl.getTypeAllocSize(const_cast(elemLLVMType))); + for (u32_t i = 0; i < arrayType->getNumElements(); ++i) + { + APOffset elemByteOffset = baseByteOffset + i * elemByteSize; + 
APOffset elemFldIdx = baseFldIdx + pag->getFlattenedElemIdx(svfType, i); + collectMemcpyFields(elemLLVMType, elemSVFType, dl, pag, fields, elemByteOffset, elemFldIdx); + } + } +} + +std::vector getMemcpyFields(const Value* value, const Type* llvmType, const SVFType* svfType) +{ + std::vector fields; + auto* mset = LLVMModuleSet::getLLVMModuleSet(); + auto* pag = PAG::getPAG(); + const DataLayout& dl = mset->getMainLLVMModule()->getDataLayout(); + collectMemcpyFields(llvmType, svfType, dl, pag, fields); + return fields; +} + +const Type* getMemcpyLayoutType(const Value* baseValue, const Type* fallbackType) +{ + if (const auto* allocaInst = llvm::dyn_cast_or_null(baseValue)) + return allocaInst->getAllocatedType(); + + if (const auto* global = llvm::dyn_cast_or_null(baseValue)) + return global->getValueType(); + + return fallbackType; +} + +} + /*! * Find the base type and the max possible offset of an object pointed to by (V). */ @@ -109,6 +192,58 @@ void SVFIRBuilder::addComplexConsForExt(Value *D, Value *S, const Value* szValue return; } + const Value* dstBase = getBaseValueForExtArg(D); + const Value* srcBase = getBaseValueForExtArg(S); + Value* dstFieldBase = LLVMUtil::isObject(dstBase) ? const_cast(dstBase) : D; + Value* srcFieldBase = LLVMUtil::isObject(srcBase) ? 
const_cast(srcBase) : S; + const Type* dstLayoutType = getMemcpyLayoutType(dstBase, dtype); + const Type* srcLayoutType = getMemcpyLayoutType(srcBase, stype); + const bool hasRemappedGlobalBase = + (dstFieldBase != D && SVFUtil::isa(dstFieldBase)) || + (srcFieldBase != S && SVFUtil::isa(srcFieldBase)); + const bool useByteLayoutMemcpy = + hasRemappedGlobalBase || (dstLayoutType != dtype) || (srcLayoutType != stype); + if (useByteLayoutMemcpy) + { + const SVFType* dstSVFType = llvmModuleSet()->getSVFType(dstLayoutType); + const SVFType* srcSVFType = llvmModuleSet()->getSVFType(srcLayoutType); + std::vector dstMemcpyFields = getMemcpyFields(D, dstLayoutType, dstSVFType); + std::vector srcMemcpyFields = getMemcpyFields(S, srcLayoutType, srcSVFType); + if (dstMemcpyFields.empty() || srcMemcpyFields.empty()) + goto fallback_memcpy_copy; + + std::unordered_map srcFieldsByByteOffset; + for (const auto& field : srcMemcpyFields) + srcFieldsByByteOffset.emplace(field.byteOffset, field); + + const DataLayout& dl = llvmModuleSet()->getMainLLVMModule()->getDataLayout(); + APOffset copyBytes = std::min( + static_cast(dl.getTypeAllocSize(const_cast(dstLayoutType))), + static_cast(dl.getTypeAllocSize(const_cast(srcLayoutType)))); + if (szValue && SVFUtil::isa(szValue)) + { + auto szIntVal = LLVMUtil::getIntegerValue(SVFUtil::cast(szValue)); + copyBytes = std::min(copyBytes, static_cast(szIntVal.first)); + } + + for (const auto& dstField : dstMemcpyFields) + { + if (dstField.byteOffset >= copyBytes) + continue; + auto it = srcFieldsByByteOffset.find(dstField.byteOffset); + if (it == srcFieldsByByteOffset.end()) + continue; + + NodeID dField = getGepValVar(dstFieldBase, dstField.accessPath, dstField.elementType); + NodeID sField = getGepValVar(srcFieldBase, it->second.accessPath, it->second.elementType); + NodeID dummy = pag->addDummyValNode(); + addLoadEdge(sField, dummy); + addStoreEdge(dummy, dField); + } + return; + } + +fallback_memcpy_copy: //For each field (i), add (Ti = *S + 
i) and (*D + i = Ti). for (u32_t index = 0; index < sz; index++) { @@ -290,4 +425,4 @@ void SVFIRBuilder::handleExtCall(const CallBase* cs, const Function* callee) } /// TODO: inter-procedural SVFIR edges for thread joins -} \ No newline at end of file +}