From 51543e86ec5e533c4b2eca463f7d023ad0714bde Mon Sep 17 00:00:00 2001 From: bjjwwang Date: Thu, 30 Apr 2026 17:31:58 +1000 Subject: [PATCH] Sync pybind with upstream Semi-Sparse refactor + LLVM 21.1.0 Upstream commit 0aa951d (Semi-Sparse infrastructure) moved a group of methods off AbstractState onto the new AbstractStateManager. The pybind bindings still pointed at the deleted methods, so the wheel no longer compiled against current SVF. pybind/AE.cpp: - Drop 10 stale AbstractState binding lines: getByteOffset, getElementIndex, loadValue (twice), storeValue (twice), getPointeeElement, getGepObjAddrs (twice), getAllocaInstByteSize. - Add py::class_<AbstractStateManager> with 19 methods: getAbstractValue, hasAbstractValue, updateAbstractValue (each with ValVar/ObjVar/SVFVar overloads), getAbstractState, hasAbstractState, updateAbstractState, getGepElementIndex, getGepByteOffset, getGepObjAddrs, loadValue, storeValue, getPointeeElement, getAllocaInstByteSize, getTrace, __getitem__, getUseSitesOfObjVar / ValVar, getDefSiteOfObjVar / ValVar. Constructor takes (SVFIR*, std::shared_ptr to the pointer analysis). - Add minimal py::class_<AbstractInterpretation> exposing getStateMgr() so Python users can grab a stateMgr from a running analysis. - Add py::class_<Options> with the max_field_limit() static (used by downstream Assignment-3 helpers when porting getByteOffset). pybind/SVFIR.cpp: - Add GepStmt::getStructFieldOffset(idx_var, struct_type) lambda binding, needed to faithfully port AbstractStateManager::getGepByteOffset to Python without losing the struct-field path. pysvf/__init__.py: re-export AbstractStateManager, AbstractInterpretation, Options from .pysvf so they show up at the package top level. setup.py + workflows: bump LLVM 18.1.0 -> 21.1.0 paths everywhere (setup.py and pysvf/__init__.py also had stale llvm-16.0.0.obj remnants; consolidated to llvm-21.1.0.obj). The npm svf-lib URL must point at LLVM 21 too -- SVF-npm sync-llvm-21 covers that. BREAKING (release notes worthy): callers of pysvf.AbstractState that relied on as.loadValue / as.storeValue / as.getByteOffset / etc. 
must migrate to a stateMgr. New signatures take ValVar*/ObjVar* + ICFGNode* instead of NodeID. See SSA Assignment-3 sync-llvm-21 for an example migration that reproduces the dense-mode behavior locally. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/jupyter.yml | 2 +- .github/workflows/release.yml | 4 +- pybind/AE.cpp | 108 +++++++++++++++++++++++++++++----- pybind/SVFIR.cpp | 8 ++- pysvf/__init__.py | 5 +- setup.py | 8 +-- 6 files changed, 111 insertions(+), 24 deletions(-) diff --git a/.github/workflows/jupyter.yml b/.github/workflows/jupyter.yml index 9bd47e8..28ba306 100644 --- a/.github/workflows/jupyter.yml +++ b/.github/workflows/jupyter.yml @@ -38,7 +38,7 @@ jobs: run: | npm install svf-lib echo "SVF_DIR=$PWD/node_modules/svf-lib/SVF-linux-x86_64" >> $GITHUB_ENV - echo "LLVM_DIR=$PWD/node_modules/llvm-18.1.0.obj" >> $GITHUB_ENV + echo "LLVM_DIR=$PWD/node_modules/llvm-21.1.0.obj" >> $GITHUB_ENV echo "Z3_DIR=$PWD/node_modules/z3.obj" >> $GITHUB_ENV - name: Get system architecture diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 20a3ae8..7ed6b83 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -72,7 +72,7 @@ jobs: run: | npm install svf-lib echo "SVF_DIR=$PWD/node_modules/svf-lib/SVF-linux-x86_64" >> $GITHUB_ENV - echo "LLVM_DIR=$PWD/node_modules/llvm-18.1.0.obj" >> $GITHUB_ENV + echo "LLVM_DIR=$PWD/node_modules/llvm-21.1.0.obj" >> $GITHUB_ENV echo "Z3_DIR=$PWD/node_modules/z3.obj" >> $GITHUB_ENV - name: Set up Python @@ -154,7 +154,7 @@ jobs: elif [[ "$RUNNER_OS" == "macOS" ]]; then echo "SVF_DIR=$PWD/node_modules/svf-lib/SVF-osx" >> $GITHUB_ENV fi - echo "LLVM_DIR=$PWD/node_modules/llvm-18.1.0.obj" >> $GITHUB_ENV + echo "LLVM_DIR=$PWD/node_modules/llvm-21.1.0.obj" >> $GITHUB_ENV echo "Z3_DIR=$PWD/node_modules/z3.obj" >> $GITHUB_ENV - name: Set up Python diff --git a/pybind/AE.cpp b/pybind/AE.cpp index 664a59a..71b0f1c 100644 --- a/pybind/AE.cpp +++ b/pybind/AE.cpp @@ -8,6 
+8,8 @@ #include "MemoryModel/PointerAnalysis.h" #include "WPA/Andersen.h" #include "AE/Core/AbstractState.h" +#include "AE/Svfexe/AbstractStateManager.h" +#include "AE/Svfexe/AbstractInterpretation.h" #include @@ -315,12 +317,6 @@ void bind_abstract_state(py::module& m) { return self.load(addr); }, py::arg("addr"), py::return_value_policy::reference) - .def("storeValue", &AbstractState::storeValue, - py::arg("varId"), py::arg("val")) - - .def("loadValue", &AbstractState::loadValue, - py::arg("varId"), py::return_value_policy::reference) - // Equality comparison .def("equals", &AbstractState::equals, py::arg("other")) @@ -340,8 +336,6 @@ void bind_abstract_state(py::module& m) { .def("narrowing", &AbstractState::narrowing, py::arg("other")) .def("getIDFromAddr", &AbstractState::getIDFromAddr, py::arg("addr")) - .def("getAllocaInstByteSize", &AbstractState::getAllocaInstByteSize, py::arg("addr")) - // Static utilities for address handling .def_static("isVirtualMemAddress", &AbstractState::isVirtualMemAddress, py::arg("val")) .def_static("getVirtualMemAddress", &AbstractState::getVirtualMemAddress, py::arg("idx")) @@ -385,12 +379,6 @@ void bind_abstract_state(py::module& m) { }, py::return_value_policy::move) .def("bottom", &AbstractState::bottom) .def("top", &AbstractState::top) - .def("getGepObjAddrs", &AbstractState::getGepObjAddrs, py::arg("ptr"), py::arg("offset")) - .def("getElementIndex", &AbstractState::getElementIndex, py::arg("gep")) - .def("getByteOffset", &AbstractState::getByteOffset, py::arg("gep")) - .def("loadValue", &AbstractState::loadValue, py::arg("var_id")) - .def("storeValue", &AbstractState::storeValue, py::arg("var_id"), py::arg("val")) - .def("getPointeeElement", &AbstractState::getPointeeElement, py::arg("var_id"), py::return_value_policy::reference) .def("inVarToValTable", &AbstractState::inVarToValTable, py::arg("var_id")) .def("inVarToAddrsTable", &AbstractState::inVarToAddrsTable, py::arg("var_id")) .def("inAddrToAddrsTable", 
&AbstractState::inAddrToAddrsTable, py::arg("id")) @@ -398,7 +386,6 @@ void bind_abstract_state(py::module& m) { .def("addToFreedAddrs", &AbstractState::addToFreedAddrs, py::arg("addr"), "Add an address to the freed addresses set") .def("isFreedMem", &AbstractState::isFreedMem, py::arg("addr"), "Check if an address is freed memory") .def("hash", &AbstractState::hash, "Get the hash of this abstract state") - .def("getGepObjAddrs", &AbstractState::getGepObjAddrs, py::arg("var_id"), py::arg("offset")) .def_static("isCmpBranchFeasible", [](SVFIR* svfir, const CmpStmt* cmpStmt, s64_t succ, AbstractState& as) { Map _reverse_predicate = { {CmpStmt::Predicate::FCMP_OEQ, CmpStmt::Predicate::FCMP_ONE}, // == -> != @@ -691,5 +678,96 @@ void bind_abstract_state(py::module& m) { as = new_es; return true; }, py::arg("svfir"), py::arg("var"), py::arg("succ"), py::arg("as")); + + // --------------------------------------------------------------- + // AbstractStateManager — owns the per-ICFGNode AbstractState trace + // and provides sparsity-aware getters that used to live on + // AbstractState (loadValue, storeValue, getGepByteOffset, etc.). 
+ // --------------------------------------------------------------- + py::class_(m, "AbstractStateManager") + .def(py::init([](SVFIR* svfir, std::shared_ptr pta) { + return new AbstractStateManager(svfir, pta.get()); + }), py::arg("svfir"), py::arg("pta")) + + // Abstract value access (sparsity-aware) + .def("getAbstractValue", + py::overload_cast(&AbstractStateManager::getAbstractValue), + py::arg("var"), py::arg("node"), py::return_value_policy::reference) + .def("getAbstractValue", + py::overload_cast(&AbstractStateManager::getAbstractValue), + py::arg("var"), py::arg("node"), py::return_value_policy::reference) + .def("getAbstractValue", + py::overload_cast(&AbstractStateManager::getAbstractValue), + py::arg("var"), py::arg("node"), py::return_value_policy::reference) + + .def("hasAbstractValue", + py::overload_cast(&AbstractStateManager::hasAbstractValue, py::const_), + py::arg("var"), py::arg("node")) + .def("hasAbstractValue", + py::overload_cast(&AbstractStateManager::hasAbstractValue, py::const_), + py::arg("var"), py::arg("node")) + .def("hasAbstractValue", + py::overload_cast(&AbstractStateManager::hasAbstractValue, py::const_), + py::arg("var"), py::arg("node")) + + .def("updateAbstractValue", + py::overload_cast(&AbstractStateManager::updateAbstractValue), + py::arg("var"), py::arg("val"), py::arg("node")) + .def("updateAbstractValue", + py::overload_cast(&AbstractStateManager::updateAbstractValue), + py::arg("var"), py::arg("val"), py::arg("node")) + .def("updateAbstractValue", + py::overload_cast(&AbstractStateManager::updateAbstractValue), + py::arg("var"), py::arg("val"), py::arg("node")) + + // State access + .def("getAbstractState", + py::overload_cast(&AbstractStateManager::getAbstractState), + py::arg("node"), py::return_value_policy::reference) + .def("hasAbstractState", &AbstractStateManager::hasAbstractState, py::arg("node")) + .def("updateAbstractState", &AbstractStateManager::updateAbstractState, + py::arg("node"), py::arg("state")) + + 
// GEP helpers (the ones that used to be AbstractState::getByteOffset etc.) + .def("getGepElementIndex", &AbstractStateManager::getGepElementIndex, py::arg("gep")) + .def("getGepByteOffset", &AbstractStateManager::getGepByteOffset, py::arg("gep")) + .def("getGepObjAddrs", &AbstractStateManager::getGepObjAddrs, + py::arg("pointer"), py::arg("offset")) + + // Load / store through pointer (require ValVar + ICFGNode now) + .def("loadValue", &AbstractStateManager::loadValue, + py::arg("pointer"), py::arg("node")) + .def("storeValue", &AbstractStateManager::storeValue, + py::arg("pointer"), py::arg("val"), py::arg("node")) + + // Type / size helpers + .def("getPointeeElement", &AbstractStateManager::getPointeeElement, + py::arg("var"), py::arg("node"), py::return_value_policy::reference) + .def("getAllocaInstByteSize", &AbstractStateManager::getAllocaInstByteSize, py::arg("addr")) + + // Direct trace access + .def("getTrace", &AbstractStateManager::getTrace, py::return_value_policy::reference) + .def("__getitem__", [](AbstractStateManager& self, const ICFGNode* node) -> AbstractState& { + return self[node]; + }, py::arg("node"), py::return_value_policy::reference) + + // Def/Use site queries + .def("getUseSitesOfObjVar", &AbstractStateManager::getUseSitesOfObjVar, + py::arg("obj"), py::arg("node")) + .def("getUseSitesOfValVar", &AbstractStateManager::getUseSitesOfValVar, py::arg("var")) + .def("getDefSiteOfValVar", &AbstractStateManager::getDefSiteOfValVar, + py::arg("var"), py::return_value_policy::reference) + .def("getDefSiteOfObjVar", &AbstractStateManager::getDefSiteOfObjVar, + py::arg("obj"), py::arg("node"), py::return_value_policy::reference); + + // Minimal AbstractInterpretation binding so users can grab a stateMgr + // from a running analysis. + py::class_(m, "AbstractInterpretation") + .def("getStateMgr", &AbstractInterpretation::getStateMgr, + py::return_value_policy::reference); + + // Expose the few Options statics that downstream Python code relies on. 
+ py::class_(m, "Options") + .def_static("max_field_limit", []() { return Options::MaxFieldLimit(); }); } diff --git a/pybind/SVFIR.cpp b/pybind/SVFIR.cpp index 84bf9af..78f7ef5 100644 --- a/pybind/SVFIR.cpp +++ b/pybind/SVFIR.cpp @@ -136,7 +136,13 @@ void bind_svf_stmt(py::module& m) { .def("getOffsetVarAndGepTypePairVec", &GepStmt::getOffsetVarAndGepTypePairVec, py::return_value_policy::reference, "Get the offset variable and GEP type pair vector of the GEP statement") .def("getSrcPointeeType", [](GepStmt& stmt) { return stmt.getAccessPath().gepSrcPointeeType(); }, - py::return_value_policy::reference); + py::return_value_policy::reference) + .def("getStructFieldOffset", + [](const GepStmt& stmt, const ValVar* idxVar, const SVFStructType* st) { + return stmt.getAccessPath().getStructFieldOffset(idxVar, st); + }, + py::arg("idx_var"), py::arg("struct_type"), + "Compute the byte offset for a struct field index"); py::class_(m, "MultiOpndStmt") .def("getOpVar", [](MultiOpndStmt& stmt, int ID) { return stmt.getOpVar(ID); }, diff --git a/pysvf/__init__.py b/pysvf/__init__.py index b6c0743..d44a0e2 100644 --- a/pysvf/__init__.py +++ b/pysvf/__init__.py @@ -9,7 +9,7 @@ EXTAPI_BC_PATH = os.path.join(CURRENT_DIR, "SVF/Release-build/lib/", "extapi.bc") SVF_DIR = os.path.join(CURRENT_DIR, "SVF") Z3_DIR = os.path.join(CURRENT_DIR, "SVF/z3.obj") -LLVM_DIR = os.path.join(CURRENT_DIR, "SVF/llvm-16.0.0.obj") +LLVM_DIR = os.path.join(CURRENT_DIR, "SVF/llvm-21.1.0.obj") BIN_DIR = os.path.join(SVF_DIR, "Release-build", "bin") # Set environment variables @@ -229,8 +229,11 @@ def main(): IntervalValue, AddressValue, AbstractState, + AbstractStateManager, + AbstractInterpretation, AbstractValue, BoundedInt, + Options, MTA, MHP, LockAnalysis, diff --git a/setup.py b/setup.py index 6fb9726..e198c42 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ def run(self): if platform.system() == "Linux": subprocess.run(["patchelf", "--add-needed", "$ORIGIN/SVF/z3.obj/bin/libz3.so", 
so_target], check=True) if os.path.exists(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.so")): - subprocess.run(["patchelf", "--add-needed", "$ORIGIN/SVF/llvm-16.0.0.obj/lib/libLLVM.so", so_target], check=True) + subprocess.run(["patchelf", "--add-needed", "$ORIGIN/SVF/llvm-21.1.0.obj/lib/libLLVM.so", so_target], check=True) @@ -138,11 +138,11 @@ def run(self): shutil.copytree(os.path.join(os.environ["Z3_DIR"], "lib"), os.path.join(self.build_lib, "pysvf", "SVF", "z3.obj", "lib"),dirs_exist_ok=True) # if exist $LLVM_DIR/lib/libLLVM.so or libLLVM.dylib - os.makedirs(os.path.join(self.build_lib, "pysvf", "SVF", "llvm-16.0.0.obj", "lib"), exist_ok=True) + os.makedirs(os.path.join(self.build_lib, "pysvf", "SVF", "llvm-21.1.0.obj", "lib"), exist_ok=True) if os.path.exists(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.so")): - shutil.copyfile(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.so"), os.path.join(self.build_lib, "pysvf", "SVF", "llvm-16.0.0.obj", "lib", "libLLVM.so")) + shutil.copyfile(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.so"), os.path.join(self.build_lib, "pysvf", "SVF", "llvm-21.1.0.obj", "lib", "libLLVM.so")) if os.path.exists(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.dylib")): - shutil.copyfile(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.dylib"), os.path.join(self.build_lib, "pysvf", "SVF", "llvm-16.0.0.obj", "lib", "libLLVM.dylib")) + shutil.copyfile(os.path.join(os.environ["LLVM_DIR"], "lib", "libLLVM.dylib"), os.path.join(self.build_lib, "pysvf", "SVF", "llvm-21.1.0.obj", "lib", "libLLVM.dylib")) setup(