diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7602d53..d4304ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,7 +43,7 @@ jobs: - name: build run: | export SVF_DIR=$(npm root)/SVF - export LLVM_DIR=$(npm root)/llvm-18.1.0.obj + export LLVM_DIR=$(npm root)/llvm-21.1.0.obj export Z3_DIR=$(npm root)/z3.obj echo "SVF_DIR="$SVF_DIR echo "LLVM_DIR="$LLVM_DIR diff --git a/Assignment-3/CPP/Assignment_3.cpp b/Assignment-3/CPP/Assignment_3.cpp index 55417d2..fe8ccd1 100644 --- a/Assignment-3/CPP/Assignment_3.cpp +++ b/Assignment-3/CPP/Assignment_3.cpp @@ -76,7 +76,7 @@ void AbstractExecution::bufOverflowDetection(const SVF::SVFStmt* stmt) { AbstractState& as = getAbsStateFromTrace(gep->getICFGNode()); NodeID lhs = gep->getLHSVarID(); NodeID rhs = gep->getRHSVarID(); - updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), as.getByteOffset(gep)); + updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), svfStateMgr->getGepByteOffset(gep)); /// TODO: your code starts from here @@ -264,9 +264,9 @@ void AbstractExecution::updateStateOnCall(const CallPE* callPE) { { NodeID curId = callPE->getOpVarID(i); const ICFGNode* opICFGNode = callPE->getOpCallICFGNode(i); - if (postAbsTrace.count(opICFGNode)) + if (postAbsTrace().count(opICFGNode)) { - AbstractState& opAs = postAbsTrace[opICFGNode]; + AbstractState& opAs = postAbsTrace()[opICFGNode]; rhs.join_with(opAs[curId]); } } diff --git a/Assignment-3/CPP/Assignment_3.h b/Assignment-3/CPP/Assignment_3.h index 60f5702..da75f38 100644 --- a/Assignment-3/CPP/Assignment_3.h +++ b/Assignment-3/CPP/Assignment_3.h @@ -26,6 +26,7 @@ */ #include "Assignment_3_Helper.h" #include "AE/Svfexe/AbsExtAPI.h" +#include "AE/Svfexe/AbstractStateManager.h" #include "SVFIR/SVFIR.h" namespace SVF { @@ -108,7 +109,7 @@ namespace SVF { /// Return its abstract state given an ICFGNode AbstractState& getAbsStateFromTrace(const ICFGNode* node) { - return postAbsTrace[node]; + return (*svfStateMgr)[node]; } /// Update the offset of a GEP (GetElementPtr) object from its base address @@ -121,12 +122,18 @@ namespace SVF { /// Destructor virtual ~AbstractExecution() { + delete svfStateMgr; } protected: /// SVFIR and ICFG SVFIR* svfir; ICFG* icfg; + /// Owns the abstract trace immediately after an ICFGNode (post trace). + /// AbsExtAPI and the GEP/load/store helpers (getGepByteOffset etc.) + /// read and write through this manager; we don't keep a separate + /// postAbsTrace map any more. + AbstractStateManager* svfStateMgr = nullptr; /// Map a function to its corresponding WTO Map funcToWTO; @@ -134,8 +141,10 @@ namespace SVF { Set recursiveFuns; /// Abstract trace immediately before an ICFGNode. Map preAbsTrace; - /// Abstract trace immediately after an ICFGNode. - Map postAbsTrace; + /// Convenience alias: the "post" trace lives inside svfStateMgr. + Map& postAbsTrace() { + return svfStateMgr->getTrace(); + } private: AbstractExecutionHelper bufOverflowHelper; diff --git a/Assignment-3/CPP/Assignment_3_Helper.cpp b/Assignment-3/CPP/Assignment_3_Helper.cpp index f5357b6..a0849a1 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.cpp +++ b/Assignment-3/CPP/Assignment_3_Helper.cpp @@ -81,13 +81,13 @@ IntervalValue AbstractExecution::getAccessOffset(NodeID objId, const GepStmt* ge // Field-insensitive base object if (SVFUtil::isa(obj)) { // get base size - IntervalValue accessOffset = as.getByteOffset(gep); + IntervalValue accessOffset = svfStateMgr->getGepByteOffset(gep); return accessOffset; } // A sub object of an aggregate object else if (SVFUtil::isa(obj)) { IntervalValue accessOffset = - bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + as.getByteOffset(gep); + bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + svfStateMgr->getGepByteOffset(gep); return accessOffset; } else{ @@ -221,12 +221,12 @@ bool AbstractExecution::mergeStatesFromPredecessors(const ICFGNode* block, Abstr // Iterate over all incoming edges of the given block for (auto& edge : block->getInEdges()) { // Check if the source node of the edge has a post-execution state recorded - if (postAbsTrace.find(edge->getSrcNode()) != postAbsTrace.end()) { + if (postAbsTrace().find(edge->getSrcNode()) != postAbsTrace().end()) { const IntraCFGEdge* intraCfgEdge = SVFUtil::dyn_cast(edge); // If the edge is an intra-block edge and has a condition if (intraCfgEdge && intraCfgEdge->getCondition()) { - AbstractState tmpEs = postAbsTrace[edge->getSrcNode()]; + AbstractState tmpEs = postAbsTrace()[edge->getSrcNode()]; // Check if the branch condition is feasible if (isBranchFeasible(intraCfgEdge, tmpEs)) { as.joinWith(tmpEs); // Merge the state with the current state @@ -236,7 +236,7 @@ bool AbstractExecution::mergeStatesFromPredecessors(const ICFGNode* block, Abstr } else { // For non-conditional edges, directly merge the state - as.joinWith(postAbsTrace[edge->getSrcNode()]); + as.joinWith(postAbsTrace()[edge->getSrcNode()]); inEdgeNum++; } } @@ -492,8 +492,8 @@ bool AbstractExecution::isBranchFeasible(const IntraCFGEdge* intraEdge, Abstract void AbstractExecution::handleGlobalNode() { AbstractState as; const ICFGNode* node = icfg->getGlobalICFGNode(); - postAbsTrace[node] = preAbsTrace[node]; - postAbsTrace[node][0] = AddressValue(); + postAbsTrace()[node] = preAbsTrace[node]; + postAbsTrace()[node][0] = AddressValue(); // Global Node, we just need to handle addr, load, store, copy and gep for (const SVFStmt* stmt : node->getSVFStmts()) { updateAbsState(stmt); @@ -543,7 +543,9 @@ void AbstractExecution::ensureAllAssertsValidated() { void AbstractExecution::analyse() { // Init WTOs for all functions, and handle Global ICFGNode of SVFModule initWTO(); - utils = new AbsExtAPI(postAbsTrace); + AndersenWaveDiff* ander = AndersenWaveDiff::createAndersenWaveDiff(svfir); + svfStateMgr = new AbstractStateManager(svfir, ander); + utils = new AbsExtAPI(svfStateMgr); // Handle the global node handleGlobalNode(); @@ -583,8 +585,8 @@ bool AbstractExecution::handleICFGNode(const ICFGNode* node) { } preAbsTrace[node] = tmpEs; // Store the last abstract state, used to check if the abstract state has reached a fixpoint - AbstractState last_as = postAbsTrace[node]; - postAbsTrace[node] = preAbsTrace[node]; + AbstractState last_as = postAbsTrace()[node]; + postAbsTrace()[node] = preAbsTrace[node]; for (const SVFStmt* stmt : node->getSVFStmts()) { updateAbsState(stmt); bufOverflowDetection(stmt); @@ -594,7 +596,7 @@ bool AbstractExecution::handleICFGNode(const ICFGNode* node) { handleCallSite(callNode); } // If the abstract state is the same as the last abstract state, return false because we have reached fixpoint - if (postAbsTrace[node] == last_as) { + if (postAbsTrace()[node] == last_as) { return false; } return true; @@ -680,14 +682,15 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { } else if (fun_name == "nd" || fun_name == "rand") { NodeID lhsId = callNode->getRetICFGNode()->getActualRet()->getId(); - postAbsTrace[callNode][lhsId] = AbstractValue(IntervalValue::top()); + postAbsTrace()[callNode][lhsId] = AbstractValue(IntervalValue::top()); } else if (isExternalCallForAssignment(callee)) { // implement external calls for the assignment updateStateOnExtCall(callNode); } else if (SVFUtil::isExtCall(callee)) { - // handle external API calls + // handle external API calls — AbsExtAPI reads/writes through the + // same svfStateMgr that backs postAbsTrace(), so no sync needed. utils->handleExtAPI(callNode); } else if (recursiveFuns.find(callee) != recursiveFuns.end()) { diff --git a/Assignment-3/CPP/Assignment_3_Helper.h b/Assignment-3/CPP/Assignment_3_Helper.h index 3be1624..dac3469 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.h +++ b/Assignment-3/CPP/Assignment_3_Helper.h @@ -28,10 +28,13 @@ #include "AE/Core/AbstractState.h" #include "AE/Svfexe/AEDetector.h" #include "AE/Core/ICFGWTO.h" +#include "SVFIR/SVFStatements.h" +#include "Util/Options.h" #include "Util/SVFBugReport.h" namespace SVF { class AbstractExecutionHelper { public: + /// Add a detected bug to the bug reporter and print the report ///@{ void addBugToReporter(const AEException& e, const ICFGNode* node) { diff --git a/Assignment-3/Python/Assignment_3.py b/Assignment-3/Python/Assignment_3.py index 498dda1..7fcacc8 100644 --- a/Assignment-3/Python/Assignment_3.py +++ b/Assignment-3/Python/Assignment_3.py @@ -77,7 +77,7 @@ def bufOverflowDetection(self, stmt: pysvf.SVFStmt): # Update GEP object offset from base self.buf_overflow_helper.updateGepObjOffsetFromBase(abstract_state, abstract_state[lhs].getAddrs(), abstract_state[rhs].getAddrs(), - abstract_state.getByteOffset(stmt) + self.buf_overflow_helper.getByteOffset(abstract_state, stmt) ) # TODO: your code starts from here diff --git a/Assignment-3/Python/Assignment_3_Helper.py b/Assignment-3/Python/Assignment_3_Helper.py index 9cc3fb3..02a5c97 100644 --- a/Assignment-3/Python/Assignment_3_Helper.py +++ b/Assignment-3/Python/Assignment_3_Helper.py @@ -204,7 +204,7 @@ class AbstractExecutionHelper: managing GEP object offsets, and other utilities. """ - def __init__(self, svfir: pysvf.SVFIR): + def __init__(self, svfir: pysvf.SVFIR, svf_state_mgr: pysvf.AbstractStateManager = None): """ Initialize member variables. """ @@ -216,6 +216,48 @@ def __init__(self, svfir: pysvf.SVFIR): # Map to store exception information for each ICFGNode self.node_to_bug_info = {} self.svfir = svfir + # Optional: if a stateMgr is provided, getByteOffset delegates to its + # getGepByteOffset (the C++ side does the same via svfStateMgr->...). + self.svf_state_mgr = svf_state_mgr + + # ------------------------------------------------------------------ + # Helpers that used to live as instance methods on `pysvf.AbstractState`. + # Upstream (Semi-Sparse refactor) moved them to `AbstractStateManager`, + # which requires a sparsity-aware trace we don't keep here. We re-implement + # the dense-mode behavior using only public AbstractState surface so the + # Python side mirrors the C++ side (`AbstractExecutionHelper::getByteOffset`). + # ------------------------------------------------------------------ + def getByteOffset(self, abstract_state: pysvf.AbstractState, gep: pysvf.GepStmt) -> pysvf.IntervalValue: + # Delegates to the stateMgr's upstream impl, mirroring the C++ side + # `svfStateMgr->getGepByteOffset(gep)`. The `abstract_state` argument + # is kept in the signature for symmetry with the call-site shape but + # is not consulted here -- the mgr reads non-constant indices from + # its own trace, which is the same trace this helper writes to. + return self.svf_state_mgr.getGepByteOffset(gep) + + def getGepObjAddrs(self, abstract_state: pysvf.AbstractState, var_id: int, offset: pysvf.IntervalValue) -> pysvf.AddressValue: + # Delegates to the stateMgr's upstream impl. mgr.getGepObjAddrs takes + # a ValVar* (and infers the ICFGNode from it), so we look the var up + # by id. Matches the C++ side `svfStateMgr->getGepObjAddrs(...)`. + pointer = self.svfir.getGNode(var_id) + return self.svf_state_mgr.getGepObjAddrs(pointer, offset) + + def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): + ptr_val = abstract_state[var_id] + if not ptr_val.isAddr(): + return None + for addr in ptr_val.getAddrs(): + obj_id = abstract_state.getIDFromAddr(addr) + if obj_id == 0: + continue + return self.svfir.getBaseObject(obj_id).getType() + return None + + def getAllocaInstByteSize(self, abstract_state: pysvf.AbstractState, addr: pysvf.AddrStmt) -> int: + # Delegates to the stateMgr's upstream impl. mgr.getAllocaInstByteSize + # takes the AddrStmt directly (it derives node + sizes itself). Matches + # the C++ side `svfStateMgr->getAllocaInstByteSize(addr)`. + return self.svf_state_mgr.getAllocaInstByteSize(addr) def reportBufOverflow(self, node, msg): """ @@ -277,7 +319,7 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr if dst.getType().isArrayTy(): elemSize = dst.getType().getTypeOfElement().getByteSize() elif dst.getType().isPointerTy(): - elemType = abstractState.getPointeeElement(dstId) + elemType = self.getPointeeElement(abstractState, dstId) if elemType.isArrayTy(): elemSize = elemType.getTypeOfElement().getByteSize() else: @@ -288,8 +330,8 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr range_val = size/elemSize if abstractState.inVarToAddrsTable(dstId) and abstractState.inVarToAddrsTable(srcId): for index in range(0, int(range_val)): - expr_src = abstractState.getGepObjAddrs(srcId, pysvf.IntervalValue(index)) - expr_dst = abstractState.getGepObjAddrs(dstId, pysvf.IntervalValue(index + start_idx)) + expr_src = self.getGepObjAddrs(abstractState, srcId, pysvf.IntervalValue(index)) + expr_dst = self.getGepObjAddrs(abstractState, dstId, pysvf.IntervalValue(index + start_idx)) for addr_src in expr_src: for addr_dst in expr_dst: objId = abstractState.getIDFromAddr(addr_src) @@ -320,7 +362,7 @@ def getStrlen(self, abstractState, strValue): icfg_node = base_object.getICFGNode() for stmt in icfg_node.getSVFStmts(): if isinstance(stmt, pysvf.AddrStmt): - dst_size = abstractState.getAllocaInstByteSize(stmt) + dst_size = self.getAllocaInstByteSize(abstractState, stmt) length = 0 elem_size = 1 @@ -328,7 +370,7 @@ def getStrlen(self, abstractState, strValue): # Calculate the string length if abstractState.getVar(value_id).isAddr(): for index in range(dst_size): - expr0 = abstractState.getGepObjAddrs(value_id, pysvf.IntervalValue(index)) + expr0 = self.getGepObjAddrs(abstractState, value_id, pysvf.IntervalValue(index)) val = pysvf.AbstractValue() for addr in expr0: @@ -343,7 +385,7 @@ def getStrlen(self, abstractState, strValue): if strValue.getType().isArrayTy(): elem_size = strValue.getType().getTypeOfElement().getByteSize() elif strValue.getType().isPointerTy(): - elem_type = abstractState.getPointeeElement(value_id) + elem_type = self.getPointeeElement(abstractState, value_id) if elem_type: if elem_type.isArrayTy(): elem_size = elem_type.getTypeOfElement().getByteSize() @@ -400,8 +442,16 @@ def __init__(self, pag: pysvf.SVFIR): self.func_to_wto = {} self.recursive_funs = set() self.pre_abs_trace = {} - self.post_abs_trace = {} - self.buf_overflow_helper = AbstractExecutionHelper(self.svfir) + # Owns the post-trace and is the backing store for AbsExtAPI as well + # as the GEP/load/store helpers (getGepByteOffset etc.). Replaces + # the old `self.post_abs_trace` dict so reads/writes on + # `self.post_abs_trace[node]` go through the mgr's trace. + self.ander = pysvf.AndersenWaveDiff(self.svfir) + self.svf_state_mgr = pysvf.AbstractStateManager(self.svfir, self.ander) + # Alias preserved so existing call-sites `self.post_abs_trace[node]` + # keep working. The mgr supports __getitem__/__setitem__/__contains__. + self.post_abs_trace = self.svf_state_mgr + self.buf_overflow_helper = AbstractExecutionHelper(self.svfir, self.svf_state_mgr) self.assert_points = set() self.widen_delay = 3 self.addressMask = 0x7f000000 @@ -1071,14 +1121,14 @@ def getAccessOffset(self, objId: int, gep: pysvf.GepStmt) -> pysvf.IntervalValue # Field-insensitive base object if isinstance(obj, pysvf.BaseObjVar): # Get base size - access_offset = abstract_state.getByteOffset(gep) + access_offset = self.buf_overflow_helper.getByteOffset(abstract_state, gep) return access_offset # A sub-object of an aggregate object elif isinstance(obj, pysvf.GepObjVar): access_offset = ( self.buf_overflow_helper.getGepObjOffsetFromBase(obj) - + abstract_state.getByteOffset(gep) + + self.buf_overflow_helper.getByteOffset(abstract_state, gep) ) return access_offset diff --git a/Dockerfile b/Dockerfile index a5aa85c..6f15813 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ARG TARGETPLATFORM RUN set -e # Define LLVM version. -ENV llvm_version=18.1.0 +ENV llvm_version=21.1.0 # Define home directory ENV HOME=/home/SVF-tools