From deb5cb18ac1037785de7e6312d530de15464cae0 Mon Sep 17 00:00:00 2001 From: bjjwwang Date: Thu, 30 Apr 2026 17:34:41 +1000 Subject: [PATCH 1/2] Sync Assignment-3 + CI with upstream LLVM 21.1.0 + Semi-Sparse refactor Upstream commits 899d00a (Port SVF to LLVM 21) and 0aa951d (Semi-Sparse infrastructure) together broke Assignment-3: 1. AbstractState::getByteOffset(GepStmt*) was deleted (moved to AbstractStateManager::getGepByteOffset). 2. AbsExtAPI's constructor changed from AbsExtAPI(Map<...>&) to AbsExtAPI(AbstractStateManager*). 3. The same set of methods was deleted on the Python pysvf binding. CI / packaging: - Dockerfile + build.yml: bump llvm_version 18.1.0 -> 21.1.0 to match the npm svf-lib package after SVF-npm sync-llvm-21 republishes. Assignment-3 C++: - Assignment_3_Helper.h: add AbstractExecutionHelper::getByteOffset (header-only). Body is a faithful port of the upstream AbstractStateManager::getGepByteOffset, reading non-constant indices from `as[idxVar.getId()]` instead of going through a stateMgr -- works because Assignment-3 keeps a dense per-node trace. - Assignment_3.h / _Helper.cpp: own a lazily-constructed AbstractStateManager* svfStateMgr so AbsExtAPI(svfStateMgr) compiles. Around the single utils->handleExtAPI(callNode) site, sync postAbsTrace into the mgr and copy any updates back, since AbsExtAPI now reads abstract values exclusively through the mgr. - Migrate the 3 stale call sites: as.getByteOffset(gep) -> bufOverflowHelper.getByteOffset(as, gep) Assignment-3 Python (mirrors C++ shape): - Assignment_3_Helper.py: add 4 helpers on AbstractExecutionHelper that port the upstream behavior: getByteOffset, getGepObjAddrs, getPointeeElement, getAllocaInstByteSize. Three of them need svfir (already a member of the helper); getByteOffset uses pysvf.Options.max_field_limit() and gep.getStructFieldOffset(...) which SVF-Python sync-llvm-21 newly exposes. - Migrate 9 stale call sites: abstract_state.<method>(...) -> self.buf_overflow_helper.<method>(abstract_state, ...) 
(or self.<method>(...) when the caller is already inside the helper). Symmetric with the C++ bufOverflowHelper.<method>(as, ...) pattern. Locally: SVF builds clean against brew llvm@21 (21.1.4) on darwin/arm64; SSA builds 100% (bin/ass3 produced); pysvf imports + Assignment-3 helpers import + Options.max_field_limit() and AbstractStateManager are visible. test-ae.{cpp,py} need a real .bc fixture to run end-to-end and have not been exercised yet. Depends on (publish in this order): 1. SVF-npm sync-llvm-21 (republish svf-lib) 2. SVF-Python sync-llvm-21 (republish pysvf to TestPyPI) 3. this branch Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build.yml | 2 +- Assignment-3/CPP/Assignment_3.cpp | 2 +- Assignment-3/CPP/Assignment_3.h | 6 ++ Assignment-3/CPP/Assignment_3_Helper.cpp | 17 +++- Assignment-3/CPP/Assignment_3_Helper.h | 59 ++++++++++++ Assignment-3/Python/Assignment_3.py | 2 +- Assignment-3/Python/Assignment_3_Helper.py | 103 +++++++++++++++++++-- Dockerfile | 2 +- 8 files changed, 177 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7602d53..d4304ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,7 +43,7 @@ jobs: - name: build run: | export SVF_DIR=$(npm root)/SVF - export LLVM_DIR=$(npm root)/llvm-18.1.0.obj + export LLVM_DIR=$(npm root)/llvm-21.1.0.obj export Z3_DIR=$(npm root)/z3.obj echo "SVF_DIR="$SVF_DIR echo "LLVM_DIR="$LLVM_DIR diff --git a/Assignment-3/CPP/Assignment_3.cpp b/Assignment-3/CPP/Assignment_3.cpp index 55417d2..bc1e4d4 100644 --- a/Assignment-3/CPP/Assignment_3.cpp +++ b/Assignment-3/CPP/Assignment_3.cpp @@ -76,7 +76,7 @@ void AbstractExecution::bufOverflowDetection(const SVF::SVFStmt* stmt) { AbstractState& as = getAbsStateFromTrace(gep->getICFGNode()); NodeID lhs = gep->getLHSVarID(); NodeID rhs = gep->getRHSVarID(); - updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), as.getByteOffset(gep)); + 
updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), bufOverflowHelper.getByteOffset(as, gep)); /// TODO: your code starts from here diff --git a/Assignment-3/CPP/Assignment_3.h b/Assignment-3/CPP/Assignment_3.h index 60f5702..c4d2628 100644 --- a/Assignment-3/CPP/Assignment_3.h +++ b/Assignment-3/CPP/Assignment_3.h @@ -26,6 +26,7 @@ */ #include "Assignment_3_Helper.h" #include "AE/Svfexe/AbsExtAPI.h" +#include "AE/Svfexe/AbstractStateManager.h" #include "SVFIR/SVFIR.h" namespace SVF { @@ -121,12 +122,17 @@ namespace SVF { /// Destructor virtual ~AbstractExecution() { + delete svfStateMgr; } protected: /// SVFIR and ICFG SVFIR* svfir; ICFG* icfg; + /// Adapter that lets us reuse AbsExtAPI (which now requires an + /// AbstractStateManager) without giving up our own pre/postAbsTrace. + /// Trace is synced in/out around AbsExtAPI calls. + AbstractStateManager* svfStateMgr = nullptr; /// Map a function to its corresponding WTO Map funcToWTO; diff --git a/Assignment-3/CPP/Assignment_3_Helper.cpp b/Assignment-3/CPP/Assignment_3_Helper.cpp index f5357b6..49ac273 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.cpp +++ b/Assignment-3/CPP/Assignment_3_Helper.cpp @@ -81,13 +81,13 @@ IntervalValue AbstractExecution::getAccessOffset(NodeID objId, const GepStmt* ge // Field-insensitive base object if (SVFUtil::isa(obj)) { // get base size - IntervalValue accessOffset = as.getByteOffset(gep); + IntervalValue accessOffset = bufOverflowHelper.getByteOffset(as, gep); return accessOffset; } // A sub object of an aggregate object else if (SVFUtil::isa(obj)) { IntervalValue accessOffset = - bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + as.getByteOffset(gep); + bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + bufOverflowHelper.getByteOffset(as, gep); return accessOffset; } else{ @@ -543,7 +543,9 @@ void AbstractExecution::ensureAllAssertsValidated() { void AbstractExecution::analyse() { // Init WTOs for all functions, and handle 
Global ICFGNode of SVFModule initWTO(); - utils = new AbsExtAPI(postAbsTrace); + AndersenWaveDiff* ander = AndersenWaveDiff::createAndersenWaveDiff(svfir); + svfStateMgr = new AbstractStateManager(svfir, ander); + utils = new AbsExtAPI(svfStateMgr); // Handle the global node handleGlobalNode(); @@ -687,8 +689,15 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { updateStateOnExtCall(callNode); } else if (SVFUtil::isExtCall(callee)) { - // handle external API calls + // handle external API calls — sync our trace into the stateMgr so + // AbsExtAPI sees the right state, then copy any updates back out. + for (const auto& kv : postAbsTrace) { + svfStateMgr->updateAbstractState(kv.first, kv.second); + } utils->handleExtAPI(callNode); + for (const auto& kv : svfStateMgr->getTrace()) { + postAbsTrace[kv.first] = kv.second; + } } else if (recursiveFuns.find(callee) != recursiveFuns.end()) { // skip recursive functions diff --git a/Assignment-3/CPP/Assignment_3_Helper.h b/Assignment-3/CPP/Assignment_3_Helper.h index 3be1624..7e70968 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.h +++ b/Assignment-3/CPP/Assignment_3_Helper.h @@ -28,10 +28,69 @@ #include "AE/Core/AbstractState.h" #include "AE/Svfexe/AEDetector.h" #include "AE/Core/ICFGWTO.h" +#include "SVFIR/SVFStatements.h" +#include "Util/Options.h" #include "Util/SVFBugReport.h" namespace SVF { class AbstractExecutionHelper { public: + /// Compute the byte offset of a GepStmt against the given abstract state. + /// Replaces the upstream-removed `AbstractState::getByteOffset(GepStmt*)`. + /// Mirrors `AbstractStateManager::getGepByteOffset` but reads non-constant + /// indices directly from `as` (dense trace), since Assignment-3 manages + /// its own per-node trace separately from any AbstractStateManager. 
+ IntervalValue getByteOffset(const AbstractState& as, const GepStmt* gep) { + if (gep->isConstantOffset()) + return IntervalValue((s64_t)gep->accumulateConstantByteOffset()); + + IntervalValue res(0); + for (int i = gep->getOffsetVarAndGepTypePairVec().size() - 1; i >= 0; i--) { + const ValVar* idxOperandVar = gep->getOffsetVarAndGepTypePairVec()[i].first; + const SVFType* idxOperandType = gep->getOffsetVarAndGepTypePairVec()[i].second; + + if (SVFUtil::isa(idxOperandType) || SVFUtil::isa(idxOperandType)) { + u32_t elemByteSize = 1; + if (const SVFArrayType* arrTy = SVFUtil::dyn_cast(idxOperandType)) + elemByteSize = arrTy->getTypeOfElement()->getByteSize(); + else if (SVFUtil::isa(idxOperandType)) + elemByteSize = gep->getAccessPath().gepSrcPointeeType()->getByteSize(); + else + assert(false && "idxOperandType must be ArrType or PtrType"); + + if (const ConstIntValVar* op = SVFUtil::dyn_cast(idxOperandVar)) { + s64_t lb = (double)Options::MaxFieldLimit() / elemByteSize >= op->getSExtValue() + ? op->getSExtValue() * elemByteSize + : Options::MaxFieldLimit(); + res = res + IntervalValue(lb, lb); + } + else { + AbstractState& mut_as = const_cast(as); + IntervalValue idxVal = mut_as[idxOperandVar->getId()].getInterval(); + if (idxVal.isBottom()) + res = res + IntervalValue(0, 0); + else { + s64_t ub = (idxVal.ub().getIntNumeral() < 0) ? 0 + : (double)Options::MaxFieldLimit() / elemByteSize >= idxVal.ub().getIntNumeral() + ? elemByteSize * idxVal.ub().getIntNumeral() + : Options::MaxFieldLimit(); + s64_t lb = (idxVal.lb().getIntNumeral() < 0) ? 0 + : (double)Options::MaxFieldLimit() / elemByteSize >= idxVal.lb().getIntNumeral() + ? 
elemByteSize * idxVal.lb().getIntNumeral() + : Options::MaxFieldLimit(); + res = res + IntervalValue(lb, ub); + } + } + } + else if (const SVFStructType* structTy = SVFUtil::dyn_cast(idxOperandType)) { + res = res + IntervalValue(gep->getAccessPath().getStructFieldOffset(idxOperandVar, structTy)); + } + else { + assert(false && "gep type pair only support arr/ptr/struct"); + } + } + return res; + } + /// Add a detected bug to the bug reporter and print the report ///@{ void addBugToReporter(const AEException& e, const ICFGNode* node) { diff --git a/Assignment-3/Python/Assignment_3.py b/Assignment-3/Python/Assignment_3.py index 498dda1..7fcacc8 100644 --- a/Assignment-3/Python/Assignment_3.py +++ b/Assignment-3/Python/Assignment_3.py @@ -77,7 +77,7 @@ def bufOverflowDetection(self, stmt: pysvf.SVFStmt): # Update GEP object offset from base self.buf_overflow_helper.updateGepObjOffsetFromBase(abstract_state, abstract_state[lhs].getAddrs(), abstract_state[rhs].getAddrs(), - abstract_state.getByteOffset(stmt) + self.buf_overflow_helper.getByteOffset(abstract_state, stmt) ) # TODO: your code starts from here diff --git a/Assignment-3/Python/Assignment_3_Helper.py b/Assignment-3/Python/Assignment_3_Helper.py index 9cc3fb3..4553856 100644 --- a/Assignment-3/Python/Assignment_3_Helper.py +++ b/Assignment-3/Python/Assignment_3_Helper.py @@ -217,6 +217,93 @@ def __init__(self, svfir: pysvf.SVFIR): self.node_to_bug_info = {} self.svfir = svfir + # ------------------------------------------------------------------ + # Helpers that used to live as instance methods on `pysvf.AbstractState`. + # Upstream (Semi-Sparse refactor) moved them to `AbstractStateManager`, + # which requires a sparsity-aware trace we don't keep here. We re-implement + # the dense-mode behavior using only public AbstractState surface so the + # Python side mirrors the C++ side (`AbstractExecutionHelper::getByteOffset`). 
+ # ------------------------------------------------------------------ + def getByteOffset(self, abstract_state: pysvf.AbstractState, gep: pysvf.GepStmt) -> pysvf.IntervalValue: + if gep.isConstantOffset(): + return pysvf.IntervalValue(gep.getConstantByteOffset()) + max_field_limit = pysvf.Options.max_field_limit() + res = pysvf.IntervalValue(0) + pairs = gep.getOffsetVarAndGepTypePairVec() + for i in reversed(range(len(pairs))): + idx_var, idx_type = pairs[i] + if idx_type.isArrayType() or idx_type.isPointerType(): + if idx_type.isArrayType(): + elem_byte_size = idx_type.asArrayType().getTypeOfElement().getByteSize() + else: + elem_byte_size = gep.getSrcPointeeType().getByteSize() + if isinstance(idx_var, pysvf.ConstIntValVar): + val = idx_var.getSExtValue() + lb = val * elem_byte_size if (max_field_limit / elem_byte_size) >= val else max_field_limit + res = res + pysvf.IntervalValue(lb, lb) + else: + idx_val = abstract_state[idx_var.getId()].getInterval() + if idx_val.isBottom(): + res = res + pysvf.IntervalValue(0, 0) + else: + ub_int = idx_val.ub().getNumeral() + lb_int = idx_val.lb().getNumeral() + ub = 0 if ub_int < 0 else ( + elem_byte_size * ub_int if (max_field_limit / elem_byte_size) >= ub_int + else max_field_limit) + lb = 0 if lb_int < 0 else ( + elem_byte_size * lb_int if (max_field_limit / elem_byte_size) >= lb_int + else max_field_limit) + res = res + pysvf.IntervalValue(lb, ub) + elif idx_type.isStructType(): + res = res + pysvf.IntervalValue(gep.getStructFieldOffset(idx_var, idx_type.asStructType())) + else: + raise AssertionError("gep type pair only supports arr/ptr/struct") + return res + + def getGepObjAddrs(self, abstract_state: pysvf.AbstractState, var_id: int, offset: pysvf.IntervalValue) -> pysvf.AddressValue: + gep_addrs = pysvf.AddressValue() + max_field_limit = pysvf.Options.max_field_limit() + lb = min(offset.lb().getNumeral(), max_field_limit) + ub = min(offset.ub().getNumeral(), max_field_limit) + addrs = 
abstract_state[var_id].getAddrs() + for i in range(lb, ub + 1): + for addr in addrs: + base_obj = abstract_state.getIDFromAddr(addr) + gep_obj = self.svfir.getGepObjVar(base_obj, i) + gep_addrs.insert(pysvf.AbstractState.getVirtualMemAddress(gep_obj)) + return gep_addrs + + def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): + ptr_val = abstract_state[var_id] + if not ptr_val.isAddr(): + return None + for addr in ptr_val.getAddrs(): + obj_id = abstract_state.getIDFromAddr(addr) + if obj_id == 0: + continue + return self.svfir.getBaseObject(obj_id).getType() + return None + + def getAllocaInstByteSize(self, abstract_state: pysvf.AbstractState, addr: pysvf.AddrStmt) -> int: + rhs = addr.getRHSVar() + if not isinstance(rhs, pysvf.ObjVar): + raise AssertionError("Addr rhs value is not ObjVar") + base = self.svfir.getBaseObject(rhs.getId()) + if base.isConstantByteSize(): + return base.getByteSizeOfObj() + max_field_limit = pysvf.Options.max_field_limit() + sizes = addr.getArrSize() + res = 1 + for value in sizes: + sz_val = abstract_state[value.getId()].getInterval() + if sz_val.isBottom(): + ub = max_field_limit + else: + ub = sz_val.ub().getNumeral() + res = res * ub if res * ub <= max_field_limit else max_field_limit + return int(res) + def reportBufOverflow(self, node, msg): """ Record an overflow node and its associated exception. 
@@ -277,7 +364,7 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr if dst.getType().isArrayTy(): elemSize = dst.getType().getTypeOfElement().getByteSize() elif dst.getType().isPointerTy(): - elemType = abstractState.getPointeeElement(dstId) + elemType = self.getPointeeElement(abstractState, dstId) if elemType.isArrayTy(): elemSize = elemType.getTypeOfElement().getByteSize() else: @@ -288,8 +375,8 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr range_val = size/elemSize if abstractState.inVarToAddrsTable(dstId) and abstractState.inVarToAddrsTable(srcId): for index in range(0, int(range_val)): - expr_src = abstractState.getGepObjAddrs(srcId, pysvf.IntervalValue(index)) - expr_dst = abstractState.getGepObjAddrs(dstId, pysvf.IntervalValue(index + start_idx)) + expr_src = self.getGepObjAddrs(abstractState, srcId, pysvf.IntervalValue(index)) + expr_dst = self.getGepObjAddrs(abstractState, dstId, pysvf.IntervalValue(index + start_idx)) for addr_src in expr_src: for addr_dst in expr_dst: objId = abstractState.getIDFromAddr(addr_src) @@ -320,7 +407,7 @@ def getStrlen(self, abstractState, strValue): icfg_node = base_object.getICFGNode() for stmt in icfg_node.getSVFStmts(): if isinstance(stmt, pysvf.AddrStmt): - dst_size = abstractState.getAllocaInstByteSize(stmt) + dst_size = self.getAllocaInstByteSize(abstractState, stmt) length = 0 elem_size = 1 @@ -328,7 +415,7 @@ def getStrlen(self, abstractState, strValue): # Calculate the string length if abstractState.getVar(value_id).isAddr(): for index in range(dst_size): - expr0 = abstractState.getGepObjAddrs(value_id, pysvf.IntervalValue(index)) + expr0 = self.getGepObjAddrs(abstractState, value_id, pysvf.IntervalValue(index)) val = pysvf.AbstractValue() for addr in expr0: @@ -343,7 +430,7 @@ def getStrlen(self, abstractState, strValue): if strValue.getType().isArrayTy(): elem_size = strValue.getType().getTypeOfElement().getByteSize() elif 
strValue.getType().isPointerTy(): - elem_type = abstractState.getPointeeElement(value_id) + elem_type = self.getPointeeElement(abstractState, value_id) if elem_type: if elem_type.isArrayTy(): elem_size = elem_type.getTypeOfElement().getByteSize() @@ -1071,14 +1158,14 @@ def getAccessOffset(self, objId: int, gep: pysvf.GepStmt) -> pysvf.IntervalValue # Field-insensitive base object if isinstance(obj, pysvf.BaseObjVar): # Get base size - access_offset = abstract_state.getByteOffset(gep) + access_offset = self.buf_overflow_helper.getByteOffset(abstract_state, gep) return access_offset # A sub-object of an aggregate object elif isinstance(obj, pysvf.GepObjVar): access_offset = ( self.buf_overflow_helper.getGepObjOffsetFromBase(obj) - + abstract_state.getByteOffset(gep) + + self.buf_overflow_helper.getByteOffset(abstract_state, gep) ) return access_offset diff --git a/Dockerfile b/Dockerfile index a5aa85c..6f15813 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ARG TARGETPLATFORM RUN set -e # Define LLVM version. -ENV llvm_version=18.1.0 +ENV llvm_version=21.1.0 # Define home directory ENV HOME=/home/SVF-tools From 6cdc22f171d93fd6552fd44c29ec7accece410e6 Mon Sep 17 00:00:00 2001 From: bjjwwang Date: Thu, 30 Apr 2026 18:01:26 +1000 Subject: [PATCH 2/2] Actually use AbstractStateManager (drop local helper re-implementations) The previous sync (deb5cb1) wired stateMgr into AbstractExecution but only as a placeholder for AbsExtAPI. Reads/writes still went through a separate postAbsTrace map and the GEP/load/size helpers were ported by hand. That defeated the point: the C++ side held a stateMgr it never actually called, and the Python side never even built one. This commit makes both languages use the stateMgr as the authoritative post-trace store and routes the GEP helpers through its API. C++ Assignment-3: - Drop the standalone Map postAbsTrace. 
Replace with `Map<...>& postAbsTrace() { return svfStateMgr->getTrace(); }` so existing call sites still read like `postAbsTrace()[node]`. - getAbsStateFromTrace returns `(*svfStateMgr)[node]`. - All call sites of `as.getByteOffset(gep)` (now removed upstream) and the intermediate `bufOverflowHelper.getByteOffset(as, gep)` shim go directly to `svfStateMgr->getGepByteOffset(gep)`. - Drop the in-out trace sync that used to wrap `utils->handleExtAPI(...)`. AbsExtAPI now reads and writes through the same stateMgr that backs the post trace, so there is nothing to sync. - Drop the 60-line `AbstractExecutionHelper::getByteOffset` re-implementation; it has no remaining caller. Python Assignment-3: - Construct `self.ander = pysvf.AndersenWaveDiff(svfir)` and `self.svf_state_mgr = pysvf.AbstractStateManager(svfir, self.ander)` in `AbstractExecution.__init__`. - Alias `self.post_abs_trace = self.svf_state_mgr` so every existing `self.post_abs_trace[node]`, `node in self.post_abs_trace`, and `self.post_abs_trace[node] = state` call site continues to work -- pysvf.AbstractStateManager's new __getitem__/__setitem__/__contains__ bindings carry the dict protocol. - Pass the stateMgr into AbstractExecutionHelper.__init__. - AbstractExecutionHelper.getByteOffset/getGepObjAddrs/getAllocaInstByteSize shrink to one-liners that delegate to the stateMgr's upstream impl, matching the C++ shape `svfStateMgr->getGepByteOffset(gep)`. getPointeeElement keeps a local impl because the upstream signature takes (ObjVar, ICFGNode) while the existing call sites only have a NodeID for what is typically a ValVar pointer -- not 1:1 convertible. Net asymmetry left: 1 local helper on the Python side (getPointeeElement); everything else now flows through stateMgr on both languages. Locally: bin/ass3 builds at 100%; Python imports clean; helper methods visibly delegate to self.svf_state_mgr. 
Depends on bjjwwang/SVF-Python sync-llvm-21 head bb03d79 for the new __setitem__/__contains__ on AbstractStateManager. Co-Authored-By: Claude Opus 4.7 (1M context) --- Assignment-3/CPP/Assignment_3.cpp | 6 +- Assignment-3/CPP/Assignment_3.h | 15 ++-- Assignment-3/CPP/Assignment_3_Helper.cpp | 32 +++----- Assignment-3/CPP/Assignment_3_Helper.h | 56 ------------- Assignment-3/Python/Assignment_3_Helper.py | 95 +++++++--------------- 5 files changed, 54 insertions(+), 150 deletions(-) diff --git a/Assignment-3/CPP/Assignment_3.cpp b/Assignment-3/CPP/Assignment_3.cpp index bc1e4d4..fe8ccd1 100644 --- a/Assignment-3/CPP/Assignment_3.cpp +++ b/Assignment-3/CPP/Assignment_3.cpp @@ -76,7 +76,7 @@ void AbstractExecution::bufOverflowDetection(const SVF::SVFStmt* stmt) { AbstractState& as = getAbsStateFromTrace(gep->getICFGNode()); NodeID lhs = gep->getLHSVarID(); NodeID rhs = gep->getRHSVarID(); - updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), bufOverflowHelper.getByteOffset(as, gep)); + updateGepObjOffsetFromBase(as, as[lhs].getAddrs(), as[rhs].getAddrs(), svfStateMgr->getGepByteOffset(gep)); /// TODO: your code starts from here @@ -264,9 +264,9 @@ void AbstractExecution::updateStateOnCall(const CallPE* callPE) { { NodeID curId = callPE->getOpVarID(i); const ICFGNode* opICFGNode = callPE->getOpCallICFGNode(i); - if (postAbsTrace.count(opICFGNode)) + if (postAbsTrace().count(opICFGNode)) { - AbstractState& opAs = postAbsTrace[opICFGNode]; + AbstractState& opAs = postAbsTrace()[opICFGNode]; rhs.join_with(opAs[curId]); } } diff --git a/Assignment-3/CPP/Assignment_3.h b/Assignment-3/CPP/Assignment_3.h index c4d2628..da75f38 100644 --- a/Assignment-3/CPP/Assignment_3.h +++ b/Assignment-3/CPP/Assignment_3.h @@ -109,7 +109,7 @@ namespace SVF { /// Return its abstract state given an ICFGNode AbstractState& getAbsStateFromTrace(const ICFGNode* node) { - return postAbsTrace[node]; + return (*svfStateMgr)[node]; } /// Update the offset of a GEP (GetElementPtr) 
object from its base address @@ -129,9 +129,10 @@ namespace SVF { /// SVFIR and ICFG SVFIR* svfir; ICFG* icfg; - /// Adapter that lets us reuse AbsExtAPI (which now requires an - /// AbstractStateManager) without giving up our own pre/postAbsTrace. - /// Trace is synced in/out around AbsExtAPI calls. + /// Owns the abstract trace immediately after an ICFGNode (post trace). + /// AbsExtAPI and the GEP/load/store helpers (getGepByteOffset etc.) + /// read and write through this manager; we don't keep a separate + /// postAbsTrace map any more. AbstractStateManager* svfStateMgr = nullptr; /// Map a function to its corresponding WTO @@ -140,8 +141,10 @@ namespace SVF { Set recursiveFuns; /// Abstract trace immediately before an ICFGNode. Map preAbsTrace; - /// Abstract trace immediately after an ICFGNode. - Map postAbsTrace; + /// Convenience alias: the "post" trace lives inside svfStateMgr. + Map& postAbsTrace() { + return svfStateMgr->getTrace(); + } private: AbstractExecutionHelper bufOverflowHelper; diff --git a/Assignment-3/CPP/Assignment_3_Helper.cpp b/Assignment-3/CPP/Assignment_3_Helper.cpp index 49ac273..a0849a1 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.cpp +++ b/Assignment-3/CPP/Assignment_3_Helper.cpp @@ -81,13 +81,13 @@ IntervalValue AbstractExecution::getAccessOffset(NodeID objId, const GepStmt* ge // Field-insensitive base object if (SVFUtil::isa(obj)) { // get base size - IntervalValue accessOffset = bufOverflowHelper.getByteOffset(as, gep); + IntervalValue accessOffset = svfStateMgr->getGepByteOffset(gep); return accessOffset; } // A sub object of an aggregate object else if (SVFUtil::isa(obj)) { IntervalValue accessOffset = - bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + bufOverflowHelper.getByteOffset(as, gep); + bufOverflowHelper.getGepObjOffsetFromBase(SVFUtil::cast(obj)) + svfStateMgr->getGepByteOffset(gep); return accessOffset; } else{ @@ -221,12 +221,12 @@ bool AbstractExecution::mergeStatesFromPredecessors(const 
ICFGNode* block, Abstr // Iterate over all incoming edges of the given block for (auto& edge : block->getInEdges()) { // Check if the source node of the edge has a post-execution state recorded - if (postAbsTrace.find(edge->getSrcNode()) != postAbsTrace.end()) { + if (postAbsTrace().find(edge->getSrcNode()) != postAbsTrace().end()) { const IntraCFGEdge* intraCfgEdge = SVFUtil::dyn_cast(edge); // If the edge is an intra-block edge and has a condition if (intraCfgEdge && intraCfgEdge->getCondition()) { - AbstractState tmpEs = postAbsTrace[edge->getSrcNode()]; + AbstractState tmpEs = postAbsTrace()[edge->getSrcNode()]; // Check if the branch condition is feasible if (isBranchFeasible(intraCfgEdge, tmpEs)) { as.joinWith(tmpEs); // Merge the state with the current state @@ -236,7 +236,7 @@ bool AbstractExecution::mergeStatesFromPredecessors(const ICFGNode* block, Abstr } else { // For non-conditional edges, directly merge the state - as.joinWith(postAbsTrace[edge->getSrcNode()]); + as.joinWith(postAbsTrace()[edge->getSrcNode()]); inEdgeNum++; } } @@ -492,8 +492,8 @@ bool AbstractExecution::isBranchFeasible(const IntraCFGEdge* intraEdge, Abstract void AbstractExecution::handleGlobalNode() { AbstractState as; const ICFGNode* node = icfg->getGlobalICFGNode(); - postAbsTrace[node] = preAbsTrace[node]; - postAbsTrace[node][0] = AddressValue(); + postAbsTrace()[node] = preAbsTrace[node]; + postAbsTrace()[node][0] = AddressValue(); // Global Node, we just need to handle addr, load, store, copy and gep for (const SVFStmt* stmt : node->getSVFStmts()) { updateAbsState(stmt); @@ -585,8 +585,8 @@ bool AbstractExecution::handleICFGNode(const ICFGNode* node) { } preAbsTrace[node] = tmpEs; // Store the last abstract state, used to check if the abstract state has reached a fixpoint - AbstractState last_as = postAbsTrace[node]; - postAbsTrace[node] = preAbsTrace[node]; + AbstractState last_as = postAbsTrace()[node]; + postAbsTrace()[node] = preAbsTrace[node]; for (const SVFStmt* stmt : 
node->getSVFStmts()) { updateAbsState(stmt); bufOverflowDetection(stmt); @@ -596,7 +596,7 @@ bool AbstractExecution::handleICFGNode(const ICFGNode* node) { handleCallSite(callNode); } // If the abstract state is the same as the last abstract state, return false because we have reached fixpoint - if (postAbsTrace[node] == last_as) { + if (postAbsTrace()[node] == last_as) { return false; } return true; @@ -682,22 +682,16 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { } else if (fun_name == "nd" || fun_name == "rand") { NodeID lhsId = callNode->getRetICFGNode()->getActualRet()->getId(); - postAbsTrace[callNode][lhsId] = AbstractValue(IntervalValue::top()); + postAbsTrace()[callNode][lhsId] = AbstractValue(IntervalValue::top()); } else if (isExternalCallForAssignment(callee)) { // implement external calls for the assignment updateStateOnExtCall(callNode); } else if (SVFUtil::isExtCall(callee)) { - // handle external API calls — sync our trace into the stateMgr so - // AbsExtAPI sees the right state, then copy any updates back out. - for (const auto& kv : postAbsTrace) { - svfStateMgr->updateAbstractState(kv.first, kv.second); - } + // handle external API calls — AbsExtAPI reads/writes through the + // same svfStateMgr that backs postAbsTrace(), so no sync needed. utils->handleExtAPI(callNode); - for (const auto& kv : svfStateMgr->getTrace()) { - postAbsTrace[kv.first] = kv.second; - } } else if (recursiveFuns.find(callee) != recursiveFuns.end()) { // skip recursive functions diff --git a/Assignment-3/CPP/Assignment_3_Helper.h b/Assignment-3/CPP/Assignment_3_Helper.h index 7e70968..dac3469 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.h +++ b/Assignment-3/CPP/Assignment_3_Helper.h @@ -34,62 +34,6 @@ namespace SVF { class AbstractExecutionHelper { public: - /// Compute the byte offset of a GepStmt against the given abstract state. - /// Replaces the upstream-removed `AbstractState::getByteOffset(GepStmt*)`. 
- /// Mirrors `AbstractStateManager::getGepByteOffset` but reads non-constant - /// indices directly from `as` (dense trace), since Assignment-3 manages - /// its own per-node trace separately from any AbstractStateManager. - IntervalValue getByteOffset(const AbstractState& as, const GepStmt* gep) { - if (gep->isConstantOffset()) - return IntervalValue((s64_t)gep->accumulateConstantByteOffset()); - - IntervalValue res(0); - for (int i = gep->getOffsetVarAndGepTypePairVec().size() - 1; i >= 0; i--) { - const ValVar* idxOperandVar = gep->getOffsetVarAndGepTypePairVec()[i].first; - const SVFType* idxOperandType = gep->getOffsetVarAndGepTypePairVec()[i].second; - - if (SVFUtil::isa(idxOperandType) || SVFUtil::isa(idxOperandType)) { - u32_t elemByteSize = 1; - if (const SVFArrayType* arrTy = SVFUtil::dyn_cast(idxOperandType)) - elemByteSize = arrTy->getTypeOfElement()->getByteSize(); - else if (SVFUtil::isa(idxOperandType)) - elemByteSize = gep->getAccessPath().gepSrcPointeeType()->getByteSize(); - else - assert(false && "idxOperandType must be ArrType or PtrType"); - - if (const ConstIntValVar* op = SVFUtil::dyn_cast(idxOperandVar)) { - s64_t lb = (double)Options::MaxFieldLimit() / elemByteSize >= op->getSExtValue() - ? op->getSExtValue() * elemByteSize - : Options::MaxFieldLimit(); - res = res + IntervalValue(lb, lb); - } - else { - AbstractState& mut_as = const_cast(as); - IntervalValue idxVal = mut_as[idxOperandVar->getId()].getInterval(); - if (idxVal.isBottom()) - res = res + IntervalValue(0, 0); - else { - s64_t ub = (idxVal.ub().getIntNumeral() < 0) ? 0 - : (double)Options::MaxFieldLimit() / elemByteSize >= idxVal.ub().getIntNumeral() - ? elemByteSize * idxVal.ub().getIntNumeral() - : Options::MaxFieldLimit(); - s64_t lb = (idxVal.lb().getIntNumeral() < 0) ? 0 - : (double)Options::MaxFieldLimit() / elemByteSize >= idxVal.lb().getIntNumeral() - ? 
elemByteSize * idxVal.lb().getIntNumeral() - : Options::MaxFieldLimit(); - res = res + IntervalValue(lb, ub); - } - } - } - else if (const SVFStructType* structTy = SVFUtil::dyn_cast(idxOperandType)) { - res = res + IntervalValue(gep->getAccessPath().getStructFieldOffset(idxOperandVar, structTy)); - } - else { - assert(false && "gep type pair only support arr/ptr/struct"); - } - } - return res; - } /// Add a detected bug to the bug reporter and print the report ///@{ diff --git a/Assignment-3/Python/Assignment_3_Helper.py b/Assignment-3/Python/Assignment_3_Helper.py index 4553856..02a5c97 100644 --- a/Assignment-3/Python/Assignment_3_Helper.py +++ b/Assignment-3/Python/Assignment_3_Helper.py @@ -204,7 +204,7 @@ class AbstractExecutionHelper: managing GEP object offsets, and other utilities. """ - def __init__(self, svfir: pysvf.SVFIR): + def __init__(self, svfir: pysvf.SVFIR, svf_state_mgr: pysvf.AbstractStateManager = None): """ Initialize member variables. """ @@ -216,6 +216,9 @@ def __init__(self, svfir: pysvf.SVFIR): # Map to store exception information for each ICFGNode self.node_to_bug_info = {} self.svfir = svfir + # Optional: if a stateMgr is provided, getByteOffset delegates to its + # getGepByteOffset (the C++ side does the same via svfStateMgr->...). + self.svf_state_mgr = svf_state_mgr # ------------------------------------------------------------------ # Helpers that used to live as instance methods on `pysvf.AbstractState`. @@ -225,54 +228,19 @@ def __init__(self, svfir: pysvf.SVFIR): # Python side mirrors the C++ side (`AbstractExecutionHelper::getByteOffset`). 
# ------------------------------------------------------------------ def getByteOffset(self, abstract_state: pysvf.AbstractState, gep: pysvf.GepStmt) -> pysvf.IntervalValue: - if gep.isConstantOffset(): - return pysvf.IntervalValue(gep.getConstantByteOffset()) - max_field_limit = pysvf.Options.max_field_limit() - res = pysvf.IntervalValue(0) - pairs = gep.getOffsetVarAndGepTypePairVec() - for i in reversed(range(len(pairs))): - idx_var, idx_type = pairs[i] - if idx_type.isArrayType() or idx_type.isPointerType(): - if idx_type.isArrayType(): - elem_byte_size = idx_type.asArrayType().getTypeOfElement().getByteSize() - else: - elem_byte_size = gep.getSrcPointeeType().getByteSize() - if isinstance(idx_var, pysvf.ConstIntValVar): - val = idx_var.getSExtValue() - lb = val * elem_byte_size if (max_field_limit / elem_byte_size) >= val else max_field_limit - res = res + pysvf.IntervalValue(lb, lb) - else: - idx_val = abstract_state[idx_var.getId()].getInterval() - if idx_val.isBottom(): - res = res + pysvf.IntervalValue(0, 0) - else: - ub_int = idx_val.ub().getNumeral() - lb_int = idx_val.lb().getNumeral() - ub = 0 if ub_int < 0 else ( - elem_byte_size * ub_int if (max_field_limit / elem_byte_size) >= ub_int - else max_field_limit) - lb = 0 if lb_int < 0 else ( - elem_byte_size * lb_int if (max_field_limit / elem_byte_size) >= lb_int - else max_field_limit) - res = res + pysvf.IntervalValue(lb, ub) - elif idx_type.isStructType(): - res = res + pysvf.IntervalValue(gep.getStructFieldOffset(idx_var, idx_type.asStructType())) - else: - raise AssertionError("gep type pair only supports arr/ptr/struct") - return res + # Delegates to the stateMgr's upstream impl, mirroring the C++ side + # `svfStateMgr->getGepByteOffset(gep)`. The `abstract_state` argument + # is kept in the signature for symmetry with the call-site shape but + # is not consulted here -- the mgr reads non-constant indices from + # its own trace, which is the same trace this helper writes to. 
+ return self.svf_state_mgr.getGepByteOffset(gep) def getGepObjAddrs(self, abstract_state: pysvf.AbstractState, var_id: int, offset: pysvf.IntervalValue) -> pysvf.AddressValue: - gep_addrs = pysvf.AddressValue() - max_field_limit = pysvf.Options.max_field_limit() - lb = min(offset.lb().getNumeral(), max_field_limit) - ub = min(offset.ub().getNumeral(), max_field_limit) - addrs = abstract_state[var_id].getAddrs() - for i in range(lb, ub + 1): - for addr in addrs: - base_obj = abstract_state.getIDFromAddr(addr) - gep_obj = self.svfir.getGepObjVar(base_obj, i) - gep_addrs.insert(pysvf.AbstractState.getVirtualMemAddress(gep_obj)) - return gep_addrs + # Delegates to the stateMgr's upstream impl. mgr.getGepObjAddrs takes + # a ValVar* (and infers the ICFGNode from it), so we look the var up + # by id. Matches the C++ side `svfStateMgr->getGepObjAddrs(...)`. + pointer = self.svfir.getGNode(var_id) + return self.svf_state_mgr.getGepObjAddrs(pointer, offset) def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): ptr_val = abstract_state[var_id] @@ -286,23 +254,10 @@ def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): return None def getAllocaInstByteSize(self, abstract_state: pysvf.AbstractState, addr: pysvf.AddrStmt) -> int: - rhs = addr.getRHSVar() - if not isinstance(rhs, pysvf.ObjVar): - raise AssertionError("Addr rhs value is not ObjVar") - base = self.svfir.getBaseObject(rhs.getId()) - if base.isConstantByteSize(): - return base.getByteSizeOfObj() - max_field_limit = pysvf.Options.max_field_limit() - sizes = addr.getArrSize() - res = 1 - for value in sizes: - sz_val = abstract_state[value.getId()].getInterval() - if sz_val.isBottom(): - ub = max_field_limit - else: - ub = sz_val.ub().getNumeral() - res = res * ub if res * ub <= max_field_limit else max_field_limit - return int(res) + # Delegates to the stateMgr's upstream impl. mgr.getAllocaInstByteSize + # takes the AddrStmt directly (it derives node + sizes itself). 
Matches + # the C++ side `svfStateMgr->getAllocaInstByteSize(addr)`. + return self.svf_state_mgr.getAllocaInstByteSize(addr) def reportBufOverflow(self, node, msg): """ @@ -487,8 +442,16 @@ def __init__(self, pag: pysvf.SVFIR): self.func_to_wto = {} self.recursive_funs = set() self.pre_abs_trace = {} - self.post_abs_trace = {} - self.buf_overflow_helper = AbstractExecutionHelper(self.svfir) + # Owns the post-trace and is the backing store for AbsExtAPI as well + # as the GEP/load/store helpers (getGepByteOffset etc.). Replaces + # the old `self.post_abs_trace` dict so reads/writes on + # `self.post_abs_trace[node]` go through the mgr's trace. + self.ander = pysvf.AndersenWaveDiff(self.svfir) + self.svf_state_mgr = pysvf.AbstractStateManager(self.svfir, self.ander) + # Alias preserved so existing call-sites `self.post_abs_trace[node]` + # keep working. The mgr supports __getitem__/__setitem__/__contains__. + self.post_abs_trace = self.svf_state_mgr + self.buf_overflow_helper = AbstractExecutionHelper(self.svfir, self.svf_state_mgr) self.assert_points = set() self.widen_delay = 3 self.addressMask = 0x7f000000