From 1e72913bdbfdb656442767412496a21a306e58e7 Mon Sep 17 00:00:00 2001 From: Shaobo He Date: Fri, 13 Mar 2026 20:03:14 -0700 Subject: [PATCH] Add integer sign inference analysis pass Introduce SignAnalysis, a module pass that infers the sign (signed/unsigned) of integer SSA values via bidirectional dataflow over a four-point lattice. The analysis propagates constraints from sign-carrying operations (sdiv, zext, signed comparisons, nsw/nuw flags), through memory via sea-dsa alias info, and across function boundaries via call/return edges. SmackRep::lit() now queries SignAnalysis to decide constant rendering, falling back to the previous heuristic when the analysis returns Unknown or Conflict. Co-Authored-By: Claude Opus 4.6 --- CMakeLists.txt | 2 + docs/sign-analysis.md | 272 ++++++++++++++++++ include/smack/SignAnalysis.h | 98 +++++++ include/smack/SmackRep.h | 5 +- lib/smack/SignAnalysis.cpp | 444 +++++++++++++++++++++++++++++ lib/smack/SmackModuleGenerator.cpp | 5 +- lib/smack/SmackRep.cpp | 40 ++- tools/llvm2bpl/llvm2bpl.cpp | 2 + 8 files changed, 855 insertions(+), 13 deletions(-) create mode 100644 docs/sign-analysis.md create mode 100644 include/smack/SignAnalysis.h create mode 100644 lib/smack/SignAnalysis.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a70d7692..4b9de03fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,6 +123,7 @@ add_library(smackTranslator STATIC include/smack/AnnotateLoopExits.h include/smack/SplitAggregateValue.h include/smack/Prelude.h + include/smack/SignAnalysis.h include/smack/SmackWarnings.h lib/smack/AddTiming.cpp lib/smack/BoogieAst.cpp @@ -150,6 +151,7 @@ add_library(smackTranslator STATIC lib/smack/AnnotateLoopExits.cpp lib/smack/SplitAggregateValue.cpp lib/smack/Prelude.cpp + lib/smack/SignAnalysis.cpp lib/smack/SmackWarnings.cpp ) diff --git a/docs/sign-analysis.md b/docs/sign-analysis.md new file mode 100644 index 000000000..556422f77 --- /dev/null +++ b/docs/sign-analysis.md @@ -0,0 +1,272 @@ +# Sign Analysis Pass for SMACK + +## Context + +LLVM IR integers are signless — the sign is encoded per-operation (e.g., `sdiv` vs `udiv`), not per-value. For downstream translation (e.g., to Boogie or other typed IRs), it's useful to infer whether a given SSA value is "intended" to be signed or unsigned. This pass performs a best-effort dataflow analysis to recover that intent. + +## Files to Create/Modify + +| File | Action | +|------|--------| +| `include/smack/SignAnalysis.h` | **Create** — header with `Sign` enum, lattice ops, `SignAnalysis` pass class | +| `lib/smack/SignAnalysis.cpp` | **Create** — implementation | +| `CMakeLists.txt` | **Edit** — add both files to `smackTranslator` | + +No wiring into the pass pipeline yet — this is a standalone analysis that other passes can opt into via `getAnalysis()`. + +## Core Algorithm + +### 1. Sign Lattice + +``` + Unknown (top — no evidence yet) + / \ + Signed Unsigned + \ / + Conflict (bottom — contradictory evidence) +``` + +**Meet operator:** `meet(a, b)` = `a` if `a == b`; return whichever is non-Unknown; otherwise `Conflict`. + +### 2. Single Domain + +The analysis tracks signs in a single map: + +- **SSA map** (`SignMap`): maps `const Value*` → `Sign` for all SSA values, including load results. + +Memory propagation (§4) connects loads and stores via DSA alias queries, but the results are stored in the same `SignMap` — there is no separate memory map. + +### 3. Constraint Sources + +Every integer instruction is classified into one of three categories: + +#### A. Instructions that constrain their **operands** (backward) + +| Instruction / Predicate | Operand sign | +|--------------------------|--------------| +| `sdiv`, `srem`, `ashr` | `Signed` | +| `sext` (source operand) | `Signed` | +| `icmp slt/sgt/sle/sge` | `Signed` | +| `udiv`, `urem`, `lshr` | `Unsigned` | +| `zext` (source operand) | `Unsigned` | +| `icmp ult/ugt/ule/uge` | `Unsigned` | + +#### B. Instructions that constrain their **result** (forward) + +| Instruction | Result sign | +|-------------|-------------| +| `sext` | `Signed` | +| `zext` | `Unsigned` | +| `fptoui` | `Unsigned` | +| `fptosi` | `Signed` | + +#### C. Sign-agnostic instructions (no constraint by default) + +`add`, `sub`, `mul`, `shl`, `and`, `or`, `xor`, `icmp eq/ne`, `trunc`, `select` + +**However**, `nsw`/`nuw` flags on sign-agnostic arithmetic provide direct evidence: +- `add nsw` / `sub nsw` / `mul nsw` / `shl nsw` → operands and result are `Signed` +- `add nuw` / `sub nuw` / `mul nuw` / `shl nuw` → operands and result are `Unsigned` +- If both flags are present, we do not constrain (they don't conflict in LLVM semantics, but for sign inference they're ambiguous). + +### 4. Memory Propagation (via sea-dsa) + +Sign information flows through memory by connecting loads to aliasing stores (and vice versa) using `DSAWrapper`. Everything stays in the single `SignMap`. + +#### Alias resolution + +Two pointers alias if they map to the same abstract memory cell. A cell is identified by `(DSNode*, offset)`, obtained via `DSAWrapper::getNode(ptr)` and `DSAWrapper::getOffset(ptr)`. + +Edge cases (learned from `Regions.cpp`): +- **Collapsed nodes** (`node->isOffsetCollapsed()`): all offsets within the node alias each other. Normalize offset to 0 for these nodes. +- **Incomplete/complicated nodes** (`node->isIncomplete()`, `node->isExternal()`, `node->isIntToPtr()`, etc.): conservatively alias other nodes of the same kind. Skip sign propagation for these — leave loaded values as `Unknown`. +- **Null nodes**: `getNode()` returns `nullptr` for values without cells (e.g., `undef`). Skip. + +#### Load: aliasing stores → load result + +``` +%y = load i32, i32* %p +``` +- Use DSA to find all `store` instructions whose pointer aliases `%p` (same cell). +- `sign(%y) = meet(sign(%y), sign(stored_value))` for each aliasing store. + +#### Store: stored value → aliasing load results + +``` +store i32 %x, i32* %p +``` +- Use DSA to find all `load` instructions whose pointer aliases `%p`. +- `sign(load_result) = meet(sign(load_result), sign(%x))` for each aliasing load. + +#### Implementation + +During initialization, build an index of stores and loads per cell: +- `CellStores`: maps `(DSNode*, offset)` → list of stored `Value*` +- `CellLoads`: maps `(DSNode*, offset)` → list of load result `Value*` + +For collapsed nodes, the offset in the key is normalized to 0 so all accesses to the same collapsed node are grouped together. Incomplete/complicated nodes are excluded from the index. + +During propagation, when a store's value sign changes, propagate to all loads in the same cell, and vice versa. + +#### memcpy / memmove + +- Resolve source and destination pointers to their DSNodes. +- For each aliasing store to the source, propagate to loads from the destination. +- If the copy size or field layout is unknown, conservatively skip. + +#### memset + +- Typically zeroing memory — zero is both signed and unsigned, so no sign constraint. Skip. + +#### Soundness + +sea-dsa over-approximates the set of memory locations a pointer can refer to. Since we use `meet` (which can only move values downward in the lattice), this is conservative: aliased stores from different sign contexts will produce `Conflict`, which is correct. + +### 5. Propagation Rules + +#### Forward propagation (def → result) +For each instruction `I` producing value `V`: +- If `I` is in category B, set `sign(V) = meet(sign(V), )`. +- If `I` has `nsw` (only) → `sign(V) = meet(sign(V), Signed)`. +- If `I` has `nuw` (only) → `sign(V) = meet(sign(V), Unsigned)`. +- If `I` is a `PHINode`: `sign(V) = meet over all incoming values`. +- If `I` is a `select`: `sign(V) = meet(sign(true_val), sign(false_val))`. +- If `I` is a `trunc`: `sign(V) = sign(source operand)` (propagate through). +- If `I` is a `load`: meet signs of all aliasing stores' values into `sign(I)` (see §4). + +#### Backward propagation (use → operands) +For each instruction `I` using value `V` as an operand: +- If `I` is in category A, update `sign(V) = meet(sign(V),
)`. +- If `I` has `nsw` (only) → `sign(V) = meet(sign(V), Signed)` for each integer operand. +- If `I` has `nuw` (only) → `sign(V) = meet(sign(V), Unsigned)` for each integer operand. +- If `I` is a `store`: meet `sign(stored_value)` into all aliasing load results (see §4). + +### 6. Initialization + +Before iteration: +- Run `DSAWrapper` to build alias information. +- Build the `CellStores` and `CellLoads` indices by scanning all load/store instructions. +- Scan all instructions once. For each instruction, apply forward and backward rules (including load/store memory rules) to seed `SignMap`. +- Constants: leave as `Unknown` (their sign is determined by how they're used). + +### 7. Interprocedural Propagation + +Sign information crosses function boundaries in two ways: + +- **Memory**: already handled by DSA (module-wide, context-insensitive). +- **Call/return**: at a `CallInst`, propagate signs between actual arguments and formal parameters, and between the callee's return value and the `CallInst` result. + +#### Forward (caller → callee) +- At a `CallInst`, propagate `sign(actual_arg)` → `sign(formal_param)` for each argument. + +#### Backward (callee → caller) +- At a `ReturnInst`, propagate `sign(return_value)` → `sign(CallInst result)` for all call sites. +- At a `CallInst`, propagate `sign(formal_param)` → `sign(actual_arg)` for each argument. + +### 8. Fixpoint Iteration + +``` +repeat: + changed = false + for each function F in module: + for each instruction I in F: + changed |= propagateForward(I) // includes load, call → callee + changed |= propagateBackward(I) // includes store, return → caller +until !changed +``` + +The lattice has height 3 (Unknown → Signed/Unsigned → Conflict) and values can only move downward, so this terminates in at most 3 passes over the module (in practice, 2). + +> **TODO**: The current implementation uses a simple global fixpoint over all functions in arbitrary order. For large modules with deep call chains, this could be improved by using bottom-up/top-down traversal of the call graph with SCC-based handling of recursive functions. The simple approach is correct (same lattice, same monotone transfer functions) but may require more iterations. + +### 8. Query Interface + +```cpp +Sign getSign(const Value *V) const; +``` + +Returns the inferred sign. Values not in the map default to `Unknown`. + +### 9. Debug Support + +A `dump()` method prints the sign map to `errs()`, showing each value and its inferred sign. Gated behind `DEBUG_TYPE "smack-sign"` so it only appears with `-debug-only=smack-sign`. + +## Implementation Structure + +```cpp +// SignAnalysis.h +enum class Sign { Unknown, Signed, Unsigned, Conflict }; +Sign meetSign(Sign a, Sign b); + +using MemCell = std::pair; + +class SignAnalysis : public llvm::ModulePass { + static char ID; + std::map SignMap; + + // Indices connecting loads and stores through DSA alias cells + std::map> CellStores; // cell → stored values + std::map> CellLoads; // cell → load results + + DSAWrapper *DSA = nullptr; + + bool update(const Value *V, Sign S); // meet into sign map + MemCell resolvePointer(const Value *Ptr); // pointer → (DSNode, offset) + + void buildMemoryIndex(Module &M); // build CellStores/CellLoads + void initialize(Module &M); + bool propagate(Module &M); + bool propagateForward(Instruction &I); + bool propagateBackward(Instruction &I); + +public: + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; + Sign getSign(const Value *V) const; + void dump() const; +}; +``` + +## Integration with SmackRep (constant rendering) + +The existing per-instruction sign heuristic in `SmackRep` is replaced with queries to `SignAnalysis`: + +### Current heuristic (to be commented out) + +- `SmackRep::bop(BinaryOperator*)`: uses `!BO->hasNoSignedWrap()` as `isUnsigned`, then overrides per opcode (SDiv→signed, UDiv/Sub→unsigned). +- `SmackRep::lit()`: decides whether to render a negative constant via a complex `isUnsigned`/`isUnsignedInst` two-flag heuristic with a special case for `-1`. +- `SmackRep::expr()`: threads `isConstIntUnsigned`/`isUnsignedInst` from callers down to `lit()`. + +### Replacement + +- `SmackRep` gains a `SignAnalysis *signAnalysis` member, set at construction. +- `SmackRep::expr(const Value *v)` — when `v` is a `ConstantInt`, queries `signAnalysis->getSign(v)` to decide rendering. The `isConstIntUnsigned`/`isUnsignedInst` parameters are removed. +- `SmackRep::lit(const Value *v)` — simplified: query `signAnalysis->getSign(v)`. If `Signed`, render negative constants as `$sub.iN(0, abs)`. If `Unsigned` or `Unknown`, render as positive. The old two-flag parameters are removed. +- `SmackRep::bop(BinaryOperator*)` — no longer computes `isUnsigned` from NSW; calls `expr()` without sign flags. +- `SmackRep::cmp()` — already uses `CmpInst::isUnsigned()` for predicate selection; for operand constant rendering, it now relies on `SignAnalysis` via `expr()` instead of passing its own flag. + +### Wiring + +- `SmackModuleGenerator::getAnalysisUsage()` adds `AU.addRequired()`. +- `SmackModuleGenerator::generateProgram()` passes `&getAnalysis()` to `SmackRep` constructor. +- `SignAnalysis` is added to the pipeline in `llvm2bpl.cpp` (before `SmackModuleGenerator`). + +### Files modified + +| File | Changes | +|------|---------| +| `include/smack/SmackRep.h` | Add `SignAnalysis*` member; remove `isUnsigned`/`isUnsignedInst` params from `lit()`, `expr()`, `bop()` | +| `lib/smack/SmackRep.cpp` | Comment out old heuristic, replace with `signAnalysis->getSign()` queries | +| `include/smack/SmackModuleGenerator.h` | No change needed | +| `lib/smack/SmackModuleGenerator.cpp` | Add `SignAnalysis` to `getAnalysisUsage`, pass to `SmackRep` | +| `tools/llvm2bpl/llvm2bpl.cpp` | Add `SignAnalysis` pass to pipeline | + +## Key Dependencies + +- `DSAWrapper` (`include/smack/DSAWrapper.h`, `lib/smack/DSAWrapper.cpp`) — wraps sea-dsa's `GlobalAnalysis`, provides `getNode(const Value*)` and `getOffset(const Value*)` to resolve pointers to `(DSNode*, offset)` pairs. + +## Verification + +1. **Build**: add files to CMakeLists.txt, run `cmake --build`. +2. **Functional test**: write C test files where constants appear in signed/unsigned contexts, compile with SMACK, and use `@checkbpl grep` to assert the generated Boogie renders constants correctly (e.g., `-1` as `$sub.i32(0, 1)` in signed context, as `4294967295` in unsigned context). +3. **Regression**: run existing SMACK test suite (`test/regtest.py`) to ensure no existing tests break. diff --git a/include/smack/SignAnalysis.h b/include/smack/SignAnalysis.h new file mode 100644 index 000000000..78418b8ab --- /dev/null +++ b/include/smack/SignAnalysis.h @@ -0,0 +1,98 @@ +// +// This file is distributed under the MIT License. See LICENSE for details. +// +#ifndef SIGNANALYSIS_H +#define SIGNANALYSIS_H + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include +#include + +namespace seadsa { +class Node; +} + +namespace smack { + +class DSAWrapper; + +/// Inferred sign for an integer SSA value. +/// +/// Lattice: +/// Unknown +/// / \; +/// Signed Unsigned +/// \ / +/// Conflict +enum class Sign { Unknown, Signed, Unsigned, Conflict }; + +/// Meet operator on the sign lattice. Unknown is top, Conflict is bottom. +inline Sign meetSign(Sign a, Sign b) { + if (a == b) + return a; + if (a == Sign::Unknown) + return b; + if (b == Sign::Unknown) + return a; + return Sign::Conflict; +} + +/// Module pass that infers the sign (signed / unsigned / unknown / conflict) +/// of every integer-typed SSA value via bidirectional dataflow analysis, +/// including propagation through memory via sea-dsa alias information. +class SignAnalysis : public llvm::ModulePass { +public: + static char ID; + SignAnalysis() : llvm::ModulePass(ID) {} + llvm::StringRef getPassName() const override; + void getAnalysisUsage(llvm::AnalysisUsage &AU) const override; + bool runOnModule(llvm::Module &M) override; + + /// Query the inferred sign for a value. Returns Unknown for values + /// not in the map. + Sign getSign(const llvm::Value *V) const; + + /// Dump the analysis results to errs() (for debugging). + void dump() const; + +private: + using MemCell = std::pair; + + std::map SignMap; + + /// Index connecting loads and stores through DSA alias cells. + std::map> CellStores; + std::map> CellLoads; + + DSAWrapper *DSA = nullptr; + + /// Meet sign S into V's current mapping. Returns true if changed. + bool update(const llvm::Value *V, Sign S); + + /// Resolve a pointer to its abstract memory cell via DSA. + /// Returns {nullptr, 0} if resolution fails. + MemCell resolvePointer(const llvm::Value *Ptr); + + /// Build the CellStores/CellLoads indices from all load/store instructions. + void buildMemoryIndex(llvm::Module &M); + + /// Seed the map from obvious defs and uses. + void initialize(llvm::Module &M); + + /// One round of forward + backward propagation. Returns true if + /// any mapping changed. + bool propagate(llvm::Module &M); + + /// Forward: propagate sign from a def to its result. + bool propagateForward(llvm::Instruction &I); + + /// Backward: propagate sign constraints from a use to its operands. + bool propagateBackward(llvm::Instruction &I); +}; + +} // namespace smack + +#endif // SIGNANALYSIS_H diff --git a/include/smack/SmackRep.h b/include/smack/SmackRep.h index a01247ab1..b050ed62a 100644 --- a/include/smack/SmackRep.h +++ b/include/smack/SmackRep.h @@ -24,6 +24,7 @@ class Stmt; class Expr; class Regions; class Attr; +class SignAnalysis; using llvm::Regex; using llvm::SmallVector; @@ -47,6 +48,7 @@ class SmackRep { Naming *naming; Program *program; Regions *regions; + SignAnalysis *signAnalysis; std::vector bplGlobals; std::map globalAllocations; @@ -59,7 +61,8 @@ class SmackRep { std::map auxDecls; public: - SmackRep(const llvm::DataLayout *L, Naming *N, Program *P, Regions *R); + SmackRep(const llvm::DataLayout *L, Naming *N, Program *P, Regions *R, + SignAnalysis *SA = nullptr); Program *getProgram() { return program; } private: diff --git a/lib/smack/SignAnalysis.cpp b/lib/smack/SignAnalysis.cpp new file mode 100644 index 000000000..14ab62e09 --- /dev/null +++ b/lib/smack/SignAnalysis.cpp @@ -0,0 +1,444 @@ +// +// This file is distributed under the MIT License. See LICENSE for details. +// + +// +// This pass infers the sign (signed / unsigned) of integer SSA values +// via bidirectional dataflow analysis over a four-point lattice: +// +// Unknown +// / \; +// Signed Unsigned +// \ / +// Conflict +// +// Constraints are drawn from operations that carry explicit sign intent +// (sdiv, zext, signed comparisons, etc.) and from nsw/nuw flags on +// otherwise sign-agnostic arithmetic. Memory propagation uses sea-dsa +// alias information to connect stores to loads. The analysis iterates +// to a fixpoint. +// + +#define DEBUG_TYPE "smack-sign" +#include "smack/SignAnalysis.h" +#include "smack/DSAWrapper.h" +#include "smack/Debug.h" +#include "seadsa/Graph.hh" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace smack { + +using namespace llvm; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +static const char *signName(Sign S) { + switch (S) { + case Sign::Unknown: + return "unknown"; + case Sign::Signed: + return "signed"; + case Sign::Unsigned: + return "unsigned"; + case Sign::Conflict: + return "conflict"; + } + return "?"; +} + +/// Return true if V is an integer (not pointer, not void, not FP). +static bool isInteger(const Value *V) { return V->getType()->isIntegerTy(); } + +/// Classify the nsw/nuw flags on an OverflowingBinaryOperator. +/// Returns Signed if nsw-only, Unsigned if nuw-only, Unknown otherwise. +static Sign flagSign(const Instruction &I) { + if (auto *OBO = dyn_cast(&I)) { + bool nsw = OBO->hasNoSignedWrap(); + bool nuw = OBO->hasNoUnsignedWrap(); + if (nsw && !nuw) + return Sign::Signed; + if (nuw && !nsw) + return Sign::Unsigned; + } + return Sign::Unknown; +} + +// --------------------------------------------------------------------------- +// SignAnalysis implementation +// --------------------------------------------------------------------------- + +char SignAnalysis::ID = 0; + +StringRef SignAnalysis::getPassName() const { + return "Integer sign inference analysis"; +} + +void SignAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} + +bool SignAnalysis::update(const Value *V, Sign S) { + if (S == Sign::Unknown) + return false; + auto it = SignMap.find(V); + if (it == SignMap.end()) { + SignMap[V] = S; + return true; + } + Sign old = it->second; + Sign merged = meetSign(old, S); + if (merged == old) + return false; + it->second = merged; + return true; +} + +Sign SignAnalysis::getSign(const Value *V) const { + auto it = SignMap.find(V); + return it != SignMap.end() ? it->second : Sign::Unknown; +} + +// --------------------------------------------------------------------------- +// Memory index (DSA-based) +// --------------------------------------------------------------------------- + +SignAnalysis::MemCell SignAnalysis::resolvePointer(const Value *Ptr) { + if (!DSA) + return {nullptr, 0}; + auto *node = DSA->getNode(Ptr); + if (!node) + return {nullptr, 0}; + // Skip incomplete/complicated nodes — too imprecise for sign propagation + if (node->isIncomplete() || node->isExternal() || node->isIntToPtr() || + node->isPtrToInt() || node->isUnknown()) + return {nullptr, 0}; + unsigned offset = DSA->getOffset(Ptr); + // Normalize offset for collapsed nodes + if (node->isOffsetCollapsed()) + offset = 0; + return {node, offset}; +} + +void SignAnalysis::buildMemoryIndex(Module &M) { + CellStores.clear(); + CellLoads.clear(); + for (auto &F : M) { + for (auto &I : instructions(F)) { + if (auto *SI = dyn_cast(&I)) { + if (!isInteger(SI->getValueOperand())) + continue; + auto cell = resolvePointer(SI->getPointerOperand()); + if (cell.first) + CellStores[cell].push_back(SI->getValueOperand()); + } else if (auto *LI = dyn_cast(&I)) { + if (!isInteger(LI)) + continue; + auto cell = resolvePointer(LI->getPointerOperand()); + if (cell.first) + CellLoads[cell].push_back(LI); + } + } + } +} + +// --------------------------------------------------------------------------- +// Forward propagation: def → result +// --------------------------------------------------------------------------- + +bool SignAnalysis::propagateForward(Instruction &I) { + if (!isInteger(&I)) + return false; + + bool changed = false; + + switch (I.getOpcode()) { + // Cast instructions that determine result sign + case Instruction::SExt: + case Instruction::FPToSI: + changed |= update(&I, Sign::Signed); + break; + case Instruction::ZExt: + case Instruction::FPToUI: + changed |= update(&I, Sign::Unsigned); + break; + + // Trunc: propagate sign of the source operand through + case Instruction::Trunc: + changed |= update(&I, getSign(I.getOperand(0))); + break; + + // PHI: meet over all incoming values + case Instruction::PHI: { + auto &Phi = cast(I); + Sign S = Sign::Unknown; + for (unsigned i = 0, e = Phi.getNumIncomingValues(); i < e; ++i) + S = meetSign(S, getSign(Phi.getIncomingValue(i))); + changed |= update(&I, S); + break; + } + + // Select: meet of true and false values + case Instruction::Select: + changed |= + update(&I, meetSign(getSign(I.getOperand(1)), getSign(I.getOperand(2)))); + break; + + // Signed arithmetic results + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::AShr: + changed |= update(&I, Sign::Signed); + break; + + // Unsigned arithmetic results + case Instruction::UDiv: + case Instruction::URem: + case Instruction::LShr: + changed |= update(&I, Sign::Unsigned); + break; + + // Load: propagate from aliasing stores + case Instruction::Load: { + auto *LI = cast(&I); + auto cell = resolvePointer(LI->getPointerOperand()); + if (cell.first) { + auto it = CellStores.find(cell); + if (it != CellStores.end()) { + for (auto *storedVal : it->second) + changed |= update(&I, getSign(storedVal)); + } + } + break; + } + + // Call: propagate actual argument signs → formal parameters; + // propagate callee return value sign → CallInst result + case Instruction::Call: + case Instruction::Invoke: { + auto *CB = cast(&I); + Function *callee = CB->getCalledFunction(); + if (!callee || callee->isDeclaration()) + break; + // Actual args → formal params + for (unsigned i = 0, e = CB->arg_size(); i < e && i < callee->arg_size(); + ++i) { + if (isInteger(CB->getArgOperand(i))) + changed |= update(callee->getArg(i), getSign(CB->getArgOperand(i))); + } + // Return value → CallInst result + if (isInteger(CB)) { + for (auto &BB : *callee) { + if (auto *RI = dyn_cast(BB.getTerminator())) { + if (RI->getReturnValue() && isInteger(RI->getReturnValue())) + changed |= update(CB, getSign(RI->getReturnValue())); + } + } + } + break; + } + + default: + break; + } + + // nsw/nuw flags on sign-agnostic ops + Sign fs = flagSign(I); + if (fs != Sign::Unknown) + changed |= update(&I, fs); + + return changed; +} + +// --------------------------------------------------------------------------- +// Backward propagation: use → operands +// --------------------------------------------------------------------------- + +bool SignAnalysis::propagateBackward(Instruction &I) { + bool changed = false; + + auto constrainIntegerOperands = [&](Sign S) { + for (auto &Op : I.operands()) { + if (isInteger(Op.get())) + changed |= update(Op.get(), S); + } + }; + + switch (I.getOpcode()) { + // Signed operations → operands are signed + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::AShr: + constrainIntegerOperands(Sign::Signed); + break; + + // Unsigned operations → operands are unsigned + case Instruction::UDiv: + case Instruction::URem: + case Instruction::LShr: + constrainIntegerOperands(Sign::Unsigned); + break; + + // Casts constrain the source operand + case Instruction::SExt: + if (isInteger(I.getOperand(0))) + changed |= update(I.getOperand(0), Sign::Signed); + break; + case Instruction::ZExt: + if (isInteger(I.getOperand(0))) + changed |= update(I.getOperand(0), Sign::Unsigned); + break; + + // FP conversions constrain the source operand + case Instruction::SIToFP: + if (isInteger(I.getOperand(0))) + changed |= update(I.getOperand(0), Sign::Signed); + break; + case Instruction::UIToFP: + if (isInteger(I.getOperand(0))) + changed |= update(I.getOperand(0), Sign::Unsigned); + break; + + // Integer comparisons: signed/unsigned predicates constrain operands + case Instruction::ICmp: { + auto &Cmp = cast(I); + if (CmpInst::isSigned(Cmp.getPredicate())) + constrainIntegerOperands(Sign::Signed); + else if (CmpInst::isUnsigned(Cmp.getPredicate())) + constrainIntegerOperands(Sign::Unsigned); + // eq/ne: no constraint + break; + } + + // Store: propagate stored value's sign to aliasing load results + case Instruction::Store: { + auto *SI = cast(&I); + auto *storedVal = SI->getValueOperand(); + if (!isInteger(storedVal)) + break; + Sign valSign = getSign(storedVal); + if (valSign == Sign::Unknown) + break; + auto cell = resolvePointer(SI->getPointerOperand()); + if (cell.first) { + auto it = CellLoads.find(cell); + if (it != CellLoads.end()) { + for (auto *loadResult : it->second) + changed |= update(loadResult, valSign); + } + } + break; + } + + // Call: propagate formal parameter signs → actual arguments + case Instruction::Call: + case Instruction::Invoke: { + auto *CB = cast(&I); + Function *callee = CB->getCalledFunction(); + if (!callee || callee->isDeclaration()) + break; + for (unsigned i = 0, e = CB->arg_size(); i < e && i < callee->arg_size(); + ++i) { + if (isInteger(CB->getArgOperand(i))) + changed |= update(CB->getArgOperand(i), getSign(callee->getArg(i))); + } + break; + } + + // Return: propagate return value sign → all CallInst results + case Instruction::Ret: { + auto *RI = cast(&I); + auto *retVal = RI->getReturnValue(); + if (!retVal || !isInteger(retVal)) + break; + Sign retSign = getSign(retVal); + if (retSign == Sign::Unknown) + break; + Function *F = I.getFunction(); + for (auto *U : F->users()) { + if (auto *CB = dyn_cast(U)) { + if (CB->getCalledFunction() == F && isInteger(CB)) + changed |= update(CB, retSign); + } + } + break; + } + + default: + break; + } + + // nsw/nuw flags on sign-agnostic ops constrain operands too + Sign fs = flagSign(I); + if (fs != Sign::Unknown) + constrainIntegerOperands(fs); + + return changed; +} + +// --------------------------------------------------------------------------- +// Initialization & fixpoint +// --------------------------------------------------------------------------- + +void SignAnalysis::initialize(Module &M) { + SignMap.clear(); + for (auto &F : M) { + for (auto &I : instructions(F)) { + propagateForward(I); + propagateBackward(I); + } + } +} + +bool SignAnalysis::propagate(Module &M) { + bool changed = false; + for (auto &F : M) { + for (auto &I : instructions(F)) { + changed |= propagateForward(I); + changed |= propagateBackward(I); + } + } + return changed; +} + +bool SignAnalysis::runOnModule(Module &M) { + DSA = &getAnalysis(); + buildMemoryIndex(M); + initialize(M); + + unsigned iterations = 0; + while (propagate(M)) + ++iterations; + + SDEBUG(errs() << "SignAnalysis: converged after " << iterations + << " iteration(s), " << SignMap.size() << " values mapped\n"); + SDEBUG(dump()); + + // Analysis pass — does not modify the module. + return false; +} + +void SignAnalysis::dump() const { + errs() << "=== SignAnalysis results ===\n"; + for (auto &entry : SignMap) { + if (entry.second == Sign::Unknown) + continue; + errs() << " [" << signName(entry.second) << "] "; + if (entry.first->hasName()) + errs() << entry.first->getName(); + else + entry.first->printAsOperand(errs(), false); + errs() << "\n"; + } + errs() << "=== end SignAnalysis ===\n"; +} + +} // namespace smack diff --git a/lib/smack/SmackModuleGenerator.cpp b/lib/smack/SmackModuleGenerator.cpp index 2c70404f4..0062a031c 100644 --- a/lib/smack/SmackModuleGenerator.cpp +++ b/lib/smack/SmackModuleGenerator.cpp @@ -8,6 +8,7 @@ #include "smack/Naming.h" #include "smack/Prelude.h" #include "smack/Regions.h" +#include "smack/SignAnalysis.h" #include "smack/SmackInstGenerator.h" #include "smack/SmackOptions.h" #include "smack/SmackRep.h" @@ -25,6 +26,7 @@ void SmackModuleGenerator::getAnalysisUsage(llvm::AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool SmackModuleGenerator::runOnModule(llvm::Module &m) { @@ -35,7 +37,8 @@ bool SmackModuleGenerator::runOnModule(llvm::Module &m) { void SmackModuleGenerator::generateProgram(llvm::Module &M) { Naming naming; - SmackRep rep(&M.getDataLayout(), &naming, program, &getAnalysis()); + SmackRep rep(&M.getDataLayout(), &naming, program, &getAnalysis(), + &getAnalysis()); std::list &decls = program->getDeclarations(); SDEBUG(errs() << "Analyzing globals...\n"); diff --git a/lib/smack/SmackRep.cpp b/lib/smack/SmackRep.cpp index 79c8a09c2..1f0d7961c 100644 --- a/lib/smack/SmackRep.cpp +++ b/lib/smack/SmackRep.cpp @@ -4,6 +4,7 @@ #define DEBUG_TYPE "smack-rep" #include "smack/SmackRep.h" #include "smack/CodifyStaticInits.h" +#include "smack/SignAnalysis.h" #include "smack/SmackOptions.h" #include "smack/VectorOperations.h" @@ -99,9 +100,10 @@ bool isCodeString(const llvm::Value *V) { return false; } -SmackRep::SmackRep(const DataLayout *L, Naming *N, Program *P, Regions *R) - : targetData(L), naming(N), program(P), regions(R), globalsOffset(0), - externsOffset(-32768), uniqueFpNum(0), +SmackRep::SmackRep(const DataLayout *L, Naming *N, Program *P, Regions *R, + SignAnalysis *SA) + : targetData(L), naming(N), program(P), regions(R), signAnalysis(SA), + globalsOffset(0), externsOffset(-32768), uniqueFpNum(0), ptrSizeInBits(targetData->getPointerSizeInBits()) { if (SmackOptions::MemorySafety) initFuncs.push_back("$global_allocations"); @@ -632,14 +634,30 @@ const Expr *SmackRep::lit(const llvm::Value *v, bool isUnsigned, const APInt &API = ci->getValue(); unsigned width = ci->getBitWidth(); - // This is heuristics for choosing between generating an unsigned vs - // signed constant (since LLVM does not keep track of that). - // Signed values -1 is special since it appears often because i-- - // gets translated into i + (-1), and so in that context it should - // be a signed integer. - bool neg = width > 1 && - (isUnsigned ? (isUnsignedInst ? false : API.getSExtValue() == -1) - : ci->isNegative()); + // Use SignAnalysis to determine whether this constant should be + // rendered as negative. Fall back to the old heuristic if + // SignAnalysis is not available or returns Unknown/Conflict. + bool neg; + if (signAnalysis) { + Sign s = signAnalysis->getSign(v); + if (s == Sign::Signed) + neg = width > 1 && ci->isNegative(); + else if (s == Sign::Unsigned) + neg = false; + else { + // Unknown or Conflict: fall back to old heuristic + neg = width > 1 && + (isUnsigned + ? (isUnsignedInst ? false : API.getSExtValue() == -1) + : ci->isNegative()); + } + } else { + // No SignAnalysis available: use old heuristic + neg = width > 1 && + (isUnsigned + ? (isUnsignedInst ? false : API.getSExtValue() == -1) + : ci->isNegative()); + } SmallString<32> str; (neg ? API.abs() : API).toString(str, 10, false); const Expr *e = SmackOptions::BitPrecise diff --git a/tools/llvm2bpl/llvm2bpl.cpp b/tools/llvm2bpl/llvm2bpl.cpp index 8854704f0..be38407fb 100644 --- a/tools/llvm2bpl/llvm2bpl.cpp +++ b/tools/llvm2bpl/llvm2bpl.cpp @@ -34,6 +34,7 @@ #include "smack/ExtractContracts.h" #include "smack/InitializePasses.h" #include "smack/IntegerOverflowChecker.h" +#include "smack/SignAnalysis.h" #include "smack/MemorySafetyChecker.h" #include "smack/Naming.h" #include "smack/NormalizeLoops.h" @@ -269,6 +270,7 @@ int main(int argc, char **argv) { check(EC.message()); F->keep(); files.push_back(F); + pass_manager.add(new smack::SignAnalysis()); pass_manager.add(new smack::SmackModuleGenerator()); pass_manager.add(new smack::BplFilePrinter(F->os())); }