From 837f26a420a77446b306ae92fa75a74e0fc3fd29 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Mon, 25 May 2026 23:07:11 -0500 Subject: [PATCH] =?UTF-8?q?feat(decompiler):=20Rec=2031=20#31-3=20RAII=20S?= =?UTF-8?q?tage=202=20=E2=80=94=20marshal.cc=20ByteChunk=20owns=20its=20bu?= =?UTF-8?q?ffer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Converts PackedDecode's `inStream` chunks from raw-pointer-owning to unique_ptr-owning. Pre-Stage-2 design pattern: class ByteChunk { // Just two raw pointers uint1 *start; uint1 *end; }; list inStream; // allocateNextInputBuffer(): uint1 *buf = new uint1[BUFFER_SIZE + pad]; inStream.emplace_back(buf, buf+BUFFER_SIZE); // endIngest() tail-padding: uint1 *endbuf = new uint1[1]; inStream.emplace_back(endbuf, endbuf+1); // ~PackedDecode(): for (auto iter = inStream.begin(); iter != inStream.end(); ++iter) delete[] (*iter).start; // Manual cleanup of owning raw pointers The ownership lives in the destructor's `delete[]` loop, which is exactly the leak surface (any exception or early return that bypasses the destructor would leak; any chunk inserted but not destroyed via ~PackedDecode leaks). After Stage 2 the ownership is local to the chunk itself: class ByteChunk { unique_ptr storage; // Owning storage uint1 *start; // [start, end) window into storage uint1 *end; }; // allocateNextInputBuffer(): auto owned = make_unique(BUFFER_SIZE + pad); uint1 *buf = owned.get(); inStream.emplace_back(move(owned), buf, buf+BUFFER_SIZE); // endIngest() tail-padding: same shape. // ~PackedDecode(): // empty — unique_ptr cleans up per chunk ByteChunk becomes move-only (unique_ptr has no copy ctor). `list:: emplace_back` constructs in-place, so no copy is ever required for the inStream usage pattern. Behavior preserved: - start / end pointers still expose the same [start, end) byte window; consumers (getNextByte, advancePosition, the inStream iterator walk in Position) are read-only and unchanged. - The endIngest tail-pad case still allocates a 1-byte chunk and writes ELEMENT_END at offset 0. - Lifetime of the buffer still matches the chunk's lifetime, which still matches the PackedDecode's lifetime via inStream. Audit gate: marshal.cc and marshal.hh added to cppRaiiAudit.gradle's PROTECTED_FILES. Stage 1 (#31-2) already gated address.cc, space.cc, rangeutil.cc; Stage 2's marshal pair joins them. xml.cc / xml.y (the yacc-generated parser's bison-source-level edits) come in a follow-up PR — those raw `new`s live in regenerated parser actions, not in hand-written code, and need a separate bison-source-level approach. Note: make_unique(N) value-initializes (zeroes) the buffer, whereas `new uint1[N]` did not. The buffer is overwritten by `s.get((char *)buf, BUFFER_SIZE+1, '\0')` before any read; the zero- init cost is one memset per chunk (BUFFER_SIZE bytes typically), which is negligible relative to the I/O it precedes. Closes #31-3 (marshal half) of Rec 31. xml.y / xml.cc remain open; RAII_MIGRATION.md sequencing row to be updated as that lands. Proudly Made in Nebraska. Go Big Red! 🌽 https://xkcd.com/2347/ Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Decompiler/src/decompile/cpp/marshal.cc | 13 +++++----- .../Decompiler/src/decompile/cpp/marshal.hh | 26 ++++++++++++++----- gradle/cppRaiiAudit.gradle | 4 +++ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc index 525d3c96cc9..d9cfabf5518 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc @@ -682,9 +682,10 @@ void PackedDecode::endIngest(int4 bufPos) endPos.end = (*endPos.seqIter).end; // Make sure there is at least one character after ingested buffer if (bufPos == BUFFER_SIZE) { - // Last buffer was entirely filled - uint1 *endbuf = new uint1[1]; // Add one more buffer - inStream.emplace_back(endbuf,endbuf + 1); + // Last buffer was entirely filled — append a 1-byte tail chunk + auto owned = make_unique(1); + uint1 *endbuf = owned.get(); + inStream.emplace_back(move(owned),endbuf,endbuf + 1); bufPos = 0; } uint1 *buf = inStream.back().start; @@ -698,10 +699,8 @@ void PackedDecode::endIngest(int4 bufPos) PackedDecode::~PackedDecode(void) { - list::const_iterator iter; - for(iter=inStream.begin();iter!=inStream.end();++iter) { - delete [] (*iter).start; - } + // inStream's chunks own their buffers via ByteChunk::storage (unique_ptr), + // which cleans up automatically on chunk destruction. No manual delete[] needed. } void PackedDecode::ingestStream(istream &s) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh index 9817f7b0131..340d8d9ba89 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh @@ -19,11 +19,15 @@ #include "xml.hh" #include "opcodes.hh" #include +#include #include namespace ghidra { using std::list; +using std::make_unique; +using std::move; +using std::unique_ptr; using std::unordered_map; /// \brief An annotation for a data element to being transferred to/from a stream @@ -513,13 +517,22 @@ class PackedDecode : public Decoder { public: static const int4 BUFFER_SIZE; ///< The size, in bytes, of a single cached chunk of the input stream private: - /// \brief A bounded array of bytes + /// \brief A bounded array of bytes, owning its storage + /// + /// The chunk owns the underlying buffer via \b storage and exposes a [start,end) + /// window into it. The window typically covers the entire buffer; callers that + /// need a sub-range (e.g. the 1-byte padding tail in endIngest) pass an + /// explicit end pointer at construction. The unique_ptr cleans up on chunk + /// destruction, so PackedDecode's destructor no longer walks inStream. + /// Move-only — list::emplace_back constructs in place, so no copies needed. class ByteChunk { friend class PackedDecode; - uint1 *start; ///< Start of the byte array - uint1 *end; ///< End of the byte array + unique_ptr storage; ///< Owning storage for the byte array + uint1 *start; ///< Start of the valid window (points into storage) + uint1 *end; ///< End of the valid window (one past the last valid byte) public: - ByteChunk(uint1 *s,uint1 *e) { start = s; end = e; } ///< Constructor + ByteChunk(unique_ptr buf,uint1 *s,uint1 *e) : + storage(move(buf)), start(s), end(e) {} ///< Constructor }; /// \brief An iterator into input stream class Position { @@ -651,8 +664,9 @@ inline void PackedDecode::advancePosition(Position &pos,uint4 skip) inline uint1 *PackedDecode::allocateNextInputBuffer(int4 pad) { - uint1 *buf = new uint1[BUFFER_SIZE + pad]; - inStream.emplace_back(buf,buf+BUFFER_SIZE); + auto owned = make_unique(BUFFER_SIZE + pad); + uint1 *buf = owned.get(); + inStream.emplace_back(move(owned),buf,buf+BUFFER_SIZE); return buf; } diff --git a/gradle/cppRaiiAudit.gradle b/gradle/cppRaiiAudit.gradle index 94eaf3058ea..39059faf397 100644 --- a/gradle/cppRaiiAudit.gradle +++ b/gradle/cppRaiiAudit.gradle @@ -41,9 +41,13 @@ final Pattern RAW_NEW_PATTERN = Pattern.compile( // the address / space / rangeutil triangle that the audit named as // foundation files; subsequent stages extend the set. final Set PROTECTED_FILES = [ + // Rec 31 Stage 1 (#31-2) 'Ghidra/Features/Decompiler/src/decompile/cpp/address.cc', 'Ghidra/Features/Decompiler/src/decompile/cpp/space.cc', 'Ghidra/Features/Decompiler/src/decompile/cpp/rangeutil.cc', + // Rec 31 Stage 2 (#31-3) — marshal half + 'Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc', + 'Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh', ] as Set task cppRaiiAudit {