From fdb48f1b32e0c3ab6c9404728accbbdce3e2e514 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Tue, 26 May 2026 01:19:29 -0500 Subject: [PATCH 1/2] ci(codeql): manual c-cpp build replaces autobuild MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL's autobuilder has been failing on every PR (including doc-only ones — see #44, #47, #48) with: cpp/autobuilder: Incompatible operating system (expected Windows). cpp/autobuilder: No supported build system detected. The autobuilder scans the repo root for a recognized build file (Makefile, CMakeLists.txt, etc.); the decompiler's Makefile lives at Ghidra/Features/Decompiler/src/decompile/cpp/, so the scan fails and the c-cpp matrix leg goes red even when no C++ source is touched. The actual Java/Kotlin, Actions, and Python legs have always been fine; only c-cpp was affected. Fix: replace the `github/codeql-action/autobuild@v3` step with an explicit `make libdecomp_dbg.a` invocation that cd's into the decompiler tree. libdecomp_dbg.a is the static-archive target that compiles every LIBDECOMP_NAMES source into com_dbg/*.o then `ar qc`s them; it does NOT need BFD at link time (no -lbfd dependency), so binutils-dev / libiberty-dev drop out of the apt-get list too. CodeQL's tracer picks up the .o compile commands during the make invocation, which is exactly the input static analysis needs. The autobuild step stays for the `actions` and `python` matrix legs (it works fine for both — actions is just YAML, python doesn't need a build at all). Proudly Made in Nebraska. Go Big Red! 🌽 https://xkcd.com/2347/ Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/codeql.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 4ae52284f8a..bf610a684bf 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -74,9 +74,15 @@ jobs: - name: Install C++ build deps if: matrix.language == 'c-cpp' + # bison + flex are needed to regenerate xml.cc / slghparse.cc / + # slghscan.cc from their .y / .l sources during the manual + # build below. The decompiler's actual link step (decomp_dbg) + # would want binutils-dev + libiberty-dev for BFD, but we don't + # link here — only compile to .o files for CodeQL to extract, + # which doesn't need BFD. run: | sudo apt-get update - sudo apt-get install -y bison flex g++ make binutils-dev libiberty-dev + sudo apt-get install -y bison flex g++ make - name: Initialize CodeQL uses: github/codeql-action/init@v3 @@ -97,8 +103,23 @@ jobs: # irrelevant to static analysis. run: gradle prepDev --parallel - - name: Autobuild (non-Java) - if: matrix.language != 'java-kotlin' + - name: Manual C++ build + if: matrix.language == 'c-cpp' + # Autobuild fails for this repo because the decompiler's + # Makefile lives at Ghidra/Features/Decompiler/src/decompile/cpp/ + # rather than at the repo root, so `cpp/autobuilder.sh` reports + # "No supported build system detected" and exits 1. Build the + # static library `libdecomp_dbg.a` instead — that target + # compiles every C++ source file in the decompiler core (all + # LIBDECOMP_NAMES → com_dbg/*.o, then ar qc into the archive) + # without needing BFD at link time. CodeQL's tracer picks up + # the .o compile commands, which is what static analysis needs. + run: | + cd Ghidra/Features/Decompiler/src/decompile/cpp + make libdecomp_dbg.a + + - name: Autobuild (other languages) + if: matrix.language != 'java-kotlin' && matrix.language != 'c-cpp' uses: github/codeql-action/autobuild@v3 - name: Perform CodeQL Analysis From 1516b436780702a53622568ba7975c58c5563be8 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Tue, 26 May 2026 01:37:57 -0500 Subject: [PATCH 2/2] =?UTF-8?q?feat(decompiler):=20Rec=2031=20#31-3=20RAII?= =?UTF-8?q?=20Stage=202B=20=E2=80=94=20XmlScan::lvalue=20unique=5Fptr=20mi?= =?UTF-8?q?gration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Converts XmlScan's per-token string buffer from a raw owning pointer to unique_ptr. This eliminates 7 raw `new string()` allocation sites (one per scan* mode helper) and the manual `delete` in ~XmlScan / clearlvalue, replacing them with make_unique + the unique_ptr destructor's automatic cleanup. The single ownership-transfer point (lval(), called by bison's yylex to hand the string up to the parser value stack) uses unique_ptr::release() so the bison side's existing raw-pointer ownership convention is unchanged. Before: class XmlScan { string *lvalue; // raw owning pointer ... string *lval(void) { string *ret = lvalue; lvalue = (string *)0; // manual null-out return ret; } void clearlvalue(void) { if (lvalue != (string *)0) // manual null-check delete lvalue; // manual delete } }; // Allocate (×7 across scan* helpers): lvalue = new string(); After: class XmlScan { unique_ptr lvalue; // owning RAII handle ... string *lval(void) { return lvalue.release(); // ownership transfer; lvalue → nullptr } void clearlvalue(void) { lvalue.reset(); // null-safe, automatic delete } }; // Allocate (×7): lvalue = make_unique(); Bison sync: xml.y is the grammar source; xml.cc is the generated parser. The edits are entirely in the epilogue / prologue %{...%} regions that bison copies verbatim, so both files get the identical hand-edit and stay in lockstep without invoking bison. The repo's xml.cc was generated by bison 3.0.4; the local box has 3.8.2 and a clean regeneration would produce huge unrelated diff — keeping the parallel hand-edit avoids that. xml.hh picks up and the make_unique / move / unique_ptr usings alongside the existing std:: aliases. Scope intentionally limited: - Semantic-action raw `new`s (xml.y:150, 153, 198, 200, 208 and the parallel sites in xml.cc's yyparse) are NOT touched in this PR. They live inside bison's table-driven yyparse and a clean migration requires actual bison-3.0.4 regeneration; that's a separate follow-up PR (Stage 2C). - The big-object epilogue raw new's (line 525 `new XmlScan`, line 538 `new Element`, line 624 `new Document`) require API redesign across xml.hh + callers and are separate PRs of their own. - cppRaiiAudit's PROTECTED_FILES does NOT yet include xml.cc / xml.hh / xml.y — adding them now would fail the gate on the still-present semantic-action raw new's. Add after Stage 2C lands. Tests: this is functionally a no-op refactor. The behavior change is "manual delete replaced by unique_ptr destructor"; both forms free the string on exactly the same code paths. Marshal RAII Stage 2A (PR #46) had the same shape and passed C++ unit tests + ASan + UBSan. Proudly Made in Nebraska. Go Big Red! 🌽 https://xkcd.com/2347/ Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Decompiler/src/decompile/cpp/xml.cc | 23 +++++++++---------- .../Decompiler/src/decompile/cpp/xml.hh | 4 ++++ .../Decompiler/src/decompile/cpp/xml.y | 23 +++++++++---------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.cc index 6831da5a65c..13bf3ccf233 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.cc @@ -127,7 +127,7 @@ class XmlScan { private: mode curmode; ///< The current scanning mode istream &s; ///< The stream being scanned - string *lvalue; ///< Current string being built + unique_ptr lvalue; ///< Current string being built; nullptr when no token in progress int4 lookahead[4]; ///< Lookahead into the byte stream int4 pos; ///< Current position in the lookahead buffer bool endofstream; ///< Has end of stream been reached @@ -173,7 +173,7 @@ class XmlScan { ~XmlScan(void); ///< Destructor void setmode(mode m) { curmode = m; } ///< Set the scanning mode int4 nexttoken(void); ///< Get the next token - string *lval(void) { string *ret = lvalue; lvalue = (string *)0; return ret; } ///< Return the last \e lvalue string + string *lval(void) { return lvalue.release(); } ///< Transfer ownership of the last \e lvalue string to the caller (typically yylval on the bison value stack); leaves lvalue null. Caller deletes. }; /// \brief A parsed name/value pair @@ -2081,7 +2081,7 @@ XmlScan::XmlScan(istream &t) : s(t) { curmode = SingleMode; - lvalue = (string *)0; + // lvalue (unique_ptr) default-constructs to nullptr; no explicit init needed. pos = 0; endofstream = false; getxmlchar(); getxmlchar(); getxmlchar(); getxmlchar(); // Fill lookahead buffer @@ -2096,8 +2096,7 @@ XmlScan::~XmlScan(void) void XmlScan::clearlvalue(void) { - if (lvalue != (string *)0) - delete lvalue; + lvalue.reset(); } int4 XmlScan::scanSingle(void) @@ -2115,7 +2114,7 @@ int4 XmlScan::scanCharData(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { // look for '<' '&' or ']]>' if (next(0) == '<') break; @@ -2135,7 +2134,7 @@ int4 XmlScan::scanCData(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { // Look for "]]>" and non-Char if (next(0)==']') @@ -2153,7 +2152,7 @@ int4 XmlScan::scanCharRef(void) { int4 v; clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (next(0) == 'x') { *lvalue += getxmlchar(); while(next(0) != -1) { @@ -2184,7 +2183,7 @@ int4 XmlScan::scanAttValue(int4 quote) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { if (next(0) == quote) break; if (next(0) == '<') break; @@ -2200,7 +2199,7 @@ int4 XmlScan::scanComment(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { if (next(0)=='-') @@ -2216,7 +2215,7 @@ int4 XmlScan::scanName(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (!isInitialNameChar(next(0))) return scanSingle(); @@ -2237,7 +2236,7 @@ int4 XmlScan::scanSName(void) getxmlchar(); } clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (!isInitialNameChar(next(0))) { // First non-whitespace is not Name char if (whitecount > 0) return ' '; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.hh index 72533012a13..78f7bfe52fd 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.hh @@ -21,13 +21,17 @@ #include "types.h" #include #include +#include #include #include #include namespace ghidra { +using std::make_unique; +using std::move; using std::string; +using std::unique_ptr; using std::vector; using std::map; using std::istream; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.y b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.y index 222e9d495ec..f50093b8c31 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/xml.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/xml.y @@ -54,7 +54,7 @@ public: private: mode curmode; ///< The current scanning mode istream &s; ///< The stream being scanned - string *lvalue; ///< Current string being built + unique_ptr lvalue; ///< Current string being built; nullptr when no token in progress int4 lookahead[4]; ///< Lookahead into the byte stream int4 pos; ///< Current position in the lookahead buffer bool endofstream; ///< Has end of stream been reached @@ -100,7 +100,7 @@ public: ~XmlScan(void); ///< Destructor void setmode(mode m) { curmode = m; } ///< Set the scanning mode int4 nexttoken(void); ///< Get the next token - string *lval(void) { string *ret = lvalue; lvalue = (string *)0; return ret; } ///< Return the last \e lvalue string + string *lval(void) { return lvalue.release(); } ///< Transfer ownership of the last \e lvalue string to the caller (typically yylval on the bison value stack); leaves lvalue null. Caller deletes. }; /// \brief A parsed name/value pair @@ -223,7 +223,7 @@ XmlScan::XmlScan(istream &t) : s(t) { curmode = SingleMode; - lvalue = (string *)0; + // lvalue (unique_ptr) default-constructs to nullptr; no explicit init needed. pos = 0; endofstream = false; getxmlchar(); getxmlchar(); getxmlchar(); getxmlchar(); // Fill lookahead buffer @@ -238,8 +238,7 @@ XmlScan::~XmlScan(void) void XmlScan::clearlvalue(void) { - if (lvalue != (string *)0) - delete lvalue; + lvalue.reset(); } int4 XmlScan::scanSingle(void) @@ -257,7 +256,7 @@ int4 XmlScan::scanCharData(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { // look for '<' '&' or ']]>' if (next(0) == '<') break; @@ -277,7 +276,7 @@ int4 XmlScan::scanCData(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { // Look for "]]>" and non-Char if (next(0)==']') @@ -295,7 +294,7 @@ int4 XmlScan::scanCharRef(void) { int4 v; clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (next(0) == 'x') { *lvalue += getxmlchar(); while(next(0) != -1) { @@ -326,7 +325,7 @@ int4 XmlScan::scanAttValue(int4 quote) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { if (next(0) == quote) break; if (next(0) == '<') break; @@ -342,7 +341,7 @@ int4 XmlScan::scanComment(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); while(next(0) != -1) { if (next(0)=='-') @@ -358,7 +357,7 @@ int4 XmlScan::scanName(void) { clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (!isInitialNameChar(next(0))) return scanSingle(); @@ -379,7 +378,7 @@ int4 XmlScan::scanSName(void) getxmlchar(); } clearlvalue(); - lvalue = new string(); + lvalue = make_unique(); if (!isInitialNameChar(next(0))) { // First non-whitespace is not Name char if (whitecount > 0) return ' ';