From 53d44adf4cb4de12f553b72edabae89a42ceb8b0 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Fri, 15 May 2026 14:14:41 -0700 Subject: [PATCH 1/2] [CFG] Extend assert_structural_invariants with O(V+E) edge consistency The endpoint-only invariants (start_node / final_node in range, both non-null) don't catch the asymmetric-edge corruption typical of a pre-CFG IR transform producing malformed IR that build_cfg then turns into a CFG with mismatched prev/next pointers. Such corruption surfaces as a segfault deep in worklist propagation, not at the boundary. Add a second phase to assert_structural_invariants: for every node n and every successor m in n->next, require m->prev to contain n (and symmetrically for predecessors). Null entries are tolerated -- erase() clears entries before simplify_graph compacts, so the intermediate non-compact state must still validate. Cost is O(V+E), small for typical kernels and dominated by the analyses that follow; left always-on (release builds too) because the alternative on violation is silent corruption. --- quadrants/ir/control_flow_graph.cpp | 34 +++++++++++++++++++++++++++++ quadrants/ir/control_flow_graph.h | 14 ++++++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/quadrants/ir/control_flow_graph.cpp b/quadrants/ir/control_flow_graph.cpp index 97ed176033..31c01bc6a8 100644 --- a/quadrants/ir/control_flow_graph.cpp +++ b/quadrants/ir/control_flow_graph.cpp @@ -1034,11 +1034,45 @@ void ControlFlowGraph::assert_structural_invariants() const { // worklist seeding `nodes[start_node]->reach_gen.insert(...)`, the DP scratch buffer indexed by // start_node, the dump-graph loop that skips start_node / final_node by index). If they ever // fail, the code following will segfault or silently corrupt -- catch it here instead. + + // (1) Endpoint invariants -- O(1). QD_ASSERT_INFO(!nodes.empty(), "ControlFlowGraph has no nodes"); QD_ASSERT_INFO(start_node >= 0 && start_node < (int)nodes.size(), "start_node out of range"); QD_ASSERT_INFO(final_node >= 0 && final_node < (int)nodes.size(), "final_node out of range"); QD_ASSERT_INFO(nodes[start_node] != nullptr, "start_node entry is null"); QD_ASSERT_INFO(nodes[final_node] != nullptr, "final_node entry is null"); + + // (2) Edge consistency -- O(V+E). For each forward edge n->next[k] == m, we require the + // corresponding back edge m->prev to contain n, and vice versa. This catches the dangling- + // pointer / asymmetric-edge corruption that surfaces when a pre-CFG pass (e.g. an unstructured + // control-flow normaliser like structure_continues) produces malformed IR -- precisely the + // failure mode that currently shows up as a segfault deep in worklist propagation rather than + // at the boundary. Null entries are tolerated: `erase()` clears entries before `simplify_graph` + // compacts, so a non-compact `nodes` vector with embedded nulls is a legal intermediate state. + for (std::size_t i = 0; i < nodes.size(); ++i) { + if (!nodes[i]) { + continue; + } + CFGNode *n = nodes[i].get(); + for (CFGNode *m : n->next) { + QD_ASSERT_INFO(m != nullptr, "CFG node {} has a null entry in `next`", i); + const bool back_link = + std::find(m->prev.begin(), m->prev.end(), n) != m->prev.end(); + QD_ASSERT_INFO(back_link, + "CFG edge asymmetry: node {} -> next contains a successor whose `prev` does " + "not list back to node {}", + i, i); + } + for (CFGNode *m : n->prev) { + QD_ASSERT_INFO(m != nullptr, "CFG node {} has a null entry in `prev`", i); + const bool fwd_link = + std::find(m->next.begin(), m->next.end(), n) != m->next.end(); + QD_ASSERT_INFO(fwd_link, + "CFG edge asymmetry: node {} <- prev contains a predecessor whose `next` " + "does not list forward to node {}", + i, i); + } + } } void ControlFlowGraph::erase(int node_id) { diff --git a/quadrants/ir/control_flow_graph.h b/quadrants/ir/control_flow_graph.h index a6834d9103..ce0c72b051 100644 --- a/quadrants/ir/control_flow_graph.h +++ b/quadrants/ir/control_flow_graph.h @@ -186,10 +186,16 @@ class ControlFlowGraph { // Erase an empty node. void erase(int node_id); - // Assert structural invariants that every whole-graph driver assumes: `nodes` non-empty, - // `start_node` and `final_node` in range, both endpoints actually allocated. Called from each - // public driver. Cheap (a handful of comparisons); leave on even in release builds since the - // alternative on violation is silent corruption. + // Assert structural invariants that every whole-graph driver assumes: + // (1) endpoint invariants -- nodes non-empty, start_node / final_node in range, both + // endpoints actually allocated (O(1)); + // (2) edge consistency -- for every node n and every successor m in n->next, n is in + // m->prev (and symmetrically for predecessors). This catches the dangling / asymmetric + // edges produced when a pre-CFG IR transform leaves the IR in a shape `analysis::build_cfg` + // cannot canonicalise (O(V+E)). + // Called from each public driver. Always-on (release builds too); the cost is dominated by the + // analyses that follow and the alternative on violation is silent corruption / a segfault deep + // in worklist processing. void assert_structural_invariants() const; public: From e1372d3c25fe06f9a5e771481289164e8d033a1d Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Fri, 15 May 2026 14:17:18 -0700 Subject: [PATCH 2/2] [CFG] Dump CFG to /tmp on assert_structural_invariants failure When a structural-invariant check fires, the assert message now points at a fresh file under /cfg_invariant_failures/structural_.txt containing the CFG state at the moment of violation -- node-by-node ranges, prev/next edges, intra-node next/prev links, the offending nodes' statements. Designed for post-mortem on the kind of failure that shows up after a pre-CFG IR transform (e.g. an unstructured-control-flow normaliser) produces a CFG that build_cfg cannot canonicalise: instead of "assert fired, now bisect from scratch" the user has a viewable graph at the exact failure point. The dumper is deliberately tolerant of malformed state. It is invoked only after a violation has been detected so the graph is by construction not internally consistent: null entries print as , per-node dump calls are individually try/catch wrapped so a single bad pointer doesn't take down the whole dump, and the outer try/catch handles filesystem failures. Returns the path on success or empty if the dump itself failed; the assert message picks "" in that case. Also fold the previously-one-per-violation QD_ASSERT_INFO calls into a single comprehensive message at the end -- the endpoint phase now collects all violations before bailing, so the user sees every broken endpoint at once instead of fixing them serially. Edge-consistency phase still short-circuits on first violation (subsequent edges may share corrupted pointers). --- quadrants/ir/control_flow_graph.cpp | 142 +++++++++++++++++++++++----- quadrants/ir/control_flow_graph.h | 13 ++- 2 files changed, 129 insertions(+), 26 deletions(-) diff --git a/quadrants/ir/control_flow_graph.cpp b/quadrants/ir/control_flow_graph.cpp index 31c01bc6a8..c2a2e022b7 100644 --- a/quadrants/ir/control_flow_graph.cpp +++ b/quadrants/ir/control_flow_graph.cpp @@ -1,5 +1,6 @@ #include "quadrants/ir/control_flow_graph.h" +#include #include #include #include @@ -1034,45 +1035,88 @@ void ControlFlowGraph::assert_structural_invariants() const { // worklist seeding `nodes[start_node]->reach_gen.insert(...)`, the DP scratch buffer indexed by // start_node, the dump-graph loop that skips start_node / final_node by index). If they ever // fail, the code following will segfault or silently corrupt -- catch it here instead. + // + // Collect every violation we can detect cheaply, then dump + assert in one go. The dump goes + // to /cfg_invariant_failures/structural_.txt and its path is included in the assert + // message so a post-mortem can pull up the actual CFG state that violated. - // (1) Endpoint invariants -- O(1). - QD_ASSERT_INFO(!nodes.empty(), "ControlFlowGraph has no nodes"); - QD_ASSERT_INFO(start_node >= 0 && start_node < (int)nodes.size(), "start_node out of range"); - QD_ASSERT_INFO(final_node >= 0 && final_node < (int)nodes.size(), "final_node out of range"); - QD_ASSERT_INFO(nodes[start_node] != nullptr, "start_node entry is null"); - QD_ASSERT_INFO(nodes[final_node] != nullptr, "final_node entry is null"); + std::vector errors; + + // (1) Endpoint invariants -- O(1). Cheap; collect all violations before bailing. + if (nodes.empty()) { + errors.emplace_back("nodes is empty"); + } + if (start_node < 0 || start_node >= (int)nodes.size()) { + errors.emplace_back(fmt::format("start_node {} out of range [0, {})", start_node, nodes.size())); + } + if (final_node < 0 || final_node >= (int)nodes.size()) { + errors.emplace_back(fmt::format("final_node {} out of range [0, {})", final_node, nodes.size())); + } + if (errors.empty()) { + if (nodes[start_node] == nullptr) { + errors.emplace_back(fmt::format("start_node entry (index {}) is null", start_node)); + } + if (nodes[final_node] == nullptr) { + errors.emplace_back(fmt::format("final_node entry (index {}) is null", final_node)); + } + } // (2) Edge consistency -- O(V+E). For each forward edge n->next[k] == m, we require the // corresponding back edge m->prev to contain n, and vice versa. This catches the dangling- // pointer / asymmetric-edge corruption that surfaces when a pre-CFG pass (e.g. an unstructured - // control-flow normaliser like structure_continues) produces malformed IR -- precisely the - // failure mode that currently shows up as a segfault deep in worklist propagation rather than - // at the boundary. Null entries are tolerated: `erase()` clears entries before `simplify_graph` - // compacts, so a non-compact `nodes` vector with embedded nulls is a legal intermediate state. - for (std::size_t i = 0; i < nodes.size(); ++i) { + // control-flow normaliser) produces malformed IR -- precisely the failure mode that surfaces + // as a segfault deep in worklist propagation rather than at the boundary. Null entries are + // tolerated: `erase()` clears entries before `simplify_graph` compacts, so a non-compact + // `nodes` vector with embedded nulls is a legal intermediate state. We short-circuit on the + // first edge violation: subsequent edges may share corrupted pointers and dereferencing them + // could itself segfault. + for (std::size_t i = 0; i < nodes.size() && errors.empty(); ++i) { if (!nodes[i]) { continue; } CFGNode *n = nodes[i].get(); for (CFGNode *m : n->next) { - QD_ASSERT_INFO(m != nullptr, "CFG node {} has a null entry in `next`", i); - const bool back_link = - std::find(m->prev.begin(), m->prev.end(), n) != m->prev.end(); - QD_ASSERT_INFO(back_link, - "CFG edge asymmetry: node {} -> next contains a successor whose `prev` does " - "not list back to node {}", - i, i); + if (m == nullptr) { + errors.emplace_back(fmt::format("node {} has a null entry in `next`", i)); + break; + } + if (std::find(m->prev.begin(), m->prev.end(), n) == m->prev.end()) { + errors.emplace_back( + fmt::format("edge asymmetry: node {} has a successor whose `prev` does not " + "list back to node {}", + i, i)); + break; + } + } + if (!errors.empty()) { + break; } for (CFGNode *m : n->prev) { - QD_ASSERT_INFO(m != nullptr, "CFG node {} has a null entry in `prev`", i); - const bool fwd_link = - std::find(m->next.begin(), m->next.end(), n) != m->next.end(); - QD_ASSERT_INFO(fwd_link, - "CFG edge asymmetry: node {} <- prev contains a predecessor whose `next` " - "does not list forward to node {}", - i, i); + if (m == nullptr) { + errors.emplace_back(fmt::format("node {} has a null entry in `prev`", i)); + break; + } + if (std::find(m->next.begin(), m->next.end(), n) == m->next.end()) { + errors.emplace_back( + fmt::format("edge asymmetry: node {} has a predecessor whose `next` does not " + "list forward to node {}", + i, i)); + break; + } } } + + if (errors.empty()) { + return; + } + const std::filesystem::path dump_path = dump_invariant_failure_to_temp_path("structural"); + std::string joined; + for (const auto &e : errors) { + joined += "\n - "; + joined += e; + } + QD_ASSERT_INFO(false, "CFG structural invariant failure(s):{}\nCFG state dumped to: {}", joined, + dump_path.empty() ? std::string("") : dump_path.string()); } void ControlFlowGraph::erase(int node_id) { @@ -1182,6 +1226,54 @@ void write_cfg_node_statements(std::ostream &out, const CFGNode *node) { } // namespace +std::filesystem::path ControlFlowGraph::dump_invariant_failure_to_temp_path( + const std::string &reason) const { + // Deliberately tolerant of malformed state (null entries, dangling edges, broken back-pointers): + // we are *only* called from `assert_structural_invariants` after a violation has been detected, + // so the graph state we are dumping is, by construction, not internally consistent. Any single + // stmt-write that throws is caught and replaced with a placeholder line so the rest of the dump + // still lands. The outer try/catch handles filesystem failures and similar. + try { + namespace fs = std::filesystem; + const fs::path dir = fs::temp_directory_path() / "cfg_invariant_failures"; + fs::create_directories(dir); + const auto ns = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + const fs::path filename = dir / fmt::format("{}_{}.txt", reason, ns); + std::ofstream out(filename); + if (!out) { + return {}; + } + out << "# CFG invariant failure dump\n"; + out << "# reason: " << reason << "\n"; + out << fmt::format("# start_node: {}\n", start_node); + out << fmt::format("# final_node: {}\n", final_node); + out << fmt::format("# nodes.size(): {}\n\n", nodes.size()); + + std::unordered_map to_index; + to_index.reserve(nodes.size()); + for (std::size_t i = 0; i < nodes.size(); ++i) { + to_index[nodes[i].get()] = static_cast(i); + } + for (std::size_t i = 0; i < nodes.size(); ++i) { + if (!nodes[i]) { + out << "NODE " << i << ": \n\n"; + continue; + } + try { + write_cfg_node_header(out, static_cast(i), nodes[i].get(), to_index); + write_cfg_node_statements(out, nodes[i].get()); + } catch (...) { + out << "NODE " << i << ": \n\n"; + } + } + return filename; + } catch (...) { + return {}; + } +} + void ControlFlowGraph::dump_graph_to_file(const CompileConfig &config, const std::string &kernel_name, const std::string &suffix) const { diff --git a/quadrants/ir/control_flow_graph.h b/quadrants/ir/control_flow_graph.h index ce0c72b051..7785f8e290 100644 --- a/quadrants/ir/control_flow_graph.h +++ b/quadrants/ir/control_flow_graph.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -195,9 +196,19 @@ class ControlFlowGraph { // cannot canonicalise (O(V+E)). // Called from each public driver. Always-on (release builds too); the cost is dominated by the // analyses that follow and the alternative on violation is silent corruption / a segfault deep - // in worklist processing. + // in worklist processing. On any violation, dumps the current CFG state to a temp file (see + // `dump_invariant_failure_to_temp_path`) and includes the path in the assertion message. void assert_structural_invariants() const; + // Best-effort dump of the current CFG state to a unique file under + // /cfg_invariant_failures/_.txt. Called from `assert_structural_invariants` + // when a violation is about to fire, so the dump runs against possibly-corrupt state -- the + // dump is deliberately tolerant of nulls, dangling edges, and broken back-pointers (it + // prints placeholders and continues, rather than segfaulting). Catches and swallows all + // exceptions; the caller is about to abort. Returns the dump path on success, or an empty + // path if the dump itself failed. + std::filesystem::path dump_invariant_failure_to_temp_path(const std::string &reason) const; + public: struct LiveVarAnalysisConfig { // This is mostly useful for SFG task-level dead store elimination. SFG may