From c89b97ed3d70976bc85f79d9c025c86b3b10211b Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 7 Jun 2026 16:38:43 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20transitive=5Fcal?= =?UTF-8?q?lers=20with=20set=20difference?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: tachyon-beep <544926+tachyon-beep@users.noreply.github.com> --- .jules/bolt.md | 3 +++ .../scanner/taint/reverse_edge_index.py | 20 +++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000..c6e84f7b --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-06-07 - Python loop overhead in graph traversal +**Learning:** In highly connected graphs within a static analysis engine, looping over sets in Python (`for mod in callers: if mod not in closure: ...`) can introduce significant bytecode execution overhead. +**Action:** Replace Python loops checking membership with fast C-level set operations (e.g., `callers - closure`) and use list-based stacks instead of intermediate set constructions for graph frontiers. This avoids O(N) Python iteration in hot paths. diff --git a/src/wardline/scanner/taint/reverse_edge_index.py b/src/wardline/scanner/taint/reverse_edge_index.py index d7ec7d51..70935e47 100644 --- a/src/wardline/scanner/taint/reverse_edge_index.py +++ b/src/wardline/scanner/taint/reverse_edge_index.py @@ -66,15 +66,15 @@ def callers_of(self, callee_module: str) -> frozenset[str]: def transitive_callers(self, seeds: frozenset[str]) -> frozenset[str]: """``seeds`` plus every transitively-reverse-reachable module.""" closure: set[str] = set(seeds) - frontier: set[str] = set(seeds) + frontier: list[str] = list(seeds) + get_callers = self._reverse.get + while frontier: - next_frontier: set[str] = set() - for mod in frontier: - if mod not in self._reverse: - continue - for caller_mod in self._reverse[mod]: - if caller_mod not in closure: - closure.add(caller_mod) - next_frontier.add(caller_mod) - frontier = next_frontier + mod = frontier.pop() + callers = get_callers(mod) + if callers: + new_callers = callers - closure + if new_callers: + closure.update(new_callers) + frontier.extend(new_callers) return frozenset(closure)