diff --git a/.jules/bolt.md b/.jules/bolt.md index 68fc89b..fcf37a1 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -2,3 +2,7 @@ ## 2024-05-28 - PageRank Calculation Bottleneck on Massive Site Graphs **Learning:** During iterative algorithms like PageRank or HITS, accessing node data via `Map.get(url)` inside nested loops results in significant performance degradation for massive site graphs. `Map` lookups add unpredictable overhead to the inner computation cycle. **Action:** Always map generic unique identifiers (like `url`s) to zero-indexed integers (`urlToIndex`) before entering iterative hot loops. Use typed contiguous arrays (like `Float64Array`) and standard contiguous arrays (for adjacency lists) for purely O(1) buffer lookups during the computation, mapping results back to URLs only after convergence. + +## 2024-06-05 - Avoid .getEdges() Array Allocations in Hot Paths +**Learning:** Generating the edge array via `graph.getEdges()` inside `calculateMetrics()` resulted in excessive memory allocations (O(E) new objects per call) due to mapping over `graph.edges.entries()` and invoking `parseEdgeKey`. This caused unnecessary GC pressure in functions that only need edge connectivity information. +**Action:** When only source/target relationships are needed, iterate over `graph.edges.keys()` and inline the key split (`indexOf('\x00')` plus `slice`) to avoid intermediate object allocation entirely. 
diff --git a/packages/core/src/graph/metrics.ts b/packages/core/src/graph/metrics.ts index 622cd96..9263d78 100644 --- a/packages/core/src/graph/metrics.ts +++ b/packages/core/src/graph/metrics.ts @@ -23,23 +23,27 @@ export interface Metrics { export function calculateMetrics(graph: Graph, _maxDepth: number): Metrics { const nodes = graph.getNodes(); - const edges = graph.getEdges(); const totalPages = nodes.length; - const totalEdges = edges.length; + const totalEdges = graph.edges.size; // Identify broken nodes const brokenNodes = new Set(nodes.filter(n => n.status >= 400 || n.status === 0).map(n => n.url)); + // ⚡ Bolt: Fast parsing of string edge keys instead of mapping over objects for O(1) string splits inside hot loop // Pre-compute outgoing edges per node for faster lookup const outgoingEdges = new Map(); - for (const edge of edges) { - let targets = outgoingEdges.get(edge.source); + for (const edgeKey of graph.edges.keys()) { + const splitIndex = edgeKey.indexOf('\x00'); + const source = edgeKey.slice(0, splitIndex); + const target = edgeKey.slice(splitIndex + 1); + + let targets = outgoingEdges.get(source); if (!targets) { targets = []; - outgoingEdges.set(edge.source, targets); + outgoingEdges.set(source, targets); } - targets.push(edge.target); + targets.push(target); } // Populate brokenLinks per node