diff --git a/.jules/bolt.md b/.jules/bolt.md index 68fc89b..77976cd 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -2,3 +2,7 @@ ## 2024-05-28 - PageRank Calculation Bottleneck on Massive Site Graphs **Learning:** During iterative algorithms like PageRank or HITS, accessing node data via `Map.get(url)` inside nested loops results in significant performance degradation for massive site graphs. `Map` lookups add unpredictable overhead to the inner computation cycle. **Action:** Always map generic unique identifiers (like `url`s) to zero-indexed integers (`urlToIndex`) before entering iterative hot loops. Use typed contiguous arrays (like `Float64Array`) and standard contiguous arrays (for adjacency lists) for purely O(1) buffer lookups during the computation, mapping results back to URLs only after convergence. + +## 2024-05-29 - Array Allocation Bottlenecks in Graph Edge Iteration +**Learning:** Calling `.map()` or materializing massive object arrays via methods like `graph.getEdges()` before passing data into core iterative hot loops (PageRank, HITS) introduces severe object-allocation overhead and puts heavy pressure on the garbage collector. Inlining knowledge of the graph's internal edge-map representation into consumer code (e.g., `hits.ts` splitting composite keys on `\x00`) avoids those allocations but breaks object-oriented encapsulation and poses major maintenance risks. +**Action:** Use a memory-efficient traversal abstraction like `graph.forEachEdge((source, target, weight) => {...})` on the `Graph` class itself. This lets the graph parse its own internal composite keys inline and pass only scalar arguments (strings/numbers) to the iterator callback, sidestepping intermediate array/object allocations while preserving strict architectural boundaries. 
diff --git a/packages/core/src/graph/graph.ts b/packages/core/src/graph/graph.ts index 81be83d..bcdeeb3 100644 --- a/packages/core/src/graph/graph.ts +++ b/packages/core/src/graph/graph.ts @@ -165,6 +165,19 @@ export class Graph { }); } + /** + * ⚡ Bolt: Invokes `callback(source, target, weight)` once per entry of `this.edges` without allocating a new array of edge objects; each key is split at its first `\x00` into the source (text before it) and the target (text after it). + * Useful for high-performance iterative algorithms like PageRank and HITS. NOTE(review): a key containing no `\x00` is not guarded against (`indexOf` would return -1 and both slices would be wrong) — assumes every key is written with the separator; confirm against the code that populates `this.edges`. + */ + forEachEdge(callback: (source: string, target: string, weight: number) => void) { + for (const [edgeKey, weight] of this.edges.entries()) { + const splitIndex = edgeKey.indexOf('\x00'); + const source = edgeKey.slice(0, splitIndex); + const target = edgeKey.slice(splitIndex + 1); + callback(source, target, weight); + } + } + toJSON() { return { nodes: this.getNodes(), diff --git a/packages/core/src/graph/hits.ts b/packages/core/src/graph/hits.ts index 0319ff1..533324d 100644 --- a/packages/core/src/graph/hits.ts +++ b/packages/core/src/graph/hits.ts @@ -48,19 +48,19 @@ export class HITSService { const incoming: { sourceIndex: number, weight: number }[][] = new Array(N).fill(null).map(() => []); const outgoing: { targetIndex: number, weight: number }[][] = new Array(N).fill(null).map(() => []); - const allEdges = graph.getEdges(); - for (const edge of allEdges) { - if (edge.source === edge.target) continue; + // ⚡ Bolt: Use memory-efficient iteration avoiding array allocations + graph.forEachEdge((source, target, edgeWeight) => { + if (source === target) return; - const sourceIndex = urlToIndex.get(edge.source); - const targetIndex = urlToIndex.get(edge.target); + const sourceIndex = urlToIndex.get(source); + const targetIndex = urlToIndex.get(target); if (sourceIndex !== undefined && targetIndex !== undefined) { - const weight = edge.weight || 1.0; + const weight = edgeWeight || 1.0; incoming[targetIndex].push({ sourceIndex, weight }); outgoing[sourceIndex].push({ targetIndex, weight }); } - } + }); // Initialize Scores const authScores = new 
Float64Array(N).fill(1.0); diff --git a/packages/core/src/graph/pagerank.ts b/packages/core/src/graph/pagerank.ts index c84a9a8..d210bf7 100644 --- a/packages/core/src/graph/pagerank.ts +++ b/packages/core/src/graph/pagerank.ts @@ -33,7 +33,6 @@ export class PageRankService { const neutralScoreWhenFlat = options.neutralScoreWhenFlat ?? 50; const allNodes = graph.getNodes(); - const allEdges = graph.getEdges(); // 1. Filter Eligible Nodes const eligibleNodes = allNodes.filter(node => { @@ -71,16 +70,17 @@ export class PageRankService { const outWeights = new Float64Array(nodeCount); const incoming: { sourceIndex: number, weight: number }[][] = new Array(nodeCount).fill(null).map(() => []); - for (const edge of allEdges) { - const sourceIndex = urlToIndex.get(edge.source); - const targetIndex = urlToIndex.get(edge.target); + // ⚡ Bolt: Use memory-efficient iteration avoiding array allocations + graph.forEachEdge((source, target, edgeWeight) => { + const sourceIndex = urlToIndex.get(source); + const targetIndex = urlToIndex.get(target); if (sourceIndex !== undefined && targetIndex !== undefined) { - const weight = edge.weight || 1.0; + const weight = edgeWeight || 1.0; incoming[targetIndex].push({ sourceIndex, weight }); outWeights[sourceIndex] += weight; } - } + }); // Identify sinks const sinks: number[] = [];