Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
## 2024-05-28 - PageRank Calculation Bottleneck on Massive Site Graphs
**Learning:** During iterative algorithms like PageRank or HITS, accessing node data via `Map.get(url)` inside nested loops results in significant performance degradation for massive site graphs. `Map` lookups add unpredictable overhead to the inner computation cycle.
**Action:** Always map generic unique identifiers (like `url`s) to zero-indexed integers (`urlToIndex`) before entering iterative hot loops. Use typed contiguous arrays (like `Float64Array`) and standard contiguous arrays (for adjacency lists) for purely O(1) buffer lookups during the computation, mapping results back to URLs only after convergence.

## 2024-05-29 - Array Allocation Bottlenecks in Graph Edge Iteration
**Learning:** Calling `.map()` or materializing massive object arrays via methods like `graph.getEdges()` before passing data into core iterative hot loops (PageRank, HITS) introduces severe object-allocation penalties and stalls the garbage collector. The tempting alternative of inlining the graph's internal edge-map key format directly inside consumer methods (e.g., `hits.ts` splitting composite keys on `\x00`) breaks object-oriented encapsulation and poses a major maintenance risk.
**Action:** Use a memory-efficient traversal abstraction like `graph.forEachEdge((source, target, weight) => {...})` on the `Graph` class itself. This allows the graph structure to handle the fast string parsing of its internal composite keys inline and passes scalar arguments (strings/numbers) to the iterator callback, completely sidestepping intermediate array/object allocations while preserving strict architectural boundaries.
13 changes: 13 additions & 0 deletions packages/core/src/graph/graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,19 @@ export class Graph {
});
}

/**
 * ⚑ Bolt: Visits every edge without allocating an intermediate array of edge objects.
 * Useful for high-performance iterative algorithms like PageRank and HITS.
 *
 * Each key of `this.edges` is split at its first `\x00` separator: the text
 * before the separator is passed as `source`, the text after it as `target`,
 * and the stored value as `weight`.
 *
 * Keys that contain no separator are skipped rather than reported, because
 * splitting them would hand the callback a fabricated source/target pair.
 *
 * @param callback - Invoked once per well-formed edge with `(source, target, weight)`.
 */
forEachEdge(callback: (source: string, target: string, weight: number) => void): void {
  for (const [edgeKey, weight] of this.edges.entries()) {
    const splitIndex = edgeKey.indexOf('\x00');

    // indexOf returns -1 when the separator is absent; slice(0, -1) would then
    // silently drop the last character and slice(0) would return the whole key.
    if (splitIndex === -1) continue;

    const source = edgeKey.slice(0, splitIndex);
    const target = edgeKey.slice(splitIndex + 1);
    callback(source, target, weight);
  }
}

toJSON() {
return {
nodes: this.getNodes(),
Expand Down
14 changes: 7 additions & 7 deletions packages/core/src/graph/hits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,19 @@ export class HITSService {
const incoming: { sourceIndex: number, weight: number }[][] = new Array(N).fill(null).map(() => []);
const outgoing: { targetIndex: number, weight: number }[][] = new Array(N).fill(null).map(() => []);

const allEdges = graph.getEdges();
for (const edge of allEdges) {
if (edge.source === edge.target) continue;
// ⚑ Bolt: Use memory-efficient iteration avoiding array allocations
graph.forEachEdge((source, target, edgeWeight) => {
if (source === target) return;

const sourceIndex = urlToIndex.get(edge.source);
const targetIndex = urlToIndex.get(edge.target);
const sourceIndex = urlToIndex.get(source);
const targetIndex = urlToIndex.get(target);

if (sourceIndex !== undefined && targetIndex !== undefined) {
const weight = edge.weight || 1.0;
const weight = edgeWeight || 1.0;
incoming[targetIndex].push({ sourceIndex, weight });
outgoing[sourceIndex].push({ targetIndex, weight });
}
}
});

// Initialize Scores
const authScores = new Float64Array(N).fill(1.0);
Expand Down
12 changes: 6 additions & 6 deletions packages/core/src/graph/pagerank.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ export class PageRankService {
const neutralScoreWhenFlat = options.neutralScoreWhenFlat ?? 50;

const allNodes = graph.getNodes();
const allEdges = graph.getEdges();

// 1. Filter Eligible Nodes
const eligibleNodes = allNodes.filter(node => {
Expand Down Expand Up @@ -71,16 +70,17 @@ export class PageRankService {
const outWeights = new Float64Array(nodeCount);
const incoming: { sourceIndex: number, weight: number }[][] = new Array(nodeCount).fill(null).map(() => []);

for (const edge of allEdges) {
const sourceIndex = urlToIndex.get(edge.source);
const targetIndex = urlToIndex.get(edge.target);
// ⚑ Bolt: Use memory-efficient iteration avoiding array allocations
graph.forEachEdge((source, target, edgeWeight) => {
const sourceIndex = urlToIndex.get(source);
const targetIndex = urlToIndex.get(target);

if (sourceIndex !== undefined && targetIndex !== undefined) {
const weight = edge.weight || 1.0;
const weight = edgeWeight || 1.0;
incoming[targetIndex].push({ sourceIndex, weight });
outWeights[sourceIndex] += weight;
}
}
});

// Identify sinks
const sinks: number[] = [];
Expand Down