From 0b4e8e290489fbef4c76c37ecde319f7769abd41 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 5 Jun 2026 17:44:21 -0600 Subject: [PATCH 1/5] fix(native): persist this/super dispatch via hybrid WASM post-pass (#1326) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The native orchestrator resolves typed receiver calls but does not persist raw unresolved call site receiver info (this/super) to the DB, so runPostNativeCha could not resolve this.method() or super.method() calls. Add runPostNativeThisDispatch: after the Rust pipeline completes, it WASM-re-parses JS/TS/TSX files to collect call sites with this/super receivers, then resolves them through the DB class hierarchy (extends edges) using the existing resolveThisDispatch function. Only runs when extends edges exist. Removes the skipIf(engine === 'native') guards on the this-dispatch and super-dispatch integration tests — both engines now produce identical edges for ConcreteWorker.doWork → ConcreteWorker.prepare and Lion.speak → Animal.speak. The two CHA transitive skips remain (pending abstract_class_declaration fix in a future native binary).
Closes #1326 --- .../builder/stages/native-orchestrator.ts | 178 +++++++++++++++++- .../phase-8.5-cha-dispatch.test.ts | 35 ++-- 2 files changed, 186 insertions(+), 27 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index a3903cd3..fa76145a 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -42,6 +42,9 @@ import { parseFilesWasmForBackfill, } from '../../../parser.js'; import { computeConfidence } from '../../resolve.js'; +import type { CallNodeLookup } from '../call-resolver.js'; +import type { ChaContext } from '../cha.js'; +import { resolveThisDispatch } from '../cha.js'; import type { PipelineContext } from '../context.js'; import { batchInsertEdges, @@ -394,9 +397,8 @@ async function runPostNativeAnalysis( * each call to an interface/abstract method to ALL RTA-filtered concrete * implementations. * - * Note: `this`/`super` dispatch requires the raw unresolved call sites which are - * not persisted to the DB by the Rust pipeline. That case is handled by the WASM - * path (`buildFileCallEdges`) and is a known gap for the native orchestrator. + * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`, + * which WASM-re-parses JS/TS files to obtain raw call site receiver info. * * Returns the set of target node IDs for newly inserted CHA edges so the caller * can re-classify roles for the affected implementation files. An empty set @@ -556,6 +558,163 @@ function runPostNativeCha(db: BetterSqlite3Database): Set { return newTargetIds; } +// Extensions where `this`/`super` dispatch can occur (JS/TS family) +const THIS_DISPATCH_EXTS = new Set(['.js', '.ts', '.tsx', '.jsx', '.mjs', '.cjs', '.mts', '.cts']); + +/** + * Phase 8.5: this/super dispatch post-pass for the native orchestrator path. 
+ * + * The Rust build pipeline resolves typed receiver calls but does NOT persist raw + * unresolved call site receiver info (e.g. `this`, `super`) to the DB. This + * hybrid post-pass re-parses JS/TS/TSX files via WASM to collect call sites with + * `this`/`super` receivers, then resolves them through the class hierarchy stored + * in DB `extends` edges — mirroring what `buildChaPostPass` does on the WASM path. + * + * Only runs when `extends` edges exist in the DB; if there is no inheritance + * hierarchy there is nothing to resolve via `this`/`super` dispatch. + */ +async function runPostNativeThisDispatch( + db: BetterSqlite3Database, + rootDir: string, + changedFiles: string[] | undefined, + isFullBuild: boolean, +): Promise { + // Fast guard: need at least one extends edge for this/super to have meaning + const hasExtends = db.prepare(`SELECT 1 FROM edges WHERE kind = 'extends' LIMIT 1`).get(); + if (!hasExtends) return; + + // Build parents map: child class → direct parent class (from `extends` edges) + const parentRows = db + .prepare(` + SELECT src.name AS child_name, tgt.name AS parent_name + FROM edges e + JOIN nodes src ON e.source_id = src.id + JOIN nodes tgt ON e.target_id = tgt.id + WHERE e.kind = 'extends' + `) + .all() as Array<{ child_name: string; parent_name: string }>; + + const parents = new Map(); + for (const row of parentRows) { + if (!parents.has(row.child_name)) parents.set(row.child_name, row.parent_name); + } + if (parents.size === 0) return; + + const chaCtx: ChaContext = { + implementors: new Map(), // not needed for this/super resolution + parents, + instantiatedTypes: new Set(), // not needed for this/super resolution + }; + + // Determine which files to re-parse (JS/TS family only) + let relFiles: string[]; + if (isFullBuild || !changedFiles) { + const rows = db + .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file' AND file IS NOT NULL") + .all() as Array<{ file: string }>; + relFiles = rows + .map((r) => r.file) + 
.filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase())); + } else { + relFiles = changedFiles.filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase())); + } + if (relFiles.length === 0) return; + + // DB-backed CallNodeLookup — resolveThisDispatch only calls byName() + const findByNameStmt = db.prepare(`SELECT id, file, kind FROM nodes WHERE name = ?`); + const lookup: CallNodeLookup = { + byName: (name) => findByNameStmt.all(name) as Array<{ id: number; file: string; kind: string }>, + byNameAndFile: (name, file) => + (findByNameStmt.all(name) as Array<{ id: number; file: string; kind: string }>).filter( + (n) => n.file === file, + ), + isBarrel: () => false, + resolveBarrel: () => null, + nodeId: () => undefined, + }; + + // Seed seen-pairs from existing call edges on source nodes in our file set + const seen = new Set(); + const CHUNK = 500; + for (let i = 0; i < relFiles.length; i += CHUNK) { + const chunk = relFiles.slice(i, i + CHUNK); + const ph = chunk.map(() => '?').join(','); + const rows = db + .prepare( + `SELECT e.source_id, e.target_id + FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.kind = 'calls' AND n.file IN (${ph})`, + ) + .all(...chunk) as Array<{ source_id: number; target_id: number }>; + for (const r of rows) seen.add(`${r.source_id}|${r.target_id}`); + } + + // Find the innermost containing method/function for a call at `line` in `file`. + // NULL end_line sorts last in SQLite ASC → only selected when no bounded node exists. + const findCallerByLineStmt = db.prepare(` + SELECT id, name FROM nodes + WHERE file = ? AND kind IN ('method', 'function') + AND line <= ? AND (end_line IS NULL OR end_line >= ?) 
+ ORDER BY (end_line - line) ASC + LIMIT 1 + `); + + // WASM-parse the files to obtain raw call sites with receiver info + const absFiles = relFiles.map((f) => path.join(rootDir, f)); + const wasmResults = await parseFilesWasmForBackfill(absFiles, rootDir); + + const newEdges: Array<[number, number, string, number, number, string]> = []; + + for (const [relPath, symbols] of wasmResults) { + for (const call of symbols.calls) { + if (call.receiver !== 'this' && call.receiver !== 'self' && call.receiver !== 'super') + continue; + + const callerRow = findCallerByLineStmt.get(relPath, call.line, call.line) as + | { id: number; name: string } + | undefined; + if (!callerRow) continue; + + const targets = resolveThisDispatch( + call.name, + callerRow.name, + call.receiver as 'this' | 'self' | 'super', + chaCtx, + lookup, + ); + + for (const t of targets) { + const key = `${callerRow.id}|${t.id}`; + if (seen.has(key)) continue; + seen.add(key); + const conf = computeConfidence(relPath, t.file, null) - 0.1; + if (conf <= 0) continue; + newEdges.push([callerRow.id, t.id, 'calls', conf, 0, 'cha']); + } + } + } + + if (newEdges.length > 0) { + db.transaction(() => batchInsertEdges(db, newEdges))(); + debug(`this/super dispatch post-pass: inserted ${newEdges.length} edge(s)`); + } + + // Free WASM parse trees — mirrors the cleanup in backfillNativeDroppedFiles + for (const [, symbols] of wasmResults) { + const tree = (symbols as { _tree?: { delete?: () => void } })._tree; + if (tree && typeof tree.delete === 'function') { + try { + tree.delete(); + } catch { + /* ignore cleanup errors */ + } + } + (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined; + (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined; + } +} + /** Format timing result from native orchestrator phases + JS post-processing. 
*/ function formatNativeTimingResult( p: Record, @@ -1193,10 +1352,19 @@ export async function tryNativeOrchestrator( } } + // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites + // whose raw receiver info the Rust pipeline does not persist to DB. + await runPostNativeThisDispatch( + ctx.db as unknown as BetterSqlite3Database, + ctx.rootDir, + result.changedFiles, + !!result.isFullBuild, + ); + // Backfill the `technique` column on `calls` edges written by the Rust // orchestrator, which does not write the column. Runs after all edge-writing - // phases (including the WASM dropped-language backfill and CHA post-pass) so - // every new edge in this build cycle gets a technique label. + // phases (including the WASM dropped-language backfill, CHA post-pass, and + // this/super dispatch) so every new edge in this build cycle gets a label. backfillEdgeTechniquesAfterNativeOrchestrator(ctx.db, !!result.isFullBuild, result.changedFiles); // ── Structure and analysis fallback (run after edge-writing so roles see full graph) ── diff --git a/tests/integration/phase-8.5-cha-dispatch.test.ts b/tests/integration/phase-8.5-cha-dispatch.test.ts index edb51f9a..b5701e5a 100644 --- a/tests/integration/phase-8.5-cha-dispatch.test.ts +++ b/tests/integration/phase-8.5-cha-dispatch.test.ts @@ -119,32 +119,23 @@ describe.each(ENGINES)('Phase 8.5 CHA dispatch (%s)', (engine) => { }); // ── this-dispatch ────────────────────────────────────────────────────── - // The WASM path resolves `this.prepare()` through the class hierarchy via - // the inline CHA dispatch in buildFileCallEdges. The native orchestrator - // path does not persist raw call sites to the DB, so this-dispatch is a - // known gap for native — tested only for wasm. 
- it.skipIf(engine === 'native')( - 'this-dispatch: emits ConcreteWorker.doWork → ConcreteWorker.prepare', - () => { - const edge = callEdges.find( - (e) => - e.caller_name === 'ConcreteWorker.doWork' && - e.callee_name === 'ConcreteWorker.prepare' && - e.callee_file === 'ConcreteWorker.ts', - ); - expect( - edge, - `Expected ConcreteWorker.doWork → ConcreteWorker.prepare edge (this-dispatch).\nActual edges:\n${JSON.stringify(callEdges, null, 2)}`, - ).toBeDefined(); - }, - ); + it('this-dispatch: emits ConcreteWorker.doWork → ConcreteWorker.prepare', () => { + const edge = callEdges.find( + (e) => + e.caller_name === 'ConcreteWorker.doWork' && + e.callee_name === 'ConcreteWorker.prepare' && + e.callee_file === 'ConcreteWorker.ts', + ); + expect( + edge, + `Expected ConcreteWorker.doWork → ConcreteWorker.prepare edge (this-dispatch).\nActual edges:\n${JSON.stringify(callEdges, null, 2)}`, + ).toBeDefined(); + }); // ── super-dispatch ───────────────────────────────────────────────────── - // Same gap as this-dispatch: super.speak() cannot be resolved from DB edges - // alone in the native orchestrator path. - it.skipIf(engine === 'native')('super-dispatch: emits Lion.speak → Animal.speak', () => { + it('super-dispatch: emits Lion.speak → Animal.speak', () => { const edge = callEdges.find( (e) => e.caller_name === 'Lion.speak' && From fd15d3c4082846c4d1ed36fe481a50f0493e4773 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 5 Jun 2026 20:18:00 -0600 Subject: [PATCH 2/5] fix(native): correct NULL ordering in findCallerByLine and remove self receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SQLite ASC ordering puts NULL values first, so (end_line - line) ASC would pick unbounded nodes before any bounded node — inverting the intent. Replace with COALESCE(end_line - line, 999999999) ASC so unbounded nodes sort last. Also remove 'self' from the this/super receiver filter in runPostNativeThisDispatch. 
In JS/TS files 'self' refers to WindowOrWorkerGlobalScope, not a class instance — including it would produce spurious dispatch edges from Worker call sites. --- .../graph/builder/stages/native-orchestrator.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 79633a66..11a05f18 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -653,12 +653,13 @@ async function runPostNativeThisDispatch( } // Find the innermost containing method/function for a call at `line` in `file`. - // NULL end_line sorts last in SQLite ASC → only selected when no bounded node exists. + // COALESCE maps NULL end_line to a large sentinel so unbounded nodes sort last + // (SQLite ASC orders NULLs first, so a raw `end_line - line` would pick them first). const findCallerByLineStmt = db.prepare(` SELECT id, name FROM nodes WHERE file = ? AND kind IN ('method', 'function') AND line <= ? AND (end_line IS NULL OR end_line >= ?) - ORDER BY (end_line - line) ASC + ORDER BY COALESCE(end_line - line, 999999999) ASC LIMIT 1 `); @@ -670,8 +671,10 @@ async function runPostNativeThisDispatch( for (const [relPath, symbols] of wasmResults) { for (const call of symbols.calls) { - if (call.receiver !== 'this' && call.receiver !== 'self' && call.receiver !== 'super') - continue; + // Only 'this' and 'super' are class-instance receivers in JS/TS. + // 'self' refers to WindowOrWorkerGlobalScope — not a class instance — so + // filtering it here prevents spurious dispatch edges from Worker call sites. 
+ if (call.receiver !== 'this' && call.receiver !== 'super') continue; const callerRow = findCallerByLineStmt.get(relPath, call.line, call.line) as | { id: number; name: string } @@ -681,7 +684,7 @@ async function runPostNativeThisDispatch( const targets = resolveThisDispatch( call.name, callerRow.name, - call.receiver as 'this' | 'self' | 'super', + call.receiver as 'this' | 'super', chaCtx, lookup, ); From 4a5c5b97675f5b54c80fdc30bed85d5a6da4e9b2 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 5 Jun 2026 22:18:25 -0600 Subject: [PATCH 3/5] fix(native): scope this/super WASM re-parse to inheritance-hierarchy files only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a full native build, runPostNativeThisDispatch was WASM-re-parsing every JS/TS file in the project, adding a costly second parse pass on top of the native Rust parse (measured: +358% ms/file on codegraph itself). Narrow the file set to only files that appear in the class inheritance graph (sources and targets of 'extends' edges). Files outside the hierarchy have no class relationship, so this/super calls in them either resolve locally or are skipped by resolveThisDispatch anyway — WASM re-parsing them adds cost with zero benefit. Also replace the hardcoded 0.1 confidence penalty with the CHA_DISPATCH_PENALTY named constant (already imported), matching every other CHA confidence calculation in native-orchestrator.ts and build-edges.ts. 
Fixes: regression-guard failure "Build ms/file: 3.6 → 16.5 (+358%)" (#1337) --- .../builder/stages/native-orchestrator.ts | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 11a05f18..9e6185e4 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -608,11 +608,34 @@ async function runPostNativeThisDispatch( instantiatedTypes: new Set(), // not needed for this/super resolution }; - // Determine which files to re-parse (JS/TS family only) + // Determine which files to re-parse. + // + // On a full build we do NOT re-parse every JS/TS file — that would WASM-parse + // the entire project on top of the native pass, causing a massive regression + // (measured: +358% ms/file on codegraph itself). Instead we restrict to files + // that are part of the class inheritance hierarchy: both subclass files (which + // contain `super.X()` calls dispatching to a parent) and parent-class files + // (whose method bodies contain `this.X()` calls that CHA must resolve). Any + // file not in the hierarchy has no `extends` relationship, so `this`/`super` + // calls in it either resolve locally (same-class dispatch, already handled by + // the direct-call edge) or have no class context — and will be skipped by + // `resolveThisDispatch` anyway. 
let relFiles: string[]; if (isFullBuild || !changedFiles) { const rows = db - .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file' AND file IS NOT NULL") + .prepare(` + SELECT DISTINCT file FROM ( + SELECT src.file AS file + FROM edges e + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'extends' AND src.file IS NOT NULL + UNION + SELECT tgt.file AS file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + WHERE e.kind = 'extends' AND tgt.file IS NOT NULL + ) + `) .all() as Array<{ file: string }>; relFiles = rows .map((r) => r.file) @@ -693,7 +716,7 @@ async function runPostNativeThisDispatch( const key = `${callerRow.id}|${t.id}`; if (seen.has(key)) continue; seen.add(key); - const conf = computeConfidence(relPath, t.file, null) - 0.1; + const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf <= 0) continue; newEdges.push([callerRow.id, t.id, 'calls', conf, 0, 'cha']); } From 773caf0af2927d3a7d34b93b9d0f07cbf0fe4f26 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 6 Jun 2026 00:32:34 -0600 Subject: [PATCH 4/5] fix(native): document incremental limitation and capture thisDispatchMs timing Add a comment to the incremental-build branch of runPostNativeThisDispatch documenting the known gap: if a parent-class method is replaced (new node ID) but the child file is unchanged, the stale super.method() edge is not refreshed until the next full rebuild. Add wall-clock timing for the this/super dispatch post-pass. The function now returns the elapsed milliseconds (Promise<number>), and the result is threaded through formatNativeTimingResult as a new thisDispatchMs phase. For large class hierarchies the WASM re-parse can be non-trivial, so surfacing it in build diagnostics makes performance regressions visible.
--- .../builder/stages/native-orchestrator.ts | 23 +++++++++++++------ src/types.ts | 2 ++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 9e6185e4..0eab2aaa 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -580,10 +580,11 @@ async function runPostNativeThisDispatch( rootDir: string, changedFiles: string[] | undefined, isFullBuild: boolean, -): Promise { +): Promise { + const t0 = Date.now(); // Fast guard: need at least one extends edge for this/super to have meaning const hasExtends = db.prepare(`SELECT 1 FROM edges WHERE kind = 'extends' LIMIT 1`).get(); - if (!hasExtends) return; + if (!hasExtends) return 0; // Build parents map: child class → direct parent class (from `extends` edges) const parentRows = db @@ -600,7 +601,7 @@ async function runPostNativeThisDispatch( for (const row of parentRows) { if (!parents.has(row.child_name)) parents.set(row.child_name, row.parent_name); } - if (parents.size === 0) return; + if (parents.size === 0) return 0; const chaCtx: ChaContext = { implementors: new Map(), // not needed for this/super resolution @@ -641,9 +642,13 @@ async function runPostNativeThisDispatch( .map((r) => r.file) .filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase())); } else { + // NOTE: Only files explicitly listed in changedFiles are re-parsed. + // If a parent-class method is replaced (new node ID) but the child file is + // unchanged, the stale super.method() edge is not refreshed here. A full + // rebuild (isFullBuild=true) is required to recover in that scenario. 
relFiles = changedFiles.filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase())); } - if (relFiles.length === 0) return; + if (relFiles.length === 0) return 0; // DB-backed CallNodeLookup — resolveThisDispatch only calls byName() const findByNameStmt = db.prepare(`SELECT id, file, kind FROM nodes WHERE name = ?`); @@ -741,6 +746,8 @@ async function runPostNativeThisDispatch( (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined; (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined; } + + return Date.now() - t0; } /** Format timing result from native orchestrator phases + JS post-processing. */ @@ -748,6 +755,7 @@ function formatNativeTimingResult( p: Record, structurePatchMs: number, analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }, + thisDispatchMs: number, ): BuildResult { return { phases: { @@ -760,6 +768,7 @@ function formatNativeTimingResult( edgesMs: +(p.edgesMs ?? 0).toFixed(1), structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), rolesMs: +(p.rolesMs ?? 0).toFixed(1), + thisDispatchMs: +thisDispatchMs.toFixed(1), astMs: +(analysisTiming.astMs ?? 0).toFixed(1), complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1), cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1), @@ -1299,7 +1308,7 @@ export async function tryNativeOrchestrator( ctx.nativeFirstProxy = false; } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { // DB reopen failed — return partial result - return formatNativeTimingResult(p, 0, analysisTiming); + return formatNativeTimingResult(p, 0, analysisTiming, 0); } } @@ -1382,7 +1391,7 @@ export async function tryNativeOrchestrator( // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites // whose raw receiver info the Rust pipeline does not persist to DB. 
- await runPostNativeThisDispatch( + const thisDispatchMs = await runPostNativeThisDispatch( ctx.db as unknown as BetterSqlite3Database, ctx.rootDir, result.changedFiles, @@ -1417,5 +1426,5 @@ export async function tryNativeOrchestrator( } closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); - return formatNativeTimingResult(p, structurePatchMs, analysisTiming); + return formatNativeTimingResult(p, structurePatchMs, analysisTiming, thisDispatchMs); } diff --git a/src/types.ts b/src/types.ts index d5ff1f5a..4b51ba63 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1162,6 +1162,8 @@ export interface BuildResult { edgesMs: number; structureMs: number; rolesMs: number; + /** Wall-clock time for the this/super dispatch WASM post-pass (native path only). */ + thisDispatchMs?: number; astMs: number; complexityMs: number; cfgMs: number; From 6426cb828e4455632088f5664bbcde74029dbe90 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 6 Jun 2026 02:09:36 -0600 Subject: [PATCH 5/5] fix(native): re-classify node roles after this/super dispatch post-pass (#1337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rust orchestrator runs role classification before the post-passes, so target methods (e.g. Animal.speak, ConcreteWorker.prepare) that had no callers at Rust build time were classified dead or dead-ffi. runPostNativeThisDispatch inserted the correct call edges but never re-ran classifyNodeRoles, leaving stale role labels visible to dead-code detection and API boundary analysis. Mirror the pattern used after runPostNativeCha: change the return type from Promise<number> to Promise<{ elapsedMs: number; targetIds: Set<number> }>, collect target node IDs while building newEdges, then look up the affected files and call classifyNodeRoles on them — same chunk-and-dedupe pattern as the CHA post-pass.
--- .../builder/stages/native-orchestrator.ts | 73 ++++++++++++++++--- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 0eab2aaa..b76c215a 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -580,11 +580,12 @@ async function runPostNativeThisDispatch( rootDir: string, changedFiles: string[] | undefined, isFullBuild: boolean, -): Promise { +): Promise<{ elapsedMs: number; targetIds: Set }> { const t0 = Date.now(); + const targetIds = new Set(); // Fast guard: need at least one extends edge for this/super to have meaning const hasExtends = db.prepare(`SELECT 1 FROM edges WHERE kind = 'extends' LIMIT 1`).get(); - if (!hasExtends) return 0; + if (!hasExtends) return { elapsedMs: 0, targetIds }; // Build parents map: child class → direct parent class (from `extends` edges) const parentRows = db @@ -601,7 +602,7 @@ async function runPostNativeThisDispatch( for (const row of parentRows) { if (!parents.has(row.child_name)) parents.set(row.child_name, row.parent_name); } - if (parents.size === 0) return 0; + if (parents.size === 0) return { elapsedMs: 0, targetIds }; const chaCtx: ChaContext = { implementors: new Map(), // not needed for this/super resolution @@ -648,7 +649,7 @@ async function runPostNativeThisDispatch( // rebuild (isFullBuild=true) is required to recover in that scenario. 
relFiles = changedFiles.filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase())); } - if (relFiles.length === 0) return 0; + if (relFiles.length === 0) return { elapsedMs: 0, targetIds }; // DB-backed CallNodeLookup — resolveThisDispatch only calls byName() const findByNameStmt = db.prepare(`SELECT id, file, kind FROM nodes WHERE name = ?`); @@ -724,6 +725,7 @@ async function runPostNativeThisDispatch( const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf <= 0) continue; newEdges.push([callerRow.id, t.id, 'calls', conf, 0, 'cha']); + targetIds.add(t.id); } } } @@ -747,7 +749,7 @@ async function runPostNativeThisDispatch( (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined; } - return Date.now() - t0; + return { elapsedMs: Date.now() - t0, targetIds }; } /** Format timing result from native orchestrator phases + JS post-processing. */ @@ -1391,12 +1393,61 @@ export async function tryNativeOrchestrator( // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites // whose raw receiver info the Rust pipeline does not persist to DB. - const thisDispatchMs = await runPostNativeThisDispatch( - ctx.db as unknown as BetterSqlite3Database, - ctx.rootDir, - result.changedFiles, - !!result.isFullBuild, - ); + const { elapsedMs: thisDispatchMs, targetIds: thisDispatchTargetIds } = + await runPostNativeThisDispatch( + ctx.db as unknown as BetterSqlite3Database, + ctx.rootDir, + result.changedFiles, + !!result.isFullBuild, + ); + + // Re-classify roles for methods that gained incoming this/super dispatch edges. + // The Rust orchestrator classifies roles BEFORE this post-pass, so target methods + // (e.g. Animal.speak, ConcreteWorker.prepare) that had no callers at Rust time + // are classified `dead` or `dead-ffi`. Inserting the new call edges does not + // automatically update those role labels — without a re-run the stale labels + // propagate to dead-code detection and API boundary analysis. 
+ if (thisDispatchTargetIds.size > 0) { + try { + const db = ctx.db as unknown as BetterSqlite3Database; + const idArray = Array.from(thisDispatchTargetIds); + const CHUNK_SIZE = 500; + const seenFiles = new Set(); + const affectedFiles: Array<{ file: string }> = []; + for (let i = 0; i < idArray.length; i += CHUNK_SIZE) { + const chunk = idArray.slice(i, i + CHUNK_SIZE); + const placeholders = chunk.map(() => '?').join(','); + const rows = db + .prepare( + `SELECT DISTINCT file FROM nodes WHERE id IN (${placeholders}) AND file IS NOT NULL`, + ) + .all(...chunk) as Array<{ file: string }>; + for (const row of rows) { + if (!seenFiles.has(row.file)) { + seenFiles.add(row.file); + affectedFiles.push(row); + } + } + } + if (affectedFiles.length > 0) { + const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { + classifyNodeRoles: ( + db: BetterSqlite3Database, + changedFiles?: string[] | null, + ) => Record; + }; + classifyNodeRoles( + db, + affectedFiles.map((r) => r.file), + ); + debug( + `this/super dispatch post-pass: re-classified roles for ${affectedFiles.length} target file(s)`, + ); + } + } catch (err) { + debug(`this/super dispatch post-pass role re-classification failed: ${toErrorMessage(err)}`); + } + } // Backfill the `technique` column on `calls` edges written by the Rust // orchestrator, which does not write the column. Runs after all edge-writing