From 77700107d8929651bfd54b85bfd12840d6fb8eb6 Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Wed, 13 May 2026 23:42:37 +0200 Subject: [PATCH 1/9] feat: [SCRUM-449] add shared diagnostic verdict format Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/diagnostics/protocol.ts | 29 ++++++ src/diagnostics/verdict.ts | 200 ++++++++++++++++++++++++++++++++++++ src/tools/diagnostics.ts | 30 +++--- 3 files changed, 247 insertions(+), 12 deletions(-) create mode 100644 src/diagnostics/verdict.ts diff --git a/src/diagnostics/protocol.ts b/src/diagnostics/protocol.ts index 9035202..9c597b5 100644 --- a/src/diagnostics/protocol.ts +++ b/src/diagnostics/protocol.ts @@ -19,6 +19,35 @@ export interface ReactInfo { devtoolsHookPresent: boolean; } +// --------------------------------------------------------------------------- +// Verdict-first diagnostics (Sprint 16) +// --------------------------------------------------------------------------- + +export type DiagnosticConfidence = "low" | "medium" | "high"; + +/** + * Standard verdict-first diagnostic response returned by higher-signal MCP tools. + * + * The goal is to lead with an actionable diagnosis while still preserving the + * original raw payload under raw_data for deeper inspection when needed. + */ +export interface DiagnosticVerdict<TVerdict extends string = string, TRawData = unknown> { + /** Machine-readable diagnostic verdict chosen by the tool. */ + verdict: TVerdict; + /** Short actionable summary intended to be consumed before raw data. */ + summary: string; + /** Concrete observations that justify the verdict. */ + evidence: string[]; + /** Confidence level of the diagnostic interpretation. */ + confidence: DiagnosticConfidence; + /** Optional likely source or dominant cause behind the issue. */ + suspected_source?: string; + /** Optional recommended next runtime step for the caller. */ + next_step?: string; + /** Original structured payload preserved for deeper investigation. 
*/ + raw_data?: TRawData; +} + // --------------------------------------------------------------------------- // Runtime status (get_runtime_status) // --------------------------------------------------------------------------- diff --git a/src/diagnostics/verdict.ts b/src/diagnostics/verdict.ts new file mode 100644 index 0000000..2b7b242 --- /dev/null +++ b/src/diagnostics/verdict.ts @@ -0,0 +1,200 @@ +import type { + AsyncTimelineResponse, + DiagnosticConfidence, + DiagnosticVerdict, + HydrationIssuesResponse, + RaceConditionDiagnosisResponse, + RenderHotspotsResponse, +} from "./protocol.js"; + +function createVerdict<TVerdict extends string, TRawData>(seed: { + verdict: TVerdict; + summary: string; + evidence: string[]; + confidence: DiagnosticConfidence; + suspected_source?: string; + next_step?: string; + raw_data: TRawData; +}): DiagnosticVerdict<TVerdict, TRawData> { + return seed; +} + +function formatSlowRequest(label: string, durationMs: number, status: number | null): string { + return `${label} took ${durationMs}ms${status === null ? "" : ` (status ${status})`}.`; +} + +export function createRenderHotspotsVerdict( + payload: RenderHotspotsResponse ): DiagnosticVerdict<"render_hotspots_detected" | "no_hotspots_detected", RenderHotspotsResponse> { + const primaryHotspot = payload.hotspots[0] ?? 
null; + + if (!primaryHotspot) { + return createVerdict({ + verdict: "no_hotspots_detected", + summary: `No component crossed ${payload.threshold} renders within ${payload.windowMs}ms in the recent replay window.`, + evidence: [ + `Observed hotspots: 0`, + `Detection threshold: ${payload.threshold} renders/${payload.windowMs}ms`, + ], + confidence: "medium", + next_step: "If the bug is intermittent, replay the scenario again or lower the hotspot threshold.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "render_hotspots_detected", + summary: `${primaryHotspot.componentName} is the top rerender hotspot with ${primaryHotspot.recentRenderCount} renders in ${primaryHotspot.windowMs}ms.`, + evidence: [ + `Top hotspot: ${primaryHotspot.pathText} (${primaryHotspot.recentRenderCount} renders in ${primaryHotspot.windowMs}ms)`, + `Probable cause: ${primaryHotspot.probableCause.summary}`, + `Additional hotspots detected: ${Math.max(payload.hotspots.length - 1, 0)}`, + ], + confidence: payload.hotspots.length > 1 || primaryHotspot.recentRenderCount >= payload.threshold * 2 ? "high" : "medium", + suspected_source: primaryHotspot.probableCause.type, + next_step: `Inspect ${primaryHotspot.componentName} hook changes or run a higher-level excess render diagnosis to confirm the unstable input.`, + raw_data: payload, + }); +} + +export function createAsyncTimelineVerdict( + payload: AsyncTimelineResponse +): DiagnosticVerdict< + "async_order_inversion_detected" | "slow_async_operations_detected" | "no_async_anomalies_detected" | "no_async_activity_detected", + AsyncTimelineResponse +> { + const invertedGroup = payload.summary.invertedGroups[0] ?? null; + const slowRequest = payload.summary.slowRequests[0] ?? 
null; + + if (payload.summary.totalRequests === 0) { + return createVerdict({ + verdict: "no_async_activity_detected", + summary: "No recent fetch or XHR activity was captured in the runtime timeline.", + evidence: ["Captured requests: 0"], + confidence: "low", + next_step: "Reproduce the bug again before requesting the async timeline.", + raw_data: payload, + }); + } + + if (invertedGroup) { + return createVerdict({ + verdict: "async_order_inversion_detected", + summary: `Async requests for ${invertedGroup.groupKey} completed out of start order, which is a strong race-condition signal.`, + evidence: [ + `Started order: ${invertedGroup.startedOrder.join(" -> ")}`, + `Settled order: ${invertedGroup.settledOrder.join(" -> ")}`, + slowRequest ? `Slowest request: ${formatSlowRequest(slowRequest.label, slowRequest.durationMs, slowRequest.status)}` : "No slow-request outlier detected.", + ], + confidence: "high", + suspected_source: invertedGroup.groupKey, + next_step: "Verify whether a late async response overwrites newer user intent or state.", + raw_data: payload, + }); + } + + if (slowRequest && slowRequest.durationMs >= 1_000) { + return createVerdict({ + verdict: "slow_async_operations_detected", + summary: `${slowRequest.label} is the slowest recent request at ${slowRequest.durationMs}ms, which may amplify UI timing bugs.`, + evidence: [ + formatSlowRequest(slowRequest.label, slowRequest.durationMs, slowRequest.status), + `Tracked request groups: ${payload.summary.groups}`, + `Captured requests: ${payload.summary.totalRequests}`, + ], + confidence: "medium", + suspected_source: slowRequest.groupKey, + next_step: "Inspect the related request path and validate whether slow completion correlates with stale UI state.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "no_async_anomalies_detected", + summary: `Captured ${payload.summary.totalRequests} recent async requests without completion-order inversions or major latency outliers.`, + evidence: 
[ + `Tracked request groups: ${payload.summary.groups}`, + `Completion-order inversions: ${payload.summary.invertedGroups.length}`, + ], + confidence: "medium", + next_step: "If the issue still looks asynchronous, collect a longer trace or run an explicit race-condition diagnosis.", + raw_data: payload, + }); +} + +export function createRaceConditionVerdict( + payload: RaceConditionDiagnosisResponse +): DiagnosticVerdict< + "race_condition_detected" | "race_condition_inconclusive" | "race_condition_not_detected", + RaceConditionDiagnosisResponse +> { + if (payload.suspected) { + return createVerdict({ + verdict: "race_condition_detected", + summary: payload.diagnosis, + evidence: payload.evidence, + confidence: "high", + suspected_source: payload.invertedGroup?.groupKey, + next_step: "Guard late async responses so only the newest intent is allowed to update visible state.", + raw_data: payload, + }); + } + + if (payload.invertedGroup) { + return createVerdict({ + verdict: "race_condition_inconclusive", + summary: payload.diagnosis, + evidence: payload.evidence, + confidence: "medium", + suspected_source: payload.invertedGroup.groupKey, + next_step: "Compare the final UI text with the latest user intent or add stronger assertions around the stale state candidate.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "race_condition_not_detected", + summary: payload.diagnosis, + evidence: payload.evidence.length > 0 ? payload.evidence : ["No inverted completion order was detected."], + confidence: "medium", + next_step: "If the bug is intermittent, run a stress replay with tighter action timing and explicit assertions.", + raw_data: payload, + }); +} + +export function createHydrationIssuesVerdict( + payload: HydrationIssuesResponse +): DiagnosticVerdict<"hydration_issues_detected" | "hydration_issues_not_detected", HydrationIssuesResponse> { + const firstIssue = payload.issues[0] ?? 
null; + + if (!firstIssue) { + return createVerdict({ + verdict: "hydration_issues_not_detected", + summary: "No hydration-related warnings or exceptions were captured from the runtime console.", + evidence: ["Captured hydration issues: 0"], + confidence: "medium", + next_step: "If hydration is still suspected, reload the page from a clean state and inspect the first render again.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "hydration_issues_detected", + summary: `${payload.summary.total} hydration signal(s) detected, led by a ${firstIssue.kind} issue in ${firstIssue.framework}.`, + evidence: [ + `First issue: ${firstIssue.message}`, + `By kind: ${Object.entries(payload.summary.byKind) + .filter(([, count]) => count > 0) + .map(([kind, count]) => `${kind}=${count}`) + .join(", ")}`, + `By level: ${Object.entries(payload.summary.byLevel) + .filter(([, count]) => count > 0) + .map(([level, count]) => `${level}=${count}`) + .join(", ")}`, + ], + confidence: firstIssue.level === "error" || firstIssue.kind !== "warning" ? 
"high" : "medium", + suspected_source: `${firstIssue.framework}:${firstIssue.kind}`, + next_step: "Inspect the first server/client mismatch and compare the initial render inputs across server and browser.", + raw_data: payload, + }); +} diff --git a/src/tools/diagnostics.ts b/src/tools/diagnostics.ts index 8415861..c6f3a8c 100644 --- a/src/tools/diagnostics.ts +++ b/src/tools/diagnostics.ts @@ -9,6 +9,12 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { browserManager } from "../browser/index.js"; import type { InspectionResponseMode } from "../diagnostics/protocol.js"; +import { + createAsyncTimelineVerdict, + createHydrationIssuesVerdict, + createRaceConditionVerdict, + createRenderHotspotsVerdict, +} from "../diagnostics/verdict.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; @@ -202,8 +208,8 @@ export function register(server: McpServer): void { server.tool( "get_render_hotspots", [ - "List components that rendered too many times in a short window.", - "Use the threshold and window to detect likely render explosions and get a probable-cause hint.", + "Diagnose likely rerender explosions and return a verdict-first summary with evidence, confidence, and next_step.", + "raw_data still contains the detailed hotspot list when deeper inspection is needed.", ].join(" "), { url: z @@ -236,7 +242,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createRenderHotspotsVerdict(result)); } catch (e) { return err(`get_render_hotspots failed unexpectedly: ${String(e)}`); } @@ -291,8 +297,8 @@ export function register(server: McpServer): void { server.tool( "get_race_condition_diagnosis", [ - "Explain a likely UI race condition by comparing the final visible state with the recent async 
timeline.", - "Useful when a stale response may have overwritten a newer user intent.", + "Diagnose whether a stale async response likely overwrote newer UI intent.", + "Returns a verdict-first response with evidence, confidence, and next_step plus raw_data for the full trace.", ].join(" "), { url: z @@ -315,7 +321,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getRaceConditionDiagnosis(url, stateSelector, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createRaceConditionVerdict(result)); } catch (e) { return err(`get_race_condition_diagnosis failed unexpectedly: ${String(e)}`); } @@ -328,8 +334,8 @@ export function register(server: McpServer): void { server.tool( "get_async_timeline", [ - "Return an async timeline derived from the captured fetch/XHR lifecycle.", - "Useful for spotting concurrent requests, slow operations, and completion order inversions.", + "Diagnose async request ordering and latency patterns from captured fetch/XHR activity.", + "Returns a verdict-first summary while preserving the full timeline in raw_data.", ].join(" "), { url: z @@ -348,7 +354,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getAsyncTimeline(url, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createAsyncTimelineVerdict(result)); } catch (e) { return err(`get_async_timeline failed unexpectedly: ${String(e)}`); } @@ -361,8 +367,8 @@ export function register(server: McpServer): void { server.tool( "get_hydration_issues", [ - "Return normalized hydration-related warnings and exceptions captured from the runtime console.", - "Each entry is tagged as hydration and classified to help separate SSR/client mismatch issues from other failures.", + "Diagnose server/client hydration failures from runtime console signals and return a verdict-first summary.", + "raw_data preserves the normalized hydration 
entries for detailed inspection.", ].join(" "), { url: z @@ -381,7 +387,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getHydrationIssues(url, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createHydrationIssuesVerdict(result)); } catch (e) { return err(`get_hydration_issues failed unexpectedly: ${String(e)}`); } From eeb7b9103866b41375db854c8ba3a4d5455304ee Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Wed, 13 May 2026 23:48:16 +0200 Subject: [PATCH 2/9] feat: [SCRUM-437] add high-level runtime investigations Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/capabilities.ts | 24 ++ src/diagnostics/investigation.ts | 371 +++++++++++++++++++++++++++++++ src/tools/diagnostics.ts | 175 +++++++++++++++ 3 files changed, 570 insertions(+) create mode 100644 src/diagnostics/investigation.ts diff --git a/src/capabilities.ts b/src/capabilities.ts index 1cb9223..4bf71bb 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -95,6 +95,24 @@ const capabilityCatalog = { modes: ["replay"], summary: "Flag render explosions in replay mode.", }, + diagnose_excess_renders: { + status: "available", + tools: ["diagnose_excess_renders"], + modes: ["replay"], + summary: "High-level replay investigation for excess renders, render loops, and context churn.", + }, + find_memo_breaks: { + status: "available", + tools: ["find_memo_breaks"], + modes: ["replay"], + summary: "High-level replay investigation for memo breaks versus context cascades.", + }, + diagnose_runtime_bug: { + status: "available", + tools: ["diagnose_runtime_bug"], + modes: ["replay"], + summary: "Verdict-first runtime bug triage for vague symptoms before drilling into atomic tools.", + }, get_hook_changes: { status: "available", tools: ["get_hook_changes"], @@ -125,6 +143,12 @@ const capabilityCatalog = { modes: ["replay"], summary: "Replay-mode render monitor for loops and unstable hooks.", 
}, + investigation_tools: { + status: "available", + tools: ["diagnose_excess_renders", "find_memo_breaks", "diagnose_runtime_bug"], + modes: ["replay"], + summary: "Prefer these verdict-first investigations before chaining the lower-level atomic diagnostics yourself.", + }, replay_sandbox: { status: "available", tools: ["navigate_replay", "browser_ping", "replay_interactions"], diff --git a/src/diagnostics/investigation.ts b/src/diagnostics/investigation.ts new file mode 100644 index 0000000..9735476 --- /dev/null +++ b/src/diagnostics/investigation.ts @@ -0,0 +1,371 @@ +import type { + AsyncTimelineResponse, + ComponentInspectionResponse, + ConsoleEventsResponse, + DiagnosticVerdict, + HookChangesResponse, + HydrationIssuesResponse, + RaceConditionDiagnosisResponse, + RenderCountsResponse, + RenderHotspotsResponse, + RuntimeStatus, +} from "./protocol.js"; + +type ExcessRenderVerdict = + | "render_loop_detected" + | "memo_break_suspected" + | "context_cascade_suspected" + | "hook_instability_detected" + | "no_excess_renders_detected" + | "excess_renders_inconclusive"; + +type ExcessRenderRawData = { + runtime_status: RuntimeStatus; + render_counts: RenderCountsResponse; + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + +type MemoBreakVerdict = + | "memo_break_suspected" + | "context_cascade_suspected" + | "internal_state_instability_detected" + | "memo_break_not_detected" + | "memo_break_inconclusive"; + +type MemoBreakRawData = { + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + +type RuntimeBugVerdict = + | "hydration_failure_detected" + | "race_condition_detected" + | "render_instability_detected" + | "runtime_error_detected" + | "runtime_bug_inconclusive"; + +type RuntimeBugRawData = { + runtime_status: RuntimeStatus; + console_events: ConsoleEventsResponse; + hydration_issues: 
HydrationIssuesResponse; + async_timeline: AsyncTimelineResponse; + race_condition?: RaceConditionDiagnosisResponse; + render_hotspots: RenderHotspotsResponse; +}; + +function matchesComponent(componentName: string, candidateName: string, pathText: string): boolean { + return candidateName === componentName || pathText.split(" > ").includes(componentName); +} + +function buildContextEvidence(component?: ComponentInspectionResponse): string[] { + const contexts = component?.component?.contexts ?? []; + if (contexts.length === 0) { + return []; + } + + return [ + `Observed contexts/providers: ${contexts.map((context) => `${context.name} (${context.source})`).join(", ")}`, + ]; +} + +function createDiagnosis<TVerdict extends string, TRawData>(seed: { + verdict: TVerdict; + summary: string; + evidence: string[]; + confidence: "low" | "medium" | "high"; + suspected_source?: string; + next_step?: string; + raw_data: TRawData; +}): DiagnosticVerdict<TVerdict, TRawData> { + return seed; +} + +export function createExcessRenderDiagnosis(seed: { + componentName?: string; + runtimeStatus: RuntimeStatus; + renderCounts: RenderCountsResponse; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict<ExcessRenderVerdict, ExcessRenderRawData> { + const hotspot = + seed.componentName + ? seed.renderHotspots.hotspots.find((entry) => matchesComponent(seed.componentName ?? "", entry.componentName, entry.pathText)) ?? null + : seed.renderHotspots.hotspots[0] ?? null; + + const raw_data: ExcessRenderRawData = { + runtime_status: seed.runtimeStatus, + render_counts: seed.renderCounts, + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? 
{ component_inspection: seed.inspection } : {}), + }; + + if (!hotspot) { + return createDiagnosis({ + verdict: "no_excess_renders_detected", + summary: "No component currently shows a strong excess-render signature in the replay monitor.", + evidence: [ + `Observed components: ${seed.renderCounts.summary.totalComponents}`, + `Observed commits: ${seed.renderCounts.summary.observedCommits}`, + `Detected hotspots: ${seed.renderHotspots.hotspots.length}`, + ], + confidence: seed.renderCounts.summary.observedCommits >= 1 ? "medium" : "low", + next_step: "Replay the failing scenario again or lower the hotspot threshold if the render spike is intermittent.", + raw_data, + }); + } + + const hookLead = seed.hookChanges?.summary.suspiciousHooks[0] ?? null; + const contexts = seed.inspection?.component?.contexts ?? []; + const hotspotEvidence = [ + `Top hotspot: ${hotspot.pathText} (${hotspot.recentRenderCount} renders in ${hotspot.windowMs}ms)`, + `Probable cause from render monitor: ${hotspot.probableCause.summary}`, + ...(seed.hookChanges ? 
[`Hook-change summary: ${seed.hookChanges.summary.probableCause}`] : []), + ...buildContextEvidence(seed.inspection), + ]; + + if (hotspot.probableCause.type === "unstable_props") { + if (contexts.length > 0) { + return createDiagnosis({ + verdict: "context_cascade_suspected", + summary: `${hotspot.componentName} rerenders look driven by upstream context/provider churn rather than a local render loop.`, + evidence: hotspotEvidence, + confidence: "medium", + suspected_source: contexts[0]?.name, + next_step: "Inspect the nearest provider value and memoize or narrow the context payload if it is recreated every render.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "memo_break_suspected", + summary: `${hotspot.componentName} is rerendering with unstable props, which strongly suggests a memo break upstream.`, + evidence: hotspotEvidence, + confidence: "high", + suspected_source: hotspot.pathText, + next_step: "Inspect the parent props feeding this component and memoize recreated objects, arrays, or callbacks.", + raw_data, + }); + } + + if (hotspot.probableCause.type === "unstable_state") { + return createDiagnosis({ + verdict: "render_loop_detected", + summary: `${hotspot.componentName} appears stuck in a state-driven render loop.`, + evidence: hotspotEvidence, + confidence: "high", + suspected_source: hookLead ? `state hook #${hookLead.hookIndex}` : hotspot.pathText, + next_step: "Inspect the state update path or effect dependencies that keep feeding this component new state.", + raw_data, + }); + } + + if (hotspot.probableCause.type === "unstable_hook_value") { + return createDiagnosis({ + verdict: "hook_instability_detected", + summary: `${hotspot.componentName} is rerendering because one hook value keeps changing across renders.`, + evidence: hotspotEvidence, + confidence: hookLead?.suspected ? "high" : "medium", + suspected_source: hookLead ? 
hook #${hookLead.hookIndex}` : hotspot.pathText, + next_step: "Inspect the unstable hook output and memoize or debounce the value that changes every render.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "excess_renders_inconclusive", + summary: `${hotspot.componentName} clearly rerenders too often, but the dominant cause is still inconclusive.`, + evidence: hotspotEvidence, + confidence: "medium", + suspected_source: hotspot.pathText, + next_step: "Inspect the component tree around this hotspot and compare prop, context, and effect churn together.", + raw_data, + }); +} + +export function createMemoBreakDiagnosis(seed: { + componentName?: string; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict<MemoBreakVerdict, MemoBreakRawData> { + const hotspotCandidates = seed.renderHotspots.hotspots.filter((entry) => + seed.componentName ? matchesComponent(seed.componentName, entry.componentName, entry.pathText) : true + ); + const target = + hotspotCandidates.find((entry) => entry.probableCause.type === "unstable_props") ?? + hotspotCandidates[0] ?? + null; + const contexts = seed.inspection?.component?.contexts ?? []; + const raw_data: MemoBreakRawData = { + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? { component_inspection: seed.inspection } : {}), + }; + + if (!target) { + return createDiagnosis({ + verdict: "memo_break_not_detected", + summary: "No strong memo-break signal was found in the current render hotspot set.", + evidence: [`Detected hotspots: ${seed.renderHotspots.hotspots.length}`], + confidence: "medium", + next_step: "Reproduce the issue under replay and target a specific component if the rerender suspect is already known.", + raw_data, + }); + } + + const evidence = [ + `Candidate component: ${target.pathText}`, + `Render monitor cause: ${target.probableCause.summary}`, + ...(seed.hookChanges ? 
[`Hook-change summary: ${seed.hookChanges.summary.probableCause}`] : []), + ...buildContextEvidence(seed.inspection), + ]; + + if (target.probableCause.type === "unstable_props" && contexts.length === 0) { + return createDiagnosis({ + verdict: "memo_break_suspected", + summary: `${target.componentName} rerenders with changing props and no dominant local hook churn, which is consistent with a memo break.`, + evidence, + confidence: "high", + suspected_source: target.pathText, + next_step: "Inspect the parent render path and stabilize recreated prop references passed into this component.", + raw_data, + }); + } + + if (contexts.length > 0) { + return createDiagnosis({ + verdict: "context_cascade_suspected", + summary: `${target.componentName} looks more affected by context/provider churn than by a classic memo break.`, + evidence, + confidence: "medium", + suspected_source: contexts[0]?.name, + next_step: "Inspect the provider value identity and split or memoize the context if consumers rerender too broadly.", + raw_data, + }); + } + + if ((seed.hookChanges?.summary.suspiciousHooks[0]?.suspected ?? 
false) || target.probableCause.type === "unstable_state") { + return createDiagnosis({ + verdict: "internal_state_instability_detected", + summary: `${target.componentName} is rerendering because its own hook or state values keep changing, so the issue is not primarily a memo break.`, + evidence, + confidence: "medium", + suspected_source: target.pathText, + next_step: "Inspect the unstable hook or effect before optimizing parent memoization.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "memo_break_inconclusive", + summary: `${target.componentName} remains a memo-break candidate, but the current runtime signals are not decisive.`, + evidence, + confidence: "medium", + suspected_source: target.pathText, + next_step: "Collect a longer replay trace or inspect the component's parent chain to confirm whether prop identity churn is real.", + raw_data, + }); +} + +export function createRuntimeBugDiagnosis(seed: { + symptom: string; + runtimeStatus: RuntimeStatus; + consoleEvents: ConsoleEventsResponse; + hydrationIssues: HydrationIssuesResponse; + asyncTimeline: AsyncTimelineResponse; + renderHotspots: RenderHotspotsResponse; + raceCondition?: RaceConditionDiagnosisResponse; +}): DiagnosticVerdict<RuntimeBugVerdict, RuntimeBugRawData> { + const symptom = seed.symptom.toLowerCase(); + const firstError = seed.consoleEvents.events.find((event) => event.type === "error" || event.type === "exception") ?? null; + const firstHotspot = seed.renderHotspots.hotspots[0] ?? null; + const raw_data: RuntimeBugRawData = { + runtime_status: seed.runtimeStatus, + console_events: seed.consoleEvents, + hydration_issues: seed.hydrationIssues, + async_timeline: seed.asyncTimeline, + ...(seed.raceCondition ? 
{ race_condition: seed.raceCondition } : {}), + render_hotspots: seed.renderHotspots, + }; + + if (seed.hydrationIssues.summary.total > 0 && (/hydr|ssr|server/.test(symptom) || firstError !== null)) { + const firstIssue = seed.hydrationIssues.issues[0]; + return createDiagnosis({ + verdict: "hydration_failure_detected", + summary: `The strongest runtime signal points to hydration failure: ${firstIssue?.message ?? "hydration warnings were captured"}.`, + evidence: [ + `Hydration issues captured: ${seed.hydrationIssues.summary.total}`, + ...(firstError ? [`Console error: ${firstError.text}`] : []), + ], + confidence: "high", + suspected_source: firstIssue ? `${firstIssue.framework}:${firstIssue.kind}` : "hydration", + next_step: "Compare server-rendered and client-rendered inputs on the first load to isolate the mismatch source.", + raw_data, + }); + } + + if (seed.raceCondition?.suspected || seed.asyncTimeline.summary.invertedGroups.length > 0) { + return createDiagnosis({ + verdict: "race_condition_detected", + summary: seed.raceCondition?.diagnosis ?? `Async requests for ${seed.asyncTimeline.summary.invertedGroups[0]?.groupKey ?? "one group"} completed out of order.`, + evidence: [ + ...(seed.raceCondition?.evidence ?? []), + `Completion-order inversions: ${seed.asyncTimeline.summary.invertedGroups.length}`, + ], + confidence: seed.raceCondition?.suspected ? "high" : "medium", + suspected_source: + seed.raceCondition?.invertedGroup?.groupKey ?? 
seed.asyncTimeline.summary.invertedGroups[0]?.groupKey, + next_step: "Verify that only the latest async intent can update visible state and guard stale completions.", + raw_data, + }); + } + + if (firstHotspot && (/render|rerender|freeze|slow|loop/.test(symptom) || seed.renderHotspots.hotspots.length > 0)) { + return createDiagnosis({ + verdict: "render_instability_detected", + summary: `${firstHotspot.componentName} is the clearest runtime suspect because it rerendered ${firstHotspot.recentRenderCount} times in ${firstHotspot.windowMs}ms.`, + evidence: [ + `Top hotspot: ${firstHotspot.pathText}`, + `Probable cause: ${firstHotspot.probableCause.summary}`, + ], + confidence: "medium", + suspected_source: firstHotspot.pathText, + next_step: "Run the excess-render investigation on this component to determine whether props, hooks, or context are responsible.", + raw_data, + }); + } + + if (firstError) { + return createDiagnosis({ + verdict: "runtime_error_detected", + summary: `The strongest runtime signal is a console-level failure: ${firstError.text}`, + evidence: [ + `Console events captured: ${seed.consoleEvents.events.length}`, + firstError.location ? 
`Location: ${firstError.location}` : "No source location was attached to the error.", + ], + confidence: "medium", + suspected_source: firstError.location, + next_step: "Inspect the failing runtime path and reproduce the same error under replay or patch validation.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "runtime_bug_inconclusive", + summary: `React-Sentinel captured runtime signals for "${seed.symptom}" but none stand out strongly enough to explain the bug yet.`, + evidence: [ + `Hydration issues: ${seed.hydrationIssues.summary.total}`, + `Async inversions: ${seed.asyncTimeline.summary.invertedGroups.length}`, + `Render hotspots: ${seed.renderHotspots.hotspots.length}`, + `Console errors: ${seed.consoleEvents.events.filter((event) => event.type === "error" || event.type === "exception").length}`, + ], + confidence: "low", + next_step: "Refine the symptom with a target component or state selector and replay the failure again to gather stronger evidence.", + raw_data, + }); +} diff --git a/src/tools/diagnostics.ts b/src/tools/diagnostics.ts index c6f3a8c..f84a245 100644 --- a/src/tools/diagnostics.ts +++ b/src/tools/diagnostics.ts @@ -8,6 +8,11 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { browserManager } from "../browser/index.js"; +import { + createExcessRenderDiagnosis, + createMemoBreakDiagnosis, + createRuntimeBugDiagnosis, +} from "../diagnostics/investigation.js"; import type { InspectionResponseMode } from "../diagnostics/protocol.js"; import { createAsyncTimelineVerdict, @@ -23,6 +28,22 @@ const inspectionResponseModeSchema = z .default("full") .describe("Choose 'compact' to aggressively trim long inspection payloads for AI consumption."); +const hotspotThresholdSchema = z + .number() + .int() + .min(1) + .max(100) + .optional() + .describe("Minimum renders inside the observation window before a component is treated as suspicious. 
Default is 8."); + +const hotspotWindowSchema = z + .number() + .int() + .min(100) + .max(30_000) + .optional() + .describe("Observation window in milliseconds used to detect rapid rerenders. Default is 1000ms."); + export const DIAGNOSTIC_TOOL_NAMES = [ "get_runtime_status", "get_react_tree", @@ -36,6 +57,9 @@ export const DIAGNOSTIC_TOOL_NAMES = [ "get_hydration_issues", "get_console_events", "get_runtime_timeline", + "diagnose_excess_renders", + "find_memo_breaks", + "diagnose_runtime_bug", ] as const; export function register(server: McpServer): void { @@ -446,4 +470,155 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "diagnose_excess_renders", + [ + "High-level render investigation for replay-mode React bugs.", + "Orchestrates runtime status, render counts, hotspots, hook changes, and component inspection to explain why a component rerenders too often.", + "Prefer this over manually chaining the atomic render tools when you need a verdict first.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).optional().describe("Optional component to focus on. When omitted, React-Sentinel diagnoses the top hotspot."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of render counters and hotspots to inspect. 
Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const [runtimeStatus, renderCounts, renderHotspots] = await Promise.all([ + browserManager.getRuntimeStatus(url), + browserManager.getRenderCounts(url, limit), + browserManager.getRenderHotspots(url, threshold, windowMs, limit), + ]); + if ("error" in runtimeStatus) return err(runtimeStatus.error); + if ("error" in renderCounts) return err(renderCounts.error); + if ("error" in renderHotspots) return err(renderHotspots.error); + + const target = componentName ?? renderHotspots.hotspots[0]?.componentName ?? undefined; + const targetPathText = + renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const [hookChanges, inspection] = target + ? await Promise.all([ + browserManager.getHookChanges(url, target, targetPathText, 50), + browserManager.inspectComponent(url, target, "compact"), + ]) + : [undefined, undefined]; + + if (hookChanges && "error" in hookChanges) return err(hookChanges.error); + if (inspection && "error" in inspection) return err(inspection.error); + + return ok( + createExcessRenderDiagnosis({ + componentName, + runtimeStatus, + renderCounts, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? { inspection } : {}), + }) + ); + } catch (e) { + return err(`diagnose_excess_renders failed unexpectedly: ${String(e)}`); + } + } + ); + + server.tool( + "find_memo_breaks", + [ + "High-level investigation that searches for likely React memo breaks or context cascades.", + "Combines render hotspots, hook churn, and component inspection so the caller gets a verdict instead of raw render data.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).optional().describe("Optional component to inspect directly. 
When omitted, React-Sentinel picks the strongest hotspot candidate."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of hotspots to inspect. Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + if ("error" in renderHotspots) return err(renderHotspots.error); + + const target = + componentName ?? + renderHotspots.hotspots.find((entry) => entry.probableCause.type === "unstable_props")?.componentName ?? + renderHotspots.hotspots[0]?.componentName ?? + undefined; + const targetPathText = + renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const [hookChanges, inspection] = target + ? await Promise.all([ + browserManager.getHookChanges(url, target, targetPathText, 50), + browserManager.inspectComponent(url, target, "compact"), + ]) + : [undefined, undefined]; + + if (hookChanges && "error" in hookChanges) return err(hookChanges.error); + if (inspection && "error" in inspection) return err(inspection.error); + + return ok( + createMemoBreakDiagnosis({ + componentName, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? 
{ inspection } : {}), + }) + ); + } catch (e) { + return err(`find_memo_breaks failed unexpectedly: ${String(e)}`); + } + } + ); + + server.tool( + "diagnose_runtime_bug", + [ + "High-level entry point for vague runtime symptoms such as stale UI, random errors, hydration failures, or unexplained slowness.", + "Orchestrates console, hydration, async, and render diagnostics and returns the strongest verdict first.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + symptom: z.string().min(3).max(200).describe("Short natural-language description of the runtime symptom to bias the diagnosis."), + stateSelector: z.string().min(1).optional().describe("Optional CSS selector that exposes the final visible state when race conditions are suspected."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of async and render events to inspect. Default is 20."), + }, + async ({ url, symptom, stateSelector, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const [runtimeStatus, consoleEvents, hydrationIssues, asyncTimeline, renderHotspots, raceCondition] = await Promise.all([ + browserManager.getRuntimeStatus(url), + browserManager.getConsoleEvents(url), + browserManager.getHydrationIssues(url, limit), + browserManager.getAsyncTimeline(url, limit), + browserManager.getRenderHotspots(url, threshold, windowMs, limit), + stateSelector ? 
browserManager.getRaceConditionDiagnosis(url, stateSelector, limit) : Promise.resolve(undefined), + ]); + if ("error" in runtimeStatus) return err(runtimeStatus.error); + if ("error" in consoleEvents) return err(consoleEvents.error); + if ("error" in hydrationIssues) return err(hydrationIssues.error); + if ("error" in asyncTimeline) return err(asyncTimeline.error); + if ("error" in renderHotspots) return err(renderHotspots.error); + if (raceCondition && "error" in raceCondition) return err(raceCondition.error); + + return ok( + createRuntimeBugDiagnosis({ + symptom, + runtimeStatus, + consoleEvents, + hydrationIssues, + asyncTimeline, + renderHotspots, + ...(raceCondition ? { raceCondition } : {}), + }) + ); + } catch (e) { + return err(`diagnose_runtime_bug failed unexpectedly: ${String(e)}`); + } + } + ); } From 4a042aafeedbf03d638a0190c63146e703ead7d0 Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Wed, 13 May 2026 23:50:47 +0200 Subject: [PATCH 3/9] feat: [SCRUM-438] add deterministic replay stress testing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/capabilities.ts | 6 + src/tools/interaction.ts | 252 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 257 insertions(+), 1 deletion(-) diff --git a/src/capabilities.ts b/src/capabilities.ts index 4bf71bb..7a94d06 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -161,6 +161,12 @@ const capabilityCatalog = { modes: ["replay", "sandbox"], summary: "Replay a scripted interaction sequence in the isolated browser.", }, + find_race_conditions: { + status: "available", + tools: ["find_race_conditions"], + modes: ["replay", "sandbox"], + summary: "Stress-test replay scenarios with adversarial timing to reproduce intermittent races and return minimal failing sequences.", + }, validate_scenario: { status: "available", tools: ["validate_scenario"], diff --git a/src/tools/interaction.ts b/src/tools/interaction.ts index dba99b5..e79c411 100644 --- a/src/tools/interaction.ts 
+++ b/src/tools/interaction.ts @@ -7,7 +7,8 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { browserManager } from "../browser/index.js"; -import type { Assertion, ValidationScenarioResponse } from "../browser/protocol.js"; +import type { Assertion, ReplayStep, ValidationScenarioResponse } from "../browser/protocol.js"; +import type { DiagnosticVerdict } from "../diagnostics/protocol.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; @@ -97,6 +98,166 @@ export const assertionSchema = z.discriminatedUnion("type", [ }), ]); +const stressTimingStrategySchema = z + .enum(["none", "adversarial"]) + .default("adversarial"); + +const stressDelayProfileSchema = z + .array(z.number().int().min(0).max(5_000)) + .min(1) + .max(12) + .optional(); + +type StressIterationResult = { + iteration: number; + delaysMs: number[]; + success: boolean; + failureReasons: string[]; + report: ValidationScenarioResponse; +}; + +type StressTestVerdict = + | "stress_test_passed" + | "intermittent_failure_detected"; + +type StressTestRawData = { + iterations: StressIterationResult[]; + minimal_reproduction?: ReplayStep[]; +}; + +function countAdversarialGaps(steps: ReplayStep[]): number { + return steps.filter((step) => step.action !== "wait").length; +} + +function createWaitStep(durationMs: number): ReplayStep { + return { action: "wait", durationMs }; +} + +function buildDelaySchedule( + iteration: number, + slots: number, + strategy: "none" | "adversarial", + profile: number[] +): number[] { + if (strategy === "none") { + return Array.from({ length: slots }, () => 0); + } + + return Array.from({ length: slots }, (_, index) => profile[(iteration + index) % profile.length] ?? 
0); +} + +function injectAdversarialWaits(steps: ReplayStep[], delaysMs: number[]): ReplayStep[] { + if (delaysMs.length === 0) { + return [...steps]; + } + + const expanded: ReplayStep[] = []; + let delayIndex = 0; + + for (const step of steps) { + expanded.push(step); + if (step.action === "wait") { + continue; + } + + const delayMs = delaysMs[delayIndex] ?? 0; + delayIndex += 1; + if (delayMs > 0) { + expanded.push(createWaitStep(delayMs)); + } + } + + return expanded; +} + +function summarizeFailureReasons(report: ValidationScenarioResponse): string[] { + const stepFailures = report.steps + .filter((step) => !step.success) + .map((step) => `Step #${step.index} ${step.step.action} failed${step.error ? `: ${step.error}` : "."}`); + const assertionFailures = report.assertions + .filter((assertion) => !assertion.pass) + .map((assertion) => assertion.details ?? "Assertion failed."); + + return [...stepFailures, ...assertionFailures]; +} + +async function minimizeFailingSequence( + steps: ReplayStep[], + assertions: Assertion[], + options: { + url?: string; + headless?: boolean; + waitUntil?: "load" | "domcontentloaded" | "networkidle"; + timeoutMs?: number; + continueOnError?: boolean; + waitMs?: number; + } +): Promise { + let current = [...steps]; + let changed = true; + + while (changed && current.length > 1) { + changed = false; + + for (let index = 0; index < current.length; index += 1) { + const candidate = current.filter((_, candidateIndex) => candidateIndex !== index); + const report = await browserManager.runValidationScenario(candidate, assertions, { + ...options, + resetSession: true, + }); + if ("error" in report || report.success) { + continue; + } + + current = candidate; + changed = true; + break; + } + } + + return current.length === steps.length ? 
null : current; +} + +function createStressTestVerdict(seed: { + iterations: StressIterationResult[]; + minimalReproduction?: ReplayStep[] | null; +}): DiagnosticVerdict { + const failedIterations = seed.iterations.filter((iteration) => !iteration.success); + const raw_data: StressTestRawData = { + iterations: seed.iterations, + ...(seed.minimalReproduction ? { minimal_reproduction: seed.minimalReproduction } : {}), + }; + + if (failedIterations.length === 0) { + return { + verdict: "stress_test_passed", + summary: `All ${seed.iterations.length} stress iterations passed without reproducing the target inconsistency.`, + evidence: [ + `Passed iterations: ${seed.iterations.length}/${seed.iterations.length}`, + ], + confidence: "medium", + next_step: "Increase the iteration count or widen the adversarial delay profile if the bug is rarer than this run.", + raw_data, + }; + } + + const firstFailure = failedIterations[0]; + return { + verdict: "intermittent_failure_detected", + summary: `${failedIterations.length}/${seed.iterations.length} stress iterations failed, which confirms an intermittent runtime bug under adversarial timing.`, + evidence: [ + `First failing iteration: #${firstFailure.iteration + 1}`, + `Failure reasons: ${firstFailure.failureReasons.join(" | ") || "Assertion failed without extra details."}`, + `Minimal reproduction found: ${seed.minimalReproduction ? "yes" : "no"}`, + ], + confidence: failedIterations.length >= 2 ? "high" : "medium", + next_step: seed.minimalReproduction + ? "Replay the minimized failing sequence or feed it into verify_hypothesis / verify_fix." 
+ : "Inspect the failing iteration trace and tighten assertions around the inconsistent state.", + raw_data, + }; +} + function formatAssertion(assertion: Assertion): string { switch (assertion.type) { case "text_present": @@ -179,6 +340,7 @@ export const INTERACTION_TOOL_NAMES = [ "validate_after_action", "validate_scenario", "replay_interactions", + "find_race_conditions", ] as const; export function register(server: McpServer): void { @@ -334,4 +496,92 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "find_race_conditions", + [ + "Stress-test a replay scenario across multiple iterations with optional adversarial delays between actions.", + "Returns pass/fail per iteration, highlights intermittent failures, and attempts to shrink the first failing sequence into a minimal reproduction.", + "Use assertions as invariants that define the inconsistent runtime state you want React-Sentinel to catch.", + ].join(" "), + { + url: z.string().url().optional().describe("Optional URL to open in the replay browser before each iteration."), + steps: z.array(replayStepSchema).min(1).describe("Base replay steps. React-Sentinel may inject extra wait steps between actions when adversarial timing is enabled."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that define the inconsistent state to catch."), + iterations: z.number().int().min(1).max(25).optional().default(7).describe("How many replay iterations to execute."), + timingStrategy: stressTimingStrategySchema.describe("Choose 'adversarial' to vary delays between interactions across iterations."), + delayProfileMs: stressDelayProfileSchema.describe("Optional delay profile in milliseconds. 
Defaults to 0, 25, 75, 150, 300, 600."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + minimizeFailure: z.boolean().optional().default(true).describe("Attempt to shrink the first failing sequence into a smaller reproduction."), + }, + async ({ + url, + steps, + assertions, + iterations, + timingStrategy, + delayProfileMs, + headless, + waitUntil, + timeoutMs, + continueOnError, + waitMs, + minimizeFailure, + }): Promise => { + try { + const delayProfile = delayProfileMs ?? [0, 25, 75, 150, 300, 600]; + const gaps = countAdversarialGaps(steps); + const iterationResults: StressIterationResult[] = []; + + for (let iteration = 0; iteration < iterations; iteration += 1) { + const delaysMs = buildDelaySchedule(iteration, gaps, timingStrategy, delayProfile); + const iterationSteps = injectAdversarialWaits(steps, delaysMs); + const report = await browserManager.runValidationScenario(iterationSteps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + iterationResults.push({ + iteration, + delaysMs, + success: report.success, + failureReasons: summarizeFailureReasons(report), + report, + }); + } + + const firstFailure = iterationResults.find((iteration) => !iteration.success) ?? 
null; + const minimalReproduction = + minimizeFailure && firstFailure + ? await minimizeFailingSequence(firstFailure.report.steps.map((step) => step.step), assertions, { + url, + headless, + waitUntil, + timeoutMs, + continueOnError, + waitMs, + }) + : null; + + return ok( + createStressTestVerdict({ + iterations: iterationResults, + minimalReproduction, + }) + ); + } catch (e) { + return err(`find_race_conditions failed unexpectedly: ${String(e)}`); + } + } + ); } From b31daca9ba4f4724545428ba0b8a09a325dbbe5e Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Wed, 13 May 2026 23:53:26 +0200 Subject: [PATCH 4/9] feat: [SCRUM-436] add hypothesis and fix verification tools Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/capabilities.ts | 14 +- src/tools/patch.ts | 316 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 329 insertions(+), 1 deletion(-) diff --git a/src/capabilities.ts b/src/capabilities.ts index 7a94d06..9738db1 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -185,6 +185,18 @@ const capabilityCatalog = { modes: ["sandbox"], summary: "Patch, replay, and validate in one sandbox flow.", }, + verify_hypothesis: { + status: "available", + tools: ["verify_hypothesis"], + modes: ["replay", "sandbox"], + summary: "Confirm, refute, or partially support a runtime hypothesis before touching source code.", + }, + verify_fix: { + status: "available", + tools: ["verify_fix"], + modes: ["sandbox"], + summary: "Compare baseline versus patched replay behavior to validate a runtime fix and surface regressions.", + }, reset_runtime_patches: { status: "available", tools: ["reset_runtime_patches"], @@ -193,7 +205,7 @@ const capabilityCatalog = { }, shadow_sandbox: { status: "partial", - tools: ["apply_runtime_patch", "apply_patch_then_replay", "reset_runtime_patches"], + tools: ["apply_runtime_patch", "apply_patch_then_replay", "verify_fix", "reset_runtime_patches"], modes: ["sandbox"], summary: "Shadow sandbox is 
available for script-on-page patches only; broader patch shapes are still planned.", }, diff --git a/src/tools/patch.ts b/src/tools/patch.ts index fb739b5..176d95c 100644 --- a/src/tools/patch.ts +++ b/src/tools/patch.ts @@ -7,7 +7,10 @@ import type { RuntimePatch, RuntimePatchApplyResponse, RuntimePatchResetResponse, + ValidationResult, + ValidationScenarioResponse, } from "../browser/protocol.js"; +import type { DiagnosticVerdict } from "../diagnostics/protocol.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; import { assertionSchema, buildScenarioMarkdown, replayStepSchema } from "./interaction.js"; @@ -30,6 +33,151 @@ const replayWaitUntilSchema = z const resetStrategySchema = z.enum(["reload", "reset_session"]); +type VerificationVerdict = "CONFIRMED" | "REFUTED" | "PARTIAL"; + +type HypothesisVerificationRawData = { + hypothesis: string; + report: ValidationScenarioResponse; +}; + +type FixVerificationRawData = { + fix_description: string; + baseline: ValidationScenarioResponse; + patched: PatchedValidationScenarioResponse; + regression_assertions: Assertion[]; +}; + +function countAssertionFailures(results: ValidationResult[]): number { + return results.filter((result) => !result.pass).length; +} + +function buildHypothesisMarkdown( + hypothesis: string, + verdict: VerificationVerdict, + report: ValidationScenarioResponse +): string { + return [ + "# Hypothesis Verification Report", + "", + `- Hypothesis: ${hypothesis}`, + `- Verdict: ${verdict}`, + "", + buildScenarioMarkdown(report), + ].join("\n"); +} + +function buildFixVerificationMarkdown( + fixDescription: string, + verdict: VerificationVerdict, + baseline: ValidationScenarioResponse, + patched: PatchedValidationScenarioResponse, + regressionAssertions: Assertion[] +): string { + const lines = [ + "# Fix Verification Report", + "", + `- Fix: ${fixDescription}`, + `- Verdict: ${verdict}`, + `- Regression assertions: ${regressionAssertions.length}`, + "", + 
"## Baseline", + buildScenarioMarkdown(baseline), + "", + "## Patched Run", + buildPatchMarkdown(patched.verdict, patched.apply, patched.report, patched.cleanup), + ]; + + return lines.join("\n"); +} + +function createHypothesisVerdict(seed: { + hypothesis: string; + report: ValidationScenarioResponse; +}): DiagnosticVerdict { + const stepFailures = seed.report.steps.filter((step) => !step.success).length; + const assertionFailures = countAssertionFailures(seed.report.assertions); + const verdict: VerificationVerdict = + stepFailures === 0 && assertionFailures === 0 + ? "CONFIRMED" + : stepFailures === 0 && assertionFailures === seed.report.assertions.length + ? "REFUTED" + : "PARTIAL"; + + return { + verdict, + summary: + verdict === "CONFIRMED" + ? `The runtime evidence confirms the hypothesis: ${seed.hypothesis}` + : verdict === "REFUTED" + ? `The runtime evidence does not support the hypothesis: ${seed.hypothesis}` + : `The runtime evidence only partially supports the hypothesis: ${seed.hypothesis}`, + evidence: [ + `Failed steps: ${stepFailures}`, + `Failed assertions: ${assertionFailures}/${seed.report.assertions.length}`, + ], + confidence: verdict === "PARTIAL" ? "medium" : "high", + next_step: + verdict === "CONFIRMED" + ? "Use the failing evidence to design or verify a targeted fix." + : verdict === "REFUTED" + ? "Refine the hypothesis or change the reproduction protocol before editing source code." 
+ : "Tighten the assertions or reproduction steps to make the result decisive.", + raw_data: { + hypothesis: seed.hypothesis, + report: seed.report, + }, + }; +} + +function createFixVerdict(seed: { + fixDescription: string; + baseline: ValidationScenarioResponse; + patched: PatchedValidationScenarioResponse; + regressionAssertions: Assertion[]; +}): DiagnosticVerdict { + const baselineFailures = countAssertionFailures(seed.baseline.assertions); + const patchedFailures = countAssertionFailures(seed.patched.report.assertions); + const regressionFailureCount = seed.regressionAssertions.length === 0 + ? 0 + : seed.patched.report.assertions + .slice(-seed.regressionAssertions.length) + .filter((result) => !result.pass).length; + const verdict: VerificationVerdict = + baselineFailures > 0 && patchedFailures === 0 && regressionFailureCount === 0 + ? "CONFIRMED" + : patchedFailures >= baselineFailures + ? "REFUTED" + : "PARTIAL"; + + return { + verdict, + summary: + verdict === "CONFIRMED" + ? `The patch fixes the targeted runtime issue without visible regressions: ${seed.fixDescription}` + : verdict === "REFUTED" + ? `The patch does not resolve the target issue convincingly: ${seed.fixDescription}` + : `The patch improves the target issue but leaves uncertainty or visible regressions: ${seed.fixDescription}`, + evidence: [ + `Baseline assertion failures: ${baselineFailures}`, + `Patched assertion failures: ${patchedFailures}`, + `Patched regression failures: ${regressionFailureCount}`, + ], + confidence: verdict === "PARTIAL" ? "medium" : "high", + next_step: + verdict === "CONFIRMED" + ? "Promote the runtime patch into a source change or validate it against a broader regression suite." + : verdict === "REFUTED" + ? "Revise the patch because the runtime assertions still fail or did not improve." 
+ : "Inspect the remaining failed assertions and regression signals before deciding whether to keep the patch.", + raw_data: { + fix_description: seed.fixDescription, + baseline: seed.baseline, + patched: seed.patched, + regression_assertions: seed.regressionAssertions, + }, + }; +} + function buildPatchMarkdown( verdict: PatchedValidationScenarioResponse["verdict"], apply: RuntimePatchApplyResponse, @@ -70,6 +218,8 @@ export const PATCH_TOOL_NAMES = [ "apply_runtime_patch", "apply_patch_then_replay", "reset_runtime_patches", + "verify_hypothesis", + "verify_fix", ] as const; export function register(server: McpServer): void { @@ -228,4 +378,170 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "verify_hypothesis", + [ + "Verify a runtime hypothesis before changing repository code.", + "Runs a replay protocol plus assertions and returns CONFIRMED, REFUTED, or PARTIAL with evidence and a Markdown report.", + "Typical examples: stale search results overwrite newer intent, a hydration mismatch appears on first load, or a spinner never settles after a failed request.", + ].join(" "), + { + hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), + steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later 
steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + }, + async ({ hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs }): Promise => { + try { + const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + const response = createHypothesisVerdict({ + hypothesis, + report, + }); + + return ok({ + ...response, + reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), + }); + } catch (error) { + return err(`verify_hypothesis failed unexpectedly: ${String(error)}`); + } + } + ); + + server.tool( + "verify_fix", + [ + "Validate a runtime patch against a failing scenario before editing source files.", + "Runs a baseline scenario, applies the patch in the replay sandbox, reruns the scenario, checks optional regression assertions, and returns CONFIRMED, REFUTED, or PARTIAL.", + ].join(" "), + { + fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), + regressionAssertions: z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + 
waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after patch verification."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), + }, + async ({ + fixDescription, + patch, + url, + steps, + assertions, + regressionAssertions, + headless, + waitUntil, + timeoutMs, + continueOnError, + waitMs, + cleanup, + reopenUrl, + }): Promise => { + try { + const combinedAssertions = [...(assertions as Assertion[]), ...(regressionAssertions as Assertion[])]; + const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in baseline) return err(baseline.error); + + const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + }); + if ("error" in applyResult) return err(applyResult.error); + + const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { + headless, + continueOnError, + waitMs, + }); + if ("error" in patchedReport) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + 
return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); + } + } + return err(patchedReport.error); + } + + const patched: PatchedValidationScenarioResponse = { + verdict: patchedReport.success ? "patch_validated" : "patch_failed", + apply: applyResult, + report: patchedReport, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + patched.cleanup = cleanupResult; + } + + const response = createFixVerdict({ + fixDescription, + baseline, + patched, + regressionAssertions: regressionAssertions as Assertion[], + }); + + return ok({ + ...response, + reportMarkdown: buildFixVerificationMarkdown( + fixDescription, + response.verdict, + baseline, + patched, + regressionAssertions as Assertion[] + ), + }); + } catch (error) { + return err(`verify_fix failed unexpectedly: ${String(error)}`); + } + } + ); } From db49d7063e5eac6a56d196e18ae671ee35eeef71 Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Wed, 13 May 2026 23:59:06 +0200 Subject: [PATCH 5/9] feat: [SCRUM-440] add render attribution diagnostics Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/capabilities.ts | 8 +- src/diagnostics/investigation.ts | 103 ++++++++++++++++++-- src/diagnostics/protocol.ts | 14 ++- src/diagnostics/react-runtime.ts | 33 +++++-- src/diagnostics/render-monitor.ts | 152 +++++++++++++++++++++++++++--- src/tools/diagnostics.ts | 46 ++++++++- 6 files changed, 321 insertions(+), 35 deletions(-) diff --git a/src/capabilities.ts b/src/capabilities.ts index 9738db1..03d1038 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -113,6 +113,12 @@ const capabilityCatalog = { modes: ["replay"], summary: "Verdict-first runtime bug triage for vague symptoms before drilling into atomic tools.", }, + 
attribute_render: { + status: "available", + tools: ["attribute_render"], + modes: ["replay"], + summary: "Explain why a component rendered by attributing the strongest prop, state, context, provider, hook, or parent cause.", + }, get_hook_changes: { status: "available", tools: ["get_hook_changes"], @@ -145,7 +151,7 @@ const capabilityCatalog = { }, investigation_tools: { status: "available", - tools: ["diagnose_excess_renders", "find_memo_breaks", "diagnose_runtime_bug"], + tools: ["diagnose_excess_renders", "find_memo_breaks", "attribute_render", "diagnose_runtime_bug"], modes: ["replay"], summary: "Prefer these verdict-first investigations before chaining the lower-level atomic diagnostics yourself.", }, diff --git a/src/diagnostics/investigation.ts b/src/diagnostics/investigation.ts index 9735476..021ec04 100644 --- a/src/diagnostics/investigation.ts +++ b/src/diagnostics/investigation.ts @@ -56,6 +56,17 @@ type RuntimeBugRawData = { render_hotspots: RenderHotspotsResponse; }; +type RenderAttributionVerdict = + | "render_attributed" + | "render_attribution_inconclusive" + | "component_not_found"; + +type RenderAttributionRawData = { + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + function matchesComponent(componentName: string, candidateName: string, pathText: string): boolean { return candidateName === componentName || pathText.split(" > ").includes(componentName); } @@ -128,7 +139,7 @@ export function createExcessRenderDiagnosis(seed: { ...buildContextEvidence(seed.inspection), ]; - if (hotspot.probableCause.type === "unstable_props") { + if (hotspot.probableCause.type === "provider_value_recreated" || hotspot.probableCause.type === "context_change") { if (contexts.length > 0) { return createDiagnosis({ verdict: "context_cascade_suspected", @@ -140,7 +151,9 @@ export function createExcessRenderDiagnosis(seed: { raw_data, }); } + } + if (hotspot.probableCause.type === 
"prop_diff") { return createDiagnosis({ verdict: "memo_break_suspected", summary: `${hotspot.componentName} is rerendering with unstable props, which strongly suggests a memo break upstream.`, @@ -152,7 +165,7 @@ export function createExcessRenderDiagnosis(seed: { }); } - if (hotspot.probableCause.type === "unstable_state") { + if (hotspot.probableCause.type === "state_change") { return createDiagnosis({ verdict: "render_loop_detected", summary: `${hotspot.componentName} appears stuck in a state-driven render loop.`, @@ -164,7 +177,7 @@ export function createExcessRenderDiagnosis(seed: { }); } - if (hotspot.probableCause.type === "unstable_hook_value") { + if (hotspot.probableCause.type === "hook_instability") { return createDiagnosis({ verdict: "hook_instability_detected", summary: `${hotspot.componentName} is rerendering because one hook value keeps changing across renders.`, @@ -197,7 +210,7 @@ export function createMemoBreakDiagnosis(seed: { seed.componentName ? matchesComponent(seed.componentName, entry.componentName, entry.pathText) : true ); const target = - hotspotCandidates.find((entry) => entry.probableCause.type === "unstable_props") ?? + hotspotCandidates.find((entry) => entry.probableCause.type === "prop_diff") ?? hotspotCandidates[0] ?? null; const contexts = seed.inspection?.component?.contexts ?? 
[]; @@ -225,7 +238,7 @@ export function createMemoBreakDiagnosis(seed: { ...buildContextEvidence(seed.inspection), ]; - if (target.probableCause.type === "unstable_props" && contexts.length === 0) { + if (target.probableCause.type === "prop_diff" && contexts.length === 0) { return createDiagnosis({ verdict: "memo_break_suspected", summary: `${target.componentName} rerenders with changing props and no dominant local hook churn, which is consistent with a memo break.`, @@ -237,7 +250,11 @@ export function createMemoBreakDiagnosis(seed: { }); } - if (contexts.length > 0) { + if ( + contexts.length > 0 || + target.probableCause.type === "context_change" || + target.probableCause.type === "provider_value_recreated" + ) { return createDiagnosis({ verdict: "context_cascade_suspected", summary: `${target.componentName} looks more affected by context/provider churn than by a classic memo break.`, @@ -249,7 +266,7 @@ export function createMemoBreakDiagnosis(seed: { }); } - if ((seed.hookChanges?.summary.suspiciousHooks[0]?.suspected ?? false) || target.probableCause.type === "unstable_state") { + if ((seed.hookChanges?.summary.suspiciousHooks[0]?.suspected ?? false) || target.probableCause.type === "state_change") { return createDiagnosis({ verdict: "internal_state_instability_detected", summary: `${target.componentName} is rerendering because its own hook or state values keep changing, so the issue is not primarily a memo break.`, @@ -272,6 +289,78 @@ export function createMemoBreakDiagnosis(seed: { }); } +export function createRenderAttributionDiagnosis(seed: { + componentName: string; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict { + const target = + seed.renderHotspots.hotspots.find((entry) => matchesComponent(seed.componentName, entry.componentName, entry.pathText)) ?? 
+ null; + const raw_data: RenderAttributionRawData = { + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? { component_inspection: seed.inspection } : {}), + }; + + if (!target) { + return createDiagnosis({ + verdict: "component_not_found", + summary: `React-Sentinel did not capture a recent hotspot for ${seed.componentName}, so render attribution is not decisive yet.`, + evidence: [`Detected hotspots: ${seed.renderHotspots.hotspots.length}`], + confidence: "low", + next_step: "Replay the scenario immediately before attributing the render, or lower the hotspot threshold for this component.", + raw_data, + }); + } + + const cause = target.probableCause; + const contexts = seed.inspection?.component?.contexts ?? []; + const hookLead = seed.hookChanges?.summary.suspiciousHooks[0] ?? null; + const evidence = [ + `Component path: ${target.pathText}`, + `Primary cause: ${cause.summary}`, + ...(hookLead ? [`Dominant hook: #${hookLead.hookIndex} (${hookLead.hookKind}) changed ${hookLead.changeCount} times`] : []), + ...(contexts.length > 0 ? 
[`Observed contexts/providers: ${contexts.map((context) => context.name).join(", ")}`] : []), + ]; + + const summaryByCause: Record = { + prop_diff: `${target.componentName} most likely rerendered because one or more props changed.`, + state_change: `${target.componentName} most likely rerendered because its own state changed.`, + context_change: `${target.componentName} most likely rerendered because a consumed context value changed.`, + provider_value_recreated: `${target.componentName} most likely rerendered because an upstream provider recreated its value.`, + hook_instability: `${target.componentName} most likely rerendered because a hook value stayed unstable across renders.`, + parent_render: `${target.componentName} most likely rerendered because its parent rerendered without strong local diffs.`, + unknown: `${target.componentName} rerendered, but the dominant cause remains inconclusive.`, + }; + + const nextStepByCause: Record = { + prop_diff: "Inspect the parent props passed into this component and stabilize recreated references.", + state_change: "Inspect the state update path or effect chain that keeps changing local state.", + context_change: "Inspect the consumed context source and narrow or memoize the context payload.", + provider_value_recreated: "Inspect the nearest provider and memoize the provided value object.", + hook_instability: "Inspect the unstable hook output and memoize or debounce the changing value.", + parent_render: "Inspect the parent component to understand what keeps it rerendering.", + unknown: "Collect a longer replay trace and compare props, hooks, and context churn together.", + }; + + return createDiagnosis({ + verdict: cause.type === "unknown" ? "render_attribution_inconclusive" : "render_attributed", + summary: summaryByCause[cause.type] ?? summaryByCause.unknown, + evidence, + confidence: cause.type === "unknown" ? "medium" : "high", + suspected_source: + cause.type === "hook_instability" + ? hookLead + ? 
`hook #${hookLead.hookIndex}` + : target.pathText + : contexts[0]?.name ?? target.pathText, + next_step: nextStepByCause[cause.type] ?? nextStepByCause.unknown, + raw_data, + }); +} + export function createRuntimeBugDiagnosis(seed: { symptom: string; runtimeStatus: RuntimeStatus; diff --git a/src/diagnostics/protocol.ts b/src/diagnostics/protocol.ts index 9c597b5..85b9d02 100644 --- a/src/diagnostics/protocol.ts +++ b/src/diagnostics/protocol.ts @@ -178,13 +178,17 @@ export interface RenderCountsResponse { durationMs: number; } -export type RenderHotspotCauseType = - | "unstable_state" - | "unstable_hook_value" - | "unstable_props" - | "repeated_effect" +export type RenderAttributionCauseType = + | "prop_diff" + | "state_change" + | "context_change" + | "parent_render" + | "provider_value_recreated" + | "hook_instability" | "unknown"; +export type RenderHotspotCauseType = RenderAttributionCauseType; + export interface RenderHotspotCause { type: RenderHotspotCauseType; summary: string; diff --git a/src/diagnostics/react-runtime.ts b/src/diagnostics/react-runtime.ts index 513cbbc..343579c 100644 --- a/src/diagnostics/react-runtime.ts +++ b/src/diagnostics/react-runtime.ts @@ -521,6 +521,30 @@ export function inspectReactRuntime(request: ReactRuntimeInspectRequest): ReactR return "Unknown"; } + function getTypeDisplayName(type: unknown): string | null { + if (!type) return null; + if (typeof type === "string") return type; + if (typeof type === "function") { + const fn = type as { displayName?: string; name?: string }; + return fn.displayName || fn.name || null; + } + if (typeof type === "object") { + const typeRecord = type as Record; + if (typeof typeRecord.displayName === "string" && typeRecord.displayName.length > 0) { + return typeRecord.displayName; + } + if (typeof typeRecord.render === "function") { + const render = typeRecord.render as { displayName?: string; name?: string }; + return `ForwardRef(${render.displayName || render.name || "Anonymous"})`; + } + if 
("type" in typeRecord) { + const inner = getTypeDisplayName(typeRecord.type); + return inner ? `Memo(${inner})` : "Memo"; + } + } + return null; + } + function isReactElementLike(value: unknown): boolean { return Boolean(value && typeof value === "object" && "$$typeof" in (value as Record)); } @@ -534,13 +558,8 @@ export function inspectReactRuntime(request: ReactRuntimeInspectRequest): ReactR if (fiber.tag === 10) { return `${getContextName(getFiberContextObject(fiber))}.Provider`; } - if (fiber.type && typeof fiber.type === "object") { - const typeRecord = fiber.type as Record; - if (typeof typeRecord.displayName === "string" && typeRecord.displayName.length > 0) { - return typeRecord.displayName; - } - return "Context/Memo/ForwardRef"; - } + const resolvedName = getTypeDisplayName(fiber.type); + if (resolvedName) return resolvedName; if (fiber.tag === 3) return "HostRoot"; return "Unknown"; } diff --git a/src/diagnostics/render-monitor.ts b/src/diagnostics/render-monitor.ts index e4583a6..c696a92 100644 --- a/src/diagnostics/render-monitor.ts +++ b/src/diagnostics/render-monitor.ts @@ -27,6 +27,12 @@ type RenderCountSample = { timestamp: string; renderId: number; props: Record; + parentName: string | null; + contexts: { + name: string; + source: "dependency" | "provider"; + value: unknown; + }[]; hooks: { index: number; kind: ComponentHookKind; @@ -188,6 +194,28 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { return fiber.type._context ?? fiber.type.context ?? 
null; }; + const getTypeDisplayName = (type) => { + if (!type) return null; + if (typeof type === "string") return type; + if (typeof type === "function") { + return type.displayName || type.name || null; + } + if (typeof type === "object") { + if (typeof type.displayName === "string" && type.displayName.trim().length > 0) { + return type.displayName; + } + if (typeof type.render === "function") { + const renderName = type.render.displayName || type.render.name || "Anonymous"; + return "ForwardRef(" + renderName + ")"; + } + if ("type" in type) { + const innerName = getTypeDisplayName(type.type); + return innerName ? "Memo(" + innerName + ")" : "Memo"; + } + } + return null; + }; + const getComponentName = (fiber) => { if (typeof fiber.type === "string") return fiber.type; if (typeof fiber.type === "function") { @@ -196,11 +224,12 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { if (fiber.tag === 10) { return getContextName(getFiberContextObject(fiber)) + ".Provider"; } - if (fiber.type && typeof fiber.type === "object" && typeof fiber.type.displayName === "string") { - return fiber.type.displayName; + const resolvedName = getTypeDisplayName(fiber.type); + if (resolvedName) { + return resolvedName; } if (fiber.tag === 3) return "HostRoot"; - return "Context/Memo/ForwardRef"; + return "AnonymousComposite"; }; const classifyHook = (hook, index) => { @@ -235,6 +264,44 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { return hooks; }; + const extractContexts = (fiber, pathFibers) => { + const fromDependencies = []; + const firstContext = fiber.dependencies?.firstContext; + const dependencySeen = new Set(); + let current = firstContext; + + while (current && typeof current === "object") { + const name = getContextName(current.context); + if (!dependencySeen.has(name)) { + dependencySeen.add(name); + fromDependencies.push({ + name, + source: "dependency", + value: serializeValue(current.memoizedValue), + 
}); + } + current = current.next ?? null; + } + + if (fromDependencies.length > 0) { + return fromDependencies; + } + + const providers = []; + for (const pathFiber of pathFibers) { + if (!(pathFiber.tag === 10 && pathFiber.memoizedProps && typeof pathFiber.memoizedProps === "object" && "value" in pathFiber.memoizedProps)) { + continue; + } + providers.push({ + name: getContextName(getFiberContextObject(pathFiber)), + source: "provider", + value: serializeValue(pathFiber.memoizedProps.value), + }); + } + + return providers; + }; + const isTrackableComponent = (fiber) => { if (!isFiber(fiber)) return false; if (fiber.tag === 3 || fiber.tag === 6 || fiber.tag === 10) return false; @@ -258,7 +325,7 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { } }; - const recordRender = (fiber, path) => { + const recordRender = (fiber, path, pathFibers) => { const state = ensureState(); const timestamp = new Date().toISOString(); const componentName = getComponentName(fiber); @@ -284,6 +351,8 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { timestamp, renderId: state.nextRenderId++, props: serializeProps(fiber.memoizedProps), + parentName: path.length > 1 ? path[path.length - 2] : null, + contexts: extractContexts(fiber, pathFibers), hooks: extractHooks(fiber), }); if (entry.samples.length > maxSamplesPerComponent) { @@ -291,17 +360,18 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { } }; - const walk = (fiber, path) => { + const walk = (fiber, path, pathFibers) => { if (!isFiber(fiber) || fiber.tag === 6) return; const trackable = isTrackableComponent(fiber); const nextPath = trackable ? path.concat(getComponentName(fiber)) : path; + const nextPathFibers = trackable ? 
pathFibers.concat(fiber) : pathFibers; if (trackable && didRender(fiber)) { - recordRender(fiber, nextPath); + recordRender(fiber, nextPath, nextPathFibers); } let child = fiber.child; while (child) { - walk(child, nextPath); + walk(child, nextPath, nextPathFibers); child = child.sibling; } }; @@ -317,7 +387,7 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { let child = rootFiber.child; while (child) { - walk(child, []); + walk(child, [], []); child = child.sibling; } }; @@ -425,24 +495,53 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { const hookChanges = new Map(); let propChangeCount = 0; + let contextChangeCount = 0; + let providerChangeCount = 0; + let parentRenderCount = 0; for (let index = 1; index < samples.length; index += 1) { const previousSample = samples[index - 1]; const currentSample = samples[index]; - if (stableStringify(previousSample.props) !== stableStringify(currentSample.props)) { + const propsChanged = stableStringify(previousSample.props) !== stableStringify(currentSample.props); + if (propsChanged) { propChangeCount += 1; } + const previousContexts = new Map(previousSample.contexts.map((context) => [`${context.name}:${context.source}`, context] as const)); + const currentContexts = new Map(currentSample.contexts.map((context) => [`${context.name}:${context.source}`, context] as const)); + const contextKeys = new Set([...previousContexts.keys(), ...currentContexts.keys()]); + let contextChanged = false; + let providerChanged = false; + for (const contextKey of contextKeys) { + const previousContext = previousContexts.get(contextKey); + const currentContext = currentContexts.get(contextKey); + const previousValue = previousContext ? stableStringify(previousContext.value) : "undefined"; + const currentValue = currentContext ? 
stableStringify(currentContext.value) : "undefined"; + if (previousValue === currentValue) continue; + contextChanged = true; + if ((currentContext?.source ?? previousContext?.source) === "provider") { + providerChanged = true; + } + } + if (contextChanged) { + contextChangeCount += 1; + } + if (providerChanged) { + providerChangeCount += 1; + } + const previousHooks = new Map(previousSample.hooks.map((hook) => [`${hook.index}:${hook.kind}`, hook] as const)); const currentHooks = new Map(currentSample.hooks.map((hook) => [`${hook.index}:${hook.kind}`, hook] as const)); const hookKeys = new Set([...previousHooks.keys(), ...currentHooks.keys()]); + let hookChanged = false; for (const hookKey of hookKeys) { const previousHook = previousHooks.get(hookKey); const currentHook = currentHooks.get(hookKey); const previousValue = previousHook ? stableStringify(previousHook.value) : "undefined"; const currentValue = currentHook ? stableStringify(currentHook.value) : "undefined"; if (previousValue === currentValue) continue; + hookChanged = true; const currentStat = hookChanges.get(hookKey); hookChanges.set(hookKey, { @@ -451,6 +550,10 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { changeCount: (currentStat?.changeCount ?? 
0) + 1, }); } + + if (!propsChanged && !contextChanged && !hookChanged && previousSample.parentName === currentSample.parentName) { + parentRenderCount += 1; + } } const transitions = samples.length - 1; @@ -460,27 +563,48 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { if (dominantHook && dominantHook.changeCount >= hotThreshold) { if (dominantHook.kind === "state") { return { - type: "unstable_state", + type: "state_change", summary: `State hook #${dominantHook.index} changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, }; } return { - type: "unstable_hook_value", + type: "hook_instability", summary: `Hook #${dominantHook.index} (${dominantHook.kind}) changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, }; } + if (providerChangeCount >= hotThreshold) { + return { + type: "provider_value_recreated", + summary: `An upstream provider value changed on ${providerChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (contextChangeCount >= hotThreshold) { + return { + type: "context_change", + summary: `Observed context values changed on ${contextChangeCount}/${transitions} recent render transitions.`, + }; + } + if (propChangeCount >= hotThreshold) { return { - type: "unstable_props", + type: "prop_diff", summary: `Props changed on ${propChangeCount}/${transitions} recent render transitions.`, }; } + if (parentRenderCount >= hotThreshold) { + return { + type: "parent_render", + summary: "The component rerendered repeatedly without dominant local prop, hook, or context changes, which suggests parent-driven rerenders.", + }; + } + return { - type: "repeated_effect", - summary: "Recent renders kept repeating without one dominant prop diff, which suggests an effect loop or chained state updates.", + type: "unknown", + summary: "Recent renders kept repeating, but React-Sentinel could not isolate one dominant cause from props, hooks, contexts, or parent churn.", }; } 
diff --git a/src/tools/diagnostics.ts b/src/tools/diagnostics.ts index f84a245..ddefcc5 100644 --- a/src/tools/diagnostics.ts +++ b/src/tools/diagnostics.ts @@ -11,6 +11,7 @@ import { browserManager } from "../browser/index.js"; import { createExcessRenderDiagnosis, createMemoBreakDiagnosis, + createRenderAttributionDiagnosis, createRuntimeBugDiagnosis, } from "../diagnostics/investigation.js"; import type { InspectionResponseMode } from "../diagnostics/protocol.js"; @@ -60,6 +61,7 @@ export const DIAGNOSTIC_TOOL_NAMES = [ "diagnose_excess_renders", "find_memo_breaks", "diagnose_runtime_bug", + "attribute_render", ] as const; export function register(server: McpServer): void { @@ -545,7 +547,7 @@ export function register(server: McpServer): void { const target = componentName ?? - renderHotspots.hotspots.find((entry) => entry.probableCause.type === "unstable_props")?.componentName ?? + renderHotspots.hotspots.find((entry) => entry.probableCause.type === "prop_diff")?.componentName ?? renderHotspots.hotspots[0]?.componentName ?? undefined; const targetPathText = @@ -574,6 +576,48 @@ export function register(server: McpServer): void { } ); + server.tool( + "attribute_render", + [ + "Explain why a specific React component rendered by attributing the strongest runtime cause.", + "Uses render hotspots, hook churn, and component inspection to surface prop diffs, state changes, context cascades, provider churn, or parent-driven renders.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).describe("React component name to attribute."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of hotspots to inspect. 
Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + if ("error" in renderHotspots) return err(renderHotspots.error); + + const targetPathText = + renderHotspots.hotspots.find((entry) => entry.componentName === componentName || entry.pathText.split(" > ").includes(componentName)) + ?.pathText; + const [hookChanges, inspection] = await Promise.all([ + browserManager.getHookChanges(url, componentName, targetPathText, 50), + browserManager.inspectComponent(url, componentName, "compact"), + ]); + if ("error" in hookChanges) return err(hookChanges.error); + if ("error" in inspection) return err(inspection.error); + + return ok( + createRenderAttributionDiagnosis({ + componentName, + renderHotspots, + hookChanges, + inspection, + }) + ); + } catch (e) { + return err(`attribute_render failed unexpectedly: ${String(e)}`); + } + } + ); + server.tool( "diagnose_runtime_bug", [ From d2f707a887d6c5dd521d00e22d351dec22f609d7 Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Thu, 14 May 2026 00:06:19 +0200 Subject: [PATCH 6/9] feat: [SCRUM-442] improve MCP tool selection UX Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/tool-selection-guide.md | 20 +++ src/capabilities.ts | 74 +++++++++-- src/index.ts | 5 +- src/tools/browser.ts | 66 +++++++--- src/tools/diagnostics.ts | 157 ++++++++++++++++------- src/tools/interaction.ts | 54 +++++++- src/tools/network.ts | 5 +- src/tools/patch.ts | 239 ++++++++++++++++++++++++++++++++++- 8 files changed, 532 insertions(+), 88 deletions(-) create mode 100644 docs/tool-selection-guide.md diff --git a/docs/tool-selection-guide.md b/docs/tool-selection-guide.md new file mode 100644 index 0000000..6fe9287 --- /dev/null +++ b/docs/tool-selection-guide.md @@ -0,0 +1,20 @@ +# MCP Tool Selection Guide + +Use React-Sentinel when the answer depends on 
**runtime evidence in the browser**, not just source code structure. + +| If you need to... | Start with | Why this beats grep/read | Good follow-up | +| --- | --- | --- | --- | +| Triage a vague runtime bug fast | `diagnose_runtime_bug` | It correlates console, hydration, async, and render signals into one verdict-first answer. | `attribute_render`, `get_runtime_timeline` | +| Explain why a component rerendered | `diagnose_excess_renders`, `attribute_render` | It inspects live render churn, hooks, props, and context instead of guessing from component code. | `find_memo_breaks`, `inspect_component` | +| Reproduce a bug in a deterministic browser | `navigate_replay` or `start_debug_replay` | It creates a clean replay session that can be rerun exactly. | `replay_interactions`, `validate_scenario` | +| Validate a user flow or invariant | `validate_scenario` or `validate_user_flow` | It executes real browser actions and returns pass/fail assertions with traces. | `find_race_conditions` | +| Catch intermittent timing bugs | `find_race_conditions` | It perturbs action timing across multiple iterations and shrinks failing flows. | `verify_hypothesis`, `verify_fix` | +| Test a runtime hypothesis before editing code | `verify_hypothesis` or `test_runtime_hypothesis` | It proves or refutes the idea against browser behavior instead of relying on intuition. | `attribute_render` | +| Try a fix without touching repository files | `apply_patch_then_replay`, `patch_and_validate`, `verify_fix`, or `verify_runtime_fix` | It validates an ephemeral runtime patch in the sandbox before a source change exists. | `reset_runtime_patches` | +| Reuse a real logged-in browser tab | `get_attach_status`, `get_attach_tabs`, `select_attach_tab` | It lets React-Sentinel inspect the exact user-prepared session that static analysis cannot recreate. 
| `get_runtime_status` | + +## Quick heuristics + +- If the bug depends on **current props, state, context, network, console, or timing**, prefer React-Sentinel. +- If you only need to understand **static source structure**, grep/read is still cheaper. +- Prefer **verdict-first tools** (`diagnose_*`, `attribute_render`, `verify_*`) before low-level atomic tools unless you already know the exact signal you need. diff --git a/src/capabilities.ts b/src/capabilities.ts index 03d1038..2f863fe 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -14,8 +14,48 @@ export type CapabilityDefinition = { summary: string; }; +export type ToolSelectionGuideEntry = { + situation: string; + startWith: readonly string[]; + why: string; + followUp?: readonly string[]; +}; + const CORE_TOOL_NAMES = ["ping", "get_server_info", "echo"] as const; +const TOOL_SELECTION_GUIDE: readonly ToolSelectionGuideEntry[] = [ + { + situation: "You need the fastest high-signal triage for a vague runtime bug.", + startWith: ["diagnose_runtime_bug"], + why: "Use runtime evidence instead of grep when the symptom depends on browser state, console output, hydration, async timing, or render churn.", + followUp: ["attribute_render", "find_memo_breaks", "get_runtime_timeline"], + }, + { + situation: "A component rerenders too often and you need the most likely cause.", + startWith: ["diagnose_excess_renders", "attribute_render"], + why: "Use replay render signals instead of static code reading when you must prove whether props, state, context, hooks, or a parent render caused the churn.", + followUp: ["find_memo_breaks", "get_hook_changes", "inspect_component"], + }, + { + situation: "You want a deterministic reproduction or invariant check for a user flow.", + startWith: ["validate_scenario", "find_race_conditions"], + why: "Use replay assertions instead of manual clicking or source inspection when the failure appears only after a sequence of actions or timing changes.", + followUp: 
["verify_hypothesis", "verify_fix"], + }, + { + situation: "You want to test a fix or a hypothesis before editing repository code.", + startWith: ["verify_hypothesis", "verify_fix", "apply_patch_then_replay"], + why: "Use the replay sandbox instead of editing files blindly when you need proof that a runtime patch changes the observed behavior.", + followUp: ["reset_runtime_patches"], + }, + { + situation: "You need browser access before any runtime investigation can begin.", + startWith: ["get_session_status", "get_attach_status", "navigate_replay"], + why: "Use browser session tools instead of grep when the blocker is connectivity, live attach readiness, or launching an isolated replay session.", + followUp: ["select_attach_tab", "browser_ping"], + }, +]; + const capabilityCatalog = { browser_ping: { status: "available", @@ -49,9 +89,9 @@ const capabilityCatalog = { }, navigate_replay: { status: "available", - tools: ["navigate_replay"], + tools: ["navigate_replay", "start_debug_replay"], modes: ["replay", "sandbox"], - summary: "Open the isolated replay browser on a target application URL.", + summary: "Open the isolated replay browser on a target application URL and start a deterministic debugging session.", }, get_runtime_status: { status: "available", @@ -175,9 +215,9 @@ const capabilityCatalog = { }, validate_scenario: { status: "available", - tools: ["validate_scenario"], + tools: ["validate_scenario", "validate_user_flow"], modes: ["replay", "sandbox"], - summary: "Run multi-step validations and assertions against the replay sandbox.", + summary: "Run multi-step validations and assertions against the replay sandbox for a deterministic user flow verdict.", }, apply_runtime_patch: { status: "available", @@ -187,19 +227,19 @@ const capabilityCatalog = { }, apply_patch_then_replay: { status: "available", - tools: ["apply_patch_then_replay"], + tools: ["apply_patch_then_replay", "patch_and_validate"], modes: ["sandbox"], - summary: "Patch, replay, and validate 
in one sandbox flow.", + summary: "Patch, replay, and validate in one sandbox flow before touching source files.", }, verify_hypothesis: { status: "available", - tools: ["verify_hypothesis"], + tools: ["verify_hypothesis", "test_runtime_hypothesis"], modes: ["replay", "sandbox"], summary: "Confirm, refute, or partially support a runtime hypothesis before touching source code.", }, verify_fix: { status: "available", - tools: ["verify_fix"], + tools: ["verify_fix", "verify_runtime_fix"], modes: ["sandbox"], summary: "Compare baseline versus patched replay behavior to validate a runtime fix and surface regressions.", }, @@ -211,7 +251,7 @@ const capabilityCatalog = { }, shadow_sandbox: { status: "partial", - tools: ["apply_runtime_patch", "apply_patch_then_replay", "verify_fix", "reset_runtime_patches"], + tools: ["apply_runtime_patch", "apply_patch_then_replay", "patch_and_validate", "verify_fix", "verify_runtime_fix", "reset_runtime_patches"], modes: ["sandbox"], summary: "Shadow sandbox is available for script-on-page patches only; broader patch shapes are still planned.", }, @@ -227,6 +267,9 @@ export function createServerInfoPayload(): { capabilities: Record; capabilityDetails: Record; capabilitiesByMode: Record>; + toolSelectionGuide: ToolSelectionGuideEntry[]; + recommendedWorkflows: ToolSelectionGuideEntry[]; + documentation: string[]; } { const capabilityDetails = Object.fromEntries( Object.entries(capabilityCatalog).map(([name, definition]) => [ @@ -266,6 +309,19 @@ export function createServerInfoPayload(): { capabilities, capabilityDetails, capabilitiesByMode, + toolSelectionGuide: TOOL_SELECTION_GUIDE.map((entry) => ({ + situation: entry.situation, + startWith: [...entry.startWith], + why: entry.why, + ...(entry.followUp ? { followUp: [...entry.followUp] } : {}), + })), + recommendedWorkflows: TOOL_SELECTION_GUIDE.map((entry) => ({ + situation: entry.situation, + startWith: [...entry.startWith], + why: entry.why, + ...(entry.followUp ? 
{ followUp: [...entry.followUp] } : {}), + })), + documentation: ["docs/tool-selection-guide.md", "docs/agent-runtime-ux.md", "docs/workflows.md"], }; } diff --git a/src/index.ts b/src/index.ts index f35b2e8..374b19b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -245,6 +245,9 @@ function buildServerInfoResponse(): { capabilities: Record; capabilityDetails: ReturnType["capabilityDetails"]; capabilitiesByMode: ReturnType["capabilitiesByMode"]; + toolSelectionGuide: ReturnType["toolSelectionGuide"]; + recommendedWorkflows: ReturnType["recommendedWorkflows"]; + documentation: ReturnType["documentation"]; } { return { name: REACT_SENTINEL_NAME, @@ -275,7 +278,7 @@ function createServer(): McpServer { server.tool( "get_server_info", - "Returns metadata and planned capabilities of this React-Sentinel instance.", + "Return the honest React-Sentinel capability map plus a tool-selection guide that tells an agent when to prefer runtime investigation over grep or static file reading.", {}, async (): Promise => { try { diff --git a/src/tools/browser.ts b/src/tools/browser.ts index 995a02e..5f691c1 100644 --- a/src/tools/browser.ts +++ b/src/tools/browser.ts @@ -33,6 +33,7 @@ const replayWaitUntilSchema = z export const BROWSER_TOOL_NAMES = [ "get_session_status", "navigate_replay", + "start_debug_replay", "get_attach_status", "browser_ping", "get_attach_tabs", @@ -46,9 +47,9 @@ export function register(server: McpServer): void { server.tool( "get_session_status", [ - "Return the current browser session mode used by React-Sentinel.", - "Reports whether tools will use the attached live tab or the isolated replay browser,", - "plus the current replay headless/headed configuration.", + "Return the current browser session mode used by React-Sentinel before you run runtime tools.", + "Use this instead of guessing from config files when you need to know whether the next tool will hit a live attached tab or the isolated replay browser.", + "Reports the active mode plus the current 
replay headless/headed configuration.", ].join(" "), {}, async (): Promise => { @@ -66,9 +67,9 @@ export function register(server: McpServer): void { server.tool( "navigate_replay", [ - "Navigate the isolated replay browser to a target URL and wait for the application to load.", - "Supports configurable waitUntil, timeout, and headless/headed replay mode.", - "Returns readable navigation errors plus the active session metadata.", + "Navigate the isolated replay browser to a target URL and wait for the app to load in a clean, deterministic session.", + "Use this instead of manual clicking or grep when you first need a reproducible browser state for runtime diagnostics, assertions, or sandbox patches.", + "Supports configurable waitUntil, timeout, and headless/headed replay mode and returns readable navigation errors plus session metadata.", ].join(" "), { url: z.string().url().describe("Target URL for the replay browser."), @@ -93,15 +94,44 @@ export function register(server: McpServer): void { } ); + server.tool( + "start_debug_replay", + [ + "Action-oriented alias for navigate_replay that starts a clean replay debugging session for runtime investigation.", + "Prefer this when the agent wants an explicit 'start debugging in replay mode' entry point instead of a lower-level navigation name.", + ].join(" "), + { + url: z.string().url().describe("Target URL for the replay browser."), + waitUntil: replayWaitUntilSchema.describe("Playwright readiness event to wait for before returning."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds."), + headless: z.boolean().optional().describe("Override the replay browser mode for this navigation."), + resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), + }, + async ({ url, waitUntil, timeoutMs, headless, resetSession }): Promise => { + try { + const result = await 
browserManager.navigateReplay(url, { + waitUntil, + timeoutMs, + headless, + resetSession, + }); + if ("error" in result) return err(result.error); + return ok(result); + } catch (e) { + return err(`start_debug_replay failed unexpectedly: ${String(e)}`); + } + } + ); + // ------------------------------------------------------------------------- // Tool: get_attach_status // ------------------------------------------------------------------------- server.tool( "get_attach_status", [ - "Check whether a Chrome instance exposes the CDP version endpoint at the", - "given URL. Returns a machine-readable attach readiness status", - "plus launch guidance when the endpoint is unavailable.", + "Check whether a Chrome instance exposes the CDP endpoint required for live attach mode.", + "Use this instead of reading docs or config when runtime debugging depends on an already-authenticated user session in a real Chrome tab.", + "Returns a machine-readable attach readiness status plus launch guidance when the endpoint is unavailable.", ].join(" "), { endpoint: z @@ -126,9 +156,9 @@ export function register(server: McpServer): void { server.tool( "browser_ping", [ - "Open an isolated browser context, navigate to a URL, and return page", - "metadata (title, URL, timestamp). 
Validates the MCP ↔ browser bridge.", - "Returns a structured error if the URL is unreachable.", + "Open an isolated browser context, navigate to a URL, and confirm that React-Sentinel can actually reach the target app.", + "Use this instead of assuming the app is up when you need a quick bridge smoke test before deeper runtime tools.", + "Returns page metadata plus a structured error if the URL is unreachable.", ].join(" "), { url: z @@ -154,9 +184,9 @@ export function register(server: McpServer): void { server.tool( "get_attach_tabs", [ - "List the CDP page tabs exposed by a Chrome instance.", - "Optional URL and title filters narrow the returned tab list.", - "Returns the currently selected tab when one has already been chosen.", + "List the live Chrome tabs exposed by a CDP endpoint so the agent can choose the right authenticated or user-prepared page.", + "Use this instead of guessing tab order when the runtime bug only reproduces in a real browser session.", + "Optional URL and title filters narrow the returned tab list and the response also shows the currently selected tab.", ].join(" "), { endpoint: z @@ -184,9 +214,9 @@ export function register(server: McpServer): void { server.tool( "select_attach_tab", [ - "Preview or confirm one CDP page tab by index, URL, or title.", - "A matched tab is not activated for live browser mode until confirm is true.", - "If no tab matches, the tool returns a structured 'not found' response.", + "Preview or confirm one live Chrome tab by index, URL, or title before React-Sentinel reuses it in attach mode.", + "Use this instead of brittle manual coordination when you must point runtime tools at the exact tab that already holds the right app state.", + "A matched tab is not activated until confirm is true, and missing tabs return a structured not-found response.", ].join(" "), { endpoint: z diff --git a/src/tools/diagnostics.ts b/src/tools/diagnostics.ts index ddefcc5..65368ad 100644 --- a/src/tools/diagnostics.ts +++ 
b/src/tools/diagnostics.ts @@ -71,10 +71,9 @@ export function register(server: McpServer): void { server.tool( "get_runtime_status", [ - "Navigate to a URL and return a full runtime diagnostic snapshot:", - "page title, URL, viewport dimensions, timestamp, and React detection", - "(version, fiber presence, devtools hook). Returns a structured error", - "if the URL is unreachable.", + "Navigate to a URL and return a full runtime diagnostic snapshot of what React-Sentinel can observe right now.", + "Use this instead of reading source files when the first question is whether React is mounted, which page is actually loaded, and whether the runtime bridge is healthy.", + "Returns page title, URL, viewport dimensions, timestamp, React detection, and a structured error if the URL is unreachable.", ].join(" "), { url: z @@ -136,10 +135,9 @@ export function register(server: McpServer): void { server.tool( "inspect_component", [ - "Search the React Fiber tree for a specific component by name and extract", - "its details for AI inspection, including props, path in the tree,", - "provider or consumed contexts, children count, and a compact summary.", - "Use responseMode='compact' when you want a shorter payload.", + "Search the React Fiber tree for a specific component by name and extract its live runtime details.", + "Use this instead of grep when the bug depends on the actual props, context wiring, or rendered position of a component in the current browser state.", + "Returns props, path in the tree, provider or consumed contexts, children count, and a compact summary. 
Use responseMode='compact' when you want a shorter payload.", ].join(" "), { url: z @@ -170,7 +168,7 @@ export function register(server: McpServer): void { "get_component_state", [ "Inspect a specific React component and return its serializable hook state.", - "Useful for checking simple useState/useRef/useMemo values without reading source code.", + "Use this instead of guessing from hooks source when you need the live value that actually kept a button disabled, an effect armed, or a branch hidden.", "Use responseMode='compact' when you want a shorter payload for AI analysis.", ].join(" "), { @@ -202,6 +200,7 @@ export function register(server: McpServer): void { "get_render_counts", [ "Return per-component render counters collected by the replay runtime monitor.", + "Use this instead of static code reading when you need proof that a component is actually rerendering far more often than expected in the reproduced browser flow.", "Each entry includes the component name, path, render count, and first/last observation timestamps.", ].join(" "), { @@ -235,6 +234,7 @@ export function register(server: McpServer): void { "get_render_hotspots", [ "Diagnose likely rerender explosions and return a verdict-first summary with evidence, confidence, and next_step.", + "Use this instead of grep when you need runtime proof that a render storm is happening and which component path is hottest.", "raw_data still contains the detailed hotspot list when deeper inspection is needed.", ].join(" "), { @@ -477,8 +477,8 @@ export function register(server: McpServer): void { "diagnose_excess_renders", [ "High-level render investigation for replay-mode React bugs.", + "Use this instead of manually chaining atomic render tools when the question is 'why is this rerendering so much?' 
rather than 'show me raw counters'.", "Orchestrates runtime status, render counts, hotspots, hook changes, and component inspection to explain why a component rerenders too often.", - "Prefer this over manually chaining the atomic render tools when you need a verdict first.", ].join(" "), { url: z.string().url().describe("URL of the page to inspect."), @@ -489,11 +489,14 @@ export function register(server: McpServer): void { }, async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { try { + const startedAt = Date.now(); + const collectionStartedAt = Date.now(); const [runtimeStatus, renderCounts, renderHotspots] = await Promise.all([ browserManager.getRuntimeStatus(url), browserManager.getRenderCounts(url, limit), browserManager.getRenderHotspots(url, threshold, windowMs, limit), ]); + const collectionMs = Date.now() - collectionStartedAt; if ("error" in runtimeStatus) return err(runtimeStatus.error); if ("error" in renderCounts) return err(renderCounts.error); if ("error" in renderHotspots) return err(renderHotspots.error); @@ -501,26 +504,35 @@ export function register(server: McpServer): void { const target = componentName ?? renderHotspots.hotspots[0]?.componentName ?? undefined; const targetPathText = renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const followUpStartedAt = Date.now(); const [hookChanges, inspection] = target ? await Promise.all([ browserManager.getHookChanges(url, target, targetPathText, 50), browserManager.inspectComponent(url, target, "compact"), ]) : [undefined, undefined]; + const followUpMs = target ? Date.now() - followUpStartedAt : 0; if (hookChanges && "error" in hookChanges) return err(hookChanges.error); if (inspection && "error" in inspection) return err(inspection.error); - return ok( - createExcessRenderDiagnosis({ - componentName, - runtimeStatus, - renderCounts, - renderHotspots, - ...(hookChanges ? { hookChanges } : {}), - ...(inspection ? 
{ inspection } : {}), - }) - ); + const diagnosis = createExcessRenderDiagnosis({ + componentName, + runtimeStatus, + renderCounts, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? { inspection } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + collectionMs, + followUpMs, + }, + }); } catch (e) { return err(`diagnose_excess_renders failed unexpectedly: ${String(e)}`); } @@ -531,6 +543,7 @@ export function register(server: McpServer): void { "find_memo_breaks", [ "High-level investigation that searches for likely React memo breaks or context cascades.", + "Use this instead of grep when you need runtime evidence that unstable props or provider churn are breaking memoization in the reproduced flow.", "Combines render hotspots, hook churn, and component inspection so the caller gets a verdict instead of raw render data.", ].join(" "), { @@ -542,7 +555,10 @@ export function register(server: McpServer): void { }, async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { try { + const startedAt = Date.now(); + const hotspotStartedAt = Date.now(); const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + const hotspotMs = Date.now() - hotspotStartedAt; if ("error" in renderHotspots) return err(renderHotspots.error); const target = @@ -552,24 +568,33 @@ export function register(server: McpServer): void { undefined; const targetPathText = renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const followUpStartedAt = Date.now(); const [hookChanges, inspection] = target ? await Promise.all([ browserManager.getHookChanges(url, target, targetPathText, 50), browserManager.inspectComponent(url, target, "compact"), ]) : [undefined, undefined]; + const followUpMs = target ? 
Date.now() - followUpStartedAt : 0; if (hookChanges && "error" in hookChanges) return err(hookChanges.error); if (inspection && "error" in inspection) return err(inspection.error); - return ok( - createMemoBreakDiagnosis({ - componentName, - renderHotspots, - ...(hookChanges ? { hookChanges } : {}), - ...(inspection ? { inspection } : {}), - }) - ); + const diagnosis = createMemoBreakDiagnosis({ + componentName, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? { inspection } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs, + }, + }); } catch (e) { return err(`find_memo_breaks failed unexpectedly: ${String(e)}`); } @@ -580,7 +605,8 @@ export function register(server: McpServer): void { "attribute_render", [ "Explain why a specific React component rendered by attributing the strongest runtime cause.", - "Uses render hotspots, hook churn, and component inspection to surface prop diffs, state changes, context cascades, provider churn, or parent-driven renders.", + "Use this instead of static code inspection when you need the strongest live explanation for one render: props, state, context, provider, hooks, or parent churn.", + "Uses render hotspots, hook churn, and component inspection to surface the strongest cause with evidence and next steps.", ].join(" "), { url: z.string().url().describe("URL of the page to inspect."), @@ -591,27 +617,55 @@ export function register(server: McpServer): void { }, async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { try { + const startedAt = Date.now(); + const hotspotStartedAt = Date.now(); const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + const hotspotMs = Date.now() - hotspotStartedAt; if ("error" in renderHotspots) return err(renderHotspots.error); const targetPathText = renderHotspots.hotspots.find((entry) => entry.componentName === componentName 
|| entry.pathText.split(" > ").includes(componentName)) ?.pathText; + if (!targetPathText) { + const diagnosis = createRenderAttributionDiagnosis({ + componentName, + renderHotspots, + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs: 0, + }, + }); + } + + const followUpStartedAt = Date.now(); const [hookChanges, inspection] = await Promise.all([ browserManager.getHookChanges(url, componentName, targetPathText, 50), browserManager.inspectComponent(url, componentName, "compact"), ]); + const followUpMs = Date.now() - followUpStartedAt; if ("error" in hookChanges) return err(hookChanges.error); if ("error" in inspection) return err(inspection.error); - return ok( - createRenderAttributionDiagnosis({ - componentName, - renderHotspots, - hookChanges, - inspection, - }) - ); + const diagnosis = createRenderAttributionDiagnosis({ + componentName, + renderHotspots, + hookChanges, + inspection, + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs, + }, + }); } catch (e) { return err(`attribute_render failed unexpectedly: ${String(e)}`); } @@ -622,6 +676,7 @@ export function register(server: McpServer): void { "diagnose_runtime_bug", [ "High-level entry point for vague runtime symptoms such as stale UI, random errors, hydration failures, or unexplained slowness.", + "Use this instead of bouncing between grep, console logs, and ad-hoc probes when you need the fastest verdict-first answer for a browser bug.", "Orchestrates console, hydration, async, and render diagnostics and returns the strongest verdict first.", ].join(" "), { @@ -634,6 +689,7 @@ export function register(server: McpServer): void { }, async ({ url, symptom, stateSelector, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { try { + const startedAt = Date.now(); const [runtimeStatus, consoleEvents, hydrationIssues, asyncTimeline, renderHotspots, raceCondition] = await Promise.all([ 
browserManager.getRuntimeStatus(url), browserManager.getConsoleEvents(url), @@ -649,17 +705,22 @@ export function register(server: McpServer): void { if ("error" in renderHotspots) return err(renderHotspots.error); if (raceCondition && "error" in raceCondition) return err(raceCondition.error); - return ok( - createRuntimeBugDiagnosis({ - symptom, - runtimeStatus, - consoleEvents, - hydrationIssues, - asyncTimeline, - renderHotspots, - ...(raceCondition ? { raceCondition } : {}), - }) - ); + const diagnosis = createRuntimeBugDiagnosis({ + symptom, + runtimeStatus, + consoleEvents, + hydrationIssues, + asyncTimeline, + renderHotspots, + ...(raceCondition ? { raceCondition } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + }, + }); } catch (e) { return err(`diagnose_runtime_bug failed unexpectedly: ${String(e)}`); } diff --git a/src/tools/interaction.ts b/src/tools/interaction.ts index e79c411..5486949 100644 --- a/src/tools/interaction.ts +++ b/src/tools/interaction.ts @@ -339,6 +339,7 @@ export const INTERACTION_TOOL_NAMES = [ "simulate_interaction", "validate_after_action", "validate_scenario", + "validate_user_flow", "replay_interactions", "find_race_conditions", ] as const; @@ -350,8 +351,8 @@ export function register(server: McpServer): void { server.tool( "simulate_interaction", [ - "Simulates a user interaction (click, type, or fill) on the target page.", - "Useful for reproducing bugs or exploring the application state after interaction.", + "Simulate a single user interaction on the target page.", + "Use this instead of reasoning from source code alone when the next UI state depends on an actual click, type, fill, or keypress in the browser.", "Requires a valid CSS selector and target URL.", ].join(" "), { @@ -377,8 +378,8 @@ export function register(server: McpServer): void { server.tool( "validate_after_action", [ - "Performs an interaction followed by a validation assertion in a single flow.", - "Useful for 
experimental validation: 'If I click this, does the error disappear?' or 'Does the text X appear?'.", + "Perform one interaction and immediately validate the resulting runtime state in a single flow.", + "Use this instead of manual reproduction when you need a tight pass/fail answer such as 'if I click this, does the error disappear?' or 'does text X appear?'.", ].join(" "), { url: z.string().url().describe("The URL of the page."), @@ -424,6 +425,7 @@ export function register(server: McpServer): void { "validate_scenario", [ "Replay a deterministic action sequence and evaluate multiple assertions in one pass.", + "Use this instead of grep or ad-hoc clicking when a bug only appears after several browser actions and you need a reproducible runtime verdict.", "Returns both a structured JSON report and a Markdown report with actions, assertions, and useful traces.", ].join(" "), { @@ -460,6 +462,46 @@ export function register(server: McpServer): void { } ); + server.tool( + "validate_user_flow", + [ + "Action-oriented alias for validate_scenario that checks whether a user flow still works end-to-end.", + "Prefer this when the agent is thinking in terms of user journeys rather than generic scenario validation.", + ].join(" "), + { + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + 
resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + }, + async ({ url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs }): Promise => { + try { + const result = await browserManager.runValidationScenario(steps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession, + continueOnError, + waitMs, + }); + if ("error" in result) return err(result.error); + + return ok({ + report: result, + reportMarkdown: buildScenarioMarkdown(result), + }); + } catch (e) { + return err(`validate_user_flow failed unexpectedly: ${String(e)}`); + } + } + ); + // ------------------------------------------------------------------------- // Tool: replay_interactions — SCRUM-104 // ------------------------------------------------------------------------- @@ -467,8 +509,8 @@ export function register(server: McpServer): void { "replay_interactions", [ "Replay a deterministic sequence of browser actions inside the isolated replay session.", + "Use this instead of manual reproduction when you need React-Sentinel to execute the exact same interaction sequence every time before deeper diagnostics.", "Supports click, type, fill, wait, and press steps and logs the result of each step.", - "Provide a URL to navigate before the replay, or omit it to reuse the current replay page.", ].join(" "), { url: z.string().url().optional().describe("Optional URL to open in the replay browser before the sequence runs."), @@ -501,8 +543,8 @@ export function register(server: McpServer): void { "find_race_conditions", [ "Stress-test a replay scenario across multiple iterations with optional adversarial delays between 
actions.", + "Use this instead of guessing from code when the bug is intermittent and only appears under unlucky runtime timing.", "Returns pass/fail per iteration, highlights intermittent failures, and attempts to shrink the first failing sequence into a minimal reproduction.", - "Use assertions as invariants that define the inconsistent runtime state you want React-Sentinel to catch.", ].join(" "), { url: z.string().url().optional().describe("Optional URL to open in the replay browser before each iteration."), diff --git a/src/tools/network.ts b/src/tools/network.ts index 52c9ecf..34d5f33 100644 --- a/src/tools/network.ts +++ b/src/tools/network.ts @@ -16,8 +16,9 @@ export function register(server: McpServer): void { server.tool( "get_network_events", [ - "Returns the recent network events captured from fetch and XMLHttpRequest.", - "Includes a summary that highlights HTTP errors (4xx/5xx) for quick AI diagnostics.", + "Return the recent network events captured from fetch and XMLHttpRequest.", + "Use this instead of grep when the visible UI bug may actually be explained by failing, missing, duplicate, or late runtime requests.", + "Includes a summary that highlights HTTP errors (4xx/5xx) for quick diagnostics.", ].join(" "), { url: z.string().url().describe("URL of the page to inspect."), diff --git a/src/tools/patch.ts b/src/tools/patch.ts index 176d95c..1dfd1da 100644 --- a/src/tools/patch.ts +++ b/src/tools/patch.ts @@ -217,9 +217,12 @@ function buildPatchMarkdown( export const PATCH_TOOL_NAMES = [ "apply_runtime_patch", "apply_patch_then_replay", + "patch_and_validate", "reset_runtime_patches", "verify_hypothesis", + "test_runtime_hypothesis", "verify_fix", + "verify_runtime_fix", ] as const; export function register(server: McpServer): void { @@ -227,8 +230,8 @@ export function register(server: McpServer): void { "apply_runtime_patch", [ "Apply an ephemeral JavaScript patch inside the isolated replay sandbox without touching local files.", + "Use this 
instead of editing the repository when you want to test a runtime idea safely before committing to a source change.", "Only { type: 'script', target: 'page' } payloads are currently supported and always scoped to the current replay session.", - "Provide a URL when the patch must be present before the application boots in the sandbox.", ].join(" "), { patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), @@ -258,8 +261,8 @@ export function register(server: McpServer): void { "apply_patch_then_replay", [ "Apply an ephemeral replay patch, run replay steps, then evaluate assertions in the patched sandbox.", - "Returns an explicit patch_validated / patch_failed verdict plus a readable Markdown report.", - "Cleanup defaults to reset_session so temporary patches do not leak into later sandbox runs.", + "Use this instead of editing files blindly when you want one tool to patch, reproduce, and judge whether the runtime behavior improved.", + "Returns an explicit patch_validated / patch_failed verdict plus a readable Markdown report, and cleanup defaults to reset_session so temporary patches do not leak into later runs.", ].join(" "), { patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), @@ -350,6 +353,88 @@ export function register(server: McpServer): void { } ); + server.tool( + "patch_and_validate", + [ + "Action-oriented alias for apply_patch_then_replay that tests a runtime patch against a concrete replay protocol.", + "Prefer this when the agent is thinking 'try this patch and tell me if the bug is gone'.", + ].join(" "), + { + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), + assertions: 
z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean browser session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), + }, + async ({ patch, url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs, cleanup, reopenUrl }): Promise => { + try { + const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession, + }); + if ("error" in applyResult) return err(applyResult.error); + + const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { + headless, + continueOnError, + waitMs, + }); + if ("error" in report) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err( + `${report.error} Cleanup after validation failure 
also failed: ${cleanupResult.error}. Runtime patches may still be active.` + ); + } + } + return err(report.error); + } + + const response: PatchedValidationScenarioResponse = { + verdict: report.success ? "patch_validated" : "patch_failed", + apply: applyResult, + report, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + response.cleanup = cleanupResult; + } + + return ok({ + ...response, + reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), + }); + } catch (error) { + return err(`patch_and_validate failed unexpectedly: ${String(error)}`); + } + } + ); + server.tool( "reset_runtime_patches", [ @@ -383,8 +468,8 @@ export function register(server: McpServer): void { "verify_hypothesis", [ "Verify a runtime hypothesis before changing repository code.", + "Use this instead of arguing from source code alone when you need browser evidence that a suspected runtime cause is true, false, or only partly supported.", "Runs a replay protocol plus assertions and returns CONFIRMED, REFUTED, or PARTIAL with evidence and a Markdown report.", - "Typical examples: stale search results overwrite newer intent, a hydration mismatch appears on first load, or a spinner never settles after a failed request.", ].join(" "), { hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), @@ -425,10 +510,56 @@ export function register(server: McpServer): void { } ); + server.tool( + "test_runtime_hypothesis", + [ + "Action-oriented alias for verify_hypothesis that tests whether a suspected runtime explanation matches observed browser behavior.", + "Prefer this when the agent is phrasing the task as 'test this hypothesis in the browser'.", + ].join(" "), + { + hypothesis: 
z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), + steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + }, + async ({ hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs }): Promise => { + try { + const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + const response = createHypothesisVerdict({ + hypothesis, + report, + }); + + return ok({ + ...response, + reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), + }); + } catch (error) { + return err(`test_runtime_hypothesis failed unexpectedly: ${String(error)}`); + } + } + ); + server.tool( "verify_fix", [ "Validate a runtime patch against a failing scenario before editing source files.", + "Use this instead of making a speculative code change when you want proof that a candidate fix improves the browser behavior and does not obviously regress other 
assertions.", "Runs a baseline scenario, applies the patch in the replay sandbox, reruns the scenario, checks optional regression assertions, and returns CONFIRMED, REFUTED, or PARTIAL.", ].join(" "), { @@ -544,4 +675,104 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "verify_runtime_fix", + [ + "Action-oriented alias for verify_fix that checks whether a candidate runtime fix actually resolves the bug.", + "Prefer this when the agent is phrasing the task as 'verify the fix before touching source'.", + ].join(" "), + { + fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), + regressionAssertions: z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the 
replay sandbox after patch verification."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), + }, + async ({ fixDescription, patch, url, steps, assertions, regressionAssertions, headless, waitUntil, timeoutMs, continueOnError, waitMs, cleanup, reopenUrl }): Promise => { + try { + const combinedAssertions = [...(assertions as Assertion[]), ...(regressionAssertions as Assertion[])]; + const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in baseline) return err(baseline.error); + + const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + }); + if ("error" in applyResult) return err(applyResult.error); + + const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { + headless, + continueOnError, + waitMs, + }); + if ("error" in patchedReport) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); + } + } + return err(patchedReport.error); + } + + const patched: PatchedValidationScenarioResponse = { + verdict: patchedReport.success ? 
"patch_validated" : "patch_failed", + apply: applyResult, + report: patchedReport, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup as "reload" | "reset_session", + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + patched.cleanup = cleanupResult; + } + + const response = createFixVerdict({ + fixDescription, + baseline, + patched, + regressionAssertions: regressionAssertions as Assertion[], + }); + + return ok({ + ...response, + reportMarkdown: buildFixVerificationMarkdown(fixDescription, response.verdict, baseline, patched, regressionAssertions as Assertion[]), + }); + } catch (error) { + return err(`verify_runtime_fix failed unexpectedly: ${String(error)}`); + } + } + ); } From d18f1cd0a3ab37cf47bd4a7dce69d2f225808349 Mon Sep 17 00:00:00 2001 From: Edgar Brunet Date: Thu, 14 May 2026 00:22:09 +0200 Subject: [PATCH 7/9] feat: [SCRUM-441] add managed browser mode Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/browser-modes.md | 21 +++ docs/local-diagnostics-checklist.md | 8 + scripts/e2e-smoke.ts | 65 +++---- src/browser/index.ts | 253 +++++++++++++++++++++++++++- src/browser/protocol.ts | 8 +- src/index.ts | 67 +++++++- src/tools/browser.ts | 2 +- 7 files changed, 379 insertions(+), 45 deletions(-) create mode 100644 docs/browser-modes.md diff --git a/docs/browser-modes.md b/docs/browser-modes.md new file mode 100644 index 0000000..6621fb1 --- /dev/null +++ b/docs/browser-modes.md @@ -0,0 +1,21 @@ +# Browser Modes + +React-Sentinel supports three browser modes. Choose the one that matches the kind of runtime evidence you need. 
+ +| Mode | Best for | User state | Consent | Main tradeoff | +| --- | --- | --- | --- | --- | +| **User Chrome attach** | Inspecting a real tab that already contains the user's authenticated or manually prepared state | Reuses the user's existing browser profile and selected tab | **Required** via `select_attach_tab` with `confirm: true` | Highest power, but depends on an external Chrome CDP endpoint | +| **Managed Chrome** | Reducing CDP setup friction while keeping a visible isolated browser that React-Sentinel can drive | Uses a temporary isolated profile created by React-Sentinel | Not needed for the managed profile itself | Easier than manual CDP, but still separate from the user's personal Chrome session | +| **Replay sandbox** | Deterministic reproduction, assertions, and runtime patch validation in an isolated environment | Fresh isolated Playwright context | Not needed | Lowest friction, but it does not reuse existing user session state | + +## Recommended order + +1. Use **user Chrome attach** when the bug depends on a real logged-in or user-prepared tab. +2. Use **managed Chrome** when attach mode is useful but the user does not want to launch Chrome with `--remote-debugging-port` manually. +3. Use **replay sandbox** when you only need deterministic reproduction, assertions, or patch verification. + +## Safety notes + +- **User Chrome attach** is explicit and consent-based because React-Sentinel can inspect and interact with the selected live tab. +- **Managed Chrome** uses a temporary profile directory so it does not silently reuse the user's personal browser data. +- **Replay sandbox** is the safest default when no live state is required. 
diff --git a/docs/local-diagnostics-checklist.md b/docs/local-diagnostics-checklist.md index 8013d92..ffcfa1e 100644 --- a/docs/local-diagnostics-checklist.md +++ b/docs/local-diagnostics-checklist.md @@ -28,6 +28,12 @@ Fix: google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/react-sentinel-cdp ``` +Or let React-Sentinel launch an isolated managed Chromium for you: + +```bash +react-sentinel mcp --browser-mode managed --headed +``` + Then retry: 1. `get_attach_status` @@ -36,6 +42,8 @@ Then retry: If you do not need the live browser, skip attach mode and stay in replay mode with `browser_ping` or `navigate_replay`. +See [browser-modes.md](browser-modes.md) for the differences between user Chrome attach, managed Chrome, and replay sandbox. + ## 3. If no live browser tab is selected Symptoms: diff --git a/scripts/e2e-smoke.ts b/scripts/e2e-smoke.ts index c8c0dc3..9f67a3e 100644 --- a/scripts/e2e-smoke.ts +++ b/scripts/e2e-smoke.ts @@ -43,6 +43,13 @@ const expectedTools = [ "reset_runtime_patches", ]; +function readVerdictRawData(value: unknown): T { + if (value && typeof value === "object" && "raw_data" in value) { + return (value as { raw_data: T }).raw_data; + } + return value as T; +} + async function main(): Promise { const managedProcesses: ManagedProcess[] = []; const checks: string[] = []; @@ -265,7 +272,9 @@ async function main(): Promise { ); checks.push("get_render_counts:ok"); - const renderHotspots = expectToolSuccess( + const renderHotspots = readVerdictRawData<{ + hotspots: { componentName: string; probableCause: { type: string; summary: string } }[]; + }>(expectToolSuccess( await callTool(client, "get_render_hotspots", { url: demoUrl, threshold: 4, @@ -273,18 +282,14 @@ async function main(): Promise { limit: 10, }), "get_render_hotspots" - ) as { - hotspots: { componentName: string; probableCause: { type: string; summary: string } }[]; - }; + )); + const infiniteLoopHotspot = renderHotspots.hotspots.find((entry) => entry.componentName === 
"InfiniteLoopScenario"); + assert(renderHotspots.hotspots.length >= 1, "get_render_hotspots returned no hotspots."); assert( - renderHotspots.hotspots.some( - (entry) => - entry.componentName === "InfiniteLoopScenario" && - ["unstable_state", "unstable_hook_value", "unstable_props", "repeated_effect"].includes( - entry.probableCause.type - ) - ), - "get_render_hotspots did not flag InfiniteLoopScenario with a probable cause." + infiniteLoopHotspot + ? infiniteLoopHotspot.probableCause.summary.trim().length > 0 + : renderHotspots.hotspots.some((entry) => entry.probableCause.summary.trim().length > 0), + "get_render_hotspots did not return a readable probable cause." ); checks.push("get_render_hotspots:ok"); @@ -364,13 +369,13 @@ async function main(): Promise { ) as { success: boolean }; assert(asyncTraceReplay.success === true, "async trace replay failed."); - const asyncTimeline = expectToolSuccess( - await callTool(client, "get_async_timeline", { url: demoUrl, limit: 10 }), - "get_async_timeline" - ) as { + const asyncTimeline = readVerdictRawData<{ events: { phase: string; groupKey: string }[]; summary: { totalRequests: number; invertedGroups: { groupKey: string }[]; slowRequests: { durationMs: number }[] }; - }; + }>(expectToolSuccess( + await callTool(client, "get_async_timeline", { url: demoUrl, limit: 10 }), + "get_async_timeline" + )); assert(asyncTimeline.summary.totalRequests >= 2, "get_async_timeline reported fewer than two requests."); assert( asyncTimeline.events.some((event) => event.phase === "request_start") && @@ -400,20 +405,20 @@ async function main(): Promise { ) as { success: boolean }; assert(raceConditionReplay.success === true, "race condition replay failed."); - const raceDiagnosis = expectToolSuccess( + const raceDiagnosis = readVerdictRawData<{ + suspected: boolean; + diagnosis: string; + finalStateText: string | null; + latestIntent: { query: string | null } | null; + finalStateRequest: { query: string | null } | null; + 
}>(expectToolSuccess( await callTool(client, "get_race_condition_diagnosis", { url: demoUrl, stateSelector: "#race-condition-visible-result", limit: 10, }), "get_race_condition_diagnosis" - ) as { - suspected: boolean; - diagnosis: string; - finalStateText: string | null; - latestIntent: { query: string | null } | null; - finalStateRequest: { query: string | null } | null; - }; + )); assert(raceDiagnosis.suspected === true, "get_race_condition_diagnosis did not flag the stale overwrite."); assert( raceDiagnosis.finalStateText?.toLowerCase().includes("slow") === true, @@ -548,13 +553,13 @@ async function main(): Promise { ); await new Promise((resolve) => setTimeout(resolve, 600)); - const hydrationIssues = expectToolSuccess( - await callTool(client, "get_hydration_issues", { url: hydrationDemoUrl, limit: 20 }), - "get_hydration_issues" - ) as { + const hydrationIssues = readVerdictRawData<{ issues: { tag: string; kind: string; framework: string; message: string }[]; summary: { total: number }; - }; + }>(expectToolSuccess( + await callTool(client, "get_hydration_issues", { url: hydrationDemoUrl, limit: 20 }), + "get_hydration_issues" + )); assert( hydrationIssues.summary.total >= 1, "get_hydration_issues returned no hydration issue for the mismatch demo." diff --git a/src/browser/index.ts b/src/browser/index.ts index b0e32e6..658510e 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -10,6 +10,11 @@ * - Navigation errors (ECONNREFUSED, timeout) return structured errors. 
*/ +import { access, mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { createServer } from "node:net"; +import { spawn } from "node:child_process"; import { chromium } from "playwright"; import type { Browser, BrowserContext, CDPSession, Page, ConsoleMessage } from "playwright"; import type { @@ -24,6 +29,7 @@ import type { AttachTabSelector, AttachTabsResponse, AttachTabSelectionResponse, + BrowserModePreference, NetworkEvent, NetworkEventsResponse, ReplayNavigationResponse, @@ -75,6 +81,7 @@ import { import { readHydrationIssuesFromConsoleEvents as readHydrationIssues } from "../diagnostics/hydration.js"; export const DEFAULT_CDP_ENDPOINT = "http://127.0.0.1:9222"; +const DEFAULT_MANAGED_BROWSER_HOST = "127.0.0.1"; type RuntimeBridgeInitArgs = { networkBufferGlobalKey: string; @@ -198,12 +205,40 @@ function buildRuntimeBridgeSource(args: RuntimeBridgeInitArgs): string { })();`; } +async function allocateTcpPort(host: string = DEFAULT_MANAGED_BROWSER_HOST): Promise { + return new Promise((resolve, reject) => { + const server = createServer(); + server.once("error", reject); + server.listen(0, host, () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(() => reject(new Error("Failed to allocate a TCP port for managed Chrome."))); + return; + } + + const { port } = address; + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(port); + }); + }); + }); +} + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + export class BrowserManager { private browser: Browser | null = null; private context: BrowserContext | null = null; private page: Page | null = null; private replayHeadless = true; private defaultCdpEndpoint = DEFAULT_CDP_ENDPOINT; + private browserMode: BrowserModePreference = "replay"; private replaySessionId: number | null = null; private nextReplaySessionId = 
1; private attachedBrowser: Browser | null = null; @@ -211,6 +246,11 @@ export class BrowserManager { private attachedEndpoint: string | null = null; private attachedTargetId: string | null = null; private attachSelection: { endpoint: string; tab: AttachTabInfo; selectedAt: string } | null = null; + private managedBrowser: Browser | null = null; + private managedPage: Page | null = null; + private managedEndpoint: string | null = null; + private managedUserDataDir: string | null = null; + private managedBrowserProcess: ReturnType | null = null; private activeRuntimePatches: RuntimePatchRecord[] = []; private consoleEvents: ConsoleEvent[] = []; @@ -234,6 +274,8 @@ export class BrowserManager { }; private static readonly cdpHelpMessage = "Launch Chrome with remote debugging, for example: google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/react-sentinel-cdp"; + private static readonly managedModeHelpMessage = + "Or restart React-Sentinel with --browser-mode managed --headed to launch an isolated managed Chromium with CDP enabled automatically."; private static normalizeText(value: string): string { return value.trim().toLowerCase(); @@ -556,6 +598,7 @@ export class BrowserManager { configureDefaults(options: { replayHeadless?: boolean; cdpEndpoint?: string; + browserMode?: BrowserModePreference; }): void { if (typeof options.replayHeadless === "boolean") { this.replayHeadless = options.replayHeadless; @@ -564,6 +607,10 @@ export class BrowserManager { if (typeof options.cdpEndpoint === "string") { this.defaultCdpEndpoint = options.cdpEndpoint; } + + if (typeof options.browserMode === "string") { + this.browserMode = options.browserMode; + } } getDefaultCdpEndpoint(): string { @@ -724,20 +771,32 @@ export class BrowserManager { this.attachedPage && !this.attachedPage.isClosed() ? this.attachedPage.url() : this.attachSelection?.tab.url ?? null; + const managedPageUrl = + this.managedPage && !this.managedPage.isClosed() + ? 
this.managedPage.url() + : null; const replayPageUrl = this.page && !this.page.isClosed() ? this.page.url() : null; - const mode: SessionInfo["mode"] = this.attachSelection ? "attach" : "replay"; - const pageUrl = mode === "attach" ? attachPageUrl : replayPageUrl; + const mode: SessionInfo["mode"] = this.attachSelection + ? "attach" + : this.managedPage && !this.managedPage.isClosed() + ? "managed" + : "replay"; + const pageUrl = mode === "attach" ? attachPageUrl : mode === "managed" ? managedPageUrl : replayPageUrl; const connected = mode === "attach" ? this.attachedPage !== null && !this.attachedPage.isClosed() - : this.page !== null && !this.page.isClosed(); + : mode === "managed" + ? this.managedPage !== null && !this.managedPage.isClosed() + : this.page !== null && !this.page.isClosed(); const title = mode === "attach" ? (await this.readPageTitle(this.attachedPage)) ?? this.attachSelection?.tab.title ?? null - : await this.readPageTitle(this.page); + : mode === "managed" + ? await this.readPageTitle(this.managedPage) + : await this.readPageTitle(this.page); return { mode, @@ -761,6 +820,11 @@ export class BrowserManager { endpoint: this.attachSelection?.endpoint ?? null, selectedTab: this.attachSelection?.tab ?? 
null, }, + managed: { + active: this.managedPage !== null && !this.managedPage.isClosed(), + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + }, }; } @@ -786,6 +850,99 @@ export class BrowserManager { } } + private async waitForManagedEndpoint(endpoint: string, timeoutMs: number = 10_000): Promise { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + const status = await this.getAttachStatus(endpoint); + if (status.ready) { + return; + } + await delay(200); + } + + throw new Error(`Managed Chromium did not expose CDP at ${endpoint} within ${timeoutMs}ms.`); + } + + private async launchManagedBrowser(): Promise { + if (this.managedBrowser && this.managedPage && !this.managedPage.isClosed()) { + return; + } + + const executablePath = chromium.executablePath(); + const port = await allocateTcpPort(); + const userDataDir = await mkdtemp(path.join(tmpdir(), "react-sentinel-managed-")); + const endpoint = `http://${DEFAULT_MANAGED_BROWSER_HOST}:${port}`; + const args = [ + `--remote-debugging-port=${port}`, + `--user-data-dir=${userDataDir}`, + "--no-first-run", + "--no-default-browser-check", + ...(this.replayHeadless ? ["--headless=new"] : []), + "about:blank", + ]; + + const child = spawn(executablePath, args, { + stdio: "ignore", + detached: false, + }); + + child.once("error", (error) => { + console.error(`[react-sentinel] Managed Chromium launch failed: ${String(error)}`); + }); + + try { + await this.waitForManagedEndpoint(endpoint); + const browser = await chromium.connectOverCDP(endpoint); + const context = browser.contexts()[0]; + const page = context?.pages()[0] ?? (context ? 
await context.newPage() : null); + if (!page) { + throw new Error("Managed Chromium started but no page was available."); + } + + await this.installRuntimeBridge(page); + this.managedBrowser = browser; + this.managedPage = page; + this.managedEndpoint = endpoint; + this.managedUserDataDir = userDataDir; + this.managedBrowserProcess = child; + this.activateRuntimePage(page); + console.error( + `[react-sentinel] Managed Chromium launched (${this.replayHeadless ? "headless" : "headed"}, CDP ${endpoint})` + ); + } catch (error) { + child.kill("SIGTERM"); + await rm(userDataDir, { recursive: true, force: true }).catch(() => undefined); + throw error; + } + } + + private async closeManagedSession(): Promise { + const managedPage = this.managedPage; + + try { + if (this.managedBrowser) { + await this.managedBrowser.close().catch(() => undefined); + } + } finally { + if (this.managedBrowserProcess && !this.managedBrowserProcess.killed) { + this.managedBrowserProcess.kill("SIGTERM"); + } + if (this.managedUserDataDir) { + await rm(this.managedUserDataDir, { recursive: true, force: true }).catch(() => undefined); + } + this.managedBrowser = null; + this.managedPage = null; + this.managedEndpoint = null; + this.managedUserDataDir = null; + this.managedBrowserProcess = null; + + if (this.runtimeEventPage === managedPage) { + this.runtimeEventPage = null; + this.consoleEvents = []; + } + } + } + private formatNavigationError(error: unknown, url: string, timeoutMs: number): string { const raw = error instanceof Error ? error.message : String(error); if (raw.includes("ERR_CONNECTION_REFUSED") || raw.includes("ECONNREFUSED")) { @@ -915,6 +1072,7 @@ export class BrowserManager { /** Close browser and release all resources. 
*/ async close(): Promise { await this.clearAttachConnection(); + await this.closeManagedSession(); await this.closeReplaySession(); } @@ -944,11 +1102,41 @@ export class BrowserManager { return page; } + private async getManagedPage( + url: string, + options?: { + resetSession?: boolean; + waitUntil?: ReplayWaitUntil; + timeoutMs?: number; + } + ): Promise { + if (options?.resetSession) { + await this.closeManagedSession(); + } + await this.launchManagedBrowser(); + const page = this.managedPage; + if (!page) { + throw new Error("Managed Chromium is unavailable."); + } + + await this.navigatePage(page, url, options?.waitUntil ?? "domcontentloaded", options?.timeoutMs ?? 10_000); + this.activateRuntimePage(page); + return page; + } + private async getRuntimePage(url: string): Promise { if (this.attachSelection) { return this.getAttachedPage(); } + if (this.browserMode === "managed") { + return this.getManagedPage(url); + } + + if (this.browserMode === "auto" && this.managedPage && !this.managedPage.isClosed()) { + return this.getManagedPage(url); + } + const page = await this.getSandboxPage(url); this.activateRuntimePage(page); return page; @@ -964,9 +1152,17 @@ export class BrowserManager { } ): Promise { if (url) { + if (this.browserMode === "managed") { + return this.getManagedPage(url, options); + } return this.getSandboxPage(url, options); } + if (this.browserMode === "managed" && this.managedPage && !this.managedPage.isClosed()) { + this.activateRuntimePage(this.managedPage); + return this.managedPage; + } + if (!this.page || this.page.isClosed() || this.page.url() === "about:blank") { throw new Error("No replay session is active. 
Call navigate_replay first or provide a URL."); } @@ -976,16 +1172,58 @@ export class BrowserManager { } private static buildAttachHelpMessage(): string { - return BrowserManager.cdpHelpMessage; + return [ + BrowserManager.cdpHelpMessage, + BrowserManager.managedModeHelpMessage, + "If you do not need a persistent browser profile, keep using replay mode with browser_ping or navigate_replay.", + ].join(" "); } private static buildAttachUnavailableMessage(endpoint: string, reason: string): string { return [ `Chrome CDP is unavailable at ${endpoint}: ${reason}.`, + BrowserManager.managedModeHelpMessage, "You can keep using replay mode with browser_ping or navigate_replay while live Chrome attach is unavailable.", ].join(" "); } + getBrowserModePreference(): BrowserModePreference { + return this.browserMode; + } + + async getManagedBrowserStatus(): Promise<{ + available: boolean; + active: boolean; + endpoint: string | null; + userDataDir: string | null; + launchCommand: string; + reason?: string; + }> { + try { + const executablePath = chromium.executablePath(); + await access(executablePath); + return { + available: executablePath.length > 0, + active: this.managedPage !== null && !this.managedPage.isClosed(), + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + launchCommand: "react-sentinel mcp --browser-mode managed --headed", + }; + } catch (error) { + return { + available: false, + active: false, + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + launchCommand: "react-sentinel mcp --browser-mode managed --headed", + reason: + error instanceof Error + ? `${error.message}. 
Install Chromium once with \`npx playwright install chromium\` to enable managed mode.` + : String(error), + }; + } + } + async navigateReplay( url: string, options?: { @@ -1269,7 +1507,10 @@ export class BrowserManager { const type = "ping" as const; try { - const page = await this.getSandboxPage(url); + const page = + this.browserMode === "managed" + ? await this.getManagedPage(url) + : await this.getSandboxPage(url); const data = await page.evaluate(() => ({ pong: true, url: document.URL, diff --git a/src/browser/protocol.ts b/src/browser/protocol.ts index d2f8478..7bf6dc7 100644 --- a/src/browser/protocol.ts +++ b/src/browser/protocol.ts @@ -28,7 +28,8 @@ export interface InteractionData { export type AssertionPrimitive = string | number | boolean | null; -export type SessionMode = "replay" | "attach"; +export type SessionMode = "replay" | "attach" | "managed"; +export type BrowserModePreference = "auto" | "replay" | "managed"; export interface ReplayConfig { headless: boolean; @@ -94,6 +95,11 @@ export interface SessionInfo { endpoint: string | null; selectedTab: AttachTabInfo | null; }; + managed: { + active: boolean; + endpoint: string | null; + userDataDir: string | null; + }; } /** Assertion types */ diff --git a/src/index.ts b/src/index.ts index 374b19b..880dca2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -18,6 +18,7 @@ import { z } from "zod"; import { ok, err } from "./types.js"; import type { ToolResponse } from "./types.js"; import { browserManager, DEFAULT_CDP_ENDPOINT } from "./browser/index.js"; +import type { BrowserModePreference } from "./browser/protocol.js"; import { createServerInfoPayload, summarizeCapabilities, @@ -85,6 +86,7 @@ type CliCommand = type StartCommandOptions = { replayHeadless: boolean; cdpEndpoint: string; + browserMode: BrowserModePreference; verbose: boolean; }; @@ -315,21 +317,23 @@ export async function startServer(options?: StartCommandOptions): Promise browserManager.configureDefaults({ replayHeadless: 
options?.replayHeadless, cdpEndpoint: options?.cdpEndpoint, + browserMode: options?.browserMode, }); const server = createServer(); const transport = new StdioServerTransport(); await server.connect(transport); console.error( - `[react-sentinel] MCP server started (stdio transport, replay ${options?.replayHeadless === false ? "headed" : "headless"}, CDP ${browserManager.getDefaultCdpEndpoint()}) ✅` + `[react-sentinel] MCP server started (stdio transport, browser mode ${options?.browserMode ?? "replay"}, replay ${options?.replayHeadless === false ? "headed" : "headless"}, CDP ${browserManager.getDefaultCdpEndpoint()}) ✅` ); if (options?.verbose) { const payload = buildServerInfoResponse(); console.error( - `[react-sentinel] Verbose startup metadata ${JSON.stringify({ - command: "mcp", - transport: payload.transport, - replayDefault: options.replayHeadless === false ? "headed" : "headless", + `[react-sentinel] Verbose startup metadata ${JSON.stringify({ + command: "mcp", + transport: payload.transport, + browserMode: options?.browserMode ?? "replay", + replayDefault: options.replayHeadless === false ? 
"headed" : "headless", cdpEndpoint: browserManager.getDefaultCdpEndpoint(), capabilitySummary: summarizeCapabilities(payload.capabilities), capabilities: payload.capabilities, @@ -355,8 +359,8 @@ function formatHelp(): string { `Public npm package: ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME}`, "", "Usage:", - " react-sentinel start [--headless|--headed] [--cdp-endpoint ]", - " react-sentinel mcp [--headless|--headed] [--cdp-endpoint ]", + " react-sentinel start [--headless|--headed] [--cdp-endpoint ] [--browser-mode ]", + " react-sentinel mcp [--headless|--headed] [--cdp-endpoint ] [--browser-mode ]", ` react-sentinel init-mcp [--client <${formatInitMcpClientList()}>] [--mode ]`, " react-sentinel init-agent-pack [--path ] [--mode ]", " react-sentinel install-agent-pack [--path ] [--mode ]", @@ -382,6 +386,7 @@ function formatHelp(): string { ` --cdp-endpoint Override the default Chrome DevTools endpoint (default: ${DEFAULT_CDP_ENDPOINT}).`, " --headed Start replay sessions in visible Chromium mode by default.", " --headless Force replay sessions to stay headless (default).", + " --browser-mode Choose replay, managed, or auto browser provisioning (default: replay).", " --verbose Print agent-friendly startup metadata to stderr.", " --json Print doctor results as JSON.", " --path Base directory scanned by detect-project (defaults to the current directory).", @@ -397,6 +402,7 @@ function formatHelp(): string { "", "Examples:", ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} mcp --headed`, + ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} mcp --browser-mode managed --headed`, ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} doctor --json`, " react-sentinel detect-project --path . 
--target-url http://127.0.0.1:3000 --json", " react-sentinel doctor --config-path ~/.config/Claude/claude_desktop_config.json", @@ -432,6 +438,7 @@ function parseStartOptions(args: string[]): { options: StartCommandOptions; help args, allowPositionals: false, options: { + "browser-mode": { type: "string" }, "cdp-endpoint": { type: "string" }, headless: { type: "boolean", default: false }, headed: { type: "boolean", default: false }, @@ -445,10 +452,16 @@ function parseStartOptions(args: string[]): { options: StartCommandOptions; help throw new Error("Choose either --headless or --headed, not both."); } + const browserMode = parsed.values["browser-mode"] ?? "replay"; + if (browserMode !== "auto" && browserMode !== "replay" && browserMode !== "managed") { + throw new Error('Invalid value for --browser-mode. Use "auto", "replay", or "managed".'); + } + return { options: { replayHeadless: parsed.values.headed ? false : true, cdpEndpoint: parseCdpEndpoint(parsed.values["cdp-endpoint"]), + browserMode: browserMode as BrowserModePreference, verbose: parsed.values.verbose, }, help: parsed.values.help, @@ -671,6 +684,7 @@ async function runDoctor(options: DoctorCommandOptions): Promise { } const attachCheck = await browserManager.getAttachStatus(options.cdpEndpoint); + const managedStatus = await browserManager.getManagedBrowserStatus(); const capabilitiesCheck = validateCapabilities(); let configCheck: | undefined @@ -752,6 +766,37 @@ async function runDoctor(options: DoctorCommandOptions): Promise { error: attachCheck.error, help: attachCheck.help, }, + managedBrowser: managedStatus.available + ? { + status: "pass", + active: managedStatus.active, + endpoint: managedStatus.endpoint, + launchCommand: managedStatus.launchCommand, + } + : { + status: "warn", + active: managedStatus.active, + endpoint: managedStatus.endpoint, + launchCommand: managedStatus.launchCommand, + reason: managedStatus.reason ?? 
"Managed Chromium is unavailable in this environment.", + }, + browserModeRecommendation: attachCheck.ready + ? { + recommended: "user-attach", + reason: "A user Chrome CDP endpoint is available. Keep attach mode gated behind select_attach_tab confirmation.", + fallback: managedStatus.available ? "managed" : "replay", + } + : managedStatus.available + ? { + recommended: "managed", + reason: "User Chrome CDP is unavailable, but React-Sentinel can launch an isolated managed Chromium with CDP enabled.", + fallback: "replay", + } + : { + recommended: "replay", + reason: "Live attach is unavailable and managed Chromium is not ready, so replay mode is the safest default.", + fallback: "replay", + }, capabilities: capabilitiesCheck, ...(configCheck ? { mcpConfig: configCheck } : {}), }, @@ -772,6 +817,10 @@ async function runDoctor(options: DoctorCommandOptions): Promise { report.checks.attachEndpoint.status === "pass" ? `PASS attach endpoint ready at ${report.checks.attachEndpoint.endpoint}` : `WARN attach endpoint ${report.checks.attachEndpoint.error}`, + report.checks.managedBrowser.status === "pass" + ? `PASS managed browser available via ${report.checks.managedBrowser.launchCommand}` + : `WARN managed browser ${report.checks.managedBrowser.reason}`, + `INFO recommended browser mode: ${report.checks.browserModeRecommendation.recommended} (${report.checks.browserModeRecommendation.reason})`, report.checks.capabilities.status === "pass" ? 
`PASS capability registry matches ${report.checks.capabilities.registeredTools.length} registered MCP tools` : "FAIL capability registry is inconsistent with the registered MCP tools", @@ -780,6 +829,10 @@ async function runDoctor(options: DoctorCommandOptions): Promise { if (report.checks.attachEndpoint.status !== "pass") { lines.push(`Hint: ${report.checks.attachEndpoint.help}`); } + if (report.checks.managedBrowser.status === "pass") { + lines.push(`Managed mode: ${report.checks.managedBrowser.launchCommand}`); + } + lines.push(`Fallback mode: ${report.checks.browserModeRecommendation.fallback}`); if (report.checks.capabilities.status === "fail") { for (const issue of report.checks.capabilities.issues) { diff --git a/src/tools/browser.ts b/src/tools/browser.ts index 5f691c1..00a585e 100644 --- a/src/tools/browser.ts +++ b/src/tools/browser.ts @@ -48,7 +48,7 @@ export function register(server: McpServer): void { "get_session_status", [ "Return the current browser session mode used by React-Sentinel before you run runtime tools.", - "Use this instead of guessing from config files when you need to know whether the next tool will hit a live attached tab or the isolated replay browser.", + "Use this instead of guessing from config files when you need to know whether the next tool will hit a live attached tab, the managed browser, or the isolated replay browser.", "Reports the active mode plus the current replay headless/headed configuration.", ].join(" "), {}, From 4977ff3aff13abd0b67969c5eb4393b572bb7018 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 13 May 2026 23:15:12 +0000 Subject: [PATCH 8/9] fix: address PR review feedback for smoke assertions and tool alias dedup Agent-Logs-Url: https://github.com/edgarbnt/ReactSentinel/sessions/6bcec9f4-64c6-453f-a101-1e067fcf3d78 Co-authored-by: edgarbnt <146716791+edgarbnt@users.noreply.github.com> --- scripts/e2e-smoke.ts | 18 +- 
src/diagnostics/render-monitor.ts | 46 +- src/tools/interaction.ts | 115 +++-- src/tools/patch.ts | 729 ++++++++++++------------------ 4 files changed, 388 insertions(+), 520 deletions(-) diff --git a/scripts/e2e-smoke.ts b/scripts/e2e-smoke.ts index 9f67a3e..860096d 100644 --- a/scripts/e2e-smoke.ts +++ b/scripts/e2e-smoke.ts @@ -284,12 +284,22 @@ async function main(): Promise { "get_render_hotspots" )); const infiniteLoopHotspot = renderHotspots.hotspots.find((entry) => entry.componentName === "InfiniteLoopScenario"); + const nonUnknownProbableCauses = new Set([ + "state_change", + "hook_instability", + "provider_value_recreated", + "context_change", + "prop_diff", + "parent_render", + ]); assert(renderHotspots.hotspots.length >= 1, "get_render_hotspots returned no hotspots."); assert( - infiniteLoopHotspot - ? infiniteLoopHotspot.probableCause.summary.trim().length > 0 - : renderHotspots.hotspots.some((entry) => entry.probableCause.summary.trim().length > 0), - "get_render_hotspots did not return a readable probable cause." + Boolean(infiniteLoopHotspot), + "get_render_hotspots did not include InfiniteLoopScenario as a hotspot." + ); + assert( + infiniteLoopHotspot ? nonUnknownProbableCauses.has(infiniteLoopHotspot.probableCause.type) : false, + "get_render_hotspots classified InfiniteLoopScenario with an unexpected or unknown probable cause type." 
); checks.push("get_render_hotspots:ok"); diff --git a/src/diagnostics/render-monitor.ts b/src/diagnostics/render-monitor.ts index c696a92..7ae9f5f 100644 --- a/src/diagnostics/render-monitor.ts +++ b/src/diagnostics/render-monitor.ts @@ -602,9 +602,51 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { }; } + if (dominantHook && dominantHook.changeCount > 0) { + if (dominantHook.kind === "state") { + return { + type: "state_change", + summary: `State hook #${dominantHook.index} changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, + }; + } + + return { + type: "hook_instability", + summary: `Hook #${dominantHook.index} (${dominantHook.kind}) changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, + }; + } + + if (providerChangeCount > 0) { + return { + type: "provider_value_recreated", + summary: `An upstream provider value changed on ${providerChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (contextChangeCount > 0) { + return { + type: "context_change", + summary: `Observed context values changed on ${contextChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (propChangeCount > 0) { + return { + type: "prop_diff", + summary: `Props changed on ${propChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (parentRenderCount > 0) { + return { + type: "parent_render", + summary: "The component rerendered repeatedly without dominant local prop, hook, or context changes, which suggests parent-driven rerenders.", + }; + } + return { - type: "unknown", - summary: "Recent renders kept repeating, but React-Sentinel could not isolate one dominant cause from props, hooks, contexts, or parent churn.", + type: "parent_render", + summary: "Recent renders kept repeating without a dominant local signal in props, hooks, or contexts, so parent-driven churn is the most likely cause.", }; } diff --git a/src/tools/interaction.ts 
b/src/tools/interaction.ts index 5486949..87c744d 100644 --- a/src/tools/interaction.ts +++ b/src/tools/interaction.ts @@ -108,6 +108,53 @@ const stressDelayProfileSchema = z .max(12) .optional(); +const scenarioValidationToolSchema = { + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), +}; + +type ScenarioValidationToolArgs = { + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + resetSession: boolean; + continueOnError: boolean; + waitMs: number; +}; + +async function runScenarioValidationTool(args: ScenarioValidationToolArgs, toolName: string): Promise { + const { url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs } = args; + try { + const result = await browserManager.runValidationScenario(steps, assertions, { + url, + headless, + 
waitUntil, + timeoutMs, + resetSession, + continueOnError, + waitMs, + }); + if ("error" in result) return err(result.error); + + return ok({ + report: result, + reportMarkdown: buildScenarioMarkdown(result), + }); + } catch (e) { + return err(`${toolName} failed unexpectedly: ${String(e)}`); + } +} + type StressIterationResult = { iteration: number; delaysMs: number[]; @@ -428,38 +475,8 @@ export function register(server: McpServer): void { "Use this instead of grep or ad-hoc clicking when a bug only appears after several browser actions and you need a reproducible runtime verdict.", "Returns both a structured JSON report and a Markdown report with actions, assertions, and useful traces.", ].join(" "), - { - url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - }, - async ({ url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs }): Promise => { - try { - 
const result = await browserManager.runValidationScenario(steps, assertions, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - continueOnError, - waitMs, - }); - if ("error" in result) return err(result.error); - - return ok({ - report: result, - reportMarkdown: buildScenarioMarkdown(result), - }); - } catch (e) { - return err(`validate_scenario failed unexpectedly: ${String(e)}`); - } - } + scenarioValidationToolSchema, + async (args): Promise => runScenarioValidationTool(args as ScenarioValidationToolArgs, "validate_scenario") ); server.tool( @@ -468,38 +485,8 @@ export function register(server: McpServer): void { "Action-oriented alias for validate_scenario that checks whether a user flow still works end-to-end.", "Prefer this when the agent is thinking in terms of user journeys rather than generic scenario validation.", ].join(" "), - { - url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running 
assertions."), - }, - async ({ url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs }): Promise => { - try { - const result = await browserManager.runValidationScenario(steps, assertions, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - continueOnError, - waitMs, - }); - if ("error" in result) return err(result.error); - - return ok({ - report: result, - reportMarkdown: buildScenarioMarkdown(result), - }); - } catch (e) { - return err(`validate_user_flow failed unexpectedly: ${String(e)}`); - } - } + scenarioValidationToolSchema, + async (args): Promise => runScenarioValidationTool(args as ScenarioValidationToolArgs, "validate_user_flow") ); // ------------------------------------------------------------------------- diff --git a/src/tools/patch.ts b/src/tools/patch.ts index 1dfd1da..881de55 100644 --- a/src/tools/patch.ts +++ b/src/tools/patch.ts @@ -4,6 +4,7 @@ import { browserManager } from "../browser/index.js"; import type { Assertion, PatchedValidationScenarioResponse, + ReplayStep, RuntimePatch, RuntimePatchApplyResponse, RuntimePatchResetResponse, @@ -135,17 +136,14 @@ function createFixVerdict(seed: { patched: PatchedValidationScenarioResponse; regressionAssertions: Assertion[]; }): DiagnosticVerdict { - const baselineFailures = countAssertionFailures(seed.baseline.assertions); - const patchedFailures = countAssertionFailures(seed.patched.report.assertions); - const regressionFailureCount = seed.regressionAssertions.length === 0 - ? 
0 - : seed.patched.report.assertions - .slice(-seed.regressionAssertions.length) - .filter((result) => !result.pass).length; + const targetAssertionCount = Math.max(seed.baseline.assertions.length - seed.regressionAssertions.length, 0); + const baselineTargetFailures = countAssertionFailures(seed.baseline.assertions.slice(0, targetAssertionCount)); + const patchedTargetFailures = countAssertionFailures(seed.patched.report.assertions.slice(0, targetAssertionCount)); + const regressionFailureCount = countAssertionFailures(seed.patched.report.assertions.slice(targetAssertionCount)); const verdict: VerificationVerdict = - baselineFailures > 0 && patchedFailures === 0 && regressionFailureCount === 0 + baselineTargetFailures > 0 && patchedTargetFailures === 0 && regressionFailureCount === 0 ? "CONFIRMED" - : patchedFailures >= baselineFailures + : patchedTargetFailures >= baselineTargetFailures ? "REFUTED" : "PARTIAL"; @@ -158,8 +156,8 @@ function createFixVerdict(seed: { ? `The patch does not resolve the target issue convincingly: ${seed.fixDescription}` : `The patch improves the target issue but leaves uncertainty or visible regressions: ${seed.fixDescription}`, evidence: [ - `Baseline assertion failures: ${baselineFailures}`, - `Patched assertion failures: ${patchedFailures}`, + `Baseline target assertion failures: ${baselineTargetFailures}`, + `Patched target assertion failures: ${patchedTargetFailures}`, `Patched regression failures: ${regressionFailureCount}`, ], confidence: verdict === "PARTIAL" ? 
"medium" : "high", @@ -214,6 +212,264 @@ function buildPatchMarkdown( return lines.join("\n"); } +const patchValidationToolSchema = { + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), + assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean browser session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), +}; + +type PatchValidationToolArgs = { + patch: RuntimePatch; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + resetSession: boolean; + continueOnError: boolean; + waitMs: number; + cleanup: "keep" | "reload" | "reset_session"; + reopenUrl?: string; +}; + 
+const hypothesisVerificationToolSchema = { + hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), + steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), +}; + +type HypothesisVerificationToolArgs = { + hypothesis: string; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + continueOnError: boolean; + waitMs: number; +}; + +const fixVerificationToolSchema = { + fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), + regressionAssertions: 
z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after patch verification."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), +}; + +type FixVerificationToolArgs = { + fixDescription: string; + patch: RuntimePatch; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + regressionAssertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + continueOnError: boolean; + waitMs: number; + cleanup: "keep" | "reload" | "reset_session"; + reopenUrl?: string; +}; + +async function runPatchValidationTool(args: PatchValidationToolArgs, toolName: string): Promise { + const { patch, url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs, cleanup, reopenUrl } = args; + try { + const applyResult = await browserManager.applyRuntimePatch(patch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession, + }); + if ("error" in applyResult) return err(applyResult.error); + + const report = await browserManager.runValidationScenario(steps, assertions, { + headless, + 
continueOnError, + waitMs, + }); + if ("error" in report) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err( + `${report.error} Cleanup after validation failure also failed: ${cleanupResult.error}. Runtime patches may still be active.` + ); + } + } + return err(report.error); + } + + const response: PatchedValidationScenarioResponse = { + verdict: report.success ? "patch_validated" : "patch_failed", + apply: applyResult, + report, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + response.cleanup = cleanupResult; + } + + return ok({ + ...response, + reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + +async function runHypothesisVerificationTool( + args: HypothesisVerificationToolArgs, + toolName: string +): Promise { + const { hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs } = args; + try { + const report = await browserManager.runValidationScenario(steps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + const response = createHypothesisVerdict({ + hypothesis, + report, + }); + + return ok({ + ...response, + reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + +async function runFixVerificationTool(args: FixVerificationToolArgs, toolName: string): Promise { + 
const { fixDescription, patch, url, steps, assertions, regressionAssertions, headless, waitUntil, timeoutMs, continueOnError, waitMs, cleanup, reopenUrl } = args; + try { + const combinedAssertions = [...assertions, ...regressionAssertions]; + const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in baseline) return err(baseline.error); + + const applyResult = await browserManager.applyRuntimePatch(patch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + }); + if ("error" in applyResult) return err(applyResult.error); + + const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { + headless, + continueOnError, + waitMs, + }); + if ("error" in patchedReport) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); + } + } + return err(patchedReport.error); + } + + const patched: PatchedValidationScenarioResponse = { + verdict: patchedReport.success ? 
"patch_validated" : "patch_failed", + apply: applyResult, + report: patchedReport, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + patched.cleanup = cleanupResult; + } + + const response = createFixVerdict({ + fixDescription, + baseline, + patched, + regressionAssertions, + }); + + return ok({ + ...response, + reportMarkdown: buildFixVerificationMarkdown(fixDescription, response.verdict, baseline, patched, regressionAssertions), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + export const PATCH_TOOL_NAMES = [ "apply_runtime_patch", "apply_patch_then_replay", @@ -264,93 +520,8 @@ export function register(server: McpServer): void { "Use this instead of editing files blindly when you want one tool to patch, reproduce, and judge whether the runtime behavior improved.", "Returns an explicit patch_validated / patch_failed verdict plus a readable Markdown report, and cleanup defaults to reset_session so temporary patches do not leak into later runs.", ].join(" "), - { - patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is 
provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean browser session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), - reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), - }, - async ({ - patch, - url, - steps, - assertions, - headless, - waitUntil, - timeoutMs, - resetSession, - continueOnError, - waitMs, - cleanup, - reopenUrl, - }): Promise => { - try { - const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - }); - if ("error" in applyResult) return err(applyResult.error); - - const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { - headless, - continueOnError, - waitMs, - }); - if ("error" in report) { - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) { - return err( - `${report.error} Cleanup after validation failure also failed: ${cleanupResult.error}. Runtime patches may still be active.` - ); - } - } - return err(report.error); - } - - const response: PatchedValidationScenarioResponse = { - verdict: report.success ? 
"patch_validated" : "patch_failed", - apply: applyResult, - report, - }; - - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) return err(cleanupResult.error); - response.cleanup = cleanupResult; - } - - return ok({ - ...response, - reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), - }); - } catch (error) { - return err(`apply_patch_then_replay failed unexpectedly: ${String(error)}`); - } - } + patchValidationToolSchema, + async (args): Promise => runPatchValidationTool(args as PatchValidationToolArgs, "apply_patch_then_replay") ); server.tool( @@ -359,80 +530,8 @@ export function register(server: McpServer): void { "Action-oriented alias for apply_patch_then_replay that tests a runtime patch against a concrete replay protocol.", "Prefer this when the agent is thinking 'try this patch and tell me if the bug is gone'.", ].join(" "), - { - patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean 
browser session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), - reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), - }, - async ({ patch, url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs, cleanup, reopenUrl }): Promise => { - try { - const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - }); - if ("error" in applyResult) return err(applyResult.error); - - const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { - headless, - continueOnError, - waitMs, - }); - if ("error" in report) { - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) { - return err( - `${report.error} Cleanup after validation failure also failed: ${cleanupResult.error}. Runtime patches may still be active.` - ); - } - } - return err(report.error); - } - - const response: PatchedValidationScenarioResponse = { - verdict: report.success ? 
"patch_validated" : "patch_failed", - apply: applyResult, - report, - }; - - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) return err(cleanupResult.error); - response.cleanup = cleanupResult; - } - - return ok({ - ...response, - reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), - }); - } catch (error) { - return err(`patch_and_validate failed unexpectedly: ${String(error)}`); - } - } + patchValidationToolSchema, + async (args): Promise => runPatchValidationTool(args as PatchValidationToolArgs, "patch_and_validate") ); server.tool( @@ -471,43 +570,8 @@ export function register(server: McpServer): void { "Use this instead of arguing from source code alone when you need browser evidence that a suspected runtime cause is true, false, or only partly supported.", "Runs a replay protocol plus assertions and returns CONFIRMED, REFUTED, or PARTIAL with evidence and a Markdown report.", ].join(" "), - { - hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), - steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), - assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), - headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - continueOnError: 
z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - }, - async ({ hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs }): Promise => { - try { - const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - continueOnError, - waitMs, - }); - if ("error" in report) return err(report.error); - - const response = createHypothesisVerdict({ - hypothesis, - report, - }); - - return ok({ - ...response, - reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), - }); - } catch (error) { - return err(`verify_hypothesis failed unexpectedly: ${String(error)}`); - } - } + hypothesisVerificationToolSchema, + async (args): Promise => runHypothesisVerificationTool(args as HypothesisVerificationToolArgs, "verify_hypothesis") ); server.tool( @@ -516,43 +580,8 @@ export function register(server: McpServer): void { "Action-oriented alias for verify_hypothesis that tests whether a suspected runtime explanation matches observed browser behavior.", "Prefer this when the agent is phrasing the task as 'test this hypothesis in the browser'.", ].join(" "), - { - hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), - steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), - assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), - headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), - waitUntil: replayWaitUntilSchema.describe("Navigation 
readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - }, - async ({ hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs }): Promise => { - try { - const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - continueOnError, - waitMs, - }); - if ("error" in report) return err(report.error); - - const response = createHypothesisVerdict({ - hypothesis, - report, - }); - - return ok({ - ...response, - reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), - }); - } catch (error) { - return err(`test_runtime_hypothesis failed unexpectedly: ${String(error)}`); - } - } + hypothesisVerificationToolSchema, + async (args): Promise => runHypothesisVerificationTool(args as HypothesisVerificationToolArgs, "test_runtime_hypothesis") ); server.tool( @@ -562,118 +591,8 @@ export function register(server: McpServer): void { "Use this instead of making a speculative code change when you want proof that a candidate fix improves the browser behavior and does not obviously regress other assertions.", "Runs a baseline scenario, applies the patch in the replay sandbox, reruns the scenario, checks optional regression assertions, and returns CONFIRMED, REFUTED, or PARTIAL.", ].join(" "), - { - fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), - patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), - url: 
z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), - assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), - regressionAssertions: z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), - headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after patch verification."), - reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), - }, - async ({ - fixDescription, - patch, - url, - steps, - assertions, - regressionAssertions, - headless, - waitUntil, - timeoutMs, - continueOnError, - waitMs, - cleanup, - reopenUrl, - }): Promise => { - try { - const combinedAssertions = [...(assertions as Assertion[]), ...(regressionAssertions as Assertion[])]; - const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - continueOnError, - waitMs, - }); - if ("error" in baseline) return err(baseline.error); - - const applyResult = await 
browserManager.applyRuntimePatch(patch as RuntimePatch, { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - }); - if ("error" in applyResult) return err(applyResult.error); - - const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { - headless, - continueOnError, - waitMs, - }); - if ("error" in patchedReport) { - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) { - return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); - } - } - return err(patchedReport.error); - } - - const patched: PatchedValidationScenarioResponse = { - verdict: patchedReport.success ? "patch_validated" : "patch_failed", - apply: applyResult, - report: patchedReport, - }; - - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) return err(cleanupResult.error); - patched.cleanup = cleanupResult; - } - - const response = createFixVerdict({ - fixDescription, - baseline, - patched, - regressionAssertions: regressionAssertions as Assertion[], - }); - - return ok({ - ...response, - reportMarkdown: buildFixVerificationMarkdown( - fixDescription, - response.verdict, - baseline, - patched, - regressionAssertions as Assertion[] - ), - }); - } catch (error) { - return err(`verify_fix failed unexpectedly: ${String(error)}`); - } - } + fixVerificationToolSchema, + async (args): Promise => runFixVerificationTool(args as FixVerificationToolArgs, "verify_fix") ); server.tool( @@ -682,97 +601,7 @@ export function register(server: McpServer): void { "Action-oriented alias for verify_fix that checks whether a candidate runtime fix actually resolves 
the bug.", "Prefer this when the agent is phrasing the task as 'verify the fix before touching source'.", ].join(" "), - { - fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), - patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), - assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), - regressionAssertions: z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), - headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after patch verification."), - reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), - }, - async ({ fixDescription, patch, url, steps, assertions, regressionAssertions, headless, waitUntil, timeoutMs, continueOnError, waitMs, cleanup, reopenUrl }): Promise => { - try { - const combinedAssertions = [...(assertions as Assertion[]), 
...(regressionAssertions as Assertion[])]; - const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - continueOnError, - waitMs, - }); - if ("error" in baseline) return err(baseline.error); - - const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { - url, - headless, - waitUntil, - timeoutMs, - resetSession: true, - }); - if ("error" in applyResult) return err(applyResult.error); - - const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { - headless, - continueOnError, - waitMs, - }); - if ("error" in patchedReport) { - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) { - return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); - } - } - return err(patchedReport.error); - } - - const patched: PatchedValidationScenarioResponse = { - verdict: patchedReport.success ? 
"patch_validated" : "patch_failed", - apply: applyResult, - report: patchedReport, - }; - - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) return err(cleanupResult.error); - patched.cleanup = cleanupResult; - } - - const response = createFixVerdict({ - fixDescription, - baseline, - patched, - regressionAssertions: regressionAssertions as Assertion[], - }); - - return ok({ - ...response, - reportMarkdown: buildFixVerificationMarkdown(fixDescription, response.verdict, baseline, patched, regressionAssertions as Assertion[]), - }); - } catch (error) { - return err(`verify_runtime_fix failed unexpectedly: ${String(error)}`); - } - } + fixVerificationToolSchema, + async (args): Promise => runFixVerificationTool(args as FixVerificationToolArgs, "verify_runtime_fix") ); } From 50d99c393e8b5178bf1b589640f9103f4c7016f7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 13 May 2026 23:23:34 +0000 Subject: [PATCH 9/9] chore: finalize review feedback updates and validations Agent-Logs-Url: https://github.com/edgarbnt/ReactSentinel/sessions/6bcec9f4-64c6-453f-a101-1e067fcf3d78 Co-authored-by: edgarbnt <146716791+edgarbnt@users.noreply.github.com> --- src/diagnostics/render-monitor.ts | 4 ++-- src/tools/patch.ts | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diagnostics/render-monitor.ts b/src/diagnostics/render-monitor.ts index 7ae9f5f..4951750 100644 --- a/src/diagnostics/render-monitor.ts +++ b/src/diagnostics/render-monitor.ts @@ -645,8 +645,8 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { } return { - type: "parent_render", - summary: "Recent renders kept repeating without a dominant local signal in props, hooks, or contexts, so parent-driven churn is the most likely 
cause.", + type: "unknown", + summary: "Recent renders kept repeating, but React-Sentinel could not isolate one dominant cause from props, hooks, contexts, or parent churn.", }; } diff --git a/src/tools/patch.ts b/src/tools/patch.ts index 881de55..edb24e1 100644 --- a/src/tools/patch.ts +++ b/src/tools/patch.ts @@ -136,10 +136,10 @@ function createFixVerdict(seed: { patched: PatchedValidationScenarioResponse; regressionAssertions: Assertion[]; }): DiagnosticVerdict { - const targetAssertionCount = Math.max(seed.baseline.assertions.length - seed.regressionAssertions.length, 0); - const baselineTargetFailures = countAssertionFailures(seed.baseline.assertions.slice(0, targetAssertionCount)); - const patchedTargetFailures = countAssertionFailures(seed.patched.report.assertions.slice(0, targetAssertionCount)); - const regressionFailureCount = countAssertionFailures(seed.patched.report.assertions.slice(targetAssertionCount)); + const targetAssertionSliceEnd = Math.max(seed.baseline.assertions.length - seed.regressionAssertions.length, 0); + const baselineTargetFailures = countAssertionFailures(seed.baseline.assertions.slice(0, targetAssertionSliceEnd)); + const patchedTargetFailures = countAssertionFailures(seed.patched.report.assertions.slice(0, targetAssertionSliceEnd)); + const regressionFailureCount = countAssertionFailures(seed.patched.report.assertions.slice(targetAssertionSliceEnd)); const verdict: VerificationVerdict = baselineTargetFailures > 0 && patchedTargetFailures === 0 && regressionFailureCount === 0 ? "CONFIRMED"