diff --git a/docs/browser-modes.md b/docs/browser-modes.md new file mode 100644 index 0000000..6621fb1 --- /dev/null +++ b/docs/browser-modes.md @@ -0,0 +1,21 @@ +# Browser Modes + +React-Sentinel supports three browser modes. Choose the one that matches the kind of runtime evidence you need. + +| Mode | Best for | User state | Consent | Main tradeoff | +| --- | --- | --- | --- | --- | +| **User Chrome attach** | Inspecting a real tab that already contains the user's authenticated or manually prepared state | Reuses the user's existing browser profile and selected tab | **Required** via `select_attach_tab` with `confirm: true` | Highest power, but depends on an external Chrome CDP endpoint | +| **Managed Chrome** | Reducing CDP setup friction while keeping a visible isolated browser that React-Sentinel can drive | Uses a temporary isolated profile created by React-Sentinel | Not needed for the managed profile itself | Easier than manual CDP, but still separate from the user's personal Chrome session | +| **Replay sandbox** | Deterministic reproduction, assertions, and runtime patch validation in an isolated environment | Fresh isolated Playwright context | Not needed | Lowest friction, but it does not reuse existing user session state | + +## Recommended order + +1. Use **user Chrome attach** when the bug depends on a real logged-in or user-prepared tab. +2. Use **managed Chrome** when attach mode is useful but the user does not want to launch Chrome with `--remote-debugging-port` manually. +3. Use **replay sandbox** when you only need deterministic reproduction, assertions, or patch verification. + +## Safety notes + +- **User Chrome attach** is explicit and consent-based because React-Sentinel can inspect and interact with the selected live tab. +- **Managed Chrome** uses a temporary profile directory so it does not silently reuse the user's personal browser data. +- **Replay sandbox** is the safest default when no live state is required. 
diff --git a/docs/local-diagnostics-checklist.md b/docs/local-diagnostics-checklist.md index 8013d92..ffcfa1e 100644 --- a/docs/local-diagnostics-checklist.md +++ b/docs/local-diagnostics-checklist.md @@ -28,6 +28,12 @@ Fix: google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/react-sentinel-cdp ``` +Or let React-Sentinel launch an isolated managed Chromium for you: + +```bash +react-sentinel mcp --browser-mode managed --headed +``` + Then retry: 1. `get_attach_status` @@ -36,6 +42,8 @@ Then retry: If you do not need the live browser, skip attach mode and stay in replay mode with `browser_ping` or `navigate_replay`. +See [browser-modes.md](browser-modes.md) for the differences between user Chrome attach, managed Chrome, and replay sandbox. + ## 3. If no live browser tab is selected Symptoms: diff --git a/docs/tool-selection-guide.md b/docs/tool-selection-guide.md new file mode 100644 index 0000000..6fe9287 --- /dev/null +++ b/docs/tool-selection-guide.md @@ -0,0 +1,20 @@ +# MCP Tool Selection Guide + +Use React-Sentinel when the answer depends on **runtime evidence in the browser**, not just source code structure. + +| If you need to... | Start with | Why this beats grep/read | Good follow-up | +| --- | --- | --- | --- | +| Triage a vague runtime bug fast | `diagnose_runtime_bug` | It correlates console, hydration, async, and render signals into one verdict-first answer. | `attribute_render`, `get_runtime_timeline` | +| Explain why a component rerendered | `diagnose_excess_renders`, `attribute_render` | It inspects live render churn, hooks, props, and context instead of guessing from component code. | `find_memo_breaks`, `inspect_component` | +| Reproduce a bug in a deterministic browser | `navigate_replay` or `start_debug_replay` | It creates a clean replay session that can be rerun exactly. 
| `replay_interactions`, `validate_scenario` | +| Validate a user flow or invariant | `validate_scenario` or `validate_user_flow` | It executes real browser actions and returns pass/fail assertions with traces. | `find_race_conditions` | +| Catch intermittent timing bugs | `find_race_conditions` | It perturbs action timing across multiple iterations and shrinks failing flows. | `verify_hypothesis`, `verify_fix` | +| Test a runtime hypothesis before editing code | `verify_hypothesis` or `test_runtime_hypothesis` | It proves or refutes the idea against browser behavior instead of relying on intuition. | `attribute_render` | +| Try a fix without touching repository files | `apply_patch_then_replay`, `patch_and_validate`, `verify_fix`, or `verify_runtime_fix` | It validates an ephemeral runtime patch in the sandbox before a source change exists. | `reset_runtime_patches` | +| Reuse a real logged-in browser tab | `get_attach_status`, `get_attach_tabs`, `select_attach_tab` | It lets React-Sentinel inspect the exact user-prepared session that static analysis cannot recreate. | `get_runtime_status` | + +## Quick heuristics + +- If the bug depends on **current props, state, context, network, console, or timing**, prefer React-Sentinel. +- If you only need to understand **static source structure**, grep/read is still cheaper. +- Prefer **verdict-first tools** (`diagnose_*`, `attribute_render`, `verify_*`) before low-level atomic tools unless you already know the exact signal you need. 
diff --git a/scripts/e2e-smoke.ts b/scripts/e2e-smoke.ts index c8c0dc3..860096d 100644 --- a/scripts/e2e-smoke.ts +++ b/scripts/e2e-smoke.ts @@ -43,6 +43,13 @@ const expectedTools = [ "reset_runtime_patches", ]; +function readVerdictRawData<T>(value: unknown): T { + if (value && typeof value === "object" && "raw_data" in value) { + return (value as { raw_data: T }).raw_data; + } + return value as T; +} + async function main(): Promise<void> { const managedProcesses: ManagedProcess[] = []; const checks: string[] = []; @@ -265,7 +272,9 @@ ); checks.push("get_render_counts:ok"); - const renderHotspots = expectToolSuccess( + const renderHotspots = readVerdictRawData<{ + hotspots: { componentName: string; probableCause: { type: string; summary: string } }[]; + }>(expectToolSuccess( await callTool(client, "get_render_hotspots", { url: demoUrl, threshold: 4, @@ -273,18 +282,24 @@ limit: 10, }), "get_render_hotspots" - ) as { - hotspots: { componentName: string; probableCause: { type: string; summary: string } }[]; - }; + )); + const infiniteLoopHotspot = renderHotspots.hotspots.find((entry) => entry.componentName === "InfiniteLoopScenario"); + const nonUnknownProbableCauses = new Set([ + "state_change", + "hook_instability", + "provider_value_recreated", + "context_change", + "prop_diff", + "parent_render", + ]); + assert(renderHotspots.hotspots.length >= 1, "get_render_hotspots returned no hotspots."); assert( - renderHotspots.hotspots.some( - (entry) => - entry.componentName === "InfiniteLoopScenario" && - ["unstable_state", "unstable_hook_value", "unstable_props", "repeated_effect"].includes( - entry.probableCause.type - ) - ), - "get_render_hotspots did not flag InfiniteLoopScenario with a probable cause." + Boolean(infiniteLoopHotspot), + "get_render_hotspots did not include InfiniteLoopScenario as a hotspot." + ); + assert( + infiniteLoopHotspot ?
nonUnknownProbableCauses.has(infiniteLoopHotspot.probableCause.type) : false, + "get_render_hotspots classified InfiniteLoopScenario with an unexpected or unknown probable cause type." ); checks.push("get_render_hotspots:ok"); @@ -364,13 +379,13 @@ async function main(): Promise { ) as { success: boolean }; assert(asyncTraceReplay.success === true, "async trace replay failed."); - const asyncTimeline = expectToolSuccess( - await callTool(client, "get_async_timeline", { url: demoUrl, limit: 10 }), - "get_async_timeline" - ) as { + const asyncTimeline = readVerdictRawData<{ events: { phase: string; groupKey: string }[]; summary: { totalRequests: number; invertedGroups: { groupKey: string }[]; slowRequests: { durationMs: number }[] }; - }; + }>(expectToolSuccess( + await callTool(client, "get_async_timeline", { url: demoUrl, limit: 10 }), + "get_async_timeline" + )); assert(asyncTimeline.summary.totalRequests >= 2, "get_async_timeline reported fewer than two requests."); assert( asyncTimeline.events.some((event) => event.phase === "request_start") && @@ -400,20 +415,20 @@ async function main(): Promise { ) as { success: boolean }; assert(raceConditionReplay.success === true, "race condition replay failed."); - const raceDiagnosis = expectToolSuccess( + const raceDiagnosis = readVerdictRawData<{ + suspected: boolean; + diagnosis: string; + finalStateText: string | null; + latestIntent: { query: string | null } | null; + finalStateRequest: { query: string | null } | null; + }>(expectToolSuccess( await callTool(client, "get_race_condition_diagnosis", { url: demoUrl, stateSelector: "#race-condition-visible-result", limit: 10, }), "get_race_condition_diagnosis" - ) as { - suspected: boolean; - diagnosis: string; - finalStateText: string | null; - latestIntent: { query: string | null } | null; - finalStateRequest: { query: string | null } | null; - }; + )); assert(raceDiagnosis.suspected === true, "get_race_condition_diagnosis did not flag the stale overwrite."); assert( 
raceDiagnosis.finalStateText?.toLowerCase().includes("slow") === true, @@ -548,13 +563,13 @@ async function main(): Promise { ); await new Promise((resolve) => setTimeout(resolve, 600)); - const hydrationIssues = expectToolSuccess( - await callTool(client, "get_hydration_issues", { url: hydrationDemoUrl, limit: 20 }), - "get_hydration_issues" - ) as { + const hydrationIssues = readVerdictRawData<{ issues: { tag: string; kind: string; framework: string; message: string }[]; summary: { total: number }; - }; + }>(expectToolSuccess( + await callTool(client, "get_hydration_issues", { url: hydrationDemoUrl, limit: 20 }), + "get_hydration_issues" + )); assert( hydrationIssues.summary.total >= 1, "get_hydration_issues returned no hydration issue for the mismatch demo." diff --git a/src/browser/index.ts b/src/browser/index.ts index b0e32e6..658510e 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -10,6 +10,11 @@ * - Navigation errors (ECONNREFUSED, timeout) return structured errors. 
*/ +import { access, mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { createServer } from "node:net"; +import { spawn } from "node:child_process"; import { chromium } from "playwright"; import type { Browser, BrowserContext, CDPSession, Page, ConsoleMessage } from "playwright"; import type { @@ -24,6 +29,7 @@ import type { AttachTabSelector, AttachTabsResponse, AttachTabSelectionResponse, + BrowserModePreference, NetworkEvent, NetworkEventsResponse, ReplayNavigationResponse, @@ -75,6 +81,7 @@ import { readHydrationIssuesFromConsoleEvents as readHydrationIssues } from "../diagnostics/hydration.js"; export const DEFAULT_CDP_ENDPOINT = "http://127.0.0.1:9222"; +const DEFAULT_MANAGED_BROWSER_HOST = "127.0.0.1"; type RuntimeBridgeInitArgs = { networkBufferGlobalKey: string; @@ -198,12 +205,40 @@ })();`; } +async function allocateTcpPort(host: string = DEFAULT_MANAGED_BROWSER_HOST): Promise<number> { + return new Promise((resolve, reject) => { + const server = createServer(); + server.once("error", reject); + server.listen(0, host, () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(() => reject(new Error("Failed to allocate a TCP port for managed Chrome."))); + return; + } + + const { port } = address; + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(port); + }); + }); + }); +} + +function delay(ms: number): Promise<void> { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + export class BrowserManager { private browser: Browser | null = null; private context: BrowserContext | null = null; private page: Page | null = null; private replayHeadless = true; private defaultCdpEndpoint = DEFAULT_CDP_ENDPOINT; + private browserMode: BrowserModePreference = "replay"; private replaySessionId: number | null = null; private nextReplaySessionId =
1; private attachedBrowser: Browser | null = null; @@ -211,6 +246,11 @@ private attachedEndpoint: string | null = null; private attachedTargetId: string | null = null; private attachSelection: { endpoint: string; tab: AttachTabInfo; selectedAt: string } | null = null; + private managedBrowser: Browser | null = null; + private managedPage: Page | null = null; + private managedEndpoint: string | null = null; + private managedUserDataDir: string | null = null; + private managedBrowserProcess: ReturnType<typeof spawn> | null = null; private activeRuntimePatches: RuntimePatchRecord[] = []; private consoleEvents: ConsoleEvent[] = []; @@ -234,6 +274,8 @@ }; private static readonly cdpHelpMessage = "Launch Chrome with remote debugging, for example: google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/react-sentinel-cdp"; + private static readonly managedModeHelpMessage = + "Or restart React-Sentinel with --browser-mode managed --headed to launch an isolated managed Chromium with CDP enabled automatically."; private static normalizeText(value: string): string { return value.trim().toLowerCase(); @@ -556,6 +598,7 @@ configureDefaults(options: { replayHeadless?: boolean; cdpEndpoint?: string; + browserMode?: BrowserModePreference; }): void { if (typeof options.replayHeadless === "boolean") { this.replayHeadless = options.replayHeadless; @@ -564,6 +607,10 @@ if (typeof options.cdpEndpoint === "string") { this.defaultCdpEndpoint = options.cdpEndpoint; } + + if (typeof options.browserMode === "string") { + this.browserMode = options.browserMode; + } } getDefaultCdpEndpoint(): string { @@ -724,20 +771,32 @@ this.attachedPage && !this.attachedPage.isClosed() ? this.attachedPage.url() : this.attachSelection?.tab.url ?? null; + const managedPageUrl = + this.managedPage && !this.managedPage.isClosed() + ?
this.managedPage.url() + : null; const replayPageUrl = this.page && !this.page.isClosed() ? this.page.url() : null; - const mode: SessionInfo["mode"] = this.attachSelection ? "attach" : "replay"; - const pageUrl = mode === "attach" ? attachPageUrl : replayPageUrl; + const mode: SessionInfo["mode"] = this.attachSelection + ? "attach" + : this.managedPage && !this.managedPage.isClosed() + ? "managed" + : "replay"; + const pageUrl = mode === "attach" ? attachPageUrl : mode === "managed" ? managedPageUrl : replayPageUrl; const connected = mode === "attach" ? this.attachedPage !== null && !this.attachedPage.isClosed() - : this.page !== null && !this.page.isClosed(); + : mode === "managed" + ? this.managedPage !== null && !this.managedPage.isClosed() + : this.page !== null && !this.page.isClosed(); const title = mode === "attach" ? (await this.readPageTitle(this.attachedPage)) ?? this.attachSelection?.tab.title ?? null - : await this.readPageTitle(this.page); + : mode === "managed" + ? await this.readPageTitle(this.managedPage) + : await this.readPageTitle(this.page); return { mode, @@ -761,6 +820,11 @@ export class BrowserManager { endpoint: this.attachSelection?.endpoint ?? null, selectedTab: this.attachSelection?.tab ?? 
null, }, + managed: { + active: this.managedPage !== null && !this.managedPage.isClosed(), + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + }, }; } @@ -786,6 +850,99 @@ } } + private async waitForManagedEndpoint(endpoint: string, timeoutMs: number = 10_000): Promise<void> { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + const status = await this.getAttachStatus(endpoint); + if (status.ready) { + return; + } + await delay(200); + } + + throw new Error(`Managed Chromium did not expose CDP at ${endpoint} within ${timeoutMs}ms.`); + } + + private async launchManagedBrowser(): Promise<void> { + if (this.managedBrowser && this.managedPage && !this.managedPage.isClosed()) { + return; + } + + const executablePath = chromium.executablePath(); + const port = await allocateTcpPort(); + const userDataDir = await mkdtemp(path.join(tmpdir(), "react-sentinel-managed-")); + const endpoint = `http://${DEFAULT_MANAGED_BROWSER_HOST}:${port}`; + const args = [ + `--remote-debugging-port=${port}`, + `--user-data-dir=${userDataDir}`, + "--no-first-run", + "--no-default-browser-check", + ...(this.replayHeadless ? ["--headless=new"] : []), + "about:blank", + ]; + + const child = spawn(executablePath, args, { + stdio: "ignore", + detached: false, + }); + + child.once("error", (error) => { + console.error(`[react-sentinel] Managed Chromium launch failed: ${String(error)}`); + }); + + try { + await this.waitForManagedEndpoint(endpoint); + const browser = await chromium.connectOverCDP(endpoint); + const context = browser.contexts()[0]; + const page = context?.pages()[0] ?? (context ?
await context.newPage() : null); + if (!page) { + throw new Error("Managed Chromium started but no page was available."); + } + + await this.installRuntimeBridge(page); + this.managedBrowser = browser; + this.managedPage = page; + this.managedEndpoint = endpoint; + this.managedUserDataDir = userDataDir; + this.managedBrowserProcess = child; + this.activateRuntimePage(page); + console.error( + `[react-sentinel] Managed Chromium launched (${this.replayHeadless ? "headless" : "headed"}, CDP ${endpoint})` + ); + } catch (error) { + child.kill("SIGTERM"); + await rm(userDataDir, { recursive: true, force: true }).catch(() => undefined); + throw error; + } + } + + private async closeManagedSession(): Promise<void> { + const managedPage = this.managedPage; + + try { + if (this.managedBrowser) { + await this.managedBrowser.close().catch(() => undefined); + } + } finally { + if (this.managedBrowserProcess && !this.managedBrowserProcess.killed) { + this.managedBrowserProcess.kill("SIGTERM"); + } + if (this.managedUserDataDir) { + await rm(this.managedUserDataDir, { recursive: true, force: true }).catch(() => undefined); + } + this.managedBrowser = null; + this.managedPage = null; + this.managedEndpoint = null; + this.managedUserDataDir = null; + this.managedBrowserProcess = null; + + if (this.runtimeEventPage === managedPage) { + this.runtimeEventPage = null; + this.consoleEvents = []; + } + } + } + private formatNavigationError(error: unknown, url: string, timeoutMs: number): string { const raw = error instanceof Error ? error.message : String(error); if (raw.includes("ERR_CONNECTION_REFUSED") || raw.includes("ECONNREFUSED")) { @@ -915,6 +1072,7 @@ /** Close browser and release all resources.
*/ async close(): Promise<void> { await this.clearAttachConnection(); + await this.closeManagedSession(); await this.closeReplaySession(); } @@ -944,11 +1102,41 @@ return page; } + private async getManagedPage( + url: string, + options?: { + resetSession?: boolean; + waitUntil?: ReplayWaitUntil; + timeoutMs?: number; + } + ): Promise<Page> { + if (options?.resetSession) { + await this.closeManagedSession(); + } + await this.launchManagedBrowser(); + const page = this.managedPage; + if (!page) { + throw new Error("Managed Chromium is unavailable."); + } + + await this.navigatePage(page, url, options?.waitUntil ?? "domcontentloaded", options?.timeoutMs ?? 10_000); + this.activateRuntimePage(page); + return page; + } + private async getRuntimePage(url: string): Promise<Page> { if (this.attachSelection) { return this.getAttachedPage(); } + if (this.browserMode === "managed") { + return this.getManagedPage(url); + } + + if (this.browserMode === "auto" && this.managedPage && !this.managedPage.isClosed()) { + return this.getManagedPage(url); + } + const page = await this.getSandboxPage(url); this.activateRuntimePage(page); return page; } @@ -964,9 +1152,17 @@ } ): Promise<Page> { if (url) { + if (this.browserMode === "managed") { + return this.getManagedPage(url, options); + } return this.getSandboxPage(url, options); } + if (this.browserMode === "managed" && this.managedPage && !this.managedPage.isClosed()) { + this.activateRuntimePage(this.managedPage); + return this.managedPage; + } + if (!this.page || this.page.isClosed() || this.page.url() === "about:blank") { throw new Error("No replay session is active.
Call navigate_replay first or provide a URL."); } @@ -976,16 +1172,58 @@ export class BrowserManager { } private static buildAttachHelpMessage(): string { - return BrowserManager.cdpHelpMessage; + return [ + BrowserManager.cdpHelpMessage, + BrowserManager.managedModeHelpMessage, + "If you do not need a persistent browser profile, keep using replay mode with browser_ping or navigate_replay.", + ].join(" "); } private static buildAttachUnavailableMessage(endpoint: string, reason: string): string { return [ `Chrome CDP is unavailable at ${endpoint}: ${reason}.`, + BrowserManager.managedModeHelpMessage, "You can keep using replay mode with browser_ping or navigate_replay while live Chrome attach is unavailable.", ].join(" "); } + getBrowserModePreference(): BrowserModePreference { + return this.browserMode; + } + + async getManagedBrowserStatus(): Promise<{ + available: boolean; + active: boolean; + endpoint: string | null; + userDataDir: string | null; + launchCommand: string; + reason?: string; + }> { + try { + const executablePath = chromium.executablePath(); + await access(executablePath); + return { + available: executablePath.length > 0, + active: this.managedPage !== null && !this.managedPage.isClosed(), + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + launchCommand: "react-sentinel mcp --browser-mode managed --headed", + }; + } catch (error) { + return { + available: false, + active: false, + endpoint: this.managedEndpoint, + userDataDir: this.managedUserDataDir, + launchCommand: "react-sentinel mcp --browser-mode managed --headed", + reason: + error instanceof Error + ? `${error.message}. 
Install Chromium once with \`npx playwright install chromium\` to enable managed mode.` + : String(error), + }; + } + } + async navigateReplay( url: string, options?: { @@ -1269,7 +1507,10 @@ export class BrowserManager { const type = "ping" as const; try { - const page = await this.getSandboxPage(url); + const page = + this.browserMode === "managed" + ? await this.getManagedPage(url) + : await this.getSandboxPage(url); const data = await page.evaluate(() => ({ pong: true, url: document.URL, diff --git a/src/browser/protocol.ts b/src/browser/protocol.ts index d2f8478..7bf6dc7 100644 --- a/src/browser/protocol.ts +++ b/src/browser/protocol.ts @@ -28,7 +28,8 @@ export interface InteractionData { export type AssertionPrimitive = string | number | boolean | null; -export type SessionMode = "replay" | "attach"; +export type SessionMode = "replay" | "attach" | "managed"; +export type BrowserModePreference = "auto" | "replay" | "managed"; export interface ReplayConfig { headless: boolean; @@ -94,6 +95,11 @@ export interface SessionInfo { endpoint: string | null; selectedTab: AttachTabInfo | null; }; + managed: { + active: boolean; + endpoint: string | null; + userDataDir: string | null; + }; } /** Assertion types */ diff --git a/src/capabilities.ts b/src/capabilities.ts index 1cb9223..2f863fe 100644 --- a/src/capabilities.ts +++ b/src/capabilities.ts @@ -14,8 +14,48 @@ export type CapabilityDefinition = { summary: string; }; +export type ToolSelectionGuideEntry = { + situation: string; + startWith: readonly string[]; + why: string; + followUp?: readonly string[]; +}; + const CORE_TOOL_NAMES = ["ping", "get_server_info", "echo"] as const; +const TOOL_SELECTION_GUIDE: readonly ToolSelectionGuideEntry[] = [ + { + situation: "You need the fastest high-signal triage for a vague runtime bug.", + startWith: ["diagnose_runtime_bug"], + why: "Use runtime evidence instead of grep when the symptom depends on browser state, console output, hydration, async timing, or render churn.", 
+ followUp: ["attribute_render", "find_memo_breaks", "get_runtime_timeline"], + }, + { + situation: "A component rerenders too often and you need the most likely cause.", + startWith: ["diagnose_excess_renders", "attribute_render"], + why: "Use replay render signals instead of static code reading when you must prove whether props, state, context, hooks, or a parent render caused the churn.", + followUp: ["find_memo_breaks", "get_hook_changes", "inspect_component"], + }, + { + situation: "You want a deterministic reproduction or invariant check for a user flow.", + startWith: ["validate_scenario", "find_race_conditions"], + why: "Use replay assertions instead of manual clicking or source inspection when the failure appears only after a sequence of actions or timing changes.", + followUp: ["verify_hypothesis", "verify_fix"], + }, + { + situation: "You want to test a fix or a hypothesis before editing repository code.", + startWith: ["verify_hypothesis", "verify_fix", "apply_patch_then_replay"], + why: "Use the replay sandbox instead of editing files blindly when you need proof that a runtime patch changes the observed behavior.", + followUp: ["reset_runtime_patches"], + }, + { + situation: "You need browser access before any runtime investigation can begin.", + startWith: ["get_session_status", "get_attach_status", "navigate_replay"], + why: "Use browser session tools instead of grep when the blocker is connectivity, live attach readiness, or launching an isolated replay session.", + followUp: ["select_attach_tab", "browser_ping"], + }, +]; + const capabilityCatalog = { browser_ping: { status: "available", @@ -49,9 +89,9 @@ const capabilityCatalog = { }, navigate_replay: { status: "available", - tools: ["navigate_replay"], + tools: ["navigate_replay", "start_debug_replay"], modes: ["replay", "sandbox"], - summary: "Open the isolated replay browser on a target application URL.", + summary: "Open the isolated replay browser on a target application URL and start a 
deterministic debugging session.", }, get_runtime_status: { status: "available", @@ -95,6 +135,30 @@ const capabilityCatalog = { modes: ["replay"], summary: "Flag render explosions in replay mode.", }, + diagnose_excess_renders: { + status: "available", + tools: ["diagnose_excess_renders"], + modes: ["replay"], + summary: "High-level replay investigation for excess renders, render loops, and context churn.", + }, + find_memo_breaks: { + status: "available", + tools: ["find_memo_breaks"], + modes: ["replay"], + summary: "High-level replay investigation for memo breaks versus context cascades.", + }, + diagnose_runtime_bug: { + status: "available", + tools: ["diagnose_runtime_bug"], + modes: ["replay"], + summary: "Verdict-first runtime bug triage for vague symptoms before drilling into atomic tools.", + }, + attribute_render: { + status: "available", + tools: ["attribute_render"], + modes: ["replay"], + summary: "Explain why a component rendered by attributing the strongest prop, state, context, provider, hook, or parent cause.", + }, get_hook_changes: { status: "available", tools: ["get_hook_changes"], @@ -125,6 +189,12 @@ const capabilityCatalog = { modes: ["replay"], summary: "Replay-mode render monitor for loops and unstable hooks.", }, + investigation_tools: { + status: "available", + tools: ["diagnose_excess_renders", "find_memo_breaks", "attribute_render", "diagnose_runtime_bug"], + modes: ["replay"], + summary: "Prefer these verdict-first investigations before chaining the lower-level atomic diagnostics yourself.", + }, replay_sandbox: { status: "available", tools: ["navigate_replay", "browser_ping", "replay_interactions"], @@ -137,11 +207,17 @@ const capabilityCatalog = { modes: ["replay", "sandbox"], summary: "Replay a scripted interaction sequence in the isolated browser.", }, + find_race_conditions: { + status: "available", + tools: ["find_race_conditions"], + modes: ["replay", "sandbox"], + summary: "Stress-test replay scenarios with adversarial timing 
to reproduce intermittent races and return minimal failing sequences.", + }, validate_scenario: { status: "available", - tools: ["validate_scenario"], + tools: ["validate_scenario", "validate_user_flow"], modes: ["replay", "sandbox"], - summary: "Run multi-step validations and assertions against the replay sandbox.", + summary: "Run multi-step validations and assertions against the replay sandbox for a deterministic user flow verdict.", }, apply_runtime_patch: { status: "available", @@ -151,9 +227,21 @@ const capabilityCatalog = { }, apply_patch_then_replay: { status: "available", - tools: ["apply_patch_then_replay"], + tools: ["apply_patch_then_replay", "patch_and_validate"], + modes: ["sandbox"], + summary: "Patch, replay, and validate in one sandbox flow before touching source files.", + }, + verify_hypothesis: { + status: "available", + tools: ["verify_hypothesis", "test_runtime_hypothesis"], + modes: ["replay", "sandbox"], + summary: "Confirm, refute, or partially support a runtime hypothesis before touching source code.", + }, + verify_fix: { + status: "available", + tools: ["verify_fix", "verify_runtime_fix"], modes: ["sandbox"], - summary: "Patch, replay, and validate in one sandbox flow.", + summary: "Compare baseline versus patched replay behavior to validate a runtime fix and surface regressions.", }, reset_runtime_patches: { status: "available", @@ -163,7 +251,7 @@ const capabilityCatalog = { }, shadow_sandbox: { status: "partial", - tools: ["apply_runtime_patch", "apply_patch_then_replay", "reset_runtime_patches"], + tools: ["apply_runtime_patch", "apply_patch_then_replay", "patch_and_validate", "verify_fix", "verify_runtime_fix", "reset_runtime_patches"], modes: ["sandbox"], summary: "Shadow sandbox is available for script-on-page patches only; broader patch shapes are still planned.", }, @@ -179,6 +267,9 @@ export function createServerInfoPayload(): { capabilities: Record; capabilityDetails: Record; capabilitiesByMode: Record>; + toolSelectionGuide: 
ToolSelectionGuideEntry[]; + recommendedWorkflows: ToolSelectionGuideEntry[]; + documentation: string[]; } { const capabilityDetails = Object.fromEntries( Object.entries(capabilityCatalog).map(([name, definition]) => [ @@ -218,6 +309,19 @@ export function createServerInfoPayload(): { capabilities, capabilityDetails, capabilitiesByMode, + toolSelectionGuide: TOOL_SELECTION_GUIDE.map((entry) => ({ + situation: entry.situation, + startWith: [...entry.startWith], + why: entry.why, + ...(entry.followUp ? { followUp: [...entry.followUp] } : {}), + })), + recommendedWorkflows: TOOL_SELECTION_GUIDE.map((entry) => ({ + situation: entry.situation, + startWith: [...entry.startWith], + why: entry.why, + ...(entry.followUp ? { followUp: [...entry.followUp] } : {}), + })), + documentation: ["docs/tool-selection-guide.md", "docs/agent-runtime-ux.md", "docs/workflows.md"], }; } diff --git a/src/diagnostics/investigation.ts b/src/diagnostics/investigation.ts new file mode 100644 index 0000000..021ec04 --- /dev/null +++ b/src/diagnostics/investigation.ts @@ -0,0 +1,460 @@ +import type { + AsyncTimelineResponse, + ComponentInspectionResponse, + ConsoleEventsResponse, + DiagnosticVerdict, + HookChangesResponse, + HydrationIssuesResponse, + RaceConditionDiagnosisResponse, + RenderCountsResponse, + RenderHotspotsResponse, + RuntimeStatus, +} from "./protocol.js"; + +type ExcessRenderVerdict = + | "render_loop_detected" + | "memo_break_suspected" + | "context_cascade_suspected" + | "hook_instability_detected" + | "no_excess_renders_detected" + | "excess_renders_inconclusive"; + +type ExcessRenderRawData = { + runtime_status: RuntimeStatus; + render_counts: RenderCountsResponse; + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + +type MemoBreakVerdict = + | "memo_break_suspected" + | "context_cascade_suspected" + | "internal_state_instability_detected" + | "memo_break_not_detected" + | 
"memo_break_inconclusive"; + +type MemoBreakRawData = { + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + +type RuntimeBugVerdict = + | "hydration_failure_detected" + | "race_condition_detected" + | "render_instability_detected" + | "runtime_error_detected" + | "runtime_bug_inconclusive"; + +type RuntimeBugRawData = { + runtime_status: RuntimeStatus; + console_events: ConsoleEventsResponse; + hydration_issues: HydrationIssuesResponse; + async_timeline: AsyncTimelineResponse; + race_condition?: RaceConditionDiagnosisResponse; + render_hotspots: RenderHotspotsResponse; +}; + +type RenderAttributionVerdict = + | "render_attributed" + | "render_attribution_inconclusive" + | "component_not_found"; + +type RenderAttributionRawData = { + render_hotspots: RenderHotspotsResponse; + hook_changes?: HookChangesResponse; + component_inspection?: ComponentInspectionResponse; +}; + +function matchesComponent(componentName: string, candidateName: string, pathText: string): boolean { + return candidateName === componentName || pathText.split(" > ").includes(componentName); +} + +function buildContextEvidence(component?: ComponentInspectionResponse): string[] { + const contexts = component?.component?.contexts ?? 
[]; + if (contexts.length === 0) { + return []; + } + + return [ + `Observed contexts/providers: ${contexts.map((context) => `${context.name} (${context.source})`).join(", ")}`, + ]; +} + +function createDiagnosis(seed: { + verdict: TVerdict; + summary: string; + evidence: string[]; + confidence: "low" | "medium" | "high"; + suspected_source?: string; + next_step?: string; + raw_data: TRawData; +}): DiagnosticVerdict { + return seed; +} + +export function createExcessRenderDiagnosis(seed: { + componentName?: string; + runtimeStatus: RuntimeStatus; + renderCounts: RenderCountsResponse; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict { + const hotspot = + seed.componentName + ? seed.renderHotspots.hotspots.find((entry) => matchesComponent(seed.componentName ?? "", entry.componentName, entry.pathText)) ?? null + : seed.renderHotspots.hotspots[0] ?? null; + + const raw_data: ExcessRenderRawData = { + runtime_status: seed.runtimeStatus, + render_counts: seed.renderCounts, + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? { component_inspection: seed.inspection } : {}), + }; + + if (!hotspot) { + return createDiagnosis({ + verdict: "no_excess_renders_detected", + summary: "No component currently shows a strong excess-render signature in the replay monitor.", + evidence: [ + `Observed components: ${seed.renderCounts.summary.totalComponents}`, + `Observed commits: ${seed.renderCounts.summary.observedCommits}`, + `Detected hotspots: ${seed.renderHotspots.hotspots.length}`, + ], + confidence: seed.renderCounts.summary.observedCommits >= 1 ? "medium" : "low", + next_step: "Replay the failing scenario again or lower the hotspot threshold if the render spike is intermittent.", + raw_data, + }); + } + + const hookLead = seed.hookChanges?.summary.suspiciousHooks[0] ?? 
null; + const contexts = seed.inspection?.component?.contexts ?? []; + const hotspotEvidence = [ + `Top hotspot: ${hotspot.pathText} (${hotspot.recentRenderCount} renders in ${hotspot.windowMs}ms)`, + `Probable cause from render monitor: ${hotspot.probableCause.summary}`, + ...(seed.hookChanges ? [`Hook-change summary: ${seed.hookChanges.summary.probableCause}`] : []), + ...buildContextEvidence(seed.inspection), + ]; + + if (hotspot.probableCause.type === "provider_value_recreated" || hotspot.probableCause.type === "context_change") { + if (contexts.length > 0) { + return createDiagnosis({ + verdict: "context_cascade_suspected", + summary: `${hotspot.componentName} rerenders look driven by upstream context/provider churn rather than a local render loop.`, + evidence: hotspotEvidence, + confidence: "medium", + suspected_source: contexts[0]?.name, + next_step: "Inspect the nearest provider value and memoize or narrow the context payload if it is recreated every render.", + raw_data, + }); + } + } + + if (hotspot.probableCause.type === "prop_diff") { + return createDiagnosis({ + verdict: "memo_break_suspected", + summary: `${hotspot.componentName} is rerendering with unstable props, which strongly suggests a memo break upstream.`, + evidence: hotspotEvidence, + confidence: "high", + suspected_source: hotspot.pathText, + next_step: "Inspect the parent props feeding this component and memoize recreated objects, arrays, or callbacks.", + raw_data, + }); + } + + if (hotspot.probableCause.type === "state_change") { + return createDiagnosis({ + verdict: "render_loop_detected", + summary: `${hotspot.componentName} appears stuck in a state-driven render loop.`, + evidence: hotspotEvidence, + confidence: "high", + suspected_source: hookLead ? 
`state hook #${hookLead.hookIndex}` : hotspot.pathText, + next_step: "Inspect the state update path or effect dependencies that keep feeding this component new state.", + raw_data, + }); + } + + if (hotspot.probableCause.type === "hook_instability") { + return createDiagnosis({ + verdict: "hook_instability_detected", + summary: `${hotspot.componentName} is rerendering because one hook value keeps changing across renders.`, + evidence: hotspotEvidence, + confidence: hookLead?.suspected ? "high" : "medium", + suspected_source: hookLead ? `hook #${hookLead.hookIndex}` : hotspot.pathText, + next_step: "Inspect the unstable hook output and memoize or debounce the value that changes every render.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "excess_renders_inconclusive", + summary: `${hotspot.componentName} clearly rerenders too often, but the dominant cause is still inconclusive.`, + evidence: hotspotEvidence, + confidence: "medium", + suspected_source: hotspot.pathText, + next_step: "Inspect the component tree around this hotspot and compare prop, context, and effect churn together.", + raw_data, + }); +} + +export function createMemoBreakDiagnosis(seed: { + componentName?: string; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict { + const hotspotCandidates = seed.renderHotspots.hotspots.filter((entry) => + seed.componentName ? matchesComponent(seed.componentName, entry.componentName, entry.pathText) : true + ); + const target = + hotspotCandidates.find((entry) => entry.probableCause.type === "prop_diff") ?? + hotspotCandidates[0] ?? + null; + const contexts = seed.inspection?.component?.contexts ?? []; + const raw_data: MemoBreakRawData = { + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? 
{ component_inspection: seed.inspection } : {}), + }; + + if (!target) { + return createDiagnosis({ + verdict: "memo_break_not_detected", + summary: "No strong memo-break signal was found in the current render hotspot set.", + evidence: [`Detected hotspots: ${seed.renderHotspots.hotspots.length}`], + confidence: "medium", + next_step: "Reproduce the issue under replay and target a specific component if the rerender suspect is already known.", + raw_data, + }); + } + + const evidence = [ + `Candidate component: ${target.pathText}`, + `Render monitor cause: ${target.probableCause.summary}`, + ...(seed.hookChanges ? [`Hook-change summary: ${seed.hookChanges.summary.probableCause}`] : []), + ...buildContextEvidence(seed.inspection), + ]; + + if (target.probableCause.type === "prop_diff" && contexts.length === 0) { + return createDiagnosis({ + verdict: "memo_break_suspected", + summary: `${target.componentName} rerenders with changing props and no dominant local hook churn, which is consistent with a memo break.`, + evidence, + confidence: "high", + suspected_source: target.pathText, + next_step: "Inspect the parent render path and stabilize recreated prop references passed into this component.", + raw_data, + }); + } + + if ( + contexts.length > 0 || + target.probableCause.type === "context_change" || + target.probableCause.type === "provider_value_recreated" + ) { + return createDiagnosis({ + verdict: "context_cascade_suspected", + summary: `${target.componentName} looks more affected by context/provider churn than by a classic memo break.`, + evidence, + confidence: "medium", + suspected_source: contexts[0]?.name, + next_step: "Inspect the provider value identity and split or memoize the context if consumers rerender too broadly.", + raw_data, + }); + } + + if ((seed.hookChanges?.summary.suspiciousHooks[0]?.suspected ?? 
false) || target.probableCause.type === "state_change") { + return createDiagnosis({ + verdict: "internal_state_instability_detected", + summary: `${target.componentName} is rerendering because its own hook or state values keep changing, so the issue is not primarily a memo break.`, + evidence, + confidence: "medium", + suspected_source: target.pathText, + next_step: "Inspect the unstable hook or effect before optimizing parent memoization.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "memo_break_inconclusive", + summary: `${target.componentName} remains a memo-break candidate, but the current runtime signals are not decisive.`, + evidence, + confidence: "medium", + suspected_source: target.pathText, + next_step: "Collect a longer replay trace or inspect the component's parent chain to confirm whether prop identity churn is real.", + raw_data, + }); +} + +export function createRenderAttributionDiagnosis(seed: { + componentName: string; + renderHotspots: RenderHotspotsResponse; + hookChanges?: HookChangesResponse; + inspection?: ComponentInspectionResponse; +}): DiagnosticVerdict { + const target = + seed.renderHotspots.hotspots.find((entry) => matchesComponent(seed.componentName, entry.componentName, entry.pathText)) ?? + null; + const raw_data: RenderAttributionRawData = { + render_hotspots: seed.renderHotspots, + ...(seed.hookChanges ? { hook_changes: seed.hookChanges } : {}), + ...(seed.inspection ? 
{ component_inspection: seed.inspection } : {}), + }; + + if (!target) { + return createDiagnosis({ + verdict: "component_not_found", + summary: `React-Sentinel did not capture a recent hotspot for ${seed.componentName}, so render attribution is not decisive yet.`, + evidence: [`Detected hotspots: ${seed.renderHotspots.hotspots.length}`], + confidence: "low", + next_step: "Replay the scenario immediately before attributing the render, or lower the hotspot threshold for this component.", + raw_data, + }); + } + + const cause = target.probableCause; + const contexts = seed.inspection?.component?.contexts ?? []; + const hookLead = seed.hookChanges?.summary.suspiciousHooks[0] ?? null; + const evidence = [ + `Component path: ${target.pathText}`, + `Primary cause: ${cause.summary}`, + ...(hookLead ? [`Dominant hook: #${hookLead.hookIndex} (${hookLead.hookKind}) changed ${hookLead.changeCount} times`] : []), + ...(contexts.length > 0 ? [`Observed contexts/providers: ${contexts.map((context) => context.name).join(", ")}`] : []), + ]; + + const summaryByCause: Record = { + prop_diff: `${target.componentName} most likely rerendered because one or more props changed.`, + state_change: `${target.componentName} most likely rerendered because its own state changed.`, + context_change: `${target.componentName} most likely rerendered because a consumed context value changed.`, + provider_value_recreated: `${target.componentName} most likely rerendered because an upstream provider recreated its value.`, + hook_instability: `${target.componentName} most likely rerendered because a hook value stayed unstable across renders.`, + parent_render: `${target.componentName} most likely rerendered because its parent rerendered without strong local diffs.`, + unknown: `${target.componentName} rerendered, but the dominant cause remains inconclusive.`, + }; + + const nextStepByCause: Record = { + prop_diff: "Inspect the parent props passed into this component and stabilize recreated 
references.", + state_change: "Inspect the state update path or effect chain that keeps changing local state.", + context_change: "Inspect the consumed context source and narrow or memoize the context payload.", + provider_value_recreated: "Inspect the nearest provider and memoize the provided value object.", + hook_instability: "Inspect the unstable hook output and memoize or debounce the changing value.", + parent_render: "Inspect the parent component to understand what keeps it rerendering.", + unknown: "Collect a longer replay trace and compare props, hooks, and context churn together.", + }; + + return createDiagnosis({ + verdict: cause.type === "unknown" ? "render_attribution_inconclusive" : "render_attributed", + summary: summaryByCause[cause.type] ?? summaryByCause.unknown, + evidence, + confidence: cause.type === "unknown" ? "medium" : "high", + suspected_source: + cause.type === "hook_instability" + ? hookLead + ? `hook #${hookLead.hookIndex}` + : target.pathText + : contexts[0]?.name ?? target.pathText, + next_step: nextStepByCause[cause.type] ?? nextStepByCause.unknown, + raw_data, + }); +} + +export function createRuntimeBugDiagnosis(seed: { + symptom: string; + runtimeStatus: RuntimeStatus; + consoleEvents: ConsoleEventsResponse; + hydrationIssues: HydrationIssuesResponse; + asyncTimeline: AsyncTimelineResponse; + renderHotspots: RenderHotspotsResponse; + raceCondition?: RaceConditionDiagnosisResponse; +}): DiagnosticVerdict { + const symptom = seed.symptom.toLowerCase(); + const firstError = seed.consoleEvents.events.find((event) => event.type === "error" || event.type === "exception") ?? null; + const firstHotspot = seed.renderHotspots.hotspots[0] ?? null; + const raw_data: RuntimeBugRawData = { + runtime_status: seed.runtimeStatus, + console_events: seed.consoleEvents, + hydration_issues: seed.hydrationIssues, + async_timeline: seed.asyncTimeline, + ...(seed.raceCondition ? 
{ race_condition: seed.raceCondition } : {}), + render_hotspots: seed.renderHotspots, + }; + + if (seed.hydrationIssues.summary.total > 0 && (/hydr|ssr|server/.test(symptom) || firstError !== null)) { + const firstIssue = seed.hydrationIssues.issues[0]; + return createDiagnosis({ + verdict: "hydration_failure_detected", + summary: `The strongest runtime signal points to hydration failure: ${firstIssue?.message ?? "hydration warnings were captured"}.`, + evidence: [ + `Hydration issues captured: ${seed.hydrationIssues.summary.total}`, + ...(firstError ? [`Console error: ${firstError.text}`] : []), + ], + confidence: "high", + suspected_source: firstIssue ? `${firstIssue.framework}:${firstIssue.kind}` : "hydration", + next_step: "Compare server-rendered and client-rendered inputs on the first load to isolate the mismatch source.", + raw_data, + }); + } + + if (seed.raceCondition?.suspected || seed.asyncTimeline.summary.invertedGroups.length > 0) { + return createDiagnosis({ + verdict: "race_condition_detected", + summary: seed.raceCondition?.diagnosis ?? `Async requests for ${seed.asyncTimeline.summary.invertedGroups[0]?.groupKey ?? "one group"} completed out of order.`, + evidence: [ + ...(seed.raceCondition?.evidence ?? []), + `Completion-order inversions: ${seed.asyncTimeline.summary.invertedGroups.length}`, + ], + confidence: seed.raceCondition?.suspected ? "high" : "medium", + suspected_source: + seed.raceCondition?.invertedGroup?.groupKey ?? 
seed.asyncTimeline.summary.invertedGroups[0]?.groupKey, + next_step: "Verify that only the latest async intent can update visible state and guard stale completions.", + raw_data, + }); + } + + if (firstHotspot && (/render|rerender|freeze|slow|loop/.test(symptom) || seed.renderHotspots.hotspots.length > 0)) { + return createDiagnosis({ + verdict: "render_instability_detected", + summary: `${firstHotspot.componentName} is the clearest runtime suspect because it rerendered ${firstHotspot.recentRenderCount} times in ${firstHotspot.windowMs}ms.`, + evidence: [ + `Top hotspot: ${firstHotspot.pathText}`, + `Probable cause: ${firstHotspot.probableCause.summary}`, + ], + confidence: "medium", + suspected_source: firstHotspot.pathText, + next_step: "Run the excess-render investigation on this component to determine whether props, hooks, or context are responsible.", + raw_data, + }); + } + + if (firstError) { + return createDiagnosis({ + verdict: "runtime_error_detected", + summary: `The strongest runtime signal is a console-level failure: ${firstError.text}`, + evidence: [ + `Console events captured: ${seed.consoleEvents.events.length}`, + firstError.location ? 
`Location: ${firstError.location}` : "No source location was attached to the error.", + ], + confidence: "medium", + suspected_source: firstError.location, + next_step: "Inspect the failing runtime path and reproduce the same error under replay or patch validation.", + raw_data, + }); + } + + return createDiagnosis({ + verdict: "runtime_bug_inconclusive", + summary: `React-Sentinel captured runtime signals for "${seed.symptom}" but none stand out strongly enough to explain the bug yet.`, + evidence: [ + `Hydration issues: ${seed.hydrationIssues.summary.total}`, + `Async inversions: ${seed.asyncTimeline.summary.invertedGroups.length}`, + `Render hotspots: ${seed.renderHotspots.hotspots.length}`, + `Console errors: ${seed.consoleEvents.events.filter((event) => event.type === "error" || event.type === "exception").length}`, + ], + confidence: "low", + next_step: "Refine the symptom with a target component or state selector and replay the failure again to gather stronger evidence.", + raw_data, + }); +} diff --git a/src/diagnostics/protocol.ts b/src/diagnostics/protocol.ts index 9035202..85b9d02 100644 --- a/src/diagnostics/protocol.ts +++ b/src/diagnostics/protocol.ts @@ -19,6 +19,35 @@ export interface ReactInfo { devtoolsHookPresent: boolean; } +// --------------------------------------------------------------------------- +// Verdict-first diagnostics (Sprint 16) +// --------------------------------------------------------------------------- + +export type DiagnosticConfidence = "low" | "medium" | "high"; + +/** + * Standard verdict-first diagnostic response returned by higher-signal MCP tools. + * + * The goal is to lead with an actionable diagnosis while still preserving the + * original raw payload under raw_data for deeper inspection when needed. + */ +export interface DiagnosticVerdict { + /** Machine-readable diagnostic verdict chosen by the tool. */ + verdict: TVerdict; + /** Short actionable summary intended to be consumed before raw data. 
*/ + summary: string; + /** Concrete observations that justify the verdict. */ + evidence: string[]; + /** Confidence level of the diagnostic interpretation. */ + confidence: DiagnosticConfidence; + /** Optional likely source or dominant cause behind the issue. */ + suspected_source?: string; + /** Optional recommended next runtime step for the caller. */ + next_step?: string; + /** Original structured payload preserved for deeper investigation. */ + raw_data?: TRawData; +} + // --------------------------------------------------------------------------- // Runtime status (get_runtime_status) // --------------------------------------------------------------------------- @@ -149,13 +178,17 @@ export interface RenderCountsResponse { durationMs: number; } -export type RenderHotspotCauseType = - | "unstable_state" - | "unstable_hook_value" - | "unstable_props" - | "repeated_effect" +export type RenderAttributionCauseType = + | "prop_diff" + | "state_change" + | "context_change" + | "parent_render" + | "provider_value_recreated" + | "hook_instability" | "unknown"; +export type RenderHotspotCauseType = RenderAttributionCauseType; + export interface RenderHotspotCause { type: RenderHotspotCauseType; summary: string; diff --git a/src/diagnostics/react-runtime.ts b/src/diagnostics/react-runtime.ts index 513cbbc..343579c 100644 --- a/src/diagnostics/react-runtime.ts +++ b/src/diagnostics/react-runtime.ts @@ -521,6 +521,30 @@ export function inspectReactRuntime(request: ReactRuntimeInspectRequest): ReactR return "Unknown"; } + function getTypeDisplayName(type: unknown): string | null { + if (!type) return null; + if (typeof type === "string") return type; + if (typeof type === "function") { + const fn = type as { displayName?: string; name?: string }; + return fn.displayName || fn.name || null; + } + if (typeof type === "object") { + const typeRecord = type as Record; + if (typeof typeRecord.displayName === "string" && typeRecord.displayName.length > 0) { + return 
typeRecord.displayName; + } + if (typeof typeRecord.render === "function") { + const render = typeRecord.render as { displayName?: string; name?: string }; + return `ForwardRef(${render.displayName || render.name || "Anonymous"})`; + } + if ("type" in typeRecord) { + const inner = getTypeDisplayName(typeRecord.type); + return inner ? `Memo(${inner})` : "Memo"; + } + } + return null; + } + function isReactElementLike(value: unknown): boolean { return Boolean(value && typeof value === "object" && "$$typeof" in (value as Record)); } @@ -534,13 +558,8 @@ export function inspectReactRuntime(request: ReactRuntimeInspectRequest): ReactR if (fiber.tag === 10) { return `${getContextName(getFiberContextObject(fiber))}.Provider`; } - if (fiber.type && typeof fiber.type === "object") { - const typeRecord = fiber.type as Record; - if (typeof typeRecord.displayName === "string" && typeRecord.displayName.length > 0) { - return typeRecord.displayName; - } - return "Context/Memo/ForwardRef"; - } + const resolvedName = getTypeDisplayName(fiber.type); + if (resolvedName) return resolvedName; if (fiber.tag === 3) return "HostRoot"; return "Unknown"; } diff --git a/src/diagnostics/render-monitor.ts b/src/diagnostics/render-monitor.ts index e4583a6..4951750 100644 --- a/src/diagnostics/render-monitor.ts +++ b/src/diagnostics/render-monitor.ts @@ -27,6 +27,12 @@ type RenderCountSample = { timestamp: string; renderId: number; props: Record; + parentName: string | null; + contexts: { + name: string; + source: "dependency" | "provider"; + value: unknown; + }[]; hooks: { index: number; kind: ComponentHookKind; @@ -188,6 +194,28 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { return fiber.type._context ?? fiber.type.context ?? 
null; }; + const getTypeDisplayName = (type) => { + if (!type) return null; + if (typeof type === "string") return type; + if (typeof type === "function") { + return type.displayName || type.name || null; + } + if (typeof type === "object") { + if (typeof type.displayName === "string" && type.displayName.trim().length > 0) { + return type.displayName; + } + if (typeof type.render === "function") { + const renderName = type.render.displayName || type.render.name || "Anonymous"; + return "ForwardRef(" + renderName + ")"; + } + if ("type" in type) { + const innerName = getTypeDisplayName(type.type); + return innerName ? "Memo(" + innerName + ")" : "Memo"; + } + } + return null; + }; + const getComponentName = (fiber) => { if (typeof fiber.type === "string") return fiber.type; if (typeof fiber.type === "function") { @@ -196,11 +224,12 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { if (fiber.tag === 10) { return getContextName(getFiberContextObject(fiber)) + ".Provider"; } - if (fiber.type && typeof fiber.type === "object" && typeof fiber.type.displayName === "string") { - return fiber.type.displayName; + const resolvedName = getTypeDisplayName(fiber.type); + if (resolvedName) { + return resolvedName; } if (fiber.tag === 3) return "HostRoot"; - return "Context/Memo/ForwardRef"; + return "AnonymousComposite"; }; const classifyHook = (hook, index) => { @@ -235,6 +264,44 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { return hooks; }; + const extractContexts = (fiber, pathFibers) => { + const fromDependencies = []; + const firstContext = fiber.dependencies?.firstContext; + const dependencySeen = new Set(); + let current = firstContext; + + while (current && typeof current === "object") { + const name = getContextName(current.context); + if (!dependencySeen.has(name)) { + dependencySeen.add(name); + fromDependencies.push({ + name, + source: "dependency", + value: serializeValue(current.memoizedValue), + 
}); + } + current = current.next ?? null; + } + + if (fromDependencies.length > 0) { + return fromDependencies; + } + + const providers = []; + for (const pathFiber of pathFibers) { + if (!(pathFiber.tag === 10 && pathFiber.memoizedProps && typeof pathFiber.memoizedProps === "object" && "value" in pathFiber.memoizedProps)) { + continue; + } + providers.push({ + name: getContextName(getFiberContextObject(pathFiber)), + source: "provider", + value: serializeValue(pathFiber.memoizedProps.value), + }); + } + + return providers; + }; + const isTrackableComponent = (fiber) => { if (!isFiber(fiber)) return false; if (fiber.tag === 3 || fiber.tag === 6 || fiber.tag === 10) return false; @@ -258,7 +325,7 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { } }; - const recordRender = (fiber, path) => { + const recordRender = (fiber, path, pathFibers) => { const state = ensureState(); const timestamp = new Date().toISOString(); const componentName = getComponentName(fiber); @@ -284,6 +351,8 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { timestamp, renderId: state.nextRenderId++, props: serializeProps(fiber.memoizedProps), + parentName: path.length > 1 ? path[path.length - 2] : null, + contexts: extractContexts(fiber, pathFibers), hooks: extractHooks(fiber), }); if (entry.samples.length > maxSamplesPerComponent) { @@ -291,17 +360,18 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { } }; - const walk = (fiber, path) => { + const walk = (fiber, path, pathFibers) => { if (!isFiber(fiber) || fiber.tag === 6) return; const trackable = isTrackableComponent(fiber); const nextPath = trackable ? path.concat(getComponentName(fiber)) : path; + const nextPathFibers = trackable ? 
pathFibers.concat(fiber) : pathFibers; if (trackable && didRender(fiber)) { - recordRender(fiber, nextPath); + recordRender(fiber, nextPath, nextPathFibers); } let child = fiber.child; while (child) { - walk(child, nextPath); + walk(child, nextPath, nextPathFibers); child = child.sibling; } }; @@ -317,7 +387,7 @@ export function buildRenderMonitorSource(args: RenderMonitorInitArgs): string { let child = rootFiber.child; while (child) { - walk(child, []); + walk(child, [], []); child = child.sibling; } }; @@ -425,24 +495,53 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { const hookChanges = new Map(); let propChangeCount = 0; + let contextChangeCount = 0; + let providerChangeCount = 0; + let parentRenderCount = 0; for (let index = 1; index < samples.length; index += 1) { const previousSample = samples[index - 1]; const currentSample = samples[index]; - if (stableStringify(previousSample.props) !== stableStringify(currentSample.props)) { + const propsChanged = stableStringify(previousSample.props) !== stableStringify(currentSample.props); + if (propsChanged) { propChangeCount += 1; } + const previousContexts = new Map(previousSample.contexts.map((context) => [`${context.name}:${context.source}`, context] as const)); + const currentContexts = new Map(currentSample.contexts.map((context) => [`${context.name}:${context.source}`, context] as const)); + const contextKeys = new Set([...previousContexts.keys(), ...currentContexts.keys()]); + let contextChanged = false; + let providerChanged = false; + for (const contextKey of contextKeys) { + const previousContext = previousContexts.get(contextKey); + const currentContext = currentContexts.get(contextKey); + const previousValue = previousContext ? stableStringify(previousContext.value) : "undefined"; + const currentValue = currentContext ? 
stableStringify(currentContext.value) : "undefined"; + if (previousValue === currentValue) continue; + contextChanged = true; + if ((currentContext?.source ?? previousContext?.source) === "provider") { + providerChanged = true; + } + } + if (contextChanged) { + contextChangeCount += 1; + } + if (providerChanged) { + providerChangeCount += 1; + } + const previousHooks = new Map(previousSample.hooks.map((hook) => [`${hook.index}:${hook.kind}`, hook] as const)); const currentHooks = new Map(currentSample.hooks.map((hook) => [`${hook.index}:${hook.kind}`, hook] as const)); const hookKeys = new Set([...previousHooks.keys(), ...currentHooks.keys()]); + let hookChanged = false; for (const hookKey of hookKeys) { const previousHook = previousHooks.get(hookKey); const currentHook = currentHooks.get(hookKey); const previousValue = previousHook ? stableStringify(previousHook.value) : "undefined"; const currentValue = currentHook ? stableStringify(currentHook.value) : "undefined"; if (previousValue === currentValue) continue; + hookChanged = true; const currentStat = hookChanges.get(hookKey); hookChanges.set(hookKey, { @@ -451,6 +550,10 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { changeCount: (currentStat?.changeCount ?? 
0) + 1, }); } + + if (!propsChanged && !contextChanged && !hookChanged && previousSample.parentName === currentSample.parentName) { + parentRenderCount += 1; + } } const transitions = samples.length - 1; @@ -460,27 +563,90 @@ function buildProbableCause(entry: RenderCountRecord): RenderHotspotCause { if (dominantHook && dominantHook.changeCount >= hotThreshold) { if (dominantHook.kind === "state") { return { - type: "unstable_state", + type: "state_change", summary: `State hook #${dominantHook.index} changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, }; } return { - type: "unstable_hook_value", + type: "hook_instability", summary: `Hook #${dominantHook.index} (${dominantHook.kind}) changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, }; } + if (providerChangeCount >= hotThreshold) { + return { + type: "provider_value_recreated", + summary: `An upstream provider value changed on ${providerChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (contextChangeCount >= hotThreshold) { + return { + type: "context_change", + summary: `Observed context values changed on ${contextChangeCount}/${transitions} recent render transitions.`, + }; + } + if (propChangeCount >= hotThreshold) { return { - type: "unstable_props", + type: "prop_diff", + summary: `Props changed on ${propChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (parentRenderCount >= hotThreshold) { + return { + type: "parent_render", + summary: "The component rerendered repeatedly without dominant local prop, hook, or context changes, which suggests parent-driven rerenders.", + }; + } + + if (dominantHook && dominantHook.changeCount > 0) { + if (dominantHook.kind === "state") { + return { + type: "state_change", + summary: `State hook #${dominantHook.index} changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, + }; + } + + return { + type: "hook_instability", + summary: `Hook 
#${dominantHook.index} (${dominantHook.kind}) changed on ${dominantHook.changeCount}/${transitions} recent render transitions.`, + }; + } + + if (providerChangeCount > 0) { + return { + type: "provider_value_recreated", + summary: `An upstream provider value changed on ${providerChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (contextChangeCount > 0) { + return { + type: "context_change", + summary: `Observed context values changed on ${contextChangeCount}/${transitions} recent render transitions.`, + }; + } + + if (propChangeCount > 0) { + return { + type: "prop_diff", summary: `Props changed on ${propChangeCount}/${transitions} recent render transitions.`, }; } + if (parentRenderCount > 0) { + return { + type: "parent_render", + summary: "The component rerendered repeatedly without dominant local prop, hook, or context changes, which suggests parent-driven rerenders.", + }; + } + return { - type: "repeated_effect", - summary: "Recent renders kept repeating without one dominant prop diff, which suggests an effect loop or chained state updates.", + type: "unknown", + summary: "Recent renders kept repeating, but React-Sentinel could not isolate one dominant cause from props, hooks, contexts, or parent churn.", }; } diff --git a/src/diagnostics/verdict.ts b/src/diagnostics/verdict.ts new file mode 100644 index 0000000..2b7b242 --- /dev/null +++ b/src/diagnostics/verdict.ts @@ -0,0 +1,200 @@ +import type { + AsyncTimelineResponse, + DiagnosticConfidence, + DiagnosticVerdict, + HydrationIssuesResponse, + RaceConditionDiagnosisResponse, + RenderHotspotsResponse, +} from "./protocol.js"; + +function createVerdict(seed: { + verdict: TVerdict; + summary: string; + evidence: string[]; + confidence: DiagnosticConfidence; + suspected_source?: string; + next_step?: string; + raw_data: TRawData; +}): DiagnosticVerdict { + return seed; +} + +function formatSlowRequest(label: string, durationMs: number, status: number | null): string { + return `${label} 
took ${durationMs}ms${status === null ? "" : ` (status ${status})`}.`; +} + +export function createRenderHotspotsVerdict( + payload: RenderHotspotsResponse +): DiagnosticVerdict<"render_hotspots_detected" | "no_hotspots_detected", RenderHotspotsResponse> { + const primaryHotspot = payload.hotspots[0] ?? null; + + if (!primaryHotspot) { + return createVerdict({ + verdict: "no_hotspots_detected", + summary: `No component crossed ${payload.threshold} renders within ${payload.windowMs}ms in the recent replay window.`, + evidence: [ + `Observed hotspots: 0`, + `Detection threshold: ${payload.threshold} renders/${payload.windowMs}ms`, + ], + confidence: "medium", + next_step: "If the bug is intermittent, replay the scenario again or lower the hotspot threshold.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "render_hotspots_detected", + summary: `${primaryHotspot.componentName} is the top rerender hotspot with ${primaryHotspot.recentRenderCount} renders in ${primaryHotspot.windowMs}ms.`, + evidence: [ + `Top hotspot: ${primaryHotspot.pathText} (${primaryHotspot.recentRenderCount} renders in ${primaryHotspot.windowMs}ms)`, + `Probable cause: ${primaryHotspot.probableCause.summary}`, + `Additional hotspots detected: ${Math.max(payload.hotspots.length - 1, 0)}`, + ], + confidence: payload.hotspots.length > 1 || primaryHotspot.recentRenderCount >= payload.threshold * 2 ? "high" : "medium", + suspected_source: primaryHotspot.probableCause.type, + next_step: `Inspect ${primaryHotspot.componentName} hook changes or run a higher-level excess render diagnosis to confirm the unstable input.`, + raw_data: payload, + }); +} + +export function createAsyncTimelineVerdict( + payload: AsyncTimelineResponse +): DiagnosticVerdict< + "async_order_inversion_detected" | "slow_async_operations_detected" | "no_async_anomalies_detected" | "no_async_activity_detected", + AsyncTimelineResponse +> { + const invertedGroup = payload.summary.invertedGroups[0] ?? 
null; + const slowRequest = payload.summary.slowRequests[0] ?? null; + + if (payload.summary.totalRequests === 0) { + return createVerdict({ + verdict: "no_async_activity_detected", + summary: "No recent fetch or XHR activity was captured in the runtime timeline.", + evidence: ["Captured requests: 0"], + confidence: "low", + next_step: "Reproduce the bug again before requesting the async timeline.", + raw_data: payload, + }); + } + + if (invertedGroup) { + return createVerdict({ + verdict: "async_order_inversion_detected", + summary: `Async requests for ${invertedGroup.groupKey} completed out of start order, which is a strong race-condition signal.`, + evidence: [ + `Started order: ${invertedGroup.startedOrder.join(" -> ")}`, + `Settled order: ${invertedGroup.settledOrder.join(" -> ")}`, + slowRequest ? `Slowest request: ${formatSlowRequest(slowRequest.label, slowRequest.durationMs, slowRequest.status)}` : "No slow-request outlier detected.", + ], + confidence: "high", + suspected_source: invertedGroup.groupKey, + next_step: "Verify whether a late async response overwrites newer user intent or state.", + raw_data: payload, + }); + } + + if (slowRequest && slowRequest.durationMs >= 1_000) { + return createVerdict({ + verdict: "slow_async_operations_detected", + summary: `${slowRequest.label} is the slowest recent request at ${slowRequest.durationMs}ms, which may amplify UI timing bugs.`, + evidence: [ + formatSlowRequest(slowRequest.label, slowRequest.durationMs, slowRequest.status), + `Tracked request groups: ${payload.summary.groups}`, + `Captured requests: ${payload.summary.totalRequests}`, + ], + confidence: "medium", + suspected_source: slowRequest.groupKey, + next_step: "Inspect the related request path and validate whether slow completion correlates with stale UI state.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "no_async_anomalies_detected", + summary: `Captured ${payload.summary.totalRequests} recent async requests without 
completion-order inversions or major latency outliers.`, + evidence: [ + `Tracked request groups: ${payload.summary.groups}`, + `Completion-order inversions: ${payload.summary.invertedGroups.length}`, + ], + confidence: "medium", + next_step: "If the issue still looks asynchronous, collect a longer trace or run an explicit race-condition diagnosis.", + raw_data: payload, + }); +} + +export function createRaceConditionVerdict( + payload: RaceConditionDiagnosisResponse +): DiagnosticVerdict< + "race_condition_detected" | "race_condition_inconclusive" | "race_condition_not_detected", + RaceConditionDiagnosisResponse +> { + if (payload.suspected) { + return createVerdict({ + verdict: "race_condition_detected", + summary: payload.diagnosis, + evidence: payload.evidence, + confidence: "high", + suspected_source: payload.invertedGroup?.groupKey, + next_step: "Guard late async responses so only the newest intent is allowed to update visible state.", + raw_data: payload, + }); + } + + if (payload.invertedGroup) { + return createVerdict({ + verdict: "race_condition_inconclusive", + summary: payload.diagnosis, + evidence: payload.evidence, + confidence: "medium", + suspected_source: payload.invertedGroup.groupKey, + next_step: "Compare the final UI text with the latest user intent or add stronger assertions around the stale state candidate.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "race_condition_not_detected", + summary: payload.diagnosis, + evidence: payload.evidence.length > 0 ? 
payload.evidence : ["No inverted completion order was detected."], + confidence: "medium", + next_step: "If the bug is intermittent, run a stress replay with tighter action timing and explicit assertions.", + raw_data: payload, + }); +} + +export function createHydrationIssuesVerdict( + payload: HydrationIssuesResponse +): DiagnosticVerdict<"hydration_issues_detected" | "hydration_issues_not_detected", HydrationIssuesResponse> { + const firstIssue = payload.issues[0] ?? null; + + if (!firstIssue) { + return createVerdict({ + verdict: "hydration_issues_not_detected", + summary: "No hydration-related warnings or exceptions were captured from the runtime console.", + evidence: ["Captured hydration issues: 0"], + confidence: "medium", + next_step: "If hydration is still suspected, reload the page from a clean state and inspect the first render again.", + raw_data: payload, + }); + } + + return createVerdict({ + verdict: "hydration_issues_detected", + summary: `${payload.summary.total} hydration signal(s) detected, led by a ${firstIssue.kind} issue in ${firstIssue.framework}.`, + evidence: [ + `First issue: ${firstIssue.message}`, + `By kind: ${Object.entries(payload.summary.byKind) + .filter(([, count]) => count > 0) + .map(([kind, count]) => `${kind}=${count}`) + .join(", ")}`, + `By level: ${Object.entries(payload.summary.byLevel) + .filter(([, count]) => count > 0) + .map(([level, count]) => `${level}=${count}`) + .join(", ")}`, + ], + confidence: firstIssue.level === "error" || firstIssue.kind !== "warning" ? 
"high" : "medium", + suspected_source: `${firstIssue.framework}:${firstIssue.kind}`, + next_step: "Inspect the first server/client mismatch and compare the initial render inputs across server and browser.", + raw_data: payload, + }); +} diff --git a/src/index.ts b/src/index.ts index f35b2e8..880dca2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -18,6 +18,7 @@ import { z } from "zod"; import { ok, err } from "./types.js"; import type { ToolResponse } from "./types.js"; import { browserManager, DEFAULT_CDP_ENDPOINT } from "./browser/index.js"; +import type { BrowserModePreference } from "./browser/protocol.js"; import { createServerInfoPayload, summarizeCapabilities, @@ -85,6 +86,7 @@ type CliCommand = type StartCommandOptions = { replayHeadless: boolean; cdpEndpoint: string; + browserMode: BrowserModePreference; verbose: boolean; }; @@ -245,6 +247,9 @@ function buildServerInfoResponse(): { capabilities: Record; capabilityDetails: ReturnType["capabilityDetails"]; capabilitiesByMode: ReturnType["capabilitiesByMode"]; + toolSelectionGuide: ReturnType["toolSelectionGuide"]; + recommendedWorkflows: ReturnType["recommendedWorkflows"]; + documentation: ReturnType["documentation"]; } { return { name: REACT_SENTINEL_NAME, @@ -275,7 +280,7 @@ function createServer(): McpServer { server.tool( "get_server_info", - "Returns metadata and planned capabilities of this React-Sentinel instance.", + "Return the honest React-Sentinel capability map plus a tool-selection guide that tells an agent when to prefer runtime investigation over grep or static file reading.", {}, async (): Promise => { try { @@ -312,21 +317,23 @@ export async function startServer(options?: StartCommandOptions): Promise browserManager.configureDefaults({ replayHeadless: options?.replayHeadless, cdpEndpoint: options?.cdpEndpoint, + browserMode: options?.browserMode, }); const server = createServer(); const transport = new StdioServerTransport(); await server.connect(transport); console.error( - 
`[react-sentinel] MCP server started (stdio transport, replay ${options?.replayHeadless === false ? "headed" : "headless"}, CDP ${browserManager.getDefaultCdpEndpoint()}) ✅` + `[react-sentinel] MCP server started (stdio transport, browser mode ${options?.browserMode ?? "replay"}, replay ${options?.replayHeadless === false ? "headed" : "headless"}, CDP ${browserManager.getDefaultCdpEndpoint()}) ✅` ); if (options?.verbose) { const payload = buildServerInfoResponse(); console.error( - `[react-sentinel] Verbose startup metadata ${JSON.stringify({ - command: "mcp", - transport: payload.transport, - replayDefault: options.replayHeadless === false ? "headed" : "headless", + `[react-sentinel] Verbose startup metadata ${JSON.stringify({ + command: "mcp", + transport: payload.transport, + browserMode: options?.browserMode ?? "replay", + replayDefault: options.replayHeadless === false ? "headed" : "headless", cdpEndpoint: browserManager.getDefaultCdpEndpoint(), capabilitySummary: summarizeCapabilities(payload.capabilities), capabilities: payload.capabilities, @@ -352,8 +359,8 @@ function formatHelp(): string { `Public npm package: ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME}`, "", "Usage:", - " react-sentinel start [--headless|--headed] [--cdp-endpoint ]", - " react-sentinel mcp [--headless|--headed] [--cdp-endpoint ]", + " react-sentinel start [--headless|--headed] [--cdp-endpoint ] [--browser-mode ]", + " react-sentinel mcp [--headless|--headed] [--cdp-endpoint ] [--browser-mode ]", ` react-sentinel init-mcp [--client <${formatInitMcpClientList()}>] [--mode ]`, " react-sentinel init-agent-pack [--path ] [--mode ]", " react-sentinel install-agent-pack [--path ] [--mode ]", @@ -379,6 +386,7 @@ function formatHelp(): string { ` --cdp-endpoint Override the default Chrome DevTools endpoint (default: ${DEFAULT_CDP_ENDPOINT}).`, " --headed Start replay sessions in visible Chromium mode by default.", " --headless Force replay sessions to stay headless (default).", + " --browser-mode 
Choose replay, managed, or auto browser provisioning (default: replay).", " --verbose Print agent-friendly startup metadata to stderr.", " --json Print doctor results as JSON.", " --path Base directory scanned by detect-project (defaults to the current directory).", @@ -394,6 +402,7 @@ function formatHelp(): string { "", "Examples:", ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} mcp --headed`, + ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} mcp --browser-mode managed --headed`, ` npx -y ${REACT_SENTINEL_PUBLIC_PACKAGE_NAME} doctor --json`, " react-sentinel detect-project --path . --target-url http://127.0.0.1:3000 --json", " react-sentinel doctor --config-path ~/.config/Claude/claude_desktop_config.json", @@ -429,6 +438,7 @@ function parseStartOptions(args: string[]): { options: StartCommandOptions; help args, allowPositionals: false, options: { + "browser-mode": { type: "string" }, "cdp-endpoint": { type: "string" }, headless: { type: "boolean", default: false }, headed: { type: "boolean", default: false }, @@ -442,10 +452,16 @@ function parseStartOptions(args: string[]): { options: StartCommandOptions; help throw new Error("Choose either --headless or --headed, not both."); } + const browserMode = parsed.values["browser-mode"] ?? "replay"; + if (browserMode !== "auto" && browserMode !== "replay" && browserMode !== "managed") { + throw new Error('Invalid value for --browser-mode. Use "auto", "replay", or "managed".'); + } + return { options: { replayHeadless: parsed.values.headed ? 
false : true, cdpEndpoint: parseCdpEndpoint(parsed.values["cdp-endpoint"]), + browserMode: browserMode as BrowserModePreference, verbose: parsed.values.verbose, }, help: parsed.values.help, @@ -668,6 +684,7 @@ async function runDoctor(options: DoctorCommandOptions): Promise { } const attachCheck = await browserManager.getAttachStatus(options.cdpEndpoint); + const managedStatus = await browserManager.getManagedBrowserStatus(); const capabilitiesCheck = validateCapabilities(); let configCheck: | undefined @@ -749,6 +766,37 @@ async function runDoctor(options: DoctorCommandOptions): Promise { error: attachCheck.error, help: attachCheck.help, }, + managedBrowser: managedStatus.available + ? { + status: "pass", + active: managedStatus.active, + endpoint: managedStatus.endpoint, + launchCommand: managedStatus.launchCommand, + } + : { + status: "warn", + active: managedStatus.active, + endpoint: managedStatus.endpoint, + launchCommand: managedStatus.launchCommand, + reason: managedStatus.reason ?? "Managed Chromium is unavailable in this environment.", + }, + browserModeRecommendation: attachCheck.ready + ? { + recommended: "user-attach", + reason: "A user Chrome CDP endpoint is available. Keep attach mode gated behind select_attach_tab confirmation.", + fallback: managedStatus.available ? "managed" : "replay", + } + : managedStatus.available + ? { + recommended: "managed", + reason: "User Chrome CDP is unavailable, but React-Sentinel can launch an isolated managed Chromium with CDP enabled.", + fallback: "replay", + } + : { + recommended: "replay", + reason: "Live attach is unavailable and managed Chromium is not ready, so replay mode is the safest default.", + fallback: "replay", + }, capabilities: capabilitiesCheck, ...(configCheck ? { mcpConfig: configCheck } : {}), }, @@ -769,6 +817,10 @@ async function runDoctor(options: DoctorCommandOptions): Promise { report.checks.attachEndpoint.status === "pass" ? 
`PASS attach endpoint ready at ${report.checks.attachEndpoint.endpoint}` : `WARN attach endpoint ${report.checks.attachEndpoint.error}`, + report.checks.managedBrowser.status === "pass" + ? `PASS managed browser available via ${report.checks.managedBrowser.launchCommand}` + : `WARN managed browser ${report.checks.managedBrowser.reason}`, + `INFO recommended browser mode: ${report.checks.browserModeRecommendation.recommended} (${report.checks.browserModeRecommendation.reason})`, report.checks.capabilities.status === "pass" ? `PASS capability registry matches ${report.checks.capabilities.registeredTools.length} registered MCP tools` : "FAIL capability registry is inconsistent with the registered MCP tools", @@ -777,6 +829,10 @@ async function runDoctor(options: DoctorCommandOptions): Promise { if (report.checks.attachEndpoint.status !== "pass") { lines.push(`Hint: ${report.checks.attachEndpoint.help}`); } + if (report.checks.managedBrowser.status === "pass") { + lines.push(`Managed mode: ${report.checks.managedBrowser.launchCommand}`); + } + lines.push(`Fallback mode: ${report.checks.browserModeRecommendation.fallback}`); if (report.checks.capabilities.status === "fail") { for (const issue of report.checks.capabilities.issues) { diff --git a/src/tools/browser.ts b/src/tools/browser.ts index 995a02e..00a585e 100644 --- a/src/tools/browser.ts +++ b/src/tools/browser.ts @@ -33,6 +33,7 @@ const replayWaitUntilSchema = z export const BROWSER_TOOL_NAMES = [ "get_session_status", "navigate_replay", + "start_debug_replay", "get_attach_status", "browser_ping", "get_attach_tabs", @@ -46,9 +47,9 @@ export function register(server: McpServer): void { server.tool( "get_session_status", [ - "Return the current browser session mode used by React-Sentinel.", - "Reports whether tools will use the attached live tab or the isolated replay browser,", - "plus the current replay headless/headed configuration.", + "Return the current browser session mode used by React-Sentinel before you 
run runtime tools.", + "Use this instead of guessing from config files when you need to know whether the next tool will hit a live attached tab, the managed browser, or the isolated replay browser.", + "Reports the active mode plus the current replay headless/headed configuration.", ].join(" "), {}, async (): Promise => { @@ -66,9 +67,9 @@ export function register(server: McpServer): void { server.tool( "navigate_replay", [ - "Navigate the isolated replay browser to a target URL and wait for the application to load.", - "Supports configurable waitUntil, timeout, and headless/headed replay mode.", - "Returns readable navigation errors plus the active session metadata.", + "Navigate the isolated replay browser to a target URL and wait for the app to load in a clean, deterministic session.", + "Use this instead of manual clicking or grep when you first need a reproducible browser state for runtime diagnostics, assertions, or sandbox patches.", + "Supports configurable waitUntil, timeout, and headless/headed replay mode and returns readable navigation errors plus session metadata.", ].join(" "), { url: z.string().url().describe("Target URL for the replay browser."), @@ -93,15 +94,44 @@ export function register(server: McpServer): void { } ); + server.tool( + "start_debug_replay", + [ + "Action-oriented alias for navigate_replay that starts a clean replay debugging session for runtime investigation.", + "Prefer this when the agent wants an explicit 'start debugging in replay mode' entry point instead of a lower-level navigation name.", + ].join(" "), + { + url: z.string().url().describe("Target URL for the replay browser."), + waitUntil: replayWaitUntilSchema.describe("Playwright readiness event to wait for before returning."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds."), + headless: z.boolean().optional().describe("Override the replay browser mode for this navigation."), + resetSession: 
z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), + }, + async ({ url, waitUntil, timeoutMs, headless, resetSession }): Promise => { + try { + const result = await browserManager.navigateReplay(url, { + waitUntil, + timeoutMs, + headless, + resetSession, + }); + if ("error" in result) return err(result.error); + return ok(result); + } catch (e) { + return err(`start_debug_replay failed unexpectedly: ${String(e)}`); + } + } + ); + // ------------------------------------------------------------------------- // Tool: get_attach_status // ------------------------------------------------------------------------- server.tool( "get_attach_status", [ - "Check whether a Chrome instance exposes the CDP version endpoint at the", - "given URL. Returns a machine-readable attach readiness status", - "plus launch guidance when the endpoint is unavailable.", + "Check whether a Chrome instance exposes the CDP endpoint required for live attach mode.", + "Use this instead of reading docs or config when runtime debugging depends on an already-authenticated user session in a real Chrome tab.", + "Returns a machine-readable attach readiness status plus launch guidance when the endpoint is unavailable.", ].join(" "), { endpoint: z @@ -126,9 +156,9 @@ export function register(server: McpServer): void { server.tool( "browser_ping", [ - "Open an isolated browser context, navigate to a URL, and return page", - "metadata (title, URL, timestamp). 
Validates the MCP ↔ browser bridge.", - "Returns a structured error if the URL is unreachable.", + "Open an isolated browser context, navigate to a URL, and confirm that React-Sentinel can actually reach the target app.", + "Use this instead of assuming the app is up when you need a quick bridge smoke test before deeper runtime tools.", + "Returns page metadata plus a structured error if the URL is unreachable.", ].join(" "), { url: z @@ -154,9 +184,9 @@ export function register(server: McpServer): void { server.tool( "get_attach_tabs", [ - "List the CDP page tabs exposed by a Chrome instance.", - "Optional URL and title filters narrow the returned tab list.", - "Returns the currently selected tab when one has already been chosen.", + "List the live Chrome tabs exposed by a CDP endpoint so the agent can choose the right authenticated or user-prepared page.", + "Use this instead of guessing tab order when the runtime bug only reproduces in a real browser session.", + "Optional URL and title filters narrow the returned tab list and the response also shows the currently selected tab.", ].join(" "), { endpoint: z @@ -184,9 +214,9 @@ export function register(server: McpServer): void { server.tool( "select_attach_tab", [ - "Preview or confirm one CDP page tab by index, URL, or title.", - "A matched tab is not activated for live browser mode until confirm is true.", - "If no tab matches, the tool returns a structured 'not found' response.", + "Preview or confirm one live Chrome tab by index, URL, or title before React-Sentinel reuses it in attach mode.", + "Use this instead of brittle manual coordination when you must point runtime tools at the exact tab that already holds the right app state.", + "A matched tab is not activated until confirm is true, and missing tabs return a structured not-found response.", ].join(" "), { endpoint: z diff --git a/src/tools/diagnostics.ts b/src/tools/diagnostics.ts index 8415861..65368ad 100644 --- a/src/tools/diagnostics.ts +++ 
b/src/tools/diagnostics.ts @@ -8,7 +8,19 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { browserManager } from "../browser/index.js"; +import { + createExcessRenderDiagnosis, + createMemoBreakDiagnosis, + createRenderAttributionDiagnosis, + createRuntimeBugDiagnosis, +} from "../diagnostics/investigation.js"; import type { InspectionResponseMode } from "../diagnostics/protocol.js"; +import { + createAsyncTimelineVerdict, + createHydrationIssuesVerdict, + createRaceConditionVerdict, + createRenderHotspotsVerdict, +} from "../diagnostics/verdict.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; @@ -17,6 +29,22 @@ const inspectionResponseModeSchema = z .default("full") .describe("Choose 'compact' to aggressively trim long inspection payloads for AI consumption."); +const hotspotThresholdSchema = z + .number() + .int() + .min(1) + .max(100) + .optional() + .describe("Minimum renders inside the observation window before a component is treated as suspicious. Default is 8."); + +const hotspotWindowSchema = z + .number() + .int() + .min(100) + .max(30_000) + .optional() + .describe("Observation window in milliseconds used to detect rapid rerenders. Default is 1000ms."); + export const DIAGNOSTIC_TOOL_NAMES = [ "get_runtime_status", "get_react_tree", @@ -30,6 +58,10 @@ export const DIAGNOSTIC_TOOL_NAMES = [ "get_hydration_issues", "get_console_events", "get_runtime_timeline", + "diagnose_excess_renders", + "find_memo_breaks", + "diagnose_runtime_bug", + "attribute_render", ] as const; export function register(server: McpServer): void { @@ -39,10 +71,9 @@ export function register(server: McpServer): void { server.tool( "get_runtime_status", [ - "Navigate to a URL and return a full runtime diagnostic snapshot:", - "page title, URL, viewport dimensions, timestamp, and React detection", - "(version, fiber presence, devtools hook). 
Returns a structured error", - "if the URL is unreachable.", + "Navigate to a URL and return a full runtime diagnostic snapshot of what React-Sentinel can observe right now.", + "Use this instead of reading source files when the first question is whether React is mounted, which page is actually loaded, and whether the runtime bridge is healthy.", + "Returns page title, URL, viewport dimensions, timestamp, React detection, and a structured error if the URL is unreachable.", ].join(" "), { url: z @@ -104,10 +135,9 @@ export function register(server: McpServer): void { server.tool( "inspect_component", [ - "Search the React Fiber tree for a specific component by name and extract", - "its details for AI inspection, including props, path in the tree,", - "provider or consumed contexts, children count, and a compact summary.", - "Use responseMode='compact' when you want a shorter payload.", + "Search the React Fiber tree for a specific component by name and extract its live runtime details.", + "Use this instead of grep when the bug depends on the actual props, context wiring, or rendered position of a component in the current browser state.", + "Returns props, path in the tree, provider or consumed contexts, children count, and a compact summary. 
Use responseMode='compact' when you want a shorter payload.", ].join(" "), { url: z @@ -138,7 +168,7 @@ export function register(server: McpServer): void { "get_component_state", [ "Inspect a specific React component and return its serializable hook state.", - "Useful for checking simple useState/useRef/useMemo values without reading source code.", + "Use this instead of guessing from hooks source when you need the live value that actually kept a button disabled, an effect armed, or a branch hidden.", "Use responseMode='compact' when you want a shorter payload for AI analysis.", ].join(" "), { @@ -170,6 +200,7 @@ export function register(server: McpServer): void { "get_render_counts", [ "Return per-component render counters collected by the replay runtime monitor.", + "Use this instead of static code reading when you need proof that a component is actually rerendering far more often than expected in the reproduced browser flow.", "Each entry includes the component name, path, render count, and first/last observation timestamps.", ].join(" "), { @@ -202,8 +233,9 @@ export function register(server: McpServer): void { server.tool( "get_render_hotspots", [ - "List components that rendered too many times in a short window.", - "Use the threshold and window to detect likely render explosions and get a probable-cause hint.", + "Diagnose likely rerender explosions and return a verdict-first summary with evidence, confidence, and next_step.", + "Use this instead of grep when you need runtime proof that a render storm is happening and which component path is hottest.", + "raw_data still contains the detailed hotspot list when deeper inspection is needed.", ].join(" "), { url: z @@ -236,7 +268,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createRenderHotspotsVerdict(result)); } catch (e) { 
return err(`get_render_hotspots failed unexpectedly: ${String(e)}`); } @@ -291,8 +323,8 @@ export function register(server: McpServer): void { server.tool( "get_race_condition_diagnosis", [ - "Explain a likely UI race condition by comparing the final visible state with the recent async timeline.", - "Useful when a stale response may have overwritten a newer user intent.", + "Diagnose whether a stale async response likely overwrote newer UI intent.", + "Returns a verdict-first response with evidence, confidence, and next_step plus raw_data for the full trace.", ].join(" "), { url: z @@ -315,7 +347,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getRaceConditionDiagnosis(url, stateSelector, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createRaceConditionVerdict(result)); } catch (e) { return err(`get_race_condition_diagnosis failed unexpectedly: ${String(e)}`); } @@ -328,8 +360,8 @@ export function register(server: McpServer): void { server.tool( "get_async_timeline", [ - "Return an async timeline derived from the captured fetch/XHR lifecycle.", - "Useful for spotting concurrent requests, slow operations, and completion order inversions.", + "Diagnose async request ordering and latency patterns from captured fetch/XHR activity.", + "Returns a verdict-first summary while preserving the full timeline in raw_data.", ].join(" "), { url: z @@ -348,7 +380,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getAsyncTimeline(url, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createAsyncTimelineVerdict(result)); } catch (e) { return err(`get_async_timeline failed unexpectedly: ${String(e)}`); } @@ -361,8 +393,8 @@ export function register(server: McpServer): void { server.tool( "get_hydration_issues", [ - "Return normalized hydration-related warnings and exceptions captured from the runtime 
console.", - "Each entry is tagged as hydration and classified to help separate SSR/client mismatch issues from other failures.", + "Diagnose server/client hydration failures from runtime console signals and return a verdict-first summary.", + "raw_data preserves the normalized hydration entries for detailed inspection.", ].join(" "), { url: z @@ -381,7 +413,7 @@ export function register(server: McpServer): void { try { const result = await browserManager.getHydrationIssues(url, limit); if ("error" in result) return err(result.error); - return ok(result); + return ok(createHydrationIssuesVerdict(result)); } catch (e) { return err(`get_hydration_issues failed unexpectedly: ${String(e)}`); } @@ -440,4 +472,258 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "diagnose_excess_renders", + [ + "High-level render investigation for replay-mode React bugs.", + "Use this instead of manually chaining atomic render tools when the question is 'why is this rerendering so much?' rather than 'show me raw counters'.", + "Orchestrates runtime status, render counts, hotspots, hook changes, and component inspection to explain why a component rerenders too often.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).optional().describe("Optional component to focus on. When omitted, React-Sentinel diagnoses the top hotspot."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of render counters and hotspots to inspect. 
Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const startedAt = Date.now(); + const collectionStartedAt = Date.now(); + const [runtimeStatus, renderCounts, renderHotspots] = await Promise.all([ + browserManager.getRuntimeStatus(url), + browserManager.getRenderCounts(url, limit), + browserManager.getRenderHotspots(url, threshold, windowMs, limit), + ]); + const collectionMs = Date.now() - collectionStartedAt; + if ("error" in runtimeStatus) return err(runtimeStatus.error); + if ("error" in renderCounts) return err(renderCounts.error); + if ("error" in renderHotspots) return err(renderHotspots.error); + + const target = componentName ?? renderHotspots.hotspots[0]?.componentName ?? undefined; + const targetPathText = + renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const followUpStartedAt = Date.now(); + const [hookChanges, inspection] = target + ? await Promise.all([ + browserManager.getHookChanges(url, target, targetPathText, 50), + browserManager.inspectComponent(url, target, "compact"), + ]) + : [undefined, undefined]; + const followUpMs = target ? Date.now() - followUpStartedAt : 0; + + if (hookChanges && "error" in hookChanges) return err(hookChanges.error); + if (inspection && "error" in inspection) return err(inspection.error); + + const diagnosis = createExcessRenderDiagnosis({ + componentName, + runtimeStatus, + renderCounts, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? 
{ inspection } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + collectionMs, + followUpMs, + }, + }); + } catch (e) { + return err(`diagnose_excess_renders failed unexpectedly: ${String(e)}`); + } + } + ); + + server.tool( + "find_memo_breaks", + [ + "High-level investigation that searches for likely React memo breaks or context cascades.", + "Use this instead of grep when you need runtime evidence that unstable props or provider churn are breaking memoization in the reproduced flow.", + "Combines render hotspots, hook churn, and component inspection so the caller gets a verdict instead of raw render data.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).optional().describe("Optional component to inspect directly. When omitted, React-Sentinel picks the strongest hotspot candidate."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of hotspots to inspect. Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const startedAt = Date.now(); + const hotspotStartedAt = Date.now(); + const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + const hotspotMs = Date.now() - hotspotStartedAt; + if ("error" in renderHotspots) return err(renderHotspots.error); + + const target = + componentName ?? + renderHotspots.hotspots.find((entry) => entry.probableCause.type === "prop_diff")?.componentName ?? + renderHotspots.hotspots[0]?.componentName ?? + undefined; + const targetPathText = + renderHotspots.hotspots.find((entry) => (target ? entry.componentName === target : false))?.pathText; + const followUpStartedAt = Date.now(); + const [hookChanges, inspection] = target + ? 
await Promise.all([ + browserManager.getHookChanges(url, target, targetPathText, 50), + browserManager.inspectComponent(url, target, "compact"), + ]) + : [undefined, undefined]; + const followUpMs = target ? Date.now() - followUpStartedAt : 0; + + if (hookChanges && "error" in hookChanges) return err(hookChanges.error); + if (inspection && "error" in inspection) return err(inspection.error); + + const diagnosis = createMemoBreakDiagnosis({ + componentName, + renderHotspots, + ...(hookChanges ? { hookChanges } : {}), + ...(inspection ? { inspection } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs, + }, + }); + } catch (e) { + return err(`find_memo_breaks failed unexpectedly: ${String(e)}`); + } + } + ); + + server.tool( + "attribute_render", + [ + "Explain why a specific React component rendered by attributing the strongest runtime cause.", + "Use this instead of static code inspection when you need the strongest live explanation for one render: props, state, context, provider, hooks, or parent churn.", + "Uses render hotspots, hook churn, and component inspection to surface the strongest cause with evidence and next steps.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + componentName: z.string().min(1).describe("React component name to attribute."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of hotspots to inspect. 
Default is 20."), + }, + async ({ url, componentName, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const startedAt = Date.now(); + const hotspotStartedAt = Date.now(); + const renderHotspots = await browserManager.getRenderHotspots(url, threshold, windowMs, limit); + const hotspotMs = Date.now() - hotspotStartedAt; + if ("error" in renderHotspots) return err(renderHotspots.error); + + const targetPathText = + renderHotspots.hotspots.find((entry) => entry.componentName === componentName || entry.pathText.split(" > ").includes(componentName)) + ?.pathText; + if (!targetPathText) { + const diagnosis = createRenderAttributionDiagnosis({ + componentName, + renderHotspots, + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs: 0, + }, + }); + } + + const followUpStartedAt = Date.now(); + const [hookChanges, inspection] = await Promise.all([ + browserManager.getHookChanges(url, componentName, targetPathText, 50), + browserManager.inspectComponent(url, componentName, "compact"), + ]); + const followUpMs = Date.now() - followUpStartedAt; + if ("error" in hookChanges) return err(hookChanges.error); + if ("error" in inspection) return err(inspection.error); + + const diagnosis = createRenderAttributionDiagnosis({ + componentName, + renderHotspots, + hookChanges, + inspection, + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + hotspotMs, + followUpMs, + }, + }); + } catch (e) { + return err(`attribute_render failed unexpectedly: ${String(e)}`); + } + } + ); + + server.tool( + "diagnose_runtime_bug", + [ + "High-level entry point for vague runtime symptoms such as stale UI, random errors, hydration failures, or unexplained slowness.", + "Use this instead of bouncing between grep, console logs, and ad-hoc probes when you need the fastest verdict-first answer for a browser bug.", + "Orchestrates console, hydration, async, and render diagnostics and returns the 
strongest verdict first.", + ].join(" "), + { + url: z.string().url().describe("URL of the page to inspect."), + symptom: z.string().min(3).max(200).describe("Short natural-language description of the runtime symptom to bias the diagnosis."), + stateSelector: z.string().min(1).optional().describe("Optional CSS selector that exposes the final visible state when race conditions are suspected."), + threshold: hotspotThresholdSchema, + windowMs: hotspotWindowSchema, + limit: z.number().int().min(1).max(100).optional().describe("Maximum number of async and render events to inspect. Default is 20."), + }, + async ({ url, symptom, stateSelector, threshold = 8, windowMs = 1000, limit = 20 }): Promise => { + try { + const startedAt = Date.now(); + const [runtimeStatus, consoleEvents, hydrationIssues, asyncTimeline, renderHotspots, raceCondition] = await Promise.all([ + browserManager.getRuntimeStatus(url), + browserManager.getConsoleEvents(url), + browserManager.getHydrationIssues(url, limit), + browserManager.getAsyncTimeline(url, limit), + browserManager.getRenderHotspots(url, threshold, windowMs, limit), + stateSelector ? browserManager.getRaceConditionDiagnosis(url, stateSelector, limit) : Promise.resolve(undefined), + ]); + if ("error" in runtimeStatus) return err(runtimeStatus.error); + if ("error" in consoleEvents) return err(consoleEvents.error); + if ("error" in hydrationIssues) return err(hydrationIssues.error); + if ("error" in asyncTimeline) return err(asyncTimeline.error); + if ("error" in renderHotspots) return err(renderHotspots.error); + if (raceCondition && "error" in raceCondition) return err(raceCondition.error); + + const diagnosis = createRuntimeBugDiagnosis({ + symptom, + runtimeStatus, + consoleEvents, + hydrationIssues, + asyncTimeline, + renderHotspots, + ...(raceCondition ? 
{ raceCondition } : {}), + }); + + return ok({ + ...diagnosis, + timing: { + totalMs: Date.now() - startedAt, + }, + }); + } catch (e) { + return err(`diagnose_runtime_bug failed unexpectedly: ${String(e)}`); + } + } + ); } diff --git a/src/tools/interaction.ts b/src/tools/interaction.ts index dba99b5..87c744d 100644 --- a/src/tools/interaction.ts +++ b/src/tools/interaction.ts @@ -7,7 +7,8 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { browserManager } from "../browser/index.js"; -import type { Assertion, ValidationScenarioResponse } from "../browser/protocol.js"; +import type { Assertion, ReplayStep, ValidationScenarioResponse } from "../browser/protocol.js"; +import type { DiagnosticVerdict } from "../diagnostics/protocol.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; @@ -97,6 +98,213 @@ export const assertionSchema = z.discriminatedUnion("type", [ }), ]); +const stressTimingStrategySchema = z + .enum(["none", "adversarial"]) + .default("adversarial"); + +const stressDelayProfileSchema = z + .array(z.number().int().min(0).max(5_000)) + .min(1) + .max(12) + .optional(); + +const scenarioValidationToolSchema = { + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + 
resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), +}; + +type ScenarioValidationToolArgs = { + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + resetSession: boolean; + continueOnError: boolean; + waitMs: number; +}; + +async function runScenarioValidationTool(args: ScenarioValidationToolArgs, toolName: string): Promise { + const { url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs } = args; + try { + const result = await browserManager.runValidationScenario(steps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession, + continueOnError, + waitMs, + }); + if ("error" in result) return err(result.error); + + return ok({ + report: result, + reportMarkdown: buildScenarioMarkdown(result), + }); + } catch (e) { + return err(`${toolName} failed unexpectedly: ${String(e)}`); + } +} + +type StressIterationResult = { + iteration: number; + delaysMs: number[]; + success: boolean; + failureReasons: string[]; + report: ValidationScenarioResponse; +}; + +type StressTestVerdict = + | "stress_test_passed" + | "intermittent_failure_detected"; + +type StressTestRawData = { + iterations: StressIterationResult[]; + minimal_reproduction?: ReplayStep[]; +}; + +function countAdversarialGaps(steps: ReplayStep[]): number { + return steps.filter((step) => step.action !== "wait").length; +} + +function createWaitStep(durationMs: number): ReplayStep { + return { action: "wait", durationMs }; +} + +function buildDelaySchedule( + iteration: number, + slots: number, + strategy: 
"none" | "adversarial", + profile: number[] +): number[] { + if (strategy === "none") { + return Array.from({ length: slots }, () => 0); + } + + return Array.from({ length: slots }, (_, index) => profile[(iteration + index) % profile.length] ?? 0); +} + +function injectAdversarialWaits(steps: ReplayStep[], delaysMs: number[]): ReplayStep[] { + if (delaysMs.length === 0) { + return [...steps]; + } + + const expanded: ReplayStep[] = []; + let delayIndex = 0; + + for (const step of steps) { + expanded.push(step); + if (step.action === "wait") { + continue; + } + + const delayMs = delaysMs[delayIndex] ?? 0; + delayIndex += 1; + if (delayMs > 0) { + expanded.push(createWaitStep(delayMs)); + } + } + + return expanded; +} + +function summarizeFailureReasons(report: ValidationScenarioResponse): string[] { + const stepFailures = report.steps + .filter((step) => !step.success) + .map((step) => `Step #${step.index} ${step.step.action} failed${step.error ? `: ${step.error}` : "."}`); + const assertionFailures = report.assertions + .filter((assertion) => !assertion.pass) + .map((assertion) => assertion.details ?? 
"Assertion failed."); + + return [...stepFailures, ...assertionFailures]; +} + +async function minimizeFailingSequence( + steps: ReplayStep[], + assertions: Assertion[], + options: { + url?: string; + headless?: boolean; + waitUntil?: "load" | "domcontentloaded" | "networkidle"; + timeoutMs?: number; + continueOnError?: boolean; + waitMs?: number; + } +): Promise { + let current = [...steps]; + let changed = true; + + while (changed && current.length > 1) { + changed = false; + + for (let index = 0; index < current.length; index += 1) { + const candidate = current.filter((_, candidateIndex) => candidateIndex !== index); + const report = await browserManager.runValidationScenario(candidate, assertions, { + ...options, + resetSession: true, + }); + if ("error" in report || report.success) { + continue; + } + + current = candidate; + changed = true; + break; + } + } + + return current.length === steps.length ? null : current; +} + +function createStressTestVerdict(seed: { + iterations: StressIterationResult[]; + minimalReproduction?: ReplayStep[] | null; +}): DiagnosticVerdict { + const failedIterations = seed.iterations.filter((iteration) => !iteration.success); + const raw_data: StressTestRawData = { + iterations: seed.iterations, + ...(seed.minimalReproduction ? 
{ minimal_reproduction: seed.minimalReproduction } : {}), + }; + + if (failedIterations.length === 0) { + return { + verdict: "stress_test_passed", + summary: `All ${seed.iterations.length} stress iterations passed without reproducing the target inconsistency.`, + evidence: [ + `Passed iterations: ${seed.iterations.length}/${seed.iterations.length}`, + ], + confidence: "medium", + next_step: "Increase the iteration count or widen the adversarial delay profile if the bug is rarer than this run.", + raw_data, + }; + } + + const firstFailure = failedIterations[0]; + return { + verdict: "intermittent_failure_detected", + summary: `${failedIterations.length}/${seed.iterations.length} stress iterations failed, which confirms an intermittent runtime bug under adversarial timing.`, + evidence: [ + `First failing iteration: #${firstFailure.iteration + 1}`, + `Failure reasons: ${firstFailure.failureReasons.join(" | ") || "Assertion failed without extra details."}`, + `Minimal reproduction found: ${seed.minimalReproduction ? "yes" : "no"}`, + ], + confidence: failedIterations.length >= 2 ? "high" : "medium", + next_step: seed.minimalReproduction + ? "Replay the minimized failing sequence or feed it into verify_hypothesis / verify_fix." 
+ : "Inspect the failing iteration trace and tighten assertions around the inconsistent state.", + raw_data, + }; +} + function formatAssertion(assertion: Assertion): string { switch (assertion.type) { case "text_present": @@ -178,7 +386,9 @@ export const INTERACTION_TOOL_NAMES = [ "simulate_interaction", "validate_after_action", "validate_scenario", + "validate_user_flow", "replay_interactions", + "find_race_conditions", ] as const; export function register(server: McpServer): void { @@ -188,8 +398,8 @@ export function register(server: McpServer): void { server.tool( "simulate_interaction", [ - "Simulates a user interaction (click, type, or fill) on the target page.", - "Useful for reproducing bugs or exploring the application state after interaction.", + "Simulate a single user interaction on the target page.", + "Use this instead of reasoning from source code alone when the next UI state depends on an actual click, type, fill, or keypress in the browser.", "Requires a valid CSS selector and target URL.", ].join(" "), { @@ -215,8 +425,8 @@ export function register(server: McpServer): void { server.tool( "validate_after_action", [ - "Performs an interaction followed by a validation assertion in a single flow.", - "Useful for experimental validation: 'If I click this, does the error disappear?' or 'Does the text X appear?'.", + "Perform one interaction and immediately validate the resulting runtime state in a single flow.", + "Use this instead of manual reproduction when you need a tight pass/fail answer such as 'if I click this, does the error disappear?' 
or 'does text X appear?'.", ].join(" "), { url: z.string().url().describe("The URL of the page."), @@ -262,40 +472,21 @@ export function register(server: McpServer): void { "validate_scenario", [ "Replay a deterministic action sequence and evaluate multiple assertions in one pass.", + "Use this instead of grep or ad-hoc clicking when a bug only appears after several browser actions and you need a reproducible runtime verdict.", "Returns both a structured JSON report and a Markdown report with actions, assertions, and useful traces.", ].join(" "), - { - url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay browser first and start a fresh isolated session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - }, - async ({ url, steps, assertions, headless, waitUntil, timeoutMs, resetSession, continueOnError, waitMs }): Promise => { - try { - const result = await browserManager.runValidationScenario(steps, assertions, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - continueOnError, - 
waitMs, - }); - if ("error" in result) return err(result.error); + scenarioValidationToolSchema, + async (args): Promise => runScenarioValidationTool(args as ScenarioValidationToolArgs, "validate_scenario") + ); - return ok({ - report: result, - reportMarkdown: buildScenarioMarkdown(result), - }); - } catch (e) { - return err(`validate_scenario failed unexpectedly: ${String(e)}`); - } - } + server.tool( + "validate_user_flow", + [ + "Action-oriented alias for validate_scenario that checks whether a user flow still works end-to-end.", + "Prefer this when the agent is thinking in terms of user journeys rather than generic scenario validation.", + ].join(" "), + scenarioValidationToolSchema, + async (args): Promise => runScenarioValidationTool(args as ScenarioValidationToolArgs, "validate_user_flow") ); // ------------------------------------------------------------------------- @@ -305,8 +496,8 @@ export function register(server: McpServer): void { "replay_interactions", [ "Replay a deterministic sequence of browser actions inside the isolated replay session.", + "Use this instead of manual reproduction when you need React-Sentinel to execute the exact same interaction sequence every time before deeper diagnostics.", "Supports click, type, fill, wait, and press steps and logs the result of each step.", - "Provide a URL to navigate before the replay, or omit it to reuse the current replay page.", ].join(" "), { url: z.string().url().optional().describe("Optional URL to open in the replay browser before the sequence runs."), @@ -334,4 +525,92 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "find_race_conditions", + [ + "Stress-test a replay scenario across multiple iterations with optional adversarial delays between actions.", + "Use this instead of guessing from code when the bug is intermittent and only appears under unlucky runtime timing.", + "Returns pass/fail per iteration, highlights intermittent failures, and attempts to shrink 
the first failing sequence into a minimal reproduction.", + ].join(" "), + { + url: z.string().url().optional().describe("Optional URL to open in the replay browser before each iteration."), + steps: z.array(replayStepSchema).min(1).describe("Base replay steps. React-Sentinel may inject extra wait steps between actions when adversarial timing is enabled."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that define the inconsistent state to catch."), + iterations: z.number().int().min(1).max(25).optional().default(7).describe("How many replay iterations to execute."), + timingStrategy: stressTimingStrategySchema.describe("Choose 'adversarial' to vary delays between interactions across iterations."), + delayProfileMs: stressDelayProfileSchema.describe("Optional delay profile in milliseconds. Defaults to 0, 25, 75, 150, 300, 600."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().default("domcontentloaded").describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + minimizeFailure: z.boolean().optional().default(true).describe("Attempt to shrink the first failing sequence into a smaller reproduction."), + }, + async ({ + url, + steps, + assertions, + iterations, + timingStrategy, + delayProfileMs, + headless, + waitUntil, + timeoutMs, + continueOnError, + waitMs, + minimizeFailure, + }): Promise => { + try { + const delayProfile = delayProfileMs ?? 
[0, 25, 75, 150, 300, 600]; + const gaps = countAdversarialGaps(steps); + const iterationResults: StressIterationResult[] = []; + + for (let iteration = 0; iteration < iterations; iteration += 1) { + const delaysMs = buildDelaySchedule(iteration, gaps, timingStrategy, delayProfile); + const iterationSteps = injectAdversarialWaits(steps, delaysMs); + const report = await browserManager.runValidationScenario(iterationSteps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + iterationResults.push({ + iteration, + delaysMs, + success: report.success, + failureReasons: summarizeFailureReasons(report), + report, + }); + } + + const firstFailure = iterationResults.find((iteration) => !iteration.success) ?? null; + const minimalReproduction = + minimizeFailure && firstFailure + ? await minimizeFailingSequence(firstFailure.report.steps.map((step) => step.step), assertions, { + url, + headless, + waitUntil, + timeoutMs, + continueOnError, + waitMs, + }) + : null; + + return ok( + createStressTestVerdict({ + iterations: iterationResults, + minimalReproduction, + }) + ); + } catch (e) { + return err(`find_race_conditions failed unexpectedly: ${String(e)}`); + } + } + ); } diff --git a/src/tools/network.ts b/src/tools/network.ts index 52c9ecf..34d5f33 100644 --- a/src/tools/network.ts +++ b/src/tools/network.ts @@ -16,8 +16,9 @@ export function register(server: McpServer): void { server.tool( "get_network_events", [ - "Returns the recent network events captured from fetch and XMLHttpRequest.", - "Includes a summary that highlights HTTP errors (4xx/5xx) for quick AI diagnostics.", + "Return the recent network events captured from fetch and XMLHttpRequest.", + "Use this instead of grep when the visible UI bug may actually be explained by failing, missing, duplicate, or late runtime requests.", + "Includes a summary that highlights HTTP errors (4xx/5xx) for quick 
diagnostics.", ].join(" "), { url: z.string().url().describe("URL of the page to inspect."), diff --git a/src/tools/patch.ts b/src/tools/patch.ts index fb739b5..edb24e1 100644 --- a/src/tools/patch.ts +++ b/src/tools/patch.ts @@ -4,10 +4,14 @@ import { browserManager } from "../browser/index.js"; import type { Assertion, PatchedValidationScenarioResponse, + ReplayStep, RuntimePatch, RuntimePatchApplyResponse, RuntimePatchResetResponse, + ValidationResult, + ValidationScenarioResponse, } from "../browser/protocol.js"; +import type { DiagnosticVerdict } from "../diagnostics/protocol.js"; import { ok, err } from "../types.js"; import type { ToolResponse } from "../types.js"; import { assertionSchema, buildScenarioMarkdown, replayStepSchema } from "./interaction.js"; @@ -30,6 +34,148 @@ const replayWaitUntilSchema = z const resetStrategySchema = z.enum(["reload", "reset_session"]); +type VerificationVerdict = "CONFIRMED" | "REFUTED" | "PARTIAL"; + +type HypothesisVerificationRawData = { + hypothesis: string; + report: ValidationScenarioResponse; +}; + +type FixVerificationRawData = { + fix_description: string; + baseline: ValidationScenarioResponse; + patched: PatchedValidationScenarioResponse; + regression_assertions: Assertion[]; +}; + +function countAssertionFailures(results: ValidationResult[]): number { + return results.filter((result) => !result.pass).length; +} + +function buildHypothesisMarkdown( + hypothesis: string, + verdict: VerificationVerdict, + report: ValidationScenarioResponse +): string { + return [ + "# Hypothesis Verification Report", + "", + `- Hypothesis: ${hypothesis}`, + `- Verdict: ${verdict}`, + "", + buildScenarioMarkdown(report), + ].join("\n"); +} + +function buildFixVerificationMarkdown( + fixDescription: string, + verdict: VerificationVerdict, + baseline: ValidationScenarioResponse, + patched: PatchedValidationScenarioResponse, + regressionAssertions: Assertion[] +): string { + const lines = [ + "# Fix Verification Report", + "", + `- 
Fix: ${fixDescription}`, + `- Verdict: ${verdict}`, + `- Regression assertions: ${regressionAssertions.length}`, + "", + "## Baseline", + buildScenarioMarkdown(baseline), + "", + "## Patched Run", + buildPatchMarkdown(patched.verdict, patched.apply, patched.report, patched.cleanup), + ]; + + return lines.join("\n"); +} + +function createHypothesisVerdict(seed: { + hypothesis: string; + report: ValidationScenarioResponse; +}): DiagnosticVerdict { + const stepFailures = seed.report.steps.filter((step) => !step.success).length; + const assertionFailures = countAssertionFailures(seed.report.assertions); + const verdict: VerificationVerdict = + stepFailures === 0 && assertionFailures === 0 + ? "CONFIRMED" + : stepFailures === 0 && assertionFailures === seed.report.assertions.length + ? "REFUTED" + : "PARTIAL"; + + return { + verdict, + summary: + verdict === "CONFIRMED" + ? `The runtime evidence confirms the hypothesis: ${seed.hypothesis}` + : verdict === "REFUTED" + ? `The runtime evidence does not support the hypothesis: ${seed.hypothesis}` + : `The runtime evidence only partially supports the hypothesis: ${seed.hypothesis}`, + evidence: [ + `Failed steps: ${stepFailures}`, + `Failed assertions: ${assertionFailures}/${seed.report.assertions.length}`, + ], + confidence: verdict === "PARTIAL" ? "medium" : "high", + next_step: + verdict === "CONFIRMED" + ? "Use the failing evidence to design or verify a targeted fix." + : verdict === "REFUTED" + ? "Refine the hypothesis or change the reproduction protocol before editing source code." 
+ : "Tighten the assertions or reproduction steps to make the result decisive.", + raw_data: { + hypothesis: seed.hypothesis, + report: seed.report, + }, + }; +} + +function createFixVerdict(seed: { + fixDescription: string; + baseline: ValidationScenarioResponse; + patched: PatchedValidationScenarioResponse; + regressionAssertions: Assertion[]; +}): DiagnosticVerdict { + const targetAssertionSliceEnd = Math.max(seed.baseline.assertions.length - seed.regressionAssertions.length, 0); + const baselineTargetFailures = countAssertionFailures(seed.baseline.assertions.slice(0, targetAssertionSliceEnd)); + const patchedTargetFailures = countAssertionFailures(seed.patched.report.assertions.slice(0, targetAssertionSliceEnd)); + const regressionFailureCount = countAssertionFailures(seed.patched.report.assertions.slice(targetAssertionSliceEnd)); + const verdict: VerificationVerdict = + baselineTargetFailures > 0 && patchedTargetFailures === 0 && regressionFailureCount === 0 + ? "CONFIRMED" + : patchedTargetFailures >= baselineTargetFailures + ? "REFUTED" + : "PARTIAL"; + + return { + verdict, + summary: + verdict === "CONFIRMED" + ? `The patch fixes the targeted runtime issue without visible regressions: ${seed.fixDescription}` + : verdict === "REFUTED" + ? `The patch does not resolve the target issue convincingly: ${seed.fixDescription}` + : `The patch improves the target issue but leaves uncertainty or visible regressions: ${seed.fixDescription}`, + evidence: [ + `Baseline target assertion failures: ${baselineTargetFailures}`, + `Patched target assertion failures: ${patchedTargetFailures}`, + `Patched regression failures: ${regressionFailureCount}`, + ], + confidence: verdict === "PARTIAL" ? "medium" : "high", + next_step: + verdict === "CONFIRMED" + ? "Promote the runtime patch into a source change or validate it against a broader regression suite." + : verdict === "REFUTED" + ? "Revise the patch because the runtime assertions still fail or did not improve." 
+ : "Inspect the remaining failed assertions and regression signals before deciding whether to keep the patch.", + raw_data: { + fix_description: seed.fixDescription, + baseline: seed.baseline, + patched: seed.patched, + regression_assertions: seed.regressionAssertions, + }, + }; +} + function buildPatchMarkdown( verdict: PatchedValidationScenarioResponse["verdict"], apply: RuntimePatchApplyResponse, @@ -66,10 +212,273 @@ function buildPatchMarkdown( return lines.join("\n"); } +const patchValidationToolSchema = { + patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), + assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), + headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean browser session."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or 
reset_session."), +}; + +type PatchValidationToolArgs = { + patch: RuntimePatch; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + resetSession: boolean; + continueOnError: boolean; + waitMs: number; + cleanup: "keep" | "reload" | "reset_session"; + reopenUrl?: string; +}; + +const hypothesisVerificationToolSchema = { + hypothesis: z.string().min(3).max(500).describe("Hypothesis to validate against runtime behavior."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before verification."), + steps: z.array(replayStepSchema).min(1).describe("Replay protocol used to test the hypothesis."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should hold if the hypothesis is correct."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), +}; + +type HypothesisVerificationToolArgs = { + hypothesis: string; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + continueOnError: boolean; + waitMs: number; +}; + +const fixVerificationToolSchema = { + fixDescription: z.string().min(3).max(500).describe("Short description of the fix that the runtime patch is supposed to validate."), + patch: runtimePatchSchema.describe("Runtime patch payload for 
the replay sandbox."), + url: z.string().url().optional().describe("Optional URL to open in the replay browser before the scenario runs."), + steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute before assertions."), + assertions: z.array(assertionSchema).min(1).describe("Assertions that should pass after the fix is applied."), + regressionAssertions: z.array(assertionSchema).optional().default([]).describe("Optional guard assertions that should remain true before and after the patch."), + headless: z.boolean().optional().describe("Override the replay browser mode for this verification."), + waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), + timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), + continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), + waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), + cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after patch verification."), + reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), +}; + +type FixVerificationToolArgs = { + fixDescription: string; + patch: RuntimePatch; + url?: string; + steps: ReplayStep[]; + assertions: Assertion[]; + regressionAssertions: Assertion[]; + headless?: boolean; + waitUntil: "load" | "domcontentloaded" | "networkidle"; + timeoutMs: number; + continueOnError: boolean; + waitMs: number; + cleanup: "keep" | "reload" | "reset_session"; + reopenUrl?: string; +}; + +async function runPatchValidationTool(args: PatchValidationToolArgs, toolName: string): Promise { + const { patch, url, steps, assertions, headless, waitUntil, 
timeoutMs, resetSession, continueOnError, waitMs, cleanup, reopenUrl } = args; + try { + const applyResult = await browserManager.applyRuntimePatch(patch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession, + }); + if ("error" in applyResult) return err(applyResult.error); + + const report = await browserManager.runValidationScenario(steps, assertions, { + headless, + continueOnError, + waitMs, + }); + if ("error" in report) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err( + `${report.error} Cleanup after validation failure also failed: ${cleanupResult.error}. Runtime patches may still be active.` + ); + } + } + return err(report.error); + } + + const response: PatchedValidationScenarioResponse = { + verdict: report.success ? "patch_validated" : "patch_failed", + apply: applyResult, + report, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + response.cleanup = cleanupResult; + } + + return ok({ + ...response, + reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + +async function runHypothesisVerificationTool( + args: HypothesisVerificationToolArgs, + toolName: string +): Promise { + const { hypothesis, url, steps, assertions, headless, waitUntil, timeoutMs, continueOnError, waitMs } = args; + try { + const report = await browserManager.runValidationScenario(steps, assertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in report) return err(report.error); + + const 
response = createHypothesisVerdict({ + hypothesis, + report, + }); + + return ok({ + ...response, + reportMarkdown: buildHypothesisMarkdown(hypothesis, response.verdict, report), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + +async function runFixVerificationTool(args: FixVerificationToolArgs, toolName: string): Promise { + const { fixDescription, patch, url, steps, assertions, regressionAssertions, headless, waitUntil, timeoutMs, continueOnError, waitMs, cleanup, reopenUrl } = args; + try { + const combinedAssertions = [...assertions, ...regressionAssertions]; + const baseline = await browserManager.runValidationScenario(steps, combinedAssertions, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + continueOnError, + waitMs, + }); + if ("error" in baseline) return err(baseline.error); + + const applyResult = await browserManager.applyRuntimePatch(patch, { + url, + headless, + waitUntil, + timeoutMs, + resetSession: true, + }); + if ("error" in applyResult) return err(applyResult.error); + + const patchedReport = await browserManager.runValidationScenario(steps, combinedAssertions, { + headless, + continueOnError, + waitMs, + }); + if ("error" in patchedReport) { + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) { + return err(`${patchedReport.error} Cleanup after patch verification also failed: ${cleanupResult.error}.`); + } + } + return err(patchedReport.error); + } + + const patched: PatchedValidationScenarioResponse = { + verdict: patchedReport.success ? 
"patch_validated" : "patch_failed", + apply: applyResult, + report: patchedReport, + }; + + if (cleanup !== "keep") { + const cleanupResult = await browserManager.resetRuntimePatches({ + strategy: cleanup, + waitUntil, + timeoutMs, + headless, + reopenUrl, + }); + if ("error" in cleanupResult) return err(cleanupResult.error); + patched.cleanup = cleanupResult; + } + + const response = createFixVerdict({ + fixDescription, + baseline, + patched, + regressionAssertions, + }); + + return ok({ + ...response, + reportMarkdown: buildFixVerificationMarkdown(fixDescription, response.verdict, baseline, patched, regressionAssertions), + }); + } catch (error) { + return err(`${toolName} failed unexpectedly: ${String(error)}`); + } +} + export const PATCH_TOOL_NAMES = [ "apply_runtime_patch", "apply_patch_then_replay", + "patch_and_validate", "reset_runtime_patches", + "verify_hypothesis", + "test_runtime_hypothesis", + "verify_fix", + "verify_runtime_fix", ] as const; export function register(server: McpServer): void { @@ -77,8 +486,8 @@ export function register(server: McpServer): void { "apply_runtime_patch", [ "Apply an ephemeral JavaScript patch inside the isolated replay sandbox without touching local files.", + "Use this instead of editing the repository when you want to test a runtime idea safely before committing to a source change.", "Only { type: 'script', target: 'page' } payloads are currently supported and always scoped to the current replay session.", - "Provide a URL when the patch must be present before the application boots in the sandbox.", ].join(" "), { patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), @@ -108,96 +517,21 @@ export function register(server: McpServer): void { "apply_patch_then_replay", [ "Apply an ephemeral replay patch, run replay steps, then evaluate assertions in the patched sandbox.", - "Returns an explicit patch_validated / patch_failed verdict plus a readable Markdown report.", - "Cleanup defaults to 
reset_session so temporary patches do not leak into later sandbox runs.", + "Use this instead of editing files blindly when you want one tool to patch, reproduce, and judge whether the runtime behavior improved.", + "Returns an explicit patch_validated / patch_failed verdict plus a readable Markdown report, and cleanup defaults to reset_session so temporary patches do not leak into later runs.", ].join(" "), - { - patch: runtimePatchSchema.describe("Runtime patch payload for the replay sandbox."), - url: z.string().url().optional().describe("Optional URL to open in the replay browser after the init script is registered."), - steps: z.array(replayStepSchema).min(1).describe("Ordered replay steps to execute after the patch is active."), - assertions: z.array(assertionSchema).min(1).describe("Assertions to evaluate after the replayed actions."), - headless: z.boolean().optional().describe("Override the replay browser mode for this scenario."), - waitUntil: replayWaitUntilSchema.describe("Navigation readiness event when url is provided."), - timeoutMs: z.number().int().min(1).max(120_000).optional().default(10_000).describe("Navigation timeout in milliseconds when url is provided."), - resetSession: z.boolean().optional().default(false).describe("Close the current replay sandbox first and start from a clean browser session."), - continueOnError: z.boolean().optional().default(false).describe("Keep executing later steps after a step failure."), - waitMs: z.number().int().min(0).max(60_000).optional().default(500).describe("Wait time in milliseconds before running assertions."), - cleanup: z.enum(["keep", "reload", "reset_session"]).optional().default("reset_session").describe("How to clean the replay sandbox after the patched validation flow."), - reopenUrl: z.string().url().optional().describe("Optional clean URL to reopen after cleanup when using reload or reset_session."), - }, - async ({ - patch, - url, - steps, - assertions, - headless, - waitUntil, - timeoutMs, - 
resetSession, - continueOnError, - waitMs, - cleanup, - reopenUrl, - }): Promise => { - try { - const applyResult = await browserManager.applyRuntimePatch(patch as RuntimePatch, { - url, - headless, - waitUntil, - timeoutMs, - resetSession, - }); - if ("error" in applyResult) return err(applyResult.error); - - const report = await browserManager.runValidationScenario(steps, assertions as Assertion[], { - headless, - continueOnError, - waitMs, - }); - if ("error" in report) { - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) { - return err( - `${report.error} Cleanup after validation failure also failed: ${cleanupResult.error}. Runtime patches may still be active.` - ); - } - } - return err(report.error); - } - - const response: PatchedValidationScenarioResponse = { - verdict: report.success ? "patch_validated" : "patch_failed", - apply: applyResult, - report, - }; - - if (cleanup !== "keep") { - const cleanupResult = await browserManager.resetRuntimePatches({ - strategy: cleanup as "reload" | "reset_session", - waitUntil, - timeoutMs, - headless, - reopenUrl, - }); - if ("error" in cleanupResult) return err(cleanupResult.error); - response.cleanup = cleanupResult; - } + patchValidationToolSchema, + async (args): Promise => runPatchValidationTool(args as PatchValidationToolArgs, "apply_patch_then_replay") + ); - return ok({ - ...response, - reportMarkdown: buildPatchMarkdown(response.verdict, response.apply, response.report, response.cleanup), - }); - } catch (error) { - return err(`apply_patch_then_replay failed unexpectedly: ${String(error)}`); - } - } + server.tool( + "patch_and_validate", + [ + "Action-oriented alias for apply_patch_then_replay that tests a runtime patch against a concrete replay protocol.", + "Prefer this when the agent is thinking 'try this patch and tell me if 
the bug is gone'.", + ].join(" "), + patchValidationToolSchema, + async (args): Promise => runPatchValidationTool(args as PatchValidationToolArgs, "patch_and_validate") ); server.tool( @@ -228,4 +562,46 @@ export function register(server: McpServer): void { } } ); + + server.tool( + "verify_hypothesis", + [ + "Verify a runtime hypothesis before changing repository code.", + "Use this instead of arguing from source code alone when you need browser evidence that a suspected runtime cause is true, false, or only partly supported.", + "Runs a replay protocol plus assertions and returns CONFIRMED, REFUTED, or PARTIAL with evidence and a Markdown report.", + ].join(" "), + hypothesisVerificationToolSchema, + async (args): Promise => runHypothesisVerificationTool(args as HypothesisVerificationToolArgs, "verify_hypothesis") + ); + + server.tool( + "test_runtime_hypothesis", + [ + "Action-oriented alias for verify_hypothesis that tests whether a suspected runtime explanation matches observed browser behavior.", + "Prefer this when the agent is phrasing the task as 'test this hypothesis in the browser'.", + ].join(" "), + hypothesisVerificationToolSchema, + async (args): Promise => runHypothesisVerificationTool(args as HypothesisVerificationToolArgs, "test_runtime_hypothesis") + ); + + server.tool( + "verify_fix", + [ + "Validate a runtime patch against a failing scenario before editing source files.", + "Use this instead of making a speculative code change when you want proof that a candidate fix improves the browser behavior and does not obviously regress other assertions.", + "Runs a baseline scenario, applies the patch in the replay sandbox, reruns the scenario, checks optional regression assertions, and returns CONFIRMED, REFUTED, or PARTIAL.", + ].join(" "), + fixVerificationToolSchema, + async (args): Promise => runFixVerificationTool(args as FixVerificationToolArgs, "verify_fix") + ); + + server.tool( + "verify_runtime_fix", + [ + "Action-oriented alias for 
verify_fix that checks whether a candidate runtime fix actually resolves the bug.", + "Prefer this when the agent is phrasing the task as 'verify the fix before touching source'.", + ].join(" "), + fixVerificationToolSchema, + async (args): Promise => runFixVerificationTool(args as FixVerificationToolArgs, "verify_runtime_fix") + ); }