From 3a5951b620875ce8a41641817fa45eecf60ebc65 Mon Sep 17 00:00:00 2001 From: Francois Laberge Date: Sun, 1 Mar 2026 20:27:28 -0500 Subject: [PATCH 1/2] feat: add direct CDP target support for webview endpoints --- src/actions.ts | 115 ++++++++++- src/browser.test.ts | 57 ++++++ src/browser.ts | 471 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 632 insertions(+), 11 deletions(-) diff --git a/src/actions.ts b/src/actions.ts index 1e36f94..20b25bb 100644 --- a/src/actions.ts +++ b/src/actions.ts @@ -615,6 +615,14 @@ async function handleNavigate( ): Promise> { browser.checkDomainAllowed(command.url); + if (browser.isDirectTargetMode()) { + await browser.directNavigate(command.url, command.waitUntil ?? 'load'); + return successResponse(command.id, { + url: await browser.directGetUrl(), + title: await browser.directGetTitle(), + }); + } + const page = browser.getPage(); // If headers are provided, set up scoped headers for this origin @@ -633,6 +641,14 @@ async function handleNavigate( } async function handleClick(command: ClickCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + if (command.newTab) { + throw new Error('--new-tab is not supported in direct target mode'); + } + await browser.directClick(command.selector); + return successResponse(command.id, { clicked: true }); + } + // Support both refs (@e1) and regular selectors const locator = browser.getLocator(command.selector); @@ -676,6 +692,11 @@ async function handleClick(command: ClickCommand, browser: BrowserManager): Prom } async function handleType(command: TypeCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + await browser.directType(command.selector, command.text, command.clear); + return successResponse(command.id, { typed: true }); + } + const locator = browser.getLocator(command.selector); try { @@ -694,6 +715,11 @@ async function handleType(command: TypeCommand, browser: BrowserManager): Promis } async function 
handlePress(command: PressCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + await browser.directPress(command.key, command.selector); + return successResponse(command.id, { pressed: true }); + } + const page = browser.getPage(); if (command.selector) { @@ -719,6 +745,21 @@ async function handleScreenshot( command: ScreenshotCommand, browser: BrowserManager ): Promise> { + if (browser.isDirectTargetMode()) { + let savePath = command.path; + if (!savePath) { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const random = Math.random().toString(36).substring(2, 8); + const filename = `screenshot-${timestamp}-${random}.png`; + const screenshotDir = path.join(getAppDir(), 'tmp', 'screenshots'); + mkdirSync(screenshotDir, { recursive: true }); + savePath = path.join(screenshotDir, filename); + } + const b64 = await browser.directCaptureScreenshotPngBase64(); + fs.writeFileSync(savePath, Buffer.from(b64, 'base64')); + return successResponse(command.id, { path: savePath }); + } + const page = browser.getPage(); const options: Parameters[0] = { @@ -919,14 +960,15 @@ async function handleSnapshot( }, browser: BrowserManager ): Promise> { - // Use enhanced snapshot with refs and optional filtering - const { tree, refs } = await browser.getSnapshot({ - interactive: command.interactive, - cursor: command.cursor, - maxDepth: command.maxDepth, - compact: command.compact, - selector: command.selector, - }); + const { tree, refs } = browser.isDirectTargetMode() + ? 
await browser.directSnapshot({ interactive: command.interactive }) + : await browser.getSnapshot({ + interactive: command.interactive, + cursor: command.cursor, + maxDepth: command.maxDepth, + compact: command.compact, + selector: command.selector, + }); // Simplify refs for output (just role and name) const simpleRefs: Record = {}; @@ -934,11 +976,13 @@ async function handleSnapshot( simpleRefs[ref] = { role: data.role, name: data.name }; } - const page = browser.getPage(); + const origin = browser.isDirectTargetMode() + ? await browser.directGetUrl() + : browser.getPage().url(); return successResponse(command.id, { snapshot: tree || 'Empty page', refs: Object.keys(simpleRefs).length > 0 ? simpleRefs : undefined, - origin: page.url(), + origin, }); } @@ -946,6 +990,11 @@ async function handleEvaluate( command: EvaluateCommand, browser: BrowserManager ): Promise> { + if (browser.isDirectTargetMode()) { + const result = await browser.directEvaluate(command.script); + return successResponse(command.id, { result, origin: await browser.directGetUrl() }); + } + const page = browser.getPage(); // Evaluate the script directly as a string expression @@ -955,6 +1004,15 @@ async function handleEvaluate( } async function handleWait(command: WaitCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + await browser.directWait({ + selector: command.selector, + state: command.selector ? command.state : undefined, + timeout: command.timeout, + }); + return successResponse(command.id, { waited: true }); + } + const page = browser.getPage(); if (command.selector) { @@ -973,6 +1031,30 @@ async function handleWait(command: WaitCommand, browser: BrowserManager): Promis } async function handleScroll(command: ScrollCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + let deltaX = command.x ?? 0; + let deltaY = command.y ?? 0; + if (command.direction) { + const amount = command.amount ?? 
100; + switch (command.direction) { + case 'up': + deltaY = -amount; + break; + case 'down': + deltaY = amount; + break; + case 'left': + deltaX = -amount; + break; + case 'right': + deltaX = amount; + break; + } + } + await browser.directScroll(command.selector, deltaX, deltaY); + return successResponse(command.id, { scrolled: true }); + } + const page = browser.getPage(); let deltaX = command.x ?? 0; @@ -1121,6 +1203,11 @@ async function handleWindowNew( // New handlers for enhanced Playwright parity async function handleFill(command: FillCommand, browser: BrowserManager): Promise { + if (browser.isDirectTargetMode()) { + await browser.directFill(command.selector, command.value); + return successResponse(command.id, { filled: true }); + } + const locator = browser.getLocator(command.selector); try { await locator.fill(command.value); @@ -1548,6 +1635,10 @@ async function handleUrl( command: Command & { action: 'url' }, browser: BrowserManager ): Promise { + if (browser.isDirectTargetMode()) { + return successResponse(command.id, { url: await browser.directGetUrl() }); + } + const page = browser.getPage(); return successResponse(command.id, { url: page.url() }); } @@ -1556,6 +1647,10 @@ async function handleTitle( command: Command & { action: 'title' }, browser: BrowserManager ): Promise { + if (browser.isDirectTargetMode()) { + return successResponse(command.id, { title: await browser.directGetTitle() }); + } + const page = browser.getPage(); const title = await page.title(); return successResponse(command.id, { title }); diff --git a/src/browser.test.ts b/src/browser.test.ts index 8338d71..f3c9c15 100644 --- a/src/browser.test.ts +++ b/src/browser.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect, beforeAll, afterAll, beforeEach, afterEach, vi } import { BrowserManager, getDefaultTimeout } from './browser.js'; import { executeCommand } from './actions.js'; import { chromium } from 'playwright-core'; +import { WebSocketServer } from 'ws'; 
describe('BrowserManager', () => { let browser: BrowserManager; @@ -902,6 +903,62 @@ describe('BrowserManager', () => { expect(urls).toContain('http://example.com'); spy.mockRestore(); }); + + it('should connect to a direct /devtools/page target and expose a single tab', async () => { + const wss = new WebSocketServer({ port: 0 }); + const address = wss.address(); + const port = typeof address === 'object' && address ? address.port : 0; + + wss.on('connection', (socket) => { + socket.on('message', (raw) => { + const msg = JSON.parse(raw.toString()) as { + id: number; + method: string; + params?: { expression?: string }; + }; + + if (msg.method === 'Runtime.evaluate') { + const expression = msg.params?.expression ?? ''; + let value: unknown = true; + if (expression.includes('location.href')) value = 'https://www.google.com/'; + if (expression.includes('document.title')) value = 'Google'; + socket.send( + JSON.stringify({ + id: msg.id, + result: { + result: { + type: typeof value, + value, + }, + }, + }) + ); + return; + } + + socket.send(JSON.stringify({ id: msg.id, result: {} })); + }); + }); + + const directBrowser = new BrowserManager(); + await directBrowser.launch({ + id: 'direct-1', + action: 'launch', + cdpUrl: `ws://127.0.0.1:${port}/devtools/page/ABC123`, + }); + + const tabs = await directBrowser.listTabs(); + expect(directBrowser.isDirectTargetMode()).toBe(true); + expect(tabs).toHaveLength(1); + expect(tabs[0].url).toBe('https://www.google.com/'); + expect(tabs[0].title).toBe('Google'); + await expect(directBrowser.switchTo(1)).rejects.toThrow( + 'Direct target mode only supports tab index 0' + ); + + await directBrowser.close(); + await new Promise((resolve) => wss.close(() => resolve())); + }); }); describe('screencast', () => { diff --git a/src/browser.ts b/src/browser.ts index 73fe0d0..a7d58a0 100644 --- a/src/browser.ts +++ b/src/browser.ts @@ -18,6 +18,7 @@ import path from 'node:path'; import os from 'node:os'; import { existsSync, mkdirSync, 
rmSync, readFileSync, statSync } from 'node:fs'; import { writeFile, mkdir } from 'node:fs/promises'; +import WebSocket from 'ws'; import type { LaunchCommand, TraceEvent } from './types.js'; import { type RefMap, type EnhancedSnapshot, getEnhancedSnapshot, parseRef } from './snapshot.js'; import { safeHeaderMerge } from './state-utils.js'; @@ -96,6 +97,12 @@ interface PageError { export class BrowserManager { private browser: Browser | null = null; private cdpEndpoint: string | null = null; // stores port number or full URL + private directTargetWs: WebSocket | null = null; + private directTargetMsgId: number = 0; + private directTargetPending: Map< + number, + { resolve: (value: unknown) => void; reject: (error: Error) => void } + > = new Map(); private isPersistentContext: boolean = false; private browserbaseSessionId: string | null = null; private browserbaseApiKey: string | null = null; @@ -128,6 +135,21 @@ export class BrowserManager { this.colorScheme = scheme; } + isDirectTargetMode(): boolean { + return this.directTargetWs !== null; + } + + private isDirectTargetEndpoint(endpoint: string): boolean { + const normalized = endpoint.trim(); + return /\/devtools\/page\/[^/?#]+(?:[/?#].*)?$/i.test(normalized); + } + + private asWebSocketUrl(endpoint: string): string { + if (endpoint.startsWith('http://')) return `ws://${endpoint.slice('http://'.length)}`; + if (endpoint.startsWith('https://')) return `wss://${endpoint.slice('https://'.length)}`; + return endpoint; + } + // CDP session for screencast and input injection private cdpSession: CDPSession | null = null; private screencastActive: boolean = false; @@ -164,7 +186,7 @@ export class BrowserManager { * Check if browser is launched */ isLaunched(): boolean { - return this.browser !== null || this.isPersistentContext; + return this.browser !== null || this.isPersistentContext || this.isDirectTargetMode(); } /** @@ -318,6 +340,7 @@ export class BrowserManager { * Check if the browser has any usable pages */ 
hasPages(): boolean { + if (this.isDirectTargetMode()) return true; return this.pages.length > 0; } @@ -327,6 +350,7 @@ export class BrowserManager { * No-op if pages already exist. */ async ensurePage(): Promise { + if (this.isDirectTargetMode()) return; if (this.pages.length > 0) return; if (!this.browser && !this.isPersistentContext) return; @@ -358,6 +382,9 @@ export class BrowserManager { * Get the current active page, throws if not launched */ getPage(): Page { + if (this.isDirectTargetMode()) { + throw new Error('Direct target mode has no Playwright Page. This command is not supported.'); + } if (this.pages.length === 0) { throw new Error('Browser not launched. Call launch first.'); } @@ -838,6 +865,9 @@ export class BrowserManager { * by verifying we can access browser contexts and that at least one has pages */ private isCdpConnectionAlive(): boolean { + if (this.isDirectTargetMode()) { + return this.directTargetWs?.readyState === WebSocket.OPEN; + } if (!this.browser) return false; try { const contexts = this.browser.contexts(); @@ -852,6 +882,10 @@ export class BrowserManager { * Check if CDP connection needs to be re-established */ private needsCdpReconnect(cdpEndpoint: string): boolean { + if (this.isDirectTargetMode()) { + if (this.cdpEndpoint !== cdpEndpoint) return true; + return this.directTargetWs?.readyState !== WebSocket.OPEN; + } if (!this.browser?.isConnected()) return true; if (this.cdpEndpoint !== cdpEndpoint) return true; if (!this.isCdpConnectionAlive()) return true; @@ -1488,6 +1522,91 @@ export class BrowserManager { this.activePageIndex = this.pages.length > 0 ? 
this.pages.length - 1 : 0; } + private async connectViaDirectTarget(targetWsUrl: string, timeoutMs = 15_000): Promise { + await new Promise((resolve, reject) => { + const ws = new WebSocket(targetWsUrl); + const timeout = setTimeout(() => { + ws.terminate(); + reject(new Error(`Timed out connecting to direct CDP target: ${targetWsUrl}`)); + }, timeoutMs); + + ws.on('open', () => { + clearTimeout(timeout); + this.directTargetWs = ws; + this.directTargetMsgId = 0; + this.cdpEndpoint = targetWsUrl; + resolve(); + }); + + ws.on('message', (raw: WebSocket.RawData) => { + let msg: + | { + id?: number; + result?: unknown; + error?: { message?: string }; + } + | undefined; + try { + msg = JSON.parse(raw.toString()) as { + id?: number; + result?: unknown; + error?: { message?: string }; + }; + } catch { + return; + } + if (!msg || typeof msg.id !== 'number') return; + const pending = this.directTargetPending.get(msg.id); + if (!pending) return; + this.directTargetPending.delete(msg.id); + if (msg.error) { + pending.reject(new Error(msg.error.message ?? 
'CDP target command failed')); + return; + } + pending.resolve(msg.result); + }); + + ws.on('error', (error: Error) => { + clearTimeout(timeout); + reject(error); + }); + + ws.on('close', () => { + this.directTargetWs = null; + for (const [, pending] of this.directTargetPending) { + pending.reject(new Error('Direct CDP target disconnected')); + } + this.directTargetPending.clear(); + }); + }); + + await this.directEvaluate('location.href'); + } + + private async directTargetSend( + method: string, + params: Record = {} + ): Promise { + const ws = this.directTargetWs; + if (!ws || ws.readyState !== WebSocket.OPEN) { + throw new Error('Direct CDP target is not connected'); + } + + const id = ++this.directTargetMsgId; + return await new Promise((resolve, reject) => { + this.directTargetPending.set(id, { + resolve: (value) => resolve(value as T), + reject, + }); + ws.send(JSON.stringify({ id, method, params }), (error?: Error) => { + if (error) { + this.directTargetPending.delete(id); + reject(error); + } + }); + }); + } + /** * Connect to a running browser via CDP (Chrome DevTools Protocol) * @param cdpEndpoint Either a port number (as string) or a full WebSocket URL (ws:// or wss://) @@ -1520,6 +1639,11 @@ export class BrowserManager { cdpUrl = `http://localhost:${cdpEndpoint}`; } + if (this.isDirectTargetEndpoint(cdpUrl)) { + await this.connectViaDirectTarget(this.asWebSocketUrl(cdpUrl), options?.timeout ?? 15_000); + return; + } + const browser = await chromium .connectOverCDP(cdpUrl, { timeout: options?.timeout }) .catch(() => { @@ -1571,6 +1695,305 @@ export class BrowserManager { } } + private resolveDirectSelector(selectorOrRef: string): string { + const ref = parseRef(selectorOrRef); + if (!ref) return selectorOrRef; + + const refData = this.refMap[ref]; + if (!refData) { + throw new Error(`Unknown ref "${selectorOrRef}". 
Run snapshot again to refresh refs.`); + } + return refData.selector; + } + + private async sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); + } + + private runtimeEvaluateExpression(selector: string, expression: string): string { + const escapedSelector = JSON.stringify(selector); + return `(() => { + const el = document.querySelector(${escapedSelector}); + if (!el) throw new Error('Element not found: ' + ${escapedSelector}); + ${expression} + })()`; + } + + async directEvaluate(script: string): Promise { + const result = await this.directTargetSend<{ + result?: { + type: string; + value?: unknown; + description?: string; + unserializableValue?: string; + }; + exceptionDetails?: { + text?: string; + exception?: { description?: string; value?: unknown }; + }; + }>('Runtime.evaluate', { + expression: script, + returnByValue: true, + awaitPromise: true, + }); + + if (result.exceptionDetails) { + throw new Error( + result.exceptionDetails.exception?.description ?? + result.exceptionDetails.text ?? + 'Runtime.evaluate failed' + ); + } + + if (!result.result) return undefined; + if (result.result.unserializableValue !== undefined) return result.result.unserializableValue; + if (result.result.value !== undefined) return result.result.value; + return result.result.description; + } + + async directGetUrl(): Promise { + const value = await this.directEvaluate('location.href'); + return String(value ?? ''); + } + + async directGetTitle(): Promise { + const value = await this.directEvaluate('document.title'); + return String(value ?? ''); + } + + async directNavigate( + url: string, + waitUntil: 'load' | 'domcontentloaded' | 'networkidle' + ): Promise { + await this.directEvaluate(`window.location.href = ${JSON.stringify(url)}; true`); + + // Direct target mode has no page lifecycle events, so we approximate waitUntil. 
+ if (waitUntil === 'domcontentloaded') { + await this.directWait({ state: 'domcontentloaded' }); + } else if (waitUntil === 'networkidle') { + await this.directWait({ state: 'load' }); + await this.sleep(250); + } else { + await this.directWait({ state: 'load' }); + } + } + + async directClick(selectorOrRef: string): Promise { + const selector = this.resolveDirectSelector(selectorOrRef); + await this.directEvaluate( + this.runtimeEvaluateExpression( + selector, + `el.scrollIntoView({ block: 'center', inline: 'center' }); + if (typeof el.focus === 'function') el.focus(); + if (typeof el.click === 'function') el.click(); + return true;` + ) + ); + } + + async directFill(selectorOrRef: string, value: string): Promise { + const selector = this.resolveDirectSelector(selectorOrRef); + await this.directEvaluate( + this.runtimeEvaluateExpression( + selector, + `const input = el; + input.focus(); + input.value = ${JSON.stringify(value)}; + input.dispatchEvent(new Event('input', { bubbles: true })); + input.dispatchEvent(new Event('change', { bubbles: true })); + return true;` + ) + ); + } + + async directType(selectorOrRef: string, text: string, clear?: boolean): Promise { + const selector = this.resolveDirectSelector(selectorOrRef); + const shouldClear = clear ? 'true' : 'false'; + await this.directEvaluate( + this.runtimeEvaluateExpression( + selector, + `const input = el; + input.focus(); + const previous = ${shouldClear} ? '' : String(input.value ?? ''); + input.value = previous + ${JSON.stringify(text)}; + input.dispatchEvent(new Event('input', { bubbles: true })); + return true;` + ) + ); + } + + async directPress(key: string, selectorOrRef?: string): Promise { + const selector = + selectorOrRef && selectorOrRef.length > 0 + ? this.resolveDirectSelector(selectorOrRef) + : undefined; + await this.directEvaluate(`(() => { + const target = ${selector ? 
`document.querySelector(${JSON.stringify(selector)})` : 'document.activeElement || document.body'}; + if (!target) throw new Error('No active element available for key press'); + const opts = { key: ${JSON.stringify(key)}, bubbles: true, cancelable: true }; + target.dispatchEvent(new KeyboardEvent('keydown', opts)); + target.dispatchEvent(new KeyboardEvent('keypress', opts)); + target.dispatchEvent(new KeyboardEvent('keyup', opts)); + return true; + })()`); + } + + async directWait(options?: { + selector?: string; + state?: 'attached' | 'detached' | 'visible' | 'hidden' | 'domcontentloaded' | 'load'; + timeout?: number; + }): Promise { + const timeout = options?.timeout ?? 30_000; + const state = options?.state ?? (options?.selector ? 'visible' : 'load'); + + if (!options?.selector) { + if (state === 'domcontentloaded' || state === 'load') { + const start = Date.now(); + while (Date.now() - start < timeout) { + const readyState = String(await this.directEvaluate('document.readyState')); + if (state === 'load' ? 
readyState === 'complete' : readyState !== 'loading') { + return; + } + await this.sleep(100); + } + throw new Error(`Timeout ${timeout}ms exceeded waiting for document readyState=${state}`); + } + + if (options?.timeout) { + await this.sleep(options.timeout); + } + return; + } + + const selector = this.resolveDirectSelector(options.selector); + const start = Date.now(); + while (Date.now() - start < timeout) { + const probe = (await this.directEvaluate(`(() => { + const el = document.querySelector(${JSON.stringify(selector)}); + if (!el) return { exists: false, visible: false }; + const style = window.getComputedStyle(el); + const rect = el.getBoundingClientRect(); + const visible = style.visibility !== 'hidden' && style.display !== 'none' && rect.width > 0 && rect.height > 0; + return { exists: true, visible }; + })()`)) as { exists?: boolean; visible?: boolean } | null; + + const exists = !!probe?.exists; + const visible = !!probe?.visible; + const conditionMet = + (state === 'attached' && exists) || + (state === 'detached' && !exists) || + (state === 'visible' && exists && visible) || + (state === 'hidden' && (!exists || !visible)); + if (conditionMet) return; + await this.sleep(100); + } + + throw new Error( + `Timeout ${timeout}ms exceeded waiting for selector "${options.selector}" to be ${state}` + ); + } + + async directScroll( + selectorOrRef: string | undefined, + deltaX: number, + deltaY: number + ): Promise { + if (selectorOrRef) { + const selector = this.resolveDirectSelector(selectorOrRef); + await this.directEvaluate( + this.runtimeEvaluateExpression( + selector, + `el.scrollIntoView({ block: 'center', inline: 'center' }); + el.scrollBy(${deltaX}, ${deltaY}); + return true;` + ) + ); + return; + } + + await this.directEvaluate(`window.scrollBy(${deltaX}, ${deltaY}); true`); + } + + async directCaptureScreenshotPngBase64(): Promise { + await this.directTargetSend('Page.enable'); + const result = await this.directTargetSend<{ data?: string 
}>('Page.captureScreenshot', { + format: 'png', + fromSurface: true, + }); + if (!result.data) { + throw new Error('Failed to capture screenshot from direct CDP target'); + } + return result.data; + } + + async directSnapshot(options?: { interactive?: boolean }): Promise { + const interactiveOnly = options?.interactive !== false; + const rows = (await this.directEvaluate(`(() => { + const all = Array.from(document.querySelectorAll('*')); + const interactiveSelector = 'a,button,input,textarea,select,[role=\"button\"],[onclick],[tabindex]'; + const nodes = ${interactiveOnly ? 'Array.from(document.querySelectorAll(interactiveSelector))' : 'all'}; + + const toSelector = (el) => { + if (el.id) return '#' + CSS.escape(el.id); + if (el.getAttribute('data-testid')) { + return '[data-testid=\"' + CSS.escape(el.getAttribute('data-testid')) + '\"]'; + } + const parts = []; + let curr = el; + while (curr && curr.nodeType === 1 && curr !== document.body && parts.length < 8) { + let sel = curr.tagName.toLowerCase(); + const parent = curr.parentElement; + if (!parent) break; + const siblings = Array.from(parent.children).filter((s) => s.tagName === curr.tagName); + if (siblings.length > 1) { + sel += ':nth-of-type(' + (siblings.indexOf(curr) + 1) + ')'; + } + parts.unshift(sel); + const candidate = parts.join(' > '); + try { + if (document.querySelectorAll(candidate).length === 1) return candidate; + } catch { + // keep building + } + curr = parent; + } + return parts.join(' > '); + }; + + return nodes.slice(0, 400).map((el) => { + const tag = el.tagName.toLowerCase(); + const roleAttr = el.getAttribute('role'); + const role = roleAttr || (tag === 'a' ? 'link' : tag === 'button' ? 'button' : tag === 'input' || tag === 'textarea' ? 'textbox' : 'generic'); + const text = (el.innerText || el.textContent || '').trim(); + const value = 'value' in el ? 
(el.value || '') : ''; + const name = (el.getAttribute('aria-label') || text || value || el.getAttribute('name') || el.id || tag).trim(); + return { + role, + name: String(name), + selector: toSelector(el), + }; + }).filter((r) => r.selector); + })()`)) as Array<{ role: string; name: string; selector: string }>; + + const refs: RefMap = {}; + const lines: string[] = []; + + rows.forEach((row, index) => { + const ref = `e${index + 1}`; + refs[ref] = { + selector: row.selector, + role: row.role, + name: row.name, + }; + lines.push(`- ${row.role} "${row.name}" [ref=${ref}]`); + }); + + const tree = lines.join('\n') || 'Empty page'; + this.refMap = refs; + this.lastSnapshot = tree; + return { tree, refs }; + } + /** * Get Chrome's default user data directory paths for the current platform. * Returns an array of candidate paths to check (stable, then beta/canary). @@ -1776,6 +2199,9 @@ export class BrowserManager { * Create a new tab in the current context */ async newTab(): Promise<{ index: number; total: number }> { + if (this.isDirectTargetMode()) { + throw new Error('Direct target mode is single-target and does not support creating new tabs'); + } if (!this.browser || this.contexts.length === 0) { throw new Error('Browser not launched'); } @@ -1802,6 +2228,11 @@ export class BrowserManager { index: number; total: number; }> { + if (this.isDirectTargetMode()) { + throw new Error( + 'Direct target mode is single-target and does not support creating new windows' + ); + } if (!this.browser) { throw new Error('Browser not launched'); } @@ -1847,6 +2278,16 @@ export class BrowserManager { * Switch to a specific tab/page by index */ async switchTo(index: number): Promise<{ index: number; url: string; title: string }> { + if (this.isDirectTargetMode()) { + if (index !== 0) { + throw new Error('Direct target mode only supports tab index 0'); + } + return { + index: 0, + url: await this.directGetUrl(), + title: await this.directGetTitle(), + }; + } if (index < 0 || index >= 
this.pages.length) { throw new Error(`Invalid tab index: ${index}. Available: 0-${this.pages.length - 1}`); } @@ -1870,6 +2311,9 @@ export class BrowserManager { * Close a specific tab/page */ async closeTab(index?: number): Promise<{ closed: number; remaining: number }> { + if (this.isDirectTargetMode()) { + throw new Error('Cannot close tab in direct target mode. Use "close" to disconnect.'); + } const targetIndex = index ?? this.activePageIndex; if (targetIndex < 0 || targetIndex >= this.pages.length) { @@ -1903,6 +2347,17 @@ export class BrowserManager { * List all tabs with their info */ async listTabs(): Promise> { + if (this.isDirectTargetMode()) { + return [ + { + index: 0, + url: await this.directGetUrl(), + title: await this.directGetTitle(), + active: true, + }, + ]; + } + const tabs = await Promise.all( this.pages.map(async (page, index) => ({ index, @@ -1919,6 +2374,9 @@ export class BrowserManager { * Only works with Chromium-based browsers */ async getCDPSession(): Promise { + if (this.isDirectTargetMode()) { + throw new Error('Direct target mode does not support Playwright CDPSession'); + } if (this.cdpSession) { return this.cdpSession; } @@ -2463,6 +2921,15 @@ export class BrowserManager { * Close the browser and clean up */ async close(): Promise { + if (this.directTargetWs) { + for (const [, pending] of this.directTargetPending) { + pending.reject(new Error('Browser closed')); + } + this.directTargetPending.clear(); + this.directTargetWs.close(); + this.directTargetWs = null; + } + // Stop recording if active (saves video) if (this.recordingContext) { await this.stopRecording(); @@ -2541,6 +3008,8 @@ export class BrowserManager { this.pages = []; this.contexts = []; this.cdpEndpoint = null; + this.directTargetPending.clear(); + this.directTargetMsgId = 0; this.browserbaseSessionId = null; this.browserbaseApiKey = null; this.browserUseSessionId = null; From 4a5bf8bcb99e2c67daef80460ed836ca4c6b8138 Mon Sep 17 00:00:00 2001 From: Francois Laberge 
Date: Sun, 1 Mar 2026 20:31:16 -0500 Subject: [PATCH 2/2] docs: add electron webview CDP issue note --- ...t-browser-electron-webview-cdp-issue-v1.md | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 .github/notes/agent-browser-electron-webview-cdp-issue-v1.md diff --git a/.github/notes/agent-browser-electron-webview-cdp-issue-v1.md b/.github/notes/agent-browser-electron-webview-cdp-issue-v1.md new file mode 100644 index 0000000..83deac7 --- /dev/null +++ b/.github/notes/agent-browser-electron-webview-cdp-issue-v1.md @@ -0,0 +1,80 @@ +# Agent-Browser + Electron `<webview>` CDP Issue + +**Date:** March 1, 2026 +**Project:** Decode desktop app (`Electron`) +**Context:** Trying to automate embedded `<webview>` instances (example: Google home page) via `agent-browser` over CDP. + +## Summary + +`agent-browser` can connect to the Electron CDP port (e.g. `9222`) and control the top-level app window, but it cannot attach to a direct target endpoint like: + +- `ws://localhost:9222/devtools/page/<target-id>` + +Even though Electron exposes the webview target correctly at `/json`, `agent-browser` throws: + +- `No page found. Make sure the app has loaded content.` + +## What Was Verified + +1. Electron CDP target discovery is working. +- `http://localhost:9222/json` lists both: + - `type: "page"` (Decode main window) + - `type: "webview"` (Google) + +2. The webview WebSocket endpoint itself is valid. +- Direct raw CDP calls to `ws://localhost:9222/devtools/page/<target-id>` successfully returned: + - `document.title = "Google"` + - `location.href = "https://www.google.com/"` + +3. `agent-browser` fails on that same target endpoint. +- Command reproduced: + - `agent-browser --cdp "ws://localhost:9222/devtools/page/<target-id>" snapshot` +- Result: + - `No page found. Make sure the app has loaded content.` + +## Root Cause + +`agent-browser` currently assumes CDP connections resolve to browser-level Playwright contexts/pages. 
+ +When given a **target-level** endpoint (`/devtools/page/<target-id>`), it still runs browser-context/page validation. That validation fails in this mode, so `agent-browser` exits early. + +In short: +- Discovery endpoint is fine. +- Webview target endpoint is fine. +- The `agent-browser` connection model does not support this endpoint type yet. + +## Code References (Where It Fails) + +Installed `agent-browser` (`0.7.6`) checks contexts/pages after `connectOverCDP`: + +- `/Users/francoislaberge/.nvm/versions/node/v22.16.0/lib/node_modules/agent-browser/dist/browser.js#L796` +- `/Users/francoislaberge/.nvm/versions/node/v22.16.0/lib/node_modules/agent-browser/dist/browser.js#L804` +- `/Users/francoislaberge/.nvm/versions/node/v22.16.0/lib/node_modules/agent-browser/dist/browser.js#L809` +- `/Users/francoislaberge/.nvm/versions/node/v22.16.0/lib/node_modules/agent-browser/dist/browser.js#L811` + +Latest `agent-browser` (`0.15.1`) has the same pattern: + +- `/Users/francoislaberge/conductor/workspaces/decode-next/berlin-v3/.context/tmp-agent-browser/package/dist/browser.js#L1277` +- `/Users/francoislaberge/conductor/workspaces/decode-next/berlin-v3/.context/tmp-agent-browser/package/dist/browser.js#L1292` +- `/Users/francoislaberge/conductor/workspaces/decode-next/berlin-v3/.context/tmp-agent-browser/package/dist/browser.js#L1294` + +## Notes About Electron Skill Docs + +The upstream Electron skill doc suggests webviews should be accessible via `agent-browser tab`, but it does not document a direct `/devtools/page/<target-id>` workflow or workaround. + +Reference: +- https://github.com/vercel-labs/agent-browser/blob/main/skills/electron/SKILL.md + +## Practical Workarounds Right Now + +1. Use `agent-browser` with a browser-level endpoint (`9222` or `/devtools/browser/<browser-id>`) for main-window automation. +2. Use raw CDP (custom script/tooling) for direct webview-target automation (`/devtools/page/<target-id>`). 
+ +## Proposed Fix in Agent-Browser + +To support webview target endpoints directly, `agent-browser` would need a dedicated target mode: + +1. Detect `.../devtools/page/<target-id>` endpoints in `connectViaCDP`. +2. Skip browser `contexts/pages` validation in that mode. +3. Route commands (`snapshot`, `click`, `type`, `fill`, `press`, `eval`, `screenshot`) through raw CDP domains (`Runtime`, `DOM`, `Input`, `Page`) instead of Playwright `Page` APIs. +