diff --git a/src/core/__tests__/capabilities.test.ts b/src/core/__tests__/capabilities.test.ts index c10fce0ea..062f6546f 100644 --- a/src/core/__tests__/capabilities.test.ts +++ b/src/core/__tests__/capabilities.test.ts @@ -400,6 +400,7 @@ test('web supports only the initial browser interaction slice', () => { 'network', 'open', 'press', + 'record', 'screenshot', 'scroll', 'snapshot', @@ -428,7 +429,6 @@ test('web supports only the initial browser interaction slice', () => { 'perf', 'pinch', 'push', - 'record', 'reinstall', 'rotate', 'settings', diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index 91cc871b4..e3c8f82e3 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -42,6 +42,7 @@ const LINUX_DEVICE: KindMatrix = { device: true }; const LINUX_NONE: KindMatrix = {}; const WEB_DEVICE: KindMatrix = { device: true }; const WEB_RUNTIME_COMMANDS = ['open', 'close'] as const; +const WEB_RECORDING_COMMANDS = ['record'] as const; const WEB_QUERY_COMMANDS = [ 'find', 'get', @@ -55,6 +56,7 @@ const WEB_INTERACTION_COMMANDS = ['click', 'fill', 'focus', 'press', 'scroll', ' const WEB_SETTING_COMMANDS = ['viewport'] as const; const WEB_SUPPORTED_COMMANDS = new Set([ ...WEB_RUNTIME_COMMANDS, + ...WEB_RECORDING_COMMANDS, ...WEB_QUERY_COMMANDS, ...WEB_INTERACTION_COMMANDS, ...WEB_SETTING_COMMANDS, diff --git a/src/daemon-artifacts.ts b/src/daemon-artifacts.ts index cdb25ce55..ad9c004cc 100644 --- a/src/daemon-artifacts.ts +++ b/src/daemon-artifacts.ts @@ -6,6 +6,10 @@ import { pipeline } from 'node:stream/promises'; import { AppError } from './utils/errors.ts'; import type { DaemonArtifact, DaemonRequest, DaemonResponse } from './daemon/types.ts'; import { buildDaemonHttpAuthHeaders } from './daemon/http-contract.ts'; +import { + appendRecordingExtensionWhenMissing, + recordingExtensionForPlatform, +} from './recording/output-path.ts'; import { uploadArtifact } from './upload-client.ts'; // Mirrors the current daemon RPC timeout, but artifact download timeouts may diverge. @@ -214,17 +218,43 @@ function prepareRemoteArtifactCommand( }; } if (req.command === 'record' && (positionals[0] ?? '').toLowerCase() === 'start') { - const localPath = resolveClientArtifactOutputPath(req, 'outPath', '.mp4', 1); + if (!recordingHasRequestedClientPath(req) && req.flags?.platform === undefined) { + return null; + } + const fallbackExtension = recordingFallbackExtension(req); + const localPath = normalizeRecordingClientArtifactPath( + resolveClientArtifactOutputPath(req, 'outPath', fallbackExtension, 1), + req, + ); return { field: 'outPath', localPath, positionalIndex: 1, - positionalPath: buildRemoteTempArtifactPath('recording', path.extname(localPath) || '.mp4'), + positionalPath: buildRemoteTempArtifactPath( + 'recording', + path.extname(localPath) || fallbackExtension, + ), }; } return null; } +function recordingFallbackExtension(req: Omit): string { + return recordingExtensionForPlatform(req.flags?.platform); +} + +function recordingHasRequestedClientPath(req: Omit): boolean { + return hasNonEmptyString(req.positionals?.[1]) || hasNonEmptyString(req.flags?.out); +} + +function normalizeRecordingClientArtifactPath( + localPath: string, + req: Omit, +): string { + if (req.flags?.platform !== 'web') return localPath; + return appendRecordingExtensionWhenMissing(localPath, recordingFallbackExtension(req)); +} + function resolveClientArtifactOutputPath( req: Omit, field: 'path' | 'outPath', @@ -233,10 +263,14 @@ function resolveClientArtifactOutputPath( ): string { const requested = req.positionals?.[positionalIndex] ?? req.flags?.out; const fallbackName = `${field === 'path' ? 'screenshot' : 'recording'}-${Date.now()}${fallbackExtension}`; - const rawPath = requested && requested.trim().length > 0 ? requested : fallbackName; + const rawPath = hasNonEmptyString(requested) ? requested : fallbackName; return path.isAbsolute(rawPath) ? rawPath : path.resolve(req.meta?.cwd ?? process.cwd(), rawPath); } +function hasNonEmptyString(value: unknown): value is string { + return typeof value === 'string' && value.trim().length > 0; +} + function buildRemoteTempArtifactPath(prefix: string, extension: string): string { const safeExtension = extension.startsWith('.') ? extension : `.${extension}`; return path.posix.join( diff --git a/src/daemon/handlers/__tests__/record-trace.test.ts b/src/daemon/handlers/__tests__/record-trace.test.ts index 3193d77d5..b8d1308ca 100644 --- a/src/daemon/handlers/__tests__/record-trace.test.ts +++ b/src/daemon/handlers/__tests__/record-trace.test.ts @@ -63,8 +63,10 @@ import { trimRecordingStart, overlayRecordingTouches, } from '../../../recording/overlay.ts'; +import { resolveTargetDevice } from '../../../core/dispatch.ts'; import { runCmd, runCmdBackground } from '../../../utils/exec.ts'; import { isPlayableVideo, waitForStableFile } from '../../../utils/video.ts'; +import { withWebProvider, type WebProvider } from '../../../platforms/web/provider.ts'; type RunnerCall = { command: string; @@ -79,6 +81,7 @@ type RunnerCall = { const mockRunCmd = vi.mocked(runCmd); const mockRunCmdBackground = vi.mocked(runCmdBackground); const mockRunIosRunnerCommand = vi.mocked(runIosRunnerCommand); +const mockResolveTargetDevice = vi.mocked(resolveTargetDevice); const mockResizeRecording = vi.mocked(resizeRecording); const mockTrimRecordingStart = vi.mocked(trimRecordingStart); const mockOverlayRecordingTouches = vi.mocked(overlayRecordingTouches); @@ -125,6 +128,34 @@ function makeIosSimulatorSession(name: string): SessionState { }); } +function makeWebSession(name: string): SessionState { + return makeSession(name, { + platform: 'web', + id: 'agent-browser-chrome', + name: 'Agent Browser Chrome', + kind: 'device', + target: 'desktop', + booted: true, + }); +} + +function makeWebProvider(overrides: Partial = {}): WebProvider { + return { + open: async () => {}, + close: async () => {}, + startRecording: async () => {}, + stopRecording: async () => {}, + snapshot: async () => ({ nodes: [] }), + screenshot: async () => {}, + setViewport: async () => {}, + click: async () => {}, + fill: async () => {}, + typeText: async () => {}, + scroll: async () => {}, + ...overrides, + }; +} + function makeIosSimulatorRecordingSession( name: string, options: { @@ -311,6 +342,238 @@ test('record stop keeps normal app session open when stop validation fails', asy expect(sessionStore.get(sessionName)?.recording).toBeUndefined(); }); +test('record start and stop web recording keep the requested artifact path stable', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-stable-path'; + const session = makeWebSession(sessionName); + sessionStore.set(sessionName, session); + const outPath = path.join(os.tmpdir(), `${sessionName}.webm`); + const calls: string[] = []; + const provider = makeWebProvider({ + startRecording: async (path) => { + calls.push(`start:${path}`); + }, + stopRecording: async () => { + calls.push('stop'); + fs.writeFileSync(outPath, 'webm'); + }, + }); + + try { + await withWebProvider(provider, async () => { + const start = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', outPath], + }); + const stop = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['stop'], + }); + + expect(start?.ok).toBe(true); + expect((start as any).data?.outPath).toBe(outPath); + expect(stop?.ok).toBe(true); + expect((stop as any).data?.outPath).toBe(outPath); + expect((stop as any).data?.artifacts?.[0]?.path).toBe(outPath); + }); + expect(calls).toEqual([`start:${outPath}`, 'stop']); + } finally { + fs.rmSync(outPath, { force: true }); + } +}); + +test('record start web appends .webm to extensionless paths before delegating', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-extensionless'; + const session = makeWebSession(sessionName); + sessionStore.set(sessionName, session); + const requestedPath = path.join(os.tmpdir(), sessionName); + const expectedPath = `${requestedPath}.webm`; + const calls: string[] = []; + const provider = makeWebProvider({ + startRecording: async (path) => { + calls.push(path); + fs.writeFileSync(path, 'webm'); + }, + }); + + try { + await withWebProvider(provider, async () => { + const start = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', requestedPath], + }); + + expect(start?.ok).toBe(true); + expect((start as any).data?.outPath).toBe(expectedPath); + }); + expect(calls).toEqual([expectedPath]); + } finally { + fs.rmSync(expectedPath, { force: true }); + } +}); + +test('record start web rejects non-WebM output paths before delegating', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-mp4'; + const session = makeWebSession(sessionName); + sessionStore.set(sessionName, session); + const provider = makeWebProvider({ + startRecording: async () => { + throw new Error('should not delegate invalid web recording path'); + }, + }); + + await withWebProvider(provider, async () => { + const start = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', path.join(os.tmpdir(), `${sessionName}.mp4`)], + }); + + expect(start?.ok).toBe(false); + if (!start || start.ok) { + throw new Error(`expected web recording start failure, got ${JSON.stringify(start)}`); + } + expect(start.error.code).toBe('INVALID_ARGS'); + expect(start.error.message).toMatch(/\.webm output path/); + }); +}); + +test('record start web rejects native recording flags before delegating', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-native-flags'; + const session = makeWebSession(sessionName); + sessionStore.set(sessionName, session); + const provider = makeWebProvider({ + startRecording: async () => { + throw new Error('should not delegate unsupported web recording flags'); + }, + }); + + await withWebProvider(provider, async () => { + const start = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', path.join(os.tmpdir(), `${sessionName}.webm`)], + flags: { fps: 0, quality: 'high', screenshotMaxSize: 1024, hideTouches: true }, + }); + + expect(start?.ok).toBe(false); + if (!start || start.ok) { + throw new Error(`expected web recording start failure, got ${JSON.stringify(start)}`); + } + expect(start.error.code).toBe('INVALID_ARGS'); + expect(start.error.message).toContain('--fps, --quality, --max-size, --hide-touches'); + }); +}); + +test('record start web requires an existing browser session', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-no-open'; + mockResolveTargetDevice.mockResolvedValueOnce({ + platform: 'web', + id: 'agent-browser-chrome', + name: 'Agent Browser Chrome', + kind: 'device', + target: 'desktop', + booted: true, + }); + + const response = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', path.join(os.tmpdir(), `${sessionName}.webm`)], + }); + + expect(response?.ok).toBe(false); + if (!response || response.ok) { + throw new Error(`expected web recording start failure, got ${JSON.stringify(response)}`); + } + expect(response.error.code).toBe('INVALID_ARGS'); + expect(response.error.message).toMatch(/run open --platform web first/); + expect(sessionStore.get(sessionName)).toBeUndefined(); +}); + +test('record stop closes record-only web sessions during cleanup', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-record-only-cleanup'; + const session = makeWebSession(sessionName); + session.recordOnlySession = true; + const outPath = path.join(os.tmpdir(), `${sessionName}.webm`); + session.recording = { + platform: 'web', + outPath, + startedAt: Date.now(), + showTouches: false, + gestureEvents: [], + }; + sessionStore.set(sessionName, session); + const calls: string[] = []; + const provider = makeWebProvider({ + close: async () => { + calls.push('close'); + }, + stopRecording: async () => { + calls.push('stop'); + fs.writeFileSync(outPath, 'webm'); + }, + }); + + try { + await withWebProvider(provider, async () => { + const response = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['stop'], + }); + + expect(response?.ok).toBe(true); + }); + expect(calls).toEqual(['stop', 'close']); + expect(sessionStore.get(sessionName)).toBeUndefined(); + } finally { + fs.rmSync(outPath, { force: true }); + } +}); + +test('record stop rejects web recording when agent-browser does not finalize the file', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-recording-missing-file'; + const session = makeWebSession(sessionName); + sessionStore.set(sessionName, session); + const outPath = path.join(os.tmpdir(), `${sessionName}.webm`); + const provider = makeWebProvider(); + + try { + await withWebProvider(provider, async () => { + const start = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['start', outPath], + }); + const stop = await runRecordCommand({ + sessionStore, + sessionName, + positionals: ['stop'], + }); + + expect(start?.ok).toBe(true); + expect(stop?.ok).toBe(false); + if (!stop || stop.ok) { + throw new Error(`expected web recording stop failure, got ${JSON.stringify(stop)}`); + } + expect(stop.error.message).toMatch(/not finalized into a WebM video/); + expect(fs.existsSync(outPath)).toBe(false); + }); + } finally { + fs.rmSync(outPath, { force: true }); + } +}); + test('record start resolves relative output path from request cwd', async () => { const sessionStore = makeSessionStore(); const sessionName = 'ios-device-cwd'; @@ -1933,7 +2196,7 @@ test('record start falls back to /data/local/tmp when /sdcard is unavailable on expect(response?.ok).toBe(true); const recording = sessionStore.get(sessionName)?.recording; expect(recording?.platform).toBe('android'); - expect(recording?.remotePath ?? '').toMatch( + expect(recording?.platform === 'android' ? recording.remotePath : '').toMatch( /^\/data\/local\/tmp\/agent-device-recording-\d+\.mp4$/, ); }); diff --git a/src/daemon/handlers/record-trace-recording-backends.ts b/src/daemon/handlers/record-trace-recording-backends.ts index 7835b211e..8a45c4289 100644 --- a/src/daemon/handlers/record-trace-recording-backends.ts +++ b/src/daemon/handlers/record-trace-recording-backends.ts @@ -1,5 +1,13 @@ +import fs from 'node:fs'; +import path from 'node:path'; import type { DaemonRequest, DaemonResponse, SessionState } from '../types.ts'; import type { SessionStore } from '../session-store.ts'; +import { + appendRecordingExtensionWhenMissing, + defaultRecordingPath, + WEB_RECORDING_EXTENSION, +} from '../../recording/output-path.ts'; +import { resolveWebProvider } from '../../platforms/web/provider.ts'; import { errorResponse } from './response.ts'; import { startAndroidRecording, stopAndroidRecording } from './record-trace-android.ts'; import { @@ -44,12 +52,15 @@ type RecordingStopContext = { }; export type RecordingBackend = { + validateStart?: (req: DaemonRequest) => DaemonResponse | null; resolveOutputPath: (context: RecordingOutputPathContext) => string; start: (context: RecordingStartContext) => Promise; stop: (context: RecordingStopContext) => Promise; + cleanupRecordOnlySession?: (session: SessionState) => Promise; }; export function resolveRecordingBackendForDevice(device: SessionState['device']): RecordingBackend { + if (device.platform === 'web') return webRecordingBackend; if (device.platform === 'android') return androidRecordingBackend; if (device.platform === 'macos') return macOsRecordingBackend; if (device.platform === 'ios' && device.kind === 'device') return iosDeviceRecordingBackend; @@ -67,6 +78,8 @@ export function resolveRecordingBackendForRecording(recording: ActiveRecording): return iosDeviceRecordingBackend; case 'macos-runner': return macOsRecordingBackend; + case 'web': + return webRecordingBackend; } const exhaustive: never = recording; @@ -75,9 +88,56 @@ export function resolveRecordingBackendForRecording(recording: ActiveRecording): function resolveNativeRecordingOutputPath({ req }: RecordingOutputPathContext): string { const requestedPath = req.positionals?.[1]; - return requestedPath ?? `./recording-${Date.now()}.mp4`; + return requestedPath ?? defaultRecordingPath(undefined); } +function resolveWebRecordingOutputPath({ req }: RecordingOutputPathContext): string { + const requestedPath = req.positionals?.[1]; + return requestedPath === undefined + ? defaultRecordingPath('web') + : appendRecordingExtensionWhenMissing(requestedPath, WEB_RECORDING_EXTENSION); +} + +const webRecordingBackend: RecordingBackend = { + validateStart: (req) => validateWebRecordingFlags(req), + resolveOutputPath: resolveWebRecordingOutputPath, + start: async ({ activeSession, recordingBase, resolvedOut }) => { + const startError = validateWebRecordingOutputPath(resolvedOut); + if (startError) { + return startError; + } + if (activeSession.recordOnlySession) { + return errorResponse( + 'INVALID_ARGS', + 'record on web requires an active browser session; run open --platform web first', + ); + } + const provider = resolveWebProvider(); + if (!provider.startRecording) { + return errorResponse('UNSUPPORTED_OPERATION', 'record is not supported by this web provider'); + } + await provider.startRecording(resolvedOut); + return { + ...recordingBase, + outPath: resolvedOut, + startedAt: Date.now(), + platform: 'web', + showTouches: false, + }; + }, + stop: async ({ recording }) => + await stopWebRecording({ + recording: recording as Extract, + }), + cleanupRecordOnlySession: async () => { + try { + await resolveWebProvider().close(); + } catch { + // Best effort cleanup; deleting the daemon session still releases agent-device state. + } + }, +}; + const iosDeviceRecordingBackend: RecordingBackend = { resolveOutputPath: resolveNativeRecordingOutputPath, start: async ({ @@ -192,3 +252,68 @@ const unsupportedRecordingBackend: RecordingBackend = { stop: async () => errorResponse('UNSUPPORTED_OPERATION', 'record is not supported on this device'), }; + +function webRecordingUnsupportedFlags(req: DaemonRequest): string[] { + const unsupported: string[] = []; + if (req.flags?.fps !== undefined) unsupported.push('--fps'); + if (req.flags?.quality !== undefined) unsupported.push('--quality'); + if (req.flags?.screenshotMaxSize !== undefined) unsupported.push('--max-size'); + if (req.flags?.hideTouches !== undefined) unsupported.push('--hide-touches'); + return unsupported; +} + +function validateWebRecordingFlags(req: DaemonRequest): DaemonResponse | null { + const unsupportedWebFlags = webRecordingUnsupportedFlags(req); + if (unsupportedWebFlags.length > 0) { + return errorResponse( + 'INVALID_ARGS', + `web recordings do not support ${unsupportedWebFlags.join(', ')}; agent-browser records WebM directly`, + ); + } + return null; +} + +function validateWebRecordingOutputPath(outPath: string): DaemonResponse | null { + if (path.extname(outPath).toLowerCase() !== WEB_RECORDING_EXTENSION) { + return errorResponse( + 'INVALID_ARGS', + `web recordings must use a ${WEB_RECORDING_EXTENSION} output path`, + ); + } + return null; +} + +function removeInvalidRecordingOutput(outPath: string): void { + try { + fs.rmSync(outPath, { force: true }); + } catch { + // Best effort: the error response still reports the failed finalization. + } +} + +async function stopWebRecording(params: { + recording: Extract; +}): Promise { + const { recording } = params; + const provider = resolveWebProvider(); + if (!provider.stopRecording) { + return errorResponse('UNSUPPORTED_OPERATION', 'record is not supported by this web provider'); + } + await provider.stopRecording(); + if (!hasNonEmptyFile(recording.outPath)) { + removeInvalidRecordingOutput(recording.outPath); + return errorResponse( + 'COMMAND_FAILED', + `failed to stop recording: ${recording.outPath} was not finalized into a WebM video`, + ); + } + return null; +} + +function hasNonEmptyFile(outPath: string): boolean { + try { + return fs.statSync(outPath).size > 0; + } catch { + return false; + } +} diff --git a/src/daemon/handlers/record-trace-recording.ts b/src/daemon/handlers/record-trace-recording.ts index deee1af48..df1a15508 100644 --- a/src/daemon/handlers/record-trace-recording.ts +++ b/src/daemon/handlers/record-trace-recording.ts @@ -96,6 +96,10 @@ async function startRecording(params: { const qualityFlag = req.flags?.quality; const maxSizeFlag = req.flags?.screenshotMaxSize; const backend = resolveRecordingBackendForDevice(device); + const platformValidationError = backend.validateStart?.(req) ?? null; + if (platformValidationError) { + return platformValidationError; + } if ( fpsFlag !== undefined && (!Number.isInteger(fpsFlag) || @@ -275,15 +279,17 @@ function deriveClientTelemetryPath( return deriveRecordingTelemetryPath(recording.clientOutPath); } -function releaseRecordOnlySession( +async function releaseRecordOnlySession( sessionStore: SessionStore, sessionName: string, session: SessionState, options: { writeLog?: boolean } = {}, -): void { +): Promise { if (!session.recordOnlySession) { return; } + const backend = resolveRecordingBackendForDevice(session.device); + await backend.cleanupRecordOnlySession?.(session); if (options.writeLog) { sessionStore.writeSessionLog(session); } @@ -328,7 +334,7 @@ export async function handleRecordCommand(params: { const response = await stopRecording({ req, activeSession, device, logPath, deps }); if (!response.ok) { - releaseRecordOnlySession(sessionStore, sessionName, activeSession); + await releaseRecordOnlySession(sessionStore, sessionName, activeSession); return response; } @@ -342,6 +348,6 @@ export async function handleRecordCommand(params: { showTouches: response.data?.showTouches, }, }); - releaseRecordOnlySession(sessionStore, sessionName, activeSession, { writeLog: true }); + await releaseRecordOnlySession(sessionStore, sessionName, activeSession, { writeLog: true }); return response; } diff --git a/src/daemon/handlers/session-replay-video-recording.ts b/src/daemon/handlers/session-replay-video-recording.ts index 29a2e7d50..ff2b18597 100644 --- a/src/daemon/handlers/session-replay-video-recording.ts +++ b/src/daemon/handlers/session-replay-video-recording.ts @@ -6,6 +6,10 @@ import { sleep } from '../../utils/timeouts.ts'; import { handleRecordCommand } from './record-trace-recording.ts'; import { appendReplayTestTimingEvent } from './session-test-runtime.ts'; import { collectReplayActionArtifactPaths } from './session-replay-runtime.ts'; +import { + defaultRecordingPath, + recordingExtensionForPlatform, +} from '../../recording/output-path.ts'; const REPLAY_TEST_VIDEO_RECORDING_PREROLL_MS = 1_000; const REPLAY_TEST_VIDEO_RECORDING_TAIL_MS = 3_000; @@ -36,9 +40,10 @@ export async function startReplayTestVideoRecordingIfReady( const activeSession = sessionStore.get(sessionName); if (!activeSession || activeSession.recording) return undefined; + const extension = recordingExtensionForPlatform(activeSession.device.platform); const videoPath = artifactsDir - ? path.join(artifactsDir, 'recording.mp4') - : `./recording-${Date.now()}.mp4`; + ? path.join(artifactsDir, `recording${extension}`) + : defaultRecordingPath(activeSession.device.platform); appendVideoTimingEvent(tracePath, { type: 'video_recording_start', session: sessionName, diff --git a/src/daemon/types.ts b/src/daemon/types.ts index e970dabb0..fc8c7bebd 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -287,6 +287,9 @@ export type SessionState = { | (SessionRecordingBase & { platform: 'macos-runner'; remotePath?: string; + }) + | (SessionRecordingBase & { + platform: 'web'; }); /** Session-scoped app log stream; logs written to outPath for agent to grep */ appLog?: { diff --git a/src/platforms/web/agent-browser-provider.test.ts b/src/platforms/web/agent-browser-provider.test.ts index 7957e22bb..b5d702611 100644 --- a/src/platforms/web/agent-browser-provider.test.ts +++ b/src/platforms/web/agent-browser-provider.test.ts @@ -26,6 +26,7 @@ test('agent-browser provider maps supported operations to session-scoped JSON co await withCommandExecutorOverride(recordingExecutor(calls), async () => { await provider.open('https://example.test'); + const startRecording = await provider.startRecording?.('/tmp/clip.webm'); await provider.screenshot('/tmp/page.png', { fullscreen: true }); await provider.setViewport(1280, 900); await provider.click(10.4, 20.6); @@ -36,13 +37,18 @@ test('agent-browser provider maps supported operations to session-scoped JSON co await provider.scroll('down', { pixels: 400 }); const scrollResult = await provider.scroll('up', { pixels: 100, durationMs: 120 }); assert.deepEqual(scrollResult, { durationMs: 120 }); + const stopRecording = await provider.stopRecording?.(); await provider.close(); + + assert.equal(startRecording, undefined); + assert.equal(stopRecording, undefined); }); assert.deepEqual( calls.map((call) => call.args), [ ['open', 'https://example.test', '--json', '--session', 'web-session'], + ['record', 'start', '/tmp/clip.webm', '--json', '--session', 'web-session'], ['screenshot', '--full', '/tmp/page.png', '--json', '--session', 'web-session'], ['set', 'viewport', '1280', '900', '--json', '--session', 'web-session'], ['mouse', 'move', '10', '21', '--json', '--session', 'web-session'], @@ -60,6 +66,7 @@ test('agent-browser provider maps supported operations to session-scoped JSON co ['scroll', 'up', '34', '--json', '--session', 'web-session'], ['scroll', 'up', '33', '--json', '--session', 'web-session'], ['scroll', 'up', '33', '--json', '--session', 'web-session'], + ['record', 'stop', '--json', '--session', 'web-session'], ['close', '--json', '--session', 'web-session'], ], ); diff --git a/src/platforms/web/agent-browser-provider.ts b/src/platforms/web/agent-browser-provider.ts index 111a67d38..ab1fcb675 100644 --- a/src/platforms/web/agent-browser-provider.ts +++ b/src/platforms/web/agent-browser-provider.ts @@ -37,6 +37,12 @@ export function createAgentBrowserWebProvider( async close() { await runJson(['close']); }, + async startRecording(outPath) { + await runJson(['record', 'start', outPath]); + }, + async stopRecording() { + await runJson(['record', 'stop']); + }, async snapshot(snapshotOptions) { return await captureAgentBrowserSnapshot(runJson, snapshotOptions); }, diff --git a/src/platforms/web/provider.ts b/src/platforms/web/provider.ts index 42b9a1257..860d27e89 100644 --- a/src/platforms/web/provider.ts +++ b/src/platforms/web/provider.ts @@ -32,6 +32,8 @@ export type WebSnapshotResult = { export type WebProvider = { open(target: string, options?: WebOpenOptions): Promise; close(target?: string): Promise; + startRecording?(outPath: string): Promise; + stopRecording?(): Promise; snapshot(options?: WebSnapshotOptions): Promise; screenshot(outPath: string, options?: WebScreenshotOptions): Promise; setViewport(width: number, height: number): Promise; diff --git a/src/recording/output-path.ts b/src/recording/output-path.ts new file mode 100644 index 000000000..60bf3376b --- /dev/null +++ b/src/recording/output-path.ts @@ -0,0 +1,19 @@ +import path from 'node:path'; +import type { Platform, PlatformSelector } from '../utils/device.ts'; + +const DEFAULT_RECORDING_EXTENSION = '.mp4'; +export const WEB_RECORDING_EXTENSION = '.webm'; + +export function recordingExtensionForPlatform( + platform: Platform | PlatformSelector | undefined, +): string { + return platform === 'web' ? WEB_RECORDING_EXTENSION : DEFAULT_RECORDING_EXTENSION; +} + +export function appendRecordingExtensionWhenMissing(filePath: string, extension: string): string { + return path.extname(filePath) ? filePath : `${filePath}${extension}`; +} + +export function defaultRecordingPath(platform: Platform | undefined): string { + return `./recording-${Date.now()}${recordingExtensionForPlatform(platform)}`; +} diff --git a/src/utils/cli-help.ts b/src/utils/cli-help.ts index 6279d60f6..11178f8f8 100644 --- a/src/utils/cli-help.ts +++ b/src/utils/cli-help.ts @@ -281,12 +281,14 @@ Validation and evidence: agent-device click @e12 --platform web agent-device fill @e13 "qa@example.com" --platform web agent-device wait text "Welcome" 3000 --platform web + agent-device record start ./artifacts/web-flow.webm --platform web agent-device network dump 25 --include headers --platform web agent-device screenshot ./artifacts/web-home.png --platform web agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen agent-device viewport 1280 900 --platform web + agent-device record stop --platform web agent-device close --platform web - Minimal web support is for browser sessions with open, snapshot, find, get, is, click/press, fill/type, wait, network dump, screenshot, close, and replay over those commands. Use agent-browser directly for browser-specific features that agent-device does not surface, such as tab/devtools management, advanced page scripting, network routing/HAR, or raw browser debugging. + Minimal web support is for browser sessions with open, snapshot, find, get, is, click/press, fill/type, wait, network dump, screenshot, record start/stop with WebM output, close, and replay over those commands. Use agent-browser directly for browser-specific features that agent-device does not surface, such as tab/devtools management, advanced page scripting, network routing/HAR, or raw browser debugging. macOS menu bar: open ... --platform macos --surface menubar; snapshot -i --platform macos --surface menubar. Maestro full-suite validation on explicit connected devices uses one test command with a comma-separated --device list and --shard-all. Use --shard-split only when splitting suite entries across devices: agent-device test ./e2e/maestro --maestro --device udid1,emulator-5554 --shard-all 2 @@ -753,14 +755,16 @@ First-slice loop: agent-device click @e12 --platform web agent-device fill @e13 "qa@example.com" --platform web agent-device wait text "Welcome" 3000 --platform web + agent-device record start ./artifacts/web-flow.webm --platform web agent-device network dump 25 --include headers --platform web agent-device screenshot ./artifacts/web-home.png --platform web agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen agent-device viewport 1280 900 --platform web + agent-device record stop --platform web agent-device close --platform web Supported in agent-device web sessions: - open , snapshot -i, get text/attrs, is visible/exists/text, find text/selector, click/press @ref or selector, fill/type @ref or selector, wait text/selector, network dump, screenshot, close, and replay scripts made from those commands. + open , snapshot -i, get text/attrs, is visible/exists/text, find text/selector, click/press @ref or selector, fill/type @ref or selector, wait text/selector, network dump, screenshot, record start/stop with WebM output, close, and replay scripts made from those commands. Out of scope for agent-device web support: Browser runtime debugging, tabs/windows/devtools control, network routing/interception/HAR, storage/cookie management, arbitrary page scripting, downloads/uploads, multi-page orchestration, and agent-browser-specific diagnostics. Use agent-browser directly for those browser-specific workflows. diff --git a/test/integration/provider-scenarios/remote-daemon-client.test.ts b/test/integration/provider-scenarios/remote-daemon-client.test.ts index bc80b2c3b..804babd86 100644 --- a/test/integration/provider-scenarios/remote-daemon-client.test.ts +++ b/test/integration/provider-scenarios/remote-daemon-client.test.ts @@ -5,6 +5,7 @@ import os from 'node:os'; import path from 'node:path'; import { test } from 'vitest'; import { createAgentDeviceClient } from '../../../src/client.ts'; +import { prepareRemoteRequestArtifacts } from '../../../src/daemon-artifacts.ts'; import { createDaemonProxyServer } from '../../../src/daemon-proxy.ts'; import { normalizeAgentDeviceError } from '../../../src/utils/errors.ts'; import { @@ -590,6 +591,59 @@ test('Provider-backed integration remote daemon client materializes artifacts an } }); +test('remote web recording defaults client and daemon artifact paths to WebM', async () => { + const prepared = await prepareRemoteRequestArtifacts( + { + session: 'default', + command: 'record', + positionals: ['start'], + flags: { platform: 'web' }, + meta: { cwd: '/tmp/project' }, + }, + { baseUrl: 'http://127.0.0.1:1', token: 'remote-token' }, + ); + + assert.equal(prepared.positionals[0], 'start'); + assert.match(String(prepared.positionals[1] ?? ''), /^\/tmp\/agent-device-recording-.*\.webm$/); + assert.match( + prepared.clientArtifactPaths?.outPath ?? '', + /^\/tmp\/project\/recording-\d+\.webm$/, + ); +}); + +test('remote web recording appends WebM extension to extensionless client paths', async () => { + const prepared = await prepareRemoteRequestArtifacts( + { + session: 'default', + command: 'record', + positionals: ['start', 'recording'], + flags: { platform: 'web' }, + meta: { cwd: '/tmp/project' }, + }, + { baseUrl: 'http://127.0.0.1:1', token: 'remote-token' }, + ); + + assert.equal(prepared.positionals[0], 'start'); + assert.match(String(prepared.positionals[1] ?? ''), /^\/tmp\/agent-device-recording-.*\.webm$/); + assert.equal(prepared.clientArtifactPaths?.outPath, '/tmp/project/recording.webm'); +}); + +test('remote recording without platform or requested path lets daemon choose session-specific default', async () => { + const prepared = await prepareRemoteRequestArtifacts( + { + session: 'default', + command: 'record', + positionals: ['start'], + flags: {}, + meta: { cwd: '/tmp/project' }, + }, + { baseUrl: 'http://127.0.0.1:1', token: 'remote-token' }, + ); + + assert.deepEqual(prepared.positionals, ['start']); + assert.equal(prepared.clientArtifactPaths, undefined); +}); + test('Provider-backed integration daemon proxy forwards remote client RPC commands', async (t) => { if (await skipWhenLoopbackUnavailable(t, 'daemon proxy integration coverage')) { return; diff --git a/test/integration/provider-scenarios/web-desktop.test.ts b/test/integration/provider-scenarios/web-desktop.test.ts index b0149f492..f84d348ae 100644 --- a/test/integration/provider-scenarios/web-desktop.test.ts +++ b/test/integration/provider-scenarios/web-desktop.test.ts @@ -15,6 +15,10 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca 'agent-device-provider-scenario-web', 'png', ); + const recordingPath = createProviderScenarioTempPath( + 'agent-device-provider-scenario-web-recording', + 'webm', + ); try { const devices = await daemon.client().devices.list({ platform: 'web' }); @@ -30,6 +34,12 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca positionals: [WEB_URL], flags: { platform: 'web' }, }, + { + name: 'start web recording', + command: 'record', + positionals: ['start', recordingPath], + expectData: { recording: 'started', outPath: recordingPath }, + }, { name: 'capture interactive web snapshot', command: 'snapshot', @@ -123,6 +133,12 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca assertPngFile(screenshotPath); }, }, + { + name: 'stop web recording', + command: 'record', + positionals: ['stop'], + expectData: { recording: 'stopped', outPath: recordingPath }, + }, ]); const actions = daemon.session()?.actions ?? []; @@ -158,6 +174,7 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca assert.equal(close.statusCode, 200, JSON.stringify(close.json)); assertFlatToolCall(semanticCalls, ['web', 'open', WEB_URL, '']); + assertFlatToolCall(semanticCalls, ['web', 'recordStart', recordingPath]); assertFlatToolCall(semanticCalls, ['web', 'snapshot', 'true', '']); assertFlatToolCall(semanticCalls, ['web', 'clickRef', '@e4']); assertFlatToolCall(semanticCalls, ['web', 'fillRef', '@e3', 'qa@example.test', '1']); @@ -172,9 +189,11 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca 'false', 'app', ]); + assertFlatToolCall(semanticCalls, ['web', 'recordStop']); assertFlatToolCall(semanticCalls, ['web', 'close', WEB_URL]); } finally { fs.rmSync(screenshotPath, { force: true }); + fs.rmSync(recordingPath, { force: true }); } }); }, 10_000); diff --git a/test/integration/provider-scenarios/web-world.ts b/test/integration/provider-scenarios/web-world.ts index 9d7baf7d7..dc51dec2f 100644 --- a/test/integration/provider-scenarios/web-world.ts +++ b/test/integration/provider-scenarios/web-world.ts @@ -40,6 +40,13 @@ export async function createWebDesktopWorld(): Promise { close: async (target) => { semanticCalls.push(['web', 'close', target ?? '']); }, + startRecording: async (outPath) => { + semanticCalls.push(['web', 'recordStart', outPath]); + fs.writeFileSync(outPath, 'webm'); + }, + stopRecording: async () => { + semanticCalls.push(['web', 'recordStop']); + }, snapshot: async (options) => { semanticCalls.push([ 'web',