From a0fc6e236f3b0416ee819ef9fb3a2508142ef27e Mon Sep 17 00:00:00 2001 From: yinanli1917-cloud Date: Tue, 24 Mar 2026 01:46:21 -0700 Subject: [PATCH] fix(browse): use launchPersistentContext for working extension support + cookie persistence The original extension support used browser.newContext() which creates an isolated context where extensions are invisible. Extensions only run in the default browser context. - Use launchPersistentContext() when BROWSE_EXTENSIONS_DIR is set - Use --headless=new instead of off-screen window positioning (no popup) - Strip --disable-extensions, --enable-automation via ignoreDefaultArgs - Add cookie persistence to ~/.gstack/browse-cookies.json - Document in BROWSER.md 375/375 browse unit tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- BROWSER.md | 22 ++++++++++ browse/src/browser-manager.ts | 80 ++++++++++++++++++++++++----------- browse/src/config.ts | 2 + browse/src/server.ts | 35 ++++++++++++++- 4 files changed, 114 insertions(+), 25 deletions(-) diff --git a/BROWSER.md b/BROWSER.md index 086d2278b..e46c1216d 100644 --- a/BROWSER.md +++ b/BROWSER.md @@ -114,6 +114,28 @@ Mutual exclusion: `--clip` + selector and `--viewport` + `--clip` both throw err Each server session generates a random UUID as a bearer token. The token is written to the state file (`.gstack/browse.json`) with chmod 600. Every HTTP request must include `Authorization: Bearer <token>`. This prevents other processes on the machine from controlling the browser. +### Chrome extension support + +Set `BROWSE_EXTENSIONS_DIR` to an unpacked extension directory to load it into the browser. The server uses `launchPersistentContext()` with `--headless=new` so extensions run in the default browser context (where they actually work) while remaining fully headless (no visible window). 
+ +```bash +export BROWSE_EXTENSIONS_DIR="/path/to/extension" +$B goto "https://example.com" # extension's content scripts run on the page +``` + +Key details: +- Extensions require Playwright's **persistent context** — the standard `browser.newContext()` creates an isolated context where extensions are invisible +- `--headless=new` (Chromium 112+) runs the full browser engine headless with extension support +- Playwright's `--disable-extensions` and `--enable-automation` flags are stripped via `ignoreDefaultArgs` so extensions load and `navigator.webdriver` returns `false` +- Works with Manifest V3 extensions (service workers, declarativeNetRequest) +- Effective against simple paywalls (cookie/header-based). Sites with DataDome/Cloudflare bot detection block Playwright regardless of extensions + +### Cookie persistence + +Browser cookies are automatically persisted to `~/.gstack/browse-cookies.json` (global, survives binary updates). Cookies are saved on graceful shutdown and every 5 minutes, and restored on the next server start. + +This means `cookie-import-browser` is a **one-time operation** — imported cookies survive all daemon restarts. There is no need to re-import cookies after a restart or update. + ### Console, network, and dialog capture The server hooks into Playwright's `page.on('console')`, `page.on('response')`, and `page.on('dialog')` events. All entries are kept in O(1) circular buffers (50,000 capacity each) and flushed to disk asynchronously via `Bun.write()`: diff --git a/browse/src/browser-manager.ts b/browse/src/browser-manager.ts index 335ff19e1..8ec678b9a 100644 --- a/browse/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -62,12 +62,8 @@ export class BrowserManager { private consecutiveFailures: number = 0; async launch() { - // ─── Extension Support ──────────────────────────────────── - // BROWSE_EXTENSIONS_DIR points to an unpacked Chrome extension directory. 
- // Extensions only work in headed mode, so we use an off-screen window. const extensionsDir = process.env.BROWSE_EXTENSIONS_DIR; const launchArgs: string[] = []; - let useHeadless = true; // Docker/CI: Chromium sandbox requires unprivileged user namespaces which // are typically disabled in containers. Detect container environment and @@ -76,26 +72,59 @@ export class BrowserManager { launchArgs.push('--no-sandbox'); } + const contextOptions: BrowserContextOptions = { + viewport: { width: 1280, height: 720 }, + }; + if (this.customUserAgent) { + contextOptions.userAgent = this.customUserAgent; + } + if (extensionsDir) { + // ─── Extension Mode: Persistent Context ────────────────── + // Extensions ONLY work in the default browser context. + // browser.newContext() creates an isolated context where extensions are invisible. + // launchPersistentContext() uses the default context → extensions run properly. launchArgs.push( `--disable-extensions-except=${extensionsDir}`, `--load-extension=${extensionsDir}`, - '--window-position=-9999,-9999', - '--window-size=1,1', + '--headless=new', + '--disable-blink-features=AutomationControlled', + ); + const ignoreArgs = [ + '--disable-extensions', + '--enable-automation', + '--disable-component-extensions-with-background-pages', + ]; + + const userDataDir = await import('fs').then(fs => + fs.promises.mkdtemp(require('path').join(require('os').tmpdir(), 'browse-ext-')) ); - useHeadless = false; // extensions require headed mode; off-screen window simulates headless + + // launchPersistentContext returns a BrowserContext directly (not a Browser). + // headless:false tells Playwright to use the full Chromium binary (not headless shell). + // --headless=new tells Chromium itself to run headless (no visible window). + this.context = await chromium.launchPersistentContext(userDataDir, { + headless: false, + // On Windows, Chromium's sandbox fails when the server is spawned through + // the Bun→Node process chain (GitHub #276). 
Disable it — local daemon + // browsing user-specified URLs has marginal sandbox benefit. + chromiumSandbox: process.platform !== 'win32', + args: launchArgs, + ignoreDefaultArgs: ignoreArgs, + ...contextOptions, + }); + this.browser = this.context.browser()!; console.log(`[browse] Extensions loaded from: ${extensionsDir}`); + } else { + // ─── Standard Mode: Isolated Context ───────────────────── + this.browser = await chromium.launch({ + headless: true, + chromiumSandbox: process.platform !== 'win32', + ...(launchArgs.length > 0 ? { args: launchArgs } : {}), + }); + this.context = await this.browser.newContext(contextOptions); } - this.browser = await chromium.launch({ - headless: useHeadless, - // On Windows, Chromium's sandbox fails when the server is spawned through - // the Bun→Node process chain (GitHub #276). Disable it — local daemon - // browsing user-specified URLs has marginal sandbox benefit. - chromiumSandbox: process.platform !== 'win32', - ...(launchArgs.length > 0 ? { args: launchArgs } : {}), - }); - // Chromium crash → exit with clear message this.browser.on('disconnected', () => { console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); @@ -103,14 +132,6 @@ export class BrowserManager { process.exit(1); }); - const contextOptions: BrowserContextOptions = { - viewport: { width: 1280, height: 720 }, - }; - if (this.customUserAgent) { - contextOptions.userAgent = this.customUserAgent; - } - this.context = await this.browser.newContext(contextOptions); - if (Object.keys(this.extraHeaders).length > 0) { await this.context.setExtraHTTPHeaders(this.extraHeaders); } @@ -354,6 +375,17 @@ export class BrowserManager { return { cookies, pages }; } + /** + * Restore only cookies into the current context (no page recreation). + * Used on startup to restore auth state from a previous session. 
+ */ + async restoreCookies(cookies: Cookie[]): Promise<void> { + if (!this.context) throw new Error('Browser not launched'); + if (cookies.length > 0) { + await this.context.addCookies(cookies); + } + } + /** * Restore browser state into the current context: cookies, pages, storage. * Navigates to saved URLs, restores storage, wires page events. diff --git a/browse/src/config.ts b/browse/src/config.ts index 04f166433..8fcbf5eae 100644 --- a/browse/src/config.ts +++ b/browse/src/config.ts @@ -17,6 +17,7 @@ export interface BrowseConfig { projectDir: string; stateDir: string; stateFile: string; + storageFile: string; consoleLog: string; networkLog: string; dialogLog: string; @@ -67,6 +68,7 @@ export function resolveConfig( projectDir, stateDir, stateFile, + storageFile: path.join(process.env.HOME || '/tmp', '.gstack', 'browse-cookies.json'), consoleLog: path.join(stateDir, 'browse-console.log'), networkLog: path.join(stateDir, 'browse-network.log'), dialogLog: path.join(stateDir, 'browse-dialog.log'), diff --git a/browse/src/server.ts b/browse/src/server.ts index fe2c27cbc..e78e0139b 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -139,6 +139,23 @@ async function flushBuffers() { // Flush every 1 second const flushInterval = setInterval(flushBuffers, 1000); +// ─── Cookie/Storage Persistence ────────────────────────────── +// Persist browser cookies + localStorage to disk so they survive daemon restarts. +// Called periodically (every 5 min) and on shutdown. 
+async function persistStorage() { + try { + const state = await browserManager.saveState(); + if (state.cookies.length === 0) return; + const tmpFile = config.storageFile + '.tmp'; + fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2), { mode: 0o600 }); + fs.renameSync(tmpFile, config.storageFile); + } catch { + // Non-fatal — best-effort persistence + } +} + +const storageFlushInterval = setInterval(persistStorage, 5 * 60_000); + // ─── Idle Timer ──────────────────────────────────────────────── let lastActivity = Date.now(); @@ -273,11 +290,15 @@ async function shutdown() { console.log('[browse] Shutting down...'); clearInterval(flushInterval); clearInterval(idleCheckInterval); + clearInterval(storageFlushInterval); await flushBuffers(); // Final flush (async now) + // Persist cookies + storage before closing browser + await persistStorage(); + await browserManager.close(); - // Clean up state file + // Clean up state file (but keep storage file for next restart) try { fs.unlinkSync(config.stateFile); } catch {} process.exit(0); @@ -306,6 +327,18 @@ async function start() { // Launch browser await browserManager.launch(); + // Restore cookies from previous session (pages are not restored — start fresh) + try { + const raw = fs.readFileSync(config.storageFile, 'utf-8'); + const saved = JSON.parse(raw); + if (saved.cookies?.length > 0) { + await browserManager.restoreCookies(saved.cookies); + console.log(`[browse] Restored ${saved.cookies.length} cookies from previous session`); + } + } catch { + // No storage file or parse error — start fresh + } + const startTime = Date.now(); const server = Bun.serve({ port,