From 2b07d7bbe4115b9cf94b9f9cb85cf1cd3e7073ea Mon Sep 17 00:00:00 2001 From: Krzysztof Zarzycki Date: Sun, 21 Jun 2026 09:38:53 +0200 Subject: [PATCH 1/2] feat(cdp): --cdp-url to attach to an already-running browser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cdp backend always launches a throwaway `--headless=new` Chrome with an ephemeral profile, so it can't see anything behind a login, and on macOS it can't self-provision a browser at all (the bundled auto-download is linux-x64 only) — leaving CHROME_PATH as the only way to render. Add `--cdp-url URL` (env: PIXELSHOT_CDP_URL). When set, the backend connects to that DevTools endpoint (e.g. http://127.0.0.1:9222), creates a fresh tab per worker, renders using the running browser's existing session (cookies/logins), and closes only the tabs it created — never touching the user's other tabs and never killing the browser. Forces the standard path (turbo needs a process we launched) and needs no local Chrome binary. The launch and attach workers share extracted `_setup_page` and `_drain_queue` helpers (they differ only in how the page ws is obtained), so the queue/capture logic isn't duplicated. Unset → existing launch behavior is unchanged. Tests: URL normalization, attach-vs-launch routing (no browser opened), env fallback, and no-regression on the default path. 
--- render/src/pixelrag_render/backends/cdp.py | 319 +++++++++++++++++---- render/src/pixelrag_render/render.py | 13 + tests/test_cdp_attach.py | 85 ++++++ 3 files changed, 368 insertions(+), 49 deletions(-) create mode 100644 tests/test_cdp_attach.py diff --git a/render/src/pixelrag_render/backends/cdp.py b/render/src/pixelrag_render/backends/cdp.py index 6f9f65c..3dbd50e 100644 --- a/render/src/pixelrag_render/backends/cdp.py +++ b/render/src/pixelrag_render/backends/cdp.py @@ -24,6 +24,7 @@ import io import json import logging +import os import signal import subprocess import time @@ -77,6 +78,45 @@ async def _connect_cdp(port: int, retries: int = 5, delay: float = 1.0): raise ConnectionError(f"Failed to connect to Chrome on port {port}") +def _http_base_from_cdp_url(cdp_url: str) -> str: + """Normalize a ``--cdp-url`` value to an http DevTools base ``http://host:port``. + + Accepts ``http://host:port`` (any path is ignored), ``ws://host:port/...`` + (scheme swapped to http, path dropped), or a bare ``host:port``. 
+ """ + from urllib.parse import urlparse + + p = urlparse(cdp_url if "//" in cdp_url else f"//{cdp_url}") + netloc = p.netloc or p.path + if not netloc: + raise ValueError(f"Invalid --cdp-url: {cdp_url!r}") + return f"http://{netloc}" + + +async def _connect_ws(ws_url: str): + """Open a CDP websocket to an explicit ws URL (browser- or page-level).""" + import websockets + + return await websockets.connect( + ws_url, open_timeout=10, max_size=50 * 1024 * 1024 + ) + + +def _browser_ws_url(http_base: str) -> str: + """Fetch the browser-level CDP websocket URL from ``/json/version``.""" + data = urllib.request.urlopen(f"{http_base}/json/version", timeout=5).read() + return json.loads(data)["webSocketDebuggerUrl"] + + +def _page_ws_url_for_target(http_base: str, target_id: str) -> str: + """Resolve the page-level websocket URL for a freshly created ``targetId``.""" + data = urllib.request.urlopen(f"{http_base}/json", timeout=5).read() + for t in json.loads(data): + if t.get("id") == target_id: + return t["webSocketDebuggerUrl"] + raise ConnectionError(f"Created target {target_id} not found in /json list") + + async def _cdp_send(ws, msg_id_ref: list, method: str, params: dict | None = None): """Send a CDP command and wait for its response.""" msg_id_ref[0] += 1 @@ -294,6 +334,80 @@ async def capture_url( return len(tiles) +async def _setup_page(ws, msg_id_ref: list, viewport_w: int, tile_height: int, + wait_network_idle: bool): + """Enable the CDP domains and fix the viewport for a page ws before capture.""" + await _cdp_send(ws, msg_id_ref, "Page.enable") + if wait_network_idle: + # PerformanceObserver (used by the idle wait) needs no CDP domain, but + # enabling Network keeps resource timing reliable across navigations. 
+ await _cdp_send(ws, msg_id_ref, "Network.enable") + await _cdp_send( + ws, + msg_id_ref, + "Emulation.setDeviceMetricsOverride", + { + "width": viewport_w, + "height": tile_height, + "deviceScaleFactor": 1, + "mobile": False, + }, + ) + + +async def _drain_queue( + ws, + msg_id_ref: list, + work_queue: asyncio.Queue, + output_dir: Path, + tile_height: int, + quality: int, + viewport_w: int, + image_format: str, + from_surface: bool, + wait_network_idle: bool, + worker_id: int, + stats: dict, + results: list, +): + """Pull URLs off the queue and capture each through ``ws`` until it's empty. + + Shared by the launch (``_worker``) and attach (``_attached_worker``) paths — + they differ only in how ``ws`` is obtained, not in how work is processed. + """ + while True: + try: + item = work_queue.get_nowait() + except asyncio.QueueEmpty: + break + + url = item["url"] + stem = item["stem"] + tile_dir = output_dir / f"{stem}.png.tiles" + + t0 = time.monotonic() + try: + n_tiles = await capture_url( + ws, + msg_id_ref, + url, + tile_dir, + tile_h=tile_height, + quality=quality, + viewport_w=viewport_w, + image_format=image_format, + from_surface=from_surface, + wait_network_idle=wait_network_idle, + ) + stats["done"] += 1 + elapsed = time.monotonic() - t0 + logger.info("[w%d] %s → %d tiles (%.1fs)", worker_id, url, n_tiles, elapsed) + results.append(tile_dir) + except Exception as e: + stats["failed"] += 1 + logger.warning("[w%d] FAIL %s: %s", worker_id, url, str(e)[:200]) + + async def _worker( chrome_path: str, port: int, @@ -326,57 +440,22 @@ async def _worker( ws = await _connect_cdp(port) msg_id_ref = [0] - await _cdp_send(ws, msg_id_ref, "Page.enable") - if wait_network_idle: - # PerformanceObserver (used by the idle wait) needs no CDP domain, but - # enabling Network keeps resource timing reliable across navigations. 
- await _cdp_send(ws, msg_id_ref, "Network.enable") - await _cdp_send( + await _setup_page(ws, msg_id_ref, viewport_w, tile_height, wait_network_idle) + await _drain_queue( ws, msg_id_ref, - "Emulation.setDeviceMetricsOverride", - { - "width": viewport_w, - "height": tile_height, - "deviceScaleFactor": 1, - "mobile": False, - }, + work_queue, + output_dir, + tile_height, + quality, + viewport_w, + image_format, + from_surface, + wait_network_idle, + worker_id, + stats, + results, ) - - while True: - try: - item = work_queue.get_nowait() - except asyncio.QueueEmpty: - break - - url = item["url"] - stem = item["stem"] - tile_dir = output_dir / f"{stem}.png.tiles" - - t0 = time.monotonic() - try: - n_tiles = await capture_url( - ws, - msg_id_ref, - url, - tile_dir, - tile_h=tile_height, - quality=quality, - viewport_w=viewport_w, - image_format=image_format, - from_surface=from_surface, - wait_network_idle=wait_network_idle, - ) - stats["done"] += 1 - elapsed = time.monotonic() - t0 - logger.info( - "[w%d] %s → %d tiles (%.1fs)", worker_id, url, n_tiles, elapsed - ) - results.append(tile_dir) - except Exception as e: - stats["failed"] += 1 - logger.warning("[w%d] FAIL %s: %s", worker_id, url, str(e)[:200]) - await ws.close() finally: proc.send_signal(signal.SIGTERM) @@ -461,6 +540,120 @@ async def _run_batch( return results +async def _attached_worker( + http_base: str, + browser_ws_url: str, + work_queue: asyncio.Queue, + output_dir: Path, + tile_height: int, + quality: int, + viewport_w: int, + image_format: str, + from_surface: bool, + wait_network_idle: bool, + worker_id: int, + stats: dict, + results: list, +): + """Async worker that attaches to an already-running browser over CDP. + + Mirrors ``_worker`` but, instead of launching a throwaway ``--headless`` + process, creates its own fresh tab (target) in the existing browser, drives + only that tab, and closes only that tab on teardown. 
The browser's profile + — cookies, logins — is whatever the running instance has, so authenticated + pages render. Never touches the user's other tabs; never kills the browser. + """ + browser_ws = await _connect_ws(browser_ws_url) + bmsg = [0] + created = await _cdp_send( + browser_ws, bmsg, "Target.createTarget", {"url": "about:blank"} + ) + target_id = created["targetId"] + try: + ws = await _connect_ws(_page_ws_url_for_target(http_base, target_id)) + msg_id_ref = [0] + + await _setup_page(ws, msg_id_ref, viewport_w, tile_height, wait_network_idle) + await _drain_queue( + ws, + msg_id_ref, + work_queue, + output_dir, + tile_height, + quality, + viewport_w, + image_format, + from_surface, + wait_network_idle, + worker_id, + stats, + results, + ) + await ws.close() + finally: + # Close only the tab we created; leave the browser and its other tabs alone. + try: + await _cdp_send( + browser_ws, bmsg, "Target.closeTarget", {"targetId": target_id} + ) + except Exception: + pass + await browser_ws.close() + + +async def _run_batch_attached( + urls: list[str], + output_dir: Path, + num_workers: int, + tile_height: int, + quality: int, + viewport_w: int, + image_format: str, + from_surface: bool, + wait_network_idle: bool, + stems: list[str] | None, + cdp_url: str, +) -> list[Path]: + http_base = _http_base_from_cdp_url(cdp_url) + browser_ws_url = _browser_ws_url(http_base) + + work_queue: asyncio.Queue = asyncio.Queue() + stem_list = _derive_stems(urls, stems) + for url, stem in zip(urls, stem_list): + work_queue.put_nowait({"url": url, "stem": stem}) + + stats = {"done": 0, "failed": 0} + results: list[Path] = [] + + # One fresh tab per worker against the single shared browser — no extra + # processes, no interference with the user's existing tabs. 
+ actual_workers = min(num_workers, len(urls)) + workers = [ + _attached_worker( + http_base, + browser_ws_url, + work_queue, + output_dir, + tile_height, + quality, + viewport_w, + image_format, + from_surface, + wait_network_idle, + wid, + stats, + results, + ) + for wid in range(actual_workers) + ] + await asyncio.gather(*workers, return_exceptions=True) + + logger.info( + "Batch complete (attached): done=%d failed=%d", stats["done"], stats["failed"] + ) + return results + + def render_urls( urls: list[str], output_dir: str | Path, @@ -475,6 +668,7 @@ def render_urls( wait_network_idle: bool = False, turbo: bool | None = None, chrome_path: str | None = None, + cdp_url: str | None = None, ) -> list[Path]: """Render URLs to tiled images via CDP. @@ -500,7 +694,15 @@ def render_urls( force. Turbo only applies to the default capture profile (jpeg, default viewport, fromSurface, no network-idle wait); other options always use the standard path. - chrome_path: Path to Chrome binary. Auto-detected if None. + chrome_path: Path to Chrome binary. Auto-detected if None. Ignored when + ``cdp_url`` is set (no browser is launched). + cdp_url: DevTools endpoint of an already-running browser (e.g. + ``http://127.0.0.1:9222``). When set (or via the + ``PIXELSHOT_CDP_URL`` env var), pixelshot attaches to that + browser and renders in a fresh tab using its existing + session (cookies/logins) instead of launching a throwaway + headless instance. Forces the standard path (no turbo) and + needs no local Chrome binary. Returns: List of Path objects for created tile directories. 
@@ -511,6 +713,25 @@ def render_urls( if not urls: return [] + cdp_url = cdp_url or os.environ.get("PIXELSHOT_CDP_URL") + if cdp_url: + logger.info("Attaching to existing browser at %s", cdp_url) + return asyncio.run( + _run_batch_attached( + urls, + output_dir, + workers, + tile_height, + quality, + viewport_width, + image_format, + from_surface, + wait_network_idle, + stems, + cdp_url, + ) + ) + chrome = chrome_path or _find_chrome() # Turbo only covers fast_cdp's capture profile; anything else → standard path. diff --git a/render/src/pixelrag_render/render.py b/render/src/pixelrag_render/render.py index 35a4753..554765d 100644 --- a/render/src/pixelrag_render/render.py +++ b/render/src/pixelrag_render/render.py @@ -8,6 +8,7 @@ import argparse import logging +import os import shutil import sys from pathlib import Path @@ -290,6 +291,16 @@ def main() -> None: default=200, help="DPI for PDF rendering (default: 200).", ) + parser.add_argument( + "--cdp-url", + default=os.environ.get("PIXELSHOT_CDP_URL"), + metavar="URL", + help="Attach to an already-running Chrome/Brave DevTools endpoint " + "(e.g. http://127.0.0.1:9222) instead of launching a throwaway headless " + "browser. Renders each input in a fresh tab using that browser's existing " + "session (cookies/logins) — so authenticated pages work — then closes only " + "that tab. Needs no local Chrome binary. 
Env: PIXELSHOT_CDP_URL.", + ) args = parser.parse_args() output_dir = Path(args.output) @@ -322,6 +333,7 @@ def main() -> None: viewport_width=args.viewport_width, workers=args.workers, wait_network_idle=args.wait_network_idle, + cdp_url=args.cdp_url, ) results.extend(tile_dirs) @@ -344,6 +356,7 @@ def main() -> None: viewport_width=args.viewport_width, workers=1, wait_network_idle=args.wait_network_idle, + cdp_url=args.cdp_url, ) elif suffix in {".png", ".jpg", ".jpeg", ".webp"}: tile_dirs = render_file(fpath, output_dir) diff --git a/tests/test_cdp_attach.py b/tests/test_cdp_attach.py new file mode 100644 index 0000000..31c6c65 --- /dev/null +++ b/tests/test_cdp_attach.py @@ -0,0 +1,85 @@ +"""Tests for the --cdp-url attach-to-existing-browser path. + +These run on a core install (no chrome, no browser): they exercise URL +normalization and the routing logic that decides between attaching to a running +browser and launching a throwaway one — without ever opening a browser. +""" + +import sys +from pathlib import Path + +import pytest + +from pixelrag_render.backends import cdp + +_BIN = Path(sys.executable).parent + + +def test_http_base_normalization(): + f = cdp._http_base_from_cdp_url + assert f("http://127.0.0.1:9222") == "http://127.0.0.1:9222" + assert f("http://127.0.0.1:9222/json/version") == "http://127.0.0.1:9222" + assert f("127.0.0.1:9222") == "http://127.0.0.1:9222" + assert f("ws://localhost:9222/devtools/browser/abc") == "http://localhost:9222" + + +def test_cdp_url_routes_to_attach_without_launching(monkeypatch, tmp_path): + """With cdp_url set, render_urls must take the attach path and never call + _find_chrome (i.e. 
never try to launch/auto-install a browser).""" + called = {} + + async def fake_attached(urls, output_dir, *a, **kw): + called["attached"] = list(urls) + return [Path(output_dir) / "x.png.tiles"] + + def boom(): + raise AssertionError("_find_chrome must not run on the attach path") + + monkeypatch.setattr(cdp, "_run_batch_attached", fake_attached) + monkeypatch.setattr(cdp, "_find_chrome", boom) + + out = cdp.render_urls( + ["https://example.com"], tmp_path, cdp_url="http://127.0.0.1:9222" + ) + assert called["attached"] == ["https://example.com"] + assert out and out[0].name == "x.png.tiles" + + +def test_env_var_fallback_routes_to_attach(monkeypatch, tmp_path): + called = {} + + async def fake_attached(urls, output_dir, *a, **kw): + called["hit"] = True + return [] + + monkeypatch.setattr(cdp, "_run_batch_attached", fake_attached) + monkeypatch.setattr( + cdp, "_find_chrome", lambda: pytest.fail("should not launch") + ) + monkeypatch.setenv("PIXELSHOT_CDP_URL", "http://127.0.0.1:9222") + + cdp.render_urls(["https://example.com"], tmp_path) + assert called.get("hit") is True + + +def test_default_path_still_resolves_chrome(monkeypatch, tmp_path): + """No cdp_url (and no env) → the launch path runs find_chrome as before.""" + monkeypatch.delenv("PIXELSHOT_CDP_URL", raising=False) + sentinel = RuntimeError("find_chrome reached") + + def boom(): + raise sentinel + + monkeypatch.setattr(cdp, "_find_chrome", boom) + with pytest.raises(RuntimeError, match="find_chrome reached"): + cdp.render_urls(["https://example.com"], tmp_path) + + +def test_cli_help_exposes_cdp_url(): + import subprocess + + r = subprocess.run( + [str(_BIN / "pixelshot"), "--help"], capture_output=True, text=True + ) + assert r.returncode == 0 + assert "--cdp-url" in r.stdout From b801830884730f348739fa630a40d7edf4bede6a Mon Sep 17 00:00:00 2001 From: Krzysztof Zarzycki Date: Sun, 21 Jun 2026 10:05:11 +0200 Subject: [PATCH 2/2] fix(cdp): harden --cdp-url attach error paths; test target lifecycle 
Addresses review feedback on the attach path: - _fetch_json maps connection failures to a clear "Could not reach CDP endpoint at {cdp_url}" RuntimeError instead of a raw URLError/KeyError traceback. - _page_ws_url_for_target retries the /json lookup (a freshly created target can momentarily be absent) and runs the blocking HTTP fetch via asyncio.to_thread so it doesn't block the event loop. - _attached_worker moves Target.createTarget inside the try and guards closeTarget on target_id, so the browser ws is always closed and a failed create can't orphan a tab. Tests: mocked-ws test asserts only the self-created target is closed (never a pre-existing one, never Browser.close), and that a bad endpoint raises a clean RuntimeError. --- render/src/pixelrag_render/backends/cdp.py | 82 ++++++++++++++------ tests/test_cdp_attach.py | 88 ++++++++++++++++++++++ 2 files changed, 147 insertions(+), 23 deletions(-) diff --git a/render/src/pixelrag_render/backends/cdp.py b/render/src/pixelrag_render/backends/cdp.py index 3dbd50e..8f108fb 100644 --- a/render/src/pixelrag_render/backends/cdp.py +++ b/render/src/pixelrag_render/backends/cdp.py @@ -102,19 +102,49 @@ async def _connect_ws(ws_url: str): ) -def _browser_ws_url(http_base: str) -> str: - """Fetch the browser-level CDP websocket URL from ``/json/version``.""" - data = urllib.request.urlopen(f"{http_base}/json/version", timeout=5).read() - return json.loads(data)["webSocketDebuggerUrl"] +def _fetch_json(url: str, cdp_url: str, timeout: float = 5): + """GET ``url`` and parse JSON, mapping connection failures to a clear error. + ``cdp_url`` is the user-facing endpoint, used only for the message so a bad + or unreachable ``--cdp-url`` surfaces an actionable error instead of a raw + URLError traceback. 
+ """ + try: + data = urllib.request.urlopen(url, timeout=timeout).read() + return json.loads(data) + except Exception as e: + raise RuntimeError(f"Could not reach CDP endpoint at {cdp_url}: {e}") from e -def _page_ws_url_for_target(http_base: str, target_id: str) -> str: - """Resolve the page-level websocket URL for a freshly created ``targetId``.""" - data = urllib.request.urlopen(f"{http_base}/json", timeout=5).read() - for t in json.loads(data): - if t.get("id") == target_id: - return t["webSocketDebuggerUrl"] - raise ConnectionError(f"Created target {target_id} not found in /json list") + +def _browser_ws_url(http_base: str, cdp_url: str) -> str: + """Fetch the browser-level CDP websocket URL from ``/json/version``.""" + info = _fetch_json(f"{http_base}/json/version", cdp_url) + try: + return info["webSocketDebuggerUrl"] + except (KeyError, TypeError) as e: + raise RuntimeError( + f"Could not reach CDP endpoint at {cdp_url}: " + f"unexpected /json/version response (no webSocketDebuggerUrl)" + ) from e + + +async def _page_ws_url_for_target( + http_base: str, target_id: str, cdp_url: str, retries: int = 5, delay: float = 0.5 +) -> str: + """Resolve the page-level websocket URL for a freshly created ``targetId``. + + A freshly created target can momentarily be absent from ``/json``, so poll a + few times (mirroring ``_connect_cdp``'s retry) before giving up. The blocking + HTTP fetch runs in a thread so it doesn't block the event loop. 
+ """ + for attempt in range(retries): + targets = await asyncio.to_thread(_fetch_json, f"{http_base}/json", cdp_url) + for t in targets: + if t.get("id") == target_id: + return t["webSocketDebuggerUrl"] + if attempt < retries - 1: + await asyncio.sleep(delay) + raise RuntimeError(f"Created target {target_id} not found in /json list") async def _cdp_send(ws, msg_id_ref: list, method: str, params: dict | None = None): @@ -543,6 +573,7 @@ async def _run_batch( async def _attached_worker( http_base: str, browser_ws_url: str, + cdp_url: str, work_queue: asyncio.Queue, output_dir: Path, tile_height: int, @@ -565,12 +596,15 @@ async def _attached_worker( """ browser_ws = await _connect_ws(browser_ws_url) bmsg = [0] - created = await _cdp_send( - browser_ws, bmsg, "Target.createTarget", {"url": "about:blank"} - ) - target_id = created["targetId"] + target_id = None try: - ws = await _connect_ws(_page_ws_url_for_target(http_base, target_id)) + created = await _cdp_send( + browser_ws, bmsg, "Target.createTarget", {"url": "about:blank"} + ) + target_id = created["targetId"] + ws = await _connect_ws( + await _page_ws_url_for_target(http_base, target_id, cdp_url) + ) msg_id_ref = [0] await _setup_page(ws, msg_id_ref, viewport_w, tile_height, wait_network_idle) @@ -592,12 +626,13 @@ async def _attached_worker( await ws.close() finally: # Close only the tab we created; leave the browser and its other tabs alone. 
- try: - await _cdp_send( - browser_ws, bmsg, "Target.closeTarget", {"targetId": target_id} - ) - except Exception: - pass + if target_id is not None: + try: + await _cdp_send( + browser_ws, bmsg, "Target.closeTarget", {"targetId": target_id} + ) + except Exception: + pass await browser_ws.close() @@ -615,7 +650,7 @@ async def _run_batch_attached( cdp_url: str, ) -> list[Path]: http_base = _http_base_from_cdp_url(cdp_url) - browser_ws_url = _browser_ws_url(http_base) + browser_ws_url = _browser_ws_url(http_base, cdp_url) work_queue: asyncio.Queue = asyncio.Queue() stem_list = _derive_stems(urls, stems) @@ -632,6 +667,7 @@ async def _run_batch_attached( _attached_worker( http_base, browser_ws_url, + cdp_url, work_queue, output_dir, tile_height, diff --git a/tests/test_cdp_attach.py b/tests/test_cdp_attach.py index 31c6c65..11d0ddc 100644 --- a/tests/test_cdp_attach.py +++ b/tests/test_cdp_attach.py @@ -5,6 +5,7 @@ browser and launching a throwaway one — without ever opening a browser. """ +import json import sys from pathlib import Path @@ -75,6 +76,93 @@ def boom(): cdp.render_urls(["https://example.com"], tmp_path) +def test_attach_creates_and_closes_only_its_own_target(monkeypatch, tmp_path): + """The attach path must create its own target, and on teardown close ONLY + that target — never a pre-existing one, and never close/kill the browser. + + Mocks the websocket/CDP layer so no browser is needed: a fake ws records + every CDP method sent on the browser-level connection. 
+ """ + browser_methods = [] # (method, params) sent on the browser ws + + class FakeBrowserWS: + async def send(self, raw): + msg = json.loads(raw) + browser_methods.append((msg["method"], msg.get("params", {}))) + method = msg["method"] + result = {} + if method == "Target.createTarget": + result = {"targetId": "OUR-TARGET-123"} + self._reply = {"id": msg["id"], "result": result} + + async def recv(self): + return json.dumps(self._reply) + + async def close(self): + pass + + class FakePageWS: + async def send(self, raw): + msg = json.loads(raw) + self._reply = {"id": msg["id"], "result": {}} + + async def recv(self): + return json.dumps(self._reply) + + async def close(self): + pass + + async def fake_connect_ws(ws_url): + return FakeBrowserWS() if ws_url == "BROWSER_WS" else FakePageWS() + + # Pre-existing target plus the one we create — only ours must be closed. + def fake_fetch_json(url, cdp_url, timeout=5): + if url.endswith("/json/version"): + return {"webSocketDebuggerUrl": "BROWSER_WS"} + return [ + {"id": "PREEXISTING-999", "webSocketDebuggerUrl": "ws://other"}, + {"id": "OUR-TARGET-123", "webSocketDebuggerUrl": "PAGE_WS"}, + ] + + async def fake_capture_url(*a, **kw): + return 1 + + monkeypatch.setattr(cdp, "_connect_ws", fake_connect_ws) + monkeypatch.setattr(cdp, "_fetch_json", fake_fetch_json) + monkeypatch.setattr(cdp, "capture_url", fake_capture_url) + + out = cdp.render_urls( + ["https://example.com"], tmp_path, cdp_url="http://127.0.0.1:9222" + ) + assert out and out[0].name.endswith(".png.tiles") + + methods = [m for m, _ in browser_methods] + assert "Target.createTarget" in methods + assert "Target.closeTarget" in methods + # Never closed/killed the browser. + assert "Browser.close" not in methods + + # closeTarget targeted ONLY our own created target, never the pre-existing one. 
+ closed = [p["targetId"] for m, p in browser_methods if m == "Target.closeTarget"] + assert closed == ["OUR-TARGET-123"] + + +def test_attach_bad_cdp_url_raises_clean_error(monkeypatch, tmp_path): + """An unreachable/bad endpoint surfaces a clear RuntimeError, not a raw + URLError/KeyError traceback.""" + import urllib.error + + def boom(url, timeout=5): + raise urllib.error.URLError("connection refused") + + monkeypatch.setattr(cdp.urllib.request, "urlopen", boom) + + with pytest.raises(RuntimeError, match="Could not reach CDP endpoint at"): + cdp.render_urls( + ["https://example.com"], tmp_path, cdp_url="http://127.0.0.1:9999" + ) + + def test_cli_help_exposes_cdp_url(): import subprocess