From 6545389ff5c6225c635e1c1553e0d792be8f9c3e Mon Sep 17 00:00:00 2001
From: David Corvoysier <david@huggingface.co>
Date: Thu, 25 Jun 2026 15:11:57 +0000
Subject: [PATCH 1/2] chore: remove the Python implementation

The Rust crate now covers the whole pipeline (run, proxy, sandbox, drivers,
export, inspect, ls), so retire the Python package and its tooling:

- delete src/agentcap/, pyproject.toml, and the pytest suite (tests/*.py)
- drop the Python CI workflows (linux-live-tests.yml, linux-non-live-tests.yml);
  the Rust Test / Test - Live / Build - Release workflows replace them
- prune Python-only entries from .gitignore (dist/ is kept, now listed under
  the Rust section)
- fix the crate doc and a few comments that still described the Python half

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/linux-live-tests.yml      |   60 -
 .github/workflows/linux-non-live-tests.yml  |   43 -
 .github/workflows/live.yml                  |    4 +-
 .github/workflows/test.yml                  |    2 +-
 .gitignore                                  |   11 +-
 pyproject.toml                              |   54 -
 src/agentcap/__init__.py                    |    3 -
 src/agentcap/__main__.py                    | 2684 -------------------
 src/agentcap/captures.py                    |  190 --
 src/agentcap/drivers/__init__.py            |  174 --
 src/agentcap/drivers/goose.py               |  125 -
 src/agentcap/drivers/hermes.py              |  204 --
 src/agentcap/drivers/opencode.py            |  236 --
 src/agentcap/drivers/pi.py                  |  183 --
 src/agentcap/export.py                      |  686 -----
 src/agentcap/followups/__init__.py          |   59 -
 src/agentcap/followups/continue_.py         |   15 -
 src/agentcap/followups/synthesized.py       |  127 -
 src/agentcap/followups/templates.py         |   28 -
 src/agentcap/orchestrator.py                |  220 --
 src/agentcap/provider.py                    |  144 -
 src/agentcap/proxy.py                       |  415 ---
 src/agentcap/sandbox/__init__.py            |  116 -
 src/agentcap/sandbox/podman.py              |  189 --
 src/agentcap/sandbox/podman_provisioning.py |  207 --
 src/agentcap/scan.py                        |  247 --
 src/lib.rs                                  |    6 +-
 src/provider.rs                             |    8 +-
 tests/__init__.py                           |    0
 tests/conftest.py                           |  514 ----
 tests/fixtures/__init__.py                  |    0
 tests/fixtures/sandbox_images.py            |  134 -
 tests/live.rs                               |    9 +-
 tests/test_captures.py                      |  127 -
 tests/test_cli.py                           |  451 ----
 tests/test_cli_live.py                      |  106 -
 tests/test_drivers.py                       |  239 --
 tests/test_drivers_live.py                  |  110 -
 tests/test_export.py                        |  541 ----
 tests/test_followups.py                     |  186 --
 tests/test_inspect_helpers.py               |  158 --
 tests/test_orchestrator.py                  |  292 --
 tests/test_podman_sandbox.py                |  229 --
 tests/test_provider.py                      |  108 -
 tests/test_proxy.py                         |  327 ---
 tests/test_proxy_http.py                    |  276 --
 tests/test_proxy_meta.py                    |  250 --
 tests/test_sandbox.py                       |   21 -
 tests/test_scan.py                          |  251 --
 49 files changed, 14 insertions(+), 10755 deletions(-)
 delete mode 100644 .github/workflows/linux-live-tests.yml
 delete mode 100644 .github/workflows/linux-non-live-tests.yml
 delete mode 100644 pyproject.toml
 delete mode 100644 src/agentcap/__init__.py
 delete mode 100644 src/agentcap/__main__.py
 delete mode 100644 src/agentcap/captures.py
 delete mode 100644 src/agentcap/drivers/__init__.py
 delete mode 100644 src/agentcap/drivers/goose.py
 delete mode 100644 src/agentcap/drivers/hermes.py
 delete mode 100644 src/agentcap/drivers/opencode.py
 delete mode 100644 src/agentcap/drivers/pi.py
 delete mode 100644 src/agentcap/export.py
 delete mode 100644 src/agentcap/followups/__init__.py
 delete mode 100644 src/agentcap/followups/continue_.py
 delete mode 100644 src/agentcap/followups/synthesized.py
 delete mode 100644 src/agentcap/followups/templates.py
 delete mode 100644 src/agentcap/orchestrator.py
 delete mode 100644 src/agentcap/provider.py
 delete mode 100644 src/agentcap/proxy.py
 delete mode 100644 src/agentcap/sandbox/__init__.py
 delete mode 100644 src/agentcap/sandbox/podman.py
 delete mode 100644 src/agentcap/sandbox/podman_provisioning.py
 delete mode 100644 src/agentcap/scan.py
 delete mode 100644 tests/__init__.py
 delete mode 100644 tests/conftest.py
 delete mode 100644 tests/fixtures/__init__.py
 delete mode 100644 tests/fixtures/sandbox_images.py
 delete mode 100644 tests/test_captures.py
 delete mode 100644 tests/test_cli.py
 delete mode 100644 tests/test_cli_live.py
 delete mode 100644 tests/test_drivers.py
 delete mode 100644 tests/test_drivers_live.py
 delete mode 100644 tests/test_export.py
 delete mode 100644 tests/test_followups.py
 delete mode 100644 tests/test_inspect_helpers.py
 delete mode 100644 tests/test_orchestrator.py
 delete mode 100644 tests/test_podman_sandbox.py
 delete mode 100644 tests/test_provider.py
 delete mode 100644 tests/test_proxy.py
 delete mode 100644 tests/test_proxy_http.py
 delete mode 100644 tests/test_proxy_meta.py
 delete mode 100644 tests/test_sandbox.py
 delete mode 100644 tests/test_scan.py

diff --git a/.github/workflows/linux-live-tests.yml b/.github/workflows/linux-live-tests.yml
deleted file mode 100644
index d2a9465..0000000
--- a/.github/workflows/linux-live-tests.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: linux-live-tests
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  tests:
-    # Live end-to-end tests: build per-agent sandbox images, spawn
-    # the llama.cpp server as a sibling podman container, and run
-    # real agent CLIs against it. Failures here block merges — these
-    # are the only tests that exercise the full sandbox + proxy +
-    # agent stack end-to-end.
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6.0.3
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.12"
-          cache: pip
-      - name: Install system deps (podman)
-        # Required by ``agentcap.sandbox.podman``. Ships in Ubuntu's
-        # default apt sources; rootless mode uses the runner user.
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y podman
-      - name: Install Python deps
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e '.[dev]'
-      - name: Cache GGUF weights
-        # The default test GGUF (Qwen3-1.7B Q8_0, ~1.8 GB) lands in
-        # the HF hub cache. Caching it keeps live runs cheap after
-        # the first download.
-        uses: actions/cache@v5
-        with:
-          path: ~/.cache/huggingface
-          key: hf-hub-gguf-${{ hashFiles('tests/conftest.py') }}
-          restore-keys: |
-            hf-hub-gguf-
-      - name: Cache sandbox images
-        # Per-agent sandbox images (hermes is the heaviest at ~900 MB)
-        # are built on first use and stamped with a hash label. Caching
-        # the rootless containers/storage between runs lets
-        # ``ensure_image`` short-circuit on hash match.
-        uses: actions/cache@v5
-        with:
-          path: ~/.local/share/containers
-          key: sandbox-images-${{ hashFiles('containers/**') }}
-          restore-keys: |
-            sandbox-images-
-      - name: Pre-build sandbox images
-        # Idempotent build of every per-agent image. On a cache hit
-        # this is a no-op (hash match in ``ensure_image``); on a miss
-        # it builds and the next run reuses the cache.
-        run: python tests/fixtures/sandbox_images.py
-      - name: Run live tests
-        run: pytest -m live tests/ -v
diff --git a/.github/workflows/linux-non-live-tests.yml b/.github/workflows/linux-non-live-tests.yml
deleted file mode 100644
index dfda9d8..0000000
--- a/.github/workflows/linux-non-live-tests.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: linux-non-live-tests
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  tests:
-    # Lint + unit tests on the default Ubuntu runner. The live tests
-    # live in ``linux-live-tests.yml`` and run on a GPU runner so the
-    # local ``llama serve`` doesn't stall on CPU inference.
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6.0.3
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.12"
-          cache: pip
-      - name: Install Python deps
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e '.[dev]'
-      - name: Install trufflehog
-        # Version pinned: detector-output changes (false-positive
-        # bucket) would silently flip test_scan / test_cli scan-on
-        # assertions.
-        env:
-          TRUFFLEHOG_VERSION: 3.95.3
-          TRUFFLEHOG_SHA256: 5d836eae522540a32ca0f1a1e00efd4c3153a52462466a4b4008fac1e6c1a548
-        run: |
-          set -euo pipefail
-          cd /tmp
-          curl -sSfL -O "https://github.com/trufflesecurity/trufflehog/releases/download/v${TRUFFLEHOG_VERSION}/trufflehog_${TRUFFLEHOG_VERSION}_linux_amd64.tar.gz"
-          echo "${TRUFFLEHOG_SHA256}  trufflehog_${TRUFFLEHOG_VERSION}_linux_amd64.tar.gz" | sha256sum -c -
-          mkdir -p "$HOME/.local/bin"
-          tar -xzf "trufflehog_${TRUFFLEHOG_VERSION}_linux_amd64.tar.gz" -C "$HOME/.local/bin" trufflehog
-          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
-      - name: Lint (ruff)
-        run: ruff check .
-      - name: Tests (unit only, no live)
-        run: pytest -m "not live" tests/
diff --git a/.github/workflows/live.yml b/.github/workflows/live.yml
index 7023677..f8efa3b 100644
--- a/.github/workflows/live.yml
+++ b/.github/workflows/live.yml
@@ -3,7 +3,7 @@ name: Test - Live
 # Full agent×model end-to-end: spin a real llama.cpp server, build the per-agent
 # sandbox images on demand, and drive `agentcap run` against the server for each
 # agent (pi/hermes/goose). Heavy (GGUF download + image builds + CPU inference) —
-# this is agentcap's "live" tier, the Rust port of `linux-live-tests.yml`.
+# this is agentcap's "live" tier (real agent × model, end-to-end).
 
 on:
   push:
@@ -29,7 +29,7 @@ permissions:
 env:
   CARGO_TERM_COLOR: always
   RUST_BACKTRACE: "1"
-  # Pinned to match tests/conftest.py (the proven Python live setup).
+  # Pinned to the proven live setup (Qwen3-1.7B-Q8 on a CPU llama.cpp server).
   GGUF_REPO: Qwen/Qwen3-1.7B-GGUF
   GGUF_FILE: Qwen3-1.7B-Q8_0.gguf
   LLAMA_IMAGE: ghcr.io/ggml-org/llama.cpp:server-b9487
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6549d2d..767cd0b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,7 +2,7 @@ name: Test - Unit & Integration
 
 # Hermetic tests only: unit tests + the loopback proxy integration test. No
 # network, podman, or model server. The full agent×model end-to-end ("live")
-# tests are a separate, resource-heavy category — see `linux-live-tests.yml`.
+# tests are a separate, resource-heavy category — see `live.yml`.
 
 on:
   pull_request:
diff --git a/.gitignore b/.gitignore
index e77b1c4..51f99e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,19 +1,10 @@
-__pycache__/
-*.pyc
-.venv/
-.venv*/
-build/
-dist/
-*.egg-info/
 runs/
 .agentcap/
 examples/*/sandbox/
-.pytest_cache/
-.ruff_cache/
 .vscode/
 .idea/
 .DS_Store
 
 # Rust
 /target/
-
+dist/
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 6e52a0e..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,54 +0,0 @@
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-
-[project]
-name = "agentcap"
-version = "0.0.1"
-description = "Run coding agents at scale across (agent × model × corpus), capture every chat-completion byte, publish as a Hugging Face dataset."
-readme = "README.md"
-requires-python = ">=3.10"
-license = "Apache-2.0"
-license-files = ["LICENSE"]
-keywords = ["llm", "agent", "capture", "dataset", "huggingface", "kv-cache"]
-authors = [
-    { name = "David Corvoysier", email = "david@huggingface.co" },
-]
-
-dependencies = [
-    "httpx>=0.27",
-    # Floor pinned to address CVE-2026-48710 (GHSA-86qp-5c8j-p5mr):
-    # Host header poisons request.url.path; path-based middleware can be bypassed.
-    "starlette>=1.0.1",
-    "uvicorn>=0.30",
-    "huggingface_hub>=1.13",  # HfApi.upload_file with repo_type="dataset"
-    "pyyaml>=6",  # used by HermesDriver to overlay context_length / base_url into config.yaml
-    "click>=8.1",
-    "pyarrow>=15",  # streaming ParquetWriter in export_local
-    "tqdm>=4.60",  # per-row progress in export_local
-]
-
-[project.optional-dependencies]
-dev = [
-    "pytest>=8",
-    "pytest-asyncio>=0.24",
-    "ruff>=0.6",
-]
-
-[project.urls]
-Homepage = "https://github.com/huggingface/agentcap"
-Repository = "https://github.com/huggingface/agentcap"
-Issues = "https://github.com/huggingface/agentcap/issues"
-
-[project.scripts]
-agentcap = "agentcap.__main__:main"
-
-[tool.hatch.build.targets.wheel]
-packages = ["src/agentcap"]
-
-[tool.pytest.ini_options]
-testpaths = ["tests"]
-markers = [
-    "integration: tests that spin up real uvicorn servers and talk over TCP loopback (slower)",
-    "live: integration tests that invoke a real agent CLI against a real model server (skipped unless agent binaries + AGENTCAP_TEST_LLM_URL or AGENTCAP_TEST_GGUF are configured)",
-]
diff --git a/src/agentcap/__init__.py b/src/agentcap/__init__.py
deleted file mode 100644
index 6b66323..0000000
--- a/src/agentcap/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""agentcap: capture LLM-agent chat-completion bytes, export as HF datasets."""
-
-__version__ = "0.0.1"
diff --git a/src/agentcap/__main__.py b/src/agentcap/__main__.py
deleted file mode 100644
index c04c472..0000000
--- a/src/agentcap/__main__.py
+++ /dev/null
@@ -1,2684 +0,0 @@
-"""CLI entrypoint. See ``agentcap --help`` for the subcommand list."""
-
-from __future__ import annotations
-
-import functools
-import os
-import sys
-from collections.abc import Sequence
-from pathlib import Path
-from urllib.parse import urlparse
-
-import click
-
-from . import __version__
-from .drivers import known_drivers as _known_drivers
-
-
-@click.group(context_settings={"help_option_names": ["-h", "--help"]})
-@click.version_option(__version__, prog_name="agentcap")
-def cli() -> None:
-    """agentcap: capture LLM-agent chat-completion bytes."""
-
-
-def _is_hf_router_upstream(upstream: str) -> bool:
-    host = (urlparse(upstream).hostname or "").lower()
-    return host == "router.huggingface.co"
-
-
-def _read_hf_token_cache() -> str | None:
-    token_path = Path.home() / ".cache" / "huggingface" / "token"
-    try:
-        token = token_path.read_text().strip()
-    except OSError:
-        return None
-    return token or None
-
-
-_WORKSPACE_DIR = ".agentcap"
-
-
-def _workspace_source() -> tuple[Path, str]:
-    """Return the workspace root (without ``.agentcap`` suffix) and a
-    short label of where the value came from. Used by error messages
-    so the user can see, verbatim, what AGENTCAP_WORKSPACE resolved
-    to — catches shell typos like ``WORKSPACE==/path`` that leave a
-    leading ``=`` in the env var value."""
-    env = os.environ.get("AGENTCAP_WORKSPACE")
-    if env is not None:
-        return Path(env), f"AGENTCAP_WORKSPACE={env!r}"
-    return Path(os.getcwd()), "cwd (AGENTCAP_WORKSPACE unset)"
-
-
-def _workspace_root() -> Path:
-    base, _ = _workspace_source()
-    return base / _WORKSPACE_DIR
-
-
-def _no_workspace_msg(workspace: Path) -> str:
-    _, src = _workspace_source()
-    return (
-        f"no workspace at {str(workspace)!r} (from {src}). "
-        f"Run `agentcap run` first, or set AGENTCAP_WORKSPACE to a "
-        f"directory that contains a ``.agentcap/`` subdir."
-    )
-
-
-def _default_workdir(agent: str, provider_slug: str) -> Path:
-    import time
-    utc = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
-    slug = provider_slug.replace("/", "-")
-    return _workspace_root() / f"{agent}-{slug}-{utc}"
-
-
-def _resolve_api_key(
-    *, upstream: str, explicit_api_key: str | None
-) -> tuple[str | None, str | None]:
-    if explicit_api_key:
-        return explicit_api_key, "--api-key / AGENTCAP_API_KEY"
-    if not _is_hf_router_upstream(upstream):
-        return None, None
-
-    hf_env = (os.environ.get("HF_TOKEN") or "").strip()
-    if hf_env:
-        return hf_env, "HF_TOKEN"
-
-    cached = _read_hf_token_cache()
-    if cached:
-        return cached, "~/.cache/huggingface/token"
-
-    return None, None
-
-
-def _complete_run_ids(ctx, param, incomplete):
-    """Shell completion for workspace run-ids — always against cwd's
-    ``.agentcap/`` (inspect doesn't consult ``$AGENTCAP_WORKSPACE``)."""
-    root = Path.cwd() / _WORKSPACE_DIR
-    if not root.is_dir():
-        return []
-    return [
-        d.name for d in root.iterdir()
-        if d.is_dir() and (d / "run.json").is_file()
-        and d.name.startswith(incomplete)
-    ]
-
-
-def _complete_request_ids(ctx, param, incomplete):
-    """Shell completion for captured request-ids across cwd's workspace."""
-    root = Path.cwd() / _WORKSPACE_DIR
-    if not root.is_dir():
-        return []
-    out: list[str] = []
-    for run_dir in root.iterdir():
-        captures = run_dir / "captures"
-        if not captures.is_dir():
-            continue
-        for req in captures.glob(f"{incomplete}*.request.json"):
-            out.append(req.name.removesuffix(".request.json"))
-    return out
-
-
-@cli.command("export")
-@click.argument("targets", nargs=-1, shell_complete=_complete_run_ids)
-@click.option(
-    "--all", "all_runs", is_flag=True,
-    help="Export every run in the workspace (mutually exclusive with positional run-ids).",
-)
-@click.option(
-    "--push",
-    required=True,
-    help="``<owner>/<base>`` — the Hugging Face Collection base name. "
-    "Captures parquets land in ``<owner>/<base>-captures``, raw session "
-    "traces in ``<owner>/<base>-traces``, and both are added to a "
-    "Collection titled ``<base>`` under ``<owner>``. Repos and "
-    "Collection are created on first push.",
-)
-@click.option(
-    "--no-scan", "no_scan", is_flag=True,
-    help="Skip the pre-export trufflehog secret scan. Off by default: "
-    "any **verified** secret in a target run dir aborts the export "
-    "before any push happens.",
-)
-def export_cmd(
-    targets: tuple[str, ...], all_runs: bool, push: str, no_scan: bool,
-) -> None:
-    """Render captured runs into parquets + upload native traces, in
-    one shot. Pushes to a paired ``-captures``/``-traces`` dataset
-    grouped under a Collection. ``TARGETS`` is one or more run-ids
-    (resolved against the workspace) or paths to a workdir; ``--all``
-    exports every run in the workspace.
-    """
-    import json as _json
-
-    from .export import (
-        captures_repo_id,
-        detect_model,
-        ensure_collection,
-        parse_collection_base,
-        push_agent_traces_dataset,
-        push_captures_dataset,
-    )
-
-    try:
-        owner, base = parse_collection_base(push)
-    except ValueError as exc:
-        raise click.UsageError(str(exc))
-    if all_runs and targets:
-        raise click.UsageError("pass --all OR positional run-ids, not both")
-    if not all_runs and not targets:
-        raise click.UsageError("specify one or more run-ids/paths, or pass --all")
-
-    workspace = _workspace_root()
-    if all_runs:
-        if not workspace.is_dir():
-            raise click.UsageError(_no_workspace_msg(workspace))
-        targets = tuple(
-            d.name for d in sorted(workspace.iterdir())
-            if d.is_dir() and (d / "run.json").is_file()
-        )
-        if not targets:
-            raise click.UsageError(f"no runs in {workspace}")
-
-    def _resolve(t: str) -> tuple[Path, str | None, str]:
-        """Return (capture_dir, agent_from_run_json, run_id) for a target.
-        run_id is the run-dir basename; it labels both the captures
-        rows and the traces-dataset folder."""
-        # 1. run-id in the workspace.
-        candidate = workspace / t
-        if (candidate / "captures").is_dir():
-            agent_from = None
-            meta = candidate / "run.json"
-            if meta.is_file():
-                try:
-                    agent_from = _json.loads(meta.read_text()).get("agent")
-                except (OSError, _json.JSONDecodeError):
-                    pass
-            return candidate / "captures", agent_from, candidate.name
-        # 2. an arbitrary workdir path with captures/ subdir.
-        p = Path(t)
-        if (p / "captures").is_dir():
-            agent_from = None
-            meta = p / "run.json"
-            if meta.is_file():
-                try:
-                    agent_from = _json.loads(meta.read_text()).get("agent")
-                except (OSError, _json.JSONDecodeError):
-                    pass
-            return p / "captures", agent_from, p.name
-        # 3. a path that *is* a capture dir.
-        if p.is_dir() and any(p.glob("*.request.json")):
-            return p, None, p.parent.name
-        raise click.UsageError(f"can't resolve {t!r} to a capture dir")
-
-    cap_items: list[dict] = []
-    trace_items: list[dict] = []
-    for t in targets:
-        cap_dir, agent, run_id = _resolve(t)
-        try:
-            model = detect_model(cap_dir)
-        except ValueError as exc:
-            raise click.UsageError(str(exc))
-        if model is None:
-            if all_runs:
-                click.echo(f"  [{t}] skipped (no captures)", err=True)
-                continue
-            raise click.UsageError(
-                f"{cap_dir} has no captured requests with a model field"
-            )
-        cap_items.append({
-            "capture_dir": cap_dir, "model": model, "agent": agent,
-            "run_id": run_id,
-        })
-        # Traces dir is sibling to captures; missing/empty is fine —
-        # push_traces_dataset accepts it and just records 0 files.
-        traces_dir = cap_dir.parent / "traces"
-        trace_items.append({"traces_dir": traces_dir, "run_id": run_id})
-        n_traces = sum(1 for _ in traces_dir.iterdir()) \
-            if traces_dir.is_dir() else 0
-        click.echo(
-            f"  [{t}] (agent={agent or '?'}, model={model}, "
-            f"traces={n_traces})",
-            err=True,
-        )
-    if not cap_items:
-        raise click.UsageError("no runs with captures to export")
-
-    # Pre-export gate: refuse to push if any run carries a verified
-    # secret. Verification round-trips to each provider's API so
-    # ``verified`` is high-precision (real, live credential).
-    # Unverified hits are surfaced but don't block — pattern-only
-    # detectors hit a real false-positive rate on model output.
-    if not no_scan:
-        run_dirs = [Path(c["capture_dir"]).parent for c in cap_items]
-        n_verified = _scan_run_dirs(run_dirs, no_verification=False)
-        if n_verified > 0:
-            raise click.ClickException(
-                f"export aborted: trufflehog found {n_verified} verified "
-                "secret(s) — see output above. Inspect, redact, or pass "
-                "--no-scan to override."
-            )
-
-    cap_repo, n_rows_list = push_captures_dataset(
-        cap_items, owner=owner, base=base,
-    )
-    click.echo(
-        f"agentcap export: pushed {sum(n_rows_list)} rows across "
-        f"{len(cap_items)} run(s) -> {cap_repo}",
-        err=True,
-    )
-
-    # Group traces by agent — one dataset per agent so the Hub
-    # viewer doesn't try to merge incompatible schemas.
-    by_agent: dict[str, list[dict]] = {}
-    for cap, tr in zip(cap_items, trace_items):
-        agent_name = cap.get("agent") or "unknown"
-        n = sum(1 for _ in tr["traces_dir"].iterdir()) \
-            if tr["traces_dir"].is_dir() else 0
-        if n == 0:
-            continue
-        by_agent.setdefault(agent_name, []).append(tr)
-
-    traces_repos: list[str] = []
-    for agent_name, tr_items in sorted(by_agent.items()):
-        tr_repo, n_files = push_agent_traces_dataset(
-            tr_items, owner=owner, base=base, agent=agent_name,
-        )
-        traces_repos.append(tr_repo)
-        click.echo(
-            f"agentcap export: pushed {n_files} trace file(s) for "
-            f"{agent_name} across {len(tr_items)} run(s) -> {tr_repo}",
-            err=True,
-        )
-
-    slug = ensure_collection(
-        owner=owner, base=base,
-        repos=[captures_repo_id(owner, base), *traces_repos],
-    )
-    click.echo(
-        f"agentcap export: collection -> https://huggingface.co/collections/{slug}",
-        err=True,
-    )
-
-
-@cli.command("run")
-@click.option(
-    "--agent",
-    type=click.Choice(_known_drivers()),
-    required=True,
-    help="Agent driver to use.",
-)
-@click.option(
-    "--model",
-    default=None,
-    help="Model id the agent uses in its outbound requests (and that "
-    "the capture proxy records as the ``model`` field). Required for "
-    "all drivers — hermes used to default to its own built-in id, but "
-    "that made captures lie about which model was actually run.",
-)
-@click.option(
-    "--upstream",
-    required=True,
-    help="Base URL of the upstream model server (e.g. http://127.0.0.1:8000).",
-)
-@click.option(
-    "--api-key",
-    "api_key",
-    default=None,
-    envvar="AGENTCAP_API_KEY",
-    help="Bearer token forwarded to the upstream. Required for "
-    "authenticated providers (HF Router, OpenAI, Together, …); leave "
-    "unset for local servers that don't auth (llama serve, vLLM). "
-    "Falls back to AGENTCAP_API_KEY. For HF Router only, if unset "
-    "we also auto-try HF_TOKEN and ~/.cache/huggingface/token.",
-)
-@click.option(
-    "--sandbox",
-    "sandbox_dir",
-    default=None,
-    type=click.Path(exists=True, file_okay=False, dir_okay=True),
-    help="Host directory exposed as the agent's cwd (bind-mounted "
-    "writable into the per-agent container). Use this when the corpus "
-    "needs the agent to see real source — e.g. a transformers git "
-    "worktree for the transformers-coding-session corpus. If omitted, "
-    "an empty ``sandbox/`` is created next to ``captures/`` under the "
-    "auto-derived run dir.",
-)
-@click.option(
-    "--skills",
-    "skills_dir",
-    default=None,
-    type=click.Path(exists=True, file_okay=False, dir_okay=True),
-    help="Host directory containing a huggingface/skills-shaped "
-    "checkout (``agents/AGENTS.md`` + ``skills/<name>/SKILL.md``). "
-    "Bind-mounted read-only into the sandbox; the agent's "
-    "image-side entrypoint wires it into the agent's expected "
-    "discovery location (``~/.hermes/skills/`` for hermes; "
-    "``AGENTS.md`` + ``skills/`` symlinks in cwd for "
-    "opencode/goose/pi).",
-)
-@click.option(
-    "--tasks",
-    "tasks_file",
-    required=True,
-    type=click.Path(exists=True, dir_okay=False),
-    help="Plain-text file with one prompt per line (# comments + blank lines ignored).",
-)
-@click.option(
-    "--turns",
-    type=int,
-    default=1,
-    show_default=True,
-    help="Total turns per task (1 = no follow-ups).",
-)
-@click.option(
-    "--followup",
-    type=click.Choice(["continue", "templates", "synthesized"]),
-    default="continue",
-    show_default=True,
-    help="Follow-up strategy for turns 2..N.",
-)
-@click.option(
-    "--timeout",
-    type=float,
-    default=1200,
-    show_default=True,
-    help="Per-turn timeout in seconds.",
-)
-def run_cmd(
-    agent: str,
-    model: str | None,
-    upstream: str,
-    api_key: str | None,
-    sandbox_dir: str | None,
-    skills_dir: str | None,
-    tasks_file: str,
-    turns: int,
-    followup: str,
-    timeout: float,
-) -> None:
-    """Drive an agent CLI through a corpus, capture, summarise."""
-    import json
-
-    from .drivers import get_driver, traces_dump_argv_for
-    from .followups import get_followup
-    from .orchestrator import Orchestrator, read_tasks_txt
-    from .provider import _hostname_fallback, refine_for_sub_provider
-    from .proxy import serve_in_thread
-    from .sandbox import require_sandbox_or_die
-
-    if not model:
-        raise click.UsageError(
-            f"--model is required for --agent {agent}"
-        )
-
-    api_key, api_key_source = _resolve_api_key(
-        upstream=upstream,
-        explicit_api_key=api_key,
-    )
-    if api_key_source and _is_hf_router_upstream(upstream):
-        click.echo(
-            f"  [auth] HF Router token source={api_key_source}",
-            err=True,
-        )
-
-    if followup == "synthesized":
-        fu = get_followup(
-            "synthesized", upstream=upstream, model=model, api_key=api_key
-        )
-    else:
-        fu = get_followup(followup)
-
-    # --- sandbox setup: from here on, side effects.
-
-    def _sb_log(msg: str) -> None:
-        click.echo(f"  [sandbox] {msg}", err=True)
-
-    # Hostname classification — used by the sandbox env to pick the
-    # agent's credential channel (env-var auth vs no-auth) and as part
-    # of the auto-generated workdir name.
-    provider_slug = refine_for_sub_provider(
-        _hostname_fallback(upstream), model
-    )
-    click.echo(f"  [provider] {provider_slug}", err=True)
-
-    workdir_p = _default_workdir(agent, provider_slug)
-    captures = workdir_p / "captures"
-    sessions = workdir_p / "sessions"
-    traces = workdir_p / "traces"
-    state = workdir_p / "state"
-    captures.mkdir(parents=True, exist_ok=True)
-    sessions.mkdir(parents=True, exist_ok=True)
-    traces.mkdir(parents=True, exist_ok=True)
-    state.mkdir(parents=True, exist_ok=True)
-    click.echo(f"  [workdir] {workdir_p}", err=True)
-
-    # Stub run.json so ``agentcap ls/inspect/export`` can discover this
-    # run while it's still in flight. Fully overwritten with the final
-    # summary (incl. per-task durations) at end-of-run.
-    (workdir_p / "run.json").write_text(json.dumps({
-        "agent": agent,
-        "model": model,
-        "provider": provider_slug,
-        "upstream": upstream,
-        "turns_per_task": turns,
-        "followup": followup,
-        "tasks": [],
-    }, indent=2))
-
-    # Resolve --sandbox up front: it joins the bind-mount set
-    # alongside --skills (RO) and the traces dir (RW).
-    if sandbox_dir is not None:
-        sandbox_cwd = str(Path(sandbox_dir).resolve())
-    else:
-        default_sandbox = workdir_p / "sandbox"
-        default_sandbox.mkdir(parents=True, exist_ok=True)
-        sandbox_cwd = str(default_sandbox)
-
-    tasks = read_tasks_txt(tasks_file)
-    if not tasks:
-        raise click.UsageError(f"no tasks found in {tasks_file}")
-
-    def _on_event(event: str, **kw):
-        click.echo(f"  [{event}] " + " ".join(f"{k}={v}" for k, v in kw.items()), err=True)
-
-    # Bind on 0.0.0.0 so the podman container (which has its own netns)
-    # can dial in via ``host.containers.internal``. Loopback would be
-    # unreachable from the container side.
-    with serve_in_thread(upstream, captures, host="0.0.0.0") as proxy:
-        proxy_url = f"http://host.containers.internal:{proxy.port}/v1"
-        click.echo(f"  [proxy] {proxy_url}", err=True)
-
-        sandbox_env = {
-            "AGENTCAP_PROXY_URL": proxy_url,
-            "AGENTCAP_MODEL": model,
-            "AGENTCAP_PROVIDER": provider_slug,
-            "AGENTCAP_TRACES_DIR": str(traces.resolve()),
-            # State dir: SQLite-backed agents (hermes, goose, opencode)
-            # redirect their session store at it, so the .db lands on
-            # host as it's written — survives container crashes. Pi
-            # streams JSONL via the traces symlink and ignores this.
-            "AGENTCAP_STATE_DIR": str(state.resolve()),
-        }
-        if api_key:
-            sandbox_env["AGENTCAP_API_KEY"] = api_key
-        sandbox_ro: list[Path] = []
-        if skills_dir is not None:
-            skills_abs = Path(skills_dir).resolve()
-            sandbox_env["AGENTCAP_SKILLS_DIR"] = str(skills_abs)
-            sandbox_ro.append(skills_abs)
-        sandbox_rw: list[Path] = [
-            traces.resolve(),
-            state.resolve(),
-            Path(sandbox_cwd).resolve(),
-        ]
-        # First call per agent builds/boots the image; can take minutes.
-        sandbox = require_sandbox_or_die(
-            agent=agent, command="agentcap run", log=_sb_log,
-            env=sandbox_env,
-            readonly_paths=sandbox_ro,
-            writable_paths=sandbox_rw,
-        )
-
-        driver_kwargs: dict = {
-            "sandbox": sandbox, "cwd": sandbox_cwd, "model": model,
-        }
-        driver = get_driver(agent, **driver_kwargs)
-
-        click.echo(
-            f"agentcap run: {len(tasks)} tasks × {turns} turns through "
-            f"{agent} -> {upstream}",
-            err=True,
-        )
-        orch = Orchestrator(
-            driver, fu, sessions_dir=sessions, on_event=_on_event,
-            set_capture_context=proxy.set_context,
-        )
-
-        try:
-            results = orch.run_corpus(
-                tasks, turns_per_task=turns, timeout=timeout,
-            )
-        finally:
-            # Dump SQLite-stored sessions to AGENTCAP_TRACES_DIR for
-            # agents whose images ship a ``dump-traces`` script
-            # (goose, opencode). No-op for symlink-style agents
-            # (pi, hermes) — their transcripts already streamed to
-            # the host. Failure is logged but never aborts the run.
-            dump_argv = traces_dump_argv_for(agent)
-            if dump_argv is not None:
-                try:
-                    r = sandbox.run(
-                        dump_argv,
-                        env=sandbox_env,
-                        cwd=sandbox_cwd,
-                        timeout=600,
-                    )
-                    if r.returncode != 0:
-                        click.echo(
-                            f"  [traces] dump-traces rc={r.returncode}",
-                            err=True,
-                        )
-                except Exception as exc:
-                    click.echo(f"  [traces] dump-traces failed: {exc}", err=True)
-            close = getattr(driver, "close", None)
-            if callable(close):
-                close()
-            sb_close = getattr(sandbox, "close", None)
-            if callable(sb_close):
-                sb_close()
-
-    summary = {
-        "agent": agent,
-        "model": model,
-        "provider": provider_slug,
-        "upstream": upstream,
-        "turns_per_task": turns,
-        "followup": followup,
-        "tasks": [
-            {
-                "task_id": r.task_id,
-                "prompt": r.prompt,
-                "session_id": r.session_id,
-                "completed_turns": r.completed_turns,
-                "turns": [
-                    {
-                        "turn": t.turn,
-                        "returncode": t.returncode,
-                        "duration_s": round(t.duration_s, 3),
-                    }
-                    for t in r.turns
-                ],
-            }
-            for r in results
-        ],
-    }
-    (workdir_p / "run.json").write_text(json.dumps(summary, indent=2))
-    n_ok = sum(1 for r in results if r.completed_turns == turns)
-    click.echo(
-        f"agentcap run: {n_ok}/{len(results)} tasks completed all {turns} turns; "
-        f"summary -> {workdir_p / 'run.json'}",
-        err=True,
-    )
-
-
-def _scan_run_dirs(
-    run_dirs: list[Path],
-    *,
-    no_verification: bool = False,
-    rescan: bool = False,
-) -> int:
-    """Run trufflehog over each run dir; print a per-run summary.
-    Returns the total count of **verified** hits across all runs.
-    Unverified hits are listed but never abort the caller —
-    Trufflehog's pattern matchers have a real false-positive rate.
-
-    Persists results to ``<run_dir>/scan.json`` so repeat scans skip
-    the verification round-trips. Pass ``rescan=True`` to force a
-    fresh scan."""
-    from collections import Counter
-
-    from .scan import TrufflehogMissingError, scan_run_dir
-
-    total_verified = 0
-    for run_dir in run_dirs:
-        try:
-            result, was_cached = scan_run_dir(
-                run_dir,
-                no_verification=no_verification,
-                rescan=rescan,
-            )
-        except TrufflehogMissingError as exc:
-            raise click.ClickException(str(exc))
-        n_unver = len(result.unverified)
-        n_ver = len(result.verified)
-        total_verified += n_ver
-        cache_tag = " (cached)" if was_cached else ""
-        click.echo(
-            f"  [scan] {run_dir.name}{cache_tag}: "
-            f"{result.chunks_scanned} chunks / {result.bytes_scanned} bytes; "
-            f"verified={n_ver} unverified={n_unver}",
-            err=True,
-        )
-        # Verified hits are rare + actionable — list each one.
-        for hit in result.verified:
-            click.echo(
-                f"    VERIFIED  {hit.detector}  {hit.file}",
-                err=True,
-            )
-        # Unverified hits are usually pattern-only false positives
-        # (Box matches any 32-char alphanumeric, Mailgun any 32-hex,
-        # …). Summarise by detector instead of dumping every line;
-        # per-hit detail lives in ``<run_dir>/scan.json``.
-        if result.unverified:
-            by_det = Counter(h.detector for h in result.unverified)
-            tail = ", ".join(
-                f"{det}={n}" for det, n in by_det.most_common()
-            )
-            click.echo(f"    unverified by detector: {tail}", err=True)
-    return total_verified
-
-
-@cli.command("ls")
-@click.argument(
-    "workspace",
-    required=False,
-    type=click.Path(file_okay=False, dir_okay=True, resolve_path=False),
-)
-@click.option(
-    "--long", "-l", "long_form", is_flag=True,
-    help="Long form: include upstream and per-run task counts.",
-)
-def ls_cmd(workspace: str | None, long_form: bool) -> None:
-    """List runs under a local workspace.
-
-    Without ``WORKSPACE``, looks at ``./.agentcap/``. Accepts either
-    the parent dir (where ``agentcap run`` created the ``.agentcap/``
-    subdir) or the ``.agentcap/`` dir itself.
-
-    Unlike ``agentcap run`` / ``export``, ``ls`` does NOT consult
-    ``$AGENTCAP_WORKSPACE`` — what you point it at is what you get.
-    """
-    import json as _json
-
-    if workspace is None:
-        root = Path.cwd() / _WORKSPACE_DIR
-    else:
-        # Normalize before checking .name so paths like ``.``,
-        # ``.agentcap/.`` or ``foo/`` classify correctly (``Path('.').name``
-        # is ``''``, not ``'.agentcap'``).
-        p = Path(os.path.normpath(workspace)).absolute()
-        root = p if p.name == _WORKSPACE_DIR else p / _WORKSPACE_DIR
-    if not root.is_dir():
-        click.echo(
-            f"no workspace at {str(root)!r}. "
-            f"Run `agentcap run` first, or pass a directory that "
-            f"contains a ``.agentcap/`` subdir.",
-            err=True,
-        )
-        return
-
-    rows: list[dict] = []
-    for run_dir in sorted(root.iterdir()):
-        meta_path = run_dir / "run.json"
-        if not run_dir.is_dir() or not meta_path.is_file():
-            continue
-        try:
-            meta = _json.loads(meta_path.read_text())
-        except (OSError, _json.JSONDecodeError):
-            continue
-        captures = run_dir / "captures"
-        n_caps = (
-            len(list(captures.glob("*.request.json"))) if captures.is_dir() else 0
-        )
-        tasks = meta.get("tasks") or []
-        turns = meta.get("turns_per_task", 1)
-        n_ok = sum(1 for t in tasks if t.get("completed_turns") == turns)
-        rows.append({
-            "run_id": run_dir.name,
-            "agent": meta.get("agent") or "?",
-            "model": (meta.get("model") or "?").split("/")[-1],
-            "provider": meta.get("provider") or "?",
-            "upstream": meta.get("upstream") or "?",
-            "n_tasks": len(tasks),
-            "n_ok": n_ok,
-            "n_caps": n_caps,
-        })
-
-    if not rows:
-        click.echo(f"no runs in {root}.", err=True)
-        return
-
-    if long_form:
-        cols = ["run_id", "agent", "model", "provider", "tasks", "captures", "upstream"]
-        widths = [
-            max(len("run_id"), max(len(r["run_id"]) for r in rows)),
-            max(len("agent"), max(len(r["agent"]) for r in rows)),
-            max(len("model"), max(len(r["model"]) for r in rows)),
-            max(len("provider"), max(len(r["provider"]) for r in rows)),
-            len("tasks"),
-            len("captures"),
-            max(len("upstream"), max(len(r["upstream"]) for r in rows)),
-        ]
-    else:
-        cols = ["run_id", "agent", "model", "tasks", "captures"]
-        widths = [
-            max(len("run_id"), max(len(r["run_id"]) for r in rows)),
-            max(len("agent"), max(len(r["agent"]) for r in rows)),
-            max(len("model"), max(len(r["model"]) for r in rows)),
-            len("tasks"),
-            len("captures"),
-        ]
-
-    def _fmt(cells: list[str]) -> str:
-        return "  ".join(c.ljust(w) for c, w in zip(cells, widths))
-
-    click.echo(_fmt([c.upper() for c in cols]))
-    for r in rows:
-        tasks_cell = f"{r['n_ok']}/{r['n_tasks']}"
-        if long_form:
-            click.echo(_fmt([
-                r["run_id"], r["agent"], r["model"], r["provider"],
-                tasks_cell, str(r["n_caps"]), r["upstream"],
-            ]))
-        else:
-            click.echo(_fmt([
-                r["run_id"], r["agent"], r["model"],
-                tasks_cell, str(r["n_caps"]),
-            ]))
-
-
-def _resolve_request_id(
-    rid: str, source: str | None, *, workspace: Path | None = None,
-) -> tuple[str, dict, dict | None, dict | None, Path | None]:
-    """Resolve ``rid`` (full or short prefix) to
-    ``(full_rid, body, response_record, request_record, capture_dir)``.
-
-    - If ``source`` is given, looks the rid up there via
-      ``captures.load_request`` (any agentcap-supported source: dir,
-      parquet, hf://) — exact match only. Response and request
-      records and ``capture_dir`` are unavailable in that path
-      (just the body).
-    - Otherwise scans ``workspace`` (defaults to ``_workspace_root()``
-      for legacy ``run`` / ``export`` callers; ``inspect``
-      passes cwd explicitly), accepting a prefix (git-style) and
-      returning the body, the paired response, the full request
-      record (which carries ``task_id``, ``turn``, ``captured_at``,
-      ``upstream_url``), and the capture dir the rid was found in.
-    """
-    from . import captures
-
-    if source is not None:
-        try:
-            return rid, captures.load_request(source, rid), None, None, None
-        except KeyError as exc:
-            raise click.UsageError(str(exc))
-        except (ValueError, FileNotFoundError) as exc:
-            raise click.UsageError(str(exc))
-
-    if workspace is None:
-        workspace = _workspace_root()
-    try:
-        found = captures.resolve_workspace_rid(workspace, rid)
-    except captures.AmbiguousRequestId as exc:
-        raise click.UsageError(str(exc))
-    if found is None:
-        raise click.UsageError(
-            f"request_id {rid!r} not found in workspace at {workspace}; "
-            f"pass a different TARGET (a dir, .parquet, or hf:// URI)."
-        )
-    capture_dir, full_rid = found
-    import json as _json
-    req_rec = _json.loads(
-        (capture_dir / f"{full_rid}.request.json").read_text()
-    )
-    resp_path = capture_dir / f"{full_rid}.response.json"
-    resp_rec = (
-        _json.loads(resp_path.read_text()) if resp_path.is_file() else None
-    )
-    body = req_rec.get("body")
-    if not isinstance(body, dict):
-        raise click.UsageError(
-            f"capture {capture_dir / f'{full_rid}.request.json'} has no body field"
-        )
-    return full_rid, body, resp_rec, req_rec, capture_dir
-
-
-def _enumerate_workspace_requests(
-    scope: str | None, *, workspace: Path | None = None,
-) -> list[dict]:
-    """Walk captures across the workspace (or one run if ``scope`` is a
-    run-id) and return one row per captured request, grouped by run
-    then chronological within each run. Each row has ``run_id``,
-    ``rid``, ``captured_at``, ``status``, and ``preview`` (last user
-    message, truncated). ``workspace`` defaults to ``_workspace_root()``
-    so legacy callers don't break; ``inspect`` passes it
-    explicitly from the resolved TARGET."""
-    import json as _json
-
-    root = workspace if workspace is not None else _workspace_root()
-    if not root.is_dir():
-        return []
-    run_dirs = (
-        [root / scope] if scope else [d for d in sorted(root.iterdir()) if d.is_dir()]
-    )
-    rows: list[dict] = []
-    for run_dir in run_dirs:
-        captures = run_dir / "captures"
-        if not captures.is_dir():
-            continue
-        # Sort within (task, time) so per-task ``prev_rid`` is the
-        # immediately-preceding capture in chronological order.
-        recs: list[tuple[str, dict]] = []
-        for req_path in captures.glob("*.request.json"):
-            rid = req_path.stem.split(".")[0]
-            try:
-                req = _json.loads(req_path.read_text())
-            except (OSError, _json.JSONDecodeError):
-                continue
-            recs.append((rid, req))
-        recs.sort(
-            key=lambda r: (r[1].get("task_id") or "", r[1].get("captured_at", 0))
-        )
-        prev_rid_by_task: dict = {}
-        prev_msgs_by_task: dict = {}
-        idx_by_task: dict = {}
-        for rid, req in recs:
-            resp_path = captures / f"{rid}.response.json"
-            status = "?"
-            if resp_path.is_file():
-                try:
-                    status = str(_json.loads(resp_path.read_text()).get("status_code", "?"))
-                except (OSError, _json.JSONDecodeError):
-                    pass
-            messages = (req.get("body") or {}).get("messages") or []
-            task_id = req.get("task_id")
-            # When task_id is missing, key the per-task caches on the
-            # rid so unrelated orphan captures don't accidentally chain
-            # together for the diff / prev_rid / req_index.
-            task_key = task_id if task_id is not None else rid
-            prev_msgs = prev_msgs_by_task.get(task_key)
-            if prev_msgs is None:
-                new_msgs = messages
-                label = f"(init {len(new_msgs)})"
-            else:
-                removed, new_msgs = _diff_messages(prev_msgs, messages)
-                label = f"({_delta_label(len(removed), len(new_msgs))})"
-            summary = _message_summary(new_msgs[-1]) if new_msgs else ""
-            preview = f"{label} {summary}".replace("\n", " ").strip()
-            # Concatenate every new message's content into a single
-            # searchable blob so fzf can match against deeper content
-            # (e.g. ``hf-cli`` referenced 4 messages back in the diff)
-            # without bloating the visible row.
-            searchable = " ".join(
-                _message_text(m) for m in new_msgs
-            ).replace("\n", " ").replace("\t", " ")
-            prev_rid = prev_rid_by_task.get(task_key)
-            prev_msgs_by_task[task_key] = messages
-            prev_rid_by_task[task_key] = rid
-            idx_by_task[task_key] = idx_by_task.get(task_key, 0) + 1
-            rows.append({
-                "run_id": run_dir.name,
-                "rid": rid,
-                "captured_at": int(req.get("captured_at", 0)),
-                "status": status,
-                "task_id": task_id,
-                "turn": req.get("turn"),
-                "req_index": idx_by_task[task_key],
-                "prev_rid": prev_rid,
-                "preview": preview,
-                "searchable": searchable,
-            })
-    rows.sort(key=lambda r: (r["run_id"], r["captured_at"]))
-    return rows
-
-
-def _enumerate_parquet_requests(parquet_path: Path) -> list[dict]:
-    """Same row shape as ``_enumerate_workspace_requests`` but sourced
-    from a single ``-captures`` parquet (``agentcap export`` output).
-    Newer parquets carry ``task_id`` / ``turn`` so the diff / prev_rid
-    chain groups per (run, task); older ones without those columns
-    fall back to one linear chain per ``run_id`` and the LOC cell
-    just stays ``-``."""
-    import json as _json
-    import pyarrow.parquet as pq
-
-    table_meta = pq.ParquetFile(str(parquet_path)).schema_arrow
-    available = set(table_meta.names)
-    cols = ["request_id", "captured_at", "request", "response", "run_id"]
-    has_task = "task_id" in available
-    has_turn = "turn" in available
-    if has_task:
-        cols.append("task_id")
-    if has_turn:
-        cols.append("turn")
-    t = pq.read_table(str(parquet_path), columns=cols)
-    n = t.num_rows
-    if n == 0:
-        return []
-    rids = t.column("request_id").to_pylist()
-    times = t.column("captured_at").to_pylist()
-    reqs = t.column("request").to_pylist()
-    resps = t.column("response").to_pylist()
-    runs = t.column("run_id").to_pylist()
-    task_ids = t.column("task_id").to_pylist() if has_task else [None] * n
-    turns = t.column("turn").to_pylist() if has_turn else [None] * n
-
-    order = sorted(range(n), key=lambda i: (runs[i] or "", int(times[i] or 0)))
-    rows: list[dict] = []
-    prev_msgs: dict = {}
-    prev_rid: dict = {}
-    idx_by_key: dict = {}
-    # Drop rows whose request_id isn't the proxy's 32-hex format.
-    # The picker would reject them anyway (``_pick_parquet_request``
-    # validates via the same regex) and they get interpolated into
-    # the fzf preview shell command via ``{2}`` / ``{3}`` — keeping
-    # them out at enumeration time also closes the door on any
-    # injection vector from a malformed parquet.
-    import re
-    _hex_rid = re.compile(r"[0-9a-f]{32}")
-    for i in order:
-        rid = rids[i]
-        if not rid or not _hex_rid.fullmatch(rid):
-            continue
-        try:
-            body = _json.loads(reqs[i] or "{}")
-        except _json.JSONDecodeError:
-            body = {}
-        messages = body.get("messages") or []
-        run_id = runs[i] or "?"
-        task_id = task_ids[i]
-        # Mirror workspace semantics: group prev/diff by (run, task);
-        # fall back to (run, rid) when task_id is missing so unrelated
-        # rows don't chain into one synthetic task.
-        key = (run_id, task_id if task_id is not None else rid)
-        prior = prev_msgs.get(key)
-        if prior is None:
-            new_msgs = messages
-            label = f"(init {len(new_msgs)})"
-        else:
-            removed, new_msgs = _diff_messages(prior, messages)
-            label = f"({_delta_label(len(removed), len(new_msgs))})"
-        summary = _message_summary(new_msgs[-1]) if new_msgs else ""
-        preview = f"{label} {summary}".replace("\n", " ").strip()
-        searchable = " ".join(
-            _message_text(m) for m in new_msgs
-        ).replace("\n", " ").replace("\t", " ")
-        status = "?"
-        try:
-            status = str(_json.loads(resps[i] or "{}").get("status_code", "?"))
-        except _json.JSONDecodeError:
-            pass
-        idx_by_key[key] = idx_by_key.get(key, 0) + 1
-        rows.append({
-            "run_id": run_id,
-            "rid": rid,
-            "captured_at": int(times[i] or 0),
-            "status": status,
-            "task_id": task_id,
-            "turn": turns[i],
-            "req_index": idx_by_key[key],
-            "prev_rid": prev_rid.get(key),
-            "preview": preview,
-            "searchable": searchable,
-        })
-        prev_msgs[key] = messages
-        prev_rid[key] = rid
-    return rows
-
-
-def _format_inspect_rows(rows: list[dict]) -> tuple[str, list[str]]:
-    """Flat table: one row per captured call. Columns are LOC
-    (``task_id.<req_index>``), RID, RUN (shown only when rows span
-    multiple runs — redundant otherwise), MESSAGES (``(+N)`` /
-    ``(init N)`` / ``(-X +Y)`` delta + one-line role-aware summary).
-    Time / status / model / size live in the fzf preview pane.
-
-    Returns ``(header, fzf_lines)``. Each fzf line is the visible
-    content followed by tab-delimited hidden columns the preview
-    command pulls via ``{2}`` / ``{3}`` (full rid, previous rid) plus
-    a searchable blob fzf matches against (column 4)."""
-    include_run = len({r.get("run_id") for r in rows}) > 1
-
-    rid_w = 8
-    loc_w = max(
-        len("LOC"),
-        max((
-            len(f"{r.get('task_id') or '?'}.{r.get('req_index')}")
-            if r.get("task_id") and r.get("req_index") is not None else 1
-            for r in rows
-        ), default=0),
-    )
-    run_w = (
-        max(len("RUN"), max((len(r["run_id"]) for r in rows), default=0))
-        if include_run else 0
-    )
-
-    def _row(loc, rid, run, prompt) -> str:
-        cells = [f"{loc:<{loc_w}}", f"{rid:<{rid_w}}"]
-        if include_run:
-            cells.append(f"{run:<{run_w}}")
-        cells.append(prompt)
-        return "  ".join(cells)
-
-    header = _row("LOC", "RID", "RUN", "MESSAGES")
-
-    fzf: list[str] = []
-    prev_task: str | None = None
-    for r in rows:
-        loc = (
-            f"{r.get('task_id') or '?'}.{r.get('req_index')}"
-            if r.get("task_id") and r.get("req_index") is not None
-            else "-"
-        )
-        # Strip tabs from the visible content so they don't shift the
-        # tab-delimited hidden columns appended below.
-        line = _row(loc, r["rid"][:8], r["run_id"], r["preview"]).replace("\t", " ")
-        task_id = r.get("task_id")
-        if task_id and task_id != prev_task:
-            # Reverse video: inverts fg/bg so the row pops on any
-            # terminal palette regardless of theme.
-            line = f"\033[7m{line}\033[0m"
-        prev_task = task_id
-        # Hidden tab columns (fzf searches all of them by default):
-        #   2 = full rid, 3 = prev rid, 4 = concatenated new-message
-        # bodies so a query like ``hf-cli`` matches rows whose deeper
-        # content references it.
-        fzf.append(
-            f"{line}\t{r['rid']}\t{r.get('prev_rid') or '-'}"
-            f"\t{r.get('searchable') or ''}"
-        )
-    return header, fzf
-
-
-def _fzf_pick(
-    header: str | None,
-    lines: list[str],
-    preview_cmd: str,
-    *,
-    extra_args: Sequence[str] = (),
-) -> str | None:
-    """Run fzf over ``lines``. Returns the selected line, or ``None``
-    if the user cancelled (Esc / Ctrl-C).
-
-    ``header=None`` means the first element of ``lines`` is the header
-    (passed to fzf via ``--header-lines=1`` so it stays in lockstep
-    with the body on reload — needed when the column widths grow as
-    background fetches land). Otherwise ``--header=<header>`` pins
-    a static line above the body."""
-    import shutil
-    import subprocess
-
-    if not shutil.which("fzf"):
-        raise click.UsageError(
-            "fzf is required for interactive pickers "
-            "(install via 'brew install fzf' or your distro's package manager)."
-        )
-
-    args = [
-        "fzf",
-        "--ansi",
-        "--layout=reverse",
-        "--header-first",
-        "--preview", preview_cmd,
-        "--preview-window=right:60%:wrap",
-        "--no-sort",
-    ]
-    if header is None:
-        args += ["--header-lines=1"]
-    else:
-        args += ["--header", header]
-    args.extend(extra_args)
-    proc = subprocess.run(
-        args,
-        input="\n".join(lines),
-        capture_output=True,
-        text=True,
-    )
-    if proc.returncode != 0:
-        return None
-    return proc.stdout.rstrip("\n") or None
-
-
-def _pick_workspace_run(*, workspace: Path | None = None) -> str | None:
-    """Open an fzf picker over the runs in the workspace, returning the
-    selected run-id, or ``None`` if cancelled. fzf is a hard
-    requirement of ``inspect``; the gate lives at the top of
-    ``inspect_cmd``. ``workspace`` defaults to ``_workspace_root()``;
-    ``inspect`` passes cwd or the resolved dir explicitly."""
-    import json as _json
-    import sys
-
-    root = workspace if workspace is not None else _workspace_root()
-    if not root.is_dir():
-        raise click.UsageError(f"no workspace at {root}")
-
-    rows: list[dict] = []
-    for run_dir in sorted(root.iterdir()):
-        meta_path = run_dir / "run.json"
-        if not run_dir.is_dir() or not meta_path.is_file():
-            continue
-        try:
-            meta = _json.loads(meta_path.read_text())
-        except (OSError, _json.JSONDecodeError):
-            continue
-        captures = run_dir / "captures"
-        n_caps = (
-            len(list(captures.glob("*.request.json"))) if captures.is_dir() else 0
-        )
-        if n_caps == 0:
-            continue  # skip empty runs — nothing to inspect
-        tasks = meta.get("tasks") or []
-        rows.append({
-            "run_id": run_dir.name,
-            "agent": meta.get("agent") or "?",
-            "model": (meta.get("model") or "?").split("/")[-1],
-            "n_tasks": len(tasks),
-            "n_caps": n_caps,
-        })
-    if not rows:
-        raise click.UsageError(f"no runs with captures in {root}")
-
-    agent_w = max(len("AGENT"), max(len(r["agent"]) for r in rows))
-    model_w = max(len("MODEL"), max(len(r["model"]) for r in rows))
-
-    def _row(agent, model, tasks, caps) -> str:
-        return (
-            f"{agent:<{agent_w}}  {model:<{model_w}}  "
-            f"{tasks:>5}  {caps:>4}"
-        )
-
-    header = _row("AGENT", "MODEL", "TASKS", "CAPS")
-    # Tab-delim hidden col 2 carries the run_id — picker shells out to
-    # ``_run_preview`` with it, and we extract it from the picked line
-    # below. Visible layout matches the HF parquet picker's terser
-    # ``AGENT  MODEL  …`` style.
-    lines = [
-        _row(r["agent"], r["model"], str(r["n_tasks"]), str(r["n_caps"]))
-        + f"\t{r['run_id']}"
-        for r in rows
-    ]
-    import shlex
-    ws_arg = f"--workspace {shlex.quote(str(root))}"
-    preview = (
-        f"{sys.executable} -m agentcap _run_preview {ws_arg} {{2}}"
-        f" 2>/dev/null | head -200"
-    )
-    picked = _fzf_pick(
-        header, lines, preview,
-        extra_args=["--delimiter", "\t", "--with-nth", "1"],
-    )
-    if picked is None:
-        return None
-    fields = picked.rsplit("\t", 1)
-    return fields[1].strip() if len(fields) == 2 else None
-
-
-def _pick_workspace_request(
-    scope: str | None, *, initial_short_rid: str | None = None,
-    workspace: Path | None = None,
-) -> str | None:
-    """fzf picker for a workspace request. Returns the picked short
-    rid, or ``None`` if cancelled. fzf is a hard requirement of
-    ``inspect``; the gate lives at the top of ``inspect_cmd``.
-
-    ``initial_short_rid`` (if given) positions the cursor on the row
-    whose rid starts with that prefix when the picker opens — used
-    when re-entering the picker from the message sub-picker so the
-    user lands back where they were. ``workspace`` defaults to
-    ``_workspace_root()``; ``inspect`` passes it
-    explicitly from the resolved TARGET."""
-    import shlex
-    import sys
-
-    if workspace is None:
-        workspace = _workspace_root()
-    rows = _enumerate_workspace_requests(scope, workspace=workspace)
-    if not rows:
-        where = f"run {scope!r}" if scope else "workspace"
-        raise click.UsageError(f"no captured requests in {where}")
-
-    header, fzf_lines = _format_inspect_rows(rows)
-    # Tab-delim hidden columns: 2 = full rid, 3 = previous-capture rid
-    # (or "-" for the first capture of a task). Pre-computing the prev
-    # rid here lets the preview pane skip a full cap-dir rescan per
-    # fzf hover. ``_highlight`` wraps each occurrence of fzf's current
-    # query (``{q}``) in red so the user can see where the match
-    # landed inside the preview. ``{q}`` is its own positional arg so
-    # fzf's automatic shell-escaping handles quoting end-to-end.
-    ws_arg = f"--workspace {shlex.quote(str(workspace))}"
-    preview = (
-        f"{sys.executable} -m agentcap _preview {ws_arg} {{2}} {{3}}"
-        f" 2>/dev/null | head -400"
-        f" | {sys.executable} -m agentcap _highlight {{q}}"
-    )
-
-    extra = [
-        "--delimiter", "\t", "--with-nth", "1",
-        "--no-hscroll",
-        "--bind", "change:refresh-preview",
-    ]
-    if initial_short_rid:
-        for i, line in enumerate(fzf_lines, start=1):
-            parts = line.split("\t")
-            # Hidden column 2 carries the full rid; match by prefix.
-            if len(parts) >= 2 and parts[1].startswith(initial_short_rid):
-                # ``load`` fires after fzf finishes reading stdin so
-                # the items exist when ``pos(N)`` runs (``start`` is
-                # too early — fires before items are loaded).
-                extra.extend(["--bind", f"load:pos({i})"])
-                break
-
-    picked = _fzf_pick(
-        header, fzf_lines, preview,
-        extra_args=extra,
-    )
-    if picked is None:
-        return None  # cancelled
-    # picked is the visible (column-1) line; RID is the second
-    # whitespace-separated field on it.
-    tokens = picked.split()
-    short = tokens[1] if len(tokens) >= 2 else ""
-    import re
-    if not re.fullmatch(r"[0-9a-f]{8}", short):
-        return None
-    return short
-
-
-def _classify_target(target: str | None) -> tuple[str, object]:
-    """Classify the ``TARGET`` positional of ``inspect``.
-
-    Returns ``(kind, payload)``:
-      - ``("workspace", Path)`` — local ``.agentcap`` dir to browse.
-      - ``("workspace-run", run_id)`` — scope to one run under cwd's
-        ``.agentcap`` (``run_id`` is the dir name).
-      - ``("rid", rid)`` — body dump; rid looked up in cwd's workspace.
-      - ``("parquet", Path)`` — local ``.parquet`` file.
-      - ``("hf", "<owner>/<name>")`` — HF dataset of captures.
-
-    Detection is content-based: ``<owner>/<name>`` is treated as HF
-    only when no local directory by that name exists, so a relative
-    path like ``./my-org/my-data`` (or ``my-org/my-data`` when it
-    exists as a dir) wins over the HF interpretation. Run-id and rid
-    are inferred from shape + existence under cwd's ``.agentcap``."""
-    import re
-    if target is None:
-        return "workspace", Path.cwd() / _WORKSPACE_DIR
-
-    if target.endswith(".parquet"):
-        p = Path(target)
-        if not p.is_file():
-            raise click.UsageError(f"parquet not found: {target}")
-        return "parquet", p
-
-    if target.startswith("hf://"):
-        s = target.removeprefix("hf://datasets/").removeprefix("hf://").strip("/")
-        if s.count("/") == 1 and all(s.split("/")):
-            return "hf", s
-        raise click.UsageError(f"invalid hf URI: {target!r}")
-
-    # Local directory → workspace (accept either parent or .agentcap).
-    # Normalize first so ``.`` / ``<path>/.agentcap/.`` / trailing-slash
-    # forms classify correctly (``Path('.').name`` is ``''``, not
-    # ``'.agentcap'``).
-    if Path(target).is_dir():
-        p = Path(os.path.normpath(target)).absolute()
-        ws = p if p.name == _WORKSPACE_DIR else p / _WORKSPACE_DIR
-        return "workspace", ws
-
-    # Run-id under cwd's .agentcap (run dirs always carry a timestamp,
-    # so they reliably contain a dash).
-    cwd_ws = Path.cwd() / _WORKSPACE_DIR
-    if "-" in target and (cwd_ws / target / "run.json").is_file():
-        return "workspace-run", target
-
-    # ``<owner>/<name>`` HF shorthand — only when it's not a local path.
-    if target.count("/") == 1 and all(target.split("/")):
-        return "hf", target
-
-    # All-hex string → request-id (looked up in cwd workspace).
-    if re.fullmatch(r"[0-9a-f]+", target) and len(target) >= 6:
-        return "rid", target
-
-    raise click.UsageError(
-        f"can't classify TARGET {target!r}: expected a directory, "
-        f"a .parquet file, an hf:// URI, an <owner>/<name> shorthand, "
-        f"a run-id (under ./.agentcap/), or a request-id (hex)."
-    )
-
-
-@functools.lru_cache(maxsize=1)
-def _hf_filesystem():
-    """Authenticated, process-wide ``HfFileSystem``."""
-    from huggingface_hub import HfFileSystem, get_token
-    return HfFileSystem(token=get_token())
-
-
-def _fetch_hf_parquet_meta(
-    repo_id: str, path: str, *,
-    revision: str | None = None,
-    kv_only: bool = False,
-) -> dict:
-    """Returns ``{agent, model, num_rows, tasks: [{id, turns, prompt}]}``.
-    ``kv_only=True`` skips the row-group reads — return value has no
-    ``tasks`` key in that case (the preview cmd uses its presence to
-    distinguish a partial write from "no task_id schema")."""
-    import json as _json
-    import pyarrow.parquet as pq
-    from huggingface_hub import try_to_load_from_cache
-    out: dict = {"agent": None, "model": None, "num_rows": 0}
-
-    opener = None
-    if revision:
-        local = try_to_load_from_cache(
-            repo_id=repo_id, filename=path,
-            repo_type="dataset", revision=revision,
-        )
-        if isinstance(local, str) and Path(local).is_file():
-            opener = open(local, "rb")
-    if opener is None:
-        opener = _hf_filesystem().open(f"datasets/{repo_id}/{path}", "rb")
-
-    with opener as fh:
-        pf = pq.ParquetFile(fh)
-        out["num_rows"] = pf.metadata.num_rows
-        # Schema-level KV metadata: ``export_local`` stamps ``agent``
-        # / ``model`` / ``tasks`` here. Bytes-keyed; ``None`` when
-        # missing.
-        schema_md = pf.schema_arrow.metadata or {}
-        for key in ("agent", "model"):
-            v = schema_md.get(key.encode())
-            if v:
-                out[key] = v.decode("utf-8", errors="replace")
-        tasks_raw = schema_md.get(b"tasks")
-        if tasks_raw:
-            try:
-                out["tasks"] = _json.loads(tasks_raw.decode("utf-8"))
-                return out  # KV has the full preview slice — no row-group read needed
-            except _json.JSONDecodeError:
-                pass
-        if kv_only:
-            return out
-        # Legacy fallback for parquets exported before tasks landed in KV.
-        out["tasks"] = []
-        cols = pf.schema_arrow.names
-        if "task_id" in cols and pf.num_row_groups:
-            # Row group 0 sample only — tasks in later row groups
-            # don't show up in the preview.
-            rg_cols = ["task_id"]
-            if "turn" in cols:
-                rg_cols.append("turn")
-            if "request" in cols:
-                rg_cols.append("request")
-            rg = pf.read_row_group(0, columns=rg_cols)
-            tids = rg.column("task_id").to_pylist()
-            turns = (
-                rg.column("turn").to_pylist()
-                if "turn" in rg_cols else [None] * len(tids)
-            )
-            raws = (
-                rg.column("request").to_pylist()
-                if "request" in rg_cols else [None] * len(tids)
-            )
-            per_task: dict[str, dict] = {}
-            for tid, t, raw in zip(tids, turns, raws):
-                if not tid:
-                    continue
-                d = per_task.setdefault(tid, {"turns": 0, "prompt": None})
-                if t is not None and int(t) > d["turns"]:
-                    d["turns"] = int(t)
-                if d["prompt"] is None and raw:
-                    try:
-                        msgs = (_json.loads(raw) or {}).get("messages") or []
-                    except (_json.JSONDecodeError, ValueError, TypeError):
-                        msgs = []
-                    for m in msgs:
-                        if m.get("role") == "user":
-                            d["prompt"] = _message_text(m).replace("\n", " ")
-                            break
-            out["tasks"] = [
-                {"id": tid, "turns": per_task[tid]["turns"],
-                 "prompt": per_task[tid]["prompt"]}
-                for tid in sorted(per_task)
-            ]
-    return out
-
-
-def _hf_list_parquets(repo_id: str) -> list[dict]:
-    """``.parquet`` files in ``<owner>/<name>`` as ``[{path, size}, ...]``,
-    sorted by path. Per-parquet metadata is hydrated later from each
-    parquet's footer."""
-    from huggingface_hub import HfApi
-    api = HfApi()
-    tree = api.list_repo_tree(repo_id, repo_type="dataset", recursive=True)
-    base: list[dict] = []
-    for entry in tree:
-        path = getattr(entry, "path", None) or getattr(entry, "rfilename", None)
-        if not path or not path.endswith(".parquet"):
-            continue
-        size = getattr(entry, "size", None) or 0
-        base.append({"path": path, "size": int(size)})
-    base.sort(key=lambda r: r["path"])
-    return base
-
-
-def _hf_meta_tempfile(tempdir: Path, path: str) -> Path:
-    """SHA-1 prefix avoids collisions across HF paths."""
-    import hashlib
-    digest = hashlib.sha1(path.encode()).hexdigest()[:16]
-    return tempdir / f"{Path(path).stem}-{digest}.json"
-
-
-def _write_meta_atomic(target: Path, meta: dict) -> None:
-    """Atomic write via .tmp + rename so the fzf preview cmd never
-    sees a half-written file."""
-    import json as _json
-    tmp = target.with_suffix(target.suffix + ".tmp")
-    tmp.write_text(_json.dumps(meta, ensure_ascii=False))
-    tmp.replace(target)
-
-
-_HF_PREVIEW_TASK_LIMIT = 15
-
-
-@cli.command("_hf_parquet_preview", hidden=True)
-@click.option(
-    "--tempdir", "tempdir_str", required=True,
-    type=click.Path(file_okay=False, dir_okay=True),
-    help="Session tempdir populated by the prefetch subprocess.",
-)
-@click.argument("path")
-def _hf_parquet_preview_cmd(tempdir_str: str, path: str) -> None:
-    """Render the fzf preview pane from the parquet's tempfile.
-    Exits immediately if not yet on disk; the prefetch subprocess
-    POSTs ``refresh-preview`` to re-invoke us once it lands."""
-    import json as _json
-    tempdir = Path(tempdir_str)
-    target = _hf_meta_tempfile(tempdir, path)
-
-    click.echo(f"path:   {path}")
-    if not target.is_file():
-        click.echo("loading…")
-        return
-
-    try:
-        meta = _json.loads(target.read_text())
-    except (OSError, _json.JSONDecodeError) as exc:
-        click.echo(f"(preview failed: {type(exc).__name__}: {exc})")
-        return
-    click.echo(f"agent:  {meta.get('agent') or '?'}")
-    click.echo(f"model:  {meta.get('model') or '?'}")
-    click.echo(f"rows:   {meta.get('num_rows', 0):,}")
-    if "tasks" not in meta:
-        click.echo("tasks:  …")
-        return
-    tasks = meta["tasks"]
-    click.echo(f"tasks:  {len(tasks)}")
-    if not tasks:
-        click.echo()
-        click.echo("(no task_id column — pre-schema-upgrade parquet)")
-        return
-    click.echo()
-    click.echo("─── TASKS ───")
-    shown = tasks[:_HF_PREVIEW_TASK_LIMIT]
-    for t in shown:
-        prompt = _flatten(t.get("prompt") or "(no user message)", 120)
-        click.echo(f"  {t['id']}: ({t.get('turns', 0)} turns) {prompt}")
-    hidden = len(tasks) - len(shown)
-    if hidden > 0:
-        click.echo(f"  … and {hidden} more")
-
-
-def _short_model(model: str | None) -> str:
-    """Strip the ``org/`` prefix for display, matching the local run
-    picker's layout."""
-    return model.rsplit("/", 1)[-1] if model else "..."
-
-
-def _picker_rows(tempdir: Path, entries: list[dict]) -> list[str]:
-    """Build the picker's header + row list from current tempdir state.
-    Returns ``[header, *body]``. Columns ``AGENT  MODEL  TASKS  CAPS``
-    mirror the local run picker. ``TASKS`` shows ``?`` until Pass B
-    writes the task list for that row; widths are dynamic so layout
-    stays tight as KV / task counts land via fzf reload."""
-    import json as _json
-    loaded: list[tuple[str | None, str | None, str, str, str]] = []
-    for entry in entries:
-        path = entry["path"]
-        agent = model = None
-        n_tasks = "?"
-        n_caps = "?"
-        tmpfile = _hf_meta_tempfile(tempdir, path)
-        if tmpfile.is_file():
-            try:
-                meta = _json.loads(tmpfile.read_text())
-                agent = meta.get("agent")
-                model = meta.get("model")
-                if meta.get("num_rows") is not None:
-                    n_caps = str(meta["num_rows"])
-                if "tasks" in meta:
-                    n_tasks = str(len(meta["tasks"]))
-            except (OSError, _json.JSONDecodeError):
-                pass
-        loaded.append((agent, model, n_tasks, n_caps, path))
-
-    def _w(label: str, fn) -> int:
-        return max(len(label), *(len(fn(r)) for r in loaded)) if loaded else len(label)
-
-    agent_w = _w("AGENT", lambda r: r[0] or "...")
-    model_w = _w("MODEL", lambda r: _short_model(r[1]))
-    tasks_w = _w("TASKS", lambda r: r[2])
-    caps_w = _w("CAPS", lambda r: r[3])
-
-    def _line(agent: str, model: str, tasks: str, caps: str, path: str = "") -> str:
-        return (
-            f"{agent:<{agent_w}}  {model:<{model_w}}  "
-            f"{tasks:>{tasks_w}}  {caps:>{caps_w}}"
-            + (f"\t{path}" if path else "")
-        )
-
-    header = _line("AGENT", "MODEL", "TASKS", "CAPS")
-    body = [
-        _line(a or "...", _short_model(m), t, c, p)
-        for a, m, t, c, p in loaded
-    ]
-    return [header, *body]
-
-
-@cli.command("_hf_picker_list", hidden=True)
-@click.option(
-    "--tempdir", "tempdir_str", required=True,
-    type=click.Path(file_okay=False, dir_okay=True),
-)
-@click.option(
-    "--paths-file", "paths_file", required=True,
-    type=click.Path(file_okay=True, dir_okay=False),
-)
-def _hf_picker_list_cmd(tempdir_str: str, paths_file: str) -> None:
-    """Emit current rows to stdout. fzf's ``reload(...)`` source —
-    re-invoked after each Pass-A write."""
-    import json as _json
-    try:
-        entries = _json.loads(Path(paths_file).read_text())
-    except (OSError, _json.JSONDecodeError):
-        return
-    for line in _picker_rows(Path(tempdir_str), entries):
-        click.echo(line)
-
-
-@cli.command("_hf_prefetch", hidden=True)
-@click.option(
-    "--tempdir", "tempdir_str", required=True,
-    type=click.Path(file_okay=False, dir_okay=True),
-)
-@click.option("--repo", "repo_id", required=True)
-@click.option(
-    "--fzf-port", "fzf_port", type=int, default=None,
-    help="HTTP port of fzf's --listen server, for refresh-preview "
-         "after each successful fetch.",
-)
-@click.option("--revision", "revision", type=str, default=None)
-@click.option("--paths-file", "paths_file", type=str, default=None)
-def _hf_prefetch_cmd(
-    tempdir_str: str, repo_id: str, fzf_port: int | None,
-    revision: str | None, paths_file: str | None,
-) -> None:
-    """Background fetcher: reads paths from stdin, runs Pass A
-    (parallel KV-only) and Pass B (serial full) concurrently, POSTs
-    fzf actions after each successful write. SIGKILLed by the picker
-    when fzf exits."""
-    import json as _json
-    import shlex as _shlex
-    import sys as _sys
-    import urllib.error
-    import urllib.request
-    tempdir = Path(tempdir_str)
-    try:
-        paths = _json.loads(_sys.stdin.read())
-    except (OSError, _json.JSONDecodeError):
-        return
-
-    # fzf invokes this as a shell cmd on each ``reload(...)`` POST.
-    reload_cmd = (
-        f"{_shlex.quote(_sys.executable)} -m agentcap _hf_picker_list"
-        f" --tempdir={_shlex.quote(tempdir_str)}"
-        f" --paths-file={_shlex.quote(paths_file or '')}"
-    ) if paths_file else None
-
-    def _nudge_fzf(body: bytes) -> None:
-        if fzf_port is None:
-            return
-        req = urllib.request.Request(
-            f"http://127.0.0.1:{fzf_port}/", data=body, method="POST",
-        )
-        try:
-            with urllib.request.urlopen(req, timeout=0.5) as resp:
-                resp.read()
-        except (urllib.error.URLError, OSError):
-            pass  # fzf not up yet, or already exited — harmless
-
-    def _has_tasks(target: Path) -> bool:
-        """True if target already holds a Pass-B (full) write."""
-        if not target.is_file():
-            return False
-        try:
-            return "tasks" in _json.loads(target.read_text())
-        except (OSError, _json.JSONDecodeError):
-            return False
-
-    # Pass A: KV-only footer reads, 4-way parallel.
-    def _pass_kv(path: str) -> None:
-        target = _hf_meta_tempfile(tempdir, path)
-        if _has_tasks(target):
-            return  # full data already there; don't clobber
-        try:
-            meta = _fetch_hf_parquet_meta(
-                repo_id, path, revision=revision, kv_only=True,
-            )
-        except Exception:  # noqa: BLE001
-            return
-        # Re-check: Pass-B may have finished writing the FULL file
-        # while our network fetch was in flight; overwriting it with
-        # KV-only would discard tasks and leave the preview stuck.
-        if _has_tasks(target):
-            return
-        try:
-            _write_meta_atomic(target, meta)
-        except OSError:
-            return
-        if reload_cmd is not None:
-            _nudge_fzf(f"reload({reload_cmd})".encode())
-
-    # Pass B: full row-group reads, serial (avoids HF retry storms).
-    def _pass_full(path: str) -> None:
-        target = _hf_meta_tempfile(tempdir, path)
-        if _has_tasks(target):
-            return
-        try:
-            meta = _fetch_hf_parquet_meta(
-                repo_id, path, revision=revision, kv_only=False,
-            )
-        except Exception:  # noqa: BLE001
-            return
-        try:
-            _write_meta_atomic(target, meta)
-        except OSError:
-            return
-        # ``reload`` so the row's TASKS count refreshes;
-        # ``refresh-preview`` so the focused row's preview pane picks up
-        # the new task list.
-        if reload_cmd is not None:
-            _nudge_fzf(f"reload({reload_cmd})+refresh-preview".encode())
-        else:
-            _nudge_fzf(b"refresh-preview")
-
-    import threading
-    from concurrent.futures import ThreadPoolExecutor
-
-    # Passes run concurrently so labels and previews fill in
-    # independently.
-    def _run_pass_a() -> None:
-        with ThreadPoolExecutor(max_workers=4) as pool:
-            list(pool.map(_pass_kv, paths))
-
-    a_thread = threading.Thread(target=_run_pass_a, daemon=True)
-    a_thread.start()
-    for path in paths:
-        _pass_full(path)
-    a_thread.join(timeout=5)
-
-
-def _pick_hf_dataset_parquet(
-    repo_id: str, tempdir: Path,
-    rows: list[dict], revision: str | None,
-) -> Path | None:
-    """Pick a parquet from an HF dataset repo. Returns its local
-    path (via ``hf_hub_download`` after the user selects), or
-    ``None`` on Esc. ``tempdir`` outlives this call so re-entries
-    reuse already-fetched tempfiles."""
-    import json as _json
-    import shlex
-    import socket
-    import subprocess
-    import sys
-    from huggingface_hub import hf_hub_download
-
-    paths_file = tempdir / "paths.json"
-    paths_file.write_text(_json.dumps(
-        [{"path": r["path"], "size": r["size"]} for r in rows],
-    ))
-
-    lines = _picker_rows(
-        tempdir, [{"path": r["path"], "size": r["size"]} for r in rows],
-    )
-
-    # Pre-allocate fzf's --listen port.
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(("127.0.0.1", 0))
-        fzf_port = s.getsockname()[1]
-
-    manifest = _json.dumps([r["path"] for r in rows])
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "agentcap", "_hf_prefetch",
-         "--tempdir", str(tempdir), "--repo", repo_id,
-         "--fzf-port", str(fzf_port),
-         "--paths-file", str(paths_file),
-         *(["--revision", revision] if revision else []),
-         ],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL,
-    )
-    try:
-        assert proc.stdin is not None
-        proc.stdin.write(manifest.encode())
-        proc.stdin.close()
-    except (BrokenPipeError, OSError):
-        pass  # subprocess died early — we'll detect it via poll()
-
-    preview = (
-        f"{sys.executable} -m agentcap _hf_parquet_preview"
-        f" --tempdir={shlex.quote(str(tempdir))} {{2}} 2>/dev/null"
-    )
-    try:
-        picked = _fzf_pick(
-            None, lines, preview,
-            extra_args=[
-                "--delimiter", "\t", "--with-nth", "1",
-                "--no-hscroll",
-                f"--listen=127.0.0.1:{fzf_port}",
-            ],
-        )
-    finally:
-        if proc.poll() is None:
-            proc.kill()
-            try:
-                proc.wait(timeout=2)
-            except subprocess.TimeoutExpired:
-                pass
-    if picked is None:
-        return None
-    rel = picked.rsplit("\t", 1)[-1].strip()
-    return Path(hf_hub_download(
-        repo_id=repo_id, repo_type="dataset", filename=rel,
-    ))
-
-
-def _pick_parquet_request(parquet_path: Path) -> str | None:
-    """fzf picker over the rows of a captures parquet. Same shape as
-    ``_pick_workspace_request`` but the preview pipeline shells out to
-    ``_preview_parquet`` (which reads from the parquet) instead of
-    ``_preview`` (which scans the workspace). Returns the picked
-    FULL rid or ``None`` if cancelled. Unlike the workspace flow
-    (which accepts an 8-char prefix because ``resolve_workspace_rid``
-    expands it), the parquet-source path through ``_resolve_request_id``
-    does an exact-match lookup, so we must return the full rid."""
-    import shlex
-    import sys
-
-    rows = _enumerate_parquet_requests(parquet_path)
-    if not rows:
-        raise click.UsageError(f"no rows in {parquet_path}")
-    header, fzf_lines = _format_inspect_rows(rows)
-    pq_quoted = shlex.quote(str(parquet_path))
-    preview = (
-        f"{sys.executable} -m agentcap _preview_parquet {pq_quoted}"
-        f" {{2}} {{3}} 2>/dev/null"
-        f" | head -400"
-        f" | {sys.executable} -m agentcap _highlight {{q}}"
-    )
-    extra = [
-        "--delimiter", "\t", "--with-nth", "1",
-        "--no-hscroll",
-        "--bind", "change:refresh-preview",
-    ]
-    picked = _fzf_pick(header, fzf_lines, preview, extra_args=extra)
-    if picked is None:
-        return None
-    # Hidden tab-delim column 2 carries the full 32-char rid
-    # (set by ``_format_inspect_rows``). Avoid the visible 8-char
-    # prefix — the parquet's request_id column stores full rids.
-    fields = picked.split("\t")
-    import re
-    full_rid = fields[1] if len(fields) >= 2 else ""
-    if not re.fullmatch(r"[0-9a-f]{32}", full_rid):
-        return None
-    return full_rid
-
-
-@cli.command("inspect")
-@click.argument("target", required=False, shell_complete=_complete_request_ids)
-@click.option(
-    "--rid",
-    "print_rid_only",
-    is_flag=True,
-    help="When picking interactively, print only the selected request-id "
-    "(so it can be captured or piped into another command).",
-)
-def inspect_cmd(target: str | None, print_rid_only: bool) -> None:
-    """Inspect captured requests.
-
-    \b
-    - ``agentcap inspect``                          pick from cwd workspace
-    - ``agentcap inspect <run-id>``                 pick from one run in cwd workspace
-    - ``agentcap inspect <rid>``                    print the captured body
-    - ``agentcap inspect <dir>``                    pick from another local workspace
-    - ``agentcap inspect <file>.parquet``           pick from a captures parquet
-    - ``agentcap inspect hf://datasets/<o>/<n>``    pick from an HF dataset
-    - ``agentcap inspect <owner>/<name>``           same as above (shorthand)
-
-    TARGET is classified by content (does the path exist? does it
-    look like an hf URI? all hex?). ``inspect`` does NOT consult
-    ``$AGENTCAP_WORKSPACE`` — what you point it at is what you get.
-
-    The interactive pickers require fzf on PATH.
-    """
-    import json as _json
-
-    kind, payload = _classify_target(target)
-
-    if kind == "rid":
-        # Body dump from cwd workspace.
-        cwd_ws = Path.cwd() / _WORKSPACE_DIR
-        full_rid, body, resp_rec, _, _ = _resolve_request_id(
-            payload, None, workspace=cwd_ws,  # type: ignore[arg-type]
-        )
-        if resp_rec is not None:
-            click.echo(
-                f"  request_id={full_rid} "
-                f"captured_at={resp_rec.get('captured_at_resp', '?')} "
-                f"status={resp_rec.get('status_code', '?')}",
-                err=True,
-            )
-        click.echo(_json.dumps(body, indent=2, ensure_ascii=False))
-        return
-
-    if kind == "workspace":
-        ws: Path = payload  # type: ignore[assignment]
-        # run picker → request picker → message picker. Esc walks
-        # back one level at a time.
-        while True:
-            scope = _pick_workspace_run(workspace=ws)
-            if scope is None:
-                return
-            last_pick: str | None = None
-            while True:
-                pick = _pick_workspace_request(
-                    scope, initial_short_rid=last_pick, workspace=ws,
-                )
-                if pick is None:
-                    break
-                last_pick = pick
-                if print_rid_only:
-                    full_rid, _, _, _, _ = _resolve_request_id(
-                        pick, None, workspace=ws,
-                    )
-                    click.echo(full_rid)
-                    return
-                _pick_request_message(pick, workspace=ws)
-
-    if kind == "workspace-run":
-        ws = Path.cwd() / _WORKSPACE_DIR
-        scope = payload  # type: ignore[assignment]
-        last_pick = None
-        while True:
-            pick = _pick_workspace_request(
-                scope, initial_short_rid=last_pick, workspace=ws,
-            )
-            if pick is None:
-                return  # explicit run-id on CLI; Esc → exit
-            last_pick = pick
-            if print_rid_only:
-                full_rid, _, _, _, _ = _resolve_request_id(
-                    pick, None, workspace=ws,
-                )
-                click.echo(full_rid)
-                return
-            _pick_request_message(pick, workspace=ws)
-
-    if kind in ("parquet", "hf"):
-        # ``hf`` holds the picker's tempdir + row list at this scope
-        # so Esc-back re-entries are instant.
-        import contextlib as _contextlib
-        import logging as _logging
-        import tempfile as _tempfile
-        if kind == "hf":
-            # Mute huggingface_hub's retry warnings so transient hub
-            # slowness doesn't leak to the user's terminal.
-            _logging.getLogger("huggingface_hub").setLevel(_logging.ERROR)
-            from huggingface_hub import HfApi
-            hf_rows = _hf_list_parquets(payload)  # type: ignore[arg-type]
-            if not hf_rows:
-                raise click.UsageError(f"no .parquet files in {payload}")
-            try:
-                hf_revision = HfApi().repo_info(
-                    payload, repo_type="dataset",  # type: ignore[arg-type]
-                ).sha
-            except Exception:  # noqa: BLE001
-                hf_revision = None
-            td_cm = _tempfile.TemporaryDirectory(prefix="agentcap-hf-meta-")
-        else:
-            td_cm = _contextlib.nullcontext(None)
-            hf_rows, hf_revision = [], None
-        with td_cm as td_str:
-            hf_tempdir = Path(td_str) if td_str else None
-            while True:
-                if kind == "hf":
-                    pq_path = _pick_hf_dataset_parquet(
-                        payload, hf_tempdir, hf_rows, hf_revision,  # type: ignore[arg-type]
-                    )
-                    if pq_path is None:
-                        return  # Esc on the parquet picker → exit
-                else:
-                    pq_path = Path(payload)  # type: ignore[arg-type]
-                pq_source = str(pq_path)
-                while True:
-                    pick = _pick_parquet_request(pq_path)
-                    if pick is None:
-                        break  # Esc on the request picker → back one level
-                    if print_rid_only:
-                        full_rid, _, _, _, _ = _resolve_request_id(pick, pq_source)
-                        click.echo(full_rid)
-                        return
-                    _pick_request_message(pick, source=pq_source)
-                if kind == "parquet":
-                    return  # explicit parquet on CLI; Esc → exit
-
-
-@cli.command("_run_preview", hidden=True)
-@click.argument("run_id")
-@click.option("--workspace", default=None, help="Workspace root (.agentcap dir).")
-def _run_preview_cmd(run_id: str, workspace: str | None) -> None:
-    """Internal: preview a run's metadata for the run picker."""
-    import json as _json
-
-    root = Path(workspace) if workspace else _workspace_root()
-    run_dir = root / run_id
-    meta_path = run_dir / "run.json"
-    if not meta_path.is_file():
-        click.echo(f"(no run.json at {meta_path})")
-        return
-    try:
-        meta = _json.loads(meta_path.read_text())
-    except (OSError, _json.JSONDecodeError) as exc:
-        click.echo(f"(run.json unreadable: {exc})")
-        return
-    captures = run_dir / "captures"
-    n_caps = (
-        len(list(captures.glob("*.request.json"))) if captures.is_dir() else 0
-    )
-    click.echo(f"run:       {run_id}")
-    click.echo(f"agent:     {meta.get('agent', '?')}")
-    click.echo(f"model:     {meta.get('model', '?')}")
-    click.echo(f"upstream:  {meta.get('upstream', '?')}")
-    click.echo(f"followup:  {meta.get('followup', '?')}")
-    click.echo(f"turns/task: {meta.get('turns_per_task', '?')}")
-    click.echo(f"captures:  {n_caps}")
-    click.echo()
-    click.echo("─── TASKS ───")
-    for t in meta.get("tasks") or []:
-        prompt = (t.get("prompt") or "").replace("\n", " ")
-        completed = t.get("completed_turns", "?")
-        click.echo(f"  {t.get('task_id', '?')}: ({completed} turns) {prompt}")
-
-
-def _message_key(m: dict) -> tuple:
-    """Canonical key for a ``messages[]`` entry. Compares only the
-    load-bearing fields (role/content/tool_call_id/tool_calls); ignores
-    optional metadata like the tool ``name`` field that some agents
-    include on one turn but not the next (notably hermes when it
-    re-serialises its session DB across turn boundaries)."""
-    import json as _json
-    c = m.get("content")
-    if isinstance(c, list):
-        c = _json.dumps(c, sort_keys=True)
-    tc = m.get("tool_calls")
-    tc_key = _json.dumps(tc, sort_keys=True) if tc else None
-    return (m.get("role"), c, m.get("tool_call_id"), tc_key)
-
-
-def _diff_messages(prev: list, curr: list) -> tuple[list, list]:
-    """``(removed, added)`` — the suffixes of ``prev`` and ``curr`` that
-    diverge. Element-by-element so a length-equal turn boundary (where
-    an agent swaps a meta-prompt for the user's followup at the last
-    index) shows up as a real diff. Pure-append cases yield
-    ``removed=[]``; swaps yield non-empty removed AND added of equal
-    or unequal length depending on the truncation.
-    """
-    prev_keys = [_message_key(m) for m in prev]
-    curr_keys = [_message_key(m) for m in curr]
-    n = min(len(prev_keys), len(curr_keys))
-    i = n
-    for j in range(n):
-        if prev_keys[j] != curr_keys[j]:
-            i = j
-            break
-    return prev[i:], curr[i:]
-
-
-def _delta_label(removed: int, added: int) -> str:
-    """Compact ``messages[]`` delta marker. Hides the removed count
-    when zero (the common pure-append case) so mid-loop rows stay
-    visually quiet; surfaces it for swaps (e.g. ``-1 +1``)."""
-    if removed:
-        return f"-{removed} +{added}"
-    return f"+{added}"
-
-
-def _message_text(m: dict) -> str:
-    """Flatten ``message.content`` to a string. Tool / multimodal
-    messages carry list-typed content; join the text parts."""
-    c = m.get("content")
-    if isinstance(c, list):
-        return " ".join(
-            p.get("text", "") for p in c if isinstance(p, dict)
-        )
-    return c or ""
-
-
-def _flatten(s: str, cap: int) -> str:
-    """Single-line, length-capped text. Without this, content with
-    embedded newlines (assistant prose, tool outputs) would blow up to
-    many visible lines and push later messages off fzf's preview
-    window."""
-    s = " ".join(s.split())
-    return s if len(s) <= cap else s[:cap] + "…"
-
-
-_PICKER_SUMMARY_CAP = 160
-_PREVIEW_MSG_CAP = 400
-
-
-def _tag(label: str) -> str:
-    """Reverse-video the ``[label]`` marker that introduces each
-    preview line so the role boundaries are visually scannable across
-    many similar-looking rows."""
-    return f"\033[7m[{label}]\033[0m"
-
-
-def _message_summary(m: dict) -> str:
-    """One-line role-aware summary of one ``messages[]`` entry. Used
-    in the picker's MESSAGES column where we have ~one row to convey
-    'what's new in this call'. Truncated so a large tool result can't
-    bloat the row."""
-    role = (m or {}).get("role", "?")
-    if role == "assistant":
-        tcs = m.get("tool_calls") or []
-        if tcs:
-            tc = tcs[0]
-            fn = (tc.get("function") or {}).get("name") or "?"
-            args = (tc.get("function") or {}).get("arguments") or ""
-            extra = f" +{len(tcs)-1}" if len(tcs) > 1 else ""
-            s = f"assistant→{fn}{extra} {args}"
-        else:
-            s = f"assistant: {_message_text(m)}"
-    elif role == "tool":
-        s = f"tool: {_message_text(m)}"
-    else:
-        s = f"{role}: {_message_text(m)}"
-    return _flatten(s, _PICKER_SUMMARY_CAP)
-
-
-def _render_preview_message(m: dict) -> None:
-    """Render one ``messages[]`` entry into the inspect preview pane.
-    Each message stays on one line (newlines collapsed) so the diff
-    suffix remains visible inside fzf's 60% pane. ``color=True`` on
-    every echo: this command's stdout is captured by fzf's preview
-    subprocess (not a TTY), and click strips ANSI by default in that
-    case, which would silently swallow the reverse-video markers."""
-    role = m.get("role", "?")
-    if role == "assistant":
-        for tc in m.get("tool_calls") or []:
-            fn = (tc.get("function") or {}).get("name") or "?"
-            args = (tc.get("function") or {}).get("arguments") or ""
-            click.echo(
-                f"  {_tag(f'assistant tool_call → {fn}')}  args={_flatten(args, 240)}",
-                color=True,
-            )
-        content = _message_text(m)
-        if content:
-            click.echo(
-                f"  {_tag('assistant content')} {_flatten(content, _PREVIEW_MSG_CAP)}",
-                color=True,
-            )
-        return
-    if role == "tool":
-        tcid = (m.get("tool_call_id") or "?")[:8]
-        click.echo(f"  {_tag(f'tool result, tool_call_id={tcid}')}", color=True)
-        click.echo(f"  {_flatten(_message_text(m), _PREVIEW_MSG_CAP)}", color=True)
-        return
-    click.echo(
-        f"  {_tag(role)} {_flatten(_message_text(m), _PREVIEW_MSG_CAP)}",
-        color=True,
-    )
-
-
-@cli.command("_preview", hidden=True)
-@click.argument("request_id")
-@click.argument("prev_request_id", required=False, default=None)
-@click.option("--workspace", default=None, help="Workspace root (.agentcap dir).")
-def _preview_cmd(
-    request_id: str, prev_request_id: str | None, workspace: str | None,
-) -> None:
-    """Internal: header + initial PROMPT + MESSAGES diff for one
-    captured request — used by the fzf preview pane.
-
-    Not part of the public CLI surface — hidden from ``--help``. The
-    user-facing inspector is ``agentcap inspect <rid>``.
-
-    ``prev_request_id`` is pushed in by the picker so the preview can
-    load the diff base directly instead of scanning the capture dir on
-    every fzf hover. Accepts ``"-"`` (or absent) for "no previous".
-    """
-    import json as _json
-    import re
-
-    # Hovered a section-header line in the picker — render nothing.
-    if not re.fullmatch(r"[0-9a-f]+", request_id):
-        click.echo("(section header — navigate to a request id)")
-        return
-
-    ws = Path(workspace) if workspace else None
-    full_rid, body, resp_rec, req_rec, cap_dir = _resolve_request_id(
-        request_id, None, workspace=ws,
-    )
-    messages = body.get("messages") or []
-    initial_user = next(
-        (m for m in messages if m.get("role") == "user"),
-        None,
-    )
-    initial_prompt = _message_text(initial_user or {})
-    import time as _time
-
-    status = (
-        resp_rec.get("status_code") if resp_rec is not None else "?"
-    )
-    serialized = _json.dumps(body, ensure_ascii=False)
-    size_b = len(serialized.encode("utf-8"))
-    task_id = (req_rec or {}).get("task_id")
-    turn = (req_rec or {}).get("turn")
-    captured_at = (req_rec or {}).get("captured_at")
-    ts = (
-        _time.strftime("%H:%M:%S", _time.gmtime(int(captured_at)))
-        if captured_at else "?"
-    )
-    # Load the diff base directly from the prev-rid file in the same
-    # capture dir (already known from ``_resolve_request_id`` above —
-    # no second workspace scan). The picker pushes the predecessor's
-    # rid in as ``prev_request_id``. Reject anything that isn't
-    # lowercase hex so a hand-crafted arg can't escape the capture
-    # dir via ``..`` or absolute paths.
-    prev_messages: list = []
-    has_previous = False
-    if (
-        cap_dir is not None
-        and prev_request_id
-        and prev_request_id != "-"
-        and re.fullmatch(r"[0-9a-f]+", prev_request_id)
-    ):
-        prev_path = cap_dir / f"{prev_request_id}.request.json"
-        if prev_path.is_file():
-            try:
-                prev_rec = _json.loads(prev_path.read_text())
-                prev_messages = (prev_rec.get("body") or {}).get("messages") or []
-                has_previous = True
-            except (OSError, _json.JSONDecodeError):
-                pass
-    click.echo(f"rid:    {full_rid}")
-    if task_id is not None or turn is not None:
-        click.echo(f"task:   {task_id or '?'}  turn={turn if turn is not None else '?'}")
-    click.echo(f"time:   {ts}")
-    click.echo(f"status: {status}")
-    click.echo(f"model:  {body.get('model', '?')}")
-    click.echo(f"size:   {size_b:,} bytes (~{size_b // 4:,} tokens)")
-    click.echo()
-    click.echo("─── PROMPT ──────────────────────────────────────────────")
-    click.echo(initial_prompt or "(no user message)")
-    click.echo()
-    removed_messages, new_messages = _diff_messages(prev_messages, messages)
-    if has_previous:
-        header_suffix = (
-            f"{_delta_label(len(removed_messages), len(new_messages))} "
-            f"since previous call"
-        )
-    else:
-        n = len(new_messages)
-        header_suffix = f"initial: {n} msg{'' if n == 1 else 's'}"
-    click.echo(f"─── MESSAGES ({header_suffix}) ──────────")
-    if has_previous:
-        # Signals that the prior history (in prev_messages) was
-        # elided; what follows is the diff, not the whole conversation.
-        click.echo("  ...")
-    if not new_messages and not removed_messages:
-        click.echo("(no diff vs previous call)")
-    for m in new_messages:
-        _render_preview_message(m)
-
-
-def _load_parquet_body(parquet_path: Path, rid: str) -> tuple[dict, dict, int | None, str | None]:
-    """Pull one request out of a captures parquet. Returns
-    ``(body, resp_rec, captured_at, run_id)``.
-
-    The parquet's ``response`` column has two shapes depending on
-    whether the upstream streamed:
-
-      - stream:    ``{"stream": True, "raw": "<SSE bytes>"}``
-      - non-stream: the bare OpenAI body dict (no wrapper)
-
-    Workspace ``*.response.json`` records always have a ``body`` key,
-    and ``_decode_response`` follows that convention. Normalise the
-    non-stream parquet shape into ``{"stream": False, "body": ...}``
-    here so callers (notably ``_decode_response`` /
-    ``_request_messages_for_view``) get the model reply rendered."""
-    import json as _json
-    import pyarrow.parquet as pq
-
-    t = pq.read_table(
-        str(parquet_path),
-        columns=["request_id", "captured_at", "request", "response", "run_id"],
-        filters=[("request_id", "=", rid)],
-    )
-    if t.num_rows == 0:
-        return {}, {}, None, None
-    try:
-        body = _json.loads(t.column("request")[0].as_py() or "{}")
-    except _json.JSONDecodeError:
-        body = {}
-    try:
-        raw_resp = _json.loads(t.column("response")[0].as_py() or "{}")
-    except _json.JSONDecodeError:
-        raw_resp = {}
-    if raw_resp.get("stream"):
-        resp = raw_resp
-    else:
-        resp = {"stream": False, "body": raw_resp}
-    ts = t.column("captured_at")[0].as_py()
-    run_id = t.column("run_id")[0].as_py()
-    return body, resp, (int(ts) if ts is not None else None), run_id
-
-
-@cli.command("_preview_parquet", hidden=True)
-@click.argument("parquet_path")
-@click.argument("request_id")
-@click.argument("prev_request_id", required=False, default=None)
-def _preview_parquet_cmd(
-    parquet_path: str, request_id: str, prev_request_id: str | None,
-) -> None:
-    """Internal: same preview as ``_preview`` but sourced from a
-    parquet file. The picker passes the parquet path as a leading arg
-    so this hidden command stays stateless."""
-    import json as _json
-    import re
-    import time as _time
-
-    if not re.fullmatch(r"[0-9a-f]+", request_id):
-        click.echo("(section header — navigate to a request id)")
-        return
-    pq_path = Path(parquet_path)
-    body, resp, captured_at, run_id = _load_parquet_body(pq_path, request_id)
-    messages = body.get("messages") or []
-    initial_user = next(
-        (m for m in messages if m.get("role") == "user"),
-        None,
-    )
-    initial_prompt = _message_text(initial_user or {})
-    status = resp.get("status_code", "?") if resp else "?"
-    serialized = _json.dumps(body, ensure_ascii=False)
-    size_b = len(serialized.encode("utf-8"))
-    ts = (
-        _time.strftime("%H:%M:%S", _time.gmtime(captured_at))
-        if captured_at else "?"
-    )
-
-    prev_messages: list = []
-    has_previous = False
-    if (
-        prev_request_id
-        and prev_request_id != "-"
-        and re.fullmatch(r"[0-9a-f]+", prev_request_id)
-    ):
-        prev_body, _, _, _ = _load_parquet_body(pq_path, prev_request_id)
-        prev_messages = prev_body.get("messages") or []
-        has_previous = bool(prev_messages)
-
-    click.echo(f"rid:    {request_id}")
-    if run_id is not None:
-        click.echo(f"run:    {run_id}")
-    click.echo(f"time:   {ts}")
-    click.echo(f"status: {status}")
-    click.echo(f"model:  {body.get('model', '?')}")
-    click.echo(f"size:   {size_b:,} bytes (~{size_b // 4:,} tokens)")
-    click.echo()
-    click.echo("─── PROMPT ──────────────────────────────────────────────")
-    click.echo(initial_prompt or "(no user message)")
-    click.echo()
-    removed_messages, new_messages = _diff_messages(prev_messages, messages)
-    if has_previous:
-        header_suffix = (
-            f"{_delta_label(len(removed_messages), len(new_messages))} "
-            f"since previous call"
-        )
-    else:
-        n = len(new_messages)
-        header_suffix = f"initial: {n} msg{'' if n == 1 else 's'}"
-    click.echo(f"─── MESSAGES ({header_suffix}) ──────────")
-    if has_previous:
-        click.echo("  ...")
-    if not new_messages and not removed_messages:
-        click.echo("(no diff vs previous call)")
-    for m in new_messages:
-        _render_preview_message(m)
-
-
-def _decode_sse_response(raw: str) -> dict:
-    """Decode an OpenAI-compatible SSE response stream into a single
-    synthesized assistant message: ``{content, tool_calls,
-    finish_reason}``. Concatenates ``delta.content`` chunks; merges
-    ``delta.tool_calls`` chunks by their ``index`` field (the first
-    chunk for an index carries id + function.name; later chunks
-    accumulate ``function.arguments`` string fragments)."""
-    import json as _json
-    content_parts: list[str] = []
-    tool_calls_by_idx: dict[int, dict] = {}
-    finish_reason: str | None = None
-    for line in raw.splitlines():
-        if not line.startswith("data:"):
-            continue
-        payload = line[len("data:"):].strip()
-        if not payload or payload == "[DONE]":
-            continue
-        try:
-            obj = _json.loads(payload)
-        except (_json.JSONDecodeError, ValueError):
-            continue
-        for ch in obj.get("choices") or []:
-            delta = ch.get("delta") or {}
-            if delta.get("content"):
-                content_parts.append(delta["content"])
-            for tc_delta in delta.get("tool_calls") or []:
-                idx = tc_delta.get("index", 0)
-                slot = tool_calls_by_idx.setdefault(idx, {
-                    "id": "", "type": "function",
-                    "function": {"name": "", "arguments": ""},
-                })
-                if tc_delta.get("id"):
-                    slot["id"] = tc_delta["id"]
-                if tc_delta.get("type"):
-                    slot["type"] = tc_delta["type"]
-                fn = tc_delta.get("function") or {}
-                if fn.get("name"):
-                    slot["function"]["name"] = fn["name"]
-                if fn.get("arguments"):
-                    slot["function"]["arguments"] += fn["arguments"]
-            if ch.get("finish_reason"):
-                finish_reason = ch["finish_reason"]
-    return {
-        "content": "".join(content_parts),
-        "tool_calls": [tool_calls_by_idx[k] for k in sorted(tool_calls_by_idx)],
-        "finish_reason": finish_reason,
-    }
-
-
-def _decode_response(resp_rec: dict) -> dict:
-    """Synthesize an assistant message from a response record. Handles
-    both non-stream (``body.choices[0].message``) and stream (raw SSE
-    bytes in ``raw``)."""
-    if resp_rec.get("stream"):
-        return _decode_sse_response(resp_rec.get("raw") or "")
-    body = resp_rec.get("body") or {}
-    ch = (body.get("choices") or [{}])[0]
-    msg = ch.get("message") or {}
-    return {
-        "content": msg.get("content") or "",
-        "tool_calls": msg.get("tool_calls") or [],
-        "finish_reason": ch.get("finish_reason"),
-    }
-
-
-def _request_messages_for_view(
-    body: dict, resp_rec: dict | None
-) -> list[dict]:
-    """Flatten ``messages[]`` + decoded response into one record per
-    picker row. Each assistant ``tool_calls`` produces its own row
-    followed (if present) by a row for the assistant's content; the
-    decoded model response is appended at the end as the final
-    assistant turn so the viewer shows the model's reply inline.
-
-    Each record: ``{msg_idx, role, summary, content, ...}``. ``msg_idx``
-    is the index into the original ``messages[]`` (or ``None`` for the
-    synthesized response rows)."""
-    records: list[dict] = []
-    msgs = body.get("messages") or []
-    for i, m in enumerate(msgs):
-        role = m.get("role", "?")
-        if role == "assistant":
-            for tc in m.get("tool_calls") or []:
-                fn = (tc.get("function") or {}).get("name") or "?"
-                args = (tc.get("function") or {}).get("arguments") or ""
-                records.append({
-                    "msg_idx": i,
-                    "role": f"assistant→{fn}",
-                    "summary": args,
-                    "content": args,
-                    "tool_call_id": tc.get("id"),
-                })
-            content = _message_text(m)
-            if content:
-                records.append({
-                    "msg_idx": i,
-                    "role": "assistant",
-                    "summary": content,
-                    "content": content,
-                })
-            continue
-        if role == "tool":
-            content = _message_text(m)
-            records.append({
-                "msg_idx": i,
-                "role": "tool",
-                "summary": content,
-                "content": content,
-                "tool_call_id": m.get("tool_call_id"),
-            })
-            continue
-        content = _message_text(m)
-        records.append({
-            "msg_idx": i,
-            "role": role,
-            "summary": content,
-            "content": content,
-        })
-    if resp_rec is not None:
-        decoded = _decode_response(resp_rec)
-        for tc in decoded.get("tool_calls") or []:
-            fn = (tc.get("function") or {}).get("name") or "?"
-            args = (tc.get("function") or {}).get("arguments") or ""
-            records.append({
-                "msg_idx": None,
-                "role": f"response→{fn}",
-                "summary": args,
-                "content": args,
-                "tool_call_id": tc.get("id"),
-            })
-        content = decoded.get("content") or ""
-        if content:
-            records.append({
-                "msg_idx": None,
-                "role": "response",
-                "summary": content,
-                "content": content,
-                "finish_reason": decoded.get("finish_reason"),
-            })
-    return records
-
-
-def _render_msg_preview(records: list[dict], row: int) -> None:
-    """Echo one entry from a message list — shared between the
-    workspace- and parquet-sourced ``_msg_preview*`` commands."""
-    if row < 1 or row > len(records):
-        click.echo(f"(row {row} out of range; have {len(records)})")
-        return
-    rec = records[row - 1]
-    click.echo(f"role:         {rec['role']}")
-    if rec.get("msg_idx") is not None:
-        click.echo(f"msg_idx:      {rec['msg_idx']}")
-    else:
-        click.echo("msg_idx:      (response)")
-    if rec.get("tool_call_id"):
-        click.echo(f"tool_call_id: {rec['tool_call_id']}")
-    if rec.get("finish_reason"):
-        click.echo(f"finish_reason: {rec['finish_reason']}")
-    click.echo()
-    click.echo(rec.get("content") or "(no content)")
-
-
-@cli.command("_msg_preview", hidden=True)
-@click.argument("request_id")
-@click.argument("row", type=int)
-@click.option("--workspace", default=None, help="Workspace root (.agentcap dir).")
-def _msg_preview_cmd(
-    request_id: str, row: int, workspace: str | None,
-) -> None:
-    """Internal: render one message (1-indexed ``row``) from the
-    request's flattened message list. Used by the workspace-sourced
-    message sub-picker."""
-    import re
-    if not re.fullmatch(r"[0-9a-f]+", request_id):
-        click.echo("(invalid request id)")
-        return
-    ws = Path(workspace) if workspace else None
-    _, body, resp_rec, _, _ = _resolve_request_id(
-        request_id, None, workspace=ws,
-    )
-    _render_msg_preview(_request_messages_for_view(body, resp_rec), row)
-
-
-@cli.command("_msg_preview_parquet", hidden=True)
-@click.argument("parquet_path")
-@click.argument("request_id")
-@click.argument("row", type=int)
-def _msg_preview_parquet_cmd(
-    parquet_path: str, request_id: str, row: int,
-) -> None:
-    """Internal: same as ``_msg_preview`` but sourced from a parquet.
-    Parquet ``response`` column is a JSON blob (no streaming wrapper),
-    so we pass it through unchanged — ``_decode_response`` handles
-    both the non-stream and the SSE-wrapped shapes."""
-    import re
-    if not re.fullmatch(r"[0-9a-f]+", request_id):
-        click.echo("(invalid request id)")
-        return
-    body, resp, _, _ = _load_parquet_body(Path(parquet_path), request_id)
-    _render_msg_preview(_request_messages_for_view(body, resp or None), row)
-
-
-def _pick_request_message(
-    rid: str, *, source: str | None = None, workspace: Path | None = None,
-) -> None:
-    """Second-level fzf picker over the messages of the request the
-    user selected in the request picker. Read-only browse: Esc / Enter
-    both return to the caller without side effects.
-
-    ``source`` is ``None`` for workspace-sourced rids (current
-    behaviour) and a local parquet path for parquet-sourced rids — the
-    preview pipeline shells out to a different hidden command in each
-    case so the picker doesn't need to know how the body was loaded.
-    ``workspace`` overrides the default workspace lookup for
-    workspace-sourced rids (inspect passes cwd or the resolved
-    dir explicitly)."""
-    import shlex
-    import sys
-    # ``_resolve_request_id`` returns ``resp_rec=None`` for any
-    # ``source`` (it calls ``captures.load_request`` which only loads the
-    # request body). For parquet sources, read the response back via
-    # ``_load_parquet_body`` so the message picker can show the model
-    # reply rows synthesised by ``_request_messages_for_view``.
-    if source and source.endswith(".parquet"):
-        body, resp_rec, _, _ = _load_parquet_body(Path(source), rid)
-        full_rid = rid
-    else:
-        full_rid, body, resp_rec, _, _ = _resolve_request_id(
-            rid, source, workspace=workspace,
-        )
-    records = _request_messages_for_view(body, resp_rec)
-    if not records:
-        click.echo("(no messages in this request)")
-        return
-    role_w = max(len(r["role"]) for r in records)
-    lines: list[str] = []
-    for i, rec in enumerate(records, start=1):
-        summary = _flatten(rec.get("summary") or "", 200)
-        display = f"[{i:>3}]  {rec['role']:<{role_w}s}  {summary}"
-        # Hidden tab-delimited column 2 carries the 1-indexed row.
-        # The preview reads it as ``{2}`` instead of computing
-        # ``$(({n} + 1))`` — the latter is POSIX-arithmetic and fzf
-        # runs previews via ``$SHELL -c``, so fish would break.
-        lines.append(f"{display}\t{i}")
-    header = f"messages for {full_rid[:8]} ({len(records)} entries)"
-    if source and source.endswith(".parquet"):
-        preview = (
-            f"{sys.executable} -m agentcap _msg_preview_parquet"
-            f" {shlex.quote(source)} {full_rid} {{2}} 2>/dev/null"
-            f" | {sys.executable} -m agentcap _highlight {{q}}"
-        )
-    else:
-        ws_arg = (
-            f"--workspace {shlex.quote(str(workspace))} "
-            if workspace is not None else ""
-        )
-        preview = (
-            f"{sys.executable} -m agentcap _msg_preview {ws_arg}"
-            f"{full_rid} {{2}} 2>/dev/null"
-            f" | {sys.executable} -m agentcap _highlight {{q}}"
-        )
-    _fzf_pick(
-        header, lines, preview,
-        extra_args=[
-            "--delimiter", "\t", "--with-nth", "1",
-            "--no-hscroll",
-            "--bind", "change:refresh-preview",
-        ],
-    )
-
-
-def _parse_fzf_terms(query: str) -> list[str]:
-    """Split fzf's query into the literal text of each non-negated
-    term. Each term has its operator prefix (``'``, ``^``) and
-    trailing anchor (``$``) stripped so the remainder is the substring
-    to highlight. Negated terms (``!word``) and bare ``|`` OR
-    separators are skipped — they aren't substrings to colour."""
-    terms: list[str] = []
-    for raw in query.split():
-        if raw in ("|", ""):
-            continue
-        if raw.startswith("!"):
-            continue
-        t = raw
-        if t and t[0] in ("'", "^"):
-            t = t[1:]
-        if t.endswith("$"):
-            t = t[:-1]
-        if t:
-            terms.append(t)
-    return terms
-
-
-@cli.command("_highlight", hidden=True)
-@click.argument("query")
-def _highlight_cmd(query: str) -> None:
-    """Read stdin, write stdout with each (case-insensitive) literal
-    occurrence of every fzf search term in ``query`` wrapped in bold
-    red. Used by the inspect picker's preview pipeline so the user's
-    typed query is visible in the preview pane.
-
-    Substring match per term — agrees with fzf's exact-match operator
-    (``'word``) and the default fuzzy mode when the fuzzy chars happen
-    to be contiguous. Operators ``'``, ``^``, ``$`` are stripped from
-    each term before matching; negated terms (``!word``) and ``|`` OR
-    separators are skipped (nothing to highlight). Special characters
-    in each term are escaped, so typing ``.``, ``[``, etc. is safe.
-    """
-    import re
-    import sys
-    terms = _parse_fzf_terms(query)
-    if not terms:
-        sys.stdout.write(sys.stdin.read())
-        return
-    # Longest terms first so a longer substring isn't shadowed by a
-    # shorter one that's a prefix of it.
-    terms.sort(key=len, reverse=True)
-    pat = re.compile(
-        "|".join(re.escape(t) for t in terms), re.IGNORECASE
-    )
-    for line in sys.stdin:
-        sys.stdout.write(
-            pat.sub(lambda m: f"\033[1;31m{m.group(0)}\033[0m", line)
-        )
-
-
-def main() -> int:
-    cli.main(standalone_mode=True)
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/src/agentcap/captures.py b/src/agentcap/captures.py
deleted file mode 100644
index 4925b3c..0000000
--- a/src/agentcap/captures.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""Resolve a captured request by id and hand back the body.
-
-No agentcap-side normalisation or mutation of the JSON object — captures
-persist the request as parsed JSON, so the original byte sequence
-(whitespace, key ordering) isn't recoverable, but the JSON object is.
-Consumers that hit cross-server strictness do their own normalisation
-(see AGENTS.md #3).
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import Iterable
-
-
-def load_request(source: str, request_id: str) -> dict:
-    """Return the raw captured request body for ``request_id``.
-
-    ``source`` resolves any of:
-      - a local capture dir (``<rid>.request.json`` files),
-      - a local ``.parquet`` produced by ``agentcap export``,
-      - ``hf://datasets/<owner>/<name>`` or the bare ``<owner>/<name>``
-        form.
-
-    Raises ``KeyError`` if the id is not found.
-    """
-    return load_requests(source, [request_id])[request_id]
-
-
-def load_requests(
-    source: str, request_ids: Iterable[str]
-) -> dict[str, dict]:
-    """Batch form: one pass over the source per file, returns ``{id: body}``.
-
-    Raises ``KeyError`` listing any ids that weren't found.
-    """
-    wanted = set(request_ids)
-    if not wanted:
-        return {}
-
-    # Resolve local paths first — an existing dir/file wins over the HF
-    # heuristic, so ``runs/abc/captures`` isn't misclassified as a repo.
-    p = Path(source).expanduser()
-    if p.is_dir():
-        bodies = _load_from_capture_dir(p, wanted)
-    elif p.is_file() and p.suffix == ".parquet":
-        bodies = _load_from_parquet(p, wanted)
-    elif _looks_like_hf_source(source):
-        bodies = _load_from_hf_dataset(source, wanted)
-    else:
-        raise ValueError(
-            f"source must be a capture dir, a .parquet file, or an "
-            f"hf://datasets/... URI — got {source!r}"
-        )
-
-    missing = wanted - set(bodies)
-    if missing:
-        raise KeyError(
-            f"request_id(s) not found in {source!r}: {sorted(missing)}"
-        )
-    return bodies
-
-
-def _looks_like_hf_source(source: str) -> bool:
-    if source.startswith("hf://"):
-        return True
-    # Bare ``<owner>/<name>`` — exactly one ``/`` and no path-separator
-    # prefix. Heuristic for distinguishing an HF repo from a local path.
-    if source.startswith((".", "/", "~")):
-        return False
-    return source.count("/") == 1
-
-
-def _load_from_capture_dir(
-    capture_dir: Path, wanted: set[str]
-) -> dict[str, dict]:
-    out: dict[str, dict] = {}
-    for rid in wanted:
-        path = capture_dir / f"{rid}.request.json"
-        if not path.is_file():
-            continue
-        rec = json.loads(path.read_text())
-        body = rec.get("body")
-        if isinstance(body, dict):
-            out[rid] = body
-    return out
-
-
-def _load_from_parquet(
-    parquet_path: Path, wanted: set[str]
-) -> dict[str, dict]:
-    import pyarrow.parquet as pq
-
-    table = pq.read_table(
-        str(parquet_path),
-        columns=["request_id", "request"],
-        filters=[("request_id", "in", list(wanted))],
-    )
-    return _scan_arrow_table(table, wanted)
-
-
-def _load_from_hf_dataset(
-    source: str, wanted: set[str]
-) -> dict[str, dict]:
-    """Scan every parquet under ``data/`` in the dataset until all
-    wanted ids are found (or files exhausted)."""
-    import pyarrow.parquet as pq
-    from huggingface_hub import HfFileSystem
-
-    s = source.removeprefix("hf://datasets/").strip("/")
-    parts = s.split("/")
-    if len(parts) != 2 or not parts[0] or not parts[1]:
-        raise ValueError(
-            f"hf source must be <owner>/<name>, got {source!r}"
-        )
-    repo_id = f"{parts[0]}/{parts[1]}"
-    fs = HfFileSystem()
-    prefix = f"datasets/{repo_id}/data"
-
-    out: dict[str, dict] = {}
-    remaining = set(wanted)
-    for entry in fs.ls(prefix, detail=True):
-        if entry.get("type") != "file" or not entry["name"].endswith(".parquet"):
-            continue
-        with fs.open(entry["name"], "rb") as fh:
-            table = pq.read_table(
-                fh,
-                columns=["request_id", "request"],
-                filters=[("request_id", "in", list(remaining))],
-            )
-        found = _scan_arrow_table(table, remaining)
-        out.update(found)
-        remaining -= set(found)
-        if not remaining:
-            break
-    return out
-
-
-def _scan_arrow_table(table, wanted: set[str]) -> dict[str, dict]:
-    out: dict[str, dict] = {}
-    rid_col = table.column("request_id").to_pylist()
-    req_col = table.column("request").to_pylist()
-    for rid, req_str in zip(rid_col, req_col):
-        if rid in wanted and isinstance(req_str, str):
-            out[rid] = json.loads(req_str)
-    return out
-
-
-class AmbiguousRequestId(Exception):
-    """Raised when a short rid prefix matches more than one captured
-    request — caller should ask the user to disambiguate (like
-    ``git`` does)."""
-
-    def __init__(self, prefix: str, matches: list[str]):
-        self.prefix = prefix
-        self.matches = matches
-        super().__init__(
-            f"rid prefix {prefix!r} is ambiguous ({len(matches)} matches): "
-            f"{', '.join(sorted(matches)[:5])}{'…' if len(matches) > 5 else ''}"
-        )
-
-
-def resolve_workspace_rid(
-    workspace_root: Path, request_id: str
-) -> tuple[Path, str] | None:
-    """Find the capture dir + full rid for a (possibly truncated) request id.
-
-    Returns ``(capture_dir, full_rid)`` for the unique match, ``None`` if
-    no match. Raises ``AmbiguousRequestId`` when multiple rids share the
-    prefix.
-    """
-    if not workspace_root.is_dir():
-        return None
-    matches: list[tuple[Path, str]] = []
-    for run_dir in workspace_root.iterdir():
-        captures = run_dir / "captures"
-        if not captures.is_dir():
-            continue
-        # Exact match shortcut — also makes full-length rids O(1).
-        exact = captures / f"{request_id}.request.json"
-        if exact.is_file():
-            return captures, request_id
-        for hit in captures.glob(f"{request_id}*.request.json"):
-            matches.append((captures, hit.name.removesuffix(".request.json")))
-    if not matches:
-        return None
-    if len(matches) > 1:
-        raise AmbiguousRequestId(request_id, [m[1] for m in matches])
-    return matches[0]
diff --git a/src/agentcap/drivers/__init__.py b/src/agentcap/drivers/__init__.py
deleted file mode 100644
index ea15b46..0000000
--- a/src/agentcap/drivers/__init__.py
+++ /dev/null
@@ -1,174 +0,0 @@
-"""Agent driver adapters.
-
-A driver wraps an agent CLI (Hermes, OpenCode, …) so the orchestrator
-can:
-
-  - start a new session with an initial prompt,
-  - resume an existing session for a follow-up prompt,
-  - extract the final response text from each turn (for the
-    follow-up synthesizer).
-
-Drivers shell out to the agent's binary; they do not implement the
-agent's semantics. Configuring the agent to point at the capture proxy
-(via config file or env) is the orchestrator's responsibility.
-"""
-
-from __future__ import annotations
-
-import abc
-from dataclasses import dataclass, field
-from typing import Callable
-
-
-@dataclass
-class AgentTurn:
-    """One turn of agent execution."""
-
-    session_id: str | None
-    response_text: str
-    returncode: int
-    stdout: str
-    stderr: str
-    #: ``"<tool>: <message>"`` for each errored tool call in stdout
-    #: (driver-specific parser). Empty if the driver has no parser yet.
-    tool_errors: list[str] = field(default_factory=list)
-
-
-class AgentDriver(abc.ABC):
-    """Abstract adapter wrapping an agent CLI."""
-
-    name: str
-
-    @abc.abstractmethod
-    def start(
-        self,
-        prompt: str,
-        *,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        """Start a new session with ``prompt``. Must populate
-        ``session_id`` if the agent supports resume."""
-
-    @abc.abstractmethod
-    def resume(
-        self,
-        prompt: str,
-        *,
-        session_id: str,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        """Continue session ``session_id`` with ``prompt``. Drivers
-        whose agent doesn't natively support resume must emulate it
-        (e.g. by replaying prior messages)."""
-
-
-def _hermes_factory(**kwargs) -> AgentDriver:
-    from .hermes import HermesDriver
-
-    return HermesDriver(**kwargs)
-
-
-def _opencode_factory(**kwargs) -> AgentDriver:
-    from .opencode import OpenCodeDriver
-
-    return OpenCodeDriver(**kwargs)
-
-
-def _goose_factory(**kwargs) -> AgentDriver:
-    from .goose import GooseDriver
-
-    return GooseDriver(**kwargs)
-
-
-def _pi_factory(**kwargs) -> AgentDriver:
-    from .pi import PiDriver
-
-    return PiDriver(**kwargs)
-
-
-# Single source of truth for which agents the orchestrator supports.
-# Adding a new driver: write the module + factory, append one entry
-# here. Both ``get_driver`` and the ``--agent`` Click choice in
-# ``__main__`` consume this — they cannot drift apart.
-DRIVER_REGISTRY: dict[str, Callable[..., AgentDriver]] = {
-    "hermes": _hermes_factory,
-    "opencode": _opencode_factory,
-    "goose": _goose_factory,
-    "pi": _pi_factory,
-}
-
-
-def known_drivers() -> tuple[str, ...]:
-    """Names of registered driver adapters, in registration order.
-
-    Used to populate ``agentcap run --agent`` choices and to enumerate
-    what's available without importing each driver module eagerly.
-    """
-    return tuple(DRIVER_REGISTRY)
-
-
-# Native session-trace surfacing. Two patterns:
-#
-#  * **Symlink-the-dir** (pi, hermes): the agent writes one file per
-#    session into its native sessions dir. ``agentcap run`` bind-mounts
-#    ``<workdir>/traces/`` and the image entrypoint symlinks the
-#    native dir at it, so transcripts land on the host as they're
-#    written.
-#
-#  * **Post-run dump** (goose, opencode): the agent writes to a SQLite
-#    store; no per-session files exist on disk. The image ships a
-#    ``dump-traces`` script that lists sessions and exports each one
-#    via the agent's own CLI. The orchestrator calls it once after
-#    the corpus completes.
-#
-# The first column drives the in-container symlink at start-up
-# (set in the per-agent ``agentcap-init.sh``); the second drives the
-# post-corpus dump via :func:`traces_dump_argv_for`.
-SESSIONS_PATH_IN_CONTAINER: dict[str, str] = {
-    "pi": "/opt/pi-config/sessions",
-}
-
-# Agents whose images ship a ``dump-traces`` executable (on PATH).
-# Called as ``sandbox.run(["dump-traces"])`` after the corpus to
-# render SQLite-stored sessions into JSON/JSONL files under
-# ``AGENTCAP_TRACES_DIR``. Symlink-style agents (pi) don't need it.
-#
-# hermes/goose/opencode all use SQLite session stores — there's no
-# per-session file on disk to symlink, so we dump via the agent's
-# own export CLI post-corpus.
-_TRACES_DUMP_AGENTS: frozenset[str] = frozenset({"hermes", "goose", "opencode"})
-
-
-def sessions_path_for(agent: str) -> str | None:
-    return SESSIONS_PATH_IN_CONTAINER.get(agent)
-
-
-def traces_dump_argv_for(agent: str) -> list[str] | None:
-    """In-container argv for the post-corpus trace dump, or None if
-    the agent surfaces traces through the symlink mechanism (no dump
-    step needed)."""
-    if agent in _TRACES_DUMP_AGENTS:
-        return ["dump-traces"]
-    return None
-
-
-def get_driver(name: str, **kwargs) -> AgentDriver:
-    """Lookup a driver by short name."""
-    try:
-        factory = DRIVER_REGISTRY[name]
-    except KeyError:
-        raise ValueError(
-            f"unknown driver: {name!r}; known: {', '.join(known_drivers())}"
-        ) from None
-    return factory(**kwargs)
-
-
-__all__ = [
-    "AgentDriver",
-    "AgentTurn",
-    "DRIVER_REGISTRY",
-    "get_driver",
-    "known_drivers",
-]
diff --git a/src/agentcap/drivers/goose.py b/src/agentcap/drivers/goose.py
deleted file mode 100644
index 6da7c65..0000000
--- a/src/agentcap/drivers/goose.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""Goose driver.
-
-Drives ``goose run -t "<prompt>"`` non-interactively. The proxy URL +
-provider + ``OPENAI_API_KEY`` are baked into the per-agent image's
-ENV (see [containers/agentcap-goose.Containerfile](
-../../../containers/agentcap-goose.Containerfile)); the driver only
-sets ``GOOSE_MODEL`` per run.
-
-Goose's own session state lives at ``~/.config/goose/sessions/``
-inside the sandbox, redirected to the bind-mounted ``state/`` dir
-so it survives ``podman run --rm`` boundaries between turns.
-"""
-
-from __future__ import annotations
-
-import subprocess
-import uuid
-from pathlib import Path
-from typing import Sequence
-
-from . import AgentDriver, AgentTurn
-from ..sandbox import Sandbox
-
-
-def parse_tool_errors(stdout: str) -> list[str]:
-    # TODO: goose's tool-error format is not yet characterised.
-    return []
-
-
-class GooseDriver(AgentDriver):
-    name = "goose"
-
-    def __init__(
-        self,
-        *,
-        sandbox: Sandbox,
-        binary: str = "goose",
-        model: str | None = None,
-        cwd: Path | str | None = None,
-        extra_args: Sequence[str] = (),
-    ) -> None:
-        self.sandbox = sandbox
-        self.binary = binary
-        self.model = model
-        # ``cwd`` is sandbox-side: a host path bind-mounted into the
-        # container at the same path.
-        self.cwd = str(cwd) if cwd is not None else None
-        self.extra_args = list(extra_args)
-
-    def close(self) -> None:
-        """No-op."""
-
-    def _build_argv(
-        self, prompt: str, *, session_name: str | None, resume: bool
-    ) -> list[str]:
-        argv = [self.binary, "run", "-t", prompt, *self.extra_args]
-        if session_name is None:
-            argv.append("--no-session")
-        else:
-            argv.extend(["--name", session_name])
-            if resume:
-                argv.append("--resume")
-        return argv
-
-    def _run(
-        self,
-        argv: list[str],
-        env: dict | None,
-        timeout: float | None,
-    ) -> subprocess.CompletedProcess:
-        full_env: dict[str, str] = {}
-        if self.model:
-            full_env["GOOSE_MODEL"] = self.model
-        if env:
-            full_env.update(env)
-        return self.sandbox.run(
-            argv,
-            env=full_env,
-            cwd=self.cwd,
-            timeout=timeout,
-        )
-
-    def start(
-        self,
-        prompt: str,
-        *,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        session_name = f"agentcap-{uuid.uuid4().hex[:8]}"
-        proc = self._run(
-            self._build_argv(prompt, session_name=session_name, resume=False),
-            env,
-            timeout,
-        )
-        return AgentTurn(
-            session_id=session_name,
-            response_text=proc.stdout.strip(),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
-
-    def resume(
-        self,
-        prompt: str,
-        *,
-        session_id: str,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(
-            self._build_argv(prompt, session_name=session_id, resume=True),
-            env,
-            timeout,
-        )
-        return AgentTurn(
-            session_id=session_id,
-            response_text=proc.stdout.strip(),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
diff --git a/src/agentcap/drivers/hermes.py b/src/agentcap/drivers/hermes.py
deleted file mode 100644
index 2ea34b1..0000000
--- a/src/agentcap/drivers/hermes.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""Hermes driver.
-
-Drives ``hermes chat -q "<prompt>"`` non-interactively. ``~/.hermes/``
-is baked into the per-agent image with the proxy URL and context
-length pointing at the in-process proxy — see
-[containers/agentcap-hermes.Containerfile](
-../../../containers/agentcap-hermes.Containerfile). The driver does
-no per-run config rewriting.
-
-Identity content (``SOUL.md``, etc.) and per-run state (``memories/``,
-``sessions/``, ``logs/``) live under the image's ``/root/.hermes/``;
-state-db symlinks redirect SQLite writes to the bind-mounted
-``state/`` dir so session continuity survives ``podman run --rm``
-boundaries between turns.
-"""
-
-from __future__ import annotations
-
-import re
-import subprocess
-from pathlib import Path
-from typing import Sequence
-
-import yaml
-
-from . import AgentDriver, AgentTurn
-from ..sandbox import Sandbox
-
-
-_SESSION_ID_RE = re.compile(r"session_id:\s*([a-zA-Z0-9_\-]+)")
-_RESUMED_MARKER = "Resumed"
-
-
-def parse_session_id(output: str) -> str | None:
-    m = _SESSION_ID_RE.search(output)
-    return m.group(1) if m else None
-
-
-def parse_tool_errors(stdout: str) -> list[str]:
-    # TODO: hermes' tool-error format is not yet characterised.
-    return []
-
-
-def parse_response_text(stdout: str) -> str:
-    """Extract the assistant body from a hermes run.
-
-    For a resumed session, hermes prints a ``↻ Resumed <id>`` marker
-    before the new turn — we slice everything after the last such
-    marker. For an initial run we use the whole stdout. Then strip
-    bare ``session_id:`` lines and surrounding whitespace.
-    """
-    lines = stdout.splitlines()
-    last = -1
-    for i, line in enumerate(lines):
-        if _RESUMED_MARKER in line and "↻" in line:
-            last = i
-    body_lines = lines[last + 1 :] if last >= 0 else lines
-    cleaned = [
-        ln for ln in body_lines if not _SESSION_ID_RE.match(ln.strip())
-    ]
-    return "\n".join(cleaned).strip()
-
-
-def _rewrite_config(
-    config_text: str,
-    *,
-    base_url: str,
-    context_length_override: int | None = None,
-) -> str:
-    """Round-trip a hermes ``config.yaml`` through PyYAML, overriding
-    ``model.base_url`` and (optionally) ``context_length``. Kept for
-    unit tests; the production path bakes the equivalent into the
-    image, so the driver never calls this at runtime."""
-    cfg = yaml.safe_load(config_text) or {}
-    if not isinstance(cfg, dict):
-        raise ValueError("hermes config.yaml is not a YAML mapping")
-
-    model = cfg.setdefault("model", {})
-    if not isinstance(model, dict):
-        raise ValueError("hermes config.yaml: 'model' must be a mapping")
-    model["base_url"] = base_url
-
-    if context_length_override is not None:
-        model["context_length"] = context_length_override
-        aux = cfg.setdefault("auxiliary", {})
-        if not isinstance(aux, dict):
-            raise ValueError(
-                "hermes config.yaml: 'auxiliary' must be a mapping"
-            )
-        comp = aux.setdefault("compression", {})
-        if not isinstance(comp, dict):
-            raise ValueError(
-                "hermes config.yaml: 'auxiliary.compression' must be a mapping"
-            )
-        comp["context_length"] = context_length_override
-
-    return yaml.safe_dump(cfg, sort_keys=False)
-
-
-class HermesDriver(AgentDriver):
-    name = "hermes"
-
-    def __init__(
-        self,
-        *,
-        sandbox: Sandbox,
-        binary: str = "hermes",
-        model: str | None = None,
-        extra_args: Sequence[str] = ("-Q", "--yolo", "--accept-hooks"),
-        cwd: Path | str | None = None,
-        ignore_rules: bool = False,
-        toolsets: str | None = None,
-    ) -> None:
-        # cwd: sandbox-side working directory. Hermes auto-injects
-        # AGENTS.md / CLAUDE.md / .cursorrules from its cwd into every
-        # system prompt; the orchestrator typically passes the result
-        # of ``sandbox.mkdtemp`` so per-run cwd state doesn't leak.
-        #
-        # ignore_rules / toolsets shrink the default Hermes system
-        # prompt for CPU + small-model runs.
-        #
-        # model: passed via ``hermes chat -m <id>``. The CLI flag is
-        # the only path that reliably populates the ``model`` field
-        # in the outbound OAI request body; ``model.name`` in
-        # ``config.yaml`` doesn't propagate for every provider profile.
-        self.sandbox = sandbox
-        self.binary = binary
-        self.model = model
-        self.extra_args = list(extra_args)
-        self.cwd = str(cwd) if cwd is not None else None
-        self.ignore_rules = ignore_rules
-        self.toolsets = toolsets
-
-    def close(self) -> None:
-        """No-op."""
-
-    def _build_argv(
-        self, prompt: str, *, session_id: str | None
-    ) -> list[str]:
-        argv = [self.binary, "chat", "-q", prompt, *self.extra_args]
-        if self.model:
-            argv.extend(["-m", self.model])
-        if self.ignore_rules:
-            argv.append("--ignore-rules")
-        if self.toolsets:
-            argv.extend(["-t", self.toolsets])
-        if session_id is None:
-            argv.append("--pass-session-id")
-        else:
-            argv.extend(["--resume", session_id])
-        return argv
-
-    def _run(
-        self,
-        argv: list[str],
-        env: dict | None,
-        timeout: float | None,
-    ) -> subprocess.CompletedProcess:
-        return self.sandbox.run(
-            argv,
-            env=env or {},
-            cwd=self.cwd,
-            timeout=timeout,
-        )
-
-    def start(
-        self,
-        prompt: str,
-        *,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(
-            self._build_argv(prompt, session_id=None), env, timeout
-        )
-        combined = proc.stdout + "\n" + proc.stderr
-        return AgentTurn(
-            session_id=parse_session_id(combined),
-            response_text=parse_response_text(proc.stdout),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
-
-    def resume(
-        self,
-        prompt: str,
-        *,
-        session_id: str,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(
-            self._build_argv(prompt, session_id=session_id), env, timeout
-        )
-        return AgentTurn(
-            session_id=session_id,
-            response_text=parse_response_text(proc.stdout),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
diff --git a/src/agentcap/drivers/opencode.py b/src/agentcap/drivers/opencode.py
deleted file mode 100644
index 34eaaae..0000000
--- a/src/agentcap/drivers/opencode.py
+++ /dev/null
@@ -1,236 +0,0 @@
-"""OpenCode driver.
-
-Drives ``opencode run --format json`` non-interactively. The provider
-config (proxy URL, ``minimal`` agent definition) is baked into the
-per-agent image at ``~/.config/opencode/opencode.json`` — see
-[containers/agentcap-opencode.Containerfile](
-../../../containers/agentcap-opencode.Containerfile). The driver
-passes the model id at the CLI (``--model local/<id>``); session
-continuity is via ``--session`` on resume.
-
-OpenCode emits NDJSON events on stdout when invoked with
-``--format json``. ``text`` events carry assistant chunks; the
-session id appears in every event as ``sessionID``.
-
-Always launch from a real project dir — opencode hangs ≥30 min if
-the model directs it to recursively glob from filesystem root.
-"""
-
-from __future__ import annotations
-
-import json
-import subprocess
-from pathlib import Path
-from typing import Sequence
-
-from . import AgentDriver, AgentTurn
-from ..sandbox import Sandbox
-
-
-_DEFAULT_PROVIDER_NAME = "local"
-
-
-def _iter_events(stdout: str):
-    for line in stdout.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            yield json.loads(line)
-        except json.JSONDecodeError:
-            continue
-
-
-def parse_response_text(stdout: str) -> str:
-    """Concatenate ``text`` events from an opencode NDJSON stream."""
-    parts: list[str] = []
-    for obj in _iter_events(stdout):
-        if obj.get("type") == "text" and isinstance(obj.get("text"), str):
-            parts.append(obj["text"])
-    return "".join(parts).strip()
-
-
-def parse_session_id(stdout: str) -> str | None:
-    """Pull the first ``sessionID`` field out of the NDJSON stream."""
-    for obj in _iter_events(stdout):
-        sid = obj.get("sessionID")
-        if isinstance(sid, str) and sid:
-            return sid
-        # Some events nest it under ``part``.
-        part = obj.get("part")
-        if isinstance(part, dict):
-            sid = part.get("sessionID")
-            if isinstance(sid, str) and sid:
-                return sid
-    return None
-
-
-def parse_tool_errors(stdout: str) -> list[str]:
-    """Extract tool-call errors from opencode's NDJSON stream.
-
-    Each ``tool_use`` event carries a ``part.state`` block with a
-    ``status`` field (``"completed"`` / ``"error"``) and, on error,
-    an ``error`` message + the failing ``input``. We surface every
-    error as ``"<tool>: <message>"`` so the caller can fail loud
-    rather than mistake a destructive or no-op tool call for a real
-    edit.
-    """
-    errors: list[str] = []
-    for obj in _iter_events(stdout):
-        if obj.get("type") != "tool_use":
-            continue
-        part = obj.get("part") or {}
-        state = part.get("state") or {}
-        if state.get("status") != "error":
-            continue
-        tool = part.get("tool") or "<unknown>"
-        msg = state.get("error") or "(no error message)"
-        errors.append(f"{tool}: {msg}")
-    return errors
-
-
-# Retained for tests and back-compat callers. Not used by OpenCodeDriver
-# at runtime — the equivalent JSON is baked into the per-agent image.
-_MINIMAL_AGENT_PROMPT = (
-    "You are a coding assistant. Always make code changes by CALLING "
-    "the edit tool — do NOT just describe the change in prose. The "
-    "user's task is incomplete until your tool call actually modifies "
-    "the file. Use read first to see the current contents, then edit "
-    "to change them. Stop after a successful edit."
-)
-
-
-def build_opencode_config(
-    *,
-    provider_name: str,
-    base_url: str,
-    model_id: str,
-    context_window: int = 65536,
-    max_tokens: int = 8192,
-    minimal_agent: bool = False,
-) -> dict:
-    """Render an ``opencode.json`` payload. Kept for tests; the
-    production path bakes the equivalent into the image."""
-    cfg: dict = {
-        "$schema": "https://opencode.ai/config.json",
-        "provider": {
-            provider_name: {
-                "npm": "@ai-sdk/openai-compatible",
-                "name": f"Local via agentcap proxy ({base_url})",
-                "options": {"baseURL": base_url},
-                "models": {
-                    model_id: {
-                        "name": model_id,
-                        "options": {"max_tokens": max_tokens},
-                        "limit": {"context": context_window, "output": max_tokens},
-                    }
-                },
-            }
-        },
-        "model": f"{provider_name}/{model_id}",
-    }
-    if minimal_agent:
-        cfg["agent"] = {
-            "minimal": {
-                "description": "Stripped agent for CI / small-model CPU runs.",
-                # ``primary`` makes it selectable via ``--agent minimal``.
-                # Without ``mode``, opencode 1.15.x treats the agent as
-                # a subagent (@ autocomplete only) and the CLI flag
-                # falls through to "default agent".
-                "mode": "primary",
-                "model": f"{provider_name}/{model_id}",
-                "prompt": _MINIMAL_AGENT_PROMPT,
-                "permission": {"*": "deny", "read": "allow", "edit": "allow"},
-            }
-        }
-    return cfg
-
-
-class OpenCodeDriver(AgentDriver):
-    name = "opencode"
-
-    def __init__(
-        self,
-        *,
-        sandbox: Sandbox,
-        binary: str = "opencode",
-        model: str | None = None,
-        cwd: Path | str | None = None,
-        provider_name: str = _DEFAULT_PROVIDER_NAME,
-        extra_args: Sequence[str] = (),
-        minimal_agent: bool = False,
-    ) -> None:
-        self.sandbox = sandbox
-        self.binary = binary
-        self.model = model
-        self.cwd = str(cwd) if cwd is not None else None
-        self.provider_name = provider_name
-        self.extra_args = list(extra_args)
-        self.minimal_agent = minimal_agent
-
-    def close(self) -> None:
-        """No-op."""
-
-    def _build_argv(
-        self, prompt: str, *, session_id: str | None = None
-    ) -> list[str]:
-        argv = [self.binary, "run", "--format", "json"]
-        if self.model:
-            argv.extend(["--model", f"{self.provider_name}/{self.model}"])
-        if self.minimal_agent:
-            argv.extend(["--agent", "minimal"])
-        if session_id:
-            argv.extend(["--session", session_id])
-        argv.extend(self.extra_args)
-        argv.append(prompt)
-        return argv
-
-    def _run(
-        self,
-        argv: list[str],
-        env: dict | None,
-        timeout: float | None,
-    ) -> subprocess.CompletedProcess:
-        return self.sandbox.run(
-            argv,
-            env=env or {},
-            cwd=self.cwd,
-            timeout=timeout,
-        )
-
-    def start(
-        self,
-        prompt: str,
-        *,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(self._build_argv(prompt), env, timeout)
-        return AgentTurn(
-            session_id=parse_session_id(proc.stdout),
-            response_text=parse_response_text(proc.stdout),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
-
-    def resume(
-        self,
-        prompt: str,
-        *,
-        session_id: str,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(
-            self._build_argv(prompt, session_id=session_id), env, timeout
-        )
-        return AgentTurn(
-            session_id=parse_session_id(proc.stdout) or session_id,
-            response_text=parse_response_text(proc.stdout),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
diff --git a/src/agentcap/drivers/pi.py b/src/agentcap/drivers/pi.py
deleted file mode 100644
index 776a70d..0000000
--- a/src/agentcap/drivers/pi.py
+++ /dev/null
@@ -1,183 +0,0 @@
-"""pi-mono coding-agent driver.
-
-Drives ``pi -p "<prompt>" --provider local --model <id>`` non-
-interactively. The provider config (proxy URL, model entries) and
-PI_CODING_AGENT_DIR are baked into the per-agent image — see
-[containers/agentcap-pi.Containerfile](
-../../../containers/agentcap-pi.Containerfile). The driver passes
-the model id at the CLI.
-
-Native sessions: pi tracks the most recent session under
-``PI_CODING_AGENT_SESSION_DIR`` and resumes via ``--continue``. The
-driver lets pi mint its own UUID on ``start`` (no flag), then passes
-``--continue`` on ``resume``. The image's init script symlinks
-``PI_CODING_AGENT_SESSION_DIR`` at the bind-mounted ``traces/`` dir
-so session state survives ``podman run --rm`` boundaries.
-"""
-
-from __future__ import annotations
-
-import subprocess
-from pathlib import Path
-from typing import Sequence
-
-from . import AgentDriver, AgentTurn
-from ..sandbox import Sandbox
-
-
-_DEFAULT_PROVIDER_NAME = "local"
-
-
-def parse_tool_errors(stdout: str) -> list[str]:
-    # TODO: pi's tool-error format is not yet characterised.
-    return []
-
-
-def build_models_json(
-    *,
-    provider_name: str,
-    base_url: str,
-    model_id: str,
-    api_key_env: str = "PI_LOCAL_API_KEY",
-    context_window: int = 65536,
-    max_tokens: int = 4096,
-) -> dict:
-    """Render a pi ``models.json`` payload. Kept for tests; the
-    production path bakes the equivalent into the image."""
-    return {
-        "providers": {
-            provider_name: {
-                "baseUrl": base_url,
-                "api": "openai-completions",
-                "apiKey": api_key_env,
-                "compat": {
-                    "supportsDeveloperRole": False,
-                    "supportsReasoningEffort": False,
-                },
-                "models": [
-                    {
-                        "id": model_id,
-                        "name": model_id,
-                        "reasoning": False,
-                        "input": ["text"],
-                        "contextWindow": context_window,
-                        "maxTokens": max_tokens,
-                        "cost": {
-                            "input": 0, "output": 0,
-                            "cacheRead": 0, "cacheWrite": 0,
-                        },
-                    }
-                ],
-            }
-        }
-    }
-
-
-class PiDriver(AgentDriver):
-    name = "pi"
-    # In-container path where pi writes its native session files.
-    # Bind-mounted to ``<workdir>/traces/`` by ``agentcap run`` so the
-    # agent's own trace ends up next to the proxy captures.
-    sessions_path = "/opt/pi-config/sessions"
-
-    def __init__(
-        self,
-        *,
-        sandbox: Sandbox,
-        binary: str = "pi",
-        model: str | None = None,
-        cwd: Path | str | None = None,
-        provider_name: str = _DEFAULT_PROVIDER_NAME,
-        extra_args: Sequence[str] = (),
-    ) -> None:
-        self.sandbox = sandbox
-        self.binary = binary
-        self.model = model
-        self.cwd = str(cwd) if cwd is not None else None
-        self.provider_name = provider_name
-        self.extra_args = list(extra_args)
-
-    def close(self) -> None:
-        """No-op."""
-
-    def _build_argv(
-        self,
-        prompt: str,
-        *,
-        resume: bool,
-        no_session: bool,
-    ) -> list[str]:
-        argv = [
-            self.binary,
-            "-p",
-            prompt,
-            "--provider",
-            self.provider_name,
-            *self.extra_args,
-        ]
-        if self.model:
-            argv.extend(["--model", self.model])
-        if no_session:
-            argv.append("--no-session")
-        elif resume:
-            argv.append("--continue")
-        return argv
-
-    def _run(
-        self,
-        argv: list[str],
-        env: dict | None,
-        timeout: float | None,
-    ) -> subprocess.CompletedProcess:
-        return self.sandbox.run(
-            argv,
-            env=env or {},
-            cwd=self.cwd,
-            timeout=timeout,
-        )
-
-    def start(
-        self,
-        prompt: str,
-        *,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        # No --session: pi mints its own UUID and writes it under
-        # PI_CODING_AGENT_SESSION_DIR. Resume picks the latest via
-        # --continue (synthetic marker returned to the orchestrator).
-        proc = self._run(
-            self._build_argv(prompt, resume=False, no_session=False),
-            env,
-            timeout,
-        )
-        return AgentTurn(
-            session_id="latest",
-            response_text=proc.stdout.strip(),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
-
-    def resume(
-        self,
-        prompt: str,
-        *,
-        session_id: str,
-        env: dict | None = None,
-        timeout: float | None = None,
-    ) -> AgentTurn:
-        proc = self._run(
-            self._build_argv(prompt, resume=True, no_session=False),
-            env,
-            timeout,
-        )
-        return AgentTurn(
-            session_id=session_id,
-            response_text=proc.stdout.strip(),
-            returncode=proc.returncode,
-            stdout=proc.stdout,
-            stderr=proc.stderr,
-            tool_errors=parse_tool_errors(proc.stdout),
-        )
diff --git a/src/agentcap/export.py b/src/agentcap/export.py
deleted file mode 100644
index 76d1b64..0000000
--- a/src/agentcap/export.py
+++ /dev/null
@@ -1,686 +0,0 @@
-"""Capture dir → parquet export.
-
-For each ``<request_id>.request.json``, pair with the matching
-``<request_id>.response.json`` and emit one parquet row.
-
-Destination: ``--push <owner>/<name>[/<subdir>]`` — uploaded into a
-Hugging Face Dataset repo. Files under ``data/`` get the Hub Dataset
-Viewer automatically.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import Iterator
-
-from .provider import _hostname_fallback, refine_for_sub_provider
-
-
-def detect_provider_columns(capture_dir: Path | str) -> dict:
-    """Derive ``provider`` + ``upstream_url`` from the per-request
-    ``upstream_url`` stamp. Empty dict for legacy capture dirs missing
-    the stamp."""
-    for req_path in sorted(Path(capture_dir).glob("*.request.json")):
-        try:
-            rec = json.loads(req_path.read_text())
-        except (OSError, json.JSONDecodeError):
-            continue
-        upstream_url = rec.get("upstream_url")
-        if not isinstance(upstream_url, str) or not upstream_url:
-            continue
-        model = (rec.get("body") or {}).get("model")
-        provider = refine_for_sub_provider(
-            _hostname_fallback(upstream_url),
-            model if isinstance(model, str) else None,
-        )
-        return {"provider": provider, "upstream_url": upstream_url}
-    return {}
-
-
-def detect_model(capture_dir: Path | str) -> str | None:
-    """Unique ``body.model`` across all captured requests, or ``None``.
-    Raises ``ValueError`` on mixed models (datasets never mix models).
-    ``@revision`` suffixes are stripped."""
-    capture_dir = Path(capture_dir)
-    seen: set[str] = set()
-    for req_path in sorted(capture_dir.glob("*.request.json")):
-        try:
-            rec = json.loads(req_path.read_text())
-        except (OSError, json.JSONDecodeError):
-            continue
-        m = (rec.get("body") or {}).get("model")
-        if isinstance(m, str) and m:
-            seen.add(_bare_model_id(m))
-    if len(seen) > 1:
-        raise ValueError(
-            f"capture dir contains requests for multiple models: "
-            f"{sorted(seen)}. Datasets never mix models — split into "
-            f"separate capture dirs and export each one independently."
-        )
-    return seen.pop() if seen else None
-
-
-def _bare_model_id(model: str) -> str:
-    """Strip ``@revision`` suffix so ``gemma-4-E4B-it`` and
-    ``gemma-4-E4B-it@main`` are treated as the same id."""
-    return model.split("@", 1)[0]
-
-
-def _iter_pairs(
-    capture_dir: Path,
-) -> Iterator[tuple[str, dict, dict | None, int, dict, str | None, int | None]]:
-    """Yield (request_id, request_body, response_body, captured_at,
-    upstream_fingerprint, task_id, turn) per captured request, in
-    filename order. ``task_id`` / ``turn`` come from the wrapping
-    ``.request.json`` record (orchestrator-side metadata that isn't
-    inside the OpenAI body) — preserving them in the parquet lets
-    downstream picker UIs group + index rows without having to fall
-    back to ``-``."""
-    for req_path in sorted(capture_dir.glob("*.request.json")):
-        rec = json.loads(req_path.read_text())
-        rid = rec.get("request_id") or req_path.stem.split(".")[0]
-        captured_at = int(rec.get("captured_at", 0))
-        body = rec.get("body") or {}
-        task_id = rec.get("task_id")
-        turn = rec.get("turn")
-        resp_path = capture_dir / f"{rid}.response.json"
-        resp_body: dict | None = None
-        upstream_fp: dict = {}
-        if resp_path.exists():
-            resp_rec = json.loads(resp_path.read_text())
-            upstream_fp = resp_rec.get("upstream_fingerprint") or {}
-            if resp_rec.get("stream"):
-                resp_body = {"stream": True, "raw": resp_rec.get("raw", "")}
-            else:
-                resp_body = resp_rec.get("body") or {}
-        yield rid, body, resp_body, captured_at, upstream_fp, task_id, turn
-
-
-def _fingerprint_columns(fp: dict | None) -> dict:
-    fp = fp or {}
-    return {
-        "served_by": fp.get("x_served_by"),
-        "served_build_info": fp.get("build_info"),
-        "served_model": fp.get("served_model"),
-    }
-
-
-def _row(
-    request_id: str,
-    request_body: dict,
-    response_body: dict | None,
-    captured_at: int,
-    upstream_fp: dict | None,
-    task_id: str | None = None,
-    turn: int | None = None,
-) -> dict:
-    # request / response stringified so Arrow doesn't infer a schema over
-    # heterogeneous tool-schema fields. Consumers json.loads them.
-    model = (request_body.get("model") or "") if isinstance(request_body, dict) else ""
-    return {
-        "request_id": request_id,
-        "model": model,
-        "captured_at": captured_at,
-        "task_id": task_id,
-        "turn": turn,
-        "request": json.dumps(request_body, ensure_ascii=False),
-        "response": json.dumps(response_body or {}, ensure_ascii=False),
-        **_fingerprint_columns(upstream_fp),
-    }
-
-
-def export_local(
-    capture_dir: Path | str,
-    output: Path | str,
-    *,
-    batch_size: int = 32,
-    progress: bool = True,
-    provider_columns: dict | None = None,
-    agent: str | None = None,
-    model: str | None = None,
-) -> int:
-    """Stream the capture dir into a single parquet. Returns row count.
-    Batches via ``ParquetWriter`` so a mid-render kill leaves a valid
-    parquet up to the last flushed batch.
-
-    ``agent`` and ``model`` are stamped into the parquet's schema-level
-    KV metadata so downstream consumers (``agentcap inspect``'s picker
-    in particular) can label each parquet without re-parsing the
-    filename — that filename is a brittle contract, the KV metadata is
-    the authoritative source."""
-    import pyarrow as pa
-    import pyarrow.parquet as pq
-
-    capture_dir = Path(capture_dir)
-    output = Path(output)
-    output.parent.mkdir(parents=True, exist_ok=True)
-    if provider_columns is None:
-        provider_columns = detect_provider_columns(capture_dir)
-
-    request_files = sorted(capture_dir.glob("*.request.json"))
-    total = len(request_files)
-    if total == 0:
-        raise ValueError(f"no captured requests in {capture_dir}")
-
-    pairs_iter = _iter_pairs(capture_dir)
-    if progress:
-        try:
-            from tqdm import tqdm
-            pairs_iter = tqdm(
-                pairs_iter,
-                total=total,
-                desc=f"export {capture_dir.name}",
-                unit="row",
-            )
-        except ImportError:
-            pass
-
-    writer: pq.ParquetWriter | None = None
-    schema: pa.Schema | None = None
-    batch: list[dict] = []
-    n_written = 0
-    # ``tasks_buf`` accumulates {task_id → (max-turn, first-user-prompt)}
-    # across all batches so the parquet's schema KV ends up with an
-    # accurate, complete task list. We post-process the parquet at the
-    # end to stamp it (the streaming writer's schema is fixed at open).
-    tasks_buf: dict[str, dict] = {}
-
-    def _absorb_tasks(rows: list[dict]) -> None:
-        for r in rows:
-            tid = r.get("task_id")
-            if not tid:
-                continue
-            d = tasks_buf.setdefault(tid, {"turns": 0, "prompt": None})
-            turn = r.get("turn")
-            if turn is not None and int(turn) > d["turns"]:
-                d["turns"] = int(turn)
-            if d["prompt"] is None:
-                try:
-                    body = json.loads(r.get("request") or "{}")
-                except (json.JSONDecodeError, ValueError, TypeError):
-                    body = {}
-                for m in body.get("messages") or []:
-                    if m.get("role") == "user":
-                        content = m.get("content") or ""
-                        if isinstance(content, list):
-                            content = " ".join(
-                                c.get("text", "") for c in content
-                                if isinstance(c, dict)
-                            )
-                        d["prompt"] = (
-                            (content or "").replace("\n", " ").strip()[:200]
-                        )
-                        break
-
-    def _flush(rows: list[dict]) -> None:
-        nonlocal writer, schema, n_written
-        if not rows:
-            return
-        if provider_columns:
-            for r in rows:
-                for k, v in provider_columns.items():
-                    r.setdefault(k, v)
-        _absorb_tasks(rows)
-        table = pa.Table.from_pylist(rows)
-        # ``task_id`` / ``turn`` are optional orchestrator metadata.
-        # If the first batch's values are all ``None``, Arrow infers
-        # ``null`` for the column type and the writer's schema locks
-        # that in — every later batch with non-null values then fails
-        # ``table.cast(schema)``. Force the canonical dtypes up front
-        # so the first-batch dtype matches what later batches will
-        # carry.
-        for col, dtype in (("task_id", pa.string()), ("turn", pa.int64())):
-            if col not in table.schema.names:
-                continue
-            field = table.schema.field(col)
-            if pa.types.is_null(field.type):
-                idx = table.schema.get_field_index(col)
-                table = table.set_column(
-                    idx, col, pa.array([None] * table.num_rows, type=dtype),
-                )
-        if writer is None:
-            kv = {
-                k.encode(): v.encode()
-                for k, v in (("agent", agent), ("model", model))
-                if v
-            }
-            schema = table.schema.with_metadata(kv) if kv else table.schema
-            writer = pq.ParquetWriter(str(output), schema)
-        else:
-            table = table.cast(schema)
-        writer.write_table(table)
-        n_written += len(rows)
-
-    try:
-        for rid, body, resp, captured_at, upstream_fp, task_id, turn in pairs_iter:
-            batch.append(
-                _row(rid, body, resp, captured_at, upstream_fp, task_id, turn)
-            )
-            if len(batch) >= batch_size:
-                _flush(batch)
-                batch = []
-        _flush(batch)
-    finally:
-        if writer is not None:
-            writer.close()
-
-    # The streaming writer freezes the schema at open, so we can't
-    # stamp ``tasks`` until we've consumed every row.
-    if tasks_buf:
-        tasks_list = [
-            {"id": tid, "turns": d["turns"], "prompt": d["prompt"]}
-            for tid, d in sorted(tasks_buf.items())
-        ]
-        table = pq.read_table(str(output))
-        kv = dict(table.schema.metadata or {})
-        kv[b"tasks"] = json.dumps(tasks_list, ensure_ascii=False).encode()
-        # ``Path.replace`` is POSIX-atomic — a kill mid-rewrite leaves
-        # the original intact.
-        tmp = output.with_suffix(output.suffix + ".rewrite")
-        try:
-            pq.write_table(table.replace_schema_metadata(kv), str(tmp))
-            tmp.replace(output)
-        except Exception:
-            if tmp.exists():
-                tmp.unlink()
-            raise
-
-    return n_written
-
-
-def parse_collection_base(uri: str) -> tuple[str, str]:
-    """Split ``<owner>/<base>`` (optionally prefixed with
-    ``hf://datasets/``) into ``("<owner>", "<base>")``.
-
-    ``<base>`` drives all three artifacts: captures dataset
-    ``<owner>/<base>-captures``, traces dataset ``<owner>/<base>-traces``,
-    and the HF Collection of the same title under ``<owner>``."""
-    s = uri.removeprefix("hf://datasets/").strip("/")
-    parts = s.split("/")
-    if len(parts) != 2 or not parts[0] or not parts[1]:
-        raise ValueError(
-            f"--push must be <owner>/<base>, got {uri!r}"
-        )
-    return parts[0], parts[1]
-
-
-def captures_repo_id(owner: str, base: str) -> str:
-    return f"{owner}/{base}-captures"
-
-
-_FILENAME_SAFE = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_."
-
-
-def _slug(s: str) -> str:
-    """Filename-safe slug. Strips ``org/`` prefix from HF model ids."""
-    s = s.split("/")[-1]
-    out = "".join(c if c in _FILENAME_SAFE else "-" for c in s)
-    while "--" in out:
-        out = out.replace("--", "-")
-    return out.strip("-_.") or "x"
-
-
-def _default_filename(
-    agent: str | None = None,
-    model: str | None = None,
-    provider: str | None = None,
-) -> str:
-    """``train-[<agent>-<model>-<provider>-]<utc>-<hex>.parquet``."""
-    import time
-    import uuid
-
-    ts = time.strftime("%Y%m%dT%H%M%S", time.gmtime())
-    parts = ["train"]
-    if agent:
-        parts.append(_slug(agent))
-    if model:
-        parts.append(_slug(model))
-    if provider:
-        # Preserve hf-router/fireworks-ai → hf-router-fireworks-ai
-        # (``_slug`` would otherwise strip everything before the last /).
-        parts.append(_slug(provider.replace("/", "-")))
-    parts.append(ts)
-    parts.append(uuid.uuid4().hex[:6])
-    return "-".join(parts) + ".parquet"
-
-
-_CAPTURES_README_TEMPLATE = """\
----
-license: apache-2.0
-tags:
-- agentcap
-- agentcap-captures
----
-
-# {repo_id}
-
-HTTP captures of agent ↔ model interactions — one parquet row per
-`/v1/chat/completions` call. Produced by
-[agentcap](https://github.com/huggingface/agentcap).
-
-Native session traces for the same runs live in companion datasets
-named `{base}-<agent>-traces`. They're all grouped under the
-[{collection_title} Collection](https://huggingface.co/{owner})
-alongside this dataset. Join on `run_id`.
-
-## Loading
-
-```python
-from datasets import load_dataset
-
-ds = load_dataset("{repo_id}", split="train")
-```
-
-## Schema
-
-| column | description |
-|---|---|
-| `run_id` | agentcap run id; matches the per-run folder in the traces dataset |
-| `request_id` | UUID minted by the capture proxy |
-| `model` | Model id from the captured request body |
-| `captured_at` | Epoch seconds when the request was captured |
-| `request` | Raw OpenAI request body, JSON-stringified |
-| `response` | Raw OpenAI response body, JSON-stringified (or `{{"stream": true, "raw": ...}}` for SSE) |
-| `served_by` | Per-response `X-Served-By` header (HF Router sub-provider routing) |
-| `served_build_info` | Per-response `X-Build-Info` header |
-| `served_model` | Per-response body-echoed `model` |
-| `provider` | Derived from the proxy upstream URL (constant per file) |
-| `upstream_url` | Proxy upstream URL at capture time (constant per file) |
-
-`request` and `response` are JSON strings; consumers `json.loads(...)`
-them. To recover per-message token ranges, render `request.messages`
-through the model's chat template yourself —
-`transformers.AutoTokenizer.apply_chat_template`.
-"""
-
-
-_TRACES_README_TEMPLATE = """\
----
-license: apache-2.0
-tags:
-- agent-traces
-- agentcap
-- agentcap-traces
-- agentcap-traces-{agent}
-source_datasets:
-- {captures_repo}
----
-
-# {repo_id}
-
-{agent} coding-agent session traces produced by
-[agentcap](https://github.com/huggingface/agentcap) runs. Each run
-contributes one folder under `data/<run_id>/`; inside, one file per
-session in `{agent}`'s native export format.
-
-The on-the-wire HTTP captures for these same runs live in
-[{captures_repo}](https://huggingface.co/datasets/{captures_repo}).
-Both belong to the
-[{collection_title} Collection](https://huggingface.co/{owner})
-— join on `run_id` to align captures with traces.
-"""
-
-
-def traces_repo_id_for(owner: str, base: str, agent: str) -> str:
-    """Per-agent traces dataset id. One agent per dataset keeps the
-    schema homogeneous — the Hub viewer can't reconcile pi's
-    type-discriminated events with goose's session-as-object dump."""
-    return f"{owner}/{base}-{agent}-traces"
-
-
-def _captures_readme(
-    *,
-    repo_id: str,
-    owner: str,
-    base: str,
-    collection_title: str,
-) -> str:
-    return _CAPTURES_README_TEMPLATE.format(
-        repo_id=repo_id,
-        owner=owner,
-        base=base,
-        collection_title=collection_title,
-    )
-
-
-def _traces_readme(
-    *,
-    repo_id: str,
-    captures_repo: str,
-    owner: str,
-    collection_title: str,
-    agent: str,
-) -> str:
-    return _TRACES_README_TEMPLATE.format(
-        repo_id=repo_id,
-        captures_repo=captures_repo,
-        owner=owner,
-        collection_title=collection_title,
-        agent=agent,
-    )
-
-
-def push_captures_dataset(
-    items: list[dict],
-    *,
-    owner: str,
-    base: str,
-) -> tuple[str, list[int]]:
-    """Render N capture dirs to parquet under ``<owner>/<base>-captures``
-    in a single commit. Returns ``(repo_id, [n_rows...])``.
-
-    ``items`` is a list of dicts, each with:
-      - ``capture_dir`` (required): path to a capture dir
-      - ``model`` (required): model id used in the default filename
-      - ``agent`` (optional): agent name embedded in the default filename
-      - ``run_id`` (optional): stamped onto every row + into the filename
-      - ``filename`` (optional): overrides the default unique name
-
-    The repo is created on first push (``exist_ok=True``); files land
-    under ``data/<filename>.parquet`` so the Hub Dataset Viewer picks
-    them up automatically.
-    """
-    import tempfile
-
-    from huggingface_hub import CommitOperationAdd, HfApi
-
-    repo_id = captures_repo_id(owner, base)
-    api = HfApi()
-    api.create_repo(
-        repo_id=repo_id, repo_type="dataset",
-        private=True, exist_ok=True,
-    )
-
-    # Seed a dataset card on first push (no README in the repo yet).
-    # Later pushes leave any existing README alone — including
-    # user-edited ones.
-    try:
-        existing = set(api.list_repo_files(repo_id, repo_type="dataset"))
-    except Exception:
-        existing = set()
-    include_readme = "README.md" not in existing
-
-    n_rows_list: list[int] = []
-    with tempfile.TemporaryDirectory() as tmpdir:
-        operations: list[CommitOperationAdd] = []
-        if include_readme:
-            operations.append(CommitOperationAdd(
-                path_in_repo="README.md",
-                path_or_fileobj=_captures_readme(
-                    repo_id=repo_id,
-                    owner=owner,
-                    base=base,
-                    collection_title=base,
-                ).encode("utf-8"),
-            ))
-        for i, item in enumerate(items):
-            cap_dir = item["capture_dir"]
-            model = item["model"]
-            agent = item.get("agent")
-            run_id = item.get("run_id")
-            filename = item.get("filename")
-            provider_columns = detect_provider_columns(cap_dir)
-            extra_columns = dict(provider_columns)
-            if run_id:
-                extra_columns["run_id"] = run_id
-            if filename is None:
-                filename = _default_filename(
-                    agent=agent,
-                    model=model,
-                    provider=provider_columns.get("provider") or None,
-                )
-            path_in_repo = f"data/{filename}"
-            local_file = Path(tmpdir) / f"{i}-{filename}"
-            n_rows = export_local(
-                cap_dir, local_file, provider_columns=extra_columns,
-                progress=False, agent=agent, model=model,
-            )
-            n_rows_list.append(n_rows)
-            operations.append(CommitOperationAdd(
-                path_in_repo=path_in_repo,
-                path_or_fileobj=str(local_file),
-            ))
-
-        api.create_commit(
-            repo_id=repo_id,
-            repo_type="dataset",
-            operations=operations,
-            commit_message=f"agentcap export: add {len(operations)} parquet(s)",
-        )
-
-    return repo_id, n_rows_list
-
-
-def push_agent_traces_dataset(
-    items: list[dict],
-    *,
-    owner: str,
-    base: str,
-    agent: str,
-) -> tuple[str, int]:
-    """Upload raw trace files for ONE agent under
-    ``<owner>/<base>-<agent>-traces`` in a single commit. Returns
-    ``(repo_id, n_files_total)``.
-
-    ``items`` is a list of dicts, each with:
-      - ``traces_dir`` (required): path to a ``<run>/traces/`` dir
-      - ``run_id`` (required): folder name in the dataset repo
-
-    Splitting by agent (one dataset per agent) keeps each dataset's
-    schema homogeneous — the Hub viewer can't reconcile pi's
-    type-discriminated events with goose's session-as-object dump.
-
-    Files are uploaded **as-is** — no JSON parsing, no schema
-    transformation. Empty trace dirs contribute 0 files. Returns 0
-    files when the entire item list has no files; the repo is still
-    created so the collection link stays consistent.
-    """
-    from huggingface_hub import CommitOperationAdd, HfApi
-
-    repo_id = traces_repo_id_for(owner, base, agent)
-    captures_repo = captures_repo_id(owner, base)
-    api = HfApi()
-    api.create_repo(
-        repo_id=repo_id, repo_type="dataset",
-        private=True, exist_ok=True,
-    )
-
-    try:
-        existing = set(api.list_repo_files(repo_id, repo_type="dataset"))
-    except Exception:
-        existing = set()
-    include_readme = "README.md" not in existing
-
-    operations: list[CommitOperationAdd] = []
-    if include_readme:
-        operations.append(CommitOperationAdd(
-            path_in_repo="README.md",
-            path_or_fileobj=_traces_readme(
-                repo_id=repo_id,
-                captures_repo=captures_repo,
-                owner=owner,
-                collection_title=base,
-                agent=agent,
-            ).encode("utf-8"),
-        ))
-
-    n_files = 0
-    for item in items:
-        traces_dir = Path(item["traces_dir"])
-        run_id = item["run_id"]
-        if not traces_dir.is_dir():
-            continue
-        for f in sorted(p for p in traces_dir.iterdir() if p.is_file()):
-            operations.append(CommitOperationAdd(
-                path_in_repo=f"data/{run_id}/{f.name}",
-                path_or_fileobj=str(f),
-            ))
-            n_files += 1
-
-    # Only commit if we have something to add. If even the README is
-    # already up, skip the empty commit silently.
-    if not operations:
-        return repo_id, n_files
-    api.create_commit(
-        repo_id=repo_id,
-        repo_type="dataset",
-        operations=operations,
-        commit_message=(
-            f"agentcap export: add {agent} traces "
-            f"({n_files} file(s) across {len(items)} run(s))"
-        ),
-    )
-
-    return repo_id, n_files
-
-
-def ensure_collection(
-    *,
-    owner: str,
-    base: str,
-    repos: list[str],
-) -> str:
-    """Find-or-create the ``<owner>/<base>`` collection and ensure every
-    repo in ``repos`` is an item. Returns the collection slug.
-
-    Idempotent: existing items are kept (``exists_ok=True``)."""
-    from huggingface_hub import HfApi
-
-    api = HfApi()
-    slug: str | None = None
-    try:
-        for c in api.list_collections(owner=owner, q=base, limit=20):
-            if c.title == base:
-                slug = c.slug
-                break
-    except Exception:
-        slug = None
-
-    if slug is None:
-        col = api.create_collection(
-            title=base,
-            namespace=owner,
-            description=(
-                "agentcap: paired HTTP captures + native session "
-                "traces. Join on run_id."
-            ),
-            private=True,
-            exists_ok=True,
-        )
-        slug = col.slug
-
-    for repo in repos:
-        try:
-            api.add_collection_item(
-                collection_slug=slug,
-                item_id=repo,
-                item_type="dataset",
-                exists_ok=True,
-            )
-        except Exception:
-            # Item-add isn't load-bearing — the README cross-links
-            # already make the relationship discoverable. Keep going.
-            pass
-
-    return slug
diff --git a/src/agentcap/followups/__init__.py b/src/agentcap/followups/__init__.py
deleted file mode 100644
index 09f2c0c..0000000
--- a/src/agentcap/followups/__init__.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""Follow-up strategies for multi-turn agent runs.
-
-Each strategy implements ``FollowUp.next(...)`` returning the next
-user message to feed to the agent given the prior turn's response and
-the original task. Strategies are stateful (``templates`` rotates a
-pool, ``synthesized`` may keep a model client) but the contract is
-the same.
-
-Three built-in strategies, in increasing order of cost / realism:
-
-  - ``continue`` (default): the literal string ``"continue"``. Cheapest
-    and maximises cross-session match opportunity since user-message
-    tokens are byte-identical across sessions.
-  - ``templates``: rotates through a small pool (``"continue"``,
-    ``"go on"``, ``"what else?"``, ``"keep going"``).
-  - ``synthesized``: feeds (original task + agent's last response)
-    into a separate model call to produce a realistic follow-up. The
-    synthesizer call **bypasses the capture proxy** by design — its
-    requests are not part of the capture.
-"""
-
-from __future__ import annotations
-
-import abc
-
-
-class FollowUp(abc.ABC):
-    """Strategy for picking the next user message in a multi-turn run."""
-
-    name: str
-
-    @abc.abstractmethod
-    def next(self, *, original_task: str, last_response: str, turn: int) -> str:
-        """Return the next user message.
-
-        ``turn`` is the 1-indexed number of the *upcoming* turn (so the
-        first follow-up is ``turn=2`` because the original task was
-        turn 1). Strategies that don't care about ``turn`` simply ignore
-        the arg.
-        """
-
-
-def get_followup(name: str, **kwargs) -> FollowUp:
-    if name == "continue":
-        from .continue_ import ContinueFollowUp
-
-        return ContinueFollowUp(**kwargs)
-    if name == "templates":
-        from .templates import TemplatesFollowUp
-
-        return TemplatesFollowUp(**kwargs)
-    if name == "synthesized":
-        from .synthesized import SynthesizedFollowUp
-
-        return SynthesizedFollowUp(**kwargs)
-    raise ValueError(f"unknown follow-up strategy: {name!r}")
-
-
-__all__ = ["FollowUp", "get_followup"]
diff --git a/src/agentcap/followups/continue_.py b/src/agentcap/followups/continue_.py
deleted file mode 100644
index f64dd07..0000000
--- a/src/agentcap/followups/continue_.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""Literal-``continue`` follow-up strategy."""
-
-from __future__ import annotations
-
-from . import FollowUp
-
-
-class ContinueFollowUp(FollowUp):
-    name = "continue"
-
-    def __init__(self, text: str = "continue") -> None:
-        self.text = text
-
-    def next(self, *, original_task: str, last_response: str, turn: int) -> str:
-        return self.text
diff --git a/src/agentcap/followups/synthesized.py b/src/agentcap/followups/synthesized.py
deleted file mode 100644
index 3bcf10e..0000000
--- a/src/agentcap/followups/synthesized.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""Synthesized follow-up strategy.
-
-Sends ``(original_task, agent's last response)`` to a small synthesizer
-LLM and uses the response as the next user message.
-
-By design the synthesizer call **bypasses the capture proxy** — it
-talks to the model server (or a different endpoint) directly. The
-capture must remain a clean record of agent↔model interaction;
-the synthesizer is just a way to produce realistic next user inputs.
-"""
-
-from __future__ import annotations
-
-import json
-import sys
-from typing import Callable
-
-from . import FollowUp
-
-
-PROMPT_TEMPLATE = """\
-You are a developer interacting with a coding agent. Given the agent's
-last response, produce ONE short follow-up question or instruction
-(<=30 words) that pushes the conversation forward. Don't ask the
-agent to summarise; ask it to do or show something.
-
-Original task:
-<<<{task}>>>
-
-Agent's last response:
-<<<{response}>>>
-
-Follow-up:
-"""
-
-
-def _default_call_synth(
-    *,
-    upstream: str,
-    model: str,
-    prompt: str,
-    timeout: float | None,
-    api_key: str | None = None,
-) -> str:
-    """Default OpenAI-compat chat-completion call."""
-    import httpx
-
-    body = {
-        "model": model,
-        "messages": [{"role": "user", "content": prompt}],
-        # Reason-by-default models (Gemma-4, Qwen3.5+) burn the budget
-        # in reasoning_content before the answer; an 80-token cap was
-        # silently producing empty content + finish_reason="length".
-        "max_tokens": 2048,
-        "temperature": 0.7,
-    }
-    base = upstream.rstrip("/")
-    if base.endswith("/v1"):
-        url = base + "/chat/completions"
-    else:
-        url = base + "/v1/chat/completions"
-    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
-    resp = httpx.post(url, json=body, timeout=timeout, headers=headers)
-    resp.raise_for_status()
-    data = resp.json()
-    try:
-        return data["choices"][0]["message"]["content"].strip()
-    except (KeyError, IndexError, TypeError) as exc:
-        raise RuntimeError(
-            f"synthesizer response missing choices[0].message.content: "
-            f"{json.dumps(data)[:200]}"
-        ) from exc
-
-
-class SynthesizedFollowUp(FollowUp):
-    name = "synthesized"
-
-    def __init__(
-        self,
-        *,
-        upstream: str,
-        model: str,
-        timeout: float | None = 60,
-        call: Callable[..., str] | None = None,
-        prompt_template: str = PROMPT_TEMPLATE,
-        fallback: str = "continue",
-        api_key: str | None = None,
-    ) -> None:
-        """``upstream`` should point at the model server **directly**,
-        not at the capture proxy. ``call`` is overridable for tests.
-        ``api_key`` is forwarded as ``Authorization: Bearer …`` on each
-        synthesizer call — required for authenticated upstreams like
-        the HF Router."""
-        self.upstream = upstream
-        self.model = model
-        self.timeout = timeout
-        self._call = call or _default_call_synth
-        self.prompt_template = prompt_template
-        self.fallback = fallback
-        self.api_key = api_key
-
-    def next(self, *, original_task: str, last_response: str, turn: int) -> str:
-        prompt = self.prompt_template.format(
-            task=original_task, response=last_response
-        )
-        try:
-            text = self._call(
-                upstream=self.upstream,
-                model=self.model,
-                prompt=prompt,
-                timeout=self.timeout,
-                api_key=self.api_key,
-            )
-        except Exception as exc:
-            # Silence here used to mask 401s against authenticated upstreams,
-            # making the whole sweep produce ``continue`` follow-ups while
-            # ``run.json`` still claimed ``followup: synthesized``.
-            msg = " ".join(str(exc).splitlines())
-            print(
-                f"[followups] synthesized turn={turn} fell back to "
-                f"{self.fallback!r}: {type(exc).__name__}: {msg}",
-                file=sys.stderr,
-                flush=True,
-            )
-            return self.fallback
-        text = text.strip()
-        return text or self.fallback
diff --git a/src/agentcap/followups/templates.py b/src/agentcap/followups/templates.py
deleted file mode 100644
index 4277ec4..0000000
--- a/src/agentcap/followups/templates.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""Rotating-template follow-up strategy.
-
-Cycles through a small fixed pool. No extra inference cost; minor
-variation in user-message tokens compared to plain ``continue``.
-"""
-
-from __future__ import annotations
-
-from typing import Sequence
-
-from . import FollowUp
-
-
-_DEFAULT_POOL = ("continue", "go on", "what else?", "keep going")
-
-
-class TemplatesFollowUp(FollowUp):
-    name = "templates"
-
-    def __init__(self, pool: Sequence[str] = _DEFAULT_POOL) -> None:
-        if not pool:
-            raise ValueError("templates pool must be non-empty")
-        self.pool = list(pool)
-
-    def next(self, *, original_task: str, last_response: str, turn: int) -> str:
-        # turn=2 (first follow-up) → pool[0]; turn=3 → pool[1]; etc.
-        idx = (turn - 2) % len(self.pool)
-        return self.pool[idx]
diff --git a/src/agentcap/orchestrator.py b/src/agentcap/orchestrator.py
deleted file mode 100644
index 9d6bc1f..0000000
--- a/src/agentcap/orchestrator.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""Drive an agent CLI through a corpus of prompts.
-
-The orchestrator pairs an :class:`AgentDriver` with a :class:`FollowUp`
-strategy and steps each task through ``turns_per_task`` turns. The
-proxy that captures the actual chat-completion bytes is configured
-separately (started before the orchestrator runs and pointed at via
-the agent's own config); this module is intentionally proxy-agnostic.
-
-Per-turn driver stdout/stderr is written under
-``<sessions_dir>/task_<NN>_turn_<K>.{out,err}`` for debugging. The
-orchestrator's primary output is the list of :class:`TaskResult`
-objects returned by :meth:`Orchestrator.run_corpus`.
-"""
-
-from __future__ import annotations
-
-import subprocess
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Callable, Iterable, Sequence
-
-from .drivers import AgentDriver, AgentTurn
-from .followups import FollowUp
-
-
-@dataclass
-class TaskTurnResult:
-    turn: int                     # 1-indexed
-    prompt: str
-    session_id: str | None
-    returncode: int
-    response_text: str
-    duration_s: float
-
-
-@dataclass
-class TaskResult:
-    task_id: str
-    prompt: str
-    turns: list[TaskTurnResult] = field(default_factory=list)
-
-    @property
-    def session_id(self) -> str | None:
-        if self.turns:
-            return self.turns[0].session_id
-        return None
-
-    @property
-    def completed_turns(self) -> int:
-        return sum(1 for t in self.turns if t.returncode == 0)
-
-
-def read_tasks_txt(path: Path | str) -> list[str]:
-    """Read a plain-text tasks file (one prompt per line, ``#`` comments
-    and blank lines ignored)."""
-    text = Path(path).read_text()
-    out: list[str] = []
-    for line in text.splitlines():
-        s = line.strip()
-        if not s or s.startswith("#"):
-            continue
-        out.append(s)
-    return out
-
-
-class Orchestrator:
-    """Run a corpus through an agent driver with a follow-up strategy."""
-
-    def __init__(
-        self,
-        driver: AgentDriver,
-        followup: FollowUp,
-        *,
-        sessions_dir: Path | str | None = None,
-        set_capture_context: Callable[..., None] | None = None,
-        on_event: Callable[..., None] | None = None,
-    ) -> None:
-        self.driver = driver
-        self.followup = followup
-        self.sessions_dir = Path(sessions_dir) if sessions_dir else None
-        self.set_capture_context = set_capture_context or (lambda **_: None)
-        if self.sessions_dir is not None:
-            self.sessions_dir.mkdir(parents=True, exist_ok=True)
-        self.on_event = on_event or (lambda **_: None)
-
-    def _log_turn(self, task_id: str, turn: int, agent_turn: AgentTurn) -> None:
-        if self.sessions_dir is None:
-            return
-        base = self.sessions_dir / f"{task_id}_turn_{turn:02d}"
-        base.with_suffix(".out").write_text(agent_turn.stdout)
-        base.with_suffix(".err").write_text(agent_turn.stderr)
-
-    def run_task(
-        self,
-        prompt: str,
-        *,
-        task_id: str,
-        turns: int,
-        timeout: float | None = None,
-    ) -> TaskResult:
-        if turns < 1:
-            raise ValueError("turns must be >= 1")
-
-        result = TaskResult(task_id=task_id, prompt=prompt)
-
-        # Turn 1: open session
-        self.on_event(event="task_start", task_id=task_id, prompt=prompt, turns=turns)
-        self.set_capture_context(task_id=task_id, turn=1)
-        t0 = time.time()
-        try:
-            first = self.driver.start(prompt, timeout=timeout)
-        except subprocess.TimeoutExpired:
-            dur = time.time() - t0
-            self.on_event(
-                event="task_aborted",
-                task_id=task_id,
-                reason="initial-turn-timeout",
-                duration_s=dur,
-            )
-            return result
-        dur = time.time() - t0
-        result.turns.append(
-            TaskTurnResult(
-                turn=1,
-                prompt=prompt,
-                session_id=first.session_id,
-                returncode=first.returncode,
-                response_text=first.response_text,
-                duration_s=dur,
-            )
-        )
-        self._log_turn(task_id, 1, first)
-        self.on_event(
-            event="turn_done",
-            task_id=task_id,
-            turn=1,
-            session_id=first.session_id,
-            returncode=first.returncode,
-            duration_s=dur,
-        )
-
-        if first.returncode != 0:
-            self.on_event(event="task_aborted", task_id=task_id, reason="initial-turn-failed")
-            return result
-        if first.session_id is None and turns > 1:
-            self.on_event(event="task_aborted", task_id=task_id, reason="no-session-id")
-            return result
-
-        # Follow-up turns
-        last_response = first.response_text
-        sid = first.session_id
-        for turn in range(2, turns + 1):
-            next_prompt = self.followup.next(
-                original_task=prompt, last_response=last_response, turn=turn
-            )
-            self.set_capture_context(task_id=task_id, turn=turn)
-            t0 = time.time()
-            try:
-                fu = self.driver.resume(next_prompt, session_id=sid, timeout=timeout)
-            except NotImplementedError:
-                self.on_event(
-                    event="task_aborted",
-                    task_id=task_id,
-                    reason="resume-not-supported",
-                )
-                break
-            except subprocess.TimeoutExpired:
-                dur = time.time() - t0
-                self.on_event(
-                    event="task_aborted",
-                    task_id=task_id,
-                    reason="follow-up-turn-timeout",
-                    turn=turn,
-                    duration_s=dur,
-                )
-                break
-            dur = time.time() - t0
-            result.turns.append(
-                TaskTurnResult(
-                    turn=turn,
-                    prompt=next_prompt,
-                    session_id=sid,
-                    returncode=fu.returncode,
-                    response_text=fu.response_text,
-                    duration_s=dur,
-                )
-            )
-            self._log_turn(task_id, turn, fu)
-            self.on_event(
-                event="turn_done",
-                task_id=task_id,
-                turn=turn,
-                session_id=sid,
-                returncode=fu.returncode,
-                duration_s=dur,
-            )
-            if fu.returncode != 0:
-                break
-            last_response = fu.response_text
-
-        return result
-
-    def run_corpus(
-        self,
-        tasks: Sequence[str] | Iterable[str],
-        *,
-        turns_per_task: int,
-        timeout: float | None = None,
-        task_id_format: str = "task_{i:02d}",
-    ) -> list[TaskResult]:
-        results: list[TaskResult] = []
-        for i, prompt in enumerate(tasks, start=1):
-            tid = task_id_format.format(i=i)
-            results.append(
-                self.run_task(
-                    prompt, task_id=tid, turns=turns_per_task, timeout=timeout
-                )
-            )
-        return results
diff --git a/src/agentcap/provider.py b/src/agentcap/provider.py
deleted file mode 100644
index afdcd5b..0000000
--- a/src/agentcap/provider.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""Identify the inference backend behind an upstream URL.
-
-Hostname classification (:func:`_hostname_fallback`) +
-HF Router sub-provider pin (:func:`refine_for_sub_provider`).
-:func:`probe` is the richer (network) variant — issues parallel
-GETs to well-known introspection endpoints, never raises.
-"""
-
-from __future__ import annotations
-
-import concurrent.futures
-import ipaddress
-import time
-from typing import Any
-from urllib.parse import urlparse
-
-import httpx
-
-
-# Reverse proxies / custom domains won't match; the probe path catches those.
-_HOSTNAME_TO_PROVIDER: dict[str, str] = {
-    "router.huggingface.co": "hf-router",
-    "api.openai.com": "openai",
-    "api.together.xyz": "together",
-    "api.anthropic.com": "anthropic",
-    "api.cerebras.ai": "cerebras",
-    "api.fireworks.ai": "fireworks",
-    "api.groq.com": "groq",
-}
-
-
-def _base_root(upstream_url: str) -> str:
-    # Introspection endpoints (/props, /info, ...) live under the
-    # server root, not /v1.
-    base = upstream_url.rstrip("/")
-    if base.endswith("/v1"):
-        base = base[:-3]
-    return base
-
-
-def _hostname_fallback(upstream_url: str) -> str:
-    host = (urlparse(upstream_url).hostname or "").lower()
-    if not host:
-        return "unknown"
-    if host in _HOSTNAME_TO_PROVIDER:
-        return _HOSTNAME_TO_PROVIDER[host]
-    if host in ("localhost", "::1"):
-        return "local"
-    try:
-        ip = ipaddress.ip_address(host)
-        return "local" if (ip.is_loopback or ip.is_private) else host
-    except ValueError:
-        pass
-    parts = host.split(".")
-    return parts[-2] if len(parts) >= 2 else host
-
-
-def _try_get(url: str, headers: dict, timeout: float) -> dict | None:
-    try:
-        r = httpx.get(url, headers=headers, timeout=timeout)
-    except (httpx.HTTPError, OSError):
-        return None
-    if r.status_code != 200:
-        return None
-    ct = r.headers.get("content-type", "")
-    out: dict[str, Any] = {"headers": {k.lower(): v for k, v in r.headers.items()}}
-    try:
-        if "json" in ct:
-            out["body"] = r.json()
-        else:
-            out["text"] = r.text[:4096]
-    except Exception:
-        return None
-    return out
-
-
-def probe(
-    upstream_url: str,
-    *,
-    api_key: str | None = None,
-    timeout: float = 3.0,
-) -> dict:
-    """Probe an OpenAI-compat upstream. Never raises."""
-    root = _base_root(upstream_url)
-    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
-    targets = {
-        "props":   f"{root}/props",          # llama.cpp
-        "info":    f"{root}/info",           # TGI
-        "version": f"{root}/version",        # vLLM
-        "models":  f"{root}/v1/models",
-        "metrics": f"{root}/metrics",
-    }
-    endpoints: dict[str, dict] = {}
-    with concurrent.futures.ThreadPoolExecutor(max_workers=len(targets)) as pool:
-        futures = {
-            name: pool.submit(_try_get, url, headers, timeout)
-            for name, url in targets.items()
-        }
-        for name, fut in futures.items():
-            try:
-                res = fut.result(timeout=timeout + 1.0)
-            except concurrent.futures.TimeoutError:
-                res = None
-            if res is not None:
-                endpoints[name] = res
-
-    return {
-        "upstream_url": upstream_url,
-        "provider": _classify(endpoints, upstream_url),
-        "probed_at": int(time.time()),
-        "endpoints": endpoints,
-    }
-
-
-def _classify(endpoints: dict, upstream_url: str) -> str:
-    models_body = (endpoints.get("models") or {}).get("body") or {}
-    model_ids = [m.get("id", "") for m in (models_body.get("data") or [])]
-
-    # HF Router model ids carry a ``:<sub-provider>`` suffix.
-    if any(":" in i for i in model_ids):
-        return "hf-router"
-    if endpoints.get("props") is not None:
-        return "local-llama-server"
-
-    info_body = (endpoints.get("info") or {}).get("body") or {}
-    if isinstance(info_body, dict) and info_body.get("model_id"):
-        return "tgi"
-
-    version_body = (endpoints.get("version") or {}).get("body") or {}
-    if isinstance(version_body, dict) and version_body.get("version"):
-        return "vllm"
-
-    if any(i.startswith(("gpt-", "o1-", "o3-", "o4-")) for i in model_ids):
-        return "openai"
-
-    return _hostname_fallback(upstream_url)
-
-
-def refine_for_sub_provider(provider: str, model: str | None) -> str:
-    """Surface HF Router's ``meta-llama/...:fireworks-ai`` pin as
-    ``hf-router/fireworks-ai`` in the provider slug."""
-    if provider == "hf-router" and model and ":" in model:
-        return f"hf-router/{model.split(':', 1)[1]}"
-    return provider
diff --git a/src/agentcap/proxy.py b/src/agentcap/proxy.py
deleted file mode 100644
index 60df7e7..0000000
--- a/src/agentcap/proxy.py
+++ /dev/null
@@ -1,415 +0,0 @@
-"""Capture proxy for OpenAI-compat chat completions.
-
-Captures ``POST /v1/chat/completions`` to
-``<capture_dir>/<request_id>.{request,response}.json``; other paths
-pass through. Streaming responses are forwarded chunk-by-chunk and
-the assembled bytes persisted at end-of-stream.
-"""
-
-from __future__ import annotations
-
-import json
-import time
-import uuid
-from pathlib import Path
-from typing import Any, AsyncIterator, Optional
-
-import httpx
-from starlette.applications import Starlette
-from starlette.requests import Request
-from starlette.responses import Response, StreamingResponse
-from starlette.routing import Route
-
-
-# Constant so per-agent Containerfiles can bake the proxy URL into
-# the agent's config files without per-run rewriting.
-IN_PROCESS_PROXY_HOST = "127.0.0.1"
-IN_PROCESS_PROXY_PORT = 0  # kernel-assigned ephemeral; read back via ProxyHandle.port
-
-CHAT_COMPLETIONS_PATH = "/v1/chat/completions"
-
-# Hop-by-hop (RFC 7230 §6.1) plus content-length / content-encoding
-# which the framework recomputes from the re-emitted body.
-_HOP_BY_HOP = frozenset({
-    "host",
-    "content-length",
-    "content-encoding",
-    "transfer-encoding",
-    "connection",
-    "keep-alive",
-    "proxy-authenticate",
-    "proxy-authorization",
-    "te",
-    "trailers",
-    "upgrade",
-})
-
-
-def _filter_headers(headers: Any) -> dict[str, str]:
-    return {k: v for k, v in headers.items() if k.lower() not in _HOP_BY_HOP}
-
-
-def _safe_json_loads(raw: bytes) -> Any:
-    """Parse JSON; on failure, return a {"raw": <decoded>} placeholder so
-    the capture stays well-formed even on malformed input."""
-    try:
-        return json.loads(raw)
-    except (json.JSONDecodeError, ValueError):
-        return {"_unparsed_raw": raw.decode("utf-8", errors="replace")}
-
-
-def _lower_headers(headers: Any) -> dict[str, str]:
-    try:
-        return {k.lower(): v for k, v in headers.items()}
-    except AttributeError:
-        return {}
-
-
-def _extract_model_from_sse(raw: bytes) -> str | None:
-    """Find a ``"model"`` field in the first parseable SSE data line."""
-    for line in raw.splitlines():
-        if not line.startswith(b"data:"):
-            continue
-        payload = line[len(b"data:"):].strip()
-        if not payload or payload == b"[DONE]":
-            continue
-        try:
-            obj = json.loads(payload)
-        except (json.JSONDecodeError, ValueError):
-            continue
-        if isinstance(obj, dict):
-            m = obj.get("model")
-            if isinstance(m, str) and m:
-                return m
-    return None
-
-
-def _response_fingerprint(headers: Any, body_obj: Any) -> dict[str, str | None]:
-    h = _lower_headers(headers)
-    served_model: str | None = None
-    if isinstance(body_obj, dict):
-        m = body_obj.get("model")
-        if isinstance(m, str) and m:
-            served_model = m
-    return {
-        "server": h.get("server") or None,
-        "x_served_by": h.get("x-served-by") or None,
-        "via": h.get("via") or None,
-        "build_info": h.get("x-build-info") or None,
-        "served_model": served_model,
-    }
-
-
-class CaptureProxy:
-    """Capture proxy as a Starlette handler bundle.
-
-    Pass a custom ``client`` (typically ``httpx.AsyncClient`` with
-    ``ASGITransport``) to wire against a mock upstream in tests.
-    """
-
-    def __init__(
-        self,
-        upstream: str,
-        capture_dir: Path | str,
-        *,
-        client: Optional[httpx.AsyncClient] = None,
-    ) -> None:
-        self.upstream = upstream.rstrip("/")
-        self.capture_dir = Path(capture_dir)
-        self.capture_dir.mkdir(parents=True, exist_ok=True)
-        self._client = client
-        self._owns_client = client is None
-        # Context the orchestrator sets before each turn — stamped into
-        # each captured request so rid → (task_id, turn) is recoverable
-        # from the capture file alone, no sidecar mapping.
-        self._task_id: str | None = None
-        self._turn: int | None = None
-
-    def set_context(self, *, task_id: str | None, turn: int | None) -> None:
-        self._task_id = task_id
-        self._turn = turn
-
-    async def _get_client(self) -> httpx.AsyncClient:
-        if self._client is None:
-            # No timeout: agent calls can be long, agent decides when to give up.
-            self._client = httpx.AsyncClient(timeout=None)
-        return self._client
-
-    async def aclose(self) -> None:
-        if self._client is not None and self._owns_client:
-            await self._client.aclose()
-
-    def _persist_request(self, request_id: str, body_bytes: bytes, captured_at: int) -> None:
-        path = self.capture_dir / f"{request_id}.request.json"
-        record = {
-            "request_id": request_id,
-            "captured_at": captured_at,
-            "upstream_url": self.upstream,
-            "task_id": self._task_id,
-            "turn": self._turn,
-            "body": _safe_json_loads(body_bytes),
-        }
-        path.write_text(json.dumps(record, indent=2))
-
-    def _persist_response_nonstream(
-        self,
-        request_id: str,
-        status_code: int,
-        body_bytes: bytes,
-        captured_at: int,
-        upstream_headers: Any,
-    ) -> None:
-        body = _safe_json_loads(body_bytes)
-        fp = _response_fingerprint(upstream_headers, body)
-        path = self.capture_dir / f"{request_id}.response.json"
-        record = {
-            "request_id": request_id,
-            "captured_at_resp": captured_at,
-            "stream": False,
-            "status_code": status_code,
-            "body": body,
-            "upstream_fingerprint": fp,
-        }
-        path.write_text(json.dumps(record, indent=2))
-
-    def _persist_response_stream(
-        self,
-        request_id: str,
-        status_code: int,
-        raw_bytes: bytes,
-        captured_at: int,
-        upstream_headers: Any,
-    ) -> None:
-        sse_model = _extract_model_from_sse(raw_bytes)
-        synthetic_body = {"model": sse_model} if sse_model else None
-        fp = _response_fingerprint(upstream_headers, synthetic_body)
-        path = self.capture_dir / f"{request_id}.response.json"
-        record = {
-            "request_id": request_id,
-            "captured_at_resp": captured_at,
-            "stream": True,
-            "status_code": status_code,
-            "raw": raw_bytes.decode("utf-8", errors="replace"),
-            "upstream_fingerprint": fp,
-        }
-        path.write_text(json.dumps(record, indent=2))
-
-    async def chat_completions(self, request: Request) -> Response:
-        body_bytes = await request.body()
-        body_obj = _safe_json_loads(body_bytes)
-        is_stream = bool(isinstance(body_obj, dict) and body_obj.get("stream", False))
-
-        request_id = uuid.uuid4().hex
-        self._persist_request(request_id, body_bytes, int(time.time()))
-
-        url = f"{self.upstream}{CHAT_COMPLETIONS_PATH}"
-        fwd_headers = _filter_headers(request.headers)
-        client = await self._get_client()
-
-        if is_stream:
-            return await self._forward_stream(
-                client, url, body_bytes, fwd_headers, request_id
-            )
-        return await self._forward_nonstream(
-            client, url, body_bytes, fwd_headers, request_id
-        )
-
-    async def _forward_nonstream(
-        self,
-        client: httpx.AsyncClient,
-        url: str,
-        body_bytes: bytes,
-        fwd_headers: dict[str, str],
-        request_id: str,
-    ) -> Response:
-        upstream_resp = await client.post(url, content=body_bytes, headers=fwd_headers)
-        resp_bytes = upstream_resp.content
-        self._persist_response_nonstream(
-            request_id,
-            upstream_resp.status_code,
-            resp_bytes,
-            int(time.time()),
-            upstream_resp.headers,
-        )
-        return Response(
-            content=resp_bytes,
-            status_code=upstream_resp.status_code,
-            headers=_filter_headers(upstream_resp.headers),
-            media_type=upstream_resp.headers.get("content-type"),
-        )
-
-    async def _forward_stream(
-        self,
-        client: httpx.AsyncClient,
-        url: str,
-        body_bytes: bytes,
-        fwd_headers: dict[str, str],
-        request_id: str,
-    ) -> StreamingResponse:
-        # We need the upstream status + content-type before we can
-        # construct the StreamingResponse. Open the stream eagerly,
-        # capture metadata, then yield bytes lazily.
-        async def streamer() -> AsyncIterator[bytes]:
-            chunks: list[bytes] = []
-            status_code = 502
-            upstream_headers: Any = {}
-            try:
-                async with client.stream(
-                    "POST", url, content=body_bytes, headers=fwd_headers
-                ) as upstream_resp:
-                    status_code = upstream_resp.status_code
-                    upstream_headers = upstream_resp.headers
-                    async for chunk in upstream_resp.aiter_bytes():
-                        chunks.append(chunk)
-                        yield chunk
-            finally:
-                self._persist_response_stream(
-                    request_id,
-                    status_code,
-                    b"".join(chunks),
-                    int(time.time()),
-                    upstream_headers,
-                )
-
-        return StreamingResponse(streamer(), media_type="text/event-stream")
-
-    async def passthrough(self, request: Request) -> Response:
-        url = f"{self.upstream}{request.url.path}"
-        if request.url.query:
-            url = f"{url}?{request.url.query}"
-        body_bytes = await request.body()
-        fwd_headers = _filter_headers(request.headers)
-        client = await self._get_client()
-        upstream_resp = await client.request(
-            request.method,
-            url,
-            content=body_bytes if body_bytes else None,
-            headers=fwd_headers,
-        )
-        return Response(
-            content=upstream_resp.content,
-            status_code=upstream_resp.status_code,
-            headers=_filter_headers(upstream_resp.headers),
-            media_type=upstream_resp.headers.get("content-type"),
-        )
-
-
-def make_app(
-    upstream: str,
-    capture_dir: Path | str,
-    *,
-    client: Optional[httpx.AsyncClient] = None,
-) -> Starlette:
-    """Build the Starlette ASGI app wrapping a CaptureProxy."""
-    proxy = CaptureProxy(upstream, capture_dir, client=client)
-    routes = [
-        Route(CHAT_COMPLETIONS_PATH, proxy.chat_completions, methods=["POST"]),
-        Route(
-            "/{full_path:path}",
-            proxy.passthrough,
-            methods=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
-        ),
-    ]
-
-    from contextlib import asynccontextmanager
-
-    @asynccontextmanager
-    async def lifespan(app: Starlette):
-        try:
-            yield
-        finally:
-            await proxy.aclose()
-
-    app = Starlette(routes=routes, lifespan=lifespan)
-    app.state.proxy = proxy
-    return app
-
-
-def serve(
-    upstream: str,
-    capture_dir: Path | str,
-    host: str = "127.0.0.1",
-    port: int = 8001,
-) -> None:
-    import uvicorn
-
-    app = make_app(upstream, capture_dir)
-    uvicorn.run(app, host=host, port=port)
-
-
-class ProxyHandle:
-    """Running in-process proxy. Use as a context manager."""
-
-    def __init__(
-        self, server, thread, host: str, port: int,
-        proxy: CaptureProxy,
-    ) -> None:
-        self._server = server
-        self._thread = thread
-        self.host = host
-        self.port = port
-        self.proxy = proxy
-
-    def set_context(self, *, task_id: str | None, turn: int | None) -> None:
-        """Forward to the underlying ``CaptureProxy`` so subsequent
-        captures are stamped with the given orchestrator-turn context."""
-        self.proxy.set_context(task_id=task_id, turn=turn)
-
-    @property
-    def base_url(self) -> str:
-        return f"http://{self.host}:{self.port}"
-
-    def shutdown(self, *, timeout: float = 10) -> None:
-        self._server.should_exit = True
-        self._thread.join(timeout=timeout)
-
-    def __enter__(self) -> "ProxyHandle":
-        return self
-
-    def __exit__(self, *exc) -> None:
-        self.shutdown()
-
-
-def serve_in_thread(
-    upstream: str,
-    capture_dir: Path | str,
-    host: str = IN_PROCESS_PROXY_HOST,
-    port: int = IN_PROCESS_PROXY_PORT,
-    *,
-    log_level: str = "warning",
-    startup_timeout: float = 10.0,
-) -> ProxyHandle:
-    """Start the proxy on a daemon thread; block until uvicorn is bound.
-
-    With ``port=0`` the kernel-assigned port is read back into
-    ``ProxyHandle.port``.
-    """
-    import threading
-    import time
-
-    import uvicorn
-
-    app = make_app(upstream, capture_dir)
-    config = uvicorn.Config(app, host=host, port=port, log_level=log_level)
-    server = uvicorn.Server(config)
-
-    thread = threading.Thread(target=server.run, daemon=True)
-    thread.start()
-
-    deadline = time.time() + startup_timeout
-    while not server.started:
-        if time.time() > deadline:
-            server.should_exit = True
-            thread.join(timeout=2)
-            raise RuntimeError(
-                f"proxy did not start within {startup_timeout}s on {host}:{port}"
-            )
-        time.sleep(0.05)
-
-    bound_host, bound_port = host, port
-    try:
-        bound_host, bound_port = server.servers[0].sockets[0].getsockname()[:2]
-    except (AttributeError, IndexError, TypeError):
-        pass
-
-    return ProxyHandle(server, thread, bound_host, bound_port, proxy=app.state.proxy)
diff --git a/src/agentcap/sandbox/__init__.py b/src/agentcap/sandbox/__init__.py
deleted file mode 100644
index 80a400a..0000000
--- a/src/agentcap/sandbox/__init__.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""Filesystem / network sandbox for capture-run subprocesses.
-
-Single implementation: each ``run()`` is an ephemeral
-``podman run --rm`` against the per-agent image built from
-``containers/agentcap-<agent>.Containerfile``. The agent CLI lives
-inside the image, never on the host.
-"""
-
-from __future__ import annotations
-
-import platform
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-from typing import Protocol, runtime_checkable
-
-
-@runtime_checkable
-class Sandbox(Protocol):
-    """Paths returned by :meth:`mkdtemp` and consumed by
-    :meth:`write_text` / :meth:`read_text` are host paths bind-mounted
-    into the agent's view at the same path."""
-
-    name: str
-
-    def wrap(
-        self,
-        argv: list[str],
-        *,
-        writable_paths: list[Path],
-        deny_network: bool = False,
-    ) -> list[str]:
-        ...
-
-    def run(
-        self,
-        argv: list[str],
-        *,
-        env: dict[str, str] | None = None,
-        cwd: str | None = None,
-        writable_paths: list[Path] | None = None,
-        deny_network: bool = False,
-        timeout: float | None = None,
-        check: bool = False,
-    ) -> subprocess.CompletedProcess:
-        ...
-
-    def mkdtemp(self, prefix: str = "agentcap-") -> str: ...
-    def rmtree(self, path: str) -> None: ...
-    def write_text(self, path: str, content: str) -> None: ...
-    def read_text(self, path: str) -> str: ...
-
-
-def get_sandbox(
-    *,
-    agent: str,
-    env: dict[str, str] | None = None,
-    readonly_paths: list[Path] | None = None,
-    writable_paths: list[Path] | None = None,
-) -> Sandbox:
-    """Return a sandbox handle for ``agent``. Pure: does not build
-    the image. Call :func:`require_sandbox_or_die` to provision."""
-    from .podman import PodmanSandbox
-    from .podman_provisioning import image_tag
-    return PodmanSandbox(
-        image=image_tag(agent), env=env,
-        readonly_paths=readonly_paths,
-        writable_paths=writable_paths,
-    )
-
-
-def require_sandbox_or_die(
-    *,
-    agent: str,
-    command: str = "agentcap run",
-    log=lambda msg: None,
-    env: dict[str, str] | None = None,
-    readonly_paths: list[Path] | None = None,
-    writable_paths: list[Path] | None = None,
-) -> "Sandbox":
-    """Return a sandbox handle, or exit 2 with an install hint.
-    Triggers an image build on first use."""
-    system = platform.system()
-    if system not in ("Linux", "Darwin"):
-        sys.stderr.write(
-            f"{command}: agentcap sandboxing is only supported on "
-            f"Linux and macOS; host is {system!r}.\n"
-        )
-        sys.exit(2)
-    if not shutil.which("podman"):
-        sys.stderr.write(
-            f"{command}: podman is required.\n"
-            "    Install with: brew install podman (macOS) "
-            "or apt install podman (Linux)\n"
-        )
-        sys.exit(2)
-    from .podman_provisioning import ensure_image, ensure_machine_running
-    try:
-        ensure_machine_running(log=log)
-        ensure_image(agent, log=log)
-    except (FileNotFoundError, RuntimeError) as exc:
-        sys.stderr.write(f"{command}: {exc}\n")
-        sys.exit(2)
-    return get_sandbox(
-        agent=agent, env=env,
-        readonly_paths=readonly_paths,
-        writable_paths=writable_paths,
-    )
-
-
-__all__ = [
-    "Sandbox",
-    "get_sandbox",
-    "require_sandbox_or_die",
-]
diff --git a/src/agentcap/sandbox/podman.py b/src/agentcap/sandbox/podman.py
deleted file mode 100644
index 3072412..0000000
--- a/src/agentcap/sandbox/podman.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Podman container sandbox.
-
-Each ``run()`` is a fresh ``podman run --rm`` against a pre-built
-image. Host paths in ``writable_paths`` / ``readonly_paths`` are
-bind-mounted into the container at the same path so the agent sees
-identical paths inside and outside.
-
-The image is *not* built here — callers must ensure it exists in the
-local podman image store before constructing the sandbox.
-"""
-
-from __future__ import annotations
-
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-
-
-_PODMAN = "podman"
-
-
-def build_command(
-    argv: list[str],
-    *,
-    image: str,
-    writable_paths: list[Path],
-    readonly_paths: list[Path] | None = None,
-    deny_network: bool = False,
-    env: dict[str, str] | None = None,
-    cwd: str | None = None,
-) -> list[str]:
-    """Assemble a ``podman run --rm ... <image> <argv>`` invocation."""
-    cmd = [_PODMAN, "run", "--rm"]
-    if deny_network:
-        cmd.append("--network=none")
-    if cwd is not None:
-        cmd.extend(["--workdir", str(cwd)])
-
-    bound: set[str] = set()
-    all_writable = list(writable_paths)
-    if cwd is not None:
-        all_writable.append(Path(cwd))
-    for p in all_writable:
-        resolved = str(Path(p).resolve())
-        if resolved in bound:
-            continue
-        bound.add(resolved)
-        cmd.extend(["--mount", f"type=bind,src={resolved},dst={resolved}"])
-    for p in readonly_paths or []:
-        resolved = str(Path(p).resolve())
-        if resolved in bound:
-            continue
-        bound.add(resolved)
-        cmd.extend(["--mount", f"type=bind,src={resolved},dst={resolved},ro"])
-
-    for k, v in (env or {}).items():
-        cmd.extend(["-e", f"{k}={v}"])
-
-    cmd.append(image)
-    cmd.extend(argv)
-    return cmd
-
-
-class PodmanSandbox:
-    """Image-based sandbox using ``podman run --rm``.
-
-    The image holds the agent CLI + deps; nothing on the host is
-    visible inside the container except paths the driver explicitly
-    passes via ``writable_paths`` / ``readonly_paths``.
-    """
-
-    name = "podman"
-
-    def __init__(
-        self,
-        image: str,
-        *,
-        env: dict[str, str] | None = None,
-        readonly_paths: list[Path] | None = None,
-        writable_paths: list[Path] | None = None,
-    ) -> None:
-        self.image = image
-        self._extra_env: dict[str, str] = dict(env or {})
-        self._readonly_paths: list[Path] = list(readonly_paths or [])
-        self._writable_paths: list[Path] = list(writable_paths or [])
-
-    def close(self) -> None:
-        """No-op. Each ``run()`` produces an ephemeral container."""
-
-    def __enter__(self) -> "PodmanSandbox":
-        return self
-
-    def __exit__(self, *_exc) -> None:
-        self.close()
-
-    def wrap(
-        self,
-        argv: list[str],
-        *,
-        writable_paths: list[Path],
-        deny_network: bool = False,
-        env: dict[str, str] | None = None,
-        cwd: str | None = None,
-    ) -> list[str]:
-        full_env = dict(self._extra_env)
-        if env:
-            full_env.update(env)
-        return build_command(
-            argv,
-            image=self.image,
-            writable_paths=list(writable_paths) + self._writable_paths,
-            readonly_paths=self._readonly_paths,
-            deny_network=deny_network,
-            env=full_env,
-            cwd=cwd,
-        )
-
-    def run(
-        self,
-        argv: list[str],
-        *,
-        env: dict[str, str] | None = None,
-        cwd: str | None = None,
-        writable_paths: list[Path] | None = None,
-        deny_network: bool = False,
-        timeout: float | None = None,
-        check: bool = False,
-    ) -> subprocess.CompletedProcess:
-        wrapped = self.wrap(
-            argv,
-            writable_paths=writable_paths or [],
-            deny_network=deny_network,
-            env=env,
-            cwd=cwd,
-        )
-        # ``--rm`` only fires on a clean container exit; if the orchestrator
-        # is killed, times out, or the parent process dies before the
-        # container does, the container is orphaned and its overlay layer
-        # accumulates in the podman VM. Tag every invocation with a unique
-        # ``--name`` so a ``finally`` can force-remove it no matter how
-        # ``subprocess.run`` returned.
-        import uuid
-        name = f"agentcap-{uuid.uuid4().hex[:12]}"
-        wrapped.insert(2, "--name")
-        wrapped.insert(3, name)
-        try:
-            return subprocess.run(
-                wrapped,
-                stdin=subprocess.DEVNULL,
-                capture_output=True, text=True,
-                timeout=timeout, check=check,
-            )
-        finally:
-            # Cleanup is best-effort: if it raises (timeout, podman
-            # missing, etc.) we must NOT shadow the primary outcome of
-            # ``run()`` — turning a successful container exit into a
-            # cleanup failure (or hiding the real subprocess error
-            # behind a generic rm failure) would surprise every caller.
-            try:
-                subprocess.run(
-                    [_PODMAN, "rm", "-f", name],
-                    stdin=subprocess.DEVNULL,
-                    capture_output=True, text=True,
-                    timeout=30,
-                )
-            except Exception:  # noqa: BLE001
-                pass
-
-    @staticmethod
-    def _runs_dir() -> Path:
-        d = Path.home() / ".cache" / "agentcap" / "runs"
-        d.mkdir(parents=True, exist_ok=True)
-        return d
-
-    def mkdtemp(self, prefix: str = "agentcap-") -> str:
-        return tempfile.mkdtemp(prefix=prefix, dir=str(self._runs_dir()))
-
-    def rmtree(self, path: str) -> None:
-        shutil.rmtree(path, ignore_errors=True)
-
-    def write_text(self, path: str, content: str) -> None:
-        Path(path).write_text(content)
-
-    def read_text(self, path: str) -> str:
-        return Path(path).read_text()
-
-
-__all__ = ["PodmanSandbox", "build_command"]
diff --git a/src/agentcap/sandbox/podman_provisioning.py b/src/agentcap/sandbox/podman_provisioning.py
deleted file mode 100644
index 7b9580e..0000000
--- a/src/agentcap/sandbox/podman_provisioning.py
+++ /dev/null
@@ -1,207 +0,0 @@
-"""Per-agent podman image lifecycle: ``ensure_image`` for
-``agentcap run`` and the pytest fixture both.
-
-The Containerfile is the source of truth: its SHA256 is baked into
-the built image as a label, and a hash mismatch on subsequent runs
-forces a rebuild.
-"""
-
-from __future__ import annotations
-
-import hashlib
-import json
-import platform
-import shutil
-import subprocess
-from pathlib import Path
-
-_CONTAINERFILE_DIR = (
-    Path(__file__).resolve().parents[3] / "containers"
-)
-
-_HASH_LABEL = "agentcap.containerfile-hash"
-
-
-def containerfile_path(agent: str) -> Path:
-    return _CONTAINERFILE_DIR / f"agentcap-{agent}.Containerfile"
-
-
-def image_tag(agent: str) -> str:
-    return f"localhost/agentcap-{agent}:latest"
-
-
-def _containerfile_hash(path: Path) -> str:
-    h = hashlib.sha256()
-    h.update(path.read_bytes())
-    name = path.stem
-    ctx = path.parent / name
-    if ctx.is_dir():
-        for f in sorted(ctx.rglob("*")):
-            if f.is_file():
-                h.update(str(f.relative_to(ctx)).encode())
-                h.update(b"\0")
-                h.update(f.read_bytes())
-                h.update(b"\0")
-    return h.hexdigest()
-
-
-def _image_info(tag: str) -> dict | None:
-    if not shutil.which("podman"):
-        return None
-    r = subprocess.run(
-        ["podman", "image", "inspect", tag],
-        capture_output=True, text=True,
-    )
-    if r.returncode != 0:
-        return None
-    try:
-        info = json.loads(r.stdout)
-    except json.JSONDecodeError:
-        return None
-    return info[0] if isinstance(info, list) and info else None
-
-
-def _image_stored_hash(info: dict) -> str | None:
-    labels = (info.get("Labels") or info.get("Config", {}).get("Labels")) or {}
-    return labels.get(_HASH_LABEL)
-
-
-def _image_is_current(tag: str, cf: Path) -> bool:
-    info = _image_info(tag)
-    if info is None:
-        return False
-    stored = _image_stored_hash(info)
-    return stored is not None and stored == _containerfile_hash(cf)
-
-
-def ensure_image(
-    agent: str,
-    *,
-    log=lambda msg: None,
-) -> str:
-    """Build the per-agent podman image from the Containerfile if
-    absent or stale; return the image tag.
-
-    Raises ``FileNotFoundError`` if the Containerfile is missing,
-    ``RuntimeError`` if ``podman`` isn't installed or the build fails.
-    """
-    if not shutil.which("podman"):
-        raise RuntimeError(
-            "podman not on $PATH (brew install podman / apt install podman)"
-        )
-    cf = containerfile_path(agent)
-    if not cf.is_file():
-        raise FileNotFoundError(f"Containerfile not found: {cf}")
-    tag = image_tag(agent)
-
-    if _image_is_current(tag, cf):
-        log(f"{tag} ready (Containerfile hash match)")
-        return tag
-
-    if _image_info(tag) is not None:
-        log(f"{tag} is stale; rebuilding…")
-        subprocess.run(
-            ["podman", "rmi", "--force", tag],
-            capture_output=True, text=True, check=False,
-        )
-    else:
-        log(f"{tag} not built; building (cold build can take minutes)…")
-
-    cf_hash = _containerfile_hash(cf)
-    r = subprocess.run(
-        [
-            "podman", "build",
-            "-f", str(cf),
-            "-t", tag,
-            "--label", f"{_HASH_LABEL}={cf_hash}",
-            str(cf.parent),
-        ],
-        timeout=1800,
-    )
-    if r.returncode != 0:
-        raise RuntimeError(
-            f"podman build failed for {tag} (rc={r.returncode}); "
-            f"see streamed output above."
-        )
-    log(f"{tag} built")
-    return tag
-
-
-def rmi_image(tag: str) -> None:
-    subprocess.run(
-        ["podman", "rmi", "--force", tag],
-        capture_output=True, text=True, timeout=60, check=False,
-    )
-
-
-def _machine_status() -> str | None:
-    """Return the status (``Running`` / ``Stopped`` / ``Starting`` /
-    ...) of the default podman machine, or ``None`` if no machine
-    exists."""
-    if not shutil.which("podman"):
-        return None
-    r = subprocess.run(
-        ["podman", "machine", "list", "--format", "json"],
-        capture_output=True, text=True,
-    )
-    if r.returncode != 0:
-        return None
-    try:
-        machines = json.loads(r.stdout)
-    except json.JSONDecodeError:
-        return None
-    if not machines:
-        return None
-    default = next(
-        (m for m in machines if m.get("Default")), machines[0],
-    )
-    if default.get("Running"):
-        return "Running"
-    if default.get("Starting"):
-        return "Starting"
-    return "Stopped"
-
-
-def ensure_machine_running(*, log=lambda msg: None) -> None:
-    """macOS only: ensure ``podman machine`` is up. No-op on Linux,
-    where podman talks to the host kernel directly.
-
-    Never auto-initialises the machine — that's a 1-2 GB download
-    and a multi-minute operation the user should consent to. Raises
-    ``RuntimeError`` if podman isn't installed, no machine exists,
-    or the machine can't be started.
-    """
-    if platform.system() != "Darwin":
-        return
-    if not shutil.which("podman"):
-        raise RuntimeError(
-            "podman not on $PATH (brew install podman)"
-        )
-    status = _machine_status()
-    if status is None:
-        raise RuntimeError(
-            "no podman machine found. Initialise one first:\n"
-            "    podman machine init\n"
-            "    podman machine start"
-        )
-    if status == "Running":
-        return
-    log(f"podman machine is {status}; starting…")
-    r = subprocess.run(
-        ["podman", "machine", "start"],
-        capture_output=True, text=True, timeout=300,
-    )
-    if r.returncode != 0:
-        raise RuntimeError(
-            f"podman machine start failed (rc={r.returncode}): "
-            f"{r.stderr.strip()}"
-        )
-
-
-__all__ = [
-    "containerfile_path",
-    "ensure_image",
-    "ensure_machine_running",
-    "image_tag",
-    "rmi_image",
-]
diff --git a/src/agentcap/scan.py b/src/agentcap/scan.py
deleted file mode 100644
index dc4e86d..0000000
--- a/src/agentcap/scan.py
+++ /dev/null
@@ -1,247 +0,0 @@
-"""Secret scan over a capture run, gating ``agentcap export``.
-
-Shells out to `trufflehog filesystem` and parses its JSON output.
-Captures and traces are scanned as plain text (JSON / JSONL); the
-parquet repackaging happens after the scan, so we always check the
-unpacked source.
-
-Policy: a single ``verified`` hit aborts the export. ``unverified``
-hits are reported but do not block — TruffleHog's pattern matchers
-have a real false-positive rate (e.g. a 32-char alphanumeric in a
-model response looks like a Box OAuth token), and we don't have
-verification credentials for most providers.
-
-Scan results are persisted to ``<run_dir>/scan.json`` so subsequent
-``agentcap export`` invocations skip the (sometimes slow) verify
-step. The cache is invalidated when the user passes ``--rescan`` or
-when the recorded ``no_verification`` mode doesn't match the
-requested mode (an unverified cache can't satisfy a verified
-request).
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import shutil
-import subprocess
-import time
-from dataclasses import asdict, dataclass, field
-from pathlib import Path
-
-
-@dataclass
-class ScanHit:
-    detector: str
-    file: str
-    verified: bool
-    raw: str  # redacted-by-Trufflehog "Raw" field, kept for context
-
-
-@dataclass
-class ScanResult:
-    bytes_scanned: int = 0
-    chunks_scanned: int = 0
-    verified: list[ScanHit] = field(default_factory=list)
-    unverified: list[ScanHit] = field(default_factory=list)
-
-
-class TrufflehogMissingError(RuntimeError):
-    """``trufflehog`` is not on PATH (and not in ~/.local/bin)."""
-
-
-_INSTALL_HINT = (
-    "trufflehog is required for the pre-export secret scan but was not "
-    "found on PATH. Install with:\n"
-    "    curl -sSfL https://raw.githubusercontent.com/trufflesecurity/"
-    "trufflehog/main/scripts/install.sh | sh -s -- -b ~/.local/bin\n"
-    "Or pass --no-scan to ``agentcap export`` to skip the scan."
-)
-
-
-def find_trufflehog() -> str:
-    """Locate the ``trufflehog`` binary. Checks PATH then
-    ``~/.local/bin`` (the installer's default target).
-    Raises :class:`TrufflehogMissingError` if not found."""
-    on_path = shutil.which("trufflehog")
-    if on_path:
-        return on_path
-    local = Path.home() / ".local" / "bin" / "trufflehog"
-    if local.is_file() and os.access(local, os.X_OK):
-        return str(local)
-    raise TrufflehogMissingError(_INSTALL_HINT)
-
-
-def scan_path(
-    path: Path | str,
-    *,
-    no_verification: bool = False,
-    extra_args: tuple[str, ...] = (),
-) -> ScanResult:
-    """Scan ``path`` (a directory or file) with trufflehog.
-
-    ``no_verification=False`` (the default) round-trips every
-    candidate against the provider's API (Stripe, AWS, GitHub, HF, …)
-    so the ``verified`` bucket is high-precision. Requires network.
-    Pass ``True`` for offline pattern-only matching — faster but
-    everything lands as ``unverified``.
-    """
-    bin_path = find_trufflehog()
-    argv = [
-        bin_path, "filesystem", str(path),
-        "--json", "--no-color",
-        "--results=verified,unverified",
-    ]
-    if no_verification:
-        argv.append("--no-verification")
-    argv.extend(extra_args)
-
-    proc = subprocess.run(
-        argv, capture_output=True, text=True, check=False,
-    )
-
-    result = ScanResult()
-    for line in proc.stdout.splitlines():
-        if not line.strip():
-            continue
-        try:
-            rec = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if "DetectorName" not in rec:
-            continue
-        hit = ScanHit(
-            detector=rec.get("DetectorName") or "?",
-            file=(
-                rec.get("SourceMetadata", {})
-                .get("Data", {})
-                .get("Filesystem", {})
-                .get("file") or "?"
-            ),
-            verified=bool(rec.get("Verified")),
-            raw=str(rec.get("Raw") or "")[:80],
-        )
-        (result.verified if hit.verified else result.unverified).append(hit)
-
-    # The summary line on stderr looks like:
-    #   ... finished scanning {"chunks":..., "bytes":..., "verified_secrets":..., "unverified_secrets":...}
-    # Parse what we can; the per-hit list above is authoritative.
-    for line in proc.stderr.splitlines():
-        if "finished scanning" not in line:
-            continue
-        brace = line.find("{")
-        if brace < 0:
-            continue
-        try:
-            stats = json.loads(line[brace:])
-        except json.JSONDecodeError:
-            continue
-        result.bytes_scanned = int(stats.get("bytes", 0))
-        result.chunks_scanned = int(stats.get("chunks", 0))
-        break
-
-    return result
-
-
-SCAN_CACHE_NAME = "scan.json"
-
-
-def _result_to_dict(result: ScanResult, *, no_verification: bool) -> dict:
-    return {
-        "scanned_at": int(time.time()),
-        "no_verification": no_verification,
-        "bytes_scanned": result.bytes_scanned,
-        "chunks_scanned": result.chunks_scanned,
-        "verified": [asdict(h) for h in result.verified],
-        "unverified": [asdict(h) for h in result.unverified],
-    }
-
-
-def _result_from_dict(d: dict) -> ScanResult:
-    return ScanResult(
-        bytes_scanned=int(d.get("bytes_scanned") or 0),
-        chunks_scanned=int(d.get("chunks_scanned") or 0),
-        verified=[ScanHit(**h) for h in (d.get("verified") or [])],
-        unverified=[ScanHit(**h) for h in (d.get("unverified") or [])],
-    )
-
-
-def load_cached_scan(
-    run_dir: Path | str, *, no_verification: bool,
-) -> ScanResult | None:
-    """Return a previously persisted scan if it covers the requested
-    verification mode. A cache produced with ``no_verification=True``
-    cannot satisfy a ``no_verification=False`` request (the verified
-    bucket would be unsound), so we re-scan in that direction.
-    Returns ``None`` when no usable cache exists."""
-    cache_path = Path(run_dir) / SCAN_CACHE_NAME
-    if not cache_path.is_file():
-        return None
-    try:
-        d = json.loads(cache_path.read_text())
-    except (OSError, json.JSONDecodeError):
-        return None
-    cached_no_verify = bool(d.get("no_verification", True))
-    if cached_no_verify and not no_verification:
-        # Want verified results; cache only has patterns.
-        return None
-    return _result_from_dict(d)
-
-
-_SCAN_SUBDIRS = ("captures", "traces", "sessions")
-
-
-def scan_run_dir(
-    run_dir: Path | str,
-    *,
-    no_verification: bool = False,
-    rescan: bool = False,
-) -> tuple[ScanResult, bool]:
-    """Scan a run dir, persisting the result to ``<run_dir>/scan.json``
-    for cheap reuse. Returns ``(result, was_cached)``.
-
-    Scans the three subdirs that can hold user/agent text — captures,
-    traces, and sessions — and skips top-level files like
-    ``run.json`` and the cache itself. ``rescan=True`` ignores any
-    persisted result and re-runs trufflehog. Otherwise the cache is
-    used when it covers the requested mode."""
-    run_dir = Path(run_dir)
-    if not rescan:
-        cached = load_cached_scan(run_dir, no_verification=no_verification)
-        if cached is not None:
-            return cached, True
-
-    merged = ScanResult()
-    for name in _SCAN_SUBDIRS:
-        sub = run_dir / name
-        if not sub.is_dir():
-            continue
-        part = scan_path(sub, no_verification=no_verification)
-        merged.bytes_scanned += part.bytes_scanned
-        merged.chunks_scanned += part.chunks_scanned
-        merged.verified.extend(part.verified)
-        merged.unverified.extend(part.unverified)
-
-    try:
-        (run_dir / SCAN_CACHE_NAME).write_text(
-            json.dumps(
-                _result_to_dict(merged, no_verification=no_verification),
-                indent=2,
-            )
-        )
-    except OSError:
-        # Cache write isn't load-bearing — let the scan result through.
-        pass
-    return merged, False
-
-
-__all__ = [
-    "SCAN_CACHE_NAME",
-    "ScanHit",
-    "ScanResult",
-    "TrufflehogMissingError",
-    "find_trufflehog",
-    "load_cached_scan",
-    "scan_path",
-    "scan_run_dir",
-]
diff --git a/src/lib.rs b/src/lib.rs
index d8d0b8d..6238658 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,7 @@
-//! agentcap — Rust port (data/UI half: export, push, scan, inspect).
+//! agentcap — capture agent ↔ model interactions and publish them as HF datasets.
 //!
-//! The capture/runtime half (`run`, proxy, sandbox, drivers) still lives in the
-//! Python package under `src/agentcap/`; this crate reads the captures it writes.
+//! `run` drives an agent through a corpus behind a capture proxy; `export` renders
+//! the captures to parquet and pushes them to the Hub; `inspect` / `ls` browse them.
 
 pub mod captures;
 pub mod diff;
diff --git a/src/provider.rs b/src/provider.rs
index 19de462..29b3a90 100644
--- a/src/provider.rs
+++ b/src/provider.rs
@@ -1,9 +1,9 @@
 //! Identify the inference backend behind an upstream URL.
 //!
-//! This is the pure subset the export path needs: hostname classification
-//! ([`hostname_fallback`]) + the HF Router sub-provider pin
-//! ([`refine_for_sub_provider`]). The network `probe` (live introspection) is
-//! part of the capture/runtime half and lives in the Python package for now.
+//! Hostname classification ([`hostname_fallback`]) + the HF Router sub-provider pin
+//! ([`refine_for_sub_provider`]) — what `run` and `export` use to slug a backend.
+//! Live network introspection of the backend isn't implemented; the hostname slug
+//! is enough for both paths.
 
 use std::net::IpAddr;
 
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index 7b7940b..0000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,514 +0,0 @@
-"""Shared pytest fixtures.
-
-Live tests run when prereqs are present, skip otherwise. Prereqs:
-
-  - Agent binary present in the per-agent sandbox
-    (``agentcap run --agent <name>`` once provisions it).
-  - ``podman`` on PATH (the fixture pulls and runs the official
-    ``ghcr.io/ggml-org/llama.cpp`` server image).
-"""
-
-from __future__ import annotations
-
-import http.server
-import os
-import shutil
-import socket
-import socketserver
-import subprocess
-import sys
-import threading
-import time
-from pathlib import Path
-from urllib.request import urlopen
-
-import pytest
-
-
-pytest_plugins = ["tests.fixtures.sandbox_images"]
-
-
-def _log(msg: str) -> None:
-    """Write a progress line to stderr (visible with ``pytest -s``)."""
-    sys.stderr.write(f"  [agentcap-test] {msg}\n")
-    sys.stderr.flush()
-
-
-# Default test target. ``hf_hub_download`` of Qwen3-1.7B Q8_0 is the
-# "click and run" path — agentcap fetches the model bytes, user
-# doesn't manage GGUF files. Qwen3-1.7B is the smallest checkpoint
-# in this family that chains read → edit reliably across the four
-# drivers; ~1.7 GB downloads + loads on a CI runner in a couple of
-# minutes. Semantic correctness is intentionally not graded; the
-# live tests verify the wire path, not the agent's task quality.
-_DEFAULT_GGUF_REPO = "Qwen/Qwen3-1.7B-GGUF"
-_DEFAULT_GGUF_FILE = "Qwen3-1.7B-Q8_0.gguf"
-_DEFAULT_MODEL_ALIAS = "Qwen3-1.7B"
-
-# Official llama.cpp server image, version-pinned per llama.cpp
-# commit. Override via ``AGENTCAP_TEST_LLAMA_IMAGE`` to test a
-# different release. CPU-only; the GPU variants are tagged
-# ``server-cuda13-*`` / ``server-vulkan-*``.
-_DEFAULT_LLAMA_IMAGE = "ghcr.io/ggml-org/llama.cpp:server-b9487"
-
-
-def _fetch_default_gguf() -> str | None:
-    """Pull the default GGUF from HF Hub. Cached in the HF default
-    cache dir; first call downloads ~5GB (tqdm progress on stderr),
-    subsequent calls return the cached path instantly. Returns None
-    on any failure — caller treats that as 'skip live tests'."""
-    try:
-        from huggingface_hub import hf_hub_download
-    except ImportError:
-        return None
-    _log(
-        f"fetching default GGUF "
-        f"{_DEFAULT_GGUF_REPO}/{_DEFAULT_GGUF_FILE} "
-        f"(cached in ~/.cache/huggingface/ after first download)…"
-    )
-    try:
-        return hf_hub_download(
-            repo_id=_DEFAULT_GGUF_REPO,
-            filename=_DEFAULT_GGUF_FILE,
-        )
-    except Exception as exc:
-        _log(f"GGUF download failed: {exc}")
-        return None
-
-
-def _free_port() -> int:
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(("127.0.0.1", 0))
-        return s.getsockname()[1]
-
-
-def _wait_ready(
-    url: str, timeout: float = 180.0, log=lambda msg: None,
-) -> None:
-    """Poll a ``/v1/models`` endpoint until it responds 200 or we
-    blow ``timeout`` seconds. Tiny GGUFs load in seconds; the budget
-    is generous so we don't flake on a cold weight load.
-
-    Emits a heartbeat every ~10s so the test runner shows progress
-    during a slow weight load instead of looking hung."""
-    deadline = time.time() + timeout
-    start = time.time()
-    last_hb = start
-    while time.time() < deadline:
-        try:
-            with urlopen(url, timeout=2) as r:
-                if r.status == 200:
-                    return
-        except Exception:
-            pass
-        now = time.time()
-        if now - last_hb >= 10:
-            log(f"waiting for llama serve… ({int(now - start)}s elapsed)")
-            last_hb = now
-        time.sleep(1)
-    raise RuntimeError(f"llama serve never became ready at {url}")
-
-
-def _agent_reachable_host() -> str:
-    """Hostname the agent (inside the podman container) uses to reach
-    a host-side server. Podman exposes the host gateway as
-    ``host.containers.internal``."""
-    return "host.containers.internal"
-
-
-@pytest.fixture(scope="session")
-def live_llama_url():
-    """Host-side server root of the llama backend (no ``/v1`` suffix).
-
-    For tests that spawn their own proxy on top and need a directly-
-    reachable upstream. Reuses an existing ``llama serve`` on
-    8000/8080, or spawns one as a podman container.
-    """
-    for probe_port in (8000, 8080):
-        try:
-            with urlopen(
-                f"http://127.0.0.1:{probe_port}/v1/models", timeout=1,
-            ) as r:
-                if r.status == 200:
-                    _log(f"reusing existing llama serve on :{probe_port}")
-                    yield f"http://127.0.0.1:{probe_port}"
-                    return
-        except Exception:
-            pass
-
-    if not shutil.which("podman"):
-        pytest.skip(
-            "podman not on PATH; install with brew install podman "
-            "(macOS) or apt install podman (Linux)."
-        )
-    # macOS: bring the podman machine up before any ``podman run`` so
-    # a stopped/uninitialised machine surfaces as a clear skip with
-    # an install hint, not a generic ``podman run`` failure.
-    from agentcap.sandbox.podman_provisioning import ensure_machine_running
-    try:
-        ensure_machine_running(log=_log)
-    except RuntimeError as exc:
-        pytest.skip(str(exc))
-    gguf = os.environ.get("AGENTCAP_TEST_GGUF") or _fetch_default_gguf()
-    if not gguf:
-        pytest.skip(
-            "couldn't obtain a GGUF; HF fetch failed and no "
-            "AGENTCAP_TEST_GGUF override set."
-        )
-    # HF cache stores GGUFs as symlinks into ``blobs/``; the container
-    # needs the realpath's parent dir bound in.
-    real_gguf = Path(gguf).resolve()
-    gguf_dir = real_gguf.parent
-    gguf_name = real_gguf.name
-
-    image = os.environ.get(
-        "AGENTCAP_TEST_LLAMA_IMAGE", _DEFAULT_LLAMA_IMAGE,
-    )
-    port = _free_port()
-    ctx = os.environ.get("AGENTCAP_TEST_CTX_SIZE", "8192")
-    name = f"agentcap-llama-{os.getpid()}"
-    argv = [
-        "podman", "run", "--rm", "-d", "--name", name,
-        "-p", f"127.0.0.1:{port}:8080",
-        "--mount", f"type=bind,src={gguf_dir},dst=/models,ro",
-        image,
-        "--model", f"/models/{gguf_name}",
-        "--host", "0.0.0.0",
-        "--port", "8080",
-        "--ctx-size", ctx,
-        "--reasoning-format", "none",
-        "--jinja",
-    ]
-    _log(
-        f"spawning llama container {name} on :{port} "
-        f"(image={image}, gguf={gguf_name}, ctx={ctx})"
-    )
-    # 10 min covers a cold-cache image pull (~1 GB) on a slow CI
-    # runner plus the actual ``podman run -d`` setup.
-    r = subprocess.run(argv, capture_output=True, text=True, timeout=600)
-    if r.returncode != 0:
-        # ``podman run`` failing once the host has podman is a real
-        # problem (bad flags, pull failure, permissions), not a missing
-        # prereq. Fail loud so CI doesn't silently green over it.
-        pytest.fail(f"podman run failed: {r.stderr.strip()}")
-    try:
-        _wait_ready(
-            f"http://127.0.0.1:{port}/v1/models",
-            timeout=180,
-            log=_log,
-        )
-        _log(f"llama container ready at :{port}")
-        yield f"http://127.0.0.1:{port}"
-    finally:
-        subprocess.run(
-            ["podman", "rm", "-f", name],
-            capture_output=True, text=True, timeout=30,
-        )
-
-
-@pytest.fixture(scope="session")
-def live_proxy_base_url(live_llama_url):
-    """Agent-side ``/v1`` URL of the in-process capture proxy.
-
-    For tests that exercise the agent ↔ proxy ↔ llama path from
-    outside.
-    """
-    import tempfile
-
-    from agentcap.proxy import serve_in_thread
-    proxy_port = _free_port()
-    capture_dir = tempfile.mkdtemp(prefix="agentcap-pytest-captures-")
-    agent_url = f"http://{_agent_reachable_host()}:{proxy_port}/v1"
-    _log(
-        f"starting in-process proxy on 0.0.0.0:{proxy_port} "
-        f"-> {live_llama_url} (agents reach it at {agent_url})"
-    )
-    with serve_in_thread(
-        live_llama_url, capture_dir,
-        host="0.0.0.0", port=proxy_port,
-    ):
-        yield agent_url
-
-
-@pytest.fixture(scope="session")
-def live_model() -> str:
-    return os.environ.get("AGENTCAP_TEST_MODEL", _DEFAULT_MODEL_ALIAS)
-
-
-@pytest.fixture(scope="session")
-def sandbox_for(
-    agentcap_image_for, live_proxy_base_url, live_model,
-):
-    """Factory: ``sandbox_for("hermes")`` returns a Sandbox keyed on
-    the given agent. The image fixture ensures the per-agent podman
-    image is built first.
-
-    The sandbox env is seeded with ``AGENTCAP_PROXY_URL`` *and*
-    ``AGENTCAP_MODEL`` so the per-agent entrypoint can start — the
-    opencode init script bails out without ``AGENTCAP_MODEL``, which
-    is enough to make ``command -v opencode`` (used as a skip probe
-    by ``agent_proj_for``) exit non-zero and silently skip the test.
-    """
-    from agentcap.sandbox import get_sandbox
-
-    cache: dict[str, object] = {}
-
-    def _get(agent: str):
-        if agent in cache:
-            return cache[agent]
-        agentcap_image_for(agent)
-        sb = get_sandbox(
-            agent=agent,
-            env={
-                "AGENTCAP_PROXY_URL": live_proxy_base_url,
-                "AGENTCAP_MODEL": live_model,
-            },
-        )
-        cache[agent] = sb
-        return sb
-
-    yield _get
-    for sb in cache.values():
-        close = getattr(sb, "close", None)
-        if callable(close):
-            close()
-
-
-@pytest.fixture
-def agent_proj_for(sandbox_for):
-    """Factory: ``agent_proj_for("hermes")`` returns
-    ``(sandbox, proj_path)``. The sandbox is probed for the agent
-    binary (test skips if it's missing) and a fresh empty project
-    dir is minted to serve as ``cwd``.
-
-    The dir is removed at the end of the test.
-    """
-    created: list[tuple[object, str]] = []
-
-    def _build(agent: str) -> tuple[object, str]:
-        sb = sandbox_for(agent)
-        _log(f"probing {agent!r} binary in sandbox…")
-        r = sb.run(
-            ["sh", "-c", f"command -v {agent}"], check=False, timeout=10,
-        )
-        if r.returncode != 0:
-            pytest.skip(
-                f"{agent!r} is not on the sandbox's PATH; build the "
-                f"agentcap-{agent} image before running live tests."
-            )
-        proj = sb.mkdtemp(prefix=f"agentcap-{agent}-proj-")
-        _log(f"{agent} project: {proj}")
-        created.append((sb, proj))
-        return sb, proj
-
-    yield _build
-    for sb, proj in created:
-        sb.rmtree(proj)
-
-
-@pytest.fixture
-def fake_sandbox():
-    """A pass-through Sandbox stub for driver/CLI unit tests that
-    don't actually exercise sandbox isolation. Lives only in tests;
-    no production code depends on it."""
-    import os
-    import tempfile
-
-    class _FakeSandbox:
-        name = "fake"
-
-        def wrap(self, argv, *, writable_paths, deny_network=False):
-            return list(argv)
-
-        def run(
-            self, argv, *, env=None, cwd=None, writable_paths=None,
-            deny_network=False, timeout=None, check=False,
-        ):
-            full_env = {**os.environ, **(env or {})}
-            return subprocess.run(
-                list(argv), env=full_env, cwd=cwd,
-                capture_output=True, text=True,
-                timeout=timeout, check=check,
-            )
-
-        def mkdtemp(self, prefix="agentcap-"):
-            return tempfile.mkdtemp(prefix=prefix)
-
-        def rmtree(self, path):
-            shutil.rmtree(path, ignore_errors=True)
-
-        def write_text(self, path, content):
-            Path(path).write_text(content)
-
-        def read_text(self, path):
-            return Path(path).read_text()
-
-    return _FakeSandbox()
-
-
-# ---------------------------------------------------------------------------
-# Fake huggingface_hub.HfApi for export tests
-# ---------------------------------------------------------------------------
-
-
-class _FakeHfApi:
-    """Captures HfApi calls so the export layer can be asserted on
-    without hitting the network. Records ``create_repo`` /
-    ``list_repo_files`` / ``create_commit`` for the two dataset repos
-    (``-captures`` + per-agent ``-traces``), and the Collections API
-    surface used by ``ensure_collection`` (``list_collections``,
-    ``create_collection``, ``add_collection_item``).
-
-    Parquet payloads are read back so tests can assert row counts +
-    column sets + request_ids; bytes payloads (README.md, raw trace
-    files) and string-path payloads (raw trace files committed via
-    ``CommitOperationAdd(path_or_fileobj=str)``) are recorded as their
-    content."""
-
-    def __init__(self):
-        self.created_repos: list[dict] = []
-        self.commits: list[dict] = []
-        self.collections_created: list[dict] = []
-        self.collection_items: list[dict] = []
-        # Default to steady-state: README already in the repo, so
-        # parquet-focused tests don't see the first-push README op
-        # bleed into their assertions. Tests exercising first-push
-        # behaviour clear this.
-        self.existing_files: list[str] = ["README.md"]
-
-    # Back-compat single-call accessor for older tests that only
-    # cared about one repo.
-    @property
-    def created_repo(self) -> dict | None:
-        return self.created_repos[0] if self.created_repos else None
-
-    def create_repo(self, *, repo_id, repo_type, exist_ok, private=False):
-        self.created_repos.append({
-            "repo_id": repo_id, "repo_type": repo_type,
-            "exist_ok": exist_ok, "private": private,
-        })
-
-    def list_repo_files(self, repo_id, repo_type):
-        return list(self.existing_files)
-
-    def create_commit(self, *, repo_id, repo_type, operations, commit_message):
-        import pyarrow.parquet as pq
-
-        op_list: list[dict] = []
-        for op in operations:
-            entry: dict = {"path_in_repo": op.path_in_repo}
-            payload = op.path_or_fileobj
-            if isinstance(payload, (bytes, bytearray)):
-                entry["bytes"] = bytes(payload)
-            elif isinstance(payload, str) and op.path_in_repo.endswith(".parquet"):
-                table = pq.read_table(payload)
-                entry["n_rows"] = table.num_rows
-                entry["columns"] = list(table.column_names)
-                entry["request_ids"] = list(table.column("request_id").to_pylist())
-            else:
-                # Raw file (trace JSONL/JSON). Read bytes so tests
-                # can introspect the committed payload.
-                from pathlib import Path as _Path
-                entry["bytes"] = _Path(payload).read_bytes() if isinstance(payload, str) else b""
-            op_list.append(entry)
-        self.commits.append({
-            "repo_id": repo_id,
-            "repo_type": repo_type,
-            "commit_message": commit_message,
-            "operations": op_list,
-        })
-
-    # --- Collections API ---
-
-    def list_collections(self, *, owner=None, q=None, limit=20):
-        # Idempotent ensure_collection looks for an existing one by
-        # title; the fake starts empty and returns whatever was made.
-        for c in self.collections_created:
-            if owner and c.get("namespace") != owner:
-                continue
-            if q and q not in (c.get("title") or ""):
-                continue
-            yield _FakeCollection(c["slug"], c["title"])
-
-    def create_collection(
-        self, title, *, namespace=None, description=None,
-        private=False, exists_ok=False, **_,
-    ):
-        slug = f"{namespace}/{title}-deadbeef" if namespace else f"{title}-deadbeef"
-        record = {
-            "slug": slug, "title": title, "namespace": namespace,
-            "description": description, "private": private,
-        }
-        self.collections_created.append(record)
-        return _FakeCollection(slug, title)
-
-    def add_collection_item(
-        self, *, collection_slug, item_id, item_type,
-        exists_ok=False, **_,
-    ):
-        self.collection_items.append({
-            "collection_slug": collection_slug,
-            "item_id": item_id,
-            "item_type": item_type,
-        })
-
-
-class _FakeCollection:
-    __slots__ = ("slug", "title")
-    def __init__(self, slug: str, title: str) -> None:
-        self.slug = slug
-        self.title = title
-
-
-@pytest.fixture
-def fake_hf_api(monkeypatch):
-    fake = _FakeHfApi()
-    monkeypatch.setattr("huggingface_hub.HfApi", lambda *a, **kw: fake)
-    return fake
-
-
-# ---------------------------------------------------------------------------
-# Mock HTTP server fixture
-# ---------------------------------------------------------------------------
-
-class _RecordingHandler(http.server.BaseHTTPRequestHandler):
-    """GET-only handler that records every requested path on a class
-    attribute. Reset per fixture invocation."""
-    received_paths: list[str] = []
-
-    def do_GET(self):  # noqa: N802
-        type(self).received_paths.append(self.path)
-        self.send_response(200)
-        self.send_header("Content-Type", "application/json")
-        self.end_headers()
-        self.wfile.write(b'{"ok": true}')
-
-    def log_message(self, *args, **kwargs):  # silence the stderr noise
-        pass
-
-
-@pytest.fixture
-def mock_http_server():
-    """Spin up a tiny in-process HTTP server on a free port for the
-    duration of one test. Bound to ``0.0.0.0`` so a podman container
-    can reach it via ``host.containers.internal``.
-
-    Yields ``(port, received_paths)``: the port the server is
-    listening on, and a list (live, mutated by request handlers)
-    of every path the server has been hit on. Useful for asserting
-    a sandboxed subprocess actually made the call we expected.
-    """
-    _RecordingHandler.received_paths = []
-    # Pick a free port by binding to :0 first, then handing it off.
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(("", 0))
-        port = s.getsockname()[1]
-    httpd = socketserver.TCPServer(("0.0.0.0", port), _RecordingHandler)
-    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
-    thread.start()
-    try:
-        yield port, _RecordingHandler.received_paths
-    finally:
-        httpd.shutdown()
-        httpd.server_close()
-        thread.join(timeout=5)
diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/fixtures/sandbox_images.py b/tests/fixtures/sandbox_images.py
deleted file mode 100644
index 78cf932..0000000
--- a/tests/fixtures/sandbox_images.py
+++ /dev/null
@@ -1,134 +0,0 @@
-"""Per-agent sandbox image lifecycle: pytest fixture + CLI.
-
-Two callers for the same logic:
-
-* ``python tests/fixtures/sandbox_images.py`` — pre-build every
-  per-agent image as a CI setup step so the test runner doesn't pay
-  the cold-build cost.
-* ``agentcap_image_for`` pytest fixture — same logic, on demand,
-  when a test requests it.
-
-Registered as a pytest plugin in ``tests/conftest.py`` via
-``pytest_plugins``.
-"""
-
-from __future__ import annotations
-
-import argparse
-import fnmatch
-import sys
-
-import pytest
-
-from agentcap.drivers import known_drivers
-from agentcap.sandbox.podman_provisioning import (
-    ensure_image, ensure_machine_running,
-)
-
-
-def _log(msg: str) -> None:
-    sys.stderr.write(f"  [sandbox-images] {msg}\n")
-    sys.stderr.flush()
-
-
-def build_one(agent: str) -> str:
-    ensure_machine_running(log=_log)
-    return ensure_image(agent, log=_log)
-
-
-def build_many(agents: list[str]) -> dict[str, str | Exception]:
-    """Build each agent's image, capturing per-agent failures so CI
-    surfaces the full failure set in one go."""
-    out: dict[str, str | Exception] = {}
-    for agent in agents:
-        try:
-            out[agent] = build_one(agent)
-        except (FileNotFoundError, RuntimeError) as exc:
-            out[agent] = exc
-    return out
-
-
-@pytest.fixture(scope="session")
-def agentcap_image_for():
-    """Factory: ``agentcap_image_for("hermes")`` ensures the
-    per-agent podman image is built and current. Skips if podman
-    or its machine isn't available."""
-    cache: dict[str, str] = {}
-
-    def _ensure(agent: str) -> str:
-        if agent in cache:
-            return cache[agent]
-        try:
-            tag = build_one(agent)
-        except (FileNotFoundError, RuntimeError) as exc:
-            pytest.skip(str(exc))
-        cache[agent] = tag
-        return tag
-
-    return _ensure
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=(
-            "Pre-build the per-agent sandbox images used by "
-            "`agentcap run` and the live driver tests."
-        ),
-    )
-    parser.add_argument(
-        "--list",
-        action="store_true",
-        help="List available agent names and exit.",
-    )
-    parser.add_argument(
-        "pattern",
-        nargs="?",
-        default="*",
-        help=(
-            "Glob pattern to filter agents (e.g. 'goose', 'pi', "
-            "'*'). Default: '*' (all)."
-        ),
-    )
-    args = parser.parse_args()
-
-    all_agents = sorted(known_drivers())
-
-    if args.list:
-        for name in all_agents:
-            print(name)
-        return 0
-
-    targets = [a for a in all_agents if fnmatch.fnmatch(a, args.pattern)]
-    if not targets:
-        print(
-            f"no agents match pattern {args.pattern!r}; "
-            f"available: {', '.join(all_agents)}",
-            file=sys.stderr,
-        )
-        return 1
-
-    _log(f"building: {', '.join(targets)}")
-    results = build_many(targets)
-
-    ok = {a: t for a, t in results.items() if not isinstance(t, Exception)}
-    failed = {a: e for a, e in results.items() if isinstance(e, Exception)}
-
-    for agent, tag in ok.items():
-        _log(f"  OK   {agent} -> {tag}")
-    for agent, exc in failed.items():
-        _log(f"  FAIL {agent}: {exc}")
-
-    if failed:
-        _log(f"{len(failed)}/{len(targets)} failed")
-        return 1
-    _log(f"all {len(targets)} images ready")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/tests/live.rs b/tests/live.rs
index f55ccb6..1622b49 100644
--- a/tests/live.rs
+++ b/tests/live.rs
@@ -171,11 +171,8 @@ fn live_goose() {
 // hermes and opencode are intentionally omitted — neither runs via `agentcap run`
 // on the tiny CI model:
 //   - hermes: its base system prompt (~3.9k tokens) exceeds the budget on
-//     Qwen3-1.7B, so it bails before any model call. The Python suite never ran
-//     hermes through the CLI either — `test_hermes_live` drove the driver directly
-//     with prompt-shrinking flags (`ignore_rules`, `toolsets="file"`) that `run`
-//     doesn't expose. hermes stdout parsing is covered by unit tests.
-//   - opencode: 1.15.x doesn't pick up the baked `agent.minimal` from the image
-//     (matching the `@pytest.mark.skip` on `test_opencode_live`).
+//     Qwen3-1.7B, so it bails before any model call. hermes stdout parsing is
+//     covered by unit tests.
+//   - opencode: 1.15.x doesn't pick up the baked `agent.minimal` from the image.
 // pi (symlink/JSONL traces) + goose (dump-traces/SQLite) cover the full stack
 // across both trace-surfacing mechanisms.
diff --git a/tests/test_captures.py b/tests/test_captures.py
deleted file mode 100644
index 29ab09e..0000000
--- a/tests/test_captures.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""Unit tests for ``agentcap.captures``."""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import pytest
-
-from agentcap.captures import (
-    load_request,
-    load_requests,
-    resolve_workspace_rid,
-)
-
-
-def _write_capture(d: Path, rid: str, body: dict) -> None:
-    (d / f"{rid}.request.json").write_text(
-        json.dumps({
-            "request_id": rid,
-            "captured_at": 1,
-            "upstream_url": "http://localhost:8000",
-            "body": body,
-        })
-    )
-
-
-def test_load_request_from_capture_dir(tmp_path: Path) -> None:
-    cap = tmp_path / "captures"
-    cap.mkdir()
-    body = {"model": "m", "messages": [{"role": "user", "content": "hi"}]}
-    _write_capture(cap, "abc", body)
-
-    assert load_request(str(cap), "abc") == body
-
-
-def test_load_requests_batch_from_capture_dir(tmp_path: Path) -> None:
-    cap = tmp_path / "captures"
-    cap.mkdir()
-    _write_capture(cap, "a", {"model": "m", "messages": []})
-    _write_capture(cap, "b", {"model": "m", "messages": [{"role": "user"}]})
-
-    out = load_requests(str(cap), ["a", "b"])
-    assert set(out) == {"a", "b"}
-    assert out["a"]["messages"] == []
-
-
-def test_load_request_missing_id_raises(tmp_path: Path) -> None:
-    cap = tmp_path / "captures"
-    cap.mkdir()
-    _write_capture(cap, "a", {"model": "m"})
-
-    with pytest.raises(KeyError):
-        load_request(str(cap), "ghost")
-
-
-def test_load_request_from_parquet(tmp_path: Path) -> None:
-    """Round-trip a body through ``export_local`` and back via the loader."""
-    from agentcap.export import export_local
-
-    cap = tmp_path / "captures"
-    cap.mkdir()
-    body = {
-        "model": "m",
-        "messages": [{"role": "user", "content": "hello"}],
-        "tools": [],
-    }
-    _write_capture(cap, "rid", body)
-    # Pair with a minimal response file so export_local has both halves.
-    (cap / "rid.response.json").write_text(json.dumps({
-        "request_id": "rid", "captured_at_resp": 2,
-        "status_code": 200, "body": {"choices": []},
-    }))
-
-    parquet = tmp_path / "out.parquet"
-    n = export_local(cap, parquet, progress=False)
-    assert n == 1
-
-    loaded = load_request(str(parquet), "rid")
-    assert loaded == body
-
-
-def test_load_requests_bad_source(tmp_path: Path) -> None:
-    not_a_thing = tmp_path / "nope.txt"
-    not_a_thing.write_text("x")
-    with pytest.raises(ValueError):
-        load_requests(str(not_a_thing), ["a"])
-
-
-def test_resolve_workspace_rid_finds_run(tmp_path: Path) -> None:
-    ws = tmp_path / ".agentcap"
-    run = ws / "hermes-local-20260101-000000"
-    cap = run / "captures"
-    cap.mkdir(parents=True)
-    _write_capture(cap, "rid-target", {"model": "m"})
-
-    found = resolve_workspace_rid(ws, "rid-target")
-    assert found == (cap, "rid-target")
-
-
-def test_resolve_workspace_rid_accepts_prefix(tmp_path: Path) -> None:
-    ws = tmp_path / ".agentcap"
-    cap = ws / "hermes-local-20260101-000000" / "captures"
-    cap.mkdir(parents=True)
-    _write_capture(cap, "abc12345deadbeef", {"model": "m"})
-
-    found = resolve_workspace_rid(ws, "abc12345")
-    assert found == (cap, "abc12345deadbeef")
-
-
-def test_resolve_workspace_rid_ambiguous_prefix_raises(tmp_path: Path) -> None:
-    from agentcap.captures import AmbiguousRequestId
-
-    ws = tmp_path / ".agentcap"
-    cap = ws / "hermes-local-20260101-000000" / "captures"
-    cap.mkdir(parents=True)
-    _write_capture(cap, "abc12345_a", {"model": "m"})
-    _write_capture(cap, "abc12345_b", {"model": "m"})
-
-    with pytest.raises(AmbiguousRequestId):
-        resolve_workspace_rid(ws, "abc12345")
-
-
-def test_resolve_workspace_rid_returns_none_when_absent(tmp_path: Path) -> None:
-    ws = tmp_path / ".agentcap"
-    ws.mkdir()
-    assert resolve_workspace_rid(ws, "ghost") is None
diff --git a/tests/test_cli.py b/tests/test_cli.py
deleted file mode 100644
index 0531e9e..0000000
--- a/tests/test_cli.py
+++ /dev/null
@@ -1,451 +0,0 @@
-"""CLI smoke tests for `agentcap`.
-
-These do not actually start a uvicorn server — they patch out
-``agentcap.proxy.serve_in_thread`` and assert the right kwargs are
-computed from the CLI flags. The proxy itself has its own integration
-test suite.
-"""
-
-from __future__ import annotations
-
-import os
-import shutil
-import types
-from pathlib import Path
-
-import pytest
-from click.testing import CliRunner
-
-from agentcap.__main__ import cli
-
-
-def _has_trufflehog() -> bool:
-    if shutil.which("trufflehog"):
-        return True
-    local = Path.home() / ".local" / "bin" / "trufflehog"
-    return local.is_file() and os.access(local, os.X_OK)
-
-
-_HAS_TRUFFLEHOG = _has_trufflehog()
-
-
-@pytest.fixture(
-    params=[
-        pytest.param([], id="scan"),
-        pytest.param(["--no-scan"], id="no-scan"),
-    ]
-)
-def scan_args(request):
-    """Yields ``[]`` (scan on, the default) or ``["--no-scan"]``.
-
-    The scan-on variant requires trufflehog on PATH (or
-    ~/.local/bin); without it, that parametrisation is skipped so
-    the no-scan variant still runs."""
-    if not request.param and not _HAS_TRUFFLEHOG:
-        pytest.skip("trufflehog not installed; cannot exercise scan path")
-    return request.param
-
-
-def test_help_lists_subcommands():
-    runner = CliRunner()
-    result = runner.invoke(cli, ["--help"])
-    assert result.exit_code == 0
-    for sub in ("export", "run"):
-        assert sub in result.output
-
-
-def test_version_flag():
-    from agentcap import __version__
-
-    runner = CliRunner()
-    result = runner.invoke(cli, ["--version"])
-    assert result.exit_code == 0
-    assert __version__ in result.output
-
-
-def test_run_requires_agent_upstream_and_workdir():
-    runner = CliRunner()
-    result = runner.invoke(cli, ["run"])
-    assert result.exit_code != 0
-    # Click reports the first missing required option
-    assert "--agent" in result.output
-
-
-# Plumbing for ``agentcap run`` (CLI flag → env-var composition →
-# orchestrator → run.json shape) is exercised end-to-end against a
-# real model server in ``tests/test_cli_live.py::test_agentcap_run_live``.
-# It replaces two previously heavily-mocked unit tests; the live test
-# touches the real proxy + sandbox + agent so we don't have to stub
-# them here.
-
-
-def test_export_requires_push(tmp_path: Path):
-    runner = CliRunner()
-    result = runner.invoke(cli, ["export", str(tmp_path)])
-    assert result.exit_code != 0
-    assert "--push" in result.output
-
-
-def test_export_requires_targets_or_all(tmp_path: Path):
-    runner = CliRunner()
-    result = runner.invoke(
-        cli, ["export", "--push", "me/d"]
-    )
-    assert result.exit_code != 0
-    assert "run-ids" in result.output or "--all" in result.output
-
-
-def test_export_rejects_both_targets_and_all(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        ["export", str(capture), "--all", "--push", "me/d"],
-    )
-    assert result.exit_code != 0
-    assert "not both" in result.output
-
-
-def test_run_hf_router_api_key_auto_from_hf_token_env(
-    tmp_path: Path, monkeypatch, fake_sandbox
-):
-    import contextlib
-
-    from agentcap.drivers import AgentTurn
-
-    tasks = tmp_path / "tasks.txt"
-    tasks.write_text("a task\n")
-
-    class _FakeDriver:
-        name = "hermes"
-
-        def start(self, prompt, *, env=None, timeout=None):
-            return AgentTurn(
-                session_id="ses_xyz", response_text="r", returncode=0,
-                stdout="", stderr="",
-            )
-
-        def resume(self, prompt, *, session_id, env=None, timeout=None):
-            return AgentTurn(
-                session_id=session_id, response_text="r", returncode=0,
-                stdout="", stderr="",
-            )
-
-    monkeypatch.setattr(
-        "agentcap.drivers.get_driver", lambda name, **kw: _FakeDriver()
-    )
-    monkeypatch.setattr(
-        "agentcap.sandbox.require_sandbox_or_die",
-        lambda **kw: fake_sandbox,
-    )
-
-    @contextlib.contextmanager
-    def fake_proxy(*args, **kwargs):
-        yield types.SimpleNamespace(
-            host="127.0.0.1", port=18001,
-            set_context=lambda **_: None,
-        )
-
-    monkeypatch.setattr("agentcap.proxy.serve_in_thread", fake_proxy)
-    monkeypatch.setenv("HF_TOKEN", "hf_env_token")
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "run",
-            "--agent", "hermes",
-            "--model", "Qwen/Qwen3-8B",
-            "--upstream", "https://router.huggingface.co",
-            "--tasks", str(tasks),
-            "--turns", "1",
-        ],
-    )
-    assert result.exit_code == 0, result.output
-    assert "HF Router token source=HF_TOKEN" in result.output
-
-
-def _write_capture(capture_dir: Path, rid: str, model: str) -> None:
-    import json
-    (capture_dir / f"{rid}.request.json").write_text(json.dumps({
-        "request_id": rid, "captured_at": 1,
-        "body": {"model": model, "messages": []},
-    }))
-
-
-def test_export_auto_detects_model_from_captures(
-    tmp_path: Path, fake_hf_api, scan_args,
-):
-    """The model auto-detected from captures lands in the committed filename.
-    Runs under both scan modes — the scan path doesn't change the
-    parquet shape, but exercising both keeps the gate honest."""
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "abcdef12", "google/gemma-4-E4B-it")
-
-    result = CliRunner().invoke(
-        cli, ["export", str(capture), "--push", "me/d", *scan_args],
-    )
-    assert result.exit_code == 0, result.output
-    op = fake_hf_api.commits[0]["operations"][0]
-    assert "gemma-4-E4B-it" in op["path_in_repo"]
-
-
-def test_export_auto_detect_fails_on_mixed_models(tmp_path: Path):
-    """Captures spanning multiple models fail loudly."""
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "a", "model-1")
-    _write_capture(capture, "b", "model-2")
-
-    result = CliRunner().invoke(
-        cli, ["export", str(capture), "--push", "me/d"],
-    )
-    assert result.exit_code != 0
-    assert "multiple models" in result.output
-
-
-def test_export_no_model_in_captures_fails(tmp_path: Path):
-    """A capture dir with no model field at all is a hard error."""
-    import json
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    (capture / "abcdef12.request.json").write_text(json.dumps({
-        "request_id": "abcdef12", "captured_at": 1,
-        "body": {"messages": []},
-    }))
-
-    result = CliRunner().invoke(
-        cli, ["export", str(capture), "--push", "me/d"],
-    )
-    assert result.exit_code != 0
-    assert "no captured requests with a model field" in result.output
-
-
-def test_export_push_rejects_malformed_dataset_uri(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "abcdef12", "m")
-
-    result = CliRunner().invoke(
-        cli, ["export", str(capture), "--push", "just-an-owner"],
-    )
-    assert result.exit_code != 0
-    assert "<owner>/<base>" in result.output
-
-
-def test_export_resolves_workdir_layout_and_reads_agent_from_run_json(
-    tmp_path: Path, fake_hf_api, scan_args,
-):
-    """Pointing export at a workdir uses its captures/ subdir AND picks up
-    agent from run.json so the parquet filename embeds the agent."""
-    import json
-    workdir = tmp_path / "ws" / "hermes-local-20260512-162345"
-    captures = workdir / "captures"
-    captures.mkdir(parents=True)
-    _write_capture(captures, "abcdef12", "google/gemma-4-E4B-it")
-    (workdir / "run.json").write_text(json.dumps({"agent": "hermes"}))
-
-    result = CliRunner().invoke(
-        cli, ["export", str(workdir), "--push", "me/d", *scan_args],
-    )
-    assert result.exit_code == 0, result.output
-    op = fake_hf_api.commits[0]["operations"][0]
-    assert "hermes" in op["path_in_repo"]
-
-
-def test_export_all_walks_workspace_in_one_commit(
-    tmp_path: Path, monkeypatch, fake_hf_api, scan_args,
-):
-    """--all enumerates every run-id in the workspace and pushes them all
-    in one git commit."""
-    import json
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    ws = tmp_path / ".agentcap"
-    for run_id in ("hermes-local-20260512-160000", "goose-local-20260512-170000"):
-        d = ws / run_id / "captures"
-        d.mkdir(parents=True)
-        _write_capture(d, "abcdef12", "m")
-        (ws / run_id / "run.json").write_text(json.dumps({
-            "agent": run_id.split("-")[0],
-        }))
-
-    result = CliRunner().invoke(
-        cli, ["export", "--all", "--push", "me/d", *scan_args],
-    )
-    assert result.exit_code == 0, result.output
-    assert len(fake_hf_api.commits) == 1
-    assert len(fake_hf_api.commits[0]["operations"]) == 2
-
-
-def test_ls_defaults_to_cwd(tmp_path: Path, monkeypatch):
-    """Without WORKSPACE, ``ls`` looks at ``./.agentcap/``."""
-    monkeypatch.chdir(tmp_path)
-    _seed_workspace_run_with_meta(tmp_path, "hermes-local-20260512-160000")
-    result = CliRunner().invoke(cli, ["ls"])
-    assert result.exit_code == 0, result.output
-    assert "hermes-local-20260512-160000" in result.output
-
-
-def test_ls_ignores_env_var(tmp_path: Path, monkeypatch):
-    """``ls`` MUST NOT consult ``$AGENTCAP_WORKSPACE`` — it's the only
-    way to keep the command's output a function of its arguments."""
-    other = tmp_path / "other"
-    other.mkdir()
-    _seed_workspace_run_with_meta(other, "hermes-local-20260512-160000")
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(other))
-    monkeypatch.chdir(tmp_path)  # cwd has no .agentcap/
-    result = CliRunner().invoke(cli, ["ls"])
-    # Falls back to ./.agentcap/ (which doesn't exist), NOT to $AGENTCAP_WORKSPACE.
-    assert result.exit_code == 0
-    assert "no workspace" in result.output
-
-
-def test_ls_accepts_parent_dir(tmp_path: Path):
-    """``ls <parent>`` finds ``<parent>/.agentcap/``."""
-    _seed_workspace_run_with_meta(tmp_path, "hermes-local-20260512-160000")
-    result = CliRunner().invoke(cli, ["ls", str(tmp_path)])
-    assert result.exit_code == 0, result.output
-    assert "hermes-local-20260512-160000" in result.output
-
-
-def test_ls_accepts_dot_agentcap_dir(tmp_path: Path):
-    """``ls <parent>/.agentcap`` works too — same listing either way."""
-    _seed_workspace_run_with_meta(tmp_path, "hermes-local-20260512-160000")
-    result = CliRunner().invoke(cli, ["ls", str(tmp_path / ".agentcap")])
-    assert result.exit_code == 0, result.output
-    assert "hermes-local-20260512-160000" in result.output
-
-
-def test_ls_accepts_dot_from_inside_workspace(tmp_path: Path, monkeypatch):
-    """``ls .`` from inside a ``.agentcap/`` dir lists that workspace —
-    ``Path('.').name`` is ``''`` so the classifier must normalize."""
-    _seed_workspace_run_with_meta(tmp_path, "hermes-local-20260512-160000")
-    monkeypatch.chdir(tmp_path / ".agentcap")
-    result = CliRunner().invoke(cli, ["ls", "."])
-    assert result.exit_code == 0, result.output
-    assert "hermes-local-20260512-160000" in result.output
-
-
-def test_ls_missing_workspace_message(tmp_path: Path, monkeypatch):
-    """Missing-workspace error is silent about ``$AGENTCAP_WORKSPACE``
-    since ``ls`` doesn't consult it."""
-    monkeypatch.chdir(tmp_path)
-    result = CliRunner().invoke(cli, ["ls"])
-    assert result.exit_code == 0
-    assert "AGENTCAP_WORKSPACE" not in result.output
-    assert "no workspace" in result.output
-
-
-def _seed_workspace_run(root: Path, run_id: str, rids: list[tuple[str, str]]) -> None:
-    """Create a fake workspace run with captures for each (rid, prompt)."""
-    import json as _json
-    cap = root / ".agentcap" / run_id / "captures"
-    cap.mkdir(parents=True)
-    for i, (rid, prompt) in enumerate(rids):
-        body = {"model": "m", "messages": [{"role": "user", "content": prompt}]}
-        (cap / f"{rid}.request.json").write_text(_json.dumps({
-            "request_id": rid, "captured_at": 1000 + i,
-            "upstream_url": "http://x", "body": body,
-        }))
-        (cap / f"{rid}.response.json").write_text(_json.dumps({
-            "request_id": rid, "captured_at_resp": 1001 + i,
-            "status_code": 200, "body": {},
-        }))
-
-
-def _seed_workspace_run_with_meta(
-    root: Path, run_id: str, *, agent: str = "hermes", model: str = "m",
-) -> None:
-    """Like _seed_workspace_run but also writes a minimal run.json so
-    the run picker discovers it."""
-    import json as _json
-    _seed_workspace_run(root, run_id, [("aaa", "p1")])
-    (root / ".agentcap" / run_id / "run.json").write_text(_json.dumps({
-        "agent": agent, "model": model, "upstream": "http://x",
-        "turns_per_task": 1,
-        "tasks": [{
-            "task_id": "task_01", "prompt": "p1", "completed_turns": 1,
-            "turns": [{"turn": 1, "returncode": 0, "duration_s": 1.0}],
-        }],
-    }))
-
-
-def test_inspect_resolves_rid_from_workspace(tmp_path: Path, monkeypatch):
-    import json as _json
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    cap = tmp_path / ".agentcap" / "hermes-local-20260101-000000" / "captures"
-    cap.mkdir(parents=True)
-    body = {"model": "m", "messages": [{"role": "user", "content": "hi"}]}
-    (cap / "abcdef12.request.json").write_text(_json.dumps({
-        "request_id": "abcdef12", "captured_at": 1,
-        "upstream_url": "http://x", "body": body,
-    }))
-    (cap / "abcdef12.response.json").write_text(_json.dumps({
-        "request_id": "abcdef12", "captured_at_resp": 2,
-        "status_code": 200, "body": {},
-    }))
-
-    result = CliRunner().invoke(cli, ["inspect", "abcdef12"])
-    assert result.exit_code == 0, result.stderr
-    assert _json.loads(result.stdout) == body
-
-
-def test_inspect_unknown_rid_errors(tmp_path: Path, monkeypatch):
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    (tmp_path / ".agentcap").mkdir()
-    result = CliRunner().invoke(cli, ["inspect", "ghost"])
-    assert result.exit_code != 0
-    assert "ghost" in result.output
-
-
-def test_inspect_run_id_errors_without_fzf(tmp_path: Path, monkeypatch):
-    """``inspect <run-id>`` needs the request picker; without fzf on PATH
-    the command errors out with a clear message instead of dumping a
-    half-usable table."""
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    monkeypatch.setenv("PATH", "")
-    # _seed_workspace_run_with_meta writes the run.json the classifier
-    # needs to recognise the dashed name as a run-id under cwd's
-    # ``.agentcap`` (otherwise it falls through to other rules).
-    _seed_workspace_run_with_meta(
-        tmp_path, "hermes-local-20260101-000000",
-        agent="hermes", model="m",
-    )
-
-    result = CliRunner().invoke(cli, ["inspect", "hermes-local-20260101-000000"])
-    assert result.exit_code != 0
-    assert "fzf is required" in result.output
-
-
-def test_inspect_no_arg_errors_without_fzf(tmp_path: Path, monkeypatch):
-    """``inspect`` with no arg also needs the run picker; same error."""
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    monkeypatch.setenv("PATH", "")
-    _seed_workspace_run_with_meta(
-        tmp_path, "hermes-local-20260101-000000",
-        agent="hermes", model="m",
-    )
-
-    result = CliRunner().invoke(cli, ["inspect"])
-    assert result.exit_code != 0
-    assert "fzf is required" in result.output
-
-
-def test_inspect_no_arg_empty_workspace_errors(tmp_path: Path, monkeypatch):
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(tmp_path))
-    monkeypatch.chdir(tmp_path)
-    (tmp_path / ".agentcap").mkdir()
-    result = CliRunner().invoke(cli, ["inspect"])
-    assert result.exit_code != 0
-    assert "no runs" in result.output or "no workspace" in result.output
-
-
diff --git a/tests/test_cli_live.py b/tests/test_cli_live.py
deleted file mode 100644
index 7dfc3f6..0000000
--- a/tests/test_cli_live.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""End-to-end live test for ``agentcap run``.
-
-Exercises the full CLI → orchestrator → sandbox → real agent path
-against a real OpenAI-compat ``/v1`` server (the live fixture spawns
-``ghcr.io/ggml-org/llama.cpp:server`` as a sibling podman container).
-Replaces the heavily-mocked plumbing tests previously in
-``test_cli.py``:
-``test_run_synthesized_defaults_from_upstream_and_model`` and
-``test_run_invokes_orchestrator_under_proxy``.
-
-Pi is the agent under test — its image install is small, sessions
-stream as per-file JSONL through the symlink (no SQLite dump
-required), and it's the most CI-friendly of the four agents.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import pytest
-from click.testing import CliRunner
-
-from agentcap.__main__ import cli
-
-
-@pytest.mark.live
-def test_agentcap_run_live(
-    tmp_path: Path,
-    monkeypatch: pytest.MonkeyPatch,
-    live_llama_url: str,
-    live_model: str,
-    agentcap_image_for,
-):
-    """``agentcap run --agent pi`` against a real model server.
-
-    Verifies the CLI plumbing the mocked tests used to cover:
-    - flag parsing → ``AGENTCAP_PROXY_URL`` / ``AGENTCAP_MODEL`` /
-      ``AGENTCAP_PROVIDER`` / ``AGENTCAP_TRACES_DIR`` /
-      ``AGENTCAP_STATE_DIR`` reach the sandbox,
-    - the in-process proxy wraps the orchestrator (captures land in
-      ``<run_dir>/captures/``),
-    - per-run ``traces/`` is populated as the agent runs (pi streams
-      JSONL through the in-container symlink),
-    - ``run.json`` summary is written with the right shape.
-
-    No internal monkeypatching — the only env manipulation is
-    ``AGENTCAP_WORKSPACE`` (a legitimate CLI input).
-    """
-    # Pre-build the pi image. The fixture is also pulled in by the
-    # sandbox-using live tests; first call builds, subsequent calls
-    # are a no-op.
-    agentcap_image_for("pi")
-
-    tasks = tmp_path / "tasks.txt"
-    tasks.write_text("Say hello in one short sentence, then stop.\n")
-
-    workspace = tmp_path / "ws"
-    workspace.mkdir()
-    monkeypatch.setenv("AGENTCAP_WORKSPACE", str(workspace))
-
-    # ``--upstream`` wants the server root reachable from the host
-    # (the in-process proxy will forward to it). ``live_llama_url``
-    # is host-side; ``live_proxy_base_url`` is agent-side and would
-    # not resolve from the host process.
-    upstream = live_llama_url
-
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "run",
-            "--agent", "pi",
-            "--model", live_model,
-            "--upstream", upstream,
-            "--tasks", str(tasks),
-            "--turns", "1",
-            "--timeout", "180",
-        ],
-    )
-    assert result.exit_code == 0, result.output
-
-    # One run dir was created under the workspace.
-    run_dirs = sorted((workspace / ".agentcap").glob("pi-*"))
-    assert len(run_dirs) == 1, run_dirs
-    run_dir = run_dirs[0]
-
-    # run.json shape — same assertions the mocked predecessor made.
-    summary = json.loads((run_dir / "run.json").read_text())
-    assert summary["agent"] == "pi"
-    assert summary["model"] == live_model
-    assert summary["upstream"] == upstream
-    assert summary["turns_per_task"] == 1
-    assert len(summary["tasks"]) == 1
-    task = summary["tasks"][0]
-    assert task["completed_turns"] == 1
-    assert task["session_id"], "pi should mint a session id"
-
-    # Captures landed on disk via the in-process proxy.
-    captures = list((run_dir / "captures").glob("*.request.json"))
-    assert captures, "proxy should have captured at least one request"
-
-    # Pi's native session JSONL landed via the in-container symlink.
-    traces = list((run_dir / "traces").iterdir())
-    assert traces, "pi should have streamed at least one trace file"
-    assert any(f.suffix == ".jsonl" for f in traces)
diff --git a/tests/test_drivers.py b/tests/test_drivers.py
deleted file mode 100644
index c7a3474..0000000
--- a/tests/test_drivers.py
+++ /dev/null
@@ -1,239 +0,0 @@
-"""Pure-Python tests for ``agentcap.drivers``.
-
-These cover the *parser*, *config-builder*, and *overlay-scaffolding*
-helpers — none of which shell out to a real agent. Live integration
-tests for each driver (which actually invoke the agent CLI against a
-running model server) live in ``test_drivers_live.py`` and are gated
-on the agent binary being available in the sandbox image and on
-``podman`` being on the host PATH.
-"""
-
-from __future__ import annotations
-
-
-import pytest
-import yaml
-
-from agentcap.drivers import get_driver
-from agentcap.drivers.goose import GooseDriver
-from agentcap.drivers.hermes import (
-    HermesDriver,
-    _rewrite_config,
-    parse_response_text as hermes_parse,
-    parse_session_id,
-)
-from agentcap.drivers.opencode import (
-    OpenCodeDriver,
-    build_opencode_config,
-    parse_response_text as opencode_parse,
-    parse_session_id as opencode_parse_session,
-)
-from agentcap.drivers.pi import PiDriver, build_models_json
-
-
-# ---------------------------------------------------------------------------
-# Hermes parsers
-# ---------------------------------------------------------------------------
-
-
-def test_parse_session_id_finds_id():
-    s = "blah\nsession_id: abc123_xyz\nmore\n"
-    assert parse_session_id(s) == "abc123_xyz"
-
-
-def test_parse_session_id_missing_returns_none():
-    assert parse_session_id("nothing here") is None
-
-
-def test_hermes_parse_response_initial_run():
-    out = "Working on it...\nHere is the answer.\n"
-    assert hermes_parse(out) == "Working on it...\nHere is the answer."
-
-
-def test_hermes_parse_response_after_resumed_marker():
-    out = (
-        "↻ Resumed abc123\n"
-        "old content\n"
-        "↻ Resumed abc123\n"
-        "the actual final answer\n"
-        "across two lines\n"
-    )
-    assert hermes_parse(out) == "the actual final answer\nacross two lines"
-
-
-def test_hermes_parse_response_strips_session_id_lines():
-    out = "session_id: aa_bb\nactual response\n"
-    assert hermes_parse(out) == "actual response"
-
-
-# ---------------------------------------------------------------------------
-# OpenCode parsers + config builder
-# ---------------------------------------------------------------------------
-
-
-def test_opencode_parse_concatenates_text_events():
-    stdout = (
-        '{"type":"step_start"}\n'
-        '{"type":"text","text":"hello "}\n'
-        '{"type":"text","text":"world"}\n'
-        '{"type":"step_finish"}\n'
-    )
-    assert opencode_parse(stdout) == "hello world"
-
-
-def test_opencode_parse_skips_malformed_lines():
-    stdout = (
-        "not json at all\n"
-        '{"type":"text","text":"good"}\n'
-        "\n"
-    )
-    assert opencode_parse(stdout) == "good"
-
-
-def test_build_opencode_config_shape():
-    cfg = build_opencode_config(
-        provider_name="local",
-        base_url="http://127.0.0.1:8001/v1",
-        model_id="qwen-test",
-    )
-    prov = cfg["provider"]["local"]
-    assert prov["options"]["baseURL"] == "http://127.0.0.1:8001/v1"
-    assert "qwen-test" in prov["models"]
-    assert cfg["model"] == "local/qwen-test"
-
-
-# ---------------------------------------------------------------------------
-# pi config builder
-# ---------------------------------------------------------------------------
-
-
-def test_pi_build_models_json_shape():
-    payload = build_models_json(
-        provider_name="local",
-        base_url="http://127.0.0.1:8001/v1",
-        model_id="qwen-test",
-    )
-    prov = payload["providers"]["local"]
-    assert prov["baseUrl"] == "http://127.0.0.1:8001/v1"
-    assert prov["api"] == "openai-completions"
-    # llama.cpp's OpenAI shim doesn't accept the developer role pi
-    # uses for reasoning-capable models — the config must downgrade.
-    assert prov["compat"]["supportsDeveloperRole"] is False
-    assert prov["compat"]["supportsReasoningEffort"] is False
-    assert prov["models"][0]["id"] == "qwen-test"
-
-
-# ---------------------------------------------------------------------------
-# Driver registry + non-resumable driver behaviour
-# ---------------------------------------------------------------------------
-
-
-def test_get_driver_known_names(fake_sandbox):
-    assert isinstance(get_driver("hermes", sandbox=fake_sandbox), HermesDriver)
-    assert isinstance(get_driver("opencode", sandbox=fake_sandbox), OpenCodeDriver)
-    assert isinstance(get_driver("goose", sandbox=fake_sandbox), GooseDriver)
-    assert isinstance(get_driver("pi", sandbox=fake_sandbox), PiDriver)
-
-
-def test_get_driver_unknown_name(fake_sandbox):
-    with pytest.raises(ValueError):
-        get_driver("not-a-real-driver", sandbox=fake_sandbox)
-
-
-def test_opencode_parse_session_id_finds_top_level():
-    stdout = (
-        '{"type":"step_start","sessionID":"ses_abc123"}\n'
-        '{"type":"text","text":"hi"}\n'
-    )
-    assert opencode_parse_session(stdout) == "ses_abc123"
-
-
-def test_opencode_parse_session_id_finds_nested_under_part():
-    stdout = (
-        '{"type":"step_finish","timestamp":1,"part":{"sessionID":"ses_xyz"}}\n'
-    )
-    assert opencode_parse_session(stdout) == "ses_xyz"
-
-
-def test_opencode_parse_session_id_missing_returns_none():
-    assert opencode_parse_session('{"type":"text","text":"hi"}\n') is None
-
-
-def test_hermes_driver_close_is_idempotent(fake_sandbox):
-    drv = HermesDriver(sandbox=fake_sandbox)
-    drv.close()
-    drv.close()  # second call should not raise
-
-
-# ---------------------------------------------------------------------------
-# Hermes overlay HERMES_HOME (proxy_base_url support)
-# ---------------------------------------------------------------------------
-
-
-def test_rewrite_config_replaces_base_url_only():
-    text = (
-        "model:\n"
-        "  provider: custom\n"
-        "  base_url: http://localhost:8000/v1\n"
-        "  key_env: OPENAI_API_KEY\n"
-    )
-    out = _rewrite_config(text, base_url="http://127.0.0.1:8001/v1")
-    assert "base_url: http://127.0.0.1:8001/v1" in out
-    assert "http://localhost:8000/v1" not in out
-    # other keys preserved
-    assert "key_env: OPENAI_API_KEY" in out
-    assert "provider: custom" in out
-    # no context_length added when override not requested
-    assert "context_length" not in out
-
-
-def test_rewrite_config_inserts_model_section_when_missing():
-    out = _rewrite_config("", base_url="http://x:1/v1")
-    assert "base_url: http://x:1/v1" in out
-
-
-def test_rewrite_config_overrides_both_context_length_guards():
-    """Hermes refuses startup if EITHER ``model.context_length`` or
-    ``auxiliary.compression.context_length`` is below 64 K. The
-    overlay must override both."""
-    text = "model:\n  provider: custom\n  base_url: http://localhost:8000/v1\n"
-    out = _rewrite_config(
-        text,
-        base_url="http://127.0.0.1:8001/v1",
-        context_length_override=65536,
-    )
-    cfg = yaml.safe_load(out)
-    assert cfg["model"]["context_length"] == 65536
-    assert cfg["auxiliary"]["compression"]["context_length"] == 65536
-    assert cfg["model"]["base_url"] == "http://127.0.0.1:8001/v1"
-
-
-def test_rewrite_config_preserves_existing_auxiliary_keys():
-    text = (
-        "model:\n"
-        "  provider: custom\n"
-        "  base_url: http://localhost:8000/v1\n"
-        "auxiliary:\n"
-        "  compression:\n"
-        "    model: my-compressor\n"
-        "  other_key: keep_me\n"
-    )
-    out = _rewrite_config(
-        text,
-        base_url="http://x/v1",
-        context_length_override=65536,
-    )
-    cfg = yaml.safe_load(out)
-    assert cfg["auxiliary"]["compression"]["model"] == "my-compressor"
-    assert cfg["auxiliary"]["compression"]["context_length"] == 65536
-    assert cfg["auxiliary"]["other_key"] == "keep_me"
-
-
-# NOTE: the host-side `build_overlay_hermes_home` function was
-# removed when HermesDriver moved its overlay logic *inside* the
-# sandbox. Behaviour previously verified by 5 unit tests against
-# fake user-homes is now covered by the live driver test
-# (tests/test_drivers_live.py::test_hermes_live) which exercises
-# the full path against a real podman container. The pure-Python
-# parts that survived as standalone helpers (`_rewrite_config`)
-# keep their own tests above.
diff --git a/tests/test_drivers_live.py b/tests/test_drivers_live.py
deleted file mode 100644
index 2bd3195..0000000
--- a/tests/test_drivers_live.py
+++ /dev/null
@@ -1,110 +0,0 @@
-"""Live integration tests for each agent driver.
-
-Verifies the infrastructure path only: agent runs inside its per-agent
-podman container, dials the in-process capture proxy, and gets a
-response back. Agent output *quality* — whether the model emits a
-syntactically valid tool call, whether it picks the right file, etc.
-— is intentionally not asserted. A separate (model-grading) test
-would be the place for that.
-
-Assertions per agent: ``returncode == 0`` and ``turn.response_text``
-non-empty (the agent received at least one model response through
-the proxy).
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from agentcap.drivers.goose import GooseDriver
-from agentcap.drivers.hermes import HermesDriver
-from agentcap.drivers.opencode import OpenCodeDriver
-from agentcap.drivers.pi import PiDriver
-
-
-INFRA_PROMPT = "Say hi, then stop."
-
-
-def _assert_infrastructure_works(turn) -> None:
-    assert turn.returncode == 0, (
-        f"agent exited rc={turn.returncode}\n"
-        f"--- stdout (tail) ---\n{turn.stdout[-500:]}\n"
-        f"--- stderr (tail) ---\n{turn.stderr[-500:]}"
-    )
-    assert turn.response_text, (
-        f"agent produced no response text — wire path may be broken.\n"
-        f"--- stdout (tail) ---\n{turn.stdout[-500:]}\n"
-        f"--- stderr (tail) ---\n{turn.stderr[-500:]}"
-    )
-
-
-@pytest.mark.live
-def test_goose_live(live_proxy_base_url, live_model, agent_proj_for):
-    sandbox, proj = agent_proj_for("goose")
-    drv = GooseDriver(
-        sandbox=sandbox, binary="goose", model=live_model, cwd=proj,
-    )
-    try:
-        turn = drv.start(INFRA_PROMPT, timeout=900)
-        assert turn.session_id and turn.session_id.startswith("agentcap-")
-        _assert_infrastructure_works(turn)
-    finally:
-        drv.close()
-
-
-@pytest.mark.live
-def test_pi_live(live_proxy_base_url, live_model, agent_proj_for):
-    sandbox, proj = agent_proj_for("pi")
-    drv = PiDriver(
-        sandbox=sandbox, binary="pi", model=live_model, cwd=proj,
-    )
-    try:
-        turn = drv.start(INFRA_PROMPT, timeout=900)
-        _assert_infrastructure_works(turn)
-    finally:
-        drv.close()
-
-
-@pytest.mark.live
-@pytest.mark.skip(
-    reason=(
-        "opencode 1.15.x doesn't pick up the baked ``agent.minimal`` from "
-        "``~/.config/opencode/opencode.json`` inside the per-agent "
-        "container — fails with ``agent \"minimal\" not found`` and "
-        "``Model not found`` even with ``mode: primary`` + explicit "
-        "model. Needs investigation: instrument the init script with "
-        "``opencode debug config`` to see what config opencode actually "
-        "resolves."
-    )
-)
-def test_opencode_live(live_proxy_base_url, live_model, agent_proj_for):
-    sandbox, proj = agent_proj_for("opencode")
-    # OpenCode recursively globs from / in empty dirs; seed a
-    # package.json to bound its exploration.
-    sandbox.write_text(
-        f"{proj}/package.json", '{"name":"smoke","version":"0.0.0"}\n'
-    )
-    drv = OpenCodeDriver(
-        sandbox=sandbox, binary="opencode", model=live_model, cwd=proj,
-        minimal_agent=True,
-    )
-    try:
-        turn = drv.start(INFRA_PROMPT, timeout=900)
-        _assert_infrastructure_works(turn)
-    finally:
-        drv.close()
-
-
-@pytest.mark.live
-def test_hermes_live(live_proxy_base_url, agent_proj_for):
-    sandbox, proj = agent_proj_for("hermes")
-    drv = HermesDriver(
-        sandbox=sandbox, binary="hermes", cwd=proj,
-        ignore_rules=True, toolsets="file",
-    )
-    try:
-        turn = drv.start(INFRA_PROMPT, timeout=900)
-        assert turn.session_id is not None
-        _assert_infrastructure_works(turn)
-    finally:
-        drv.close()
diff --git a/tests/test_export.py b/tests/test_export.py
deleted file mode 100644
index c67169a..0000000
--- a/tests/test_export.py
+++ /dev/null
@@ -1,541 +0,0 @@
-"""Unit tests for ``agentcap.export``.
-
-Captures + traces are now pushed to a paired ``-captures`` /
-``-<agent>-traces`` dataset pair under a single HF Collection. The
-tests assert: URI parsing, repo-id derivation, the parquet payload
-shape (incl. the new ``run_id`` column), the raw-JSONL trace upload,
-the README cross-links, and ``ensure_collection`` idempotency.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import pytest
-
-from agentcap.export import (
-    _row,
-    captures_repo_id,
-    detect_model,
-    detect_provider_columns,
-    ensure_collection,
-    export_local,
-    parse_collection_base,
-    push_agent_traces_dataset,
-    push_captures_dataset,
-    traces_repo_id_for,
-)
-
-
-def _write_capture(
-    capture_dir: Path,
-    rid: str,
-    body: dict,
-    response: dict,
-    *,
-    upstream_url: str = "http://127.0.0.1:8000",
-    upstream_fingerprint: dict | None = None,
-) -> None:
-    (capture_dir / f"{rid}.request.json").write_text(
-        json.dumps({
-            "request_id": rid,
-            "captured_at": 1000,
-            "upstream_url": upstream_url,
-            "body": body,
-        })
-    )
-    (capture_dir / f"{rid}.response.json").write_text(
-        json.dumps({
-            "request_id": rid,
-            "captured_at_resp": 1001,
-            "stream": False,
-            "status_code": 200,
-            "body": response,
-            "upstream_fingerprint": upstream_fingerprint or {},
-        })
-    )
-
-
-_BODY = {
-    "model": "google/gemma-4-E4B-it",
-    "messages": [{"role": "user", "content": "u"}],
-    "tools": [],
-}
-
-
-# ---------------------------------------------------------------------------
-# Row construction
-# ---------------------------------------------------------------------------
-
-
-def test_row_serialises_bodies_as_json_strings():
-    row = _row(
-        request_id="rid",
-        request_body={"model": "m", "messages": [{"role": "user", "content": "x"}]},
-        response_body={"choices": [{"message": {"content": "hi"}}]},
-        captured_at=42,
-        upstream_fp=None,
-    )
-    assert row["request_id"] == "rid"
-    assert row["model"] == "m"
-    assert row["captured_at"] == 42
-    assert isinstance(row["request"], str)
-    assert isinstance(row["response"], str)
-    assert json.loads(row["request"])["messages"][0]["content"] == "x"
-    assert json.loads(row["response"])["choices"][0]["message"]["content"] == "hi"
-
-
-def test_row_includes_fingerprint_columns_when_present():
-    fp = {
-        "x_served_by": "fireworks-pod-7",
-        "build_info": "b9039",
-        "served_model": "qwen-actually-served",
-    }
-    row = _row("rid", _BODY, {}, 1, fp)
-    assert row["served_by"] == "fireworks-pod-7"
-    assert row["served_build_info"] == "b9039"
-    assert row["served_model"] == "qwen-actually-served"
-
-
-def test_row_fingerprint_columns_default_to_none():
-    row = _row("rid", _BODY, {}, 1, None)
-    assert row["served_by"] is None
-    assert row["served_build_info"] is None
-    assert row["served_model"] is None
-
-
-def test_row_empty_response_serialises_to_empty_object():
-    row = _row("rid", _BODY, None, 1, None)
-    assert row["response"] == "{}"
-
-
-# ---------------------------------------------------------------------------
-# detect_model — same uniqueness contract as before
-# ---------------------------------------------------------------------------
-
-
-def test_detect_model_returns_unique_model(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    body = {"model": "google/gemma-4-E4B-it", "messages": []}
-    _write_capture(capture, "a", body, {"choices": []})
-    _write_capture(capture, "b", body, {"choices": []})
-    assert detect_model(capture) == "google/gemma-4-E4B-it"
-
-
-def test_detect_model_strips_revision_suffix(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "a", {"model": "google/gemma-4-E4B-it", "messages": []}, {})
-    _write_capture(capture, "b", {"model": "google/gemma-4-E4B-it@main", "messages": []}, {})
-    assert detect_model(capture) == "google/gemma-4-E4B-it"
-
-
-def test_detect_model_raises_on_mixed_models(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "a", {"model": "google/gemma-4-E4B-it", "messages": []}, {})
-    _write_capture(capture, "b", {"model": "Qwen/Qwen3-7B", "messages": []}, {})
-    with pytest.raises(ValueError) as exc_info:
-        detect_model(capture)
-    assert "multiple models" in str(exc_info.value)
-
-
-def test_detect_model_returns_none_on_empty_capture_dir(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    assert detect_model(capture) is None
-
-
-def test_detect_model_returns_none_when_no_request_has_model_field(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    (capture / "rid.request.json").write_text(
-        json.dumps({"request_id": "rid", "captured_at": 1, "body": {"messages": []}})
-    )
-    assert detect_model(capture) is None
-
-
-# ---------------------------------------------------------------------------
-# Provider derivation from the per-request upstream_url stamp
-# ---------------------------------------------------------------------------
-
-
-def test_detect_provider_columns_hostname_classification(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(
-        capture, "rid", _BODY, {},
-        upstream_url="http://127.0.0.1:8000",
-    )
-    cols = detect_provider_columns(capture)
-    assert cols == {"provider": "local", "upstream_url": "http://127.0.0.1:8000"}
-
-
-def test_detect_provider_columns_hf_router_sub_provider_refinement(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(
-        capture, "rid",
-        {"model": "meta-llama/Llama-3.3-70B:fireworks-ai", "messages": []},
-        {},
-        upstream_url="https://router.huggingface.co",
-    )
-    cols = detect_provider_columns(capture)
-    assert cols["provider"] == "hf-router/fireworks-ai"
-    assert cols["upstream_url"] == "https://router.huggingface.co"
-
-
-def test_detect_provider_columns_empty_when_no_upstream_stamp(tmp_path: Path):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    (capture / "rid.request.json").write_text(
-        json.dumps({"request_id": "rid", "captured_at": 1, "body": _BODY})
-    )
-    assert detect_provider_columns(capture) == {}
-
-
-# ---------------------------------------------------------------------------
-# Collection-base parsing + repo-id derivation
-# ---------------------------------------------------------------------------
-
-
-def test_parse_collection_base_owner_and_base():
-    owner, base = parse_collection_base("owner/my-collection")
-    assert owner == "owner"
-    assert base == "my-collection"
-
-
-def test_parse_collection_base_strips_hf_datasets_prefix():
-    owner, base = parse_collection_base("hf://datasets/owner/base")
-    assert (owner, base) == ("owner", "base")
-
-
-def test_parse_collection_base_rejects_subdir():
-    """A third segment is ambiguous — the collection-base form is a
-    single ``<base>``, not a ``<base>/<subdir>``."""
-    with pytest.raises(ValueError, match="<owner>/<base>"):
-        parse_collection_base("owner/base/extra")
-
-
-def test_parse_collection_base_rejects_missing_name():
-    with pytest.raises(ValueError, match="<owner>/<base>"):
-        parse_collection_base("owner")
-
-
-def test_repo_id_derivation():
-    assert captures_repo_id("me", "sweep") == "me/sweep-captures"
-    assert traces_repo_id_for("me", "sweep", "pi") == "me/sweep-pi-traces"
-    assert traces_repo_id_for("me", "sweep", "hermes") == "me/sweep-hermes-traces"
-
-
-# ---------------------------------------------------------------------------
-# push_captures_dataset
-# ---------------------------------------------------------------------------
-
-
-def test_push_captures_creates_captures_repo(tmp_path: Path, fake_hf_api):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "rid", _BODY, {"choices": []})
-
-    repo_id, n_rows = push_captures_dataset(
-        [{"capture_dir": capture, "model": "google/gemma-4-E4B-it", "agent": "pi",
-          "run_id": "pi-local-20260601-090000"}],
-        owner="me", base="sweep",
-    )
-
-    assert repo_id == "me/sweep-captures"
-    assert n_rows == [1]
-    assert fake_hf_api.created_repos[0] == {
-        "repo_id": "me/sweep-captures", "repo_type": "dataset",
-        "exist_ok": True, "private": True,
-    }
-
-
-def test_push_captures_lands_under_data(tmp_path: Path, fake_hf_api):
-    import re
-
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "rid", _BODY, {"choices": []})
-
-    push_captures_dataset(
-        [{"capture_dir": capture, "model": "google/gemma-4-E4B-it",
-          "agent": "pi", "run_id": "pi-local-20260601-090000"}],
-        owner="me", base="sweep",
-    )
-    op = fake_hf_api.commits[0]["operations"][0]
-    # ``-captures`` repo, single ``data/<filename>.parquet`` layout.
-    assert re.fullmatch(
-        r"data/train-pi-gemma-4-E4B-it-local-\d{8}T\d{6}-[0-9a-f]{6}\.parquet",
-        op["path_in_repo"],
-    ), op["path_in_repo"]
-
-
-def test_push_captures_stamps_run_id_column(tmp_path: Path, fake_hf_api):
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "rid", _BODY, {"choices": []})
-
-    push_captures_dataset(
-        [{"capture_dir": capture, "model": "m", "agent": "pi",
-          "run_id": "pi-local-20260601-090000"}],
-        owner="me", base="sweep",
-    )
-    op = fake_hf_api.commits[0]["operations"][0]
-    assert "run_id" in op["columns"]
-
-
-def test_push_captures_batches_into_one_commit(tmp_path: Path, fake_hf_api):
-    items = []
-    for i in range(3):
-        cap = tmp_path / f"capture-{i}"
-        cap.mkdir()
-        _write_capture(cap, f"rid{i}", _BODY, {})
-        items.append({
-            "capture_dir": cap, "model": "m", "agent": "hermes",
-            "run_id": f"hermes-local-2026060{i+1}-000000",
-        })
-
-    push_captures_dataset(items, owner="me", base="sweep")
-    assert len(fake_hf_api.commits) == 1
-    assert len(fake_hf_api.commits[0]["operations"]) == 3
-    paths = [op["path_in_repo"] for op in fake_hf_api.commits[0]["operations"]]
-    assert len(set(paths)) == 3, f"filenames collided: {paths}"
-
-
-def test_push_captures_seeds_readme_with_collection_link(
-    tmp_path: Path, fake_hf_api,
-):
-    fake_hf_api.existing_files = []  # simulate freshly-created repo
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "rid", _BODY, {})
-
-    push_captures_dataset(
-        [{"capture_dir": capture, "model": "m", "run_id": "r"}],
-        owner="me", base="sweep",
-    )
-
-    ops = fake_hf_api.commits[0]["operations"]
-    readme_ops = [op for op in ops if op["path_in_repo"] == "README.md"]
-    assert readme_ops, "captures README missing on first push"
-    body = readme_ops[0]["bytes"].decode("utf-8")
-    # Cross-links to the traces sibling family and the Collection.
-    assert "me/sweep-captures" in body
-    assert "sweep-<agent>-traces" in body
-    assert "sweep Collection" in body
-
-
-def test_push_captures_skips_readme_on_subsequent_push(
-    tmp_path: Path, fake_hf_api,
-):
-    # fake_hf_api defaults to existing_files=["README.md"]
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(capture, "rid", _BODY, {})
-
-    push_captures_dataset(
-        [{"capture_dir": capture, "model": "m", "run_id": "r"}],
-        owner="me", base="sweep",
-    )
-    paths = [op["path_in_repo"] for op in fake_hf_api.commits[0]["operations"]]
-    assert "README.md" not in paths
-
-
-# ---------------------------------------------------------------------------
-# push_agent_traces_dataset — raw JSONL upload
-# ---------------------------------------------------------------------------
-
-
-def test_push_traces_uploads_files_as_is(tmp_path: Path, fake_hf_api):
-    fake_hf_api.existing_files = []
-    traces = tmp_path / "traces"
-    traces.mkdir()
-    (traces / "session-a.jsonl").write_text('{"type":"session","id":"a"}\n')
-    (traces / "session-b.jsonl").write_text('{"type":"session","id":"b"}\n')
-
-    repo_id, n_files = push_agent_traces_dataset(
-        [{"traces_dir": traces, "run_id": "pi-local-20260601-090000"}],
-        owner="me", base="sweep", agent="pi",
-    )
-
-    assert repo_id == "me/sweep-pi-traces"
-    assert n_files == 2
-    paths = [op["path_in_repo"] for op in fake_hf_api.commits[0]["operations"]]
-    # One README + two raw files under data/<run_id>/.
-    assert "README.md" in paths
-    assert "data/pi-local-20260601-090000/session-a.jsonl" in paths
-    assert "data/pi-local-20260601-090000/session-b.jsonl" in paths
-
-
-def test_push_traces_readme_marks_agent_and_links_captures(
-    tmp_path: Path, fake_hf_api,
-):
-    fake_hf_api.existing_files = []
-    traces = tmp_path / "traces"
-    traces.mkdir()
-    (traces / "x.jsonl").write_text("{}\n")
-
-    push_agent_traces_dataset(
-        [{"traces_dir": traces, "run_id": "r1"}],
-        owner="me", base="sweep", agent="hermes",
-    )
-    ops = fake_hf_api.commits[0]["operations"]
-    readme = next(op for op in ops if op["path_in_repo"] == "README.md")
-    body = readme["bytes"].decode("utf-8")
-    # Tags: agent-traces, agentcap-traces, per-agent suffix.
-    assert "agent-traces" in body
-    assert "agentcap-traces-hermes" in body
-    # source_datasets points back at the captures sibling.
-    assert "me/sweep-captures" in body
-    assert "sweep Collection" in body
-
-
-def test_push_traces_skips_when_no_files_and_readme_exists(
-    tmp_path: Path, fake_hf_api,
-):
-    """Empty trace dir + README already in repo → no commit."""
-    traces = tmp_path / "traces"
-    traces.mkdir()
-    repo_id, n_files = push_agent_traces_dataset(
-        [{"traces_dir": traces, "run_id": "r1"}],
-        owner="me", base="sweep", agent="pi",
-    )
-    assert repo_id == "me/sweep-pi-traces"
-    assert n_files == 0
-    assert fake_hf_api.commits == []
-
-
-def test_push_traces_repo_created_private(tmp_path: Path, fake_hf_api):
-    traces = tmp_path / "traces"
-    traces.mkdir()
-    (traces / "x.jsonl").write_text("{}")
-    push_agent_traces_dataset(
-        [{"traces_dir": traces, "run_id": "r1"}],
-        owner="me", base="sweep", agent="pi",
-    )
-    record = next(
-        r for r in fake_hf_api.created_repos if r["repo_id"] == "me/sweep-pi-traces"
-    )
-    assert record["private"] is True
-
-
-# ---------------------------------------------------------------------------
-# ensure_collection — find-or-create + idempotent item-add
-# ---------------------------------------------------------------------------
-
-
-def test_ensure_collection_creates_when_missing(fake_hf_api):
-    slug = ensure_collection(
-        owner="me", base="sweep",
-        repos=["me/sweep-captures", "me/sweep-pi-traces"],
-    )
-    assert slug.startswith("me/sweep-")
-    assert len(fake_hf_api.collections_created) == 1
-    assert fake_hf_api.collections_created[0]["title"] == "sweep"
-    assert fake_hf_api.collections_created[0]["private"] is True
-    item_ids = [it["item_id"] for it in fake_hf_api.collection_items]
-    assert item_ids == ["me/sweep-captures", "me/sweep-pi-traces"]
-
-
-def test_ensure_collection_is_idempotent_on_second_call(fake_hf_api):
-    first = ensure_collection(
-        owner="me", base="sweep",
-        repos=["me/sweep-captures"],
-    )
-    second = ensure_collection(
-        owner="me", base="sweep",
-        repos=["me/sweep-captures", "me/sweep-hermes-traces"],
-    )
-    assert first == second
-    # Only one collection was created across the two calls.
-    assert len(fake_hf_api.collections_created) == 1
-
-
-# ---------------------------------------------------------------------------
-# Round-trip — captures parquet shape (incl. run_id column)
-# ---------------------------------------------------------------------------
-
-
-def test_export_local_round_trip(tmp_path: Path):
-    """End-to-end: write captures, export with provider+run_id columns,
-    read parquet back, assert columns + that request JSON survives
-    serialisation."""
-    import pyarrow.parquet as pq
-
-    capture = tmp_path / "capture"
-    capture.mkdir()
-    _write_capture(
-        capture, "ra", _BODY, {"choices": [{"index": 0}]},
-        upstream_fingerprint={"x_served_by": "pod-7", "served_model": "gemma"},
-    )
-    _write_capture(capture, "rb", _BODY, {"choices": [{"index": 0}]})
-
-    out = tmp_path / "rows.parquet"
-    extra_cols = {
-        "provider": "local",
-        "upstream_url": "http://127.0.0.1:8000",
-        "run_id": "pi-local-20260601-090000",
-    }
-    n_rows = export_local(
-        capture, out, progress=False, provider_columns=extra_cols,
-    )
-    assert n_rows == 2
-
-    table = pq.read_table(out)
-    assert table.num_rows == 2
-    assert set(table.column_names) == {
-        "request_id", "model", "captured_at", "task_id", "turn",
-        "request", "response",
-        "served_by", "served_build_info", "served_model",
-        "provider", "upstream_url", "run_id",
-    }
-    rows = table.to_pylist()
-    by_rid = {r["request_id"]: r for r in rows}
-    assert by_rid["ra"]["served_by"] == "pod-7"
-    assert by_rid["ra"]["served_model"] == "gemma"
-    assert by_rid["rb"]["served_by"] is None
-    for r in rows:
-        assert r["provider"] == "local"
-        assert r["upstream_url"] == "http://127.0.0.1:8000"
-        assert r["run_id"] == "pi-local-20260601-090000"
-    sample = json.loads(by_rid["ra"]["request"])
-    assert sample["messages"][0]["role"] == "user"
-
-
-def test_export_local_stamps_agent_and_model_in_schema_metadata(tmp_path):
-    """``agent`` and ``model`` are written to the parquet's schema-level
-    KV metadata. ``inspect`` reads them from there instead of parsing
-    the filename — so it's the canonical labelling source."""
-    import pyarrow.parquet as pq
-    capture = tmp_path / "cap"
-    capture.mkdir()
-    _write_capture(capture, "ra", _BODY, {"choices": [{"index": 0}]})
-
-    out = tmp_path / "rows.parquet"
-    export_local(
-        capture, out, progress=False, agent="hermes", model="GLM-4.6",
-    )
-    md = pq.read_schema(out).metadata or {}
-    assert md.get(b"agent") == b"hermes"
-    assert md.get(b"model") == b"GLM-4.6"
-
-
-def test_export_local_omits_metadata_when_agent_model_unset(tmp_path):
-    """Backwards-compat: when callers don't pass ``agent``/``model``,
-    we don't write empty markers — the parquet just has no KV metadata
-    and the picker falls back to ``?`` on read."""
-    import pyarrow.parquet as pq
-    capture = tmp_path / "cap"
-    capture.mkdir()
-    _write_capture(capture, "ra", _BODY, {"choices": [{"index": 0}]})
-
-    out = tmp_path / "rows.parquet"
-    export_local(capture, out, progress=False)
-    md = pq.read_schema(out).metadata
-    # ``with_metadata({})`` is never called when both are None, so the
-    # schema carries no custom metadata at all.
-    assert md is None or (b"agent" not in md and b"model" not in md)
diff --git a/tests/test_followups.py b/tests/test_followups.py
deleted file mode 100644
index 7d3f599..0000000
--- a/tests/test_followups.py
+++ /dev/null
@@ -1,186 +0,0 @@
-"""Tests for the follow-up strategies."""
-
-from __future__ import annotations
-
-import httpx
-import pytest
-
-from agentcap.followups import get_followup
-from agentcap.followups.continue_ import ContinueFollowUp
-from agentcap.followups.synthesized import SynthesizedFollowUp
-from agentcap.followups.synthesized import _default_call_synth
-from agentcap.followups.templates import TemplatesFollowUp
-
-
-def test_continue_followup_always_returns_continue():
-    fu = ContinueFollowUp()
-    for turn in (2, 3, 100):
-        assert (
-            fu.next(original_task="anything", last_response="resp", turn=turn)
-            == "continue"
-        )
-
-
-def test_continue_followup_custom_text():
-    fu = ContinueFollowUp(text="more")
-    assert fu.next(original_task="t", last_response="r", turn=2) == "more"
-
-
-def test_templates_followup_rotates_through_pool():
-    fu = TemplatesFollowUp(pool=("a", "b", "c"))
-    seen = [
-        fu.next(original_task="t", last_response="r", turn=t)
-        for t in (2, 3, 4, 5, 6)
-    ]
-    assert seen == ["a", "b", "c", "a", "b"]
-
-
-def test_templates_followup_default_pool_is_nonempty():
-    fu = TemplatesFollowUp()
-    out = fu.next(original_task="t", last_response="r", turn=2)
-    assert isinstance(out, str) and out
-
-
-def test_templates_followup_rejects_empty_pool():
-    with pytest.raises(ValueError):
-        TemplatesFollowUp(pool=())
-
-
-def test_synthesized_followup_calls_synth_with_prompt():
-    captured: dict = {}
-
-    def fake_call(*, upstream, model, prompt, timeout, api_key=None):
-        captured["upstream"] = upstream
-        captured["model"] = model
-        captured["prompt"] = prompt
-        captured["timeout"] = timeout
-        captured["api_key"] = api_key
-        return "  Show me the migration plan.  "
-
-    fu = SynthesizedFollowUp(
-        upstream="http://synth:9000",
-        model="synth-model",
-        call=fake_call,
-        timeout=10,
-    )
-    out = fu.next(
-        original_task="Plan the S3 backend.",
-        last_response="Here's a draft plan.",
-        turn=2,
-    )
-    assert out == "Show me the migration plan."
-    assert captured["upstream"] == "http://synth:9000"
-    assert captured["model"] == "synth-model"
-    assert captured["timeout"] == 10
-    # Prompt embeds task and response
-    assert "Plan the S3 backend." in captured["prompt"]
-    assert "Here's a draft plan." in captured["prompt"]
-
-
-def test_synthesized_followup_falls_back_on_exception(capsys):
-    def boom(**_):
-        raise RuntimeError("synth down")
-
-    fu = SynthesizedFollowUp(
-        upstream="http://synth", model="m", call=boom, fallback="continue"
-    )
-    assert fu.next(original_task="t", last_response="r", turn=2) == "continue"
-    # Fallback must be noisy — silence here used to mask 401s against
-    # authenticated upstreams while run.json kept claiming
-    # ``followup: synthesized``.
-    err = capsys.readouterr().err
-    assert "synthesized turn=2 fell back to 'continue'" in err
-    assert "RuntimeError" in err and "synth down" in err
-
-
-def test_synthesized_followup_falls_back_on_empty_response():
-    fu = SynthesizedFollowUp(
-        upstream="http://synth",
-        model="m",
-        call=lambda **_: "   ",
-        fallback="keep going",
-    )
-    assert fu.next(original_task="t", last_response="r", turn=2) == "keep going"
-
-
-def test_get_followup_dispatch():
-    assert isinstance(get_followup("continue"), ContinueFollowUp)
-    assert isinstance(get_followup("templates"), TemplatesFollowUp)
-    # synthesized requires upstream/model kwargs
-    fu = get_followup(
-        "synthesized", upstream="http://x", model="m", call=lambda **_: "ok"
-    )
-    assert isinstance(fu, SynthesizedFollowUp)
-
-
-def test_get_followup_unknown():
-    with pytest.raises(ValueError):
-        get_followup("not-a-strategy")
-
-
-def test_default_call_synth_accepts_upstream_with_v1_suffix(monkeypatch):
-    class _Resp:
-        def raise_for_status(self):
-            return None
-
-        def json(self):
-            return {"choices": [{"message": {"content": "ok"}}]}
-
-    captured = {}
-
-    def fake_post(url, json, timeout, headers=None):
-        captured["url"] = url
-        captured["headers"] = headers
-        return _Resp()
-
-    monkeypatch.setattr(httpx, "post", fake_post)
-
-    out = _default_call_synth(
-        upstream="https://router.huggingface.co/v1",
-        model="Qwen/Qwen3-8B",
-        prompt="p",
-        timeout=5,
-    )
-    assert out == "ok"
-    assert captured["url"] == "https://router.huggingface.co/v1/chat/completions"
-    assert captured["headers"] is None  # no api_key => no Authorization
-
-
-def test_default_call_synth_sends_bearer_when_api_key_given(monkeypatch):
-    class _Resp:
-        def raise_for_status(self):
-            return None
-
-        def json(self):
-            return {"choices": [{"message": {"content": "ok"}}]}
-
-    captured = {}
-
-    def fake_post(url, json, timeout, headers=None):
-        captured["headers"] = headers
-        return _Resp()
-
-    monkeypatch.setattr(httpx, "post", fake_post)
-
-    _default_call_synth(
-        upstream="https://router.huggingface.co",
-        model="m",
-        prompt="p",
-        timeout=5,
-        api_key="hf_xyz",
-    )
-    assert captured["headers"] == {"Authorization": "Bearer hf_xyz"}
-
-
-def test_synthesized_followup_passes_api_key_to_call():
-    seen = {}
-
-    def fake(*, upstream, model, prompt, timeout, api_key):
-        seen["api_key"] = api_key
-        return "next"
-
-    fu = SynthesizedFollowUp(
-        upstream="http://synth", model="m", call=fake, api_key="hf_abc"
-    )
-    fu.next(original_task="t", last_response="r", turn=2)
-    assert seen["api_key"] == "hf_abc"
diff --git a/tests/test_inspect_helpers.py b/tests/test_inspect_helpers.py
deleted file mode 100644
index c46ad07..0000000
--- a/tests/test_inspect_helpers.py
+++ /dev/null
@@ -1,158 +0,0 @@
-"""Unit tests for the inspect picker's parsing helpers.
-
-Covers ``_decode_sse_response`` (OpenAI-compatible SSE → synthesized
-assistant message) and ``_parse_fzf_terms`` (fzf query → list of
-substrings to highlight). These functions are pure and live behind the
-interactive picker, so they're easy to drift on without notice.
-"""
-
-from __future__ import annotations
-
-import json
-
-from agentcap.__main__ import _decode_sse_response
-from agentcap.__main__ import _parse_fzf_terms
-
-
-def _sse(*objs) -> str:
-    """Assemble an SSE blob: one ``data: <json>`` line per object,
-    plus a trailing ``data: [DONE]`` marker like real servers send."""
-    return (
-        "\n".join(f"data: {json.dumps(o)}" for o in objs)
-        + "\ndata: [DONE]\n"
-    )
-
-
-def test_decode_sse_empty_returns_empty_message():
-    out = _decode_sse_response("")
-    assert out == {"content": "", "tool_calls": [], "finish_reason": None}
-
-
-def test_decode_sse_concatenates_content_chunks():
-    raw = _sse(
-        {"choices": [{"delta": {"content": "Hello"}}]},
-        {"choices": [{"delta": {"content": ", "}}]},
-        {"choices": [{"delta": {"content": "world!"}}]},
-        {"choices": [{"delta": {}, "finish_reason": "stop"}]},
-    )
-    out = _decode_sse_response(raw)
-    assert out["content"] == "Hello, world!"
-    assert out["tool_calls"] == []
-    assert out["finish_reason"] == "stop"
-
-
-def test_decode_sse_merges_tool_call_argument_fragments():
-    # First chunk for a tool call carries id + function.name; later
-    # chunks accumulate ``arguments`` fragments under the same index.
-    raw = _sse(
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 0, "id": "call_1", "type": "function",
-            "function": {"name": "read", "arguments": ""},
-        }]}}]},
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 0, "function": {"arguments": '{"path"'},
-        }]}}]},
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 0, "function": {"arguments": ': "a.py"}'},
-        }]}}]},
-        {"choices": [{"delta": {}, "finish_reason": "tool_calls"}]},
-    )
-    out = _decode_sse_response(raw)
-    assert out["content"] == ""
-    assert out["tool_calls"] == [{
-        "id": "call_1", "type": "function",
-        "function": {"name": "read", "arguments": '{"path": "a.py"}'},
-    }]
-    assert out["finish_reason"] == "tool_calls"
-
-
-def test_decode_sse_keeps_multiple_tool_calls_in_index_order():
-    # Two parallel tool calls — index 1's first chunk arrives before
-    # index 0's last; the decoder must still emit them sorted by index.
-    raw = _sse(
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 0, "id": "c0",
-            "function": {"name": "first", "arguments": "{"},
-        }]}}]},
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 1, "id": "c1",
-            "function": {"name": "second", "arguments": "{}"},
-        }]}}]},
-        {"choices": [{"delta": {"tool_calls": [{
-            "index": 0, "function": {"arguments": "}"},
-        }]}}]},
-    )
-    out = _decode_sse_response(raw)
-    names = [tc["function"]["name"] for tc in out["tool_calls"]]
-    ids = [tc["id"] for tc in out["tool_calls"]]
-    args = [tc["function"]["arguments"] for tc in out["tool_calls"]]
-    assert names == ["first", "second"]
-    assert ids == ["c0", "c1"]
-    assert args == ["{}", "{}"]
-
-
-def test_decode_sse_skips_malformed_json_lines():
-    # A garbled chunk in the middle must not abort the whole stream.
-    raw = (
-        'data: {"choices":[{"delta":{"content":"ok"}}]}\n'
-        "data: {not json\n"
-        'data: {"choices":[{"delta":{"content":"!"}}]}\n'
-        "data: [DONE]\n"
-    )
-    out = _decode_sse_response(raw)
-    assert out["content"] == "ok!"
-
-
-def test_decode_sse_ignores_non_data_and_blank_lines():
-    # Real streams interleave keep-alive comments (``: ping``) and
-    # blank separators between events.
-    raw = (
-        ": keepalive\n"
-        "\n"
-        'data: {"choices":[{"delta":{"content":"x"}}]}\n'
-        "\n"
-        "event: end\n"
-        'data: {"choices":[{"delta":{},"finish_reason":"stop"}]}\n'
-        "data: [DONE]\n"
-    )
-    out = _decode_sse_response(raw)
-    assert out["content"] == "x"
-    assert out["finish_reason"] == "stop"
-
-
-def test_parse_fzf_terms_empty_query_returns_empty_list():
-    assert _parse_fzf_terms("") == []
-    assert _parse_fzf_terms("   ") == []
-
-
-def test_parse_fzf_terms_plain_words():
-    assert _parse_fzf_terms("alpha beta") == ["alpha", "beta"]
-
-
-def test_parse_fzf_terms_strips_exact_match_quote():
-    # ``'word`` → exact-match in fzf; the leading quote is a fzf
-    # operator, not part of the substring to highlight.
-    assert _parse_fzf_terms("'hf-cli") == ["hf-cli"]
-
-
-def test_parse_fzf_terms_strips_anchors():
-    # ``^`` (prefix) and ``$`` (suffix) are fzf anchors — neither is
-    # part of the substring being matched.
-    assert _parse_fzf_terms("^foo") == ["foo"]
-    assert _parse_fzf_terms("bar$") == ["bar"]
-    assert _parse_fzf_terms("^baz$") == ["baz"]
-
-
-def test_parse_fzf_terms_drops_negated_terms():
-    # ``!word`` excludes matches in fzf — nothing to colour for it.
-    assert _parse_fzf_terms("keep !drop also") == ["keep", "also"]
-
-
-def test_parse_fzf_terms_drops_bare_or_separator():
-    # A bare ``|`` between two terms is fzf's OR — not a substring.
-    assert _parse_fzf_terms("a | b") == ["a", "b"]
-
-
-def test_parse_fzf_terms_handles_mixed_operators():
-    out = _parse_fzf_terms("'exact ^prefix suffix$ !nope plain")
-    assert out == ["exact", "prefix", "suffix", "plain"]
diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py
deleted file mode 100644
index f043a34..0000000
--- a/tests/test_orchestrator.py
+++ /dev/null
@@ -1,292 +0,0 @@
-"""Tests for the orchestrator."""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-import pytest
-
-from agentcap.drivers import AgentDriver, AgentTurn
-from agentcap.followups.continue_ import ContinueFollowUp
-from agentcap.followups.templates import TemplatesFollowUp
-from agentcap.orchestrator import Orchestrator, read_tasks_txt
-
-
-# ---------------------------------------------------------------------------
-# Fake driver
-# ---------------------------------------------------------------------------
-
-
-class FakeDriver(AgentDriver):
-    """Records every call; returns scripted AgentTurns."""
-
-    name = "fake"
-
-    def __init__(
-        self,
-        *,
-        start_turn: AgentTurn | None = None,
-        resume_turn: AgentTurn | None = None,
-        resume_unsupported: bool = False,
-    ) -> None:
-        self.calls: list[tuple[str, str, str | None]] = []  # (op, prompt, sid)
-        self._start_turn = start_turn or AgentTurn(
-            session_id="ses_fake", response_text="initial response", returncode=0,
-            stdout="hi", stderr="",
-        )
-        self._resume_turn = resume_turn or AgentTurn(
-            session_id="ses_fake", response_text="continuation", returncode=0,
-            stdout="ok", stderr="",
-        )
-        self._resume_unsupported = resume_unsupported
-
-    def start(self, prompt, *, env=None, timeout=None):
-        self.calls.append(("start", prompt, None))
-        return self._start_turn
-
-    def resume(self, prompt, *, session_id, env=None, timeout=None):
-        self.calls.append(("resume", prompt, session_id))
-        if self._resume_unsupported:
-            raise NotImplementedError("fake doesn't resume")
-        return self._resume_turn
-
-
-# ---------------------------------------------------------------------------
-# read_tasks_txt
-# ---------------------------------------------------------------------------
-
-
-def test_read_tasks_skips_comments_and_blanks(tmp_path: Path):
-    p = tmp_path / "tasks.txt"
-    p.write_text(
-        "# header comment\n"
-        "first task\n"
-        "\n"
-        "  # indented comment\n"
-        "second task\n"
-        "  third task with leading space\n"
-    )
-    tasks = read_tasks_txt(p)
-    assert tasks == ["first task", "second task", "third task with leading space"]
-
-
-def test_read_tasks_empty_file(tmp_path: Path):
-    p = tmp_path / "tasks.txt"
-    p.write_text("# only comments\n\n")
-    assert read_tasks_txt(p) == []
-
-
-# ---------------------------------------------------------------------------
-# Orchestrator.run_task
-# ---------------------------------------------------------------------------
-
-
-def test_run_task_single_turn_no_followup_call():
-    drv = FakeDriver()
-    fu = ContinueFollowUp()
-    orch = Orchestrator(drv, fu)
-
-    result = orch.run_task("Plan the S3 backend", task_id="t01", turns=1)
-    assert len(result.turns) == 1
-    assert result.turns[0].turn == 1
-    assert result.turns[0].prompt == "Plan the S3 backend"
-    assert result.session_id == "ses_fake"
-    # Driver was called once for start, never for resume
-    assert [c[0] for c in drv.calls] == ["start"]
-
-
-def test_run_task_multi_turn_uses_continue_followup():
-    drv = FakeDriver()
-    orch = Orchestrator(drv, ContinueFollowUp())
-    result = orch.run_task("task", task_id="t01", turns=4)
-    assert len(result.turns) == 4
-    assert [c[0] for c in drv.calls] == ["start", "resume", "resume", "resume"]
-    # All resume prompts are "continue"
-    for op, prompt, sid in drv.calls[1:]:
-        assert prompt == "continue"
-        assert sid == "ses_fake"
-
-
-def test_run_task_multi_turn_uses_templates_pool():
-    drv = FakeDriver()
-    pool = ("first", "second", "third")
-    orch = Orchestrator(drv, TemplatesFollowUp(pool=pool))
-    orch.run_task("task", task_id="t01", turns=4)
-    # Skip the start call; resume prompts cycle through pool
-    resume_prompts = [p for op, p, _ in drv.calls if op == "resume"]
-    assert resume_prompts == list(pool)
-
-
-def test_run_task_aborts_when_initial_returncode_nonzero():
-    drv = FakeDriver(
-        start_turn=AgentTurn(
-            session_id=None, response_text="", returncode=1, stdout="", stderr="boom"
-        )
-    )
-    orch = Orchestrator(drv, ContinueFollowUp())
-    result = orch.run_task("task", task_id="t01", turns=3)
-    assert len(result.turns) == 1
-    assert result.completed_turns == 0
-    # No resume calls were made
-    assert all(c[0] == "start" for c in drv.calls)
-
-
-def test_run_task_aborts_when_no_session_id_for_multi_turn():
-    drv = FakeDriver(
-        start_turn=AgentTurn(
-            session_id=None, response_text="hi", returncode=0, stdout="", stderr=""
-        )
-    )
-    orch = Orchestrator(drv, ContinueFollowUp())
-    result = orch.run_task("task", task_id="t01", turns=3)
-    assert len(result.turns) == 1
-    # Only the start call; resume never happens because session_id is None
-    assert all(c[0] == "start" for c in drv.calls)
-
-
-def test_run_task_breaks_loop_on_resume_failure():
-    drv = FakeDriver(
-        resume_turn=AgentTurn(
-            session_id="ses_fake", response_text="", returncode=124,
-            stdout="", stderr="timeout",
-        )
-    )
-    orch = Orchestrator(drv, ContinueFollowUp())
-    result = orch.run_task("task", task_id="t01", turns=4)
-    # One success + one failure, then the loop breaks
-    assert len(result.turns) == 2
-    assert result.turns[1].returncode == 124
-
-
-def test_run_task_handles_resume_not_implemented():
-    drv = FakeDriver(resume_unsupported=True)
-    orch = Orchestrator(drv, ContinueFollowUp())
-    result = orch.run_task("task", task_id="t01", turns=3)
-    # First turn succeeds; resume raises NotImplementedError; orchestrator stops
-    assert len(result.turns) == 1
-
-
-def test_run_task_rejects_zero_turns():
-    drv = FakeDriver()
-    orch = Orchestrator(drv, ContinueFollowUp())
-    with pytest.raises(ValueError):
-        orch.run_task("t", task_id="x", turns=0)
-
-
-def test_run_task_writes_session_logs_when_sessions_dir_set(tmp_path: Path):
-    drv = FakeDriver(
-        start_turn=AgentTurn(
-            session_id="s1", response_text="r", returncode=0,
-            stdout="STDOUT-init", stderr="STDERR-init",
-        ),
-        resume_turn=AgentTurn(
-            session_id="s1", response_text="r2", returncode=0,
-            stdout="STDOUT-cont", stderr="STDERR-cont",
-        ),
-    )
-    sessions = tmp_path / "sessions"
-    orch = Orchestrator(drv, ContinueFollowUp(), sessions_dir=sessions)
-    orch.run_task("t", task_id="task_01", turns=2)
-
-    assert (sessions / "task_01_turn_01.out").read_text() == "STDOUT-init"
-    assert (sessions / "task_01_turn_01.err").read_text() == "STDERR-init"
-    assert (sessions / "task_01_turn_02.out").read_text() == "STDOUT-cont"
-    assert (sessions / "task_01_turn_02.err").read_text() == "STDERR-cont"
-
-
-# ---------------------------------------------------------------------------
-# Orchestrator.run_corpus
-# ---------------------------------------------------------------------------
-
-
-def test_run_corpus_iterates_tasks_with_default_id_format():
-    drv = FakeDriver()
-    orch = Orchestrator(drv, ContinueFollowUp())
-    results = orch.run_corpus(
-        ["task A", "task B", "task C"], turns_per_task=1
-    )
-    assert [r.task_id for r in results] == ["task_01", "task_02", "task_03"]
-    assert [r.prompt for r in results] == ["task A", "task B", "task C"]
-
-
-def test_run_corpus_records_events():
-    drv = FakeDriver()
-    events: list[tuple[str, dict]] = []
-
-    def listener(event: str, **kw):
-        events.append((event, kw))
-
-    orch = Orchestrator(drv, ContinueFollowUp(), on_event=listener)
-    orch.run_corpus(["task A"], turns_per_task=2)
-    event_names = [e for e, _ in events]
-    assert event_names[0] == "task_start"
-    assert event_names.count("turn_done") == 2
-
-
-def _timeout_after_n(n: int):
-    """Return a driver whose start/resume raises TimeoutExpired on the
-    n-th call (1-indexed), succeeds otherwise."""
-    import subprocess
-
-    class TimeoutDriver(FakeDriver):
-        def __init__(self):
-            super().__init__()
-            self._n = 0
-
-        def start(self, prompt, *, env=None, timeout=None):
-            self._n += 1
-            if self._n == n:
-                raise subprocess.TimeoutExpired(["fake"], timeout or 1)
-            return super().start(prompt, env=env, timeout=timeout)
-
-        def resume(self, prompt, *, session_id, env=None, timeout=None):
-            self._n += 1
-            if self._n == n:
-                raise subprocess.TimeoutExpired(["fake"], timeout or 1)
-            return super().resume(prompt, session_id=session_id, env=env, timeout=timeout)
-
-    return TimeoutDriver()
-
-
-def test_run_task_aborts_on_initial_turn_timeout():
-    """A driver timeout on turn 1 must not propagate; the task is
-    aborted with a recorded event and ``run_corpus`` keeps going."""
-    drv = _timeout_after_n(1)
-    events: list[tuple[str, dict]] = []
-    orch = Orchestrator(
-        drv, ContinueFollowUp(), on_event=lambda **kw: events.append((kw.pop("event"), kw))
-    )
-    result = orch.run_task("anything", task_id="t01", turns=2)
-    assert result.turns == []
-    aborted = [e for e in events if e[0] == "task_aborted"]
-    assert aborted and aborted[0][1]["reason"] == "initial-turn-timeout"
-
-
-def test_run_corpus_keeps_going_when_one_task_times_out():
-    """Critical: a timeout on task 1 must not kill tasks 2+."""
-    # Total calls across the run: t1 start (timeout), t2 start (ok),
-    # t3 start (ok). Trip the 1st call only.
-    drv = _timeout_after_n(1)
-    orch = Orchestrator(drv, ContinueFollowUp())
-    results = orch.run_corpus(
-        ["task A", "task B", "task C"], turns_per_task=1
-    )
-    assert len(results) == 3
-    # task A failed before any turn could be recorded
-    assert results[0].turns == []
-    # tasks B and C completed turn 1
-    assert len(results[1].turns) == 1
-    assert len(results[2].turns) == 1
-
-
-def test_run_task_aborts_on_followup_turn_timeout():
-    drv = _timeout_after_n(2)  # 1st call ok (start), 2nd (resume) times out
-    events: list[tuple[str, dict]] = []
-    orch = Orchestrator(
-        drv, ContinueFollowUp(), on_event=lambda **kw: events.append((kw.pop("event"), kw))
-    )
-    result = orch.run_task("anything", task_id="t01", turns=3)
-    # Only turn 1 recorded.
-    assert len(result.turns) == 1
-    aborted = [e for e in events if e[0] == "task_aborted"]
-    assert aborted and aborted[0][1]["reason"] == "follow-up-turn-timeout"
diff --git a/tests/test_podman_sandbox.py b/tests/test_podman_sandbox.py
deleted file mode 100644
index 6a67fc1..0000000
--- a/tests/test_podman_sandbox.py
+++ /dev/null
@@ -1,229 +0,0 @@
-"""Structural tests for :mod:`agentcap.sandbox.podman`.
-
-Argv-assembly only — these don't shell out to podman. End-to-end
-coverage against a real ``podman run`` lives in
-``tests/test_drivers_live.py`` via the live driver tests.
-"""
-
-from __future__ import annotations
-
-from agentcap.sandbox import Sandbox
-from agentcap.sandbox.podman import PodmanSandbox, build_command
-
-
-def test_podman_sandbox_implements_protocol():
-    assert isinstance(PodmanSandbox(image="agentcap-goose:latest"), Sandbox)
-
-
-def test_build_command_minimal():
-    cmd = build_command(
-        ["echo", "hi"],
-        image="img:latest",
-        writable_paths=[],
-    )
-    assert cmd[:3] == ["podman", "run", "--rm"]
-    assert cmd[-3:] == ["img:latest", "echo", "hi"]
-
-
-def test_build_command_writable_bind_mount(tmp_path):
-    cmd = build_command(
-        ["true"],
-        image="img:latest",
-        writable_paths=[tmp_path],
-    )
-    expected = f"type=bind,src={tmp_path.resolve()},dst={tmp_path.resolve()}"
-    assert "--mount" in cmd
-    assert expected in cmd
-
-
-def test_build_command_readonly_bind_mount(tmp_path):
-    cmd = build_command(
-        ["true"],
-        image="img:latest",
-        writable_paths=[],
-        readonly_paths=[tmp_path],
-    )
-    expected = (
-        f"type=bind,src={tmp_path.resolve()},dst={tmp_path.resolve()},ro"
-    )
-    assert expected in cmd
-
-
-def test_build_command_deny_network():
-    cmd = build_command(
-        ["true"], image="img:latest", writable_paths=[],
-        deny_network=True,
-    )
-    assert "--network=none" in cmd
-
-
-def test_build_command_propagates_env():
-    cmd = build_command(
-        ["true"], image="img:latest", writable_paths=[],
-        env={"FOO": "bar"},
-    )
-    assert "-e" in cmd
-    assert "FOO=bar" in cmd
-
-
-def test_build_command_propagates_cwd(tmp_path):
-    cmd = build_command(
-        ["true"], image="img:latest", writable_paths=[],
-        cwd=str(tmp_path),
-    )
-    assert "--workdir" in cmd
-    assert str(tmp_path) in cmd
-    # ``cwd`` is also added to the writable bind set so chdir
-    # resolves inside the container.
-    expected = f"type=bind,src={tmp_path.resolve()},dst={tmp_path.resolve()}"
-    assert expected in cmd
-
-
-def test_build_command_dedups_overlapping_mounts(tmp_path):
-    cmd = build_command(
-        ["true"], image="img:latest",
-        writable_paths=[tmp_path, tmp_path],
-        readonly_paths=[tmp_path],
-    )
-    mount_args = [a for a in cmd if a.startswith("type=bind,")]
-    assert len(mount_args) == 1
-
-
-def test_wrap_layers_constructor_env_under_call_env(tmp_path):
-    sb = PodmanSandbox(image="img:latest", env={"A": "1", "B": "2"})
-    cmd = sb.wrap(["true"], writable_paths=[], env={"B": "override"})
-    assert "A=1" in cmd
-    assert "B=override" in cmd
-    assert "B=2" not in cmd
-
-
-def test_wrap_combines_lifetime_and_per_call_writable_paths(tmp_path):
-    lifetime = tmp_path / "lifetime"
-    lifetime.mkdir()
-    per_call = tmp_path / "percall"
-    per_call.mkdir()
-    sb = PodmanSandbox(image="img:latest", writable_paths=[lifetime])
-    cmd = sb.wrap(["true"], writable_paths=[per_call])
-    assert f"type=bind,src={lifetime.resolve()},dst={lifetime.resolve()}" in cmd
-    assert f"type=bind,src={per_call.resolve()},dst={per_call.resolve()}" in cmd
-
-
-def test_close_is_noop():
-    sb = PodmanSandbox(image="img:latest")
-    sb.close()
-    sb.close()
-
-
-def test_context_manager_closes():
-    with PodmanSandbox(image="img:latest") as sb:
-        assert sb.image == "img:latest"
-
-
-def test_run_names_container_and_force_removes_it(monkeypatch):
-    """Every ``run()`` must inject ``--name agentcap-<hex>`` and, in a
-    ``finally`` block, fire ``podman rm -f <same-name>`` even when the
-    main subprocess succeeded — ``--rm`` only fires on a clean container
-    exit, so this is the guarantee against orphaned containers when
-    timeouts/kills/dead parents prevent that."""
-    import subprocess
-    from agentcap.sandbox import podman as pmod
-
-    calls: list[list[str]] = []
-
-    class _Completed:
-        returncode = 0
-        stdout = ""
-        stderr = ""
-
-    def fake_run(argv, **_kw):
-        calls.append(list(argv))
-        return _Completed()
-
-    monkeypatch.setattr(subprocess, "run", fake_run)
-    sb = pmod.PodmanSandbox(image="img:latest")
-    sb.run(["echo", "hi"])
-
-    assert len(calls) == 2, f"expected run + rm; got {calls!r}"
-    run_cmd, rm_cmd = calls
-    # ``--name <agentcap-...>`` was inserted right after ``podman run``.
-    assert "--name" in run_cmd
-    name_idx = run_cmd.index("--name")
-    name = run_cmd[name_idx + 1]
-    assert name.startswith("agentcap-")
-    # The cleanup targets the same name.
-    assert rm_cmd[:3] == ["podman", "rm", "-f"]
-    assert rm_cmd[3] == name
-
-
-def test_run_force_removes_container_even_if_subprocess_raises(monkeypatch):
-    """When ``subprocess.run`` raises (e.g. ``TimeoutExpired``), the
-    container can still be alive — the cleanup ``podman rm -f`` must
-    fire from the ``finally`` so the orchestrator never leaks a
-    container even on timeout / SIGINT. The cleanup must also target
-    the SAME name that ``podman run`` was given; removing the wrong
-    container would silently nuke something else."""
-    import subprocess
-    from agentcap.sandbox import podman as pmod
-
-    run_calls: list[list[str]] = []
-    rm_calls: list[list[str]] = []
-
-    def fake_run(argv, **kw):
-        if argv[:3] == ["podman", "rm", "-f"]:
-            rm_calls.append(list(argv))
-            class _R:
-                returncode = 0
-                stdout = ""
-                stderr = ""
-            return _R()
-        run_calls.append(list(argv))
-        raise subprocess.TimeoutExpired(cmd=argv, timeout=kw.get("timeout"))
-
-    monkeypatch.setattr(subprocess, "run", fake_run)
-    sb = pmod.PodmanSandbox(image="img:latest")
-    try:
-        sb.run(["sleep", "60"], timeout=0.01)
-    except subprocess.TimeoutExpired:
-        pass
-    else:
-        raise AssertionError("expected TimeoutExpired to propagate")
-
-    # Extract the name passed to ``podman run`` via ``--name <X>``.
-    assert len(run_calls) == 1, run_calls
-    run_argv = run_calls[0]
-    assert "--name" in run_argv
-    run_name = run_argv[run_argv.index("--name") + 1]
-    assert run_name.startswith("agentcap-")
-
-    # Cleanup must have targeted that exact name — not some other
-    # container, and not no container.
-    assert len(rm_calls) == 1, rm_calls
-    assert rm_calls[0] == ["podman", "rm", "-f", run_name]
-
-
-def test_run_propagates_main_failure_when_cleanup_also_fails(monkeypatch):
-    """If both the main ``podman run`` and the cleanup ``podman rm -f``
-    raise, callers must see the ORIGINAL exception — not the cleanup's.
-    Otherwise a transient ``rm`` failure would mask the real reason the
-    container run failed (timeout, exit code, etc.)."""
-    import subprocess
-    from agentcap.sandbox import podman as pmod
-
-    def fake_run(argv, **kw):
-        if argv[:3] == ["podman", "rm", "-f"]:
-            raise RuntimeError("cleanup boom")
-        raise subprocess.TimeoutExpired(cmd=argv, timeout=kw.get("timeout"))
-
-    monkeypatch.setattr(subprocess, "run", fake_run)
-    sb = pmod.PodmanSandbox(image="img:latest")
-    try:
-        sb.run(["sleep", "60"], timeout=0.01)
-    except subprocess.TimeoutExpired:
-        pass  # original exception preserved
-    except RuntimeError as exc:
-        raise AssertionError(
-            f"cleanup exception ({exc}) leaked past the finally — "
-            f"primary TimeoutExpired was masked"
-        )
-    else:
-        raise AssertionError("expected TimeoutExpired to propagate")
diff --git a/tests/test_provider.py b/tests/test_provider.py
deleted file mode 100644
index 1e02bfb..0000000
--- a/tests/test_provider.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""Pure-Python tests for ``agentcap.provider`` — classifier + hostname
-fallback + HF Router sub-provider refinement. The actual network probe
-is tested implicitly via the live integration suite; here we feed
-synthetic ``endpoints`` dicts to exercise the classification logic."""
-
-from __future__ import annotations
-
-from agentcap.provider import (
-    _classify,
-    _hostname_fallback,
-    refine_for_sub_provider,
-)
-
-
-# ---------------------------------------------------------------------------
-# hostname fallback
-# ---------------------------------------------------------------------------
-
-
-def test_hostname_fallback_known_providers():
-    assert _hostname_fallback("https://router.huggingface.co/v1") == "hf-router"
-    assert _hostname_fallback("https://api.openai.com/v1") == "openai"
-    assert _hostname_fallback("https://api.together.xyz/v1") == "together"
-    assert _hostname_fallback("https://api.fireworks.ai/v1") == "fireworks"
-
-
-def test_hostname_fallback_loopback_and_private():
-    assert _hostname_fallback("http://127.0.0.1:8000/v1") == "local"
-    assert _hostname_fallback("http://localhost:8000/v1") == "local"
-    assert _hostname_fallback("http://10.0.0.5:8000/v1") == "local"
-    assert _hostname_fallback("http://192.168.1.42:8000/v1") == "local"
-
-
-def test_hostname_fallback_unknown_public():
-    # eTLD+1-style: api.mycompany.com → "mycompany"
-    assert _hostname_fallback("https://api.mycompany.com/v1") == "mycompany"
-
-
-# ---------------------------------------------------------------------------
-# classifier
-# ---------------------------------------------------------------------------
-
-
-def test_classify_hf_router_via_colon_suffix():
-    endpoints = {
-        "models": {"body": {"data": [
-            {"id": "meta-llama/Llama-3.3-70B-Instruct"},
-            {"id": "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai"},
-        ]}},
-    }
-    assert _classify(endpoints, "https://router.huggingface.co/v1") == "hf-router"
-
-
-def test_classify_llama_cpp_via_props():
-    endpoints = {
-        "props": {"body": {"chat_template": "...", "n_ctx": 65536}},
-        "models": {"body": {"data": [{"id": "qwen-test"}]}},
-    }
-    assert _classify(endpoints, "http://127.0.0.1:8000/v1") == "local-llama-server"
-
-
-def test_classify_tgi_via_info_model_id():
-    endpoints = {
-        "info": {"body": {"model_id": "meta-llama/Llama-3.3-70B-Instruct",
-                          "version": "2.4.1"}},
-    }
-    assert _classify(endpoints, "http://10.0.0.5:8000/v1") == "tgi"
-
-
-def test_classify_vllm_via_version():
-    endpoints = {
-        "version": {"body": {"version": "0.7.0"}},
-        "models": {"body": {"data": [{"id": "served-model"}]}},
-    }
-    assert _classify(endpoints, "http://10.0.0.5:8000/v1") == "vllm"
-
-
-def test_classify_openai_via_model_ids():
-    endpoints = {
-        "models": {"body": {"data": [
-            {"id": "gpt-4o-mini"},
-            {"id": "o1-preview"},
-        ]}},
-    }
-    assert _classify(endpoints, "https://api.openai.com/v1") == "openai"
-
-
-def test_classify_falls_back_to_hostname_when_probe_empty():
-    assert _classify({}, "https://router.huggingface.co/v1") == "hf-router"
-    assert _classify({}, "http://127.0.0.1:8000/v1") == "local"
-
-
-# ---------------------------------------------------------------------------
-# refine_for_sub_provider
-# ---------------------------------------------------------------------------
-
-
-def test_refine_pins_hf_router_sub_provider():
-    assert refine_for_sub_provider(
-        "hf-router", "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai"
-    ) == "hf-router/fireworks-ai"
-
-
-def test_refine_noop_without_colon_or_non_hf_router():
-    assert refine_for_sub_provider("hf-router", "meta-llama/Llama-3.3-70B") == "hf-router"
-    assert refine_for_sub_provider("local", "anything:fireworks-ai") == "local"
-
-
diff --git a/tests/test_proxy.py b/tests/test_proxy.py
deleted file mode 100644
index bca9ca7..0000000
--- a/tests/test_proxy.py
+++ /dev/null
@@ -1,327 +0,0 @@
-"""Tests for the capture proxy.
-
-Strategy: stand up a mock upstream Starlette app and wire the proxy's
-internal httpx client to it via ``ASGITransport``. Then drive the
-proxy via Starlette's ``TestClient`` and assert on (a) what bytes the
-agent-side client sees, and (b) what files land on disk in the capture
-dir.
-
-End-to-end network sockets are not used — everything runs in-process.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import httpx
-import pytest
-from starlette.applications import Starlette
-from starlette.requests import Request
-from starlette.responses import JSONResponse, Response, StreamingResponse
-from starlette.routing import Route
-from starlette.testclient import TestClient
-
-from agentcap.proxy import CHAT_COMPLETIONS_PATH, make_app
-
-
-# ---------------------------------------------------------------------------
-# Mock upstream — a tiny Starlette app that pretends to be an OpenAI-compat
-# model server. Each test parameterises its behaviour by setting attributes
-# on the wrapping ``Holder``.
-# ---------------------------------------------------------------------------
-
-
-class UpstreamSpy:
-    """Records what the proxy forwarded to upstream + lets each test
-    plug in a custom response factory."""
-
-    def __init__(self) -> None:
-        self.received_bodies: list[dict] = []
-        self.received_headers: list[dict] = []
-        self.received_paths: list[str] = []
-        self.responder = None  # async callable: (request) -> Response
-
-    def set_responder(self, fn) -> None:
-        self.responder = fn
-
-
-def _build_upstream(spy: UpstreamSpy) -> Starlette:
-    async def chat_handler(request: Request) -> Response:
-        body = await request.body()
-        try:
-            spy.received_bodies.append(json.loads(body))
-        except json.JSONDecodeError:
-            spy.received_bodies.append({"_unparsed": body.decode("utf-8", errors="replace")})
-        spy.received_headers.append(dict(request.headers))
-        spy.received_paths.append(request.url.path)
-        if spy.responder is None:
-            return JSONResponse({"error": "no responder configured"}, status_code=500)
-        return await spy.responder(request)
-
-    async def models_handler(request: Request) -> Response:
-        spy.received_paths.append(request.url.path)
-        return JSONResponse(
-            {"object": "list", "data": [{"id": "mock-model", "object": "model"}]}
-        )
-
-    async def echo_handler(request: Request) -> Response:
-        spy.received_paths.append(request.url.path)
-        return JSONResponse({"path": request.url.path, "method": request.method})
-
-    return Starlette(
-        routes=[
-            Route(CHAT_COMPLETIONS_PATH, chat_handler, methods=["POST"]),
-            Route("/v1/models", models_handler, methods=["GET"]),
-            Route(
-                "/{anything:path}",
-                echo_handler,
-                methods=["GET", "POST", "PUT", "DELETE"],
-            ),
-        ]
-    )
-
-
-@pytest.fixture
-def spy() -> UpstreamSpy:
-    return UpstreamSpy()
-
-
-@pytest.fixture
-def capture_dir(tmp_path: Path) -> Path:
-    d = tmp_path / "capture"
-    d.mkdir()
-    return d
-
-
-@pytest.fixture
-def proxy_client(spy: UpstreamSpy, capture_dir: Path):
-    """A TestClient hitting the proxy, where the proxy talks to the
-    mock upstream via ASGITransport."""
-    upstream_app = _build_upstream(spy)
-    upstream_transport = httpx.ASGITransport(app=upstream_app)
-    upstream_client = httpx.AsyncClient(
-        transport=upstream_transport, base_url="http://upstream"
-    )
-    proxy_app = make_app("http://upstream", capture_dir, client=upstream_client)
-    with TestClient(proxy_app) as client:
-        yield client
-
-
-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-
-def test_chat_nonstreaming_captures_request_and_response(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    async def responder(request):
-        return JSONResponse(
-            {
-                "id": "chatcmpl-test",
-                "object": "chat.completion",
-                "choices": [
-                    {
-                        "index": 0,
-                        "message": {"role": "assistant", "content": "hi back"},
-                        "finish_reason": "stop",
-                    }
-                ],
-            }
-        )
-
-    spy.set_responder(responder)
-    body = {
-        "model": "test-model",
-        "messages": [{"role": "user", "content": "hello"}],
-        "stream": False,
-    }
-    resp = proxy_client.post(CHAT_COMPLETIONS_PATH, json=body)
-    assert resp.status_code == 200
-    assert resp.json()["choices"][0]["message"]["content"] == "hi back"
-
-    # Upstream saw the same body
-    assert spy.received_bodies == [body]
-    assert spy.received_paths == [CHAT_COMPLETIONS_PATH]
-
-    # Trace dir has exactly one request + response pair
-    req_files = sorted(capture_dir.glob("*.request.json"))
-    resp_files = sorted(capture_dir.glob("*.response.json"))
-    assert len(req_files) == 1
-    assert len(resp_files) == 1
-    assert req_files[0].stem.split(".")[0] == resp_files[0].stem.split(".")[0]
-
-    req_record = json.loads(req_files[0].read_text())
-    assert req_record["body"] == body
-    assert "request_id" in req_record
-    assert isinstance(req_record["captured_at"], int)
-
-    resp_record = json.loads(resp_files[0].read_text())
-    assert resp_record["stream"] is False
-    assert resp_record["status_code"] == 200
-    assert resp_record["body"]["choices"][0]["message"]["content"] == "hi back"
-
-
-def test_chat_streaming_forwards_chunks_and_captures_raw(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    sse_chunks = [
-        b'data: {"choices":[{"delta":{"role":"assistant"}}]}\n\n',
-        b'data: {"choices":[{"delta":{"content":"hi"}}]}\n\n',
-        b'data: {"choices":[{"delta":{"content":" back"}}]}\n\n',
-        b"data: [DONE]\n\n",
-    ]
-
-    async def responder(request):
-        async def gen():
-            for c in sse_chunks:
-                yield c
-
-        return StreamingResponse(gen(), media_type="text/event-stream")
-
-    spy.set_responder(responder)
-
-    body = {
-        "model": "test-model",
-        "messages": [{"role": "user", "content": "hi"}],
-        "stream": True,
-    }
-    with proxy_client.stream("POST", CHAT_COMPLETIONS_PATH, json=body) as resp:
-        assert resp.status_code == 200
-        received = b"".join(resp.iter_bytes())
-
-    # The agent-side client got the bytes the upstream produced
-    assert received == b"".join(sse_chunks)
-
-    # The capture's response.json captured the assembled stream + status
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(resp_files) == 1
-    record = json.loads(resp_files[0].read_text())
-    assert record["stream"] is True
-    assert record["status_code"] == 200
-    assert record["raw"] == b"".join(sse_chunks).decode("utf-8")
-
-
-def test_passthrough_models_endpoint_is_not_captured(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    resp = proxy_client.get("/v1/models")
-    assert resp.status_code == 200
-    assert resp.json() == {
-        "object": "list",
-        "data": [{"id": "mock-model", "object": "model"}],
-    }
-    # Upstream saw the call
-    assert "/v1/models" in spy.received_paths
-    # But nothing was written to the capture dir
-    assert list(capture_dir.iterdir()) == []
-
-
-def test_arbitrary_passthrough_path(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    resp = proxy_client.get("/unrelated/path?x=1")
-    assert resp.status_code == 200
-    body = resp.json()
-    assert body["path"] == "/unrelated/path"
-    assert body["method"] == "GET"
-    # Trace dir untouched
-    assert list(capture_dir.iterdir()) == []
-
-
-def test_two_requests_get_distinct_request_ids(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    async def responder(request):
-        return JSONResponse({"id": "x", "choices": []})
-
-    spy.set_responder(responder)
-
-    body = {"model": "m", "messages": [{"role": "user", "content": "."}]}
-    proxy_client.post(CHAT_COMPLETIONS_PATH, json=body)
-    proxy_client.post(CHAT_COMPLETIONS_PATH, json=body)
-
-    req_files = sorted(capture_dir.glob("*.request.json"))
-    assert len(req_files) == 2
-    ids = {json.loads(p.read_text())["request_id"] for p in req_files}
-    assert len(ids) == 2  # distinct
-
-
-def test_malformed_request_body_still_captured(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    async def responder(request):
-        return JSONResponse({"choices": []})
-
-    spy.set_responder(responder)
-
-    raw = b"{not json"
-    resp = proxy_client.post(
-        CHAT_COMPLETIONS_PATH, content=raw, headers={"content-type": "application/json"}
-    )
-    # Upstream still got the bytes verbatim — we don't sanitise input.
-    # Whether upstream accepts it is upstream's problem; we just relay.
-    assert resp.status_code == 200
-
-    req_files = list(capture_dir.glob("*.request.json"))
-    assert len(req_files) == 1
-    record = json.loads(req_files[0].read_text())
-    # Body is preserved as a placeholder dict instead of crashing
-    assert record["body"] == {"_unparsed_raw": "{not json"}
-
-
-def test_upstream_500_is_forwarded_and_captured(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    async def responder(request):
-        return JSONResponse({"error": {"message": "boom"}}, status_code=500)
-
-    spy.set_responder(responder)
-
-    body = {"model": "m", "messages": [{"role": "user", "content": "x"}]}
-    resp = proxy_client.post(CHAT_COMPLETIONS_PATH, json=body)
-    assert resp.status_code == 500
-    assert resp.json()["error"]["message"] == "boom"
-
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(resp_files) == 1
-    record = json.loads(resp_files[0].read_text())
-    assert record["status_code"] == 500
-    assert record["body"]["error"]["message"] == "boom"
-
-
-def test_request_id_is_consistent_across_request_and_response_files(
-    spy: UpstreamSpy, capture_dir: Path, proxy_client: TestClient
-):
-    async def responder(request):
-        return JSONResponse({"choices": []})
-
-    spy.set_responder(responder)
-
-    proxy_client.post(
-        CHAT_COMPLETIONS_PATH,
-        json={"model": "m", "messages": [{"role": "user", "content": "."}]},
-    )
-    req_files = list(capture_dir.glob("*.request.json"))
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(req_files) == 1
-    assert len(resp_files) == 1
-    rid_from_req = json.loads(req_files[0].read_text())["request_id"]
-    rid_from_resp = json.loads(resp_files[0].read_text())["request_id"]
-    assert rid_from_req == rid_from_resp
-    # Filenames also share the prefix
-    assert req_files[0].name.startswith(rid_from_req)
-    assert resp_files[0].name.startswith(rid_from_req)
-
-
-def test_capture_dir_is_created_if_missing(tmp_path: Path, spy: UpstreamSpy):
-    """make_app should create the capture dir on init."""
-    target = tmp_path / "does" / "not" / "exist"
-    upstream_app = _build_upstream(spy)
-    upstream_transport = httpx.ASGITransport(app=upstream_app)
-    upstream_client = httpx.AsyncClient(
-        transport=upstream_transport, base_url="http://upstream"
-    )
-    make_app("http://upstream", target, client=upstream_client)
-    assert target.is_dir()
diff --git a/tests/test_proxy_http.py b/tests/test_proxy_http.py
deleted file mode 100644
index 4bb23be..0000000
--- a/tests/test_proxy_http.py
+++ /dev/null
@@ -1,276 +0,0 @@
-"""Integration tests for the capture proxy — real HTTP over TCP loopback.
-
-Two uvicorn servers run in worker threads:
-  - mock upstream (Starlette app on a free port)
-  - capture proxy (Starlette app on another free port, pointed at upstream)
-
-The test client makes real ``httpx.Client`` HTTP calls to the proxy.
-This catches wiring issues that the in-process ASGITransport unit
-tests in ``test_proxy.py`` would not — header reconstruction, content
-encoding, streaming-chunk pump-through, etc.
-
-Marked as ``integration`` so they can be filtered out with
-``pytest -m 'not integration'`` when iterating on logic.
-"""
-
-from __future__ import annotations
-
-import json
-import socket
-import threading
-import time
-from pathlib import Path
-
-import httpx
-import pytest
-import uvicorn
-from starlette.applications import Starlette
-from starlette.requests import Request
-from starlette.responses import JSONResponse, Response, StreamingResponse
-from starlette.routing import Route
-
-from agentcap.proxy import CHAT_COMPLETIONS_PATH, make_app
-
-
-pytestmark = pytest.mark.integration
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _free_port() -> int:
-    s = socket.socket()
-    s.bind(("127.0.0.1", 0))
-    port = s.getsockname()[1]
-    s.close()
-    return port
-
-
-class UvicornThreadServer:
-    """Run a uvicorn server in a daemon thread and shut it down cleanly."""
-
-    def __init__(self, app, host: str = "127.0.0.1", port: int | None = None):
-        self.host = host
-        self.port = port or _free_port()
-        config = uvicorn.Config(
-            app,
-            host=host,
-            port=self.port,
-            log_level="error",
-            lifespan="on",
-        )
-        self.server = uvicorn.Server(config)
-        # Disable signal handler installation — we're not the main thread.
-        self.server.install_signal_handlers = lambda *_: None
-        self._thread: threading.Thread | None = None
-
-    @property
-    def url(self) -> str:
-        return f"http://{self.host}:{self.port}"
-
-    def start(self, timeout: float = 5.0) -> None:
-        self._thread = threading.Thread(target=self.server.run, daemon=True)
-        self._thread.start()
-        deadline = time.monotonic() + timeout
-        # Poll until the server is accepting connections — server.started
-        # flips to True once uvicorn's serve() has bound the socket.
-        while time.monotonic() < deadline:
-            if self.server.started:
-                # Smoke-check the socket is actually accepting.
-                try:
-                    with socket.create_connection((self.host, self.port), timeout=0.2):
-                        return
-                except OSError:
-                    pass
-            time.sleep(0.05)
-        raise RuntimeError(f"uvicorn server on :{self.port} failed to start in {timeout}s")
-
-    def stop(self, timeout: float = 5.0) -> None:
-        self.server.should_exit = True
-        if self._thread is not None:
-            self._thread.join(timeout=timeout)
-            if self._thread.is_alive():
-                # Best-effort: force-exit. uvicorn's force_exit triggers
-                # the loop to exit immediately on the next iteration.
-                self.server.force_exit = True
-                self._thread.join(timeout=timeout)
-
-
-# ---------------------------------------------------------------------------
-# Mock upstream
-# ---------------------------------------------------------------------------
-
-
-class UpstreamSpy:
-    def __init__(self) -> None:
-        self.received_bodies: list[dict] = []
-        self.received_paths: list[str] = []
-        self.responder = None
-
-    def set_responder(self, fn) -> None:
-        self.responder = fn
-
-
-def _build_upstream(spy: UpstreamSpy) -> Starlette:
-    async def chat_handler(request: Request) -> Response:
-        body = await request.body()
-        try:
-            spy.received_bodies.append(json.loads(body))
-        except json.JSONDecodeError:
-            spy.received_bodies.append({"_raw": body.decode("utf-8", errors="replace")})
-        spy.received_paths.append(request.url.path)
-        if spy.responder is None:
-            return JSONResponse({"error": "no responder"}, status_code=500)
-        return await spy.responder(request)
-
-    async def models_handler(request: Request) -> Response:
-        spy.received_paths.append(request.url.path)
-        return JSONResponse(
-            {"object": "list", "data": [{"id": "real-mock", "object": "model"}]}
-        )
-
-    return Starlette(
-        routes=[
-            Route(CHAT_COMPLETIONS_PATH, chat_handler, methods=["POST"]),
-            Route("/v1/models", models_handler, methods=["GET"]),
-        ]
-    )
-
-
-# ---------------------------------------------------------------------------
-# Fixtures — two real uvicorn servers + a clean capture dir per test
-# ---------------------------------------------------------------------------
-
-
-@pytest.fixture
-def spy() -> UpstreamSpy:
-    return UpstreamSpy()
-
-
-@pytest.fixture
-def capture_dir(tmp_path: Path) -> Path:
-    d = tmp_path / "capture"
-    d.mkdir()
-    return d
-
-
-@pytest.fixture
-def upstream(spy: UpstreamSpy):
-    server = UvicornThreadServer(_build_upstream(spy))
-    server.start()
-    yield server
-    server.stop()
-
-
-@pytest.fixture
-def proxy(upstream: UvicornThreadServer, capture_dir: Path):
-    # Real proxy → real upstream URL. No client injection: the proxy
-    # creates its own httpx.AsyncClient and dials over TCP loopback.
-    proxy_app = make_app(upstream.url, capture_dir)
-    server = UvicornThreadServer(proxy_app)
-    server.start()
-    yield server
-    server.stop()
-
-
-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-
-def test_chat_nonstreaming_over_real_http(
-    spy: UpstreamSpy, capture_dir: Path, proxy: UvicornThreadServer
-):
-    async def responder(request):
-        return JSONResponse(
-            {
-                "id": "chatcmpl-http-1",
-                "choices": [
-                    {"index": 0, "message": {"role": "assistant", "content": "ok"}}
-                ],
-            }
-        )
-
-    spy.set_responder(responder)
-    body = {
-        "model": "test-model",
-        "messages": [{"role": "user", "content": "ping"}],
-        "stream": False,
-    }
-    with httpx.Client(timeout=10.0) as client:
-        resp = client.post(f"{proxy.url}{CHAT_COMPLETIONS_PATH}", json=body)
-    assert resp.status_code == 200
-    assert resp.json()["choices"][0]["message"]["content"] == "ok"
-
-    # Upstream saw the body verbatim
-    assert spy.received_bodies == [body]
-
-    # Trace dir got both files
-    req_files = list(capture_dir.glob("*.request.json"))
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(req_files) == 1 and len(resp_files) == 1
-    assert json.loads(req_files[0].read_text())["body"] == body
-    assert (
-        json.loads(resp_files[0].read_text())["body"]["choices"][0]["message"]["content"]
-        == "ok"
-    )
-
-
-def test_chat_streaming_over_real_http(
-    spy: UpstreamSpy, capture_dir: Path, proxy: UvicornThreadServer
-):
-    sse_chunks = [
-        b'data: {"choices":[{"delta":{"role":"assistant"}}]}\n\n',
-        b'data: {"choices":[{"delta":{"content":"hello"}}]}\n\n',
-        b'data: {"choices":[{"delta":{"content":" world"}}]}\n\n',
-        b"data: [DONE]\n\n",
-    ]
-
-    async def responder(request):
-        async def gen():
-            for c in sse_chunks:
-                yield c
-
-        return StreamingResponse(gen(), media_type="text/event-stream")
-
-    spy.set_responder(responder)
-    body = {
-        "model": "test-model",
-        "messages": [{"role": "user", "content": "stream me"}],
-        "stream": True,
-    }
-    received = bytearray()
-    with httpx.Client(timeout=10.0) as client:
-        with client.stream(
-            "POST", f"{proxy.url}{CHAT_COMPLETIONS_PATH}", json=body
-        ) as resp:
-            assert resp.status_code == 200
-            for chunk in resp.iter_bytes():
-                received.extend(chunk)
-
-    expected = b"".join(sse_chunks)
-    assert bytes(received) == expected
-
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(resp_files) == 1
-    record = json.loads(resp_files[0].read_text())
-    assert record["stream"] is True
-    assert record["status_code"] == 200
-    assert record["raw"] == expected.decode("utf-8")
-
-
-def test_passthrough_over_real_http_does_not_capture(
-    spy: UpstreamSpy, capture_dir: Path, proxy: UvicornThreadServer
-):
-    with httpx.Client(timeout=10.0) as client:
-        resp = client.get(f"{proxy.url}/v1/models")
-    assert resp.status_code == 200
-    assert resp.json() == {
-        "object": "list",
-        "data": [{"id": "real-mock", "object": "model"}],
-    }
-    # Upstream got the call, capture dir untouched
-    assert "/v1/models" in spy.received_paths
-    assert list(capture_dir.iterdir()) == []
diff --git a/tests/test_proxy_meta.py b/tests/test_proxy_meta.py
deleted file mode 100644
index cede408..0000000
--- a/tests/test_proxy_meta.py
+++ /dev/null
@@ -1,250 +0,0 @@
-"""Tests for the proxy's per-request stamping: ``upstream_url`` on
-captured requests and ``upstream_fingerprint`` on captured responses.
-
-The proxy keeps no metadata file, no startup probe, no drift state —
-those concerns moved to the export layer, derived from the per-row
-stamps tested here.
-
-Uses the same in-process ASGI wiring as ``test_proxy.py`` — proxy
-talks to a Starlette mock upstream through an ``httpx.AsyncClient``
-backed by ``ASGITransport``.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import httpx
-import pytest
-from starlette.applications import Starlette
-from starlette.requests import Request
-from starlette.responses import JSONResponse, Response, StreamingResponse
-from starlette.routing import Route
-from starlette.testclient import TestClient
-
-from agentcap.proxy import CHAT_COMPLETIONS_PATH, make_app
-
-
-class UpstreamSpy:
-    def __init__(self) -> None:
-        self.responder = None
-
-    def set_responder(self, fn) -> None:
-        self.responder = fn
-
-
-def _build_upstream(spy: UpstreamSpy) -> Starlette:
-    async def chat_handler(request: Request) -> Response:
-        if spy.responder is None:
-            return JSONResponse({"error": "no responder"}, status_code=500)
-        return await spy.responder(request)
-
-    return Starlette(
-        routes=[Route(CHAT_COMPLETIONS_PATH, chat_handler, methods=["POST"])]
-    )
-
-
-@pytest.fixture
-def capture_dir(tmp_path: Path) -> Path:
-    d = tmp_path / "capture"
-    d.mkdir()
-    return d
-
-
-@pytest.fixture
-def spy() -> UpstreamSpy:
-    return UpstreamSpy()
-
-
-@pytest.fixture
-def proxy_app(spy: UpstreamSpy, capture_dir: Path):
-    upstream_transport = httpx.ASGITransport(app=_build_upstream(spy))
-    upstream_client = httpx.AsyncClient(
-        transport=upstream_transport, base_url="http://upstream"
-    )
-    return make_app("http://upstream", capture_dir, client=upstream_client)
-
-
-# ---------------------------------------------------------------------------
-# Per-request: upstream_url stamping
-# ---------------------------------------------------------------------------
-
-
-def test_request_stamps_upstream_url(
-    spy: UpstreamSpy, capture_dir: Path, proxy_app
-):
-    """Every ``.request.json`` carries the URL the proxy was forwarding
-    to. Export derives the provider classification from this stamp
-    alone — no sidecar metadata file involved."""
-    async def responder(request):
-        return JSONResponse({"id": "x", "model": "m", "choices": []})
-
-    spy.set_responder(responder)
-    with TestClient(proxy_app) as client:
-        client.post(
-            CHAT_COMPLETIONS_PATH,
-            json={"model": "m", "messages": [{"role": "user", "content": "."}]},
-        )
-
-    req_files = list(capture_dir.glob("*.request.json"))
-    assert len(req_files) == 1
-    rec = json.loads(req_files[0].read_text())
-    assert rec["upstream_url"] == "http://upstream"
-
-
-def test_no_metadata_file_written(
-    spy: UpstreamSpy, capture_dir: Path, proxy_app
-):
-    """The capture dir contains only per-request capture files — no
-    ``_proxy.json``, no ``_meta.json``, nothing else."""
-    async def responder(request):
-        return JSONResponse({"id": "x", "model": "m", "choices": []})
-
-    spy.set_responder(responder)
-    with TestClient(proxy_app) as client:
-        client.post(
-            CHAT_COMPLETIONS_PATH,
-            json={"model": "m", "messages": [{"role": "user", "content": "."}]},
-        )
-
-    names = sorted(p.name for p in capture_dir.iterdir())
-    # One .request.json + one .response.json, nothing else.
-    assert len(names) == 2
-    assert all(
-        n.endswith(".request.json") or n.endswith(".response.json")
-        for n in names
-    )
-
-
-# ---------------------------------------------------------------------------
-# Per-response: upstream_fingerprint stamping
-# ---------------------------------------------------------------------------
-
-
-def test_response_fingerprint_extracted_from_upstream_headers(
-    spy: UpstreamSpy, capture_dir: Path, proxy_app
-):
-    async def responder(request):
-        return JSONResponse(
-            {
-                "id": "x",
-                "model": "qwen-actually-served",
-                "choices": [{"index": 0, "message": {"role": "assistant", "content": "hi"}}],
-            },
-            headers={"server": "llama.cpp", "x-served-by": "fireworks-pod-7"},
-        )
-
-    spy.set_responder(responder)
-    with TestClient(proxy_app) as client:
-        resp = client.post(
-            CHAT_COMPLETIONS_PATH,
-            json={"model": "m", "messages": [{"role": "user", "content": "."}]},
-        )
-        assert resp.status_code == 200
-
-    resp_files = list(capture_dir.glob("*.response.json"))
-    assert len(resp_files) == 1
-    rec = json.loads(resp_files[0].read_text())
-    fp = rec["upstream_fingerprint"]
-    assert fp["server"] == "llama.cpp"
-    assert fp["x_served_by"] == "fireworks-pod-7"
-    assert fp["served_model"] == "qwen-actually-served"
-    assert fp["build_info"] is None  # not echoed on this response
-
-
-def test_streaming_response_fingerprint_picks_model_from_first_chunk(
-    spy: UpstreamSpy, capture_dir: Path, proxy_app
-):
-    """For SSE responses the body isn't a single dict; extract ``model``
-    from the first parseable ``data:`` payload."""
-    sse_chunks = [
-        b'data: {"id":"x","model":"qwen-served","choices":[{"delta":{"role":"assistant"}}]}\n\n',
-        b'data: {"id":"x","model":"qwen-served","choices":[{"delta":{"content":"hi"}}]}\n\n',
-        b"data: [DONE]\n\n",
-    ]
-
-    async def responder(request):
-        async def gen():
-            for c in sse_chunks:
-                yield c
-        return StreamingResponse(
-            gen(),
-            media_type="text/event-stream",
-            headers={"server": "llama.cpp"},
-        )
-
-    spy.set_responder(responder)
-    with TestClient(proxy_app) as client:
-        with client.stream(
-            "POST",
-            CHAT_COMPLETIONS_PATH,
-            json={"model": "m", "stream": True, "messages": [{"role": "user", "content": "."}]},
-        ) as resp:
-            for _ in resp.iter_bytes():
-                pass
-
-    rec = json.loads(next(capture_dir.glob("*.response.json")).read_text())
-    assert rec["stream"] is True
-    assert rec["upstream_fingerprint"]["served_model"] == "qwen-served"
-    assert rec["upstream_fingerprint"]["server"] == "llama.cpp"
-
-
-# ---------------------------------------------------------------------------
-# Export-side provider derivation from the per-request stamp
-# ---------------------------------------------------------------------------
-
-
-def test_detect_provider_columns_derives_from_request_stamp(tmp_path: Path):
-    from agentcap.export import detect_provider_columns
-
-    capture = tmp_path / "t"
-    capture.mkdir()
-    (capture / "rid.request.json").write_text(json.dumps({
-        "request_id": "rid",
-        "captured_at": 1,
-        "upstream_url": "https://router.huggingface.co",
-        "body": {"model": "meta-llama/Llama-3.3-70B:fireworks-ai", "messages": []},
-    }))
-    cols = detect_provider_columns(capture)
-    assert cols["upstream_url"] == "https://router.huggingface.co"
-    assert cols["provider"] == "hf-router/fireworks-ai"
-
-
-def test_detect_provider_columns_local_upstream(tmp_path: Path):
-    from agentcap.export import detect_provider_columns
-
-    capture = tmp_path / "t"
-    capture.mkdir()
-    (capture / "rid.request.json").write_text(json.dumps({
-        "request_id": "rid",
-        "captured_at": 1,
-        "upstream_url": "http://127.0.0.1:8000",
-        "body": {"model": "qwen-test", "messages": []},
-    }))
-    cols = detect_provider_columns(capture)
-    assert cols["provider"] == "local"
-
-
-def test_detect_provider_columns_empty_for_legacy_capture(tmp_path: Path):
-    """Trace dir from before the proxy started stamping upstream_url —
-    no way to derive the column, return empty so the parquet schema
-    just omits it."""
-    from agentcap.export import detect_provider_columns
-
-    capture = tmp_path / "t"
-    capture.mkdir()
-    (capture / "rid.request.json").write_text(json.dumps({
-        "request_id": "rid",
-        "captured_at": 1,
-        "body": {"model": "m", "messages": []},
-    }))
-    assert detect_provider_columns(capture) == {}
-
-
-def test_detect_provider_columns_no_requests(tmp_path: Path):
-    from agentcap.export import detect_provider_columns
-
-    capture = tmp_path / "t"
-    capture.mkdir()
-    assert detect_provider_columns(capture) == {}
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
deleted file mode 100644
index a0f70ae..0000000
--- a/tests/test_sandbox.py
+++ /dev/null
@@ -1,21 +0,0 @@
-"""Tests for the sandbox abstraction."""
-
-from __future__ import annotations
-
-import pytest
-
-from agentcap.sandbox import get_sandbox
-from agentcap.sandbox.podman import PodmanSandbox
-
-
-def test_get_sandbox_returns_podman_sandbox():
-    """The factory hands back a ``PodmanSandbox`` keyed on the
-    canonical per-agent image ref."""
-    sb = get_sandbox(agent="goose")
-    assert isinstance(sb, PodmanSandbox)
-    assert sb.image == "localhost/agentcap-goose:latest"
-
-
-def test_get_sandbox_requires_agent():
-    with pytest.raises(TypeError):
-        get_sandbox()  # type: ignore[call-arg]
diff --git a/tests/test_scan.py b/tests/test_scan.py
deleted file mode 100644
index da896e3..0000000
--- a/tests/test_scan.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""Unit tests for ``agentcap.scan``.
-
-These exercise the real trufflehog binary (skipped when not
-installed). Cache behaviour is checked by calling ``scan_run_dir``
-twice and inspecting the returned ``was_cached`` flag + the
-persisted ``scan.json`` — no monkeypatching of ``scan_path`` itself,
-so the tests fail if the cache short-circuit regresses.
-
-Missing-binary errors are exercised by manipulating ``PATH`` /
-``HOME`` (legitimate inputs to ``find_trufflehog``), not by
-mocking ``shutil.which``.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import shutil
-from pathlib import Path
-
-import pytest
-
-from agentcap.scan import (
-    SCAN_CACHE_NAME,
-    TrufflehogMissingError,
-    find_trufflehog,
-    load_cached_scan,
-    scan_path,
-    scan_run_dir,
-)
-
-
-def _has_trufflehog() -> bool:
-    if shutil.which("trufflehog"):
-        return True
-    local = Path.home() / ".local" / "bin" / "trufflehog"
-    return local.is_file() and os.access(local, os.X_OK)
-
-
-_HAS_TRUFFLEHOG = _has_trufflehog()
-
-
-live = pytest.mark.skipif(
-    not _HAS_TRUFFLEHOG, reason="trufflehog binary not installed"
-)
-
-
-def _make_run_dir(root: Path, *, with_poisoned_capture: bool = False) -> Path:
-    """Minimal run layout with only ``captures/`` populated, so each
-    test controls exactly which subdir trufflehog has to scan."""
-    run_dir = root / "agent-local-20260601-000000"
-    captures = run_dir / "captures"
-    captures.mkdir(parents=True)
-    body = '{"request_id":"rid","captured_at":1,"body":{"model":"m","messages":[]}}'
-    (captures / "rid.request.json").write_text(body)
-    if with_poisoned_capture:
-        # Stripe doc-example key — pattern-matches the Stripe detector,
-        # never verifies against the live API.
-        (captures / "poisoned.request.json").write_text(
-            '{"messages":[{"role":"user","content":"sk_live_4eC39HqLyjWDarjtT1zdp7dc"}]}'
-        )
-    return run_dir
-
-
-# ---------------------------------------------------------------------------
-# find_trufflehog — PATH / HOME-driven, no mocking
-# ---------------------------------------------------------------------------
-
-
-def test_find_trufflehog_raises_when_path_and_home_are_empty(
-    monkeypatch, tmp_path,
-):
-    """Both PATH lookup and ~/.local/bin fallback miss → install hint."""
-    monkeypatch.setenv("PATH", str(tmp_path))  # nothing in this dir
-    monkeypatch.setenv("HOME", str(tmp_path))  # no .local/bin/trufflehog under HOME
-    with pytest.raises(TrufflehogMissingError) as exc:
-        find_trufflehog()
-    assert "install.sh" in str(exc.value)
-
-
-def test_find_trufflehog_falls_back_to_local_bin(monkeypatch, tmp_path):
-    """No PATH hit → ~/.local/bin/trufflehog wins."""
-    fake = tmp_path / ".local" / "bin" / "trufflehog"
-    fake.parent.mkdir(parents=True)
-    fake.write_text("#!/bin/sh\n")
-    fake.chmod(0o755)
-    monkeypatch.setenv("PATH", str(tmp_path))  # empty
-    monkeypatch.setenv("HOME", str(tmp_path))
-    assert find_trufflehog() == str(fake)
-
-
-@live
-def test_find_trufflehog_finds_installed_binary():
-    """The real installed binary is locatable."""
-    bin_path = find_trufflehog()
-    assert os.path.basename(bin_path) == "trufflehog"
-    assert os.access(bin_path, os.X_OK)
-
-
-# ---------------------------------------------------------------------------
-# scan_path — runs the real binary
-# ---------------------------------------------------------------------------
-
-
-@live
-def test_scan_path_clean_dir_no_hits(tmp_path):
-    (tmp_path / "f.json").write_text('{"model": "m", "messages": []}\n')
-    result = scan_path(tmp_path, no_verification=True)
-    assert result.verified == []
-    assert result.unverified == []
-    assert result.chunks_scanned >= 1
-
-
-@live
-def test_scan_path_detects_unverified_stripe_pattern(tmp_path):
-    """A canned Stripe-shaped string trips the Stripe detector.
-    With ``no_verification=True`` Stripe's docs example lands as
-    unverified — we don't call the live API."""
-    (tmp_path / "poisoned.json").write_text(
-        '{"messages":[{"role":"user","content":"sk_live_4eC39HqLyjWDarjtT1zdp7dc"}]}'
-    )
-    result = scan_path(tmp_path, no_verification=True)
-    assert result.verified == []
-    assert len(result.unverified) >= 1
-    assert result.unverified[0].detector.lower() == "stripe"
-    assert result.unverified[0].file.endswith("poisoned.json")
-
-
-# ---------------------------------------------------------------------------
-# scan_run_dir — cache write + reuse + mode mismatch (real binary)
-# ---------------------------------------------------------------------------
-
-
-@live
-def test_scan_run_dir_writes_cache_on_first_call(tmp_path):
-    run_dir = _make_run_dir(tmp_path)
-    result, was_cached = scan_run_dir(run_dir, no_verification=True)
-    assert was_cached is False
-    cache_path = run_dir / SCAN_CACHE_NAME
-    assert cache_path.is_file(), "scan.json should be written on first call"
-    cache = json.loads(cache_path.read_text())
-    assert cache["no_verification"] is True
-    assert cache["chunks_scanned"] == result.chunks_scanned
-    assert cache["bytes_scanned"] == result.bytes_scanned
-
-
-@live
-def test_scan_run_dir_reuses_cache_on_second_call(tmp_path):
-    """Second call in the same mode short-circuits — no trufflehog
-    invocation. Verified by stat-ing the cache mtime: a fresh scan
-    would rewrite it."""
-    run_dir = _make_run_dir(tmp_path)
-    scan_run_dir(run_dir, no_verification=True)
-    mtime_after_first = (run_dir / SCAN_CACHE_NAME).stat().st_mtime
-
-    _, was_cached = scan_run_dir(run_dir, no_verification=True)
-    assert was_cached is True
-    assert (run_dir / SCAN_CACHE_NAME).stat().st_mtime == mtime_after_first
-
-
-@live
-def test_scan_run_dir_rescan_forces_fresh_subprocess(tmp_path):
-    run_dir = _make_run_dir(tmp_path)
-    scan_run_dir(run_dir, no_verification=True)
-
-    # mtime resolution on linux can be coarse — overwrite the cache
-    # with a sentinel so a successful re-scan is observable by the
-    # cache content changing back to a real ScanResult.
-    (run_dir / SCAN_CACHE_NAME).write_text("{}")
-
-    _, was_cached = scan_run_dir(run_dir, no_verification=True, rescan=True)
-    assert was_cached is False
-    # Cache rewritten with a real ScanResult, not the sentinel ``{}``.
-    refreshed = json.loads((run_dir / SCAN_CACHE_NAME).read_text())
-    assert "chunks_scanned" in refreshed
-    assert refreshed["chunks_scanned"] >= 1
-
-
-@live
-def test_scan_run_dir_rescans_when_verify_request_meets_pattern_only_cache(
-    tmp_path,
-):
-    """Pattern-only cache can't satisfy a verified request — re-scan."""
-    run_dir = _make_run_dir(tmp_path)
-    scan_run_dir(run_dir, no_verification=True)  # pattern-only cache
-    _, was_cached = scan_run_dir(run_dir, no_verification=False)
-    assert was_cached is False
-    # Cache file now records the new (verified) mode.
-    cache = json.loads((run_dir / SCAN_CACHE_NAME).read_text())
-    assert cache["no_verification"] is False
-
-
-@live
-def test_scan_run_dir_verified_cache_satisfies_pattern_request(tmp_path):
-    """Verified cache covers a pattern-only request (patterns are a
-    subset of what verification ran on)."""
-    run_dir = _make_run_dir(tmp_path)
-    scan_run_dir(run_dir, no_verification=False)  # verified cache
-    _, was_cached = scan_run_dir(run_dir, no_verification=True)
-    assert was_cached is True
-
-
-@live
-def test_scan_run_dir_finds_hits_in_captures(tmp_path):
-    run_dir = _make_run_dir(tmp_path, with_poisoned_capture=True)
-    result, _ = scan_run_dir(run_dir, no_verification=True)
-    assert result.verified == []
-    assert any(h.detector.lower() == "stripe" for h in result.unverified)
-
-
-@live
-def test_scan_run_dir_excludes_self_cache(tmp_path):
-    """scan.json itself must never be in the scan corpus — that would
-    inflate chunk counts on every re-scan."""
-    run_dir = _make_run_dir(tmp_path)
-    first, _ = scan_run_dir(run_dir, no_verification=True)
-    second, _ = scan_run_dir(run_dir, no_verification=True, rescan=True)
-    assert first.chunks_scanned == second.chunks_scanned
-    assert first.bytes_scanned == second.bytes_scanned
-
-
-# ---------------------------------------------------------------------------
-# load_cached_scan — pure file IO, no binary needed
-# ---------------------------------------------------------------------------
-
-
-def test_load_cached_scan_returns_none_when_missing(tmp_path):
-    assert load_cached_scan(tmp_path, no_verification=False) is None
-
-
-def test_load_cached_scan_returns_none_on_invalid_json(tmp_path):
-    (tmp_path / SCAN_CACHE_NAME).write_text("not json at all")
-    assert load_cached_scan(tmp_path, no_verification=False) is None
-
-
-def test_load_cached_scan_rejects_pattern_only_when_verify_requested(
-    tmp_path,
-):
-    (tmp_path / SCAN_CACHE_NAME).write_text(json.dumps({
-        "scanned_at": 0,
-        "no_verification": True,
-        "bytes_scanned": 0,
-        "chunks_scanned": 0,
-        "verified": [],
-        "unverified": [],
-    }))
-    # Pattern-only cache; caller wants verified → cache is not enough.
-    assert load_cached_scan(tmp_path, no_verification=False) is None
-    # Same cache satisfies a pattern-only request.
-    result = load_cached_scan(tmp_path, no_verification=True)
-    assert result is not None

From e8e28ae983a73bc4624d30300c048339221be029 Mon Sep 17 00:00:00 2001
From: David Corvoysier <david@huggingface.co>
Date: Thu, 25 Jun 2026 15:31:12 +0000
Subject: [PATCH 2/2] chore: drop Python references from doc comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the Python package gone, remove the "Ports `*.py`" provenance and the
"matches the Python" notes from the Rust module docs — comments should describe
the code as it stands. Keep the references to the external pyarrow / `datasets`
ecosystem (consumer-side parquet compatibility), which remain accurate.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/captures.rs              | 3 +--
 src/diff.rs                  | 6 ++----
 src/drivers/goose.rs         | 4 ++--
 src/drivers/hermes.rs        | 4 ++--
 src/drivers/mod.rs           | 2 +-
 src/drivers/opencode.rs      | 2 +-
 src/drivers/pi.rs            | 2 +-
 src/export.rs                | 2 +-
 src/followups/mod.rs         | 2 +-
 src/followups/synthesized.rs | 2 +-
 src/hub/footer.rs            | 4 ++--
 src/inspect/mod.rs           | 4 ++--
 src/inspect/render.rs        | 9 ++++-----
 src/inspect/sources.rs       | 6 ++----
 src/ls.rs                    | 2 +-
 src/model.rs                 | 6 +++---
 src/orchestrator.rs          | 4 ++--
 src/parquet_io.rs            | 3 +--
 src/proxy/capture.rs         | 3 +--
 src/proxy/mod.rs             | 2 +-
 src/query.rs                 | 2 +-
 src/run.rs                   | 2 +-
 src/sandbox/mod.rs           | 4 ++--
 src/sandbox/provisioning.rs  | 6 +++---
 src/scan.rs                  | 4 ++--
 src/sse.rs                   | 4 ++--
 tests/cross_impl.rs          | 2 +-
 tests/live.rs                | 2 +-
 28 files changed, 45 insertions(+), 53 deletions(-)

diff --git a/src/captures.rs b/src/captures.rs
index 037fe60..53191ac 100644
--- a/src/captures.rs
+++ b/src/captures.rs
@@ -1,8 +1,7 @@
 //! Resolve a captured request by id and hand back the body.
 //!
 //! No normalization of the JSON — captures persist the request as parsed JSON,
-//! so the original byte sequence isn't recoverable, but the object is. Ports
-//! `captures.py` (`load_request(s)`, `resolve_workspace_rid`).
+//! so the original byte sequence isn't recoverable, but the object is.
 
 use std::collections::{HashMap, HashSet};
 use std::path::{Path, PathBuf};
diff --git a/src/diff.rs b/src/diff.rs
index 85cd4b8..aa0269b 100644
--- a/src/diff.rs
+++ b/src/diff.rs
@@ -1,6 +1,4 @@
-//! Message diffing + one-line summaries for the inspect picker. Ports
-//! `_message_key` / `_diff_messages` / `_delta_label` / `_message_text` /
-//! `_message_summary` / `_flatten` from the Python.
+//! Message diffing + one-line summaries for the inspect picker.
 
 use crate::model::canonical_json;
 use serde_json::Value;
@@ -37,7 +35,7 @@ pub fn message_key(m: &Value) -> MessageKey {
     }
 }
 
-/// Python truthiness for the values we test: non-empty arrays/strings/objects,
+/// Truthiness for the values we test: non-empty arrays/strings/objects,
 /// non-zero numbers, `true`. `null`/`false`/empty are falsy.
 fn is_truthy(v: &Value) -> bool {
     match v {
diff --git a/src/drivers/goose.rs b/src/drivers/goose.rs
index 44d50f5..8f4fd5b 100644
--- a/src/drivers/goose.rs
+++ b/src/drivers/goose.rs
@@ -1,5 +1,5 @@
-//! Goose driver: `goose run -t "<prompt>"`. Ports `drivers/goose.py`. The proxy
-//! URL + provider are baked into the image ENV; the driver sets `GOOSE_MODEL`.
+//! Goose driver: `goose run -t "<prompt>"`. The proxy URL + provider are baked
+//! into the image ENV; the driver sets `GOOSE_MODEL`.
 
 use std::collections::BTreeMap;
 use std::sync::Arc;
diff --git a/src/drivers/hermes.rs b/src/drivers/hermes.rs
index 8c0b7ae..69d3ce7 100644
--- a/src/drivers/hermes.rs
+++ b/src/drivers/hermes.rs
@@ -1,5 +1,5 @@
-//! Hermes driver: `hermes chat -q "<prompt>"` non-interactively. Ports
-//! `drivers/hermes.py`. The proxy URL + config are baked into the image.
+//! Hermes driver: `hermes chat -q "<prompt>"` non-interactively. The proxy
+//! URL + config are baked into the image.
 
 use std::collections::BTreeMap;
 use std::sync::{Arc, OnceLock};
diff --git a/src/drivers/mod.rs b/src/drivers/mod.rs
index a94bdcf..50f6d40 100644
--- a/src/drivers/mod.rs
+++ b/src/drivers/mod.rs
@@ -1,4 +1,4 @@
-//! Agent driver adapters. Ports `drivers/__init__.py` + the four agent modules.
+//! Agent driver adapters.
 //!
 //! A driver wraps an agent CLI so the orchestrator can `start` a session,
 //! `resume` it, and read back the final response text. Drivers shell out via the
diff --git a/src/drivers/opencode.rs b/src/drivers/opencode.rs
index 3812b61..e211d93 100644
--- a/src/drivers/opencode.rs
+++ b/src/drivers/opencode.rs
@@ -1,4 +1,4 @@
-//! OpenCode driver: `opencode run --format json`. Ports `drivers/opencode.py`.
+//! OpenCode driver: `opencode run --format json`.
 //! OpenCode emits NDJSON events on stdout: `text` events carry assistant chunks,
 //! every event has `sessionID`, `tool_use` events carry error states.
 
diff --git a/src/drivers/pi.rs b/src/drivers/pi.rs
index 1a90c46..a160569 100644
--- a/src/drivers/pi.rs
+++ b/src/drivers/pi.rs
@@ -1,4 +1,4 @@
-//! pi-mono driver: `pi -p "<prompt>" --provider local`. Ports `drivers/pi.py`.
+//! pi-mono driver: `pi -p "<prompt>" --provider local`.
 //! pi mints its own session UUID on start and resumes the latest via `--continue`.
 
 use std::collections::BTreeMap;
diff --git a/src/export.rs b/src/export.rs
index b117f65..a31f509 100644
--- a/src/export.rs
+++ b/src/export.rs
@@ -1,6 +1,6 @@
 //! `export`: render capture dirs to parquet and push to the Hub.
 //!
-//! Ports `export.py` + the `export_cmd` orchestration in `__main__.py`. Three
+//! Three
 //! artifacts per push: `<owner>/<base>-captures` (parquet), one
 //! `<owner>/<base>-<agent>-traces` per agent (raw native traces), and a
 //! Collection titled `<base>` grouping them. The trufflehog gate (verified hits
diff --git a/src/followups/mod.rs b/src/followups/mod.rs
index deef85d..9576eb9 100644
--- a/src/followups/mod.rs
+++ b/src/followups/mod.rs
@@ -1,4 +1,4 @@
-//! Follow-up strategies for multi-turn runs. Ports `followups/*`.
+//! Follow-up strategies for multi-turn runs.
 //!
 //! `turn` is the 1-indexed number of the upcoming turn (first follow-up is
 //! `turn=2`). `continue` is cheapest; `templates` rotates a small pool;
diff --git a/src/followups/synthesized.rs b/src/followups/synthesized.rs
index 67e1eaf..3684533 100644
--- a/src/followups/synthesized.rs
+++ b/src/followups/synthesized.rs
@@ -1,5 +1,5 @@
 //! Synthesized follow-up: sends `(original_task, last_response)` to a model and
-//! uses the reply as the next user message. Ports `followups/synthesized.py`.
+//! uses the reply as the next user message.
 //!
 //! By design this call **bypasses the capture proxy** — it talks to the model
 //! server directly so the captured corpus stays a clean record of agent↔model
diff --git a/src/hub/footer.rs b/src/hub/footer.rs
index 762f5d3..1cc436e 100644
--- a/src/hub/footer.rs
+++ b/src/hub/footer.rs
@@ -3,8 +3,8 @@
 //!
 //! Our writer stamps `agent`/`model`/`tasks` into the parquet key-value
 //! metadata, so a footer read surfaces the full preview slice. (Parquets written
-//! by the Python pyarrow path keep those under the embedded `ARROW:schema` blob
-//! instead; those show only the row count here until selected.)
+//! by pyarrow keep those under the embedded `ARROW:schema` blob instead; those
+//! show only the row count here until selected.)
 
 use anyhow::{bail, Result};
 use parquet::errors::ParquetError;
diff --git a/src/inspect/mod.rs b/src/inspect/mod.rs
index 21b22ef..8d0730d 100644
--- a/src/inspect/mod.rs
+++ b/src/inspect/mod.rs
@@ -1,5 +1,5 @@
 //! `inspect`: classify the TARGET and launch the picker (or dump a request body
-//! for a bare hex rid). Ports `_classify_target` + `inspect_cmd`.
+//! for a bare hex rid).
 
 mod app;
 mod render;
@@ -111,7 +111,7 @@ fn dump_rid(rid: &str, rid_flag: bool) -> Result<()> {
     Ok(())
 }
 
-/// Classify the TARGET positional. Ports `_classify_target`.
+/// Classify the TARGET positional.
 fn classify_target(target: Option<&str>) -> Result<Target> {
     let Some(target) = target else {
         return Ok(Target::Workspace(std::env::current_dir()?.join(WORKSPACE_DIR)));
diff --git a/src/inspect/render.rs b/src/inspect/render.rs
index 9aa3941..a869df2 100644
--- a/src/inspect/render.rs
+++ b/src/inspect/render.rs
@@ -1,7 +1,6 @@
 //! Preview-pane rendering for inspect: build `ratatui::Text` for a run, a
 //! request (header + prompt + message diff), a flattened message, and an HF
-//! parquet entry. Query terms are highlighted (bold red), replacing the Python
-//! `_highlight` ANSI pipeline.
+//! parquet entry. Query terms are highlighted (bold red).
 
 use ratatui::style::{Color, Modifier, Style};
 use ratatui::text::{Line, Span, Text};
@@ -13,7 +12,7 @@ use super::sources::{MsgRecord, ReqRow, RunRow};
 
 const ARGS_CAP: usize = 240;
 
-/// Run metadata preview (run picker). Ports `_run_preview_cmd`.
+/// Run metadata preview (run picker).
 pub fn run_preview(run: &RunRow) -> Text<'static> {
     let meta: Value = std::fs::read(run.run_dir.join("run.json"))
         .ok()
@@ -75,7 +74,7 @@ pub fn hf_parquet_preview(
     Text::from(lines)
 }
 
-/// Request preview: header + initial prompt + message diff. Ports `_preview_cmd`.
+/// Request preview: header + initial prompt + message diff.
 pub fn request_preview(row: &ReqRow, body: &Value, prev_body: Option<&Value>, terms: &[String]) -> Text<'static> {
     let messages = body
         .get("messages")
@@ -157,7 +156,7 @@ pub fn request_preview(row: &ReqRow, body: &Value, prev_body: Option<&Value>, te
     Text::from(lines)
 }
 
-/// One flattened message detail (message picker). Ports `_render_msg_preview`.
+/// One flattened message detail (message picker).
 pub fn message_preview(rec: &MsgRecord, terms: &[String]) -> Text<'static> {
     let mut lines = vec![plain(format!("role:         {}", rec.role))];
     match rec.msg_idx {
diff --git a/src/inspect/sources.rs b/src/inspect/sources.rs
index 6782005..0fc1d16 100644
--- a/src/inspect/sources.rs
+++ b/src/inspect/sources.rs
@@ -1,8 +1,6 @@
 //! Data layer for inspect: enumerate runs / requests / messages from a
 //! workspace or a parquet, and load request/response bodies for the preview and
-//! message levels. Ports `_enumerate_workspace_requests`,
-//! `_enumerate_parquet_requests`, `_request_messages_for_view`, and the
-//! body/response loaders from `__main__.py`.
+//! message levels.
 
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
@@ -311,7 +309,7 @@ fn value_to_string(v: &Value) -> String {
 }
 
 /// Flatten request `messages[]` + the decoded response into one record per
-/// picker row. Ports `_request_messages_for_view`.
+/// picker row.
 pub fn request_messages_for_view(body: &Value, resp: Option<&Value>) -> Vec<MsgRecord> {
     let mut records = Vec::new();
     let msgs = body
diff --git a/src/ls.rs b/src/ls.rs
index 452b0da..2f53b40 100644
--- a/src/ls.rs
+++ b/src/ls.rs
@@ -1,4 +1,4 @@
-//! `ls`: list runs under a local workspace. Ports `ls_cmd`.
+//! `ls`: list runs under a local workspace.
 //!
 //! Unlike `export`, `ls` does NOT consult `$AGENTCAP_WORKSPACE` — what you point
 //! it at is what you get. Accepts either the parent dir or the `.agentcap/` dir.
diff --git a/src/model.rs b/src/model.rs
index a97c6ae..e8548d5 100644
--- a/src/model.rs
+++ b/src/model.rs
@@ -16,9 +16,9 @@ pub struct DecodedResponse {
     pub finish_reason: Option<String>,
 }
 
-/// Canonical JSON string with object keys sorted recursively — the Rust analog
-/// of Python's `json.dumps(obj, sort_keys=True)`. Used to make heterogeneous
-/// sub-objects (message content arrays, tool_calls) comparable for diffing.
+/// Canonical JSON string with object keys sorted recursively. Used to make
+/// heterogeneous sub-objects (message content arrays, tool_calls) comparable
+/// for diffing.
 pub fn canonical_json(v: &Value) -> String {
     let mut out = String::new();
     write_canonical(v, &mut out);
diff --git a/src/orchestrator.rs b/src/orchestrator.rs
index 09d6804..aafeb63 100644
--- a/src/orchestrator.rs
+++ b/src/orchestrator.rs
@@ -1,5 +1,5 @@
-//! Drive an agent driver through a corpus with a follow-up strategy. Ports
-//! `orchestrator.py`. Proxy-agnostic: the caller wires capture context via the
+//! Drive an agent driver through a corpus with a follow-up strategy.
+//! Proxy-agnostic: the caller wires capture context via the
 //! `set_ctx` callback (the proxy stamps it onto each capture).
 
 use std::path::Path;
diff --git a/src/parquet_io.rs b/src/parquet_io.rs
index 87efec8..1fde237 100644
--- a/src/parquet_io.rs
+++ b/src/parquet_io.rs
@@ -1,7 +1,6 @@
 //! Capture dir → parquet (export) and parquet → request bodies (read).
 //!
-//! Ports `export.py`'s `export_local` / `_iter_pairs` / `_row` and the parquet
-//! readers in `captures.py`. The `request` / `response` columns are
+//! The `request` / `response` columns are
 //! JSON-stringified bodies (Arrow can't infer a schema over heterogeneous
 //! tool-schema fields); `agent` / `model` / `tasks` are stamped into the
 //! parquet key-value metadata so the inspect picker can label files cheaply.
diff --git a/src/proxy/capture.rs b/src/proxy/capture.rs
index 2567fd4..1ba50ee 100644
--- a/src/proxy/capture.rs
+++ b/src/proxy/capture.rs
@@ -1,7 +1,6 @@
 //! Capture record shapes + persistence. Writes `<rid>.request.json` /
 //! `<rid>.response.json` in the exact shape the data/UI half reads (see
-//! `parquet_io` / `captures`). Ports the `_persist_*` / fingerprint / SSE-model
-//! helpers from `proxy.py`.
+//! `parquet_io` / `captures`).
 
 use std::io;
 use std::path::Path;
diff --git a/src/proxy/mod.rs b/src/proxy/mod.rs
index 811508e..190d7a4 100644
--- a/src/proxy/mod.rs
+++ b/src/proxy/mod.rs
@@ -1,4 +1,4 @@
-//! Synchronous capture proxy. Ports `proxy.py`.
+//! Synchronous capture proxy.
 //!
 //! A `tiny_http` server on a worker-thread pool fronts an OpenAI-compatible
 //! upstream. `POST /v1/chat/completions` is captured to
diff --git a/src/query.rs b/src/query.rs
index 1d222c9..a13e430 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -1,5 +1,5 @@
 //! fzf-style query parsing → the literal substrings to highlight in the preview
-//! pane. Ports `_parse_fzf_terms`. nucleo handles the actual matching with the
+//! pane. nucleo handles the actual matching with the
 //! same operator atoms; this only extracts what to colour.
 
 /// Split a query into the literal text of each non-negated term, with the
diff --git a/src/run.rs b/src/run.rs
index 03bc048..c4febfb 100644
--- a/src/run.rs
+++ b/src/run.rs
@@ -1,5 +1,5 @@
 //! The `run` command: drive an agent CLI through a corpus, capture every
-//! chat-completion, and summarise. Ports `run_cmd` from `__main__.py`.
+//! chat-completion, and summarise.
 
 use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
diff --git a/src/sandbox/mod.rs b/src/sandbox/mod.rs
index 49a33b1..cdfae56 100644
--- a/src/sandbox/mod.rs
+++ b/src/sandbox/mod.rs
@@ -1,4 +1,4 @@
-//! Podman container sandbox. Ports `sandbox/podman.py` + `sandbox/__init__.py`.
+//! Podman container sandbox.
 //!
 //! Each [`PodmanSandbox::run`] is a fresh `podman run --rm` against a pre-built
 //! per-agent image. Host paths in `writable_paths` / `readonly_paths` are
@@ -178,7 +178,7 @@ fn run_child(wrapped: &[String], timeout: Option<Duration>) -> std::result::Resu
 }
 
 /// Provision (build the image if needed) and return a sandbox, or an error with
-/// an install hint. Ports `require_sandbox_or_die`.
+/// an install hint.
 pub fn require_sandbox(
     agent: &str,
     env: BTreeMap<String, String>,
diff --git a/src/sandbox/provisioning.rs b/src/sandbox/provisioning.rs
index f01d766..121d15a 100644
--- a/src/sandbox/provisioning.rs
+++ b/src/sandbox/provisioning.rs
@@ -1,9 +1,9 @@
-//! Per-agent podman image lifecycle. Ports `sandbox/podman_provisioning.py`.
+//! Per-agent podman image lifecycle.
 //!
 //! The Containerfile is the source of truth: its SHA256 (plus any sibling
 //! context dir) is baked into the built image as a label; a mismatch on a later
-//! run forces a rebuild. The hash algorithm matches the Python byte-for-byte so
-//! Rust and Python agree and don't trigger needless rebuilds.
+//! run forces a rebuild. The hash format is fixed so images built by earlier
+//! agentcap versions are reused on upgrade, not needlessly rebuilt.
 
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
diff --git a/src/scan.rs b/src/scan.rs
index 6fefd9b..cedb761 100644
--- a/src/scan.rs
+++ b/src/scan.rs
@@ -1,7 +1,7 @@
 //! Secret scan over a capture run, gating `export`.
 //!
-//! Shells out to `trufflehog filesystem` and parses its JSON. Policy (matching
-//! the Python `scan.py`): a single **verified** hit aborts the export;
+//! Shells out to `trufflehog filesystem` and parses its JSON. Policy: a single
+//! **verified** hit aborts the export;
 //! **unverified** hits are reported but non-blocking (pattern matchers have a
 //! real false-positive rate). Results are cached to `<run_dir>/scan.json`; the
 //! cache is invalidated by `rescan` or when a pattern-only cache can't satisfy a
diff --git a/src/sse.rs b/src/sse.rs
index 0884a7f..d063321 100644
--- a/src/sse.rs
+++ b/src/sse.rs
@@ -1,5 +1,5 @@
 //! Decode OpenAI-compatible responses into a single synthesized assistant
-//! message. Ports `_decode_sse_response` / `_decode_response` from the Python.
+//! message.
 
 use crate::model::DecodedResponse;
 use serde_json::{json, Value};
@@ -120,7 +120,7 @@ mod tests {
     use serde_json::json;
 
     /// Assemble an SSE blob: one `data: <json>` line per object + trailing
-    /// `[DONE]`, matching the Python test helper.
+    /// `[DONE]`.
     fn sse(objs: &[Value]) -> String {
         let mut s: String = objs
             .iter()
diff --git a/tests/cross_impl.rs b/tests/cross_impl.rs
index 6a530dc..c2268f0 100644
--- a/tests/cross_impl.rs
+++ b/tests/cross_impl.rs
@@ -1,5 +1,5 @@
 //! Opt-in cross-implementation check: write a parquet with the Rust exporter to
-//! `$AGENTCAP_PARQUET_OUT`, so a Python/pyarrow reader can confirm the schema,
+//! `$AGENTCAP_PARQUET_OUT`, so a pyarrow reader can confirm the schema,
 //! KV metadata, and row JSON load cleanly. Ignored by default (needs the env
 //! var + leaves the file in place):
 //!
diff --git a/tests/live.rs b/tests/live.rs
index 1622b49..c965830 100644
--- a/tests/live.rs
+++ b/tests/live.rs
@@ -1,7 +1,7 @@
 //! Live end-to-end tests: drive the real `agentcap run` binary through a real
 //! OpenAI-compatible server for each agent, asserting the wire path (the agent
 //! reaches the model through the proxy and the turn completes) — not task
-//! quality. Ports `test_cli_live.py` + `test_drivers_live.py`.
+//! quality.
 //!
 //! `#[ignore]` by default so `cargo test` stays hermetic. The `Test - Live`
 //! workflow provisions a llama.cpp server + builds the per-agent images, then