diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 8b3e93e..838727b 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -12,7 +12,7 @@
       "name": "bauto",
       "source": "./src/automator/data/skills",
       "description": "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)",
-      "version": "0.6.1",
+      "version": "0.6.2",
       "author": {
         "name": "pinkyd"
       },
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5534d89..f7b9f02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,28 @@ All notable changes to `bmad-auto` are documented here. The format is based on
 [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0,
 breaking changes may land in a minor release.
 
+## [0.6.2] — 2026-06-21
+
+### Added
+
+- **`bmad-auto probe-adapter` (alias `collect-adapter-data`).** A self-service command that
+  collects and sanitizes everything needed to finalize a CLI adapter profile — the hook payload
+  shape, transcript location/format, and token-usage schema for a `usage_parser` — so a user of
+  any coding CLI can paste back a clean, content-free report. A default zero-launch **scan** reads
+  on-disk conventions; opt-in `--probe` does a live capture in an ephemeral workspace. All output
+  passes through one audited PII sanitizer (token counts and field names survive; paths, prose, and
+  emails are redacted).
+- **GitHub Copilot CLI profile.** Bundled `copilot` profile (Copilot CLI ≥ 2026-02): `-i`
+  interactive launch, VS Code-compatible `Stop` hook, `--allow-all-tools` for unattended runs.
+  Still pending live E2E verification and a `usage_parser`; `probe-adapter` captures the token
+  schema needed to write one.
+
+### Docs
+
+- **Adapter authoring guide.** New [adapter authoring guide](docs/adapter-authoring-guide.md)
+  walks through finalizing a CLI profile with `probe-adapter` (scan vs probe, the PII model, and
+  the parser-writing loop); `probe-adapter` is added to both command references.
+
 ## [0.6.1] — 2026-06-20
 
 ### Added
@@ -429,6 +451,7 @@ enforced in CI.
   implementation phase, driven by a Python control loop with hook-based session transport and
   resumable on-disk run state.
 
+[0.6.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.2
 [0.6.1]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.1
 [0.6.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.0
 [0.5.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.5.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5cc170e..e08f321 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -182,7 +182,7 @@ Keep messages under 72 characters. Each commit = one logical change.
 - **Tests** live under `tests/`; add or update them for behavior changes. The mock adapter lets most of the loop run without a live CLI.
 - **Skills** ship as markdown under `src/automator/data/skills/` (the `bmad-auto-*` automation skills).
 - **Plugins** extend the orchestrator via a `plugin.toml` manifest — see the [plugin authoring guide](docs/plugin-authoring-guide.md).
-- **New coding CLIs** are usually a TOML profile, not Python — see the CLI adapter section in the [README](README.md).
+- **New coding CLIs** are usually a TOML profile, not Python — see the CLI adapter section in the [README](README.md) and the [adapter authoring guide](docs/adapter-authoring-guide.md) (use `bmad-auto probe-adapter` to collect the hook/transcript/token data a profile needs).
 
 ---
 
diff --git a/README.md b/README.md
index afef13b..a8b8853 100644
--- a/README.md
+++ b/README.md
@@ -58,24 +58,25 @@ bmad-auto tui                    # …or drive everything from the dashboard
 
 ## Command reference
 
-| Command                       | What it does                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
-| ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `bmad-auto init`              | Install the bundled `bmad-auto-*` skills, the hook relay, `.automator/policy.toml`, and a runs-dir gitignore. `--cli <profile>` (repeatable) targets specific agents; `--no-skills` / `--force-skills` control skill copying.                                                                                                                                                                                                                                                      |
-| `bmad-auto validate`          | Preflight every prerequisite: BMAD config, sprint-status, git, tmux, CLI binary, hook registration.                                                                                                                                                                                                                                                                                                                                                                                |
-| `bmad-auto run`               | Drive the dev → review → verify → commit loop. `--epic N`, `--story KEY`, `--max-stories N`, `--dry-run`.                                                                                                                                                                                                                                                                                                                                                                          |
-| `bmad-auto sweep`             | Triage + execute open `deferred-work.md` entries. `--no-prompt`, `--decisions-only`, `--max-bundles N`, `--repeat`, `--max-cycles N`, `--dry-run`.                                                                                                                                                                                                                                                                                                                                 |
-| `bmad-auto resume <run-id>`   | Continue a run paused at a gate, escalation, or interruption.                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| `bmad-auto resolve <run-id>`  | Resolve a CRITICAL escalation: open an interactive resolve agent to fix the frozen spec, then re-arm the story and resume. `--story KEY`, `--no-interactive`, `--resume` / `--no-resume`.                                                                                                                                                                                                                                                                                          |
-| `bmad-auto decisions`         | Answer deferred-work decisions earlier sweeps left unanswered (skipped by `--no-prompt`, or an abandoned interactive sweep). Recorded so the next sweep acts on them without re-asking. `--list` shows them without answering.                                                                                                                                                                                                                                                     |
-| `bmad-auto list` (`ls`)       | List every run/sweep with its short ref, type, and status — the handle you pass to the commands below.                                                                                                                                                                                                                                                                                                                                                                             |
-| `bmad-auto status [<run-id>]` | Run + sprint summary with per-story token totals (plus a count of decisions awaiting an answer).                                                                                                                                                                                                                                                                                                                                                                                   |
-| `bmad-auto attach [<run-id>]` | tmux-attach to a run's live agent session.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
-| `bmad-auto stop <run-id>`     | Stop a live run — the engine and its agent tmux session.                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| `bmad-auto delete <run-id>`   | Delete a run directory. `--force` stops the run first if it is still live.                                                                                                                                                                                                                                                                                                                                                                                                         |
-| `bmad-auto archive <run-id>`  | Compress a run into `.automator/archive` and remove the run dir. `--force` stops the run first if it is still live.                                                                                                                                                                                                                                                                                                                                                                |
-| `bmad-auto cleanup`           | Remove leftover tmux artifacts **for the current project**: kill `bmad-auto-<id>` sessions for finished/stopped/interrupted runs (and orphans whose run dir is gone) and close parked `bmad-auto-ctl` windows. `--dry-run` lists without killing. Live runs — and any session/window belonging to another project — are never touched.                                                                                                                                             |
-| `bmad-auto clean`             | Reclaim **disk** from concluded runs per `[cleanup]`: tear down git worktrees a mid-flight stop orphaned (freeing their Unity `Library/` + MCP-server builds), trim the heavy `worktrees/` tree from runs kept for history (they stay viewable in the TUI), and archive/delete runs past the retention window. Only finished/stopped runs are touched; `--dry-run` previews, `--keep <run-id>` protects, `--retain N` overrides the window, `--hard` deletes instead of archiving. |
-| `bmad-auto tui`               | The interactive dashboard (needs the `[tui]` extra). `--low-frame-rate` caps it to 15fps + disables animations (fixes repaint tearing over slow/SSH links; also `[tui] low_frame_rate`).                                                                                                                                                                                                                                                                                           |
+| Command                                                  | What it does                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `bmad-auto init`                                         | Install the bundled `bmad-auto-*` skills, the hook relay, `.automator/policy.toml`, and a runs-dir gitignore. `--cli <profile>` (repeatable) targets specific agents; `--no-skills` / `--force-skills` control skill copying.                                                                                                                                                                                                                                                      |
+| `bmad-auto validate`                                     | Preflight every prerequisite: BMAD config, sprint-status, git, tmux, CLI binary, hook registration.                                                                                                                                                                                                                                                                                                                                                                                |
+| `bmad-auto run`                                          | Drive the dev → review → verify → commit loop. `--epic N`, `--story KEY`, `--max-stories N`, `--dry-run`.                                                                                                                                                                                                                                                                                                                                                                          |
+| `bmad-auto sweep`                                        | Triage + execute open `deferred-work.md` entries. `--no-prompt`, `--decisions-only`, `--max-bundles N`, `--repeat`, `--max-cycles N`, `--dry-run`.                                                                                                                                                                                                                                                                                                                                 |
+| `bmad-auto resume <run-id>`                              | Continue a run paused at a gate, escalation, or interruption.                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| `bmad-auto resolve <run-id>`                             | Resolve a CRITICAL escalation: open an interactive resolve agent to fix the frozen spec, then re-arm the story and resume. `--story KEY`, `--no-interactive`, `--resume` / `--no-resume`.                                                                                                                                                                                                                                                                                          |
+| `bmad-auto decisions`                                    | Answer deferred-work decisions earlier sweeps left unanswered (skipped by `--no-prompt`, or an abandoned interactive sweep). Recorded so the next sweep acts on them without re-asking. `--list` shows them without answering.                                                                                                                                                                                                                                                     |
+| `bmad-auto list` (`ls`)                                  | List every run/sweep with its short ref, type, and status — the handle you pass to the commands below.                                                                                                                                                                                                                                                                                                                                                                             |
+| `bmad-auto status [<run-id>]`                            | Run + sprint summary with per-story token totals (plus a count of decisions awaiting an answer).                                                                                                                                                                                                                                                                                                                                                                                   |
+| `bmad-auto attach [<run-id>]`                            | tmux-attach to a run's live agent session.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| `bmad-auto stop <run-id>`                                | Stop a live run — the engine and its agent tmux session.                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| `bmad-auto delete <run-id>`                              | Delete a run directory. `--force` stops the run first if it is still live.                                                                                                                                                                                                                                                                                                                                                                                                         |
+| `bmad-auto archive <run-id>`                             | Compress a run into `.automator/archive` and remove the run dir. `--force` stops the run first if it is still live.                                                                                                                                                                                                                                                                                                                                                                |
+| `bmad-auto cleanup`                                      | Remove leftover tmux artifacts **for the current project**: kill `bmad-auto-<id>` sessions for finished/stopped/interrupted runs (and orphans whose run dir is gone) and close parked `bmad-auto-ctl` windows. `--dry-run` lists without killing. Live runs — and any session/window belonging to another project — are never touched.                                                                                                                                             |
+| `bmad-auto clean`                                        | Reclaim **disk** from concluded runs per `[cleanup]`: tear down git worktrees a mid-flight stop orphaned (freeing their Unity `Library/` + MCP-server builds), trim the heavy `worktrees/` tree from runs kept for history (they stay viewable in the TUI), and archive/delete runs past the retention window. Only finished/stopped runs are touched; `--dry-run` previews, `--keep <run-id>` protects, `--retain N` overrides the window, `--hard` deletes instead of archiving. |
+| `bmad-auto tui`                                          | The interactive dashboard (needs the `[tui]` extra). `--low-frame-rate` caps it to 15fps + disables animations (fixes repaint tearing over slow/SSH links; also `[tui] low_frame_rate`).                                                                                                                                                                                                                                                                                           |
+| `bmad-auto probe-adapter <cli>` (`collect-adapter-data`) | Collect + sanitize the data needed to finalize a CLI adapter profile (hook payload shape, transcript location/format, token schema). Default is a zero-launch **scan**; `--probe` opts into a live capture. `--transcript`, `--session-dir`, `--binary` (CLIs with no profile yet), `--out`, `--json`. See the [adapter authoring guide](docs/adapter-authoring-guide.md).                                                                                                         |
 
 Every command takes `--project <dir>` (default: the current directory). Any `<run-id>` may be a
 partial — the tail after the last `-` (e.g. `a1b2`), shortened to any prefix that stays unique;
@@ -438,17 +439,20 @@ Each run drives its agents inside a dedicated tmux session, `bmad-auto-<run-id>`
 
 One generic driver (`adapters/generic_tmux.py`) runs any coding CLI that fits the tmux-injection + hook-signal transport; everything CLI-specific lives in a declarative **profile** (`adapters/profile.py`). Built-in profiles ship as TOML in `automator/data/profiles/`:
 
-| Profile  | Status                  | Notes                                                                                                                                                                                                            |
-| -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `claude` | supported               | reference implementation                                                                                                                                                                                         |
-| `codex`  | supported, E2E-verified | Codex ≥ 0.139. No slash expansion in the initial prompt — the profile renders `$skill-name` mentions (plus a "use subagents as needed" nudge) instead. No SessionEnd hook; window-death fallback covers crashes. |
-| `gemini` | supported, E2E-verified | Gemini CLI ≥ 0.46 (hooks on by default since then). Launches with `-i` to stay interactive; `AfterAgent` maps to canonical Stop. Usage parser validated against real chat logs.                                  |
+| Profile   | Status                    | Notes                                                                                                                                                                                                                                                                |
+| --------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `claude`  | supported                 | reference implementation                                                                                                                                                                                                                                             |
+| `codex`   | supported, E2E-verified   | Codex ≥ 0.139. No slash expansion in the initial prompt — the profile renders `$skill-name` mentions (plus a "use subagents as needed" nudge) instead. No SessionEnd hook; window-death fallback covers crashes.                                                     |
+| `gemini`  | supported, E2E-verified   | Gemini CLI ≥ 0.46 (hooks on by default since then). Launches with `-i` to stay interactive; `AfterAgent` maps to canonical Stop. Usage parser validated against real chat logs.                                                                                      |
+| `copilot` | bundled, pending live E2E | GitHub Copilot CLI ≥ 2026-02. Launches with `-i` to stay interactive; VS Code-compatible PascalCase `Stop` hook (snake_case payloads); `--allow-all-tools` for unattended runs. No `usage_parser` yet — run `probe-adapter` to capture its token schema (see below). |
 
 **On budgets:** agentic sessions are dominated by cache reads (80–90%+ of raw tokens), which every supported vendor bills at ~0.1x base input. The `max_tokens_per_story` check therefore uses a cost-weighted total — cache reads count at `limits.cache_read_weight` (default 0.1) — while displayed totals stay raw. Set the weight to 1.0 to budget raw tokens.
 
 **Shared prerequisites:** the `bmad-auto-*` skills must be present in `.agents/skills/` (codex and gemini read it; Claude Code reads `.claude/skills/`), and each CLI must have been run once interactively in the project for auth/trust — `bmad-auto init --cli codex --cli gemini` installs the skills into `.agents/skills/`, registers the hook relay, and prints the per-CLI first-run steps.
 
-**Adding a CLI without touching Python:** drop a TOML file in `<project>/.automator/profiles/<name>.toml` (same fields as the built-ins: binary, `prompt_template`, bypass flags, a `[hooks]` block picking one of the config dialects `claude-settings-json` / `codex-hooks-json` / `gemini-settings-json`, and a native→canonical event map). The hook relay script and orchestrator are CLI-agnostic — each registration passes the canonical event name as the script argument. A CLI whose hook config clones one of the existing dialects (the ecosystem trend) needs nothing else; a genuinely different transport gets its own adapter class instead (see the opencode HTTP+SSE design stub in `adapters/opencode_http.py`).
+**Adding a CLI without touching Python:** drop a TOML file in `<project>/.automator/profiles/<name>.toml` (same fields as the built-ins: binary, `prompt_template`, bypass flags, a `[hooks]` block picking one of the config dialects `claude-settings-json` / `codex-hooks-json` / `gemini-settings-json` / `copilot-settings-json`, and a native→canonical event map). The hook relay script and orchestrator are CLI-agnostic — each registration passes the canonical event name as the script argument. A CLI whose hook config clones one of the existing dialects (the ecosystem trend) needs nothing else; a genuinely different transport gets its own adapter class instead (see the opencode HTTP+SSE design stub in `adapters/opencode_http.py`).
+
+**Finalizing a profile:** a profile also depends on facts that no documentation records — the CLI's exact hook payload shape, its transcript location/format, and the token schema a `usage_parser` reads. `bmad-auto probe-adapter <cli>` collects and sanitizes them (a zero-launch scan by default, or `--probe` for a live capture). The [adapter authoring guide](docs/adapter-authoring-guide.md) walks through using it end to end.
 
 Cursor CLI is currently blocked on two gaps, for whoever picks it up: token usage is not exposed anywhere (hooks, JSON output, or on-disk chats), and slash-command expansion of the initial prompt argument is unverified — its `sessionStart`/`stop` hooks do fire in the CLI, so a profile using the window-death fallback plus `usage_parser = "none"` is feasible.
 
diff --git a/docs/FEATURES.md b/docs/FEATURES.md
index 447f595..663a87a 100644
--- a/docs/FEATURES.md
+++ b/docs/FEATURES.md
@@ -113,8 +113,10 @@ See [README.md](../README.md) for the narrative overview and [setup-guide.md](se
 
 - Generic tmux adapter drives any CLI fitting the tmux-injection + hook-signal transport; CLI specifics live in declarative TOML profiles.
 - Supported, E2E-verified: `claude` (reference), `codex` (≥ 0.139), `gemini` (≥ 0.46).
+- Bundled but pending live E2E verification: `copilot` (GitHub Copilot CLI ≥ 2026-02; VS Code-compatible `Stop` hook, `-i` interactive launch, `--allow-all-tools`; no `usage_parser` yet).
 - Per-stage CLI/model overrides: run dev on one CLI/model, review on another (`[adapter.dev]`, `[adapter.review]`, `[adapter.triage]`).
 - Add a CLI without touching Python: drop a TOML profile in `.automator/profiles/<name>.toml` (binary, prompt template, bypass flags, hook dialect, native→canonical event map).
+- `bmad-auto probe-adapter` collects + sanitizes the data needed to finalize/add a profile (hook payload shape, transcript location/format, token schema): a zero-launch scan by default, opt-in `--probe` for live capture. See the [adapter authoring guide](adapter-authoring-guide.md).
 
 ### Budgeting & cost tracking
 
@@ -170,4 +172,5 @@ See [README.md](../README.md) for the narrative overview and [setup-guide.md](se
 - `bmad-auto cleanup` — remove leftover tmux artifacts for finished/stopped runs.
 - `bmad-auto clean` — reclaim disk from concluded runs per `[cleanup]`: tear down worktrees a mid-flight stop orphaned, trim heavy `worktrees/` from runs kept for history, archive/delete past the retention window (`--dry-run`, `--keep`, `--retain N`, `--hard`).
 - `bmad-auto tui` — the interactive dashboard (`--low-frame-rate` for slow/SSH links).
+- `bmad-auto probe-adapter <cli>` (`collect-adapter-data`) — collect + sanitize adapter-finalization data for a CLI profile; default zero-launch scan, opt-in `--probe` live capture.
 - Every command takes `--project <dir>` (default: current directory). Any `<run-id>` accepts a partial — the tail after the last `-`, shortened to any unique prefix.
diff --git a/docs/README.md b/docs/README.md
index 54dc5fb..19b5e78 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -11,6 +11,7 @@ guides below go deeper, roughly in the order you'll need them.
 
 ## Extending bmad-auto
 
+- **[Finalizing a CLI adapter profile](adapter-authoring-guide.md)** — using `bmad-auto probe-adapter` to collect + sanitize the hook payload shape, transcript location, and token schema a new CLI profile needs.
 - **[Writing a bmad-auto plugin](plugin-authoring-guide.md)** — the plugin system: `plugin.toml` manifest, hooks, lifecycle stages, settings, the trust model, and workflow injection, with a worked walkthrough.
 - **[Writing a Game Engine plugin](game-engine-plugin-guide.md)** — the game-engine layer (built on the plugin system): driving a live engine Editor, the `editor_mode` ↔ `[scm] isolation` coupling, a minimal Godot example.
 - **[Writing a plugin for a specific Editor MCP](game-engine-mcp-guide.md)** — Editor-MCP specifics for the bundled Unity plugin: IvanMurzak vs CoplayDev, readiness probes, `per_worktree` isolation, and the full `BMAD_AUTO_*` env-var reference.
diff --git a/docs/adapter-authoring-guide.md b/docs/adapter-authoring-guide.md
new file mode 100644
index 0000000..3d90a62
--- /dev/null
+++ b/docs/adapter-authoring-guide.md
@@ -0,0 +1,164 @@
+# Finalizing a CLI adapter profile with `probe-adapter`
+
+bmad-auto drives any coding CLI that fits the **tmux-injection + hook-signal**
+transport through one generic adapter (`adapters/generic_tmux.py`); everything
+CLI-specific lives in a declarative **TOML profile** (`adapters/profile.py`). The
+[README adapter section](../README.md#other-coding-clis) covers the profile fields
+and how to drop one in without touching Python.
+
+The hard part of a new profile isn't the TOML — it's the **facts that live in no
+doc**: the CLI's exact hook payload shape (field names and casing, whether
+`session_id` / `transcript_path` / `cwd` are present), where it writes its session
+transcript and in what format, and the token-usage schema a `usage_parser` has to
+read. Historically the only way to get these was to hand a volunteer a manual
+recipe and ask them to sanitize the output by hand — error-prone and PII-risky.
+
+**`bmad-auto probe-adapter`** (alias `collect-adapter-data`) pulls all of that and
+runs it through an audited sanitizer, so a user of any coding CLI can run one
+command and paste back a clean, content-free report.
+
+```bash
+bmad-auto probe-adapter <cli> --project .          # default: zero-launch scan
+bmad-auto probe-adapter <cli> --probe --project .  # opt-in live capture
+```
+
+---
+
+## Two modes
+
+Both modes emit the **same single sanitized report** (markdown to stdout, or to a
+file with `--out`; add `--json` for a machine-readable block).
+
+### SCAN (default — no process launch)
+
+Runs only `<binary> --version` / `--help` (no session launch), locates the newest
+**already-existing** session transcript by convention, reads the declared hook config,
+and infers the token schema from the transcript. Works whenever you've used the CLI
+before, with zero execution risk. This is the right first step for any CLI that
+already has a profile (claude/codex/gemini/copilot) or that you've run by hand.
+
+### PROBE (`--probe` — opt-in live capture)
+
+In an ephemeral `mkdtemp` workspace, `probe` registers a full-payload capture hook
+for every native event in the profile, launches **one trivial content-free turn**
+(`Reply with exactly: OK`) in a tmux window, captures each hook event's complete
+payload, locates the transcript, then tears everything down. Use it to confirm the
+**exact hook payload shape** and that the CLI actually **accepts the hook dialect**
+your profile declares — facts scan can't see without running the CLI.
+
+`--probe` needs a known profile (it uses the profile's hook dialect and event map).
+If `tmux` or the binary is missing, probe degrades gracefully to a scan.
+
+---
+
+## PII safety model
+
+The report is built to be **safe to paste into an issue or PR**. A single audited
+sanitizer (`src/automator/sanitize.py`) is the only chokepoint:
+
+- **numbers, booleans, and `null` pass through** — token _counts_ are not PII;
+- **dict keys are kept verbatim** — field names and casing are the whole point of
+  a payload probe;
+- every **leaf string** is `$HOME`→`~` redacted and then kept **only if** it looks
+  like a short machine identifier (e.g. `claude-opus-4-8`, `session-abc_123`);
+  anything else — prose, code, paths, emails — becomes `<redacted:str>`;
+- **list lengths are preserved**, contents are scrubbed element by element;
+- `--help` / `--version` text and log tails have the home dir and any emails
+  redacted, with a line cap.
+
+In PROBE mode the raw capture exists **only transiently** inside the temp dir,
+which is `rmtree`'d in a `finally` (even on exception or Ctrl-C). The CLI's own
+transcript stays in its home dir — the command reads its _structure_, never copies
+it. A hidden `--keep-temp` flag retains the raw temp dir for debugging and prints a
+loud **"raw retained — do not share"** warning; never paste a `--keep-temp` run.
+
+---
+
+## Walkthrough: finalizing a profile
+
+### 1. Draft a profile
+
+Drop a TOML file in `<project>/.automator/profiles/<name>.toml` with the fields
+described in the [README adapter section](../README.md#other-coding-clis). The
+contract is the `CLIProfile` / `HookSpec` dataclasses in
+[`src/automator/adapters/profile.py`](../src/automator/adapters/profile.py): a
+`binary`, a `prompt_template`, bypass flags, a `[hooks]` block picking one of the
+config dialects (`claude-settings-json` / `codex-hooks-json` /
+`gemini-settings-json` / `copilot-settings-json`) and a native→canonical event
+map, and a `usage_parser` (start with `"none"` until you've written one).
+
+### 2. Scan
+
+```bash
+bmad-auto probe-adapter <cli> --project .
+```
+
+Read three sections of the report:
+
+- **CLI flags** — your profile's launch/bypass flags plus the scrubbed
+  `--version` / `--help`, so you can confirm the flags you chose exist.
+- **Transcript** — the redacted location, format, size, line count, and modified
+  date of the newest transcript the convention glob found.
+- **Token usage schema** — the structural key paths (types only, never values) and
+  the **token-field candidates** (int leaves whose names look token-ish). When a
+  real parser is already declared, its parsed counts are shown as a self-check.
+
+### 3. Probe (confirm the live payload + dialect)
+
+```bash
+bmad-auto probe-adapter <cli> --probe --project /tmp/scratch
+```
+
+The **Hook payload shape** section now shows, per captured event, the native→
+canonical pairing, the payload keys, and the scrubbed payload — so you can confirm
+`session_id` / `transcript_path` casing and that the CLI accepted the hook config
+for your dialect. If the CLI rejects the config or never fires a hook, the report
+says so (with a scrubbed log tail) instead of silently producing nothing.
+
+### 4. Write the `usage_parser`
+
+Turn the report's `token_field_candidates` into a parser in
+[`src/automator/tokens.py`](../src/automator/tokens.py), following the existing
+ones (`tally` for claude, `tally_codex_rollout`, `tally_gemini_chat`) and
+registering it in `read_usage`. The report flags **per-call vs cumulative** as a
+human call — a `token_count`-style event that carries running totals (codex) is
+read differently from per-message blocks that are summed (claude/gemini). Re-run
+scan after wiring the parser: the **parsed counts** self-check should now appear.
+
+---
+
+## Flags reference
+
+| Flag                | Purpose                                                                          |
+| ------------------- | -------------------------------------------------------------------------------- |
+| `--probe`           | Opt-in live capture (default is scan). Needs a known profile.                    |
+| `--transcript PATH` | Inspect this exact transcript file, bypassing convention discovery.              |
+| `--session-dir DIR` | Glob this dir (`**/*.jsonl` then `**/*.json`, newest) — for custom/unknown CLIs. |
+| `--binary NAME`     | Binary to probe for a CLI that has no profile yet (enables a reduced report).    |
+| `--model NAME`      | Model passed to the probe turn (PROBE mode).                                     |
+| `--timeout SECONDS` | Probe turn timeout (default 90).                                                 |
+| `--out FILE`        | Write the report to a file instead of stdout (the only file the command writes). |
+| `--json`            | Append a machine-readable JSON block to the report.                              |
+| `--keep-temp`       | (hidden, debug) keep the raw probe temp dir — prints a "do not share" warning.   |
+
+Exit codes mirror `validate`: `0` whenever a report is produced (warnings are
+fine), `1` only when nothing could be produced. An **unknown CLI with `--binary`**
+still yields a _reduced_ report (version/help + discovery, no hook events); an
+unknown CLI without `--binary` fails and lists the available profiles.
+
+---
+
+## Worked example: copilot
+
+The bundled `copilot` profile ships with `usage_parser = "none"` — Copilot's
+token-usage schema hadn't been captured when the profile landed. That's exactly
+the gap `probe-adapter` closes:
+
+```bash
+bmad-auto probe-adapter copilot --probe --project /tmp/scratch
+```
+
+captures the `Stop` payload (confirming `session_id` / `transcript_path` casing),
+locates `~/.copilot/session-state/*/events.jsonl`, and infers its token schema —
+the data needed to write a `copilot-*` parser in `tokens.py` and flip the profile's
+`usage_parser` off `"none"`. Confirm the `mkdtemp` dir is gone afterward.
diff --git a/module.yaml b/module.yaml
index eba5ce0..48a72b7 100644
--- a/module.yaml
+++ b/module.yaml
@@ -1,7 +1,7 @@
 code: bauto
 name: BMAD Auto Skills
 description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)"
-module_version: 0.6.1
+module_version: 0.6.2
 default_selected: false
 module_greeting: >
   BMAD Auto installed — both the four automation skills and the
diff --git a/pyproject.toml b/pyproject.toml
index 3e230ba..cf6c4e0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "bmad-auto"
-version = "0.6.1"
+version = "0.6.2"
 description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase"
 readme = "README.md"
 license = "MIT"
diff --git a/src/automator/__init__.py b/src/automator/__init__.py
index a25ea40..b96d34a 100644
--- a/src/automator/__init__.py
+++ b/src/automator/__init__.py
@@ -6,4 +6,4 @@
 spec files, and the per-run directory under .automator/runs/.
 """
 
-__version__ = "0.6.1"
+__version__ = "0.6.2"
diff --git a/src/automator/adapters/profile.py b/src/automator/adapters/profile.py
index 7366f03..1699ece 100644
--- a/src/automator/adapters/profile.py
+++ b/src/automator/adapters/profile.py
@@ -19,7 +19,12 @@
 from pathlib import Path
 
 USAGE_PARSERS = {"claude-jsonl", "codex-rollout", "gemini-chat", "none"}
-HOOK_DIALECTS = {"claude-settings-json", "codex-hooks-json", "gemini-settings-json"}
+HOOK_DIALECTS = {
+    "claude-settings-json",
+    "codex-hooks-json",
+    "gemini-settings-json",
+    "copilot-settings-json",
+}
 CANONICAL_EVENTS = {"SessionStart", "Stop", "SessionEnd", "PreCompact"}
 USER_PROFILES_REL = Path(".automator") / "profiles"
 
diff --git a/src/automator/cli.py b/src/automator/cli.py
index a6808fa..0a5c8df 100644
--- a/src/automator/cli.py
+++ b/src/automator/cli.py
@@ -885,6 +885,56 @@ def cmd_tui(args: argparse.Namespace) -> int:
     return run_tui(project)
 
 
+def cmd_probe(args: argparse.Namespace) -> int:
+    from . import probe as probe_mod
+    from .adapters.profile import ProfileError, get_profile
+    # Handler for `probe-adapter`: collect a finding, render it, then print it or write --out.
+    project = _project(args)
+    hints = probe_mod.Hints(
+        binary=args.binary,
+        transcript=args.transcript,
+        session_dir=args.session_dir,
+        model=args.model,
+    )
+    # An unknown profile is fatal only without --binary; with it, `profile` stays None.
+    profile = None
+    try:
+        profile = get_profile(args.cli, project)
+    except ProfileError as e:
+        if not args.binary:
+            print(f"FAIL: {e}", file=sys.stderr)
+            return 1
+        print(f"  ok: unknown profile {args.cli!r}; reduced report from --binary {args.binary}")
+    # --probe requires a profile (hook dialect/events); without --probe a scan is run.
+    if args.probe:
+        if profile is None:
+            print("FAIL: --probe needs a known profile (its hook dialect/events)", file=sys.stderr)
+            return 1
+        finding = probe_mod.probe(
+            cli=args.cli,
+            profile=profile,
+            project=project,
+            hints=hints,
+            timeout_s=args.timeout,
+            keep_temp=args.keep_temp,
+        )
+    else:
+        finding = probe_mod.scan(cli=args.cli, profile=profile, project=project, hints=hints)
+    # Markdown is always rendered; --json appends a fenced JSON block to the same report.
+    report = probe_mod.render_markdown(finding)
+    if args.json:
+        report = report + "\n\n## JSON\n\n```json\n" + probe_mod.render_json(finding) + "\n```\n"
+    # NOTE(review): the `ok:` status lines share stdout with the report when --out is unset.
+    if args.out:
+        out_path = Path(args.out)
+        out_path.write_text(report, encoding="utf-8")
+        print(f"  ok: report written to {out_path} ({len(finding.warnings)} warning(s))")
+    else:
+        print(report)
+        print(f"  ok: {finding.mode} report for {args.cli} ({len(finding.warnings)} warning(s))")
+    return 0
+
+
 def cmd_init(args: argparse.Namespace) -> int:
     from .install import install_into
 
@@ -935,6 +985,36 @@ def add(name: str, func, help: str, *, aliases=()) -> argparse.ArgumentParser:
     )
     add("validate", cmd_validate, "preflight checks; exit non-zero on failure")
 
+    probe_p = add(
+        "probe-adapter",
+        cmd_probe,
+        "collect + sanitize adapter-finalization data for a coding CLI",
+        aliases=["collect-adapter-data"],
+    )
+    probe_p.add_argument(
+        "cli", help="CLI profile name (claude | codex | gemini | copilot | custom)"
+    )
+    probe_p.add_argument(
+        "--probe",
+        action="store_true",
+        help="opt-in LIVE capture: launch one trivial content-free turn in a temp "
+        "workspace and capture real hook payloads (default: zero-launch scan)",
+    )
+    probe_p.add_argument(
+        "--transcript", help="exact transcript file to inspect (overrides discovery)"
+    )
+    probe_p.add_argument(
+        "--session-dir", help="dir to glob for the newest transcript (custom CLIs)"
+    )
+    probe_p.add_argument("--binary", help="binary name for a CLI with no profile yet")
+    probe_p.add_argument("--model", help="model passed to the probe turn (probe mode)")
+    probe_p.add_argument(
+        "--timeout", type=float, default=90, help="probe turn timeout (default: 90s)"
+    )
+    probe_p.add_argument("--out", help="write the report to this file instead of stdout")
+    probe_p.add_argument("--json", action="store_true", help="append a machine-readable JSON block")
+    probe_p.add_argument("--keep-temp", action="store_true", help=argparse.SUPPRESS)
+
     run_p = add("run", cmd_run, "run the orchestration loop")
     run_p.add_argument("--epic", type=int, help="only stories from this epic")
     run_p.add_argument("--story", help="story: E-S / E.S, a slug fragment, or full key")
diff --git a/src/automator/data/bmad_auto_probe_hook.py b/src/automator/data/bmad_auto_probe_hook.py
new file mode 100644
index 0000000..c0e9109
--- /dev/null
+++ b/src/automator/data/bmad_auto_probe_hook.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""Full-payload capture hook for `bmad-auto probe-adapter --probe`. Stdlib only.
+
+A throwaway sibling of bmad_auto_hook.py used ONLY during an opt-in live probe.
+It no-ops (exit 0) unless BMAD_AUTO_PROBE_CAPTURE_DIR is set — a DISTINCT env var
+from the real relay's BMAD_AUTO_RUN_DIR, so the capture hook and the signal relay
+can never fire in each other's context (a normal interactive session sees neither).
+
+For every event it writes two files atomically into the capture dir:
+
+  <ts>-<event>.signal.json   SignalWatcher-shaped {ts,event,task_id,session_id,
+                             transcript_path,cwd} so the probe's completion poll
+                             (a plain SignalWatcher over the capture dir) works
+                             with no change to the watcher.
+  <ts>-<event>.payload.json  the ENTIRE raw stdin payload plus an injected
+                             "argv_event" (the native event name from argv, for
+                             native->canonical pairing) so a maintainer can read
+                             the CLI's exact field names and casing. The probe
+                             command sanitizes this before it is ever shown;
+                             nothing written here is displayed raw.
+
+Tolerant of empty/garbage stdin and of write errors — it must never crash the
+CLI window it is hooked into.
+"""
+
+import json
+import os
+import sys
+import time
+
+
+def _atomic_write(path: str, obj: object) -> None:  # obj must be JSON-serializable
+    tmp = path + ".tmp"  # staged next to `path`, in the same directory
+    with open(tmp, "w", encoding="utf-8") as f:
+        json.dump(obj, f)
+    os.replace(tmp, path)  # renamed onto `path` only after the dump completed
+
+
+def main() -> int:
+    capture_dir = os.environ.get("BMAD_AUTO_PROBE_CAPTURE_DIR")
+    if not capture_dir:  # unset or empty: not a probe run, so do nothing
+        return 0
+    task_id = os.environ.get("BMAD_AUTO_TASK_ID", "probe")
+    event_name = sys.argv[1] if len(sys.argv) > 1 else "Unknown"  # native event name
+    try:
+        payload = json.load(sys.stdin)
+    except (json.JSONDecodeError, ValueError):
+        payload = {}  # empty or malformed stdin
+    if not isinstance(payload, dict):
+        payload = {}  # valid JSON, but not an object
+    # Both files share one ts. NOTE(review): the signal is written before its payload.
+    ts = time.time_ns()
+    try:
+        os.makedirs(capture_dir, exist_ok=True)
+        signal = {
+            "ts": ts,
+            "event": event_name,
+            "task_id": task_id,
+            "session_id": payload.get("session_id") or payload.get("conversation_id"),
+            "transcript_path": payload.get("transcript_path"),
+            "cwd": payload.get("cwd"),
+        }
+        _atomic_write(os.path.join(capture_dir, f"{ts}-{event_name}.signal.json"), signal)
+        captured = dict(payload)  # shallow copy, so the parsed payload is not mutated
+        captured["argv_event"] = event_name  # replaces any payload key of the same name
+        _atomic_write(os.path.join(capture_dir, f"{ts}-{event_name}.payload.json"), captured)
+    except OSError:  # best effort: a failed write is dropped, the hook still returns 0
+        return 0
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/automator/data/profiles/copilot.toml b/src/automator/data/profiles/copilot.toml
new file mode 100644
index 0000000..1437595
--- /dev/null
+++ b/src/automator/data/profiles/copilot.toml
@@ -0,0 +1,34 @@
+# GitHub Copilot CLI (GA since 2026-02). `-i "<prompt>"` starts an interactive
+# session and auto-runs the prompt (a bare `-p` prompt runs headless and exits).
+# Copilot has no native skill discovery, so the prompt tells it to read the
+# SKILL.md directly (skills live in .agents/skills/, shared with codex/gemini and
+# matching upstream BMAD-METHOD's github-copilot installer). "use subagents as
+# needed" keeps parallel skill phases (e.g. review layers) actually spawning
+# subagents, same as codex.
+#
+# Hook events are registered under Copilot's VS Code-compatible PascalCase names
+# (Stop/SessionStart/SessionEnd/PreCompact, same set as claude). That casing makes
+# Copilot emit SNAKE_CASE payloads (session_id, transcript_path, cwd) — exactly
+# what the shared relay reads — and the Stop payload carries transcript_path, so no
+# relay change is needed and a future usage_parser gets the transcript for free.
+# (The camelCase names agentStop/sessionStart emit camelCase payloads the relay
+# would miss.) NOTE: an enterprise policy permissions.disableBypassPermissionsMode
+# = 'disable' suppresses the --allow-all-* flags and will block unattended runs.
+name = "copilot"
+binary = "copilot"
+prompt_template = "LOAD the FULL .agents/skills/{skill}/SKILL.md, read its entire contents and follow its directions exactly, using subagents as needed: {args}"
+launch_args = ["-i"]
+bypass_args = ["--allow-all-tools", "--allow-all-paths"]
+model_flag = "--model"
+usage_parser = "none"
+first_run_note = "run `copilot` once and authenticate (gh / Copilot subscription); requires Copilot CLI GA (>= 2026-02)"
+skill_tree = ".agents/skills"
+# .github/copilot/settings.json is the inline hook config (and can also hold MCP
+# servers) — gitignored in many projects, so a worktree checkout omits it and
+# isolated sessions lose it; seeded first, then the relay hooks are merged in.
+seed_files = [".github/copilot/settings.json"]
+
+[hooks]
+dialect = "copilot-settings-json"
+config_path = ".github/copilot/settings.json"
+events = { Stop = "Stop", SessionStart = "SessionStart", SessionEnd = "SessionEnd", PreCompact = "PreCompact" }
diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
index eba5ce0..48a72b7 100644
--- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
+++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
@@ -1,7 +1,7 @@
 code: bauto
 name: BMAD Auto Skills
 description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)"
-module_version: 0.6.1
+module_version: 0.6.2
 default_selected: false
 module_greeting: >
   BMAD Auto installed — both the four automation skills and the
diff --git a/src/automator/install.py b/src/automator/install.py
index 8a7b194..0f3cad7 100644
--- a/src/automator/install.py
+++ b/src/automator/install.py
@@ -4,7 +4,7 @@
 - idempotently merges hook registrations into each selected CLI's hook config
   (dialect + native->canonical event map come from the CLI profile)
 - installs the bundled bmad-auto-* skills into each selected CLI's skill tree
-  (.claude/skills for claude, .agents/skills for codex/gemini)
+  (.claude/skills for claude, .agents/skills for codex/gemini/copilot)
 - writes .automator/policy.toml from the template (if missing)
 - gitignores generated dirs: .automator/runs/ (per-run state) and
   .automator/cache/ (engine plugins' rebuildable caches, e.g. the Unity Library)
@@ -28,8 +28,12 @@
 from .policy import POLICY_TEMPLATE
 
 HOOK_SCRIPT_REL = ".automator/bmad_auto_hook.py"
-HOOK_MARKER = "bmad_auto_hook.py"
+# Dedup marker: a substring of every bmad-auto-managed hook command — both the signal
+# relay (bmad_auto_hook.py) and the probe-adapter capture hook (bmad_auto_probe_hook.py)
+# — so merge_hooks stays idempotent. NOTE: an event holding either never gets the other.
+HOOK_MARKER = "bmad_auto"
 GEMINI_HOOK_TIMEOUT_MS = 60_000
+COPILOT_HOOK_TIMEOUT_SEC = 60
 
 # The bmad-auto-* skills bundled in the wheel (automator/data/skills/) that
 # `bmad-auto init` lays down. They must be installed together — bmad-auto-review
@@ -56,6 +60,9 @@ def _hook_entry(dialect: str, command: str) -> dict:
     if dialect == "gemini-settings-json":
         handler["timeout"] = GEMINI_HOOK_TIMEOUT_MS  # Gemini timeouts are milliseconds
         return {"matcher": "", "hooks": [handler]}
+    if dialect == "copilot-settings-json":
+        handler["timeoutSec"] = COPILOT_HOOK_TIMEOUT_SEC  # Copilot timeouts are seconds
+        return handler  # Copilot stores the handler directly in the event list
     # claude-settings-json and codex-hooks-json share the schema
     return {"hooks": [handler]}
 
@@ -63,14 +70,19 @@ def _hook_entry(dialect: str, command: str) -> dict:
 def merge_hooks(config: dict, registrations: dict[str, str], dialect: str) -> tuple[dict, bool]:
     """Add relay registrations (native event -> command) to a hook config dict."""
     changed = False
+    if dialect == "copilot-settings-json":
+        config.setdefault("version", 1)  # Copilot hook configs are versioned
     hooks = config.setdefault("hooks", {})
     for native_event, command in registrations.items():
         matchers = hooks.setdefault(native_event, [])
+        # claude/codex/gemini nest handlers under "hooks"; copilot stores the
+        # handler dict directly in the event list — check both shapes so a re-run
+        # stays idempotent for every dialect.
         already = any(
             HOOK_MARKER in handler.get("command", "")
-            for matcher in matchers
-            if isinstance(matcher, dict)
-            for handler in matcher.get("hooks", [])
+            for entry in matchers
+            if isinstance(entry, dict)
+            for handler in (entry, *entry.get("hooks", []))
             if isinstance(handler, dict)
         )
         if not already:
diff --git a/src/automator/probe.py b/src/automator/probe.py
new file mode 100644
index 0000000..2ce91d5
--- /dev/null
+++ b/src/automator/probe.py
@@ -0,0 +1,795 @@
+"""`bmad-auto probe-adapter`: collect + sanitize adapter-finalization data.
+
+Finalizing a generic-adapter CLI profile needs facts that live in no doc: the
+CLI's exact hook payload shape (field names/casing, whether transcript_path /
+session_id / cwd are present), where its transcript lives and in what format,
+and the token-usage schema a `usage_parser` must read. This command pulls all of
+that and runs it through the audited :mod:`automator.sanitize` chokepoint, so a
+user of any coding CLI can run one command and paste back a clean, content-free
+report.
+
+Two strategies, one report shape:
+
+- SCAN (default, zero process launch beyond ``--version``/``--help``): locate the
+  newest already-existing transcript by convention, read the declared hook config,
+  infer the token schema. Works whenever the user has used the CLI before.
+- PROBE (``--probe``, opt-in): in an ephemeral ``mkdtemp`` workspace, register the
+  full-payload capture hook for every native event, launch one trivial content-free
+  turn in a tmux window, capture each event's complete payload, then tear down. The
+  raw capture exists only transiently inside the temp dir, which is ``rmtree``'d in a
+  ``finally`` (even on exception / Ctrl-C).
+"""
+
+from __future__ import annotations
+
+import glob
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import tempfile
+import time
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from importlib import resources
+from pathlib import Path
+
+from . import sanitize
+from .adapters.profile import CLIProfile
+from .install import merge_hooks
+from .signals import SignalWatcher
+from .tokens import _jsonl_entries, read_usage
+
+# Per-parser transcript-location conventions (from tokens.py docstrings).
+TRANSCRIPT_GLOBS = {
+    "claude-jsonl": "~/.claude/projects/*/*.jsonl",
+    "codex-rollout": "~/.codex/sessions/*/*/*/rollout-*.jsonl",
+    "gemini-chat": "~/.gemini/tmp/*/chats/session-*.jsonl",
+}
+# Fallback family glob keyed by the `cli` name, so a CLI whose usage_parser is
+# still "none" (e.g. copilot, freshly added) still gets transcript discovery.
+FAMILY_GLOBS = {
+    "claude": "~/.claude/projects/*/*.jsonl",
+    "codex": "~/.codex/sessions/*/*/*/rollout-*.jsonl",
+    "gemini": "~/.gemini/tmp/*/chats/session-*.jsonl",
+    "copilot": "~/.copilot/session-state/*/events.jsonl",
+}
+
+_TOKEN_KEY_RE = re.compile(
+    r"(token|tokens|cached|input|output|prompt|completion|thoughts|usage)", re.I
+)
+
+PROBE_HOOK_NAME = "bmad_auto_probe_hook.py"  # file name of the full-payload capture hook
+PROBE_PROMPT = "Reply with exactly: OK"  # the one trivial, content-free probe turn
+PROBE_TASK_ID = "probe"  # same value the capture hook defaults BMAD_AUTO_TASK_ID to
+TMUX_TIMEOUT_S = 30  # NOTE(review): presumably a per-tmux-command timeout — confirm
+PROBE_GRACE_S = 3.0  # NOTE(review): presumably a settle delay around the probe turn — confirm
+MAX_SCHEMA_ENTRIES = 200  # NOTE(review): presumably caps transcript entries scanned — confirm
+
+
+# --------------------------------------------------------------- dataclasses
+
+
+@dataclass
+class FlagFinding:
+    binary: str  # the binary name that was looked up
+    found: bool  # False when shutil.which() did not find it; version/help then stay None
+    version: str | None = None  # scrubbed
+    help: str | None = None  # scrubbed
+
+
+@dataclass
+class TranscriptFinding:
+    glob: str | None = None  # the convention glob used (already ~-relative); None for overrides
+    location: str | None = None  # redacted path of the chosen transcript
+    fmt: str | None = None  # "jsonl", else the file suffix without its dot, else "unknown"
+    size_bytes: int | None = None  # None when stat() failed
+    line_count: int | None = None  # None when the file could not be opened
+    mtime_date: str | None = None  # date only (no time), UTC
+    multiple: bool = False  # more than one candidate file matched
+    note: str | None = None  # why no transcript was described (bad override path, no match)
+    real_path: Path | None = None  # NOT rendered; used for schema inference
+
+
+@dataclass
+class TokenSchema:
+    parser: str  # NOTE(review): presumably the profile's usage_parser name — confirm
+    entries_scanned: int = 0  # NOTE(review): presumably capped by MAX_SCHEMA_ENTRIES — confirm
+    parsed_usage: dict | None = None  # only when parser != "none"
+    key_paths: list[str] = field(default_factory=list)  # "a.b.c:int", TYPE only
+    token_field_candidates: list[str] = field(default_factory=list)  # token-ish int leaves
+
+
+@dataclass
+class EventCapture:
+    native_event: str  # the CLI's own name for the hook event
+    canonical_event: str | None  # bmad-auto's name for it; presumably None if unmapped — confirm
+    payload_keys: list[str]  # NOTE(review): presumably the payload's top-level keys — confirm
+    payload: dict  # scrubbed
+
+
+@dataclass
+class ProfileFinding:
+    cli: str  # CLI/profile name
+    mode: str  # "scan" | "probe"
+    known_profile: bool  # NOTE(review): presumably False for a --binary-only report — confirm
+    binary: str
+    parser: str
+    dialect: str | None = None
+    flags: FlagFinding | None = None  # `--version`/`--help` findings
+    declared_events: dict = field(default_factory=dict)  # native -> canonical
+    registered: bool | None = None  # scan: hooks present in the CLI's config?
+    captured_events: list[EventCapture] = field(default_factory=list)  # probe
+    transcript: TranscriptFinding | None = None
+    tokens: TokenSchema | None = None
+    warnings: list[str] = field(default_factory=list)  # the CLI status line reports their count
+    next_steps: list[str] = field(default_factory=list)
+
+
+@dataclass
+class Hints:
+    binary: str | None = None  # --binary
+    transcript: str | None = None  # --transcript: exact file, bypasses discovery
+    session_dir: str | None = None  # --session-dir: directory globbed for the newest file
+    model: str | None = None  # --model
+
+
+# ------------------------------------------------------------ version / help
+
+
+def _run_capture(argv: list[str], timeout_s: float) -> str | None:
+    try:
+        proc = subprocess.run(argv, capture_output=True, text=True, timeout=timeout_s)
+    except (OSError, subprocess.SubprocessError):
+        return None
+    out = (proc.stdout or "") + (proc.stderr or "")
+    return out.strip() or None
+
+
+def run_version_help(binary: str, timeout_s: float = 10) -> FlagFinding:
+    """Scrubbed ``--version``/``--help`` for a binary. A missing or hung binary never raises."""
+    if not shutil.which(binary):  # not on PATH: report found=False without running anything
+        return FlagFinding(binary=binary, found=False)
+    version = _run_capture([binary, "--version"], timeout_s)  # timeout_s applies per call
+    help_txt = _run_capture([binary, "--help"], timeout_s)
+    return FlagFinding(
+        binary=binary,
+        found=True,
+        version=sanitize.scrub_text(version, max_lines=5) if version else None,
+        help=sanitize.scrub_text(help_txt, max_lines=80) if help_txt else None,
+    )
+
+
+# ------------------------------------------------------ transcript discovery
+
+
+def _redact_location(path: Path) -> str:
+    """Redact a path to a paste-safe form: home -> ``~``, and any path component
+    that isn't a plain machine identifier (e.g. a munged-cwd dir that embeds a
+    username) -> ``<redacted>``. The session-id filename usually survives."""
+
+    def comp(c: str) -> str:  # per-component filter: identifier-like names pass unchanged
+        return c if sanitize.looks_like_identifier(c) else "<redacted>"
+
+    home = Path(os.path.expanduser("~"))  # paths under it are rendered as "~/..."
+    try:
+        rel = path.relative_to(home)
+        return "/".join(["~", *(comp(c) for c in rel.parts)])
+    except ValueError:  # relative_to() failed: path is not under home
+        parts = [comp(c) for c in path.parts if c not in ("/", "")]
+        return "/" + "/".join(parts)  # NOTE(review): a relative input also gains this "/"
+
+
+def _describe_transcript(path: Path, *, glob_pat: str | None, multiple: bool) -> TranscriptFinding:
+    try:  # metadata is best effort: a stat() failure leaves size and date as None
+        stat = path.stat()
+        size = stat.st_size
+        mtime_date = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).strftime("%Y-%m-%d")
+    except OSError:
+        size, mtime_date = None, None
+    line_count = None  # stays None when the file cannot be opened
+    try:
+        with path.open(encoding="utf-8", errors="replace") as f:
+            line_count = sum(1 for _ in f)  # counts lines by streaming; nothing is retained
+    except OSError:
+        pass
+    return TranscriptFinding(
+        glob=glob_pat,
+        location=_redact_location(path),
+        fmt="jsonl" if path.suffix == ".jsonl" else (path.suffix.lstrip(".") or "unknown"),
+        size_bytes=size,
+        line_count=line_count,
+        mtime_date=mtime_date,
+        multiple=multiple,
+        real_path=path,  # unredacted path, kept on the finding alongside the redacted one
+    )
+
+
+def _newest(paths: list[Path]) -> Path:
+    return max(paths, key=lambda p: p.stat().st_mtime if p.exists() else 0)  # missing => key 0
+
+
+def discover_transcript(
+    parser: str,
+    *,
+    cli: str,
+    hints: Hints,
+) -> TranscriptFinding | None:
+    """Find a transcript: explicit --transcript, else newest under --session-dir or the glob."""
+    if hints.transcript:
+        path = Path(hints.transcript).expanduser()
+        if not path.is_file():  # the note below names only the basename, not the full path
+            return TranscriptFinding(note=f"--transcript path does not exist: {path.name}")
+        return _describe_transcript(path, glob_pat=None, multiple=False)
+
+    if hints.session_dir:  # *.json is only considered when no *.jsonl exists
+        base = Path(hints.session_dir).expanduser()
+        matches = sorted(base.glob("**/*.jsonl")) or sorted(base.glob("**/*.json"))
+        if not matches:
+            return TranscriptFinding(note=f"no *.jsonl/*.json under --session-dir {base.name}")
+        return _describe_transcript(_newest(matches), glob_pat=None, multiple=len(matches) > 1)
+
+    pattern = TRANSCRIPT_GLOBS.get(parser) or FAMILY_GLOBS.get(cli)  # parser glob, else family glob
+    if not pattern:
+        return TranscriptFinding(
+            note="no transcript-location convention for this CLI; "
+            "pass --transcript PATH or --session-dir DIR"
+        )
+    matches = [Path(p) for p in glob.glob(os.path.expanduser(pattern))]
+    matches = [p for p in matches if p.is_file()]  # drop directories the glob may have matched
+    if not matches:
+        return TranscriptFinding(
+            glob=pattern,
+            note="no existing transcript matched the convention glob; "
+            "use --transcript / --session-dir, or run --probe",
+        )
+    return _describe_transcript(_newest(matches), glob_pat=pattern, multiple=len(matches) > 1)
+
+
+# ---------------------------------------------------------- schema inference
+
+
+def _type_name(value) -> str:
+    """Return the short type label recorded for a JSON leaf value: ``null``,
+    ``bool``, ``int``, ``float``, ``str``, or ``other`` for anything else."""
+    if value is None:
+        return "null"
+    # order matters: bool subclasses int, so it has to be matched before int
+    ordered = ((bool, "bool"), (int, "int"), (float, "float"), (str, "str"))
+    for kind, label in ordered:
+        if isinstance(value, kind):
+            return label
+    # nothing above matched (e.g. bytes or a container): report an opaque type
+    return "other"
+
+
+def _walk_paths(obj, prefix: str, out: set[str]) -> None:
+    """Add one ``dotted.path:type`` entry to ``out`` per leaf under ``obj`` —
+    leaf TYPES only, never values. List indices collapse to ``[]``, so
+    ``messages[].tokens.input:int`` is a single path. A dict key that is not a
+    plain identifier (a relative file path, a per-file backup id, ...) is
+    collapsed to ``<key>``: static field names such as ``input_tokens`` survive
+    untouched while dynamic keys cannot leak paths/content into the summary."""
+    if isinstance(obj, list):
+        item_prefix = f"{prefix}[]"
+        for item in obj:
+            _walk_paths(item, item_prefix, out)
+        return
+    if not isinstance(obj, dict):
+        out.add(f"{prefix}:{_type_name(obj)}")
+        return
+    for raw_key, value in obj.items():
+        name = str(raw_key)
+        safe = name if sanitize.looks_like_identifier(name) else "<key>"
+        _walk_paths(value, f"{prefix}.{safe}" if prefix else safe, out)
+
+
+def _is_token_candidate(path: str) -> bool:
+    """True when ``path`` is an ``int`` leaf whose last key matches ``_TOKEN_KEY_RE``."""
+    dotted, _, leaf_type = path.rpartition(":")
+    # only the final dotted segment is tested, with any ``[]`` list markers removed
+    leaf_key = dotted.split(".")[-1].replace("[]", "")
+    return leaf_type == "int" and bool(_TOKEN_KEY_RE.search(leaf_key))
+
+
+def infer_token_schema(
+    parser: str, path: Path, *, max_entries: int = MAX_SCHEMA_ENTRIES
+) -> TokenSchema:
+    """Structural key-path summary (types only) + token-field candidates.
+
+    Works even when ``parser == "none"``: the candidates are exactly what a
+    maintainer needs to write a parser for a brand-new CLI. When a real parser
+    exists, its parsed integer counts are included as a self-check.
+    """
+    paths: set[str] = set()
+    scanned = 0
+    for entry in _jsonl_entries(path):
+        if scanned >= max_entries:
+            break  # only the first max_entries entries are sampled
+        scanned += 1
+        _walk_paths(entry, "", paths)  # empty prefix: top-level keys start the dotted paths
+    candidates = sorted(p for p in paths if _is_token_candidate(p))
+    parsed = None
+    if parser != "none":
+        usage = read_usage(parser, path)  # given the path itself, not the sampled entries
+        if usage is not None:
+            parsed = usage.to_dict()
+    return TokenSchema(
+        parser=parser,
+        entries_scanned=scanned,
+        parsed_usage=parsed,  # stays None for parser "none" or when read_usage returns None
+        key_paths=sorted(paths),
+        token_field_candidates=candidates,
+    )
+
+
+# --------------------------------------------------------------- hook config
+
+
+def _hooks_registered(project: Path, profile: CLIProfile) -> bool:
+    """True if the project's hook config mentions ``bmad_auto_hook`` for any profile event."""
+    import json
+
+    config_path = project / profile.hooks.config_path
+    try:  # ValueError covers JSONDecodeError and UnicodeDecodeError; OSError a missing file
+        config = json.loads(config_path.read_text(encoding="utf-8"))
+    except (ValueError, OSError):
+        return False
+    hooks = config.get("hooks") if isinstance(config, dict) else None
+    if not isinstance(hooks, dict):  # foreign or hand-edited config with an unexpected shape
+        return False
+    return any("bmad_auto_hook" in json.dumps(hooks.get(e, [])) for e in profile.hooks.events)
+
+
+# ----------------------------------------------------------------- SCAN mode
+
+
+def scan(
+    *,
+    cli: str,
+    profile: CLIProfile | None,
+    project: Path,
+    hints: Hints,
+) -> ProfileFinding:
+    binary = hints.binary or (profile.binary if profile else cli)  # hint > profile > CLI name
+    parser = profile.usage_parser if profile else "none"
+    finding = ProfileFinding(
+        cli=cli,
+        mode="scan",
+        known_profile=profile is not None,
+        binary=binary,
+        parser=parser,
+        dialect=profile.hooks.dialect if profile else None,
+        declared_events=dict(profile.hooks.events) if profile else {},
+    )
+
+    finding.flags = run_version_help(binary)
+    if not finding.flags.found:
+        finding.warnings.append(
+            f"binary {binary!r} not found on PATH — version/help unavailable "
+            "(scan continues from on-disk conventions)"
+        )
+
+    if profile is not None:  # registration can only be checked against a known profile
+        finding.registered = _hooks_registered(project, profile)
+        if not finding.registered:
+            finding.next_steps.append(
+                f"hooks not registered in {profile.hooks.config_path}; "
+                f"`bmad-auto init --cli {cli}` to validate the dialect end-to-end, "
+                "or re-run with --probe"
+            )
+
+    finding.transcript = discover_transcript(parser, cli=cli, hints=hints)
+    if finding.transcript and finding.transcript.note:  # surface discovery notes as warnings
+        finding.warnings.append(finding.transcript.note)
+    if finding.transcript and finding.transcript.real_path is not None:
+        finding.tokens = infer_token_schema(parser, finding.transcript.real_path)
+        if finding.transcript.multiple:
+            finding.next_steps.append(
+                "multiple fresh transcripts matched; pass --transcript to pin the right one"
+            )
+    return finding
+
+
+# ---------------------------------------------------------- PROBE tmux launcher
+
+
+class _ProbeLauncher:
+    """The few tmux primitives PROBE needs — deliberately NOT GenericTmuxAdapter,
+    which mandates a Policy and story-completion logic irrelevant here."""
+
+    def __init__(self, session_name: str):
+        self.session_name = session_name
+
+    def _tmux(self, *args: str) -> subprocess.CompletedProcess:
+        return subprocess.run(  # output captured as text; the call is time-bounded
+            ["tmux", *args], capture_output=True, text=True, timeout=TMUX_TIMEOUT_S
+        )
+
+    def start(self, argv: list[str], env: dict[str, str], cwd: Path, log_file: Path) -> str | None:
+        new = self._tmux(
+            "new-session", "-d", "-s", self.session_name, "-c", str(cwd), "-x", "220", "-y", "50"
+        )
+        if new.returncode != 0:
+            return None  # the session could not be created
+        env_args: list[str] = []
+        for key, value in env.items():
+            env_args += ["-e", f"{key}={value}"]  # one -e KEY=VALUE pair per variable
+        command = " ".join(shlex.quote(a) for a in argv)  # one shell-quoted command string
+        win = self._tmux(
+            "new-window",
+            "-t",
+            f"={self.session_name}:",
+            "-c",
+            str(cwd),
+            "-P",
+            "-F",
+            "#{window_id}",
+            *env_args,
+            command,
+        )
+        if win.returncode != 0:
+            return None  # the window could not be created
+        window_id = win.stdout.strip()  # -P/-F make tmux print the new window's id
+        # pipe-pane may race a window that dies instantly; tolerate failure.
+        self._tmux("pipe-pane", "-t", window_id, "-o", f"cat >> {shlex.quote(str(log_file))}")
+        return window_id
+
+    def window_alive(self, window_id: str) -> bool:
+        probe = self._tmux("list-windows", "-t", f"={self.session_name}", "-F", "#{window_id}")
+        if probe.returncode != 0:
+            return False  # a failed listing is treated as "not alive"
+        return window_id in probe.stdout.split()
+
+    def kill(self) -> None:
+        self._tmux("kill-session", "-t", f"={self.session_name}")  # result is ignored
+
+
+def _probe_argv(profile: CLIProfile, binary: str, hints: Hints) -> list[str]:
+    """Assemble the argv used to launch the CLI for a probe.
+
+    Order: binary, launch args, the rendered probe prompt, bypass args, then
+    the model flag and value when ``hints.model`` is set."""
+    prompt = profile.render_prompt(PROBE_PROMPT)
+    argv = [binary, *profile.launch_args, prompt, *profile.bypass_args]
+    if hints.model:
+        argv.extend([profile.model_flag, hints.model])
+    return argv
+
+
+def _collect_captures(capture_dir: Path, events_map: dict[str, str]) -> list[EventCapture]:
+    import json
+
+    found: list[EventCapture] = []
+    for payload_file in sorted(capture_dir.glob("*.payload.json")):
+        try:
+            body = json.loads(payload_file.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            continue  # unreadable or malformed capture: skip it
+        if not isinstance(body, dict):
+            continue  # only JSON objects count as payloads
+        native = str(body.pop("argv_event", "Unknown"))  # popped, so not listed as a payload key
+        found.append(
+            EventCapture(
+                native_event=native,
+                canonical_event=events_map.get(native),
+                payload_keys=sorted(body),
+                payload=sanitize.scrub_event_payload(body),
+            )
+        )
+    return found
+
+
+def probe(
+    *,
+    cli: str,
+    profile: CLIProfile,
+    project: Path,
+    hints: Hints,
+    timeout_s: float = 90,
+    keep_temp: bool = False,
+) -> ProfileFinding:
+    """Live-capture hook payloads in a throwaway tmux session (scan fallback if impossible)."""
+    import json
+
+    binary = hints.binary or profile.binary
+    finding = ProfileFinding(
+        cli=cli,
+        mode="probe",
+        known_profile=True,
+        binary=binary,
+        parser=profile.usage_parser,
+        dialect=profile.hooks.dialect,
+        declared_events=dict(profile.hooks.events),
+    )
+    finding.flags = run_version_help(binary)
+
+    if not shutil.which("tmux") or not shutil.which(binary):
+        missing = "tmux" if not shutil.which("tmux") else binary
+        scanned = scan(cli=cli, profile=profile, project=project, hints=hints)
+        scanned.mode = "probe"
+        # warn on the finding that is returned (``finding`` is discarded on this path)
+        scanned.warnings.append(f"{missing} not on PATH — cannot probe; falling back to scan")
+        return scanned
+
+    tmpdir = Path(tempfile.mkdtemp(prefix="bmad-auto-probe-"))
+    launcher = _ProbeLauncher(session_name=f"bmad-auto-probe-{tmpdir.name}")
+    try:
+        capture_dir = tmpdir / "capture"
+        capture_dir.mkdir(parents=True, exist_ok=True)
+
+        # 1. lay down the capture hook + a hook config registered through the very
+        #    same merge_hooks `bmad-auto init` uses — so a bad dialect surfaces live.
+        hook_src = resources.files("automator.data").joinpath(PROBE_HOOK_NAME)
+        hook_path = tmpdir / PROBE_HOOK_NAME
+        hook_path.write_text(hook_src.read_text(encoding="utf-8"), encoding="utf-8")
+        registrations = {
+            native: f"python3 {shlex.quote(str(hook_path))} {canonical}"
+            for native, canonical in profile.hooks.events.items()
+        }
+        config, _ = merge_hooks({}, registrations, profile.hooks.dialect)
+        config_path = tmpdir / profile.hooks.config_path
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+        config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8")
+
+        # 2. launch one trivial content-free turn in a fresh tmux window
+        argv = _probe_argv(profile, binary, hints)
+        env = {
+            **profile.env,
+            "BMAD_AUTO_RUN_DIR": str(tmpdir),
+            "BMAD_AUTO_TASK_ID": PROBE_TASK_ID,
+            "BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture_dir),
+        }
+        log_file = tmpdir / "probe.log"
+        watcher = SignalWatcher(capture_dir)
+        launched_ns = time.time_ns()
+        window_id = launcher.start(argv, env, tmpdir, log_file)
+        if window_id is None:
+            finding.warnings.append("could not launch the CLI in tmux; no events captured")
+            return finding
+
+        # 3. completion: first of — canonical Stop for `probe`; any capture file
+        #    appeared and the window died; window died; deadline.
+        deadline = time.monotonic() + timeout_s
+        while True:
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                finding.warnings.append(
+                    "no Stop event before --timeout; the CLI may need first-run auth "
+                    "(a pending login dialog reads as a timeout). See the log tail below."
+                )
+                break
+            event = watcher.wait_for(
+                PROBE_TASK_ID,
+                {"Stop"},
+                timeout_s=min(remaining, 5.0),
+                since_ns=launched_ns,
+            )
+            if event is not None:
+                break
+            if not launcher.window_alive(window_id):
+                if not any(capture_dir.glob("*.payload.json")):
+                    finding.warnings.append(
+                        "the CLI window died before any hook fired — the dialect may be "
+                        f"rejected for {profile.hooks.dialect}, or launch/auth failed. "
+                        "See the log tail below."
+                    )
+                break
+
+        # 4. one short grace poll so a Stop's sibling files all land, then collect.
+        time.sleep(PROBE_GRACE_S)
+        finding.captured_events = _collect_captures(capture_dir, profile.hooks.events)
+        if not finding.captured_events:
+            finding.next_steps.append(
+                "no hook payloads captured — confirm the CLI is authenticated and that "
+                f"the {profile.hooks.dialect} hook config is accepted, then re-run --probe"
+            )
+            tail = _log_tail(log_file)
+            if tail:
+                finding.warnings.append("log tail (scrubbed):\n" + tail)
+
+        # 5. transcript discovery + schema inference from the user's real home
+        finding.transcript = discover_transcript(profile.usage_parser, cli=cli, hints=hints)
+        if finding.transcript and finding.transcript.note:
+            finding.warnings.append(finding.transcript.note)
+        if finding.transcript and finding.transcript.real_path is not None:
+            finding.tokens = infer_token_schema(profile.usage_parser, finding.transcript.real_path)
+        return finding
+    finally:
+        launcher.kill()
+        if keep_temp:
+            finding.warnings.append(
+                f"--keep-temp: RAW probe data retained at {tmpdir} — DO NOT SHARE; "
+                "delete it after inspection"
+            )
+        else:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+def _log_tail(log_file: Path, max_lines: int = 20) -> str | None:
+    """Scrubbed last ``max_lines`` lines of the log, or ``None`` if unreadable/blank."""
+    try:
+        raw = log_file.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return None
+    if not raw.strip():
+        return None
+    return sanitize.scrub_text("\n".join(raw.splitlines()[-max_lines:]), max_lines=max_lines)
+
+
+# ------------------------------------------------------------------ rendering
+
+
+def _fmt_kv(label: str, value) -> str:
+    return "- **{}:** {}".format(label, value)  # one markdown bullet: bold label, then value
+
+
+def render_markdown(f: ProfileFinding) -> str:
+    """Render ``f`` as the markdown report; every section heading is always emitted
+    and transcript fields that could not be read are shown as ``—``."""
+    out: list[str] = [f"# Profile finalize report — {f.cli} ({f.mode})", ""]
+
+    # Summary
+    out.append("## Summary")
+    out.append(_fmt_kv("CLI", f.cli))
+    out.append(
+        _fmt_kv("binary", f"{f.binary} ({'found' if f.flags and f.flags.found else 'NOT found'})")
+    )
+    out.append(_fmt_kv("known profile", "yes" if f.known_profile else "no (reduced report)"))
+    out.append(_fmt_kv("hook dialect", f.dialect or "—"))
+    out.append(_fmt_kv("usage_parser", f.parser))
+    if f.registered is not None:
+        out.append(_fmt_kv("hooks registered", "yes" if f.registered else "no"))
+    out.append(_fmt_kv("warnings", str(len(f.warnings))))
+    out.append("")
+
+    # CLI flags
+    out.append("## CLI flags")
+    out.append(_fmt_kv("launch_args / bypass_args", "see profile (rendered verbatim below)"))
+    if f.flags and f.flags.version:
+        out.append("\n```\n" + f.flags.version + "\n```")
+    if f.flags and f.flags.help:
+        out.append("\n<details><summary>--help (scrubbed)</summary>\n")
+        out.append("```\n" + f.flags.help + "\n```")
+        out.append("</details>")
+    if not f.flags or not f.flags.found:
+        out.append("_binary not available; flags/help not captured._")
+    out.append("")
+
+    # Hook payload shape
+    out.append("## Hook payload shape")
+    if f.mode == "scan":
+        if f.declared_events:
+            out.append(
+                "Declared native → canonical events (registered = "
+                f"{'yes' if f.registered else 'no'}):"
+            )
+            for native, canonical in f.declared_events.items():
+                out.append(f"- `{native}` → `{canonical}`")
+        else:
+            out.append("_no profile; events unknown. Re-run with --probe to capture payloads._")
+    else:
+        if f.captured_events:
+            for ev in f.captured_events:
+                out.append(f"### `{ev.native_event}` → `{ev.canonical_event or '?'}`")
+                out.append(
+                    _fmt_kv("payload keys", ", ".join(f"`{k}`" for k in ev.payload_keys) or "—")
+                )
+                out.append("\n```json\n" + _json_dump(ev.payload) + "\n```")
+        else:
+            out.append("_no hook payloads captured (see warnings)._")
+    out.append("")
+
+    # Transcript
+    out.append("## Transcript")
+    t = f.transcript
+    if t and t.real_path is not None:
+        out.append(_fmt_kv("location", f"`{t.location}`"))
+        if t.glob:
+            out.append(_fmt_kv("matched glob", f"`{t.glob}`"))
+        out.append(_fmt_kv("format", t.fmt))
+        out.append(_fmt_kv("size", f"{t.size_bytes} bytes" if t.size_bytes is not None else "—"))
+        out.append(_fmt_kv("lines", t.line_count if t.line_count is not None else "—"))
+        out.append(_fmt_kv("mtime", t.mtime_date or "—"))  # None when stat() failed
+        if t.multiple:
+            out.append("- _multiple candidates matched; newest shown — pass --transcript to pin._")
+    else:
+        out.append("_no transcript located._" + (f" ({t.note})" if t and t.note else ""))
+    out.append("")
+
+    # Token usage schema
+    out.append("## Token usage schema")
+    tk = f.tokens
+    if tk:
+        out.append(_fmt_kv("declared parser", tk.parser))
+        out.append(_fmt_kv("entries scanned", tk.entries_scanned))
+        if tk.parsed_usage is not None:
+            out.append(_fmt_kv("parsed counts (self-check)", f"`{tk.parsed_usage}`"))
+        out.append(
+            "\n**Token-field candidates** (int leaves; per-call-vs-cumulative is a human call):"
+        )
+        if tk.token_field_candidates:
+            for cand in tk.token_field_candidates:
+                out.append(f"- `{cand}`")
+        else:
+            out.append("- _none matched the token-name heuristic._")
+        out.append("\n<details><summary>All key paths (types only, no values)</summary>\n")
+        out.append("```\n" + "\n".join(tk.key_paths) + "\n```")
+        out.append("</details>")
+    else:
+        out.append("_no transcript to infer from._")
+    out.append("")
+
+    # Warnings / next steps
+    out.append("## Warnings / next steps")
+    if not f.warnings and not f.next_steps:
+        out.append("_none._")
+    for w in f.warnings:
+        out.append(f"- ⚠️ {w}")
+    for s in f.next_steps:
+        out.append(f"- → {s}")
+    out.append("")
+    return "\n".join(out)
+
+
+def _json_dump(obj) -> str:
+    from json import dumps
+
+    return dumps(obj, sort_keys=True, indent=2)  # stable key order, 2-space indent
+
+
+def render_json(f: ProfileFinding) -> str:
+    import json
+
+    def transcript_dict(t: TranscriptFinding | None):
+        if t is None:
+            return None
+        return {  # note: real_path is not included
+            "glob": t.glob,
+            "location": t.location,
+            "format": t.fmt,
+            "size_bytes": t.size_bytes,
+            "line_count": t.line_count,
+            "mtime_date": t.mtime_date,
+            "multiple": t.multiple,
+            "note": t.note,
+        }
+
+    data = {
+        "cli": f.cli,
+        "mode": f.mode,
+        "known_profile": f.known_profile,
+        "binary": f.binary,
+        "binary_found": bool(f.flags and f.flags.found),  # False when flags are unset
+        "dialect": f.dialect,
+        "usage_parser": f.parser,
+        "hooks_registered": f.registered,
+        "declared_events": f.declared_events,
+        "version": f.flags.version if f.flags else None,
+        "help": f.flags.help if f.flags else None,
+        "captured_events": [
+            {
+                "native_event": ev.native_event,
+                "canonical_event": ev.canonical_event,
+                "payload_keys": ev.payload_keys,
+                "payload": ev.payload,
+            }
+            for ev in f.captured_events
+        ],
+        "transcript": transcript_dict(f.transcript),
+        "tokens": (
+            {
+                "parser": f.tokens.parser,
+                "entries_scanned": f.tokens.entries_scanned,
+                "parsed_usage": f.tokens.parsed_usage,
+                "key_paths": f.tokens.key_paths,
+                "token_field_candidates": f.tokens.token_field_candidates,
+            }
+            if f.tokens
+            else None
+        ),
+        "warnings": f.warnings,
+        "next_steps": f.next_steps,
+    }
+    return json.dumps(data, indent=2)  # default ensure_ascii: non-ASCII text is \u-escaped
diff --git a/src/automator/sanitize.py b/src/automator/sanitize.py
new file mode 100644
index 0000000..8f583ab
--- /dev/null
+++ b/src/automator/sanitize.py
@@ -0,0 +1,104 @@
+"""PII-scrubbing chokepoint for `bmad-auto probe-adapter`.
+
+Pure stdlib, no automator imports — the single audited place that decides what
+data from a foreign CLI is safe to show a maintainer. The probe command routes
+every captured payload, every help/version blob, and every discovered path
+through here before rendering; nothing is displayed raw.
+
+Guarantees:
+- token *counts* are non-PII, so numbers/bools/null pass through verbatim;
+- dict **keys** are kept verbatim — field names/casing are the whole point of a
+  payload probe — but every leaf **string** is `$HOME`-redacted and then kept
+  ONLY if it matches a conservative identifier shape (a short slug with no
+  spaces / `@` / `/`, e.g. ``claude-opus-4-8`` or ``session-abc_123``);
+  anything else (prose, code, paths, emails) becomes ``<redacted:str>``;
+- list lengths are preserved (the count is structural, the contents aren't);
+- recursion is depth-guarded so a pathological payload can't blow the stack.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Any
+
+# A conservative "this is a machine identifier, not prose or PII" shape: starts
+# alphanumeric, then only word-ish chars (letters, digits, ``.`` ``_`` ``-``),
+# bounded length. No spaces, no ``@``, no ``/`` — so emails, paths, and sentences
+# can never satisfy it. Model ids and session/conversation ids do.
+_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
+_IDENTIFIER_MAX = 80
+
+_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
+
+_REDACTED_STR = "<redacted:str>"
+_REDACTED_EMAIL = "<redacted:email>"
+_REDACTED_DEPTH = "<redacted:depth>"
+
+
+def _home() -> str:
+    expanded = os.path.expanduser("~")  # an unexpanded "~" or empty result counts as no home
+    return "" if expanded in ("", "~") else expanded
+
+
+def redact_home(s: str) -> str:
+    """Return ``s`` with every occurrence of the current user's home directory
+    replaced by ``~``.
+
+    Only the literal expanded home matches (``/home/alice`` -> ``~``). Munged,
+    slash-stripped forms some CLIs use as directory names (``-home-alice-...``)
+    are not paths; they are left for the identifier filter to deal with."""
+    home = _home()
+    if not home or home == "/":
+        return s
+    return s.replace(home, "~")
+
+
+def looks_like_identifier(s: str) -> bool:
+    """True for a short machine slug safe to show verbatim; a trailing newline fails it."""
+    return 0 < len(s) <= _IDENTIFIER_MAX and _IDENTIFIER_RE.fullmatch(s) is not None
+
+
+def scrub_text(s: str, *, max_lines: int | None = None) -> str:
+    """Sanitize free text such as a CLI's ``--help`` / ``--version`` output or a
+    log tail.
+
+    Deliberately lighter than :func:`scrub_json`: the text is the CLI's own and
+    flag lines must survive, so only the home dir and emails are redacted and
+    the line count is optionally capped."""
+    cleaned = _EMAIL_RE.sub(_REDACTED_EMAIL, redact_home(s))
+    if max_lines is None:
+        return cleaned
+    lines = cleaned.splitlines()
+    overflow = len(lines) - max_lines
+    if overflow > 0:
+        # keep the head and say how many lines were cut
+        lines = [*lines[:max_lines], f"… ({overflow} more lines redacted)"]
+    return "\n".join(lines)
+
+
+def _scrub(obj: Any, depth: int, max_depth: int) -> Any:
+    """Recursive worker for :func:`scrub_json`; ``depth`` counts container nesting."""
+    if depth > max_depth:
+        return _REDACTED_DEPTH
+    nxt = depth + 1
+    if isinstance(obj, dict):
+        return {str(key): _scrub(val, nxt, max_depth) for key, val in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_scrub(val, nxt, max_depth) for val in obj]
+    if isinstance(obj, str):
+        home_free = redact_home(obj)
+        return home_free if looks_like_identifier(home_free) else _REDACTED_STR
+    # None and numbers (bool included, as an int subclass) carry no PII: pass through;
+    # any other type should not occur in JSON and is treated as an opaque string.
+    return obj if obj is None or isinstance(obj, (bool, int, float)) else _REDACTED_STR
+
+
+def scrub_json(obj: Any, *, max_depth: int = 40) -> Any:
+    """Return the sanitized form of a JSON-shaped value (nesting past ``max_depth`` is cut)."""
+    return _scrub(obj, depth=0, max_depth=max_depth)
+
+
+def scrub_event_payload(payload: Any) -> Any:
+    """Per-event chokepoint of the probe: sanitize one captured hook payload via ``scrub_json``."""
+    return scrub_json(payload)
diff --git a/tests/test_install.py b/tests/test_install.py
index ec1041d..cdc6c85 100644
--- a/tests/test_install.py
+++ b/tests/test_install.py
@@ -60,6 +60,55 @@ def test_merge_hooks_gemini_entry_shape():
     assert handler["command"].endswith("bmad_auto_hook.py Stop")
 
 
+def test_merge_hooks_copilot_entry_shape():
+    profile = get_profile("copilot")
+    settings, _ = merge_hooks({}, _registrations(profile), profile.hooks.dialect)
+    assert settings["version"] == 1  # Copilot hook configs are versioned
+    # Copilot stores the handler dict directly in the event list (no "hooks" wrapper)
+    handler = settings["hooks"]["Stop"][0]  # first entry registered for Stop
+    assert handler["type"] == "command"
+    assert handler["timeoutSec"] == 60  # Copilot hook timeouts are expressed in seconds
+    # registered under the native event but relaying the canonical name
+    assert handler["command"].endswith("bmad_auto_hook.py Stop")
+
+
+def test_merge_hooks_copilot_idempotent():
+    # the bare-handler shape (no "hooks" wrapper) must still dedupe on a re-run
+    profile = get_profile("copilot")
+    settings, _ = merge_hooks({}, _registrations(profile), profile.hooks.dialect)
+    again, changed = merge_hooks(settings, _registrations(profile), profile.hooks.dialect)
+    assert not changed  # merging the same registrations again must report no change
+    for event in profile.hooks.events:
+        assert len(again["hooks"][event]) == 1  # still exactly one entry per event
+
+
+def test_copilot_profile_render_prompt():
+    # {skill} must expand plainly (no codex-style $ prefix) into the SKILL.md path
+    profile = get_profile("copilot")
+    rendered = profile.render_prompt("/bmad-auto-dev 1-2-a")
+    assert ".agents/skills/bmad-auto-dev/SKILL.md" in rendered
+    assert "1-2-a" in rendered  # the trailing argument must survive rendering
+
+
+def test_install_into_copilot(tmp_path):
+    assert install_into(tmp_path, clis=("copilot",)) == 0
+    settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text())
+    assert settings["version"] == 1
+    # registered under VS Code-compatible PascalCase names (snake_case payloads)
+    assert set(settings["hooks"]) == {"Stop", "SessionStart", "SessionEnd", "PreCompact"}
+    cmd = settings["hooks"]["Stop"][0]["command"]
+    # absolute path baked in (no $CLAUDE_PROJECT_DIR equivalent in copilot)
+    assert str(tmp_path.resolve()) in cmd and cmd.endswith(" Stop")
+    # skills land in the shared .agents/skills tree
+    for skill in MODULE_SKILLS:
+        assert (tmp_path / ".agents" / "skills" / skill / "SKILL.md").is_file()
+
+    # idempotent re-run does not duplicate the bare handler
+    assert install_into(tmp_path, clis=("copilot",)) == 0
+    settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text())
+    assert len(settings["hooks"]["Stop"]) == 1  # still exactly one Stop entry after re-install
+
+
 def test_install_into_full(tmp_path):
     assert install_into(tmp_path) == 0
     assert (tmp_path / ".automator" / "bmad_auto_hook.py").is_file()
diff --git a/tests/test_probe.py b/tests/test_probe.py
new file mode 100644
index 0000000..b8a122d
--- /dev/null
+++ b/tests/test_probe.py
@@ -0,0 +1,271 @@
+"""SCAN machinery: transcript discovery, schema inference, registration,
+CLI plumbing, and end-to-end scrub-through. No live CLI required."""
+
+import json
+
+import pytest
+
+from automator import cli, probe
+from automator.adapters.profile import get_profile
+
+# ----------------------------------------------------------- fixtures / helpers
+
+
+def _write_jsonl(path, rows):
+    path.parent.mkdir(exist_ok=True, parents=True)
+    path.write_text("\n".join(map(json.dumps, rows)) + "\n", encoding="utf-8")
+    return path  # handed back so callers can write and bind the path in one expression
+
+
+CLAUDE_ROWS = [  # two assistant rows; token counts nest under message.usage
+    {"type": "assistant", "message": {"usage": {"input_tokens": 100, "output_tokens": 50}}},
+    {
+        "type": "assistant",
+        "message": {
+            "usage": {
+                "input_tokens": 10,
+                "output_tokens": 5,
+                "cache_read_input_tokens": 2000,  # cache counters appear on this row only
+                "cache_creation_input_tokens": 300,
+            }
+        },
+    },
+]
+
+CODEX_ROWS = [  # one event_msg row whose payload is a token_count record
+    {
+        "type": "event_msg",
+        "payload": {
+            "type": "token_count",
+            "info": {
+                "total_token_usage": {  # counters live at payload.info.total_token_usage
+                    "input_tokens": 500,
+                    "cached_input_tokens": 200,
+                    "output_tokens": 60,
+                }
+            },
+        },
+    },
+]
+
+GEMINI_ROWS = [  # a "$set" row holding a messages list, then a gemini row with token counts
+    {"$set": {"messages": [{"id": "u1", "type": "user", "content": []}]}},
+    {"id": "g1", "type": "gemini", "tokens": {"input": 12273, "output": 45, "cached": 0}},
+]
+
+
+# ----------------------------------------------------------- token inference
+
+
+def test_infer_claude_candidates_and_self_check(tmp_path):
+    transcript = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    inferred = probe.infer_token_schema("claude-jsonl", transcript)
+    assert "message.usage.input_tokens:int" in inferred.token_field_candidates
+    assert "message.usage.output_tokens:int" in inferred.token_field_candidates
+    # self-check: the parsed totals equal the per-field sums over both CLAUDE_ROWS entries
+    assert inferred.parsed_usage == {
+        "input_tokens": 100 + 10,
+        "output_tokens": 50 + 5,
+        "cache_read_tokens": 2000,
+        "cache_creation_tokens": 300,
+    }
+
+
+def test_infer_codex_nested_candidates(tmp_path):
+    rollout = _write_jsonl(tmp_path / "r.jsonl", CODEX_ROWS)
+    inferred = probe.infer_token_schema("codex-rollout", rollout)
+    assert "payload.info.total_token_usage.input_tokens:int" in inferred.token_field_candidates
+    assert inferred.parsed_usage["output_tokens"] == 60  # the total given in CODEX_ROWS
+
+
+def test_infer_gemini_list_paths_collapse(tmp_path):
+    session = _write_jsonl(tmp_path / "s.jsonl", GEMINI_ROWS)
+    inferred = probe.infer_token_schema("gemini-chat", session)
+    # Either the collapsed "$set.messages[].tokens.input:int" path or the bare top-level
+    # "tokens.input:int" path satisfies this check; an exact match on the former already
+    # contains the latter as a substring, so one containment test covers both.
+    assert any("tokens.input:int" in path for path in inferred.token_field_candidates)
+
+
+def test_key_paths_carry_types_never_values(tmp_path):
+    transcript = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    joined = "\n".join(probe.infer_token_schema("claude-jsonl", transcript).key_paths)
+    # key paths advertise each leaf's type ...
+    assert ":int" in joined
+    # ... but never the fixture's raw token counts
+    assert not ("100" in joined or "2000" in joined)
+
+
+def test_infer_with_parser_none_still_finds_candidates(tmp_path):
+    transcript = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    inferred = probe.infer_token_schema("none", transcript)
+    assert "message.usage.input_tokens:int" in inferred.token_field_candidates
+    assert inferred.parsed_usage is None  # parser "none": nothing to self-check against
+
+
+# ----------------------------------------------------------- discovery
+
+
+def test_discover_picks_newest_mtime(tmp_path):
+    import os
+
+    session_dir = tmp_path / "sessions"
+    stale = _write_jsonl(session_dir / "old.jsonl", CLAUDE_ROWS)
+    fresh = _write_jsonl(session_dir / "new.jsonl", CLAUDE_ROWS)
+    os.utime(stale, (1, 1))  # pin (atime, mtime) so "newest" ignores write order
+    os.utime(fresh, (10_000_000, 10_000_000))
+    hints = probe.Hints(session_dir=str(session_dir))
+    result = probe.discover_transcript("none", cli="custom", hints=hints)
+    assert result.multiple is True
+    assert result.real_path == fresh
+
+
+def test_discover_transcript_override(tmp_path):
+    exact = _write_jsonl(tmp_path / "exact.jsonl", CLAUDE_ROWS)
+    hints = probe.Hints(transcript=str(exact))
+    result = probe.discover_transcript("claude-jsonl", cli="claude", hints=hints)
+    # an explicit transcript hint is returned as-is and shows up in the reported location
+    assert result.real_path == exact
+    assert result.location and "exact.jsonl" in result.location
+
+
+def test_discover_missing_override_notes(tmp_path):
+    missing = tmp_path / "nope.jsonl"  # never created
+    hints = probe.Hints(transcript=str(missing))
+    result = probe.discover_transcript("claude-jsonl", cli="claude", hints=hints)
+    assert result.real_path is None
+    assert "does not exist" in result.note
+
+
+def test_discover_location_redacts_username(tmp_path, monkeypatch):
+    # a directory name that could embed a username must not survive verbatim
+    monkeypatch.setenv("HOME", str(tmp_path))
+    transcript = _write_jsonl(tmp_path / ".secret-home-dir" / "abc-123.jsonl", CLAUDE_ROWS)
+    hints = probe.Hints(transcript=str(transcript))
+    location = probe.discover_transcript("none", cli="x", hints=hints).location
+    assert location.startswith("~/") and ".secret-home-dir" not in location
+    assert "abc-123.jsonl" in location  # the id-like filename is allowed through
+
+
+# ----------------------------------------------------------- registration
+
+
+@pytest.mark.parametrize("dialect_cli", ["claude", "codex", "gemini", "copilot"])
+def test_probe_hook_registers_under_native_events(dialect_cli):
+    from automator.install import merge_hooks
+
+    hooks = get_profile(dialect_cli).hooks
+    # one probe-hook command per native event, tagged with its canonical event name
+    commands = {
+        native: f"python3 /tmp/bmad_auto_probe_hook.py {canonical}"
+        for native, canonical in hooks.events.items()
+    }
+    merged, changed = merge_hooks({}, commands, hooks.dialect)
+    assert changed
+    assert all(native in merged["hooks"] for native in hooks.events)
+    # merging the same registrations again must be a no-op
+    _, changed_again = merge_hooks(merged, commands, hooks.dialect)
+    assert not changed_again
+
+
+def test_scan_reports_registered_state(project):
+    from automator.install import install_into
+
+    sandbox = project.project
+    claude = get_profile("claude")
+    before = probe.scan(cli="claude", profile=claude, project=sandbox, hints=probe.Hints())
+    assert before.registered is False  # nothing installed in the sandbox
+    # installing the claude hooks must flip the flag on the next scan
+    install_into(sandbox, clis=("claude",))
+    after = probe.scan(cli="claude", profile=claude, project=sandbox, hints=probe.Hints())
+    assert after.registered is True
+
+
+# ----------------------------------------------------------- CLI plumbing
+
+
+def test_cli_scan_produces_sections(tmp_path, capsys):
+    jsonl = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    argv = ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(jsonl)]
+    rc = cli.main(argv)
+    assert rc == 0
+    report = capsys.readouterr().out
+    # the title, both schema sections and an inferred token field must all be printed
+    assert "# Profile finalize report — claude (scan)" in report
+    assert "## Hook payload shape" in report
+    assert "## Token usage schema" in report
+    assert "message.usage.input_tokens:int" in report
+
+
+def test_cli_unknown_cli_without_binary_fails(tmp_path, capsys):
+    argv = ["probe-adapter", "no-such-cli", "--project", str(tmp_path)]
+    exit_code = cli.main(argv)
+    assert exit_code == 1
+    assert "FAIL" in capsys.readouterr().err  # the failure is reported on stderr
+
+
+def test_cli_unknown_cli_with_binary_reduced_report(tmp_path, capsys):
+    argv = ["probe-adapter", "no-such-cli", "--project", str(tmp_path), "--binary", "true"]
+    assert cli.main(argv) == 0
+    # "no (reduced report)" contains "reduced report", so one containment check suffices
+    assert "reduced report" in capsys.readouterr().out
+
+
+def test_cli_out_writes_file(tmp_path):
+    transcript = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    report_file = tmp_path / "report.md"
+    # --out must leave the report in the named file
+    argv = [
+        "probe-adapter",
+        "claude",
+        "--project",
+        str(tmp_path),
+        "--transcript",
+        str(transcript),
+        "--out",
+        str(report_file),
+    ]
+    exit_code = cli.main(argv)
+    assert exit_code == 0
+    assert report_file.is_file()
+    assert "Profile finalize report" in report_file.read_text()
+
+
+def test_cli_json_block_appended(tmp_path, capsys):
+    jsonl = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS)
+    argv = ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(jsonl)]
+    assert cli.main([*argv, "--json"]) == 0
+    report = capsys.readouterr().out
+    assert "## JSON" in report
+    # the text between the opening ```json fence and the last closing fence must parse
+    after_fence = report.split("```json", 1)[1]
+    fenced = after_fence.rsplit("```", 1)[0]
+    parsed = json.loads(fenced)
+    assert parsed["cli"] == "claude"
+    assert parsed["mode"] == "scan"
+
+
+# ----------------------------------------------------------- scrub-through
+
+
+def test_scan_report_contains_no_pii(tmp_path, capsys, monkeypatch):
+    """An email and a home path in the transcript must both be absent from the report."""
+    monkeypatch.setenv("HOME", str(tmp_path))
+    pii_row = {
+        "type": "assistant",
+        "author": "secret@example.com",
+        "cwd": f"{tmp_path}/private/project",
+        "message": {"usage": {"input_tokens": 7, "output_tokens": 3}},
+    }
+    jsonl = _write_jsonl(tmp_path / "t.jsonl", [pii_row])
+    argv = ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(jsonl)]
+
+    exit_code = cli.main([*argv, "--json"])
+    assert exit_code == 0
+    report = capsys.readouterr().out
+
+    # neither PII value may leak into the report (markdown or JSON block) ...
+    assert "secret@example.com" not in report
+    assert "private/project" not in report
+    # ... while the token schema is still there
+    assert "message.usage.input_tokens:int" in report
diff --git a/tests/test_probe_hook.py b/tests/test_probe_hook.py
new file mode 100644
index 0000000..4f3258d
--- /dev/null
+++ b/tests/test_probe_hook.py
@@ -0,0 +1,84 @@
+"""The capture hook runs as a real subprocess, like the CLI runs it."""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+SCRIPT = Path(__file__).parent.parent / "src" / "automator" / "data" / "bmad_auto_probe_hook.py"
+
+
+def run_hook(event: str, env: dict, payload, cwd=None) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, str(SCRIPT), event],
+        input=json.dumps(payload) if payload is not None else "",  # None -> empty stdin
+        env={"PATH": "/usr/bin:/bin", **env},  # caller entries override the minimal PATH
+        cwd=cwd,  # None keeps the test runner's working directory
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+
+
+def test_noop_without_capture_dir(tmp_path):
+    proc = run_hook("Stop", {}, {"session_id": "s1"}, cwd=tmp_path)  # relative writes land here
+    assert proc.returncode == 0 and list(tmp_path.iterdir()) == []
+
+
+def test_writes_signal_and_payload(tmp_path):
+    capture_dir = tmp_path / "capture"
+    hook_env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture_dir), "BMAD_AUTO_TASK_ID": "probe"}
+    raw_payload = {
+        "session_id": "abc-123",
+        "transcript_path": "/home/u/.copilot/x/events.jsonl",
+        "cwd": "/proj",
+        "extra": {"nested": "field"},
+    }
+    assert run_hook("Stop", hook_env, raw_payload).returncode == 0
+    assert not list(capture_dir.glob("*.tmp"))  # no temp file is left behind
+
+    # exactly one file of each kind, both named after the event
+    signal_files = list(capture_dir.glob("*.signal.json"))
+    payload_files = list(capture_dir.glob("*.payload.json"))
+    assert (len(signal_files), len(payload_files)) == (1, 1)
+    assert "Stop" in signal_files[0].name and "Stop" in payload_files[0].name
+
+    signal = json.loads(signal_files[0].read_text())
+    assert signal["event"] == "Stop"
+    assert signal["task_id"] == "probe"
+    assert signal["session_id"] == "abc-123"
+    assert signal["transcript_path"].endswith("events.jsonl")
+
+    # the ENTIRE raw payload survives (un-sanitized; the command scrubs later)
+    stored = json.loads(payload_files[0].read_text())
+    assert stored["extra"] == {"nested": "field"}
+    assert stored["argv_event"] == "Stop"  # native event name for pairing
+
+
+def test_conversation_id_fallback(tmp_path):
+    capture_dir = tmp_path / "capture"
+    hook_env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture_dir)}  # no BMAD_AUTO_TASK_ID
+    result = run_hook("Stop", hook_env, {"conversation_id": "conv-9"})
+    assert result.returncode == 0
+    written = json.loads(next(capture_dir.glob("*.signal.json")).read_text())
+    # with no session_id in the payload, conversation_id fills the slot
+    assert written["session_id"] == "conv-9"
+    assert written["task_id"] == "probe"  # default when the env var is absent
+
+
+def test_tolerates_garbage_stdin(tmp_path):
+    capture_dir = tmp_path / "capture"
+    hook_env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture_dir)}
+    result = run_hook("SessionStart", hook_env, None)  # payload=None -> empty stdin
+    assert result.returncode == 0
+    assert len(list(capture_dir.glob("*.signal.json"))) == 1
+    payload_file = next(capture_dir.glob("*.payload.json"))
+    assert json.loads(payload_file.read_text()) == {"argv_event": "SessionStart"}
+
+
+def test_installed_copy_matches_source(tmp_path):
+    from importlib import resources
+
+    # packaged alongside the real relay: the copy importlib.resources resolves must match
+    shipped = resources.files("automator.data") / "bmad_auto_probe_hook.py"
+    assert shipped.read_text(encoding="utf-8") == SCRIPT.read_text(encoding="utf-8")
diff --git a/tests/test_sanitize.py b/tests/test_sanitize.py
new file mode 100644
index 0000000..ca873d7
--- /dev/null
+++ b/tests/test_sanitize.py
@@ -0,0 +1,126 @@
+"""The crown-jewel PII case table for the probe sanitizer."""
+
+import pytest
+
+from automator import sanitize
+
+
+@pytest.fixture
+def home(monkeypatch, tmp_path):
+    fake_home = str(tmp_path)
+    monkeypatch.setenv("HOME", fake_home)  # os.path.expanduser("~") reads HOME on POSIX
+    return fake_home
+
+
+# ------------------------------------------------------------- redact_home
+
+
+def test_redact_home_replaces_home_prefix(home):
+    assert sanitize.redact_home(home + "/.claude/x.jsonl") == "~/.claude/x.jsonl"
+
+
+def test_redact_home_noop_when_absent(home):
+    assert sanitize.redact_home("/etc/passwd") == "/etc/passwd"  # not under the fake HOME
+
+
+# ------------------------------------------------------- looks_like_identifier
+
+
+@pytest.mark.parametrize(
+    "value",
+    ("claude-opus-4-8", "session-abc_123", "Stop", "gpt-5-codex", "4.8", "abc123"),
+)
+def test_identifier_accepts_slugs(value):
+    assert sanitize.looks_like_identifier(value)  # each slug-like value must be recognised
+
+
+@pytest.mark.parametrize(
+    "value",
+    [
+        "",  # empty string
+        "has spaces",  # embedded whitespace
+        "user@example.com",  # email address
+        "/home/alice/x",  # absolute path
+        "a/b",  # contains a path separator
+        ".claude",  # leading dot is not alphanumeric
+        "x" * 200,  # too long to be a slug
+        "I am a sentence of prose.",  # free-form prose
+    ],
+)
+def test_identifier_rejects_prose_paths_emails(value):
+    assert not sanitize.looks_like_identifier(value)
+
+
+# --------------------------------------------------------------- scrub_json
+
+
+def test_scrub_json_passes_numbers_bools_null():
+    scalars = {"input_tokens": 123, "ratio": 1.5, "ok": True, "off": False, "none": None}
+    assert sanitize.scrub_json(scalars) == scalars  # non-string leaves come back unchanged
+
+
+def test_scrub_json_keeps_keys_verbatim_redacts_string_leaves(home):
+    raw = {
+        "session_id": "abc-123",  # identifier -> kept
+        "transcript_path": f"{home}/.claude/x.jsonl",  # path -> redacted
+        "email": "me@example.com",  # email -> redacted
+        "prose": "this is a free-form sentence",  # prose -> redacted
+        "model": "claude-opus-4-8",  # identifier -> kept
+    }
+    assert sanitize.scrub_json(raw) == {  # same keys; only non-identifier strings change
+        "session_id": "abc-123",
+        "transcript_path": "<redacted:str>",
+        "email": "<redacted:str>",
+        "prose": "<redacted:str>",
+        "model": "claude-opus-4-8",
+    }
+
+
+def test_scrub_json_preserves_list_length_not_content():
+    scrubbed = sanitize.scrub_json({"items": ["a b c", "tok-1", 7]})
+    assert scrubbed["items"] == ["<redacted:str>", "tok-1", 7]  # prose redacted in place
+
+
+def test_scrub_json_depth_guard():
+    obj = cur = {}
+    for _ in range(60):  # build a 60-level chain of nested "next" dicts
+        cur["next"] = {}
+        cur = cur["next"]
+    cur["leaf"] = "deep"
+    out = sanitize.scrub_json(obj, max_depth=10)
+    # follow the "next" chain until the depth marker shows up or the chain ends
+    node = out
+    saw_guard = False
+    for _ in range(60):
+        if node == "<redacted:depth>":
+            saw_guard = True
+            break
+        node = node.get("next")
+        if node is None:
+            break
+    assert saw_guard  # the marker must appear somewhere along the chain
+
+
+# --------------------------------------------------------------- scrub_text
+
+
+def test_scrub_text_keeps_flags_redacts_email_and_home(home):
+    raw = f"Usage: foo [options]\n  --bar    do bar\ncontact me@example.com or see {home}/cfg"
+    scrubbed = sanitize.scrub_text(raw)
+    assert "--bar" in scrubbed  # CLI flags are kept
+    # the address is replaced by its marker and the home prefix collapses to "~"
+    assert "me@example.com" not in scrubbed and "<redacted:email>" in scrubbed
+    assert f"{home}/cfg" not in scrubbed
+    assert "~/cfg" in scrubbed
+
+
+def test_scrub_text_max_lines_truncates():
+    fifty_lines = "\n".join(f"line{i}" for i in range(50))
+    out = sanitize.scrub_text(fifty_lines, max_lines=5)
+    assert out.count("\n") == 5 and "more lines redacted" in out  # 5 kept + the marker line
+
+
+def test_scrub_event_payload_is_scrub_json(home):
+    event = {"session_id": "s-1", "cwd": f"{home}/proj", "n": 5}
+    expected = {"session_id": "s-1", "cwd": "<redacted:str>", "n": 5}  # only the path changes
+    assert sanitize.scrub_event_payload(event) == expected
diff --git a/uv.lock b/uv.lock
index ee442cf..8414409 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4,7 +4,7 @@ requires-python = ">=3.11"
 
 [[package]]
 name = "bmad-auto"
-version = "0.6.1"
+version = "0.6.2"
 source = { editable = "." }
 dependencies = [
     { name = "pyyaml" },