diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8b3e93e..838727b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "bauto", "source": "./src/automator/data/skills", "description": "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)", - "version": "0.6.1", + "version": "0.6.2", "author": { "name": "pinkyd" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 5534d89..f7b9f02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ All notable changes to `bmad-auto` are documented here. The format is based on [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0, breaking changes may land in a minor release. +## [0.6.2] — 2026-06-21 + +### Added + +- **`bmad-auto probe-adapter` (alias `collect-adapter-data`).** A self-service command that + collects and sanitizes everything needed to finalize a CLI adapter profile — the hook payload + shape, transcript location/format, and token-usage schema for a `usage_parser` — so a user of + any coding CLI can paste back a clean, content-free report. A default zero-launch **scan** reads + on-disk conventions; opt-in `--probe` does a live capture in an ephemeral workspace. All output + passes through one audited PII sanitizer (token counts and field names survive; paths, prose, and + emails are redacted). +- **GitHub Copilot CLI profile.** Bundled `copilot` profile (Copilot CLI ≥ 2026-02): `-i` + interactive launch, VS Code-compatible `Stop` hook, `--allow-all-tools` for unattended runs. + Still pending live E2E and a `usage_parser` — `probe-adapter` captures the token schema to write + one. 
+ +### Docs + +- **Adapter authoring guide.** New [adapter authoring guide](docs/adapter-authoring-guide.md) + walks through finalizing a CLI profile with `probe-adapter` (scan vs probe, the PII model, and + the parser-writing loop); `probe-adapter` is added to both command references. + ## [0.6.1] — 2026-06-20 ### Added @@ -429,6 +451,7 @@ enforced in CI. implementation phase, driven by a Python control loop with hook-based session transport and resumable on-disk run state. +[0.6.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.2 [0.6.1]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.1 [0.6.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.0 [0.5.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.5.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5cc170e..e08f321 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -182,7 +182,7 @@ Keep messages under 72 characters. Each commit = one logical change. - **Tests** live under `tests/`; add or update them for behavior changes. The mock adapter lets most of the loop run without a live CLI. - **Skills** ship as markdown under `src/automator/data/skills/` (the `bmad-auto-*` automation skills). - **Plugins** extend the orchestrator via a `plugin.toml` manifest — see the [plugin authoring guide](docs/plugin-authoring-guide.md). -- **New coding CLIs** are usually a TOML profile, not Python — see the CLI adapter section in the [README](README.md). +- **New coding CLIs** are usually a TOML profile, not Python — see the CLI adapter section in the [README](README.md) and the [adapter authoring guide](docs/adapter-authoring-guide.md) (use `bmad-auto probe-adapter` to collect the hook/transcript/token data a profile needs). 
--- diff --git a/README.md b/README.md index afef13b..a8b8853 100644 --- a/README.md +++ b/README.md @@ -58,24 +58,25 @@ bmad-auto tui # …or drive everything from the dashboard ## Command reference -| Command | What it does | -| ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `bmad-auto init` | Install the bundled `bmad-auto-*` skills, the hook relay, `.automator/policy.toml`, and a runs-dir gitignore. `--cli ` (repeatable) targets specific agents; `--no-skills` / `--force-skills` control skill copying. | -| `bmad-auto validate` | Preflight every prerequisite: BMAD config, sprint-status, git, tmux, CLI binary, hook registration. | -| `bmad-auto run` | Drive the dev → review → verify → commit loop. `--epic N`, `--story KEY`, `--max-stories N`, `--dry-run`. | -| `bmad-auto sweep` | Triage + execute open `deferred-work.md` entries. `--no-prompt`, `--decisions-only`, `--max-bundles N`, `--repeat`, `--max-cycles N`, `--dry-run`. | -| `bmad-auto resume ` | Continue a run paused at a gate, escalation, or interruption. | -| `bmad-auto resolve ` | Resolve a CRITICAL escalation: open an interactive resolve agent to fix the frozen spec, then re-arm the story and resume. `--story KEY`, `--no-interactive`, `--resume` / `--no-resume`. | -| `bmad-auto decisions` | Answer deferred-work decisions earlier sweeps left unanswered (skipped by `--no-prompt`, or an abandoned interactive sweep). Recorded so the next sweep acts on them without re-asking. `--list` shows them without answering. 
| -| `bmad-auto list` (`ls`) | List every run/sweep with its short ref, type, and status — the handle you pass to the commands below. | -| `bmad-auto status []` | Run + sprint summary with per-story token totals (plus a count of decisions awaiting an answer). | -| `bmad-auto attach []` | tmux-attach to a run's live agent session. | -| `bmad-auto stop ` | Stop a live run — the engine and its agent tmux session. | -| `bmad-auto delete ` | Delete a run directory. `--force` stops the run first if it is still live. | -| `bmad-auto archive ` | Compress a run into `.automator/archive` and remove the run dir. `--force` stops the run first if it is still live. | -| `bmad-auto cleanup` | Remove leftover tmux artifacts **for the current project**: kill `bmad-auto-` sessions for finished/stopped/interrupted runs (and orphans whose run dir is gone) and close parked `bmad-auto-ctl` windows. `--dry-run` lists without killing. Live runs — and any session/window belonging to another project — are never touched. | -| `bmad-auto clean` | Reclaim **disk** from concluded runs per `[cleanup]`: tear down git worktrees a mid-flight stop orphaned (freeing their Unity `Library/` + MCP-server builds), trim the heavy `worktrees/` tree from runs kept for history (they stay viewable in the TUI), and archive/delete runs past the retention window. Only finished/stopped runs are touched; `--dry-run` previews, `--keep ` protects, `--retain N` overrides the window, `--hard` deletes instead of archiving. | -| `bmad-auto tui` | The interactive dashboard (needs the `[tui]` extra). `--low-frame-rate` caps it to 15fps + disables animations (fixes repaint tearing over slow/SSH links; also `[tui] low_frame_rate`). 
| +| Command | What it does | +| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `bmad-auto init` | Install the bundled `bmad-auto-*` skills, the hook relay, `.automator/policy.toml`, and a runs-dir gitignore. `--cli ` (repeatable) targets specific agents; `--no-skills` / `--force-skills` control skill copying. | +| `bmad-auto validate` | Preflight every prerequisite: BMAD config, sprint-status, git, tmux, CLI binary, hook registration. | +| `bmad-auto run` | Drive the dev → review → verify → commit loop. `--epic N`, `--story KEY`, `--max-stories N`, `--dry-run`. | +| `bmad-auto sweep` | Triage + execute open `deferred-work.md` entries. `--no-prompt`, `--decisions-only`, `--max-bundles N`, `--repeat`, `--max-cycles N`, `--dry-run`. | +| `bmad-auto resume ` | Continue a run paused at a gate, escalation, or interruption. | +| `bmad-auto resolve ` | Resolve a CRITICAL escalation: open an interactive resolve agent to fix the frozen spec, then re-arm the story and resume. `--story KEY`, `--no-interactive`, `--resume` / `--no-resume`. | +| `bmad-auto decisions` | Answer deferred-work decisions earlier sweeps left unanswered (skipped by `--no-prompt`, or an abandoned interactive sweep). Recorded so the next sweep acts on them without re-asking. `--list` shows them without answering. | +| `bmad-auto list` (`ls`) | List every run/sweep with its short ref, type, and status — the handle you pass to the commands below. 
| +| `bmad-auto status []` | Run + sprint summary with per-story token totals (plus a count of decisions awaiting an answer). | +| `bmad-auto attach []` | tmux-attach to a run's live agent session. | +| `bmad-auto stop ` | Stop a live run — the engine and its agent tmux session. | +| `bmad-auto delete ` | Delete a run directory. `--force` stops the run first if it is still live. | +| `bmad-auto archive ` | Compress a run into `.automator/archive` and remove the run dir. `--force` stops the run first if it is still live. | +| `bmad-auto cleanup` | Remove leftover tmux artifacts **for the current project**: kill `bmad-auto-` sessions for finished/stopped/interrupted runs (and orphans whose run dir is gone) and close parked `bmad-auto-ctl` windows. `--dry-run` lists without killing. Live runs — and any session/window belonging to another project — are never touched. | +| `bmad-auto clean` | Reclaim **disk** from concluded runs per `[cleanup]`: tear down git worktrees a mid-flight stop orphaned (freeing their Unity `Library/` + MCP-server builds), trim the heavy `worktrees/` tree from runs kept for history (they stay viewable in the TUI), and archive/delete runs past the retention window. Only finished/stopped runs are touched; `--dry-run` previews, `--keep ` protects, `--retain N` overrides the window, `--hard` deletes instead of archiving. | +| `bmad-auto tui` | The interactive dashboard (needs the `[tui]` extra). `--low-frame-rate` caps it to 15fps + disables animations (fixes repaint tearing over slow/SSH links; also `[tui] low_frame_rate`). | +| `bmad-auto probe-adapter ` (`collect-adapter-data`) | Collect + sanitize the data needed to finalize a CLI adapter profile (hook payload shape, transcript location/format, token schema). Default is a zero-launch **scan**; `--probe` opts into a live capture. `--transcript`, `--session-dir`, `--binary` (CLIs with no profile yet), `--out`, `--json`. See the [adapter authoring guide](docs/adapter-authoring-guide.md). 
| Every command takes `--project ` (default: the current directory). Any `` may be a partial — the tail after the last `-` (e.g. `a1b2`), shortened to any prefix that stays unique; @@ -438,17 +439,20 @@ Each run drives its agents inside a dedicated tmux session, `bmad-auto-` One generic driver (`adapters/generic_tmux.py`) runs any coding CLI that fits the tmux-injection + hook-signal transport; everything CLI-specific lives in a declarative **profile** (`adapters/profile.py`). Built-in profiles ship as TOML in `automator/data/profiles/`: -| Profile | Status | Notes | -| -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `claude` | supported | reference implementation | -| `codex` | supported, E2E-verified | Codex ≥ 0.139. No slash expansion in the initial prompt — the profile renders `$skill-name` mentions (plus a "use subagents as needed" nudge) instead. No SessionEnd hook; window-death fallback covers crashes. | -| `gemini` | supported, E2E-verified | Gemini CLI ≥ 0.46 (hooks on by default since then). Launches with `-i` to stay interactive; `AfterAgent` maps to canonical Stop. Usage parser validated against real chat logs. | +| Profile | Status | Notes | +| --------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `claude` | supported | reference implementation | +| `codex` | supported, E2E-verified | Codex ≥ 0.139. No slash expansion in the initial prompt — the profile renders `$skill-name` mentions (plus a "use subagents as needed" nudge) instead. No SessionEnd hook; window-death fallback covers crashes. 
| +| `gemini` | supported, E2E-verified | Gemini CLI ≥ 0.46 (hooks on by default since then). Launches with `-i` to stay interactive; `AfterAgent` maps to canonical Stop. Usage parser validated against real chat logs. | +| `copilot` | bundled, pending live E2E | GitHub Copilot CLI ≥ 2026-02. Launches with `-i` to stay interactive; VS Code-compatible PascalCase `Stop` hook (snake_case payloads); `--allow-all-tools` for unattended runs. No `usage_parser` yet — run `probe-adapter` to capture its token schema (see below). | **On budgets:** agentic sessions are dominated by cache reads (80–90%+ of raw tokens), which every supported vendor bills at ~0.1x base input. The `max_tokens_per_story` check therefore uses a cost-weighted total — cache reads count at `limits.cache_read_weight` (default 0.1) — while displayed totals stay raw. Set the weight to 1.0 to budget raw tokens. **Shared prerequisites:** the `bmad-auto-*` skills must be present in `.agents/skills/` (codex and gemini read it; Claude Code reads `.claude/skills/`), and each CLI must have been run once interactively in the project for auth/trust — `bmad-auto init --cli codex --cli gemini` installs the skills into `.agents/skills/`, registers the hook relay, and prints the per-CLI first-run steps. -**Adding a CLI without touching Python:** drop a TOML file in `/.automator/profiles/.toml` (same fields as the built-ins: binary, `prompt_template`, bypass flags, a `[hooks]` block picking one of the config dialects `claude-settings-json` / `codex-hooks-json` / `gemini-settings-json`, and a native→canonical event map). The hook relay script and orchestrator are CLI-agnostic — each registration passes the canonical event name as the script argument. A CLI whose hook config clones one of the existing dialects (the ecosystem trend) needs nothing else; a genuinely different transport gets its own adapter class instead (see the opencode HTTP+SSE design stub in `adapters/opencode_http.py`). 
+**Adding a CLI without touching Python:** drop a TOML file in `/.automator/profiles/.toml` (same fields as the built-ins: binary, `prompt_template`, bypass flags, a `[hooks]` block picking one of the config dialects `claude-settings-json` / `codex-hooks-json` / `gemini-settings-json` / `copilot-settings-json`, and a native→canonical event map). The hook relay script and orchestrator are CLI-agnostic — each registration passes the canonical event name as the script argument. A CLI whose hook config clones one of the existing dialects (the ecosystem trend) needs nothing else; a genuinely different transport gets its own adapter class instead (see the opencode HTTP+SSE design stub in `adapters/opencode_http.py`). + +**Finalizing a profile:** the facts a profile needs that live in no doc — the CLI's exact hook payload shape, its transcript location/format, and the token schema a `usage_parser` reads — are collected and sanitized by `bmad-auto probe-adapter ` (a zero-launch scan by default, or `--probe` for a live capture). The [adapter authoring guide](docs/adapter-authoring-guide.md) walks through using it end to end. Cursor CLI is currently blocked on two gaps, for whoever picks it up: token usage is not exposed anywhere (hooks, JSON output, or on-disk chats), and slash-command expansion of the initial prompt argument is unverified — its `sessionStart`/`stop` hooks do fire in the CLI, so a profile using the window-death fallback plus `usage_parser = "none"` is feasible. diff --git a/docs/FEATURES.md b/docs/FEATURES.md index 447f595..663a87a 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -113,8 +113,10 @@ See [README.md](../README.md) for the narrative overview and [setup-guide.md](se - Generic tmux adapter drives any CLI fitting the tmux-injection + hook-signal transport; CLI specifics live in declarative TOML profiles. - Supported, E2E-verified: `claude` (reference), `codex` (≥ 0.139), `gemini` (≥ 0.46). 
+- Bundled but pending live E2E verification: `copilot` (GitHub Copilot CLI ≥ 2026-02; VS Code-compatible `Stop` hook, `-i` interactive launch, `--allow-all-tools`). - Per-stage CLI/model overrides: run dev on one CLI/model, review on another (`[adapter.dev]`, `[adapter.review]`, `[adapter.triage]`). - Add a CLI without touching Python: drop a TOML profile in `.automator/profiles/.toml` (binary, prompt template, bypass flags, hook dialect, native→canonical event map). +- `bmad-auto probe-adapter` collects + sanitizes the data needed to finalize/add a profile (hook payload shape, transcript location/format, token schema): a zero-launch scan by default, opt-in `--probe` for live capture. See the [adapter authoring guide](adapter-authoring-guide.md). ### Budgeting & cost tracking @@ -170,4 +172,5 @@ See [README.md](../README.md) for the narrative overview and [setup-guide.md](se - `bmad-auto cleanup` — remove leftover tmux artifacts for finished/stopped runs. - `bmad-auto clean` — reclaim disk from concluded runs per `[cleanup]`: tear down worktrees a mid-flight stop orphaned, trim heavy `worktrees/` from runs kept for history, archive/delete past the retention window (`--dry-run`, `--keep`, `--retain N`, `--hard`). - `bmad-auto tui` — the interactive dashboard (`--low-frame-rate` for slow/SSH links). +- `bmad-auto probe-adapter ` (`collect-adapter-data`) — collect + sanitize adapter-finalization data for a CLI profile; default zero-launch scan, opt-in `--probe` live capture. - Every command takes `--project ` (default: current directory). Any `` accepts a partial — the tail after the last `-`, shortened to any unique prefix. diff --git a/docs/README.md b/docs/README.md index 54dc5fb..19b5e78 100644 --- a/docs/README.md +++ b/docs/README.md @@ -11,6 +11,7 @@ guides below go deeper, roughly in the order you'll need them. 
## Extending bmad-auto +- **[Finalizing a CLI adapter profile](adapter-authoring-guide.md)** — using `bmad-auto probe-adapter` to collect + sanitize the hook payload shape, transcript location, and token schema a new CLI profile needs. - **[Writing a bmad-auto plugin](plugin-authoring-guide.md)** — the plugin system: `plugin.toml` manifest, hooks, lifecycle stages, settings, the trust model, and workflow injection, with a worked walkthrough. - **[Writing a Game Engine plugin](game-engine-plugin-guide.md)** — the game-engine layer (built on the plugin system): driving a live engine Editor, the `editor_mode` ↔ `[scm] isolation` coupling, a minimal Godot example. - **[Writing a plugin for a specific Editor MCP](game-engine-mcp-guide.md)** — Editor-MCP specifics for the bundled Unity plugin: IvanMurzak vs CoplayDev, readiness probes, `per_worktree` isolation, and the full `BMAD_AUTO_*` env-var reference. diff --git a/docs/adapter-authoring-guide.md b/docs/adapter-authoring-guide.md new file mode 100644 index 0000000..3d90a62 --- /dev/null +++ b/docs/adapter-authoring-guide.md @@ -0,0 +1,164 @@ +# Finalizing a CLI adapter profile with `probe-adapter` + +bmad-auto drives any coding CLI that fits the **tmux-injection + hook-signal** +transport through one generic adapter (`adapters/generic_tmux.py`); everything +CLI-specific lives in a declarative **TOML profile** (`adapters/profile.py`). The +[README adapter section](../README.md#other-coding-clis) covers the profile fields +and how to drop one in without touching Python. + +The hard part of a new profile isn't the TOML — it's the **facts that live in no +doc**: the CLI's exact hook payload shape (field names and casing, whether +`session_id` / `transcript_path` / `cwd` are present), where it writes its session +transcript and in what format, and the token-usage schema a `usage_parser` has to +read. 
Historically the only way to get these was to hand a volunteer a manual +recipe and ask them to sanitize the output by hand — error-prone and PII-risky. + +**`bmad-auto probe-adapter`** (alias `collect-adapter-data`) pulls all of that and +runs it through an audited sanitizer, so a user of any coding CLI can run one +command and paste back a clean, content-free report. + +```bash +bmad-auto probe-adapter <cli> --project . # default: zero-launch scan +bmad-auto probe-adapter <cli> --probe --project . # opt-in live capture +``` + +--- + +## Two modes + +Both modes emit the **same single sanitized report** (markdown to stdout, or to a +file with `--out`; add `--json` for a machine-readable block). + +### SCAN (default — no process launch) + +Runs `<binary> --version` / `--help`, locates the newest **already-existing** +session transcript by convention, reads the declared hook config, and infers the +token schema from the transcript. Works whenever you've used the CLI before, with +zero execution risk. This is the right first step for any CLI that already has a +profile (claude/codex/gemini/copilot) or that you've run by hand. + +### PROBE (`--probe` — opt-in live capture) + +In an ephemeral `mkdtemp` workspace, `probe` registers a full-payload capture hook +for every native event in the profile, launches **one trivial content-free turn** +(`Reply with exactly: OK`) in a tmux window, captures each hook event's complete +payload, locates the transcript, then tears everything down. Use it to confirm the +**exact hook payload shape** and that the CLI actually **accepts the hook dialect** +your profile declares — facts scan can't see without running the CLI. + +`--probe` needs a known profile (it uses the profile's hook dialect and event map). +If `tmux` or the binary is missing, probe degrades gracefully to a scan. + +--- + +## PII safety model + +The report is built to be **safe to paste into an issue or PR**. 
A single audited +sanitizer (`src/automator/sanitize.py`) is the only chokepoint: + +- **numbers, booleans, and `null` pass through** — token _counts_ are not PII; +- **dict keys are kept verbatim** — field names and casing are the whole point of + a payload probe; +- every **leaf string** is `$HOME`→`~` redacted and then kept **only if** it looks + like a short machine identifier (e.g. `claude-opus-4-8`, `session-abc_123`); + anything else — prose, code, paths, emails — is replaced with a redaction placeholder; +- **list lengths are preserved**, contents are scrubbed element by element; +- `--help` / `--version` text and log tails have the home dir and any emails + redacted, with a line cap. + +In PROBE mode the raw capture exists **only transiently** inside the temp dir, +which is `rmtree`'d in a `finally` (even on exception or Ctrl-C). The CLI's own +transcript stays in its home dir — the command reads its _structure_, never copies +it. A hidden `--keep-temp` flag retains the raw temp dir for debugging and prints a +loud **"raw retained — do not share"** warning; never paste a `--keep-temp` run. + +--- + +## Walkthrough: finalizing a profile + +### 1. Draft a profile + +Drop a TOML file in `<project>/.automator/profiles/<name>.toml` with the fields +described in the [README adapter section](../README.md#other-coding-clis). The +contract is the `CLIProfile` / `HookSpec` dataclasses in +[`src/automator/adapters/profile.py`](../src/automator/adapters/profile.py): a +`binary`, a `prompt_template`, bypass flags, a `[hooks]` block picking one of the +config dialects (`claude-settings-json` / `codex-hooks-json` / +`gemini-settings-json` / `copilot-settings-json`) and a native→canonical event +map, and a `usage_parser` (start with `"none"` until you've written one). + +### 2. Scan + +```bash +bmad-auto probe-adapter <cli> --project . +``` + +Read three sections of the report: + +- **CLI flags** — your profile's launch/bypass flags plus the scrubbed + `--version` / `--help`, so you can confirm the flags you chose exist. 
+- **Transcript** — the redacted location, format, size, line count, and modified + date of the newest transcript the convention glob found. +- **Token usage schema** — the structural key paths (types only, never values) and + the **token-field candidates** (int leaves whose names look token-ish). When a + real parser is already declared, its parsed counts are shown as a self-check. + +### 3. Probe (confirm the live payload + dialect) + +```bash +bmad-auto probe-adapter <cli> --probe --project /tmp/scratch +``` + +The **Hook payload shape** section now shows, per captured event, the native→ +canonical pairing, the payload keys, and the scrubbed payload — so you can confirm +`session_id` / `transcript_path` casing and that the CLI accepted the hook config +for your dialect. If the CLI rejects the config or never fires a hook, the report +says so (with a scrubbed log tail) instead of silently producing nothing. + +### 4. Write the `usage_parser` + +Turn the report's `token_field_candidates` into a parser in +[`src/automator/tokens.py`](../src/automator/tokens.py), following the existing +ones (`tally` for claude, `tally_codex_rollout`, `tally_gemini_chat`) and +registering it in `read_usage`. The report flags **per-call vs cumulative** as a +human judgment call — a `token_count`-style event that carries running totals (codex) is +read differently from per-message blocks that are summed (claude/gemini). Re-run +scan after wiring the parser: the **parsed counts** self-check should now appear. + +--- + +## Flags reference + +| Flag | Purpose | +| ------------------- | -------------------------------------------------------------------------------- | +| `--probe` | Opt-in live capture (default is scan). Needs a known profile. | +| `--transcript PATH` | Inspect this exact transcript file, bypassing convention discovery. | +| `--session-dir DIR` | Glob this dir (`**/*.jsonl` then `*.json`, newest) — for custom/unknown CLIs. 
| +| `--binary NAME` | Binary to probe for a CLI that has no profile yet (enables a reduced report). | +| `--model NAME` | Model passed to the probe turn (PROBE mode). | +| `--timeout SECONDS` | Probe turn timeout (default 90). | +| `--out FILE` | Write the report to a file instead of stdout (the only file the command writes). | +| `--json` | Append a machine-readable JSON block to the report. | +| `--keep-temp` | (hidden, debug) keep the raw probe temp dir — prints a "do not share" warning. | + +Exit codes mirror `validate`: `0` whenever a report is produced (warnings are +fine), `1` only when nothing could be produced. An **unknown CLI with `--binary`** +still yields a _reduced_ report (version/help + discovery, no hook events); an +unknown CLI without `--binary` fails and lists the available profiles. + +--- + +## Worked example: copilot + +The bundled `copilot` profile ships with `usage_parser = "none"` — Copilot's +token-usage schema hadn't been captured when the profile landed. That's exactly +the gap `probe-adapter` closes: + +```bash +bmad-auto probe-adapter copilot --probe --project /tmp/scratch +``` + +captures the `Stop` payload (confirming `session_id` / `transcript_path` casing), +locates `~/.copilot/session-state/*/events.jsonl`, and infers its token schema — +the data needed to write a `copilot-*` parser in `tokens.py` and flip the profile's +`usage_parser` off `"none"`. Confirm the `mkdtemp` dir is gone afterward. 
diff --git a/module.yaml b/module.yaml index eba5ce0..48a72b7 100644 --- a/module.yaml +++ b/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)" -module_version: 0.6.1 +module_version: 0.6.2 default_selected: false module_greeting: > BMAD Auto installed — both the four automation skills and the diff --git a/pyproject.toml b/pyproject.toml index 3e230ba..cf6c4e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bmad-auto" -version = "0.6.1" +version = "0.6.2" description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase" readme = "README.md" license = "MIT" diff --git a/src/automator/__init__.py b/src/automator/__init__.py index a25ea40..b96d34a 100644 --- a/src/automator/__init__.py +++ b/src/automator/__init__.py @@ -6,4 +6,4 @@ spec files, and the per-run directory under .automator/runs/. 
def cmd_probe(args: argparse.Namespace) -> int:
    """Handle `bmad-auto probe-adapter` (alias `collect-adapter-data`).

    Resolves the profile named by ``args.cli``, runs either a scan (default)
    or a live probe (``--probe``), renders the finding as markdown (plus an
    optional JSON block with ``--json``), and emits it to stdout or to the
    file given by ``--out``.

    Returns:
        0 when a report was produced; 1 when the profile is unknown and no
        ``--binary`` was given, when ``--probe`` is requested without a
        known profile, or when the ``--out`` file cannot be written.
    """
    # Imported lazily so the probe machinery is only loaded for this command.
    from . import probe as probe_mod
    from .adapters.profile import ProfileError, get_profile

    project = _project(args)
    hints = probe_mod.Hints(
        binary=args.binary,
        transcript=args.transcript,
        session_dir=args.session_dir,
        model=args.model,
    )

    # An unknown profile is only fatal when there is no --binary to fall back
    # on; with --binary the scan continues with profile=None (reduced report).
    profile = None
    try:
        profile = get_profile(args.cli, project)
    except ProfileError as e:
        if not args.binary:
            print(f"FAIL: {e}", file=sys.stderr)
            return 1
        print(f" ok: unknown profile {args.cli!r}; reduced report from --binary {args.binary}")

    if args.probe:
        # A live probe is driven by the profile, so it cannot run without one.
        if profile is None:
            print("FAIL: --probe needs a known profile (its hook dialect/events)", file=sys.stderr)
            return 1
        finding = probe_mod.probe(
            cli=args.cli,
            profile=profile,
            project=project,
            hints=hints,
            timeout_s=args.timeout,
            keep_temp=args.keep_temp,
        )
    else:
        finding = probe_mod.scan(cli=args.cli, profile=profile, project=project, hints=hints)

    report = probe_mod.render_markdown(finding)
    if args.json:
        report = report + "\n\n## JSON\n\n```json\n" + probe_mod.render_json(finding) + "\n```\n"

    if args.out:
        out_path = Path(args.out)
        try:
            out_path.write_text(report, encoding="utf-8")
        except OSError as e:
            # Missing directory, permissions, etc.: fail the same way the other
            # error paths in this command do instead of dumping a traceback.
            print(f"FAIL: cannot write report to {out_path}: {e}", file=sys.stderr)
            return 1
        print(f" ok: report written to {out_path} ({len(finding.warnings)} warning(s))")
    else:
        # NOTE(review): the status line below shares stdout with the report, so
        # `probe-adapter ... > report.md` captures it too; consider stderr.
        print(report)
        print(f" ok: {finding.mode} report for {args.cli} ({len(finding.warnings)} warning(s))")
    return 0
run_p.add_argument("--story", help="story: E-S / E.S, a slug fragment, or full key") diff --git a/src/automator/data/bmad_auto_probe_hook.py b/src/automator/data/bmad_auto_probe_hook.py new file mode 100644 index 0000000..c0e9109 --- /dev/null +++ b/src/automator/data/bmad_auto_probe_hook.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +"""Full-payload capture hook for `bmad-auto probe-adapter --probe`. Stdlib only. + +A throwaway sibling of bmad_auto_hook.py used ONLY during an opt-in live probe. +It no-ops (exit 0) unless BMAD_AUTO_PROBE_CAPTURE_DIR is set — a DISTINCT env var +from the real relay's BMAD_AUTO_RUN_DIR, so the capture hook and the signal relay +can never fire in each other's context (a normal interactive session sees neither). + +For every event it writes two files atomically into the capture dir: + + -.signal.json SignalWatcher-shaped {ts,event,task_id,session_id, + transcript_path,cwd} so the probe's completion poll + (a plain SignalWatcher over the capture dir) works + with no change to the watcher. + -.payload.json the ENTIRE raw stdin payload plus an injected + "argv_event" (the native event name from argv, for + native->canonical pairing) so a maintainer can read + the CLI's exact field names and casing. The probe + command sanitizes this before it is ever shown; + nothing written here is displayed raw. + +Tolerant of empty/garbage stdin and of write errors — it must never crash the +CLI window it is hooked into. 
+""" + +import json +import os +import sys +import time + + +def _atomic_write(path: str, obj) -> None: + tmp = path + ".tmp" + with open(tmp, "w", encoding="utf-8") as f: + json.dump(obj, f) + os.replace(tmp, path) + + +def main() -> int: + capture_dir = os.environ.get("BMAD_AUTO_PROBE_CAPTURE_DIR") + if not capture_dir: + return 0 + task_id = os.environ.get("BMAD_AUTO_TASK_ID", "probe") + event_name = sys.argv[1] if len(sys.argv) > 1 else "Unknown" + try: + payload = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + payload = {} + if not isinstance(payload, dict): + payload = {} + + ts = time.time_ns() + try: + os.makedirs(capture_dir, exist_ok=True) + signal = { + "ts": ts, + "event": event_name, + "task_id": task_id, + "session_id": payload.get("session_id") or payload.get("conversation_id"), + "transcript_path": payload.get("transcript_path"), + "cwd": payload.get("cwd"), + } + _atomic_write(os.path.join(capture_dir, f"{ts}-{event_name}.signal.json"), signal) + captured = dict(payload) + captured["argv_event"] = event_name + _atomic_write(os.path.join(capture_dir, f"{ts}-{event_name}.payload.json"), captured) + except OSError: + return 0 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/automator/data/profiles/copilot.toml b/src/automator/data/profiles/copilot.toml new file mode 100644 index 0000000..1437595 --- /dev/null +++ b/src/automator/data/profiles/copilot.toml @@ -0,0 +1,34 @@ +# GitHub Copilot CLI (GA since 2026-02). `-i ""` starts an interactive +# session and auto-runs the prompt (a bare `-p` prompt runs headless and exits). +# Copilot has no native skill discovery, so the prompt tells it to read the +# SKILL.md directly (skills live in .agents/skills/, shared with codex/gemini and +# matching upstream BMAD-METHOD's github-copilot installer). "use subagents as +# needed" keeps parallel skill phases (e.g. review layers) actually spawning +# subagents, same as codex. 
+# +# Hook events are registered under Copilot's VS Code-compatible PascalCase names +# (Stop/SessionStart/SessionEnd/PreCompact, same set as claude). That casing makes +# Copilot emit SNAKE_CASE payloads (session_id, transcript_path, cwd) — exactly +# what the shared relay reads — and the Stop payload carries transcript_path, so no +# relay change is needed and a future usage_parser gets the transcript for free. +# (The camelCase names agentStop/sessionStart emit camelCase payloads the relay +# would miss.) NOTE: an enterprise policy permissions.disableBypassPermissionsMode +# = 'disable' suppresses the --allow-all-* flags and will block unattended runs. +name = "copilot" +binary = "copilot" +prompt_template = "LOAD the FULL .agents/skills/{skill}/SKILL.md, read its entire contents and follow its directions exactly, using subagents as needed: {args}" +launch_args = ["-i"] +bypass_args = ["--allow-all-tools", "--allow-all-paths"] +model_flag = "--model" +usage_parser = "none" +first_run_note = "run `copilot` once and authenticate (gh / Copilot subscription); requires Copilot CLI GA (>= 2026-02)" +skill_tree = ".agents/skills" +# .github/copilot/settings.json is the inline hook config (and can also hold MCP +# servers) — gitignored in many projects, so a worktree checkout omits it and +# isolated sessions lose it; seeded first, then the Stop hook is merged in. 
+seed_files = [".github/copilot/settings.json"] + +[hooks] +dialect = "copilot-settings-json" +config_path = ".github/copilot/settings.json" +events = { Stop = "Stop", SessionStart = "SessionStart", SessionEnd = "SessionEnd", PreCompact = "PreCompact" } diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml index eba5ce0..48a72b7 100644 --- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml +++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)" -module_version: 0.6.1 +module_version: 0.6.2 default_selected: false module_greeting: > BMAD Auto installed — both the four automation skills and the diff --git a/src/automator/install.py b/src/automator/install.py index 8a7b194..0f3cad7 100644 --- a/src/automator/install.py +++ b/src/automator/install.py @@ -4,7 +4,7 @@ - idempotently merges hook registrations into each selected CLI's hook config (dialect + native->canonical event map come from the CLI profile) - installs the bundled bmad-auto-* skills into each selected CLI's skill tree - (.claude/skills for claude, .agents/skills for codex/gemini) + (.claude/skills for claude, .agents/skills for codex/gemini/copilot) - writes .automator/policy.toml from the template (if missing) - gitignores generated dirs: .automator/runs/ (per-run state) and .automator/cache/ (engine plugins' rebuildable caches, e.g. 
the Unity Library) @@ -28,8 +28,12 @@ from .policy import POLICY_TEMPLATE HOOK_SCRIPT_REL = ".automator/bmad_auto_hook.py" -HOOK_MARKER = "bmad_auto_hook.py" +# Dedup marker: matches any bmad-auto-managed hook command — both the signal +# relay (bmad_auto_hook.py) and the probe-adapter capture hook +# (bmad_auto_probe_hook.py) — so merge_hooks stays idempotent for either. +HOOK_MARKER = "bmad_auto" GEMINI_HOOK_TIMEOUT_MS = 60_000 +COPILOT_HOOK_TIMEOUT_SEC = 60 # The bmad-auto-* skills bundled in the wheel (automator/data/skills/) that # `bmad-auto init` lays down. They must be installed together — bmad-auto-review @@ -56,6 +60,9 @@ def _hook_entry(dialect: str, command: str) -> dict: if dialect == "gemini-settings-json": handler["timeout"] = GEMINI_HOOK_TIMEOUT_MS # Gemini timeouts are milliseconds return {"matcher": "", "hooks": [handler]} + if dialect == "copilot-settings-json": + handler["timeoutSec"] = COPILOT_HOOK_TIMEOUT_SEC # Copilot timeouts are seconds + return handler # Copilot stores the handler directly in the event list # claude-settings-json and codex-hooks-json share the schema return {"hooks": [handler]} @@ -63,14 +70,19 @@ def _hook_entry(dialect: str, command: str) -> dict: def merge_hooks(config: dict, registrations: dict[str, str], dialect: str) -> tuple[dict, bool]: """Add relay registrations (native event -> command) to a hook config dict.""" changed = False + if dialect == "copilot-settings-json": + config.setdefault("version", 1) # Copilot hook configs are versioned hooks = config.setdefault("hooks", {}) for native_event, command in registrations.items(): matchers = hooks.setdefault(native_event, []) + # claude/codex/gemini nest handlers under "hooks"; copilot stores the + # handler dict directly in the event list — check both shapes so a re-run + # stays idempotent for every dialect. 
already = any( HOOK_MARKER in handler.get("command", "") - for matcher in matchers - if isinstance(matcher, dict) - for handler in matcher.get("hooks", []) + for entry in matchers + if isinstance(entry, dict) + for handler in (entry, *entry.get("hooks", [])) if isinstance(handler, dict) ) if not already: diff --git a/src/automator/probe.py b/src/automator/probe.py new file mode 100644 index 0000000..2ce91d5 --- /dev/null +++ b/src/automator/probe.py @@ -0,0 +1,795 @@ +"""`bmad-auto probe-adapter`: collect + sanitize adapter-finalization data. + +Finalizing a generic-adapter CLI profile needs facts that live in no doc: the +CLI's exact hook payload shape (field names/casing, whether transcript_path / +session_id / cwd are present), where its transcript lives and in what format, +and the token-usage schema a `usage_parser` must read. This command pulls all of +that and runs it through the audited :mod:`automator.sanitize` chokepoint, so a +user of any coding CLI can run one command and paste back a clean, content-free +report. + +Two strategies, one report shape: + +- SCAN (default, zero process launch beyond ``--version``/``--help``): locate the + newest already-existing transcript by convention, read the declared hook config, + infer the token schema. Works whenever the user has used the CLI before. +- PROBE (``--probe``, opt-in): in an ephemeral ``mkdtemp`` workspace, register the + full-payload capture hook for every native event, launch one trivial content-free + turn in a tmux window, capture each event's complete payload, then tear down. The + raw capture exists only transiently inside the temp dir, which is ``rmtree``'d in a + ``finally`` (even on exception / Ctrl-C). 
+""" + +from __future__ import annotations + +import glob +import os +import re +import shlex +import shutil +import subprocess +import tempfile +import time +from dataclasses import dataclass, field +from datetime import datetime, timezone +from importlib import resources +from pathlib import Path + +from . import sanitize +from .adapters.profile import CLIProfile +from .install import merge_hooks +from .signals import SignalWatcher +from .tokens import _jsonl_entries, read_usage + +# Per-parser transcript-location conventions (from tokens.py docstrings). +TRANSCRIPT_GLOBS = { + "claude-jsonl": "~/.claude/projects/*/*.jsonl", + "codex-rollout": "~/.codex/sessions/*/*/*/rollout-*.jsonl", + "gemini-chat": "~/.gemini/tmp/*/chats/session-*.jsonl", +} +# Fallback family glob keyed by the `cli` name, so a CLI whose usage_parser is +# still "none" (e.g. copilot, freshly added) still gets transcript discovery. +FAMILY_GLOBS = { + "claude": "~/.claude/projects/*/*.jsonl", + "codex": "~/.codex/sessions/*/*/*/rollout-*.jsonl", + "gemini": "~/.gemini/tmp/*/chats/session-*.jsonl", + "copilot": "~/.copilot/session-state/*/events.jsonl", +} + +_TOKEN_KEY_RE = re.compile( + r"(token|tokens|cached|input|output|prompt|completion|thoughts|usage)", re.I +) + +PROBE_HOOK_NAME = "bmad_auto_probe_hook.py" +PROBE_PROMPT = "Reply with exactly: OK" +PROBE_TASK_ID = "probe" +TMUX_TIMEOUT_S = 30 +PROBE_GRACE_S = 3.0 +MAX_SCHEMA_ENTRIES = 200 + + +# --------------------------------------------------------------- dataclasses + + +@dataclass +class FlagFinding: + binary: str + found: bool + version: str | None = None # scrubbed + help: str | None = None # scrubbed + + +@dataclass +class TranscriptFinding: + glob: str | None = None # the convention glob used (already ~-relative) + location: str | None = None # redacted path of the chosen transcript + fmt: str | None = None # "jsonl" | "json" + size_bytes: int | None = None + line_count: int | None = None + mtime_date: str | None = None # date 
only (no time), UTC + multiple: bool = False + note: str | None = None + real_path: Path | None = None # NOT rendered; used for schema inference + + +@dataclass +class TokenSchema: + parser: str + entries_scanned: int = 0 + parsed_usage: dict | None = None # only when parser != "none" + key_paths: list[str] = field(default_factory=list) # "a.b.c:int", TYPE only + token_field_candidates: list[str] = field(default_factory=list) + + +@dataclass +class EventCapture: + native_event: str + canonical_event: str | None + payload_keys: list[str] + payload: dict # scrubbed + + +@dataclass +class ProfileFinding: + cli: str + mode: str # "scan" | "probe" + known_profile: bool + binary: str + parser: str + dialect: str | None = None + flags: FlagFinding | None = None + declared_events: dict = field(default_factory=dict) # native -> canonical + registered: bool | None = None # scan: hooks present in the CLI's config? + captured_events: list[EventCapture] = field(default_factory=list) # probe + transcript: TranscriptFinding | None = None + tokens: TokenSchema | None = None + warnings: list[str] = field(default_factory=list) + next_steps: list[str] = field(default_factory=list) + + +@dataclass +class Hints: + binary: str | None = None + transcript: str | None = None + session_dir: str | None = None + model: str | None = None + + +# ------------------------------------------------------------ version / help + + +def _run_capture(argv: list[str], timeout_s: float) -> str | None: + try: + proc = subprocess.run(argv, capture_output=True, text=True, timeout=timeout_s) + except (OSError, subprocess.SubprocessError): + return None + out = (proc.stdout or "") + (proc.stderr or "") + return out.strip() or None + + +def run_version_help(binary: str, timeout_s: float = 10) -> FlagFinding: + """Scrubbed ``--version``/``--help`` for a binary. 
Never raises.""" + if not shutil.which(binary): + return FlagFinding(binary=binary, found=False) + version = _run_capture([binary, "--version"], timeout_s) + help_txt = _run_capture([binary, "--help"], timeout_s) + return FlagFinding( + binary=binary, + found=True, + version=sanitize.scrub_text(version, max_lines=5) if version else None, + help=sanitize.scrub_text(help_txt, max_lines=80) if help_txt else None, + ) + + +# ------------------------------------------------------ transcript discovery + + +def _redact_location(path: Path) -> str: + """Redact a path to a paste-safe form: home -> ``~``, and any path component + that isn't a plain machine identifier (e.g. a munged-cwd dir that embeds a + username) -> ````. The session-id filename usually survives.""" + + def comp(c: str) -> str: + return c if sanitize.looks_like_identifier(c) else "" + + home = Path(os.path.expanduser("~")) + try: + rel = path.relative_to(home) + return "/".join(["~", *(comp(c) for c in rel.parts)]) + except ValueError: + parts = [comp(c) for c in path.parts if c not in ("/", "")] + return "/" + "/".join(parts) + + +def _describe_transcript(path: Path, *, glob_pat: str | None, multiple: bool) -> TranscriptFinding: + try: + stat = path.stat() + size = stat.st_size + mtime_date = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).strftime("%Y-%m-%d") + except OSError: + size, mtime_date = None, None + line_count = None + try: + with path.open(encoding="utf-8", errors="replace") as f: + line_count = sum(1 for _ in f) + except OSError: + pass + return TranscriptFinding( + glob=glob_pat, + location=_redact_location(path), + fmt="jsonl" if path.suffix == ".jsonl" else (path.suffix.lstrip(".") or "unknown"), + size_bytes=size, + line_count=line_count, + mtime_date=mtime_date, + multiple=multiple, + real_path=path, + ) + + +def _newest(paths: list[Path]) -> Path: + return max(paths, key=lambda p: p.stat().st_mtime if p.exists() else 0) + + +def discover_transcript( + parser: str, + *, + cli: 
str, + hints: Hints, +) -> TranscriptFinding | None: + """Locate the newest existing transcript via override or convention glob.""" + if hints.transcript: + path = Path(hints.transcript).expanduser() + if not path.is_file(): + return TranscriptFinding(note=f"--transcript path does not exist: {path.name}") + return _describe_transcript(path, glob_pat=None, multiple=False) + + if hints.session_dir: + base = Path(hints.session_dir).expanduser() + matches = sorted(base.glob("**/*.jsonl")) or sorted(base.glob("**/*.json")) + if not matches: + return TranscriptFinding(note=f"no *.jsonl/*.json under --session-dir {base.name}") + return _describe_transcript(_newest(matches), glob_pat=None, multiple=len(matches) > 1) + + pattern = TRANSCRIPT_GLOBS.get(parser) or FAMILY_GLOBS.get(cli) + if not pattern: + return TranscriptFinding( + note="no transcript-location convention for this CLI; " + "pass --transcript PATH or --session-dir DIR" + ) + matches = [Path(p) for p in glob.glob(os.path.expanduser(pattern))] + matches = [p for p in matches if p.is_file()] + if not matches: + return TranscriptFinding( + glob=pattern, + note="no existing transcript matched the convention glob; " + "use --transcript / --session-dir, or run --probe", + ) + return _describe_transcript(_newest(matches), glob_pat=pattern, multiple=len(matches) > 1) + + +# ---------------------------------------------------------- schema inference + + +def _type_name(value) -> str: + if value is None: + return "null" + if isinstance(value, bool): + return "bool" + if isinstance(value, int): + return "int" + if isinstance(value, float): + return "float" + if isinstance(value, str): + return "str" + return "other" + + +def _walk_paths(obj, prefix: str, out: set[str]) -> None: + """Collect dotted key paths with the LEAF TYPE only (never values); list + indices collapse to ``[]`` so ``messages[].tokens.input:int`` is one path. + + A dict key that isn't a plain identifier (e.g. 
a transcript that keys by + relative file path or a per-file backup id) is collapsed to ```` — + static field names (the ones a parser keys on, like ``input_tokens``) survive + untouched, but dynamic keys can't leak paths/content into the summary.""" + if isinstance(obj, dict): + for key, value in obj.items(): + key = str(key) if sanitize.looks_like_identifier(str(key)) else "" + child = f"{prefix}.{key}" if prefix else key + _walk_paths(value, child, out) + elif isinstance(obj, list): + child = f"{prefix}[]" + for value in obj: + _walk_paths(value, child, out) + else: + out.add(f"{prefix}:{_type_name(obj)}") + + +def _is_token_candidate(path: str) -> bool: + name, _, typ = path.rpartition(":") + if typ != "int": + return False + last = name.split(".")[-1].replace("[]", "") + return bool(_TOKEN_KEY_RE.search(last)) + + +def infer_token_schema( + parser: str, path: Path, *, max_entries: int = MAX_SCHEMA_ENTRIES +) -> TokenSchema: + """Structural key-path summary (types only) + token-field candidates. + + Works even when ``parser == "none"``: the candidates are exactly what a + maintainer needs to write a parser for a brand-new CLI. When a real parser + exists, its parsed integer counts are included as a self-check. 
+ """ + paths: set[str] = set() + scanned = 0 + for entry in _jsonl_entries(path): + if scanned >= max_entries: + break + scanned += 1 + _walk_paths(entry, "", paths) + candidates = sorted(p for p in paths if _is_token_candidate(p)) + parsed = None + if parser != "none": + usage = read_usage(parser, path) + if usage is not None: + parsed = usage.to_dict() + return TokenSchema( + parser=parser, + entries_scanned=scanned, + parsed_usage=parsed, + key_paths=sorted(paths), + token_field_candidates=candidates, + ) + + +# --------------------------------------------------------------- hook config + + +def _hooks_registered(project: Path, profile: CLIProfile) -> bool: + config_path = project / profile.hooks.config_path + if not config_path.is_file(): + return False + import json + + try: + hooks = json.loads(config_path.read_text(encoding="utf-8")).get("hooks", {}) + except (json.JSONDecodeError, OSError): + return False + return any( + "bmad_auto_hook" in json.dumps(hooks.get(event, [])) for event in profile.hooks.events + ) + + +# ----------------------------------------------------------------- SCAN mode + + +def scan( + *, + cli: str, + profile: CLIProfile | None, + project: Path, + hints: Hints, +) -> ProfileFinding: + binary = hints.binary or (profile.binary if profile else cli) + parser = profile.usage_parser if profile else "none" + finding = ProfileFinding( + cli=cli, + mode="scan", + known_profile=profile is not None, + binary=binary, + parser=parser, + dialect=profile.hooks.dialect if profile else None, + declared_events=dict(profile.hooks.events) if profile else {}, + ) + + finding.flags = run_version_help(binary) + if not finding.flags.found: + finding.warnings.append( + f"binary {binary!r} not found on PATH — version/help unavailable " + "(scan continues from on-disk conventions)" + ) + + if profile is not None: + finding.registered = _hooks_registered(project, profile) + if not finding.registered: + finding.next_steps.append( + f"hooks not registered in 
{profile.hooks.config_path}; " + f"`bmad-auto init --cli {cli}` to validate the dialect end-to-end, " + "or re-run with --probe" + ) + + finding.transcript = discover_transcript(parser, cli=cli, hints=hints) + if finding.transcript and finding.transcript.note: + finding.warnings.append(finding.transcript.note) + if finding.transcript and finding.transcript.real_path is not None: + finding.tokens = infer_token_schema(parser, finding.transcript.real_path) + if finding.transcript.multiple: + finding.next_steps.append( + "multiple fresh transcripts matched; pass --transcript to pin the right one" + ) + return finding + + +# ---------------------------------------------------------- PROBE tmux launcher + + +class _ProbeLauncher: + """The few tmux primitives PROBE needs — deliberately NOT GenericTmuxAdapter, + which mandates a Policy and story-completion logic irrelevant here.""" + + def __init__(self, session_name: str): + self.session_name = session_name + + def _tmux(self, *args: str) -> subprocess.CompletedProcess: + return subprocess.run( + ["tmux", *args], capture_output=True, text=True, timeout=TMUX_TIMEOUT_S + ) + + def start(self, argv: list[str], env: dict[str, str], cwd: Path, log_file: Path) -> str | None: + new = self._tmux( + "new-session", "-d", "-s", self.session_name, "-c", str(cwd), "-x", "220", "-y", "50" + ) + if new.returncode != 0: + return None + env_args: list[str] = [] + for key, value in env.items(): + env_args += ["-e", f"{key}={value}"] + command = " ".join(shlex.quote(a) for a in argv) + win = self._tmux( + "new-window", + "-t", + f"={self.session_name}:", + "-c", + str(cwd), + "-P", + "-F", + "#{window_id}", + *env_args, + command, + ) + if win.returncode != 0: + return None + window_id = win.stdout.strip() + # pipe-pane may race a window that dies instantly; tolerate failure. 
+ self._tmux("pipe-pane", "-t", window_id, "-o", f"cat >> {shlex.quote(str(log_file))}") + return window_id + + def window_alive(self, window_id: str) -> bool: + probe = self._tmux("list-windows", "-t", f"={self.session_name}", "-F", "#{window_id}") + if probe.returncode != 0: + return False + return window_id in probe.stdout.split() + + def kill(self) -> None: + self._tmux("kill-session", "-t", f"={self.session_name}") + + +def _probe_argv(profile: CLIProfile, binary: str, hints: Hints) -> list[str]: + argv = [ + binary, + *profile.launch_args, + profile.render_prompt(PROBE_PROMPT), + *profile.bypass_args, + ] + if hints.model: + argv += [profile.model_flag, hints.model] + return argv + + +def _collect_captures(capture_dir: Path, events_map: dict[str, str]) -> list[EventCapture]: + captures: list[EventCapture] = [] + for payload_file in sorted(capture_dir.glob("*.payload.json")): + import json + + try: + raw = json.loads(payload_file.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + continue + if not isinstance(raw, dict): + continue + native = str(raw.pop("argv_event", "Unknown")) + captures.append( + EventCapture( + native_event=native, + canonical_event=events_map.get(native), + payload_keys=sorted(raw.keys()), + payload=sanitize.scrub_event_payload(raw), + ) + ) + return captures + + +def probe( + *, + cli: str, + profile: CLIProfile, + project: Path, + hints: Hints, + timeout_s: float = 90, + keep_temp: bool = False, +) -> ProfileFinding: + import json + + binary = hints.binary or profile.binary + finding = ProfileFinding( + cli=cli, + mode="probe", + known_profile=True, + binary=binary, + parser=profile.usage_parser, + dialect=profile.hooks.dialect, + declared_events=dict(profile.hooks.events), + ) + finding.flags = run_version_help(binary) + + if not shutil.which("tmux") or not shutil.which(binary): + missing = "tmux" if not shutil.which("tmux") else binary + finding.warnings.append(f"{missing} not on PATH — cannot probe; falling back 
to scan") + scanned = scan(cli=cli, profile=profile, project=project, hints=hints) + scanned.mode = "probe" + return scanned + + tmpdir = Path(tempfile.mkdtemp(prefix="bmad-auto-probe-")) + launcher = _ProbeLauncher(session_name=f"bmad-auto-probe-{tmpdir.name}") + try: + capture_dir = tmpdir / "capture" + capture_dir.mkdir(parents=True, exist_ok=True) + + # 1. lay down the capture hook + a hook config registered through the very + # same merge_hooks `bmad-auto init` uses — so a bad dialect surfaces live. + hook_src = resources.files("automator.data").joinpath(PROBE_HOOK_NAME) + hook_path = tmpdir / PROBE_HOOK_NAME + hook_path.write_text(hook_src.read_text(encoding="utf-8"), encoding="utf-8") + registrations = { + native: f"python3 {shlex.quote(str(hook_path))} {canonical}" + for native, canonical in profile.hooks.events.items() + } + config, _ = merge_hooks({}, registrations, profile.hooks.dialect) + config_path = tmpdir / profile.hooks.config_path + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8") + + # 2. launch one trivial content-free turn in a fresh tmux window + argv = _probe_argv(profile, binary, hints) + env = { + **profile.env, + "BMAD_AUTO_RUN_DIR": str(tmpdir), + "BMAD_AUTO_TASK_ID": PROBE_TASK_ID, + "BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture_dir), + } + log_file = tmpdir / "probe.log" + watcher = SignalWatcher(capture_dir) + launched_ns = time.time_ns() + window_id = launcher.start(argv, env, tmpdir, log_file) + if window_id is None: + finding.warnings.append("could not launch the CLI in tmux; no events captured") + return finding + + # 3. completion: first of — canonical Stop for `probe`; any capture file + # appeared and the window died; window died; deadline. 
+ deadline = time.monotonic() + timeout_s + while True: + remaining = deadline - time.monotonic() + if remaining <= 0: + finding.warnings.append( + "no Stop event before --timeout; the CLI may need first-run auth " + "(a pending login dialog reads as a timeout). See the log tail below." + ) + break + event = watcher.wait_for( + PROBE_TASK_ID, + {"Stop"}, + timeout_s=min(remaining, 5.0), + since_ns=launched_ns, + ) + if event is not None: + break + alive = launcher.window_alive(window_id) + captured_any = any(capture_dir.glob("*.payload.json")) + if not alive: + if not captured_any: + finding.warnings.append( + "the CLI window died before any hook fired — the dialect may be " + f"rejected for {profile.hooks.dialect}, or launch/auth failed. " + "See the log tail below." + ) + break + + # 4. one short grace poll so a Stop's sibling files all land, then collect. + time.sleep(PROBE_GRACE_S) + finding.captured_events = _collect_captures(capture_dir, profile.hooks.events) + if not finding.captured_events: + finding.next_steps.append( + "no hook payloads captured — confirm the CLI is authenticated and that " + f"the {profile.hooks.dialect} hook config is accepted, then re-run --probe" + ) + tail = _log_tail(log_file) + if tail: + finding.warnings.append("log tail (scrubbed):\n" + tail) + + # 5. 
transcript discovery + schema inference from the user's real home + finding.transcript = discover_transcript(profile.usage_parser, cli=cli, hints=hints) + if finding.transcript and finding.transcript.note: + finding.warnings.append(finding.transcript.note) + if finding.transcript and finding.transcript.real_path is not None: + finding.tokens = infer_token_schema(profile.usage_parser, finding.transcript.real_path) + return finding + finally: + launcher.kill() + if keep_temp: + finding.warnings.append( + f"--keep-temp: RAW probe data retained at {tmpdir} — DO NOT SHARE; " + "delete it after inspection" + ) + else: + shutil.rmtree(tmpdir, ignore_errors=True) + + +def _log_tail(log_file: Path, max_lines: int = 20) -> str | None: + try: + text = log_file.read_text(encoding="utf-8", errors="replace") + except OSError: + return None + if not text.strip(): + return None + lines = text.splitlines()[-max_lines:] + return sanitize.scrub_text("\n".join(lines), max_lines=max_lines) + + +# ------------------------------------------------------------------ rendering + + +def _fmt_kv(label: str, value) -> str: + return f"- **{label}:** {value}" + + +def render_markdown(f: ProfileFinding) -> str: + out: list[str] = [] + out.append(f"# Profile finalize report — {f.cli} ({f.mode})") + out.append("") + + # Summary + out.append("## Summary") + out.append(_fmt_kv("CLI", f.cli)) + out.append( + _fmt_kv("binary", f"{f.binary} ({'found' if f.flags and f.flags.found else 'NOT found'})") + ) + out.append(_fmt_kv("known profile", "yes" if f.known_profile else "no (reduced report)")) + out.append(_fmt_kv("hook dialect", f.dialect or "—")) + out.append(_fmt_kv("usage_parser", f.parser)) + if f.registered is not None: + out.append(_fmt_kv("hooks registered", "yes" if f.registered else "no")) + out.append(_fmt_kv("warnings", str(len(f.warnings)))) + out.append("") + + # CLI flags + out.append("## CLI flags") + out.append(_fmt_kv("launch_args / bypass_args", "see profile (rendered verbatim 
below)")) + if f.flags and f.flags.version: + out.append("\n```\n" + f.flags.version + "\n```") + if f.flags and f.flags.help: + out.append("\n
--help (scrubbed)\n") + out.append("```\n" + f.flags.help + "\n```") + out.append("
") + if not f.flags or not f.flags.found: + out.append("_binary not available; flags/help not captured._") + out.append("") + + # Hook payload shape + out.append("## Hook payload shape") + if f.mode == "scan": + if f.declared_events: + out.append( + "Declared native → canonical events (registered = " + f"{'yes' if f.registered else 'no'}):" + ) + for native, canonical in f.declared_events.items(): + out.append(f"- `{native}` → `{canonical}`") + else: + out.append("_no profile; events unknown. Re-run with --probe to capture payloads._") + else: + if f.captured_events: + for ev in f.captured_events: + out.append(f"### `{ev.native_event}` → `{ev.canonical_event or '?'}`") + out.append( + _fmt_kv("payload keys", ", ".join(f"`{k}`" for k in ev.payload_keys) or "—") + ) + out.append("\n```json\n" + _json_dump(ev.payload) + "\n```") + else: + out.append("_no hook payloads captured (see warnings)._") + out.append("") + + # Transcript + out.append("## Transcript") + t = f.transcript + if t and t.real_path is not None: + out.append(_fmt_kv("location", f"`{t.location}`")) + if t.glob: + out.append(_fmt_kv("matched glob", f"`{t.glob}`")) + out.append(_fmt_kv("format", t.fmt)) + out.append(_fmt_kv("size", f"{t.size_bytes} bytes")) + out.append(_fmt_kv("lines", t.line_count)) + out.append(_fmt_kv("mtime", t.mtime_date)) + if t.multiple: + out.append("- _multiple candidates matched; newest shown — pass --transcript to pin._") + else: + out.append("_no transcript located._" + (f" ({t.note})" if t and t.note else "")) + out.append("") + + # Token usage schema + out.append("## Token usage schema") + tk = f.tokens + if tk: + out.append(_fmt_kv("declared parser", tk.parser)) + out.append(_fmt_kv("entries scanned", tk.entries_scanned)) + if tk.parsed_usage is not None: + out.append(_fmt_kv("parsed counts (self-check)", f"`{tk.parsed_usage}`")) + out.append( + "\n**Token-field candidates** (int leaves; per-call-vs-cumulative is a human call):" + ) + if tk.token_field_candidates: + for 
cand in tk.token_field_candidates: + out.append(f"- `{cand}`") + else: + out.append("- _none matched the token-name heuristic._") + out.append("\n
All key paths (types only, no values)\n") + out.append("```\n" + "\n".join(tk.key_paths) + "\n```") + out.append("
") + else: + out.append("_no transcript to infer from._") + out.append("") + + # Warnings / next steps + out.append("## Warnings / next steps") + if not f.warnings and not f.next_steps: + out.append("_none._") + for w in f.warnings: + out.append(f"- ⚠️ {w}") + for s in f.next_steps: + out.append(f"- → {s}") + out.append("") + return "\n".join(out) + + +def _json_dump(obj) -> str: + import json + + return json.dumps(obj, indent=2, sort_keys=True) + + +def render_json(f: ProfileFinding) -> str: + import json + + def transcript_dict(t: TranscriptFinding | None): + if t is None: + return None + return { + "glob": t.glob, + "location": t.location, + "format": t.fmt, + "size_bytes": t.size_bytes, + "line_count": t.line_count, + "mtime_date": t.mtime_date, + "multiple": t.multiple, + "note": t.note, + } + + data = { + "cli": f.cli, + "mode": f.mode, + "known_profile": f.known_profile, + "binary": f.binary, + "binary_found": bool(f.flags and f.flags.found), + "dialect": f.dialect, + "usage_parser": f.parser, + "hooks_registered": f.registered, + "declared_events": f.declared_events, + "version": f.flags.version if f.flags else None, + "help": f.flags.help if f.flags else None, + "captured_events": [ + { + "native_event": ev.native_event, + "canonical_event": ev.canonical_event, + "payload_keys": ev.payload_keys, + "payload": ev.payload, + } + for ev in f.captured_events + ], + "transcript": transcript_dict(f.transcript), + "tokens": ( + { + "parser": f.tokens.parser, + "entries_scanned": f.tokens.entries_scanned, + "parsed_usage": f.tokens.parsed_usage, + "key_paths": f.tokens.key_paths, + "token_field_candidates": f.tokens.token_field_candidates, + } + if f.tokens + else None + ), + "warnings": f.warnings, + "next_steps": f.next_steps, + } + return json.dumps(data, indent=2) diff --git a/src/automator/sanitize.py b/src/automator/sanitize.py new file mode 100644 index 0000000..8f583ab --- /dev/null +++ b/src/automator/sanitize.py @@ -0,0 +1,104 @@ +"""PII-scrubbing 
chokepoint for `bmad-auto probe-adapter`. + +Pure stdlib, no automator imports — the single audited place that decides what +data from a foreign CLI is safe to show a maintainer. The probe command routes +every captured payload, every help/version blob, and every discovered path +through here before rendering; nothing is displayed raw. + +Guarantees: +- token *counts* are non-PII, so numbers/bools/null pass through verbatim; +- dict **keys** are kept verbatim — field names/casing are the whole point of a + payload probe — but every leaf **string** is `$HOME`-redacted and then kept + ONLY if it matches a conservative identifier shape (a short slug with no + spaces / `@` / `/`, e.g. ``claude-opus-4-8`` or ``session-abc_123``); + anything else (prose, code, paths, emails) becomes ````; +- list lengths are preserved (the count is structural, the contents aren't); +- recursion is depth-guarded so a pathological payload can't blow the stack. +""" + +from __future__ import annotations + +import os +import re +from typing import Any + +# A conservative "this is a machine identifier, not prose or PII" shape: starts +# alphanumeric, then only word-ish chars (letters, digits, ``.`` ``_`` ``-``), +# bounded length. No spaces, no ``@``, no ``/`` — so emails, paths, and sentences +# can never satisfy it. Model ids and session/conversation ids do. +_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") +_IDENTIFIER_MAX = 80 + +_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}") + +_REDACTED_STR = "" +_REDACTED_EMAIL = "" +_REDACTED_DEPTH = "" + + +def _home() -> str: + home = os.path.expanduser("~") + return home if home and home != "~" else "" + + +def redact_home(s: str) -> str: + """Replace the current user's home directory prefix with ``~``. 
+ + Catches the literal expanded home (``/home/alice`` -> ``~``); the munged, + slash-stripped forms some CLIs use for directory names (``-home-alice-...``) + do not match a path and are handled by the identifier filter instead. + """ + home = _home() + if home and home != "/" and home in s: + s = s.replace(home, "~") + return s + + +def looks_like_identifier(s: str) -> bool: + """True for a short machine slug safe to surface verbatim (no PII).""" + return 0 < len(s) <= _IDENTIFIER_MAX and bool(_IDENTIFIER_RE.match(s)) + + +def scrub_text(s: str, *, max_lines: int | None = None) -> str: + """Sanitize free text (a CLI's ``--help`` / ``--version`` / a log tail). + + Less aggressive than :func:`scrub_json` — help text is the CLI's own and + flag lines must survive — so we only redact the home dir and any emails, + then optionally cap the line count. + """ + s = redact_home(s) + s = _EMAIL_RE.sub(_REDACTED_EMAIL, s) + if max_lines is not None: + lines = s.splitlines() + if len(lines) > max_lines: + dropped = len(lines) - max_lines + lines = lines[:max_lines] + [f"… ({dropped} more lines redacted)"] + s = "\n".join(lines) + return s + + +def _scrub(obj: Any, depth: int, max_depth: int) -> Any: + if depth > max_depth: + return _REDACTED_DEPTH + # bool is an int subclass — handled by the numeric branch; both pass through. 
+ if obj is None or isinstance(obj, (bool, int, float)): + return obj + if isinstance(obj, str): + red = redact_home(obj) + return red if looks_like_identifier(red) else _REDACTED_STR + if isinstance(obj, dict): + return {str(k): _scrub(v, depth + 1, max_depth) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [_scrub(v, depth + 1, max_depth) for v in obj] + # any other type (shouldn't appear in JSON) is treated as an opaque string + return _REDACTED_STR + + +def scrub_json(obj: Any, *, max_depth: int = 40) -> Any: + """Recursively sanitize a JSON-shaped value (see module docstring).""" + return _scrub(obj, 0, max_depth) + + +def scrub_event_payload(payload: Any) -> Any: + """Sanitize one captured hook payload — the probe's per-event chokepoint.""" + return scrub_json(payload) diff --git a/tests/test_install.py b/tests/test_install.py index ec1041d..cdc6c85 100644 --- a/tests/test_install.py +++ b/tests/test_install.py @@ -60,6 +60,55 @@ def test_merge_hooks_gemini_entry_shape(): assert handler["command"].endswith("bmad_auto_hook.py Stop") +def test_merge_hooks_copilot_entry_shape(): + profile = get_profile("copilot") + settings, _ = merge_hooks({}, _registrations(profile), profile.hooks.dialect) + assert settings["version"] == 1 # Copilot hook configs are versioned + # Copilot stores the handler dict directly in the event list (no "hooks" wrapper) + handler = settings["hooks"]["Stop"][0] + assert handler["type"] == "command" + assert handler["timeoutSec"] == 60 # Copilot hook timeouts are seconds + # registered under the native event but relaying the canonical name + assert handler["command"].endswith("bmad_auto_hook.py Stop") + + +def test_merge_hooks_copilot_idempotent(): + # the bare-handler shape must still dedupe on a re-run + profile = get_profile("copilot") + settings, _ = merge_hooks({}, _registrations(profile), profile.hooks.dialect) + again, changed = merge_hooks(settings, _registrations(profile), profile.hooks.dialect) + assert not 
changed + for event in profile.hooks.events: + assert len(again["hooks"][event]) == 1 + + +def test_copilot_profile_render_prompt(): + # {skill} must expand plainly (no codex-style $ prefix) into the SKILL.md path + profile = get_profile("copilot") + rendered = profile.render_prompt("/bmad-auto-dev 1-2-a") + assert ".agents/skills/bmad-auto-dev/SKILL.md" in rendered + assert "1-2-a" in rendered + + +def test_install_into_copilot(tmp_path): + assert install_into(tmp_path, clis=("copilot",)) == 0 + settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text()) + assert settings["version"] == 1 + # registered under VS Code-compatible PascalCase names (snake_case payloads) + assert set(settings["hooks"]) == {"Stop", "SessionStart", "SessionEnd", "PreCompact"} + cmd = settings["hooks"]["Stop"][0]["command"] + # absolute path baked in (no $CLAUDE_PROJECT_DIR equivalent in copilot) + assert str(tmp_path.resolve()) in cmd and cmd.endswith(" Stop") + # skills land in the shared .agents/skills tree + for skill in MODULE_SKILLS: + assert (tmp_path / ".agents" / "skills" / skill / "SKILL.md").is_file() + + # idempotent re-run does not duplicate the bare handler + assert install_into(tmp_path, clis=("copilot",)) == 0 + settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text()) + assert len(settings["hooks"]["Stop"]) == 1 + + def test_install_into_full(tmp_path): assert install_into(tmp_path) == 0 assert (tmp_path / ".automator" / "bmad_auto_hook.py").is_file() diff --git a/tests/test_probe.py b/tests/test_probe.py new file mode 100644 index 0000000..b8a122d --- /dev/null +++ b/tests/test_probe.py @@ -0,0 +1,271 @@ +"""SCAN machinery: transcript discovery, schema inference, registration, +CLI plumbing, and end-to-end scrub-through. 
No live CLI required.""" + +import json + +import pytest + +from automator import cli, probe +from automator.adapters.profile import get_profile + +# ----------------------------------------------------------- fixtures / helpers + + +def _write_jsonl(path, rows): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(json.dumps(r) for r in rows) + "\n", encoding="utf-8") + return path + + +CLAUDE_ROWS = [ + {"type": "assistant", "message": {"usage": {"input_tokens": 100, "output_tokens": 50}}}, + { + "type": "assistant", + "message": { + "usage": { + "input_tokens": 10, + "output_tokens": 5, + "cache_read_input_tokens": 2000, + "cache_creation_input_tokens": 300, + } + }, + }, +] + +CODEX_ROWS = [ + { + "type": "event_msg", + "payload": { + "type": "token_count", + "info": { + "total_token_usage": { + "input_tokens": 500, + "cached_input_tokens": 200, + "output_tokens": 60, + } + }, + }, + }, +] + +GEMINI_ROWS = [ + {"$set": {"messages": [{"id": "u1", "type": "user", "content": []}]}}, + {"id": "g1", "type": "gemini", "tokens": {"input": 12273, "output": 45, "cached": 0}}, +] + + +# ----------------------------------------------------------- token inference + + +def test_infer_claude_candidates_and_self_check(tmp_path): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + schema = probe.infer_token_schema("claude-jsonl", path) + assert "message.usage.input_tokens:int" in schema.token_field_candidates + assert "message.usage.output_tokens:int" in schema.token_field_candidates + # parsed self-check matches the real parser + assert schema.parsed_usage == { + "input_tokens": 110, + "output_tokens": 55, + "cache_read_tokens": 2000, + "cache_creation_tokens": 300, + } + + +def test_infer_codex_nested_candidates(tmp_path): + path = _write_jsonl(tmp_path / "r.jsonl", CODEX_ROWS) + schema = probe.infer_token_schema("codex-rollout", path) + assert "payload.info.total_token_usage.input_tokens:int" in schema.token_field_candidates + assert 
schema.parsed_usage["output_tokens"] == 60 + + +def test_infer_gemini_list_paths_collapse(tmp_path): + path = _write_jsonl(tmp_path / "s.jsonl", GEMINI_ROWS) + schema = probe.infer_token_schema("gemini-chat", path) + # list indices collapse to [] so the per-message tokens are one path + assert any( + "$set.messages[].tokens.input:int" == p for p in schema.token_field_candidates + ) or any("tokens.input:int" in p for p in schema.token_field_candidates) + + +def test_key_paths_carry_types_never_values(tmp_path): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + schema = probe.infer_token_schema("claude-jsonl", path) + blob = "\n".join(schema.key_paths) + # types appear, raw integer values never do + assert ":int" in blob + assert "100" not in blob and "2000" not in blob + + +def test_infer_with_parser_none_still_finds_candidates(tmp_path): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + schema = probe.infer_token_schema("none", path) + assert schema.parsed_usage is None # no parser to self-check + assert "message.usage.input_tokens:int" in schema.token_field_candidates + + +# ----------------------------------------------------------- discovery + + +def test_discover_picks_newest_mtime(tmp_path): + base = tmp_path / "sessions" + old = _write_jsonl(base / "old.jsonl", CLAUDE_ROWS) + new = _write_jsonl(base / "new.jsonl", CLAUDE_ROWS) + import os + + os.utime(old, (1, 1)) + os.utime(new, (10_000_000, 10_000_000)) + hints = probe.Hints(session_dir=str(base)) + found = probe.discover_transcript("none", cli="custom", hints=hints) + assert found.real_path == new + assert found.multiple is True + + +def test_discover_transcript_override(tmp_path): + path = _write_jsonl(tmp_path / "exact.jsonl", CLAUDE_ROWS) + found = probe.discover_transcript( + "claude-jsonl", cli="claude", hints=probe.Hints(transcript=str(path)) + ) + assert found.real_path == path + assert found.location and "exact.jsonl" in found.location + + +def 
test_discover_missing_override_notes(tmp_path): + found = probe.discover_transcript( + "claude-jsonl", cli="claude", hints=probe.Hints(transcript=str(tmp_path / "nope.jsonl")) + ) + assert found.real_path is None + assert "does not exist" in found.note + + +def test_discover_location_redacts_username(tmp_path, monkeypatch): + # a munged-cwd dir embedding a username must not survive verbatim + monkeypatch.setenv("HOME", str(tmp_path)) + path = _write_jsonl(tmp_path / ".secret-home-dir" / "abc-123.jsonl", CLAUDE_ROWS) + found = probe.discover_transcript("none", cli="x", hints=probe.Hints(transcript=str(path))) + assert found.location.startswith("~/") + assert ".secret-home-dir" not in found.location + assert "abc-123.jsonl" in found.location # the id-like filename survives + + +# ----------------------------------------------------------- registration + + +@pytest.mark.parametrize("dialect_cli", ["claude", "codex", "gemini", "copilot"]) +def test_probe_hook_registers_under_native_events(dialect_cli): + from automator.install import merge_hooks + + profile = get_profile(dialect_cli) + registrations = { + native: f"python3 /tmp/bmad_auto_probe_hook.py {canonical}" + for native, canonical in profile.hooks.events.items() + } + config, changed = merge_hooks({}, registrations, profile.hooks.dialect) + assert changed + for native in profile.hooks.events: + assert native in config["hooks"] + # idempotent re-run + again, changed2 = merge_hooks(config, registrations, profile.hooks.dialect) + assert not changed2 + + +def test_scan_reports_registered_state(project): + proj = project.project + profile = get_profile("claude") + finding = probe.scan(cli="claude", profile=profile, project=proj, hints=probe.Hints()) + assert finding.registered is False # nothing installed in the sandbox + # now install hooks and re-scan + from automator.install import install_into + + install_into(proj, clis=("claude",)) + finding2 = probe.scan(cli="claude", profile=profile, project=proj, 
hints=probe.Hints()) + assert finding2.registered is True + + +# ----------------------------------------------------------- CLI plumbing + + +def test_cli_scan_produces_sections(tmp_path, capsys): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + rc = cli.main( + ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(path)] + ) + assert rc == 0 + out = capsys.readouterr().out + assert "# Profile finalize report — claude (scan)" in out + assert "## Hook payload shape" in out + assert "## Token usage schema" in out + assert "message.usage.input_tokens:int" in out + + +def test_cli_unknown_cli_without_binary_fails(tmp_path, capsys): + rc = cli.main(["probe-adapter", "no-such-cli", "--project", str(tmp_path)]) + assert rc == 1 + err = capsys.readouterr().err + assert "FAIL" in err + + +def test_cli_unknown_cli_with_binary_reduced_report(tmp_path, capsys): + rc = cli.main(["probe-adapter", "no-such-cli", "--project", str(tmp_path), "--binary", "true"]) + assert rc == 0 + out = capsys.readouterr().out + assert "reduced report" in out or "no (reduced report)" in out + + +def test_cli_out_writes_file(tmp_path): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + out_file = tmp_path / "report.md" + rc = cli.main( + [ + "probe-adapter", + "claude", + "--project", + str(tmp_path), + "--transcript", + str(path), + "--out", + str(out_file), + ] + ) + assert rc == 0 + assert out_file.is_file() + assert "Profile finalize report" in out_file.read_text() + + +def test_cli_json_block_appended(tmp_path, capsys): + path = _write_jsonl(tmp_path / "t.jsonl", CLAUDE_ROWS) + rc = cli.main( + ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(path), "--json"] + ) + assert rc == 0 + out = capsys.readouterr().out + assert "## JSON" in out + # the JSON block must parse + blob = out.split("```json", 1)[1].rsplit("```", 1)[0] + data = json.loads(blob) + assert data["cli"] == "claude" and data["mode"] == "scan" + + +# 
----------------------------------------------------------- scrub-through + + +def test_scan_report_contains_no_pii(tmp_path, capsys, monkeypatch): + """A transcript carrying an email + a home path produces a report with neither.""" + monkeypatch.setenv("HOME", str(tmp_path)) + rows = [ + { + "type": "assistant", + "author": "secret@example.com", + "cwd": f"{tmp_path}/private/project", + "message": {"usage": {"input_tokens": 7, "output_tokens": 3}}, + } + ] + path = _write_jsonl(tmp_path / "t.jsonl", rows) + rc = cli.main( + ["probe-adapter", "claude", "--project", str(tmp_path), "--transcript", str(path), "--json"] + ) + assert rc == 0 + out = capsys.readouterr().out + assert "secret@example.com" not in out + assert "private/project" not in out + # but the token schema is still there + assert "message.usage.input_tokens:int" in out diff --git a/tests/test_probe_hook.py b/tests/test_probe_hook.py new file mode 100644 index 0000000..4f3258d --- /dev/null +++ b/tests/test_probe_hook.py @@ -0,0 +1,84 @@ +"""The capture hook runs as a real subprocess, like the CLI runs it.""" + +import json +import subprocess +import sys +from pathlib import Path + +SCRIPT = Path(__file__).parent.parent / "src" / "automator" / "data" / "bmad_auto_probe_hook.py" + + +def run_hook(event: str, env: dict, payload) -> subprocess.CompletedProcess: + return subprocess.run( + [sys.executable, str(SCRIPT), event], + input=json.dumps(payload) if payload is not None else "", + env={"PATH": "/usr/bin:/bin", **env}, + capture_output=True, + text=True, + timeout=10, + ) + + +def test_noop_without_capture_dir(tmp_path): + proc = run_hook("Stop", {}, {"session_id": "s1"}) + assert proc.returncode == 0 + assert list(tmp_path.iterdir()) == [] + + +def test_writes_signal_and_payload(tmp_path): + capture = tmp_path / "capture" + env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture), "BMAD_AUTO_TASK_ID": "probe"} + payload = { + "session_id": "abc-123", + "transcript_path": "/home/u/.copilot/x/events.jsonl", 
+ "cwd": "/proj", + "extra": {"nested": "field"}, + } + proc = run_hook("Stop", env, payload) + assert proc.returncode == 0 + + signals = list(capture.glob("*.signal.json")) + payloads = list(capture.glob("*.payload.json")) + assert len(signals) == 1 and len(payloads) == 1 + assert "Stop" in signals[0].name and "Stop" in payloads[0].name + + signal = json.loads(signals[0].read_text()) + assert signal["event"] == "Stop" + assert signal["task_id"] == "probe" + assert signal["session_id"] == "abc-123" + assert signal["transcript_path"].endswith("events.jsonl") + + captured = json.loads(payloads[0].read_text()) + # the ENTIRE raw payload survives (un-sanitized; the command scrubs later) + assert captured["extra"] == {"nested": "field"} + assert captured["argv_event"] == "Stop" # native event name for pairing + assert not list(capture.glob("*.tmp")) + + +def test_conversation_id_fallback(tmp_path): + capture = tmp_path / "capture" + env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture)} + proc = run_hook("Stop", env, {"conversation_id": "conv-9"}) + assert proc.returncode == 0 + signal = json.loads(next(capture.glob("*.signal.json")).read_text()) + assert signal["session_id"] == "conv-9" + # task_id defaults when the env var is absent + assert signal["task_id"] == "probe" + + +def test_tolerates_garbage_stdin(tmp_path): + capture = tmp_path / "capture" + env = {"BMAD_AUTO_PROBE_CAPTURE_DIR": str(capture)} + proc = run_hook("SessionStart", env, None) # empty stdin + assert proc.returncode == 0 + assert len(list(capture.glob("*.signal.json"))) == 1 + captured = json.loads(next(capture.glob("*.payload.json")).read_text()) + assert captured == {"argv_event": "SessionStart"} + + +def test_installed_copy_matches_source(tmp_path): + # packaged alongside the real relay; importlib.resources resolves it + from importlib import resources + + packaged = resources.files("automator.data").joinpath("bmad_auto_probe_hook.py") + assert packaged.read_text(encoding="utf-8") == 
SCRIPT.read_text(encoding="utf-8") diff --git a/tests/test_sanitize.py b/tests/test_sanitize.py new file mode 100644 index 0000000..ca873d7 --- /dev/null +++ b/tests/test_sanitize.py @@ -0,0 +1,126 @@ +"""The crown-jewel PII case table for the probe sanitizer.""" + +import pytest + +from automator import sanitize + + +@pytest.fixture +def home(monkeypatch, tmp_path): + monkeypatch.setenv("HOME", str(tmp_path)) + # os.path.expanduser reads HOME on POSIX; force a clean cache-free lookup + return str(tmp_path) + + +# ------------------------------------------------------------- redact_home + + +def test_redact_home_replaces_home_prefix(home): + assert sanitize.redact_home(f"{home}/.claude/x.jsonl") == "~/.claude/x.jsonl" + + +def test_redact_home_noop_when_absent(home): + assert sanitize.redact_home("/etc/passwd") == "/etc/passwd" + + +# ------------------------------------------------------- looks_like_identifier + + +@pytest.mark.parametrize( + "value", + ["claude-opus-4-8", "session-abc_123", "Stop", "gpt-5-codex", "4.8", "abc123"], +) +def test_identifier_accepts_slugs(value): + assert sanitize.looks_like_identifier(value) + + +@pytest.mark.parametrize( + "value", + [ + "", + "has spaces", + "user@example.com", + "/home/alice/x", + "a/b", + ".claude", # leading dot is not alphanumeric + "x" * 200, # too long to be a slug + "I am a sentence of prose.", + ], +) +def test_identifier_rejects_prose_paths_emails(value): + assert not sanitize.looks_like_identifier(value) + + +# --------------------------------------------------------------- scrub_json + + +def test_scrub_json_passes_numbers_bools_null(): + obj = {"input_tokens": 123, "ratio": 1.5, "ok": True, "off": False, "none": None} + assert sanitize.scrub_json(obj) == obj + + +def test_scrub_json_keeps_keys_verbatim_redacts_string_leaves(home): + obj = { + "session_id": "abc-123", # identifier -> kept + "transcript_path": f"{home}/.claude/x.jsonl", # path -> redacted + "email": "me@example.com", # email -> redacted 
+ "prose": "this is a free-form sentence", # prose -> redacted + "model": "claude-opus-4-8", # identifier -> kept + } + out = sanitize.scrub_json(obj) + assert set(out) == set(obj) # keys kept verbatim + assert out["session_id"] == "abc-123" + assert out["model"] == "claude-opus-4-8" + assert out["transcript_path"] == "" + assert out["email"] == "" + assert out["prose"] == "" + + +def test_scrub_json_preserves_list_length_not_content(): + out = sanitize.scrub_json({"items": ["a b c", "tok-1", 7]}) + assert out["items"] == ["", "tok-1", 7] + + +def test_scrub_json_depth_guard(): + obj = cur = {} + for _ in range(60): + cur["next"] = {} + cur = cur["next"] + cur["leaf"] = "deep" + out = sanitize.scrub_json(obj, max_depth=10) + # walk down to the guard + node = out + saw_guard = False + for _ in range(60): + if node == "": + saw_guard = True + break + node = node.get("next") + if node is None: + break + assert saw_guard + + +# --------------------------------------------------------------- scrub_text + + +def test_scrub_text_keeps_flags_redacts_email_and_home(home): + text = f"Usage: foo [options]\n --bar do bar\ncontact me@example.com or see {home}/cfg" + out = sanitize.scrub_text(text) + assert "--bar" in out + assert "me@example.com" not in out + assert "" in out + assert f"{home}/cfg" not in out + assert "~/cfg" in out + + +def test_scrub_text_max_lines_truncates(): + out = sanitize.scrub_text("\n".join(f"line{i}" for i in range(50)), max_lines=5) + assert out.count("\n") == 5 # 5 kept lines + the ellipsis marker + assert "more lines redacted" in out + + +def test_scrub_event_payload_is_scrub_json(home): + payload = {"session_id": "s-1", "cwd": f"{home}/proj", "n": 5} + out = sanitize.scrub_event_payload(payload) + assert out == {"session_id": "s-1", "cwd": "", "n": 5} diff --git a/uv.lock b/uv.lock index ee442cf..8414409 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11" [[package]] name = "bmad-auto" -version = "0.6.1" +version = 
"0.6.2" source = { editable = "." } dependencies = [ { name = "pyyaml" },