From 50ec623777cf2bd6618fa7de54950b9f9cd54f6a Mon Sep 17 00:00:00 2001 From: pbean Date: Sun, 21 Jun 2026 12:40:27 -0700 Subject: [PATCH 1/3] fix(copilot): correct turn-end event, payload casing, and probe prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finalized the GitHub Copilot CLI adapter against a real probe run (CLI 1.0.63); the profile that landed in 0.6.2 was wrong in ways no doc would reveal: - Turn-end is `agentStop`, not PascalCase `Stop` (which never fires) — so every session read as a timeout. Remapped events to agentStop/sessionStart/sessionEnd and dropped the non-existent PreCompact. - Payload keys are camelCase (sessionId/transcriptPath) regardless of event-name casing, so the shared hook relay now reads both camelCase and snake_case. - Probe mode sends its prompt verbatim (not via render_prompt): a content-free turn has no skill name, so a skill-templating prompt_template rendered a missing .../skills//SKILL.md path the agent hunted for until the probe timed out. Also adds the `copilot-events` usage_parser (reads ~/.copilot/session-state/*/events.jsonl, data.modelMetrics.<model>.usage.*, cumulative-per-model) and wires the copilot profile to it (was "none"). 
Co-Authored-By: Claude Opus 4.8 --- docs/adapter-authoring-guide.md | 24 ++++++--- src/automator/adapters/profile.py | 2 +- src/automator/data/bmad_auto_hook.py | 17 ++++-- src/automator/data/profiles/copilot.toml | 25 +++++---- src/automator/probe.py | 7 ++- src/automator/tokens.py | 44 +++++++++++++++ tests/test_hook_script.py | 15 ++++++ tests/test_install.py | 13 ++--- tests/test_profile.py | 8 +++ tests/test_tokens.py | 69 +++++++++++++++++++++++- 10 files changed, 192 insertions(+), 32 deletions(-) diff --git a/docs/adapter-authoring-guide.md b/docs/adapter-authoring-guide.md index 3d90a62..191d033 100644 --- a/docs/adapter-authoring-guide.md +++ b/docs/adapter-authoring-guide.md @@ -150,15 +150,25 @@ unknown CLI without `--binary` fails and lists the available profiles. ## Worked example: copilot -The bundled `copilot` profile ships with `usage_parser = "none"` — Copilot's -token-usage schema hadn't been captured when the profile landed. That's exactly -the gap `probe-adapter` closes: +The `copilot` profile was finalized from a real probe run — a good illustration of +why `probe-adapter` exists, because the as-drafted profile was wrong in ways no doc +would reveal: ```bash bmad-auto probe-adapter copilot --probe --project /tmp/scratch ``` -captures the `Stop` payload (confirming `session_id` / `transcript_path` casing), -locates `~/.copilot/session-state/*/events.jsonl`, and infers its token schema — -the data needed to write a `copilot-*` parser in `tokens.py` and flip the profile's -`usage_parser` off `"none"`. Confirm the `mkdtemp` dir is gone afterward. +On Copilot CLI 1.0.63 this surfaced three corrections: + +- **Turn-end event.** The draft registered PascalCase `Stop`, which never fires — + the turn-end hook is `agentStop` (camelCase). Without this, every session reads + as a timeout. The profile now maps `agentStop = "Stop"` (and `sessionStart` / + `sessionEnd`; there is no `PreCompact` equivalent). 
+- **Payload casing.** Keys are camelCase (`sessionId`, `transcriptPath`), not + snake_case — so the shared relay (`bmad_auto_hook.py`) reads both casings. +- **Token schema.** The probe located `~/.copilot/session-state/*/events.jsonl` and + inferred its token fields (`data.modelMetrics.<model>.usage.*`), which became the + `copilot-events` parser in `tokens.py`; the profile's `usage_parser` is now wired + to it instead of `"none"`. + +Confirm the `mkdtemp` dir is gone afterward. diff --git a/src/automator/adapters/profile.py b/src/automator/adapters/profile.py index 1699ece..23bc33f 100644 --- a/src/automator/adapters/profile.py +++ b/src/automator/adapters/profile.py @@ -18,7 +18,7 @@ from importlib import resources from pathlib import Path -USAGE_PARSERS = {"claude-jsonl", "codex-rollout", "gemini-chat", "none"} +USAGE_PARSERS = {"claude-jsonl", "codex-rollout", "gemini-chat", "copilot-events", "none"} HOOK_DIALECTS = { "claude-settings-json", "codex-hooks-json", diff --git a/src/automator/data/bmad_auto_hook.py b/src/automator/data/bmad_auto_hook.py index 4d5c29b..3b31225 100644 --- a/src/automator/data/bmad_auto_hook.py +++ b/src/automator/data/bmad_auto_hook.py @@ -2,9 +2,12 @@ """Coding-CLI hook relay for bmad-auto. Stdlib only. Each CLI's hook config registers this script under its native event names -(Claude/Codex: SessionStart/Stop/..., Gemini: AfterAgent for Stop) but always -passes the CANONICAL event name as argv[1] — the orchestrator only ever sees -canonical events. Reads the hook payload from stdin and writes one event file +(Claude/Codex: SessionStart/Stop/..., Gemini: AfterAgent for Stop, Copilot: +agentStop for Stop) but always passes the CANONICAL event name as argv[1] — the +orchestrator only ever sees canonical events. Payload keys vary too: snake_case +(claude/codex), conversation_id (cursor), or camelCase (copilot's sessionId/ +transcriptPath); the field extraction below tries each. 
Reads the hook payload +from stdin and writes one event file into the orchestrator's run directory. No-ops (exit 0) unless the session was spawned by bmad-auto (detected via env vars set on the tmux window), so normal interactive sessions are unaffected. @@ -34,8 +37,12 @@ def main() -> int: "ts": ts, "event": event_name, "task_id": task_id, - "session_id": payload.get("session_id") or payload.get("conversation_id"), - "transcript_path": payload.get("transcript_path"), + # Payload keys vary by CLI: snake_case (claude/codex), conversation_id + # (cursor), or camelCase (copilot's sessionId/transcriptPath). Try each. + "session_id": ( + payload.get("session_id") or payload.get("conversation_id") or payload.get("sessionId") + ), + "transcript_path": payload.get("transcript_path") or payload.get("transcriptPath"), "cwd": payload.get("cwd"), } events_dir = os.path.join(run_dir, "events") diff --git a/src/automator/data/profiles/copilot.toml b/src/automator/data/profiles/copilot.toml index 1437595..cbbba41 100644 --- a/src/automator/data/profiles/copilot.toml +++ b/src/automator/data/profiles/copilot.toml @@ -6,29 +6,32 @@ # needed" keeps parallel skill phases (e.g. review layers) actually spawning # subagents, same as codex. # -# Hook events are registered under Copilot's VS Code-compatible PascalCase names -# (Stop/SessionStart/SessionEnd/PreCompact, same set as claude). That casing makes -# Copilot emit SNAKE_CASE payloads (session_id, transcript_path, cwd) — exactly -# what the shared relay reads — and the Stop payload carries transcript_path, so no -# relay change is needed and a future usage_parser gets the transcript for free. -# (The camelCase names agentStop/sessionStart emit camelCase payloads the relay -# would miss.) NOTE: an enterprise policy permissions.disableBypassPermissionsMode -# = 'disable' suppresses the --allow-all-* flags and will block unattended runs. 
+# Hook events (verified against Copilot CLI 1.0.63 via `bmad-auto probe-adapter +# copilot --probe`): the turn-end event is `agentStop` — PascalCase `Stop` does +# NOT fire on this build, so it must NOT be registered (it yields no completion +# signal and every session reads as a timeout). `sessionStart`/`sessionEnd` cover +# session lifecycle; Copilot has no PreCompact equivalent. Payload keys are +# camelCase (sessionId, transcriptPath, cwd) regardless of event-name casing, and +# the `agentStop` payload carries `transcriptPath` + `stopReason: end_turn` — the +# shared relay reads both camelCase and snake_case, so the usage_parser gets the +# transcript. CAVEAT: single build/OS (1.0.63, macOS); a future build may alias +# `Stop`. NOTE: an enterprise policy permissions.disableBypassPermissionsMode = +# 'disable' suppresses the --allow-all-* flags and will block unattended runs. name = "copilot" binary = "copilot" prompt_template = "LOAD the FULL .agents/skills/{skill}/SKILL.md, read its entire contents and follow its directions exactly, using subagents as needed: {args}" launch_args = ["-i"] bypass_args = ["--allow-all-tools", "--allow-all-paths"] model_flag = "--model" -usage_parser = "none" +usage_parser = "copilot-events" first_run_note = "run `copilot` once and authenticate (gh / Copilot subscription); requires Copilot CLI GA (>= 2026-02)" skill_tree = ".agents/skills" # .github/copilot/settings.json is the inline hook config (and can also hold MCP # servers) — gitignored in many projects, so a worktree checkout omits it and -# isolated sessions lose it; seeded first, then the Stop hook is merged in. +# isolated sessions lose it; seeded first, then the agentStop hook is merged in. 
seed_files = [".github/copilot/settings.json"] [hooks] dialect = "copilot-settings-json" config_path = ".github/copilot/settings.json" -events = { Stop = "Stop", SessionStart = "SessionStart", SessionEnd = "SessionEnd", PreCompact = "PreCompact" } +events = { agentStop = "Stop", sessionStart = "SessionStart", sessionEnd = "SessionEnd" } diff --git a/src/automator/probe.py b/src/automator/probe.py index 2ce91d5..d353176 100644 --- a/src/automator/probe.py +++ b/src/automator/probe.py @@ -46,6 +46,7 @@ "claude-jsonl": "~/.claude/projects/*/*.jsonl", "codex-rollout": "~/.codex/sessions/*/*/*/rollout-*.jsonl", "gemini-chat": "~/.gemini/tmp/*/chats/session-*.jsonl", + "copilot-events": "~/.copilot/session-state/*/events.jsonl", } # Fallback family glob keyed by the `cli` name, so a CLI whose usage_parser is # still "none" (e.g. copilot, freshly added) still gets transcript discovery. @@ -450,7 +451,11 @@ def _probe_argv(profile: CLIProfile, binary: str, hints: Hints) -> list[str]: argv = [ binary, *profile.launch_args, - profile.render_prompt(PROBE_PROMPT), + # Send the probe prompt verbatim, NOT through profile.render_prompt: a + # content-free turn has no skill name, so a skill-templating prompt_template + # (copilot, codex) would render a nonexistent .../skills//SKILL.md path the + # agent hunts for, and the turn never ends within the probe timeout. + PROBE_PROMPT, *profile.bypass_args, ] if hints.model: diff --git a/src/automator/tokens.py b/src/automator/tokens.py index 8408b0c..321436a 100644 --- a/src/automator/tokens.py +++ b/src/automator/tokens.py @@ -16,6 +16,12 @@ message id is re-emitted as it accretes content, so the last occurrence per id wins, then unique messages are summed. `input` includes the cached portion. 
+- copilot-events: ~/.copilot/session-state/<session-id>/events.jsonl; some + entries carry `data.modelMetrics.<model>.usage` + {inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, + reasoningTokens} that is CUMULATIVE per model, so the last + entry bearing modelMetrics holds the session totals (summed + across models). reasoningTokens fold into output. """ from __future__ import annotations @@ -34,6 +40,8 @@ def read_usage(parser: str, transcript_path: Path) -> TokenUsage | None: return tally_codex_rollout(transcript_path) if parser == "gemini-chat": return tally_gemini_chat(transcript_path) + if parser == "copilot-events": + return tally_copilot_events(transcript_path) return None @@ -150,3 +158,39 @@ def tally_gemini_chat(transcript_path: Path) -> TokenUsage | None: ) ) return total + + +# --------------------------------------------------------- copilot-events + + +def tally_copilot_events(transcript_path: Path) -> TokenUsage | None: + # data.modelMetrics.<model>.usage is cumulative per model, so the LAST entry + # carrying modelMetrics holds the session totals; sum across its models. + # (Schema verified against a single Copilot CLI 1.0.63 events.jsonl — revisit + # the cumulative/multi-model assumption if a newer build disagrees.) 
+ last: dict[str, Any] | None = None + for entry in _jsonl_entries(transcript_path): + data = entry.get("data") + if not isinstance(data, dict): + continue + metrics = data.get("modelMetrics") + if isinstance(metrics, dict) and metrics: + last = metrics + if last is None: + return None + total = TokenUsage() + for model_metrics in last.values(): + if not isinstance(model_metrics, dict): + continue + usage = model_metrics.get("usage") + if not isinstance(usage, dict): + continue + total.add( + TokenUsage( + input_tokens=_int(usage.get("inputTokens")), + output_tokens=_int(usage.get("outputTokens")) + _int(usage.get("reasoningTokens")), + cache_read_tokens=_int(usage.get("cacheReadTokens")), + cache_creation_tokens=_int(usage.get("cacheWriteTokens")), + ) + ) + return total diff --git a/tests/test_hook_script.py b/tests/test_hook_script.py index 62a88bf..e8da5cb 100644 --- a/tests/test_hook_script.py +++ b/tests/test_hook_script.py @@ -55,6 +55,21 @@ def test_conversation_id_fallback(tmp_path): assert json.loads(files[0].read_text())["session_id"] == "conv-9" +def test_camelcase_payload(tmp_path): + """Copilot payloads carry camelCase sessionId / transcriptPath.""" + env = {"BMAD_AUTO_RUN_DIR": str(tmp_path), "BMAD_AUTO_TASK_ID": "t1"} + payload = { + "sessionId": "cop-7", + "transcriptPath": "/home/u/.copilot/session-state/cop-7/events.jsonl", + "stopReason": "end_turn", + } + proc = run_hook("Stop", env, payload) + assert proc.returncode == 0 + event = json.loads(next((tmp_path / "events").glob("*.json")).read_text()) + assert event["session_id"] == "cop-7" + assert event["transcript_path"].endswith("events.jsonl") + + def test_tolerates_garbage_stdin(tmp_path): env = {"BMAD_AUTO_RUN_DIR": str(tmp_path), "BMAD_AUTO_TASK_ID": "t1"} proc = run_hook("SessionEnd", env, None) # empty stdin diff --git a/tests/test_install.py b/tests/test_install.py index cdc6c85..2a01c7e 100644 --- a/tests/test_install.py +++ b/tests/test_install.py @@ -65,10 +65,10 @@ def 
test_merge_hooks_copilot_entry_shape(): settings, _ = merge_hooks({}, _registrations(profile), profile.hooks.dialect) assert settings["version"] == 1 # Copilot hook configs are versioned # Copilot stores the handler dict directly in the event list (no "hooks" wrapper) - handler = settings["hooks"]["Stop"][0] + handler = settings["hooks"]["agentStop"][0] assert handler["type"] == "command" assert handler["timeoutSec"] == 60 # Copilot hook timeouts are seconds - # registered under the native event but relaying the canonical name + # registered under the native event (agentStop) but relaying the canonical name assert handler["command"].endswith("bmad_auto_hook.py Stop") @@ -94,9 +94,10 @@ def test_install_into_copilot(tmp_path): assert install_into(tmp_path, clis=("copilot",)) == 0 settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text()) assert settings["version"] == 1 - # registered under VS Code-compatible PascalCase names (snake_case payloads) - assert set(settings["hooks"]) == {"Stop", "SessionStart", "SessionEnd", "PreCompact"} - cmd = settings["hooks"]["Stop"][0]["command"] + # registered under the camelCase native names Copilot 1.0.63 actually fires + # (agentStop is turn-end; PascalCase Stop never fires); relay still gets canonical + assert set(settings["hooks"]) == {"agentStop", "sessionStart", "sessionEnd"} + cmd = settings["hooks"]["agentStop"][0]["command"] # absolute path baked in (no $CLAUDE_PROJECT_DIR equivalent in copilot) assert str(tmp_path.resolve()) in cmd and cmd.endswith(" Stop") # skills land in the shared .agents/skills tree @@ -106,7 +107,7 @@ def test_install_into_copilot(tmp_path): # idempotent re-run does not duplicate the bare handler assert install_into(tmp_path, clis=("copilot",)) == 0 settings = json.loads((tmp_path / ".github" / "copilot" / "settings.json").read_text()) - assert len(settings["hooks"]["Stop"]) == 1 + assert len(settings["hooks"]["agentStop"]) == 1 def test_install_into_full(tmp_path): 
diff --git a/tests/test_profile.py b/tests/test_profile.py index c52d4d1..bf160f4 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -35,6 +35,14 @@ def test_builtin_profiles_load(): assert ".claude/settings.json" in profiles["claude"].seed_files assert profiles["codex"].seed_files == (".codex/config.toml",) assert profiles["gemini"].seed_files == (".gemini/settings.json",) + # copilot: turn-end is agentStop (Copilot 1.0.63 never fires PascalCase Stop), + # no PreCompact equivalent, and its events.jsonl parser is wired up + assert profiles["copilot"].hooks.events == { + "agentStop": "Stop", + "sessionStart": "SessionStart", + "sessionEnd": "SessionEnd", + } + assert profiles["copilot"].usage_parser == "copilot-events" def test_seed_files_default_empty_when_unset(tmp_path): diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 610df42..ceecb61 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,7 +1,13 @@ import json from automator.model import TokenUsage -from automator.tokens import read_usage, tally, tally_codex_rollout, tally_gemini_chat +from automator.tokens import ( + read_usage, + tally, + tally_codex_rollout, + tally_copilot_events, + tally_gemini_chat, +) def test_weighted_total(): @@ -168,8 +174,69 @@ def test_gemini_chat_without_tokens_is_none(tmp_path): assert tally_gemini_chat(tmp_path / "nope.jsonl") is None +def test_copilot_events_last_cumulative_across_models(tmp_path): + # shape from ~/.copilot/session-state/<session-id>/events.jsonl: per line + # {id, type, data:{...}}; data.modelMetrics.<model>.usage is cumulative. 
+ lines = [ + {"id": "e0", "type": "session_start", "data": {"sessionId": "s1"}}, + # an earlier, smaller cumulative snapshot — superseded by the last one + { + "id": "e1", + "type": "metrics", + "data": {"modelMetrics": {"gpt-5-mini": {"usage": {"inputTokens": 100}}}}, + }, + {"id": "e2", "type": "message", "data": {"content": "noise"}}, + # final cumulative snapshot, two models — totals come from here + { + "id": "e3", + "type": "metrics", + "data": { + "modelMetrics": { + "gpt-5-mini": { + "usage": { + "inputTokens": 500, + "outputTokens": 60, + "cacheReadTokens": 200, + "cacheWriteTokens": 30, + "reasoningTokens": 5, + } + }, + "gpt-5": { + "usage": { + "inputTokens": 40, + "outputTokens": 8, + "reasoningTokens": 2, + } + }, + } + }, + }, + ] + path = tmp_path / "events.jsonl" + path.write_text("\n".join(json.dumps(line) for line in lines) + "\nnot json\n") + + usage = tally_copilot_events(path) + assert usage.input_tokens == 540 # 500 + 40 + assert usage.output_tokens == 75 # (60 + 5) + (8 + 2), reasoning folded in + assert usage.cache_read_tokens == 200 + assert usage.cache_creation_tokens == 30 + + +def test_copilot_events_without_metrics_is_none(tmp_path): + path = tmp_path / "events.jsonl" + path.write_text(json.dumps({"id": "e0", "type": "message", "data": {"content": "hi"}}) + "\n") + assert tally_copilot_events(path) is None + assert tally_copilot_events(tmp_path / "nope.jsonl") is None + + def test_read_usage_dispatch(tmp_path): path = tmp_path / "t.jsonl" path.write_text(json.dumps({"usage": {"input_tokens": 1, "output_tokens": 2}}) + "\n") assert read_usage("claude-jsonl", path).total == 3 assert read_usage("none", path) is None + + cop = tmp_path / "events.jsonl" + cop.write_text( + json.dumps({"data": {"modelMetrics": {"m": {"usage": {"inputTokens": 7}}}}}) + "\n" + ) + assert read_usage("copilot-events", cop).input_tokens == 7 From 9a610d148d45a9f6cbbdaf5d195a664f1b2e847f Mon Sep 17 00:00:00 2001 From: pbean Date: Sun, 21 Jun 2026 12:40:34 -0700 
Subject: [PATCH 2/3] chore(release): reseed dev skill forks during release prepare The version bump stamps the canonical bmad-auto-setup/assets/module.yaml, which immediately drifts the gitignored dev-workspace skill forks (.claude/skills, .agents/skills) and fails tests/test_module_skills_sync.py locally until they are re-copied by hand. Add scripts/seed_skills.py (reseed, with --check) and call it from `release.py prepare` right after stamping so the forks track the bump automatically. The forks are gitignored, so nothing here is staged or committed, and an absent tree (CI) is skipped. Co-Authored-By: Claude Opus 4.8 --- scripts/release.py | 18 ++++++ scripts/seed_skills.py | 121 ++++++++++++++++++++++++++++++++++++++ tests/test_seed_skills.py | 95 ++++++++++++++++++++++++++++++ 3 files changed, 234 insertions(+) create mode 100644 scripts/seed_skills.py create mode 100644 tests/test_seed_skills.py diff --git a/scripts/release.py b/scripts/release.py index 872e0be..53b39ee 100644 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ REPO = sync_version.ROOT CHANGELOG = REPO / "CHANGELOG.md" SYNC_VERSION = REPO / "scripts" / "sync_version.py" +SEED_SKILLS = REPO / "scripts" / "seed_skills.py" GEN_SCREENSHOTS = REPO / "scripts" / "gen_screenshots.py" GEN_DEMO = REPO / "scripts" / "gen_demo.py" TUI_PATH = "src/automator/tui" @@ -257,6 +258,20 @@ def _regen_assets(dry_run: bool) -> None: _run(cmd) +def _reseed_skills(dry_run: bool) -> None: + """Re-copy the canonical bmad-auto skills into the gitignored dev-workspace + forks (.claude/skills, .agents/skills) so they pick up the just-stamped + module.yaml version. Without this the version bump drifts the forks and the + local `tests/test_module_skills_sync.py` fails until reseeded by hand. 
The + forks are gitignored, so nothing here is staged or committed.""" + cmd = ["uv", "run", "python", str(SEED_SKILLS)] + if dry_run: + print(f" would run: {' '.join(cmd)}") + return + print("reseeding dev-workspace skill forks ...") + _run(cmd) + + def _run_trunk_fmt(dry_run: bool) -> None: if not shutil.which("trunk"): print(" trunk not on PATH — skipping fmt (run `trunk check` before pushing)") @@ -330,6 +345,7 @@ def cmd_prepare(args: argparse.Namespace) -> int: print("\n[dry-run] planned actions:") print(f" ensure CHANGELOG link ref: [{version}]: {url}/releases/tag/{tag}") print(f" run: python {SYNC_VERSION.relative_to(REPO)} {version} (+ uv lock)") + _reseed_skills(dry_run=True) if regen: _regen_assets(dry_run=True) _run_trunk_fmt(dry_run=True) @@ -343,6 +359,8 @@ def cmd_prepare(args: argparse.Namespace) -> int: print(f"stamping version via {SYNC_VERSION.name} ...") _run(["uv", "run", "python", str(SYNC_VERSION), version]) + _reseed_skills(dry_run=False) + if regen: print("regenerating screenshots + demo ...") _regen_assets(dry_run=False) diff --git a/scripts/seed_skills.py b/scripts/seed_skills.py new file mode 100644 index 0000000..7f8eb06 --- /dev/null +++ b/scripts/seed_skills.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +"""Reseed the dev-workspace skill forks from the canonical wheel source. + +``src/automator/data/skills/`` is the single source of truth for the +``bmad-auto-*`` automation skills (bundled into the wheel; ``bmad-auto init`` +installs them). Two dev-workspace trees hold byte-identical *forks* of those +skills so the local agents can run them out of this repo: + +* ``.claude/skills/`` — read by Claude Code +* ``.agents/skills/`` — read by codex / gemini + +``tests/test_module_skills_sync.py`` turns any drift between canonical and a +fork into a failure. 
The version bump in ``scripts/sync_version.py`` stamps the +canonical ``bmad-auto-setup/assets/module.yaml``, which immediately drifts both +forks — so every release had to be followed by a hand reseed before the local +suite went green again. This script is that reseed, and ``release.py prepare`` +runs it automatically right after stamping. + +Both fork trees are gitignored dev-only workspaces, so nothing here is committed +— a tree that is absent (as in CI) is simply skipped, never created. + +Usage:: + + uv run python scripts/seed_skills.py # reseed every present fork + uv run python scripts/seed_skills.py --check # report drift, mutate nothing +""" + +from __future__ import annotations + +import filecmp +import shutil +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +# Import MODULE_SKILLS straight from the package so this list can never drift +# from the one the installer and the sync test use. +sys.path.insert(0, str(ROOT / "src")) +from automator.install import MODULE_SKILLS # noqa: E402 + +SKILLS_SRC = ROOT / "src" / "automator" / "data" / "skills" +FORK_TREES = (".claude/skills", ".agents/skills") + + +def drift(canonical: Path, fork: Path) -> list[str]: + """Recursively compare a canonical skill dir against its fork, returning a + list of human-readable drift problems (empty when byte-identical). 
Mirrors + the comparison in tests/test_module_skills_sync.py.""" + if not fork.exists(): + return [f"fork missing: {fork.relative_to(ROOT)}"] + problems: list[str] = [] + stack = [filecmp.dircmp(canonical, fork)] + while stack: + node = stack.pop() + rel = Path(node.left).relative_to(canonical) + problems += [f"only in canonical: {rel / n}" for n in node.left_only] + problems += [f"extra in fork: {rel / n}" for n in node.right_only] + _, mismatch, errors = filecmp.cmpfiles( + node.left, node.right, node.common_files, shallow=False + ) + problems += [f"content differs: {rel / n}" for n in mismatch + errors] + stack.extend(node.subdirs.values()) + return problems + + +def reseed(canonical: Path, fork: Path) -> None: + """Replace ``fork`` with an exact copy of ``canonical``.""" + if fork.exists(): + shutil.rmtree(fork) + fork.parent.mkdir(parents=True, exist_ok=True) + shutil.copytree(canonical, fork) + + +def run(check: bool) -> int: + present = [tree for tree in FORK_TREES if (ROOT / tree).is_dir()] + if not present: + print("no dev-workspace skill forks present (.claude/.agents) — nothing to reseed") + return 0 + + drifted: list[tuple[str, list[str]]] = [] + reseeded: list[str] = [] + for tree in present: + for skill in MODULE_SKILLS: + canonical = SKILLS_SRC / skill + if not canonical.is_dir(): + sys.exit(f"error: canonical skill missing: {canonical.relative_to(ROOT)}") + fork = ROOT / tree / skill + problems = drift(canonical, fork) + if not problems: + continue + if check: + drifted.append((f"{tree}/{skill}", problems)) + else: + reseed(canonical, fork) + reseeded.append(f"{tree}/{skill}") + + if check: + if drifted: + print("skill fork drift detected (run scripts/seed_skills.py to fix):", file=sys.stderr) + for label, problems in drifted: + for p in problems: + print(f" - {label}: {p}", file=sys.stderr) + return 1 + print("ok: every skill fork matches canonical") + return 0 + + if reseeded: + print("reseeded skill forks from canonical:\n " + "\n 
".join(reseeded)) + else: + print("skill forks already match canonical — nothing to reseed") + return 0 + + +def main(argv: list[str]) -> int: + if len(argv) > 1 or (argv and argv[0] != "--check"): + sys.exit("usage: seed_skills.py [--check]") + return run(check=bool(argv)) + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/tests/test_seed_skills.py b/tests/test_seed_skills.py new file mode 100644 index 0000000..019c2be --- /dev/null +++ b/tests/test_seed_skills.py @@ -0,0 +1,95 @@ +"""Unit tests for scripts/seed_skills.py — the reseed that keeps the gitignored +dev-workspace skill forks (.claude/skills, .agents/skills) byte-identical to the +canonical src/automator/data/skills/ source after a version bump. + +The module's paths and skill list are module-level globals, so each test points +them at a throwaway tmp workspace via monkeypatch rather than touching the real +repo trees. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +SCRIPTS = Path(__file__).resolve().parent.parent / "scripts" +sys.path.insert(0, str(SCRIPTS)) + +import seed_skills # noqa: E402 + + +def _build_workspace(monkeypatch: pytest.MonkeyPatch, tmp_path: Path, trees: tuple[str, ...]): + """Lay out canonical + fork trees under tmp_path and repoint the module at + them. 
Returns (canonical_skill_dir, {tree: fork_skill_dir}).""" + canonical = tmp_path / "src" / "automator" / "data" / "skills" / "demo-skill" + canonical.mkdir(parents=True) + (canonical / "SKILL.md").write_text("canonical body\n") + (canonical / "assets").mkdir() + (canonical / "assets" / "module.yaml").write_text("module_version: 9.9.9\n") + + forks = {} + for tree in trees: + fork = tmp_path / tree / "demo-skill" + fork.mkdir(parents=True) + (fork / "SKILL.md").write_text("stale body\n") # drifted on purpose + forks[tree] = fork + + monkeypatch.setattr(seed_skills, "ROOT", tmp_path) + monkeypatch.setattr(seed_skills, "SKILLS_SRC", canonical.parent) + monkeypatch.setattr(seed_skills, "FORK_TREES", trees) + monkeypatch.setattr(seed_skills, "MODULE_SKILLS", ("demo-skill",)) + return canonical, forks + + +def test_check_detects_drift(monkeypatch, tmp_path, capsys): + _build_workspace(monkeypatch, tmp_path, (".claude/skills",)) + assert seed_skills.run(check=True) == 1 + assert "drift detected" in capsys.readouterr().err + + +def test_reseed_fixes_drift(monkeypatch, tmp_path): + canonical, forks = _build_workspace(monkeypatch, tmp_path, (".claude/skills", ".agents/skills")) + assert seed_skills.run(check=False) == 0 + # Both forks now byte-match canonical, including the nested asset. + for fork in forks.values(): + assert not seed_skills.drift(canonical, fork) + assert (fork / "assets" / "module.yaml").read_text() == "module_version: 9.9.9\n" + # And a follow-up --check is clean. + assert seed_skills.run(check=True) == 0 + + +def test_reseed_prunes_extra_fork_files(monkeypatch, tmp_path): + canonical, forks = _build_workspace(monkeypatch, tmp_path, (".claude/skills",)) + stray = forks[".claude/skills"] / "leftover.md" + stray.write_text("should be removed\n") + seed_skills.run(check=False) + assert not stray.exists() + + +def test_missing_fork_tree_is_skipped(monkeypatch, tmp_path, capsys): + # No .claude/.agents trees present at all (the CI shape). 
+ canonical = tmp_path / "src" / "automator" / "data" / "skills" / "demo-skill" + canonical.mkdir(parents=True) + (canonical / "SKILL.md").write_text("body\n") + monkeypatch.setattr(seed_skills, "ROOT", tmp_path) + monkeypatch.setattr(seed_skills, "SKILLS_SRC", canonical.parent) + monkeypatch.setattr(seed_skills, "FORK_TREES", (".claude/skills", ".agents/skills")) + monkeypatch.setattr(seed_skills, "MODULE_SKILLS", ("demo-skill",)) + assert seed_skills.run(check=False) == 0 + assert "nothing to reseed" in capsys.readouterr().out + + +def test_missing_canonical_skill_is_fatal(monkeypatch, tmp_path): + _build_workspace(monkeypatch, tmp_path, (".claude/skills",)) + monkeypatch.setattr(seed_skills, "MODULE_SKILLS", ("does-not-exist",)) + with pytest.raises(SystemExit) as exc: + seed_skills.run(check=False) + assert "canonical skill missing" in str(exc.value) + + +def test_main_rejects_bad_args(monkeypatch, tmp_path): + _build_workspace(monkeypatch, tmp_path, (".claude/skills",)) + with pytest.raises(SystemExit): + seed_skills.main(["--bogus"]) From 6df225740b3ecf9ad3dc9443a85420e890b918b6 Mon Sep 17 00:00:00 2001 From: pbean Date: Sun, 21 Jun 2026 12:43:07 -0700 Subject: [PATCH 3/3] =?UTF-8?q?chore(release):=200.6.3=20=E2=80=94=20GitHu?= =?UTF-8?q?b=20Copilot=20adapter=20(CLI=201.0.63).=20Turn-end=20is=20`agen?= =?UTF-8?q?tStop`=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude-plugin/marketplace.json | 2 +- CHANGELOG.md | 17 +++++++++++++++++ module.yaml | 2 +- pyproject.toml | 2 +- src/automator/__init__.py | 2 +- .../skills/bmad-auto-setup/assets/module.yaml | 2 +- uv.lock | 2 +- 7 files changed, 23 insertions(+), 6 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 838727b..7fb272d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "bauto", "source": "./src/automator/data/skills", "description": 
"Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)", - "version": "0.6.2", + "version": "0.6.3", "author": { "name": "pinkyd" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index f7b9f02..0c8a9d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to `bmad-auto` are documented here. The format is based on [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0, breaking changes may land in a minor release. +## [0.6.3] — 2026-06-21 + +### Fixed + +- **GitHub Copilot adapter (CLI 1.0.63).** Turn-end is `agentStop`, not PascalCase `Stop` + (which never fires) — every session previously read as a timeout. Remapped events, dropped + the non-existent `PreCompact`, and the shared hook relay now reads camelCase payload keys + (`sessionId`/`transcriptPath`). Probe mode sends its prompt verbatim so a skill-templating + `prompt_template` no longer renders a missing-skill path that stalls the turn. + +### Added + +- **Copilot token accounting.** New `copilot-events` `usage_parser` reads + `~/.copilot/session-state/*/events.jsonl` (`data.modelMetrics.<model>.usage.*`); the `copilot` + profile is wired to it (was `usage_parser = "none"`). + ## [0.6.2] — 2026-06-21 ### Added @@ -451,6 +467,7 @@ enforced in CI. implementation phase, driven by a Python control loop with hook-based session transport and resumable on-disk run state. 
+[0.6.3]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.3 [0.6.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.2 [0.6.1]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.1 [0.6.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.6.0 diff --git a/module.yaml b/module.yaml index 48a72b7..3089c88 100644 --- a/module.yaml +++ b/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)" -module_version: 0.6.2 +module_version: 0.6.3 default_selected: false module_greeting: > BMAD Auto installed — both the four automation skills and the diff --git a/pyproject.toml b/pyproject.toml index cf6c4e0..315e1e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bmad-auto" -version = "0.6.2" +version = "0.6.3" description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase" readme = "README.md" license = "MIT" diff --git a/src/automator/__init__.py b/src/automator/__init__.py index b96d34a..7dee1bb 100644 --- a/src/automator/__init__.py +++ b/src/automator/__init__.py @@ -6,4 +6,4 @@ spec files, and the per-run directory under .automator/runs/. 
""" -__version__ = "0.6.2" +__version__ = "0.6.3" diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml index 48a72b7..3089c88 100644 --- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml +++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: unattended dev (bmad-auto-dev), adversarial review (bmad-auto-review), and deferred-work sweep triage (bmad-auto-sweep)" -module_version: 0.6.2 +module_version: 0.6.3 default_selected: false module_greeting: > BMAD Auto installed — both the four automation skills and the diff --git a/uv.lock b/uv.lock index 8414409..02ce071 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11" [[package]] name = "bmad-auto" -version = "0.6.2" +version = "0.6.3" source = { editable = "." } dependencies = [ { name = "pyyaml" },