From 10e390c3797e8aac4cebc820a09bf874bbcec2ff Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy Date: Mon, 1 Jun 2026 11:04:06 -0500 Subject: [PATCH 1/8] feat: add synthesize-skill skill for all platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new evolve-lite skill that converts a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts). Models the SKILL.md shape on the existing learn skill: judgment lives in a forked subagent (read trajectory, identify the successful workflow, draft a SKILL.md and scripts), file-system plumbing lives in scripts/synthesize.py (frontmatter validation, dual writes, audit-log entry). Lives at plugin-source/skills/evolve-lite/synthesize-skill/ — universal, ships to all four platforms (claude, codex, claw-code, bob). The SKILL.md template uses the shared invoke()/skill_ref() macros for platform-aware shell paths and slash-prefixes; the script is templated to set _RUNTIME_MIRROR_DIR per platform. On claude, the script writes both to .evolve/skills/<name>/ (canonical) and .claude/skills/<name>/ (so the Claude Code skill loader picks it up automatically). Other platforms write only to .evolve/skills/<name>/ for now — adopting an automatic runtime mirror on those hosts is a follow-up. The skill is invoked manually for now; not wired into a Stop hook. 
--- .../commands/evolve-lite-synthesize-skill.md | 4 + .../evolve-lite-synthesize-skill/SKILL.md | 148 ++++++++++++++++ .../scripts/synthesize.py | 161 +++++++++++++++++ .../evolve-lite/synthesize-skill/SKILL.md | 153 ++++++++++++++++ .../synthesize-skill/scripts/synthesize.py | 161 +++++++++++++++++ .../evolve-lite/synthesize-skill/SKILL.md | 148 ++++++++++++++++ .../synthesize-skill/scripts/synthesize.py | 161 +++++++++++++++++ .../evolve-lite/synthesize-skill/SKILL.md | 148 ++++++++++++++++ .../synthesize-skill/scripts/synthesize.py | 161 +++++++++++++++++ .../evolve-lite/synthesize-skill/SKILL.md.j2 | 152 ++++++++++++++++ .../synthesize-skill/scripts/synthesize.py.j2 | 165 ++++++++++++++++++ 11 files changed, 1562 insertions(+) create mode 100644 platform-integrations/bob/evolve-lite/commands/evolve-lite-synthesize-skill.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py create mode 100644 plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 create mode 100755 plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 diff --git 
a/platform-integrations/bob/evolve-lite/commands/evolve-lite-synthesize-skill.md b/platform-integrations/bob/evolve-lite/commands/evolve-lite-synthesize-skill.md new file mode 100644 index 00000000..3a34a825 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/commands/evolve-lite-synthesize-skill.md @@ -0,0 +1,4 @@ +--- +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +--- +Use the `evolve-lite-synthesize-skill` skill on the current conversation. Follow the skill's instructions exactly. diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md new file mode 100644 index 00000000..f7df57d7 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md @@ -0,0 +1,148 @@ +--- +name: evolve-lite:synthesize-skill +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +--- + +# Skill Synthesizer + +## Overview + +This skill reads a saved trajectory and produces a **reusable agent skill** — a `SKILL.md` plus any supporting scripts — that captures the *successful* workflow the session discovered. The output goes to `.evolve/skills/<skill-name>/` (canonical, evolve-managed). Future sessions on the same project can then invoke the skill directly instead of re-deriving the workflow. 
+ +This is the **executable** counterpart to the `learn` skill's free-text guidelines: `learn` writes Markdown the next agent has to *read and decide what to do*; `synthesize-skill` writes a skill the next agent can simply *call*. + +## When To Use + +Use this skill when a trajectory captured: + +- A **non-trivial workflow** that succeeded after trial-and-error (the eventual happy path is worth promoting from free-text advice to an invocable artifact). +- A **reusable script or command sequence** the model wrote during the session — particularly one the agent had to reconstruct over multiple attempts. +- An environment-specific workaround (a missing system tool, a permissions wrinkle, a fallback pipeline) that future sessions in the same project will hit. + +Skip this skill — and let `learn` cover the case with a guideline alone — when: + +- The successful path was a single trivial command. +- The workflow embeds secrets, tokens, or one-off user inputs that can't be safely generalized. +- A skill with the same trigger already exists in `.evolve/skills/` (use `learn`'s guideline path to refine the existing skill instead of creating a duplicate). + +## Workflow + +### Step 0: Locate the Trajectory + +This skill runs in a forked context. **You cannot see the parent conversation directly** — read the trajectory the parent passed in via `args` or via the `Run evolve-lite:synthesize-skill on <path>` instruction. + +The trajectory path is either: + +- supplied directly as `args` to the skill invocation, or +- stated in the parent's invocation message as `The saved trajectory path is: <path>` — take everything after the colon, strip surrounding whitespace and quotes. + +If neither is present, scan `.evolve/trajectories/` for the most recently modified `claude-transcript_<session-id>.jsonl` and use that. If `.evolve/trajectories/` does not exist or is empty, output zero artifacts and exit — do not invent a trajectory. 
+ +**Read the trajectory with the `Read` tool — do NOT shell out.** The transcript is JSONL: one JSON object per line. Filter for `"type": "assistant"` and `"type": "human"` records and reconstruct the flow from `message.content`. + +### Step 1: Identify the Successful Workflow + +Walk the trajectory and locate the **final, working** tool sequence — the one that actually produced the answer. Distinguish it from the trial-and-error leading up to it. + +Capture: + +- **What the user asked** (the original prompt). +- **What ultimately worked** — the exact tool calls, scripts, or command sequences that produced the answer. Quote them verbatim from the trajectory. +- **What didn't work** — the dead-ends. You will use these to write a `Triggers` section so the future agent knows when to reach for this skill *instead of* the failing approaches. +- **Environment assumptions** — what was missing or had to be installed (e.g. "no exiftool, pip install Pillow needed"). + +If no clearly successful workflow is in the trajectory (the session ended without reaching an answer, or the answer came from a single trivial call), output zero artifacts and exit. + +### Step 2: Decide a Skill Name and Trigger + +The skill **name** must be: + +- kebab-case, action-oriented (`extract-exif-metadata`, `parse-cloudwatch-logs`, `restart-stuck-deploy`) +- specific enough that a future agent reading just the name can guess what it does +- not a duplicate of any existing entry under `.evolve/skills/` + +The skill **description** (one line, in the SKILL.md frontmatter) should describe the *task* the skill solves, not the trajectory it came from. Bad: "Solves the focal-length question from session abc123." Good: "Extract EXIF metadata (focal length, GPS, lens, timestamps) from JPEG/HEIC images using Pillow when system EXIF tools are unavailable." 
+ +The **trigger** (in the SKILL.md body, under `## When To Use`) should describe the broad task context, not the narrow original request — same rule as the `learn` skill's guidelines. + +Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls`) and confirm your chosen name does not collide with an existing skill. + +### Step 3: Draft the SKILL.md + +Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: + +``` +--- +name: <kebab-case-name> +description: <one-line task description> +--- + +# <Title Case Name> + +## Overview +<1–2 sentences: what the skill does and when to use it> + +## When To Use +- <trigger 1> +- <trigger 2> + +## Workflow +<step-by-step instructions for the agent> +``` + +Notes: + +- `context: fork` is **omitted** for synthesized skills. They run in the parent context so they can write files into the workspace and report back. +- Do NOT inline the full successful script into the SKILL.md if it's more than ~10 lines — put it in a sibling `scripts/` file (Step 4) and reference it from the SKILL.md. +- The Workflow section should describe what to do *to solve the task*, not retell the original session. A future agent reading this should be able to act without ever seeing the trajectory. + +### Step 4: Emit Supporting Scripts + +If the successful workflow used a non-trivial script (more than a one-liner), write it as a sibling file under `scripts/` of your draft skill directory. Use the **already-validated code from the trajectory** — do not invent variations. Strip incidental one-off inputs (literal file names, IDs, hard-coded outputs) and replace with arguments or stdin where appropriate. 
+ +Common shape: + +``` +.evolve/skills/<name>/ +├── SKILL.md +└── scripts/ + └── <action>.py # callable as `python3 scripts/<action>.py <args>` +``` + +If the workflow was a sequence of shell commands rather than a script, encode it as an executable shell script (`scripts/<action>.sh`) so future agents can invoke it as a single unit instead of replaying each command. + +If no non-trivial script is needed (the workflow is a sequence of standard tool calls), skip this step — the SKILL.md alone is the skill. + +### Step 5: Finalize + +Place your draft files (SKILL.md and any scripts) under a temporary directory inside the workspace, e.g. `/tmp/synthesized-<name>/`, then call: + +```bash +python3 .bob/skills/evolve-lite-synthesize-skill/scripts/synthesize.py finalize --src /tmp/synthesized-<name>/ --name <kebab-case-name> --trajectory <saved_trajectory_path> +``` + +The script will: + +- Validate the SKILL.md frontmatter (`name` and `description` required; `name` must match `--name`). +- Reject the skill if a same-named skill already exists in `.evolve/skills/` (overwriting requires `--force`). +- Copy the directory into `.evolve/skills/<name>/` (canonical). +- Append a `synthesize_skill` event to `.evolve/audit.log` recording the new skill, the source trajectory, and the timestamp. +- Print the destination path(s). + +If the validator rejects the draft, fix the SKILL.md and retry — do not edit files under `.evolve/skills/` directly. + +### Step 6: Confirm + +After the script returns, list the destination directories with the `Glob` tool to confirm the files landed. Output a short summary: + +- The skill name and description. +- The destination paths. +- A one-line note on what future sessions should now be able to do that they couldn't before. + +## Best Practices + +1. **One skill per workflow.** If the trajectory contains two unrelated successful workflows, run synthesis twice with different names — do not pack them into one skill. +2. 
**Cite the trajectory.** Include the `--trajectory` flag so the audit log records provenance; future maintainers can trace the skill back to the session that produced it. +3. **Don't promote one-shots.** A skill is worth synthesizing only if the trigger is plausibly recurring. If the trajectory looks like a one-off, prefer the `learn` skill's guideline path instead. +4. **Don't paraphrase failure.** The skill describes what *worked*. If you find yourself writing "this skill avoids the problem where exiftool isn't installed," restate it as "uses Pillow to extract EXIF; works in environments without system EXIF tools." Triggers describe *when*, not *what failed*. +5. **Keep scripts minimal.** Strip incidental log lines, debug prints, and validation that wasn't actually exercised in the trajectory. If a feature wasn't validated, leave it out. diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py new file mode 100644 index 00000000..ef8273d7 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Synthesize-skill helper: validate and install a synthesized skill. + +The synthesize-skill skill (a subagent) is responsible for the *judgment* — +reading the trajectory, identifying the successful workflow, and writing +draft SKILL.md + supporting scripts into a temporary directory. + +This script is the *plumbing* — it validates the draft frontmatter, copies +the directory into both the canonical evolve-managed location and the +platform-specific skills loader location, and writes an audit-log entry. 
+ +Usage: + synthesize.py finalize --src <draft_dir> --name <kebab-case-name> \ + [--trajectory <path>] [--workspace <path>] [--force] +""" + +from __future__ import annotations + +import argparse +import re +import shutil +import sys +from pathlib import Path + +# Reuse the plugin's lib helpers (audit-log writer + entities-dir locator). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): + if (_candidate / "audit.py").is_file(): + _lib = _candidate + break + if _lib is not None: + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from audit import append as audit_append # noqa: E402 + + +KEBAB_RE = re.compile(r"^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$") +FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) + + +def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: + """Minimal YAML-ish frontmatter parser. Supports `key: value` lines only. + + Returns (frontmatter_dict, body_text). 
+ """ + text = skill_md.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(text) + if not match: + raise ValueError(f"{skill_md} has no frontmatter block") + fm: dict[str, str] = {} + for line in match.group(1).splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if ":" not in line: + raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") + key, _, value = line.partition(":") + fm[key.strip()] = value.strip().strip('"').strip("'") + return fm, text[match.end():] + + +def _validate_draft(src: Path, name: str) -> None: + if not KEBAB_RE.match(name): + raise SystemExit(f"--name {name!r} is not kebab-case") + if not src.is_dir(): + raise SystemExit(f"--src {src} is not a directory") + skill_md = src / "SKILL.md" + if not skill_md.is_file(): + raise SystemExit(f"missing SKILL.md in {src}") + + fm, body = _parse_frontmatter(skill_md) + if "name" not in fm or "description" not in fm: + raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") + if fm["name"] != name: + raise SystemExit(f"frontmatter name {fm['name']!r} does not match --name {name!r}") + if not fm["description"]: + raise SystemExit("SKILL.md description is empty") + if len(body.strip()) < 50: + raise SystemExit("SKILL.md body is suspiciously short — not enough instructions to be useful") + + +def _resolve_workspace(arg: str | None) -> Path: + if arg: + return Path(arg).resolve() + # Fall back to the current working directory; at runtime this is the + # workspace mounted into the sandbox (`/workspace`) or the host repo root. 
+ return Path.cwd().resolve() + + +def _copy_into(src: Path, dst: Path, force: bool) -> None: + if dst.exists(): + if not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + shutil.rmtree(dst) + shutil.copytree(src, dst) + + +# Per-platform runtime mirror — the loader-discoverable directory the +# synthesized skill is copied into so the host agent picks it up +# automatically. Set to None where the platform doesn't have a runtime +# skills directory; only the canonical `.evolve/skills/<name>/` write +# happens in that case. +_RUNTIME_MIRROR_DIR: str | None = None + + +def cmd_finalize(args: argparse.Namespace) -> int: + src = Path(args.src).resolve() + name = args.name + workspace = _resolve_workspace(args.workspace) + + _validate_draft(src, name) + + evolve_dst = workspace / ".evolve" / "skills" / name + _copy_into(src, evolve_dst, args.force) + + runtime_dst: Path | None = None + if _RUNTIME_MIRROR_DIR is not None: + runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name + _copy_into(src, runtime_dst, args.force) + + audit_append( + project_root=str(workspace), + event="synthesize_skill", + skill=name, + evolve_path=str(evolve_dst.relative_to(workspace)), + runtime_path=str(runtime_dst.relative_to(workspace)) if runtime_dst else "", + trajectory=args.trajectory or "", + ) + + print(f"Installed skill {name!r}:") + print(f" evolve: {evolve_dst}") + if runtime_dst is not None: + print(f" runtime: {runtime_dst}") + return 0 + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_finalize = sub.add_parser( + "finalize", + help="Validate a draft skill directory and install it under .evolve/skills/ (and the platform's runtime skills dir, if any).", + ) + p_finalize.add_argument("--src", required=True, help="Draft directory containing SKILL.md and any scripts/") + p_finalize.add_argument("--name", required=True, help="Kebab-case 
skill name; must match SKILL.md frontmatter") + p_finalize.add_argument("--trajectory", default="", help="Source trajectory path (recorded in audit.log)") + p_finalize.add_argument("--workspace", default=None, help="Project root (defaults to CWD)") + p_finalize.add_argument("--force", action="store_true", help="Overwrite existing skill of the same name") + p_finalize.set_defaults(func=cmd_finalize) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md new file mode 100644 index 00000000..c9cf62fc --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -0,0 +1,153 @@ +--- +name: synthesize-skill +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +context: fork +--- + +# Skill Synthesizer + +## Overview + +This skill reads a saved trajectory and produces a **reusable agent skill** — a `SKILL.md` plus any supporting scripts — that captures the *successful* workflow the session discovered. The output goes to `.evolve/skills/<skill-name>/` (canonical, evolve-managed) and to `.claude/skills/<skill-name>/` (so Claude Code's skills loader picks it up automatically). Future sessions on the same project can then invoke the skill directly instead of re-deriving the workflow. + +This is the **executable** counterpart to the `learn` skill's free-text guidelines: `learn` writes Markdown the next agent has to *read and decide what to do*; `synthesize-skill` writes a skill the next agent can simply *call*. 
+ +## When To Use + +Use this skill when a trajectory captured: + +- A **non-trivial workflow** that succeeded after trial-and-error (the eventual happy path is worth promoting from free-text advice to an invocable artifact). +- A **reusable script or command sequence** the model wrote during the session — particularly one the agent had to reconstruct over multiple attempts. +- An environment-specific workaround (a missing system tool, a permissions wrinkle, a fallback pipeline) that future sessions in the same project will hit. + +Skip this skill — and let `learn` cover the case with a guideline alone — when: + +- The successful path was a single trivial command. +- The workflow embeds secrets, tokens, or one-off user inputs that can't be safely generalized. +- A skill with the same trigger already exists in `.evolve/skills/` (use `learn`'s guideline path to refine the existing skill instead of creating a duplicate). + +## Workflow + +### Step 0: Locate the Trajectory + +This skill runs in a forked context. **You cannot see the parent conversation directly** — read the trajectory the parent passed in via `args` or via the `Run /evolve-lite:synthesize-skill on <path>` instruction. + +The trajectory path is either: + +- supplied directly as `args` to the skill invocation, or +- stated in the parent's invocation message as `The saved trajectory path is: <path>` — take everything after the colon, strip surrounding whitespace and quotes. + +If neither is present, scan `.evolve/trajectories/` for the most recently modified `claude-transcript_<session-id>.jsonl` and use that. If `.evolve/trajectories/` does not exist or is empty, output zero artifacts and exit — do not invent a trajectory. + +**Read the trajectory with the `Read` tool — do NOT shell out.** The transcript is JSONL: one JSON object per line. Filter for `"type": "assistant"` and `"type": "human"` records and reconstruct the flow from `message.content`. 
+ +### Step 1: Identify the Successful Workflow + +Walk the trajectory and locate the **final, working** tool sequence — the one that actually produced the answer. Distinguish it from the trial-and-error leading up to it. + +Capture: + +- **What the user asked** (the original prompt). +- **What ultimately worked** — the exact tool calls, scripts, or command sequences that produced the answer. Quote them verbatim from the trajectory. +- **What didn't work** — the dead-ends. You will use these to write a `Triggers` section so the future agent knows when to reach for this skill *instead of* the failing approaches. +- **Environment assumptions** — what was missing or had to be installed (e.g. "no exiftool, pip install Pillow needed"). + +If no clearly successful workflow is in the trajectory (the session ended without reaching an answer, or the answer came from a single trivial call), output zero artifacts and exit. + +### Step 2: Decide a Skill Name and Trigger + +The skill **name** must be: + +- kebab-case, action-oriented (`extract-exif-metadata`, `parse-cloudwatch-logs`, `restart-stuck-deploy`) +- specific enough that a future agent reading just the name can guess what it does +- not a duplicate of any existing entry under `.evolve/skills/` + +The skill **description** (one line, in the SKILL.md frontmatter) should describe the *task* the skill solves, not the trajectory it came from. Bad: "Solves the focal-length question from session abc123." Good: "Extract EXIF metadata (focal length, GPS, lens, timestamps) from JPEG/HEIC images using Pillow when system EXIF tools are unavailable." + +The **trigger** (in the SKILL.md body, under `## When To Use`) should describe the broad task context, not the narrow original request — same rule as the `learn` skill's guidelines. + +Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls`) and confirm your chosen name does not collide with an existing skill. 
+ +### Step 3: Draft the SKILL.md + +Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: + +``` +--- +name: <kebab-case-name> +description: <one-line task description> +--- + +# <Title Case Name> + +## Overview +<1–2 sentences: what the skill does and when to use it> + +## When To Use +- <trigger 1> +- <trigger 2> + +## Workflow +<step-by-step instructions for the agent> +``` + +Notes: + +- `context: fork` is **omitted** for synthesized skills. They run in the parent context so they can write files into the workspace and report back. +- Do NOT inline the full successful script into the SKILL.md if it's more than ~10 lines — put it in a sibling `scripts/` file (Step 4) and reference it from the SKILL.md. +- The Workflow section should describe what to do *to solve the task*, not retell the original session. A future agent reading this should be able to act without ever seeing the trajectory. + +### Step 4: Emit Supporting Scripts + +If the successful workflow used a non-trivial script (more than a one-liner), write it as a sibling file under `scripts/` of your draft skill directory. Use the **already-validated code from the trajectory** — do not invent variations. Strip incidental one-off inputs (literal file names, IDs, hard-coded outputs) and replace with arguments or stdin where appropriate. + +Common shape: + +``` +.evolve/skills/<name>/ +├── SKILL.md +└── scripts/ + └── <action>.py # callable as `python3 scripts/<action>.py <args>` +``` + +If the workflow was a sequence of shell commands rather than a script, encode it as an executable shell script (`scripts/<action>.sh`) so future agents can invoke it as a single unit instead of replaying each command. + +If no non-trivial script is needed (the workflow is a sequence of standard tool calls), skip this step — the SKILL.md alone is the skill. 
+ +### Step 5: Finalize + +Place your draft files (SKILL.md and any scripts) under a temporary directory inside the workspace, e.g. `/tmp/synthesized-<name>/`, then call: + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/synthesize-skill/scripts/synthesize.py \ + finalize \ + --src /tmp/synthesized-<name>/ \ + --name <kebab-case-name> \ + --trajectory <saved_trajectory_path> +``` + +The script will: + +- Validate the SKILL.md frontmatter (`name` and `description` required; `name` must match `--name`). +- Reject the skill if a same-named skill already exists in `.evolve/skills/` (overwriting requires `--force`). +- Copy the directory into `.evolve/skills/<name>/` (canonical), then mirror it into `.claude/skills/<name>/` so Claude Code's skill loader sees it. +- Append a `synthesize_skill` event to `.evolve/audit.log` recording the new skill, the source trajectory, and the timestamp. +- Print the destination path(s). + +If the validator rejects the draft, fix the SKILL.md and retry — do not edit files under `.evolve/skills/` or `.claude/skills/` directly. + +### Step 6: Confirm + +After the script returns, list the destination directories with the `Glob` tool to confirm the files landed. Output a short summary: + +- The skill name and description. +- The destination paths. +- A one-line note on what future sessions should now be able to do that they couldn't before. + +## Best Practices + +1. **One skill per workflow.** If the trajectory contains two unrelated successful workflows, run synthesis twice with different names — do not pack them into one skill. +2. **Cite the trajectory.** Include the `--trajectory` flag so the audit log records provenance; future maintainers can trace the skill back to the session that produced it. +3. **Don't promote one-shots.** A skill is worth synthesizing only if the trigger is plausibly recurring. If the trajectory looks like a one-off, prefer the `learn` skill's guideline path instead. +4. 
**Don't paraphrase failure.** The skill describes what *worked*. If you find yourself writing "this skill avoids the problem where exiftool isn't installed," restate it as "uses Pillow to extract EXIF; works in environments without system EXIF tools." Triggers describe *when*, not *what failed*. +5. **Keep scripts minimal.** Strip incidental log lines, debug prints, and validation that wasn't actually exercised in the trajectory. If a feature wasn't validated, leave it out. diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py new file mode 100644 index 00000000..fafb5623 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Synthesize-skill helper: validate and install a synthesized skill. + +The synthesize-skill skill (a subagent) is responsible for the *judgment* — +reading the trajectory, identifying the successful workflow, and writing +draft SKILL.md + supporting scripts into a temporary directory. + +This script is the *plumbing* — it validates the draft frontmatter, copies +the directory into both the canonical evolve-managed location and the +platform-specific skills loader location, and writes an audit-log entry. + +Usage: + synthesize.py finalize --src <draft_dir> --name <kebab-case-name> \ + [--trajectory <path>] [--workspace <path>] [--force] +""" + +from __future__ import annotations + +import argparse +import re +import shutil +import sys +from pathlib import Path + +# Reuse the plugin's lib helpers (audit-log writer + entities-dir locator). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): + if (_candidate / "audit.py").is_file(): + _lib = _candidate + break + if _lib is not None: + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from audit import append as audit_append # noqa: E402 + + +KEBAB_RE = re.compile(r"^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$") +FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) + + +def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: + """Minimal YAML-ish frontmatter parser. Supports `key: value` lines only. + + Returns (frontmatter_dict, body_text). + """ + text = skill_md.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(text) + if not match: + raise ValueError(f"{skill_md} has no frontmatter block") + fm: dict[str, str] = {} + for line in match.group(1).splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if ":" not in line: + raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") + key, _, value = line.partition(":") + fm[key.strip()] = value.strip().strip('"').strip("'") + return fm, text[match.end():] + + +def _validate_draft(src: Path, name: str) -> None: + if not KEBAB_RE.match(name): + raise SystemExit(f"--name {name!r} is not kebab-case") + if not src.is_dir(): + raise SystemExit(f"--src {src} is not a directory") + skill_md = src / "SKILL.md" + if not skill_md.is_file(): + raise SystemExit(f"missing SKILL.md in {src}") + + fm, body = _parse_frontmatter(skill_md) + if "name" not in fm or "description" not in fm: + raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") + if fm["name"] != name: + raise SystemExit(f"frontmatter name {fm['name']!r} does not match --name {name!r}") + if not fm["description"]: + raise SystemExit("SKILL.md description is 
empty") + if len(body.strip()) < 50: + raise SystemExit("SKILL.md body is suspiciously short — not enough instructions to be useful") + + +def _resolve_workspace(arg: str | None) -> Path: + if arg: + return Path(arg).resolve() + # Fall back to the current working directory; at runtime this is the + # workspace mounted into the sandbox (`/workspace`) or the host repo root. + return Path.cwd().resolve() + + +def _copy_into(src: Path, dst: Path, force: bool) -> None: + if dst.exists(): + if not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + shutil.rmtree(dst) + shutil.copytree(src, dst) + + +# Per-platform runtime mirror — the loader-discoverable directory the +# synthesized skill is copied into so the host agent picks it up +# automatically. Set to None where the platform doesn't have a runtime +# skills directory; only the canonical `.evolve/skills/<name>/` write +# happens in that case. +_RUNTIME_MIRROR_DIR: str | None = ".claude/skills" + + +def cmd_finalize(args: argparse.Namespace) -> int: + src = Path(args.src).resolve() + name = args.name + workspace = _resolve_workspace(args.workspace) + + _validate_draft(src, name) + + evolve_dst = workspace / ".evolve" / "skills" / name + _copy_into(src, evolve_dst, args.force) + + runtime_dst: Path | None = None + if _RUNTIME_MIRROR_DIR is not None: + runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name + _copy_into(src, runtime_dst, args.force) + + audit_append( + project_root=str(workspace), + event="synthesize_skill", + skill=name, + evolve_path=str(evolve_dst.relative_to(workspace)), + runtime_path=str(runtime_dst.relative_to(workspace)) if runtime_dst else "", + trajectory=args.trajectory or "", + ) + + print(f"Installed skill {name!r}:") + print(f" evolve: {evolve_dst}") + if runtime_dst is not None: + print(f" runtime: {runtime_dst}") + return 0 + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = 
parser.add_subparsers(dest="cmd", required=True) + + p_finalize = sub.add_parser( + "finalize", + help="Validate a draft skill directory and install it under .evolve/skills/ (and the platform's runtime skills dir, if any).", + ) + p_finalize.add_argument("--src", required=True, help="Draft directory containing SKILL.md and any scripts/") + p_finalize.add_argument("--name", required=True, help="Kebab-case skill name; must match SKILL.md frontmatter") + p_finalize.add_argument("--trajectory", default="", help="Source trajectory path (recorded in audit.log)") + p_finalize.add_argument("--workspace", default=None, help="Project root (defaults to CWD)") + p_finalize.add_argument("--force", action="store_true", help="Overwrite existing skill of the same name") + p_finalize.set_defaults(func=cmd_finalize) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md new file mode 100644 index 00000000..8f466f42 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -0,0 +1,148 @@ +--- +name: synthesize-skill +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +--- + +# Skill Synthesizer + +## Overview + +This skill reads a saved trajectory and produces a **reusable agent skill** — a `SKILL.md` plus any supporting scripts — that captures the *successful* workflow the session discovered. The output goes to `.evolve/skills/<skill-name>/` (canonical, evolve-managed). 
Future sessions on the same project can then invoke the skill directly instead of re-deriving the workflow. + +This is the **executable** counterpart to the `learn` skill's free-text guidelines: `learn` writes Markdown the next agent has to *read and decide what to do*; `synthesize-skill` writes a skill the next agent can simply *call*. + +## When To Use + +Use this skill when a trajectory captured: + +- A **non-trivial workflow** that succeeded after trial-and-error (the eventual happy path is worth promoting from free-text advice to an invocable artifact). +- A **reusable script or command sequence** the model wrote during the session — particularly one the agent had to reconstruct over multiple attempts. +- An environment-specific workaround (a missing system tool, a permissions wrinkle, a fallback pipeline) that future sessions in the same project will hit. + +Skip this skill — and let `learn` cover the case with a guideline alone — when: + +- The successful path was a single trivial command. +- The workflow embeds secrets, tokens, or one-off user inputs that can't be safely generalized. +- A skill with the same trigger already exists in `.evolve/skills/` (use `learn`'s guideline path to refine the existing skill instead of creating a duplicate). + +## Workflow + +### Step 0: Locate the Trajectory + +This skill runs in a forked context. **You cannot see the parent conversation directly** — read the trajectory the parent passed in via `args` or via the `Run /evolve-lite:synthesize-skill on <path>` instruction. + +The trajectory path is either: + +- supplied directly as `args` to the skill invocation, or +- stated in the parent's invocation message as `The saved trajectory path is: <path>` — take everything after the colon, strip surrounding whitespace and quotes. + +If neither is present, scan `.evolve/trajectories/` for the most recently modified `claude-transcript_<session-id>.jsonl` and use that. 
If `.evolve/trajectories/` does not exist or is empty, output zero artifacts and exit — do not invent a trajectory. + +**Read the trajectory with the `Read` tool — do NOT shell out.** The transcript is JSONL: one JSON object per line. Filter for `"type": "assistant"` and `"type": "human"` records and reconstruct the flow from `message.content`. + +### Step 1: Identify the Successful Workflow + +Walk the trajectory and locate the **final, working** tool sequence — the one that actually produced the answer. Distinguish it from the trial-and-error leading up to it. + +Capture: + +- **What the user asked** (the original prompt). +- **What ultimately worked** — the exact tool calls, scripts, or command sequences that produced the answer. Quote them verbatim from the trajectory. +- **What didn't work** — the dead-ends. You will use these to phrase the triggers under `## When To Use` so the future agent knows when to reach for this skill *instead of* the failing approaches. +- **Environment assumptions** — what was missing or had to be installed (e.g. "no exiftool, pip install Pillow needed"). + +If no clearly successful workflow is in the trajectory (the session ended without reaching an answer, or the answer came from a single trivial call), output zero artifacts and exit. + +### Step 2: Decide a Skill Name and Trigger + +The skill **name** must be: + +- kebab-case, action-oriented (`extract-exif-metadata`, `parse-cloudwatch-logs`, `restart-stuck-deploy`) +- specific enough that a future agent reading just the name can guess what it does +- not a duplicate of any existing entry under `.evolve/skills/` + +The skill **description** (one line, in the SKILL.md frontmatter) should describe the *task* the skill solves, not the trajectory it came from. Bad: "Solves the focal-length question from session abc123." Good: "Extract EXIF metadata (focal length, GPS, lens, timestamps) from JPEG/HEIC images using Pillow when system EXIF tools are unavailable." 
+ +The **trigger** (in the SKILL.md body, under `## When To Use`) should describe the broad task context, not the narrow original request — same rule as the `learn` skill's guidelines. + +Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls`) and confirm your chosen name does not collide with an existing skill. + +### Step 3: Draft the SKILL.md + +Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: + +``` +--- +name: <kebab-case-name> +description: <one-line task description> +--- + +# <Title Case Name> + +## Overview +<1–2 sentences: what the skill does and when to use it> + +## When To Use +- <trigger 1> +- <trigger 2> + +## Workflow +<step-by-step instructions for the agent> +``` + +Notes: + +- `context: fork` is **omitted** for synthesized skills. They run in the parent context so they can write files into the workspace and report back. +- Do NOT inline the full successful script into the SKILL.md if it's more than ~10 lines — put it in a sibling `scripts/` file (Step 4) and reference it from the SKILL.md. +- The Workflow section should describe what to do *to solve the task*, not retell the original session. A future agent reading this should be able to act without ever seeing the trajectory. + +### Step 4: Emit Supporting Scripts + +If the successful workflow used a non-trivial script (more than a one-liner), write it as a sibling file under `scripts/` of your draft skill directory. Use the **already-validated code from the trajectory** — do not invent variations. Strip incidental one-off inputs (literal file names, IDs, hard-coded outputs) and replace with arguments or stdin where appropriate. 
+ +Common shape: + +``` +.evolve/skills/<name>/ +├── SKILL.md +└── scripts/ + └── <action>.py # callable as `python3 scripts/<action>.py <args>` +``` + +If the workflow was a sequence of shell commands rather than a script, encode it as an executable shell script (`scripts/<action>.sh`) so future agents can invoke it as a single unit instead of replaying each command. + +If no non-trivial script is needed (the workflow is a sequence of standard tool calls), skip this step — the SKILL.md alone is the skill. + +### Step 5: Finalize + +Place your draft files (SKILL.md and any scripts) under a temporary draft directory, e.g. `/tmp/synthesized-<name>/`, then call: + +```bash +sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:synthesize-skill/scripts/synthesize.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:synthesize-skill/scripts/synthesize.py"; python3 "$script" finalize --src /tmp/synthesized-<name>/ --name <kebab-case-name> --trajectory <saved_trajectory_path>' +``` + +The script will: + +- Validate the SKILL.md frontmatter (`name` and `description` required; `name` must match `--name`). +- Reject the skill if a same-named skill already exists in `.evolve/skills/` (overwriting requires `--force`). +- Copy the directory into `.evolve/skills/<name>/` (canonical). +- Append a `synthesize_skill` event to `.evolve/audit.log` recording the new skill, the source trajectory, and the timestamp. +- Print the destination path(s). + +If the validator rejects the draft, fix the SKILL.md and retry — do not edit files under `.evolve/skills/` directly. + +### Step 6: Confirm + +After the script returns, list the destination directories with the `Glob` tool to confirm the files landed. Output a short summary: + +- The skill name and description. +- The destination paths. 
+- A one-line note on what future sessions should now be able to do that they couldn't before. + +## Best Practices + +1. **One skill per workflow.** If the trajectory contains two unrelated successful workflows, run synthesis twice with different names — do not pack them into one skill. +2. **Cite the trajectory.** Include the `--trajectory` flag so the audit log records provenance; future maintainers can trace the skill back to the session that produced it. +3. **Don't promote one-shots.** A skill is worth synthesizing only if the trigger is plausibly recurring. If the trajectory looks like a one-off, prefer the `learn` skill's guideline path instead. +4. **Don't paraphrase failure.** The skill describes what *worked*. If you find yourself writing "this skill avoids the problem where exiftool isn't installed," restate it as "uses Pillow to extract EXIF; works in environments without system EXIF tools." Triggers describe *when*, not *what failed*. +5. **Keep scripts minimal.** Strip incidental log lines, debug prints, and validation that wasn't actually exercised in the trajectory. If a feature wasn't validated, leave it out. diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py new file mode 100644 index 00000000..ef8273d7 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Synthesize-skill helper: validate and install a synthesized skill. + +The synthesize-skill skill (a subagent) is responsible for the *judgment* — +reading the trajectory, identifying the successful workflow, and writing +draft SKILL.md + supporting scripts into a temporary directory. 
+ +This script is the *plumbing* — it validates the draft frontmatter, copies +the directory into both the canonical evolve-managed location and the +platform-specific skills loader location, and writes an audit-log entry. + +Usage: + synthesize.py finalize --src <draft_dir> --name <kebab-case-name> \ + [--trajectory <path>] [--workspace <path>] [--force] +""" + +from __future__ import annotations + +import argparse +import re +import shutil +import sys +from pathlib import Path + +# Reuse the plugin's lib helpers (audit-log writer + entities-dir locator). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): + if (_candidate / "audit.py").is_file(): + _lib = _candidate + break + if _lib is not None: + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from audit import append as audit_append # noqa: E402 + + +KEBAB_RE = re.compile(r"^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$") +FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) + + +def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: + """Minimal YAML-ish frontmatter parser. Supports `key: value` lines only. + + Returns (frontmatter_dict, body_text). 
+ """ + text = skill_md.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(text) + if not match: + raise ValueError(f"{skill_md} has no frontmatter block") + fm: dict[str, str] = {} + for line in match.group(1).splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if ":" not in line: + raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") + key, _, value = line.partition(":") + fm[key.strip()] = value.strip().strip('"').strip("'") + return fm, text[match.end():] + + +def _validate_draft(src: Path, name: str) -> None: + if not KEBAB_RE.match(name): + raise SystemExit(f"--name {name!r} is not kebab-case") + if not src.is_dir(): + raise SystemExit(f"--src {src} is not a directory") + skill_md = src / "SKILL.md" + if not skill_md.is_file(): + raise SystemExit(f"missing SKILL.md in {src}") + + fm, body = _parse_frontmatter(skill_md) + if "name" not in fm or "description" not in fm: + raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") + if fm["name"] != name: + raise SystemExit(f"frontmatter name {fm['name']!r} does not match --name {name!r}") + if not fm["description"]: + raise SystemExit("SKILL.md description is empty") + if len(body.strip()) < 50: + raise SystemExit("SKILL.md body is suspiciously short — not enough instructions to be useful") + + +def _resolve_workspace(arg: str | None) -> Path: + if arg: + return Path(arg).resolve() + # Fall back to the current working directory; at runtime this is the + # workspace mounted into the sandbox (`/workspace`) or the host repo root. 
+ return Path.cwd().resolve() + + +def _copy_into(src: Path, dst: Path, force: bool) -> None: + if dst.exists(): + if not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + shutil.rmtree(dst) + shutil.copytree(src, dst) + + +# Per-platform runtime mirror — the loader-discoverable directory the +# synthesized skill is copied into so the host agent picks it up +# automatically. Set to None where the platform doesn't have a runtime +# skills directory; only the canonical `.evolve/skills/<name>/` write +# happens in that case. +_RUNTIME_MIRROR_DIR: str | None = None + + +def cmd_finalize(args: argparse.Namespace) -> int: + src = Path(args.src).resolve() + name = args.name + workspace = _resolve_workspace(args.workspace) + + _validate_draft(src, name) + + evolve_dst = workspace / ".evolve" / "skills" / name + _copy_into(src, evolve_dst, args.force) + + runtime_dst: Path | None = None + if _RUNTIME_MIRROR_DIR is not None: + runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name + _copy_into(src, runtime_dst, args.force) + + audit_append( + project_root=str(workspace), + event="synthesize_skill", + skill=name, + evolve_path=str(evolve_dst.relative_to(workspace)), + runtime_path=str(runtime_dst.relative_to(workspace)) if runtime_dst else "", + trajectory=args.trajectory or "", + ) + + print(f"Installed skill {name!r}:") + print(f" evolve: {evolve_dst}") + if runtime_dst is not None: + print(f" runtime: {runtime_dst}") + return 0 + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_finalize = sub.add_parser( + "finalize", + help="Validate a draft skill directory and install it under .evolve/skills/ (and the platform's runtime skills dir, if any).", + ) + p_finalize.add_argument("--src", required=True, help="Draft directory containing SKILL.md and any scripts/") + p_finalize.add_argument("--name", required=True, help="Kebab-case 
skill name; must match SKILL.md frontmatter") + p_finalize.add_argument("--trajectory", default="", help="Source trajectory path (recorded in audit.log)") + p_finalize.add_argument("--workspace", default=None, help="Project root (defaults to CWD)") + p_finalize.add_argument("--force", action="store_true", help="Overwrite existing skill of the same name") + p_finalize.set_defaults(func=cmd_finalize) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md new file mode 100644 index 00000000..b1b0730c --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -0,0 +1,148 @@ +--- +name: synthesize-skill +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +--- + +# Skill Synthesizer + +## Overview + +This skill reads a saved trajectory and produces a **reusable agent skill** — a `SKILL.md` plus any supporting scripts — that captures the *successful* workflow the session discovered. The output goes to `.evolve/skills/<skill-name>/` (canonical, evolve-managed). Future sessions on the same project can then invoke the skill directly instead of re-deriving the workflow. + +This is the **executable** counterpart to the `learn` skill's free-text guidelines: `learn` writes Markdown the next agent has to *read and decide what to do*; `synthesize-skill` writes a skill the next agent can simply *call*. 
+ +## When To Use + +Use this skill when a trajectory captured: + +- A **non-trivial workflow** that succeeded after trial-and-error (the eventual happy path is worth promoting from free-text advice to an invocable artifact). +- A **reusable script or command sequence** the model wrote during the session — particularly one the agent had to reconstruct over multiple attempts. +- An environment-specific workaround (a missing system tool, a permissions wrinkle, a fallback pipeline) that future sessions in the same project will hit. + +Skip this skill — and let `learn` cover the case with a guideline alone — when: + +- The successful path was a single trivial command. +- The workflow embeds secrets, tokens, or one-off user inputs that can't be safely generalized. +- A skill with the same trigger already exists in `.evolve/skills/` (use `learn`'s guideline path to refine the existing skill instead of creating a duplicate). + +## Workflow + +### Step 0: Locate the Trajectory + +This skill runs in a forked context. **You cannot see the parent conversation directly** — read the trajectory the parent passed in via `args` or via the `Run evolve-lite:synthesize-skill on <path>` instruction. + +The trajectory path is either: + +- supplied directly as `args` to the skill invocation, or +- stated in the parent's invocation message as `The saved trajectory path is: <path>` — take everything after the colon, strip surrounding whitespace and quotes. + +If neither is present, scan `.evolve/trajectories/` for the most recently modified `claude-transcript_<session-id>.jsonl` and use that. If `.evolve/trajectories/` does not exist or is empty, output zero artifacts and exit — do not invent a trajectory. + +**Read the trajectory with the `Read` tool — do NOT shell out.** The transcript is JSONL: one JSON object per line. Filter for `"type": "assistant"` and `"type": "human"` records and reconstruct the flow from `message.content`. 
+ +### Step 1: Identify the Successful Workflow + +Walk the trajectory and locate the **final, working** tool sequence — the one that actually produced the answer. Distinguish it from the trial-and-error leading up to it. + +Capture: + +- **What the user asked** (the original prompt). +- **What ultimately worked** — the exact tool calls, scripts, or command sequences that produced the answer. Quote them verbatim from the trajectory. +- **What didn't work** — the dead-ends. You will use these to phrase the triggers under `## When To Use` so the future agent knows when to reach for this skill *instead of* the failing approaches. +- **Environment assumptions** — what was missing or had to be installed (e.g. "no exiftool, pip install Pillow needed"). + +If no clearly successful workflow is in the trajectory (the session ended without reaching an answer, or the answer came from a single trivial call), output zero artifacts and exit. + +### Step 2: Decide a Skill Name and Trigger + +The skill **name** must be: + +- kebab-case, action-oriented (`extract-exif-metadata`, `parse-cloudwatch-logs`, `restart-stuck-deploy`) +- specific enough that a future agent reading just the name can guess what it does +- not a duplicate of any existing entry under `.evolve/skills/` + +The skill **description** (one line, in the SKILL.md frontmatter) should describe the *task* the skill solves, not the trajectory it came from. Bad: "Solves the focal-length question from session abc123." Good: "Extract EXIF metadata (focal length, GPS, lens, timestamps) from JPEG/HEIC images using Pillow when system EXIF tools are unavailable." + +The **trigger** (in the SKILL.md body, under `## When To Use`) should describe the broad task context, not the narrow original request — same rule as the `learn` skill's guidelines. + +Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls`) and confirm your chosen name does not collide with an existing skill. 
+ +### Step 3: Draft the SKILL.md + +Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: + +``` +--- +name: <kebab-case-name> +description: <one-line task description> +--- + +# <Title Case Name> + +## Overview +<1–2 sentences: what the skill does and when to use it> + +## When To Use +- <trigger 1> +- <trigger 2> + +## Workflow +<step-by-step instructions for the agent> +``` + +Notes: + +- `context: fork` is **omitted** for synthesized skills. They run in the parent context so they can write files into the workspace and report back. +- Do NOT inline the full successful script into the SKILL.md if it's more than ~10 lines — put it in a sibling `scripts/` file (Step 4) and reference it from the SKILL.md. +- The Workflow section should describe what to do *to solve the task*, not retell the original session. A future agent reading this should be able to act without ever seeing the trajectory. + +### Step 4: Emit Supporting Scripts + +If the successful workflow used a non-trivial script (more than a one-liner), write it as a sibling file under `scripts/` of your draft skill directory. Use the **already-validated code from the trajectory** — do not invent variations. Strip incidental one-off inputs (literal file names, IDs, hard-coded outputs) and replace with arguments or stdin where appropriate. + +Common shape: + +``` +.evolve/skills/<name>/ +├── SKILL.md +└── scripts/ + └── <action>.py # callable as `python3 scripts/<action>.py <args>` +``` + +If the workflow was a sequence of shell commands rather than a script, encode it as an executable shell script (`scripts/<action>.sh`) so future agents can invoke it as a single unit instead of replaying each command. + +If no non-trivial script is needed (the workflow is a sequence of standard tool calls), skip this step — the SKILL.md alone is the skill. 
+ +### Step 5: Finalize + +Place your draft files (SKILL.md and any scripts) under a temporary draft directory, e.g. `/tmp/synthesized-<name>/`, then call: + +```bash +python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py" finalize --src /tmp/synthesized-<name>/ --name <kebab-case-name> --trajectory <saved_trajectory_path> +``` + +The script will: + +- Validate the SKILL.md frontmatter (`name` and `description` required; `name` must match `--name`). +- Reject the skill if a same-named skill already exists in `.evolve/skills/` (overwriting requires `--force`). +- Copy the directory into `.evolve/skills/<name>/` (canonical). +- Append a `synthesize_skill` event to `.evolve/audit.log` recording the new skill, the source trajectory, and the timestamp. +- Print the destination path(s). + +If the validator rejects the draft, fix the SKILL.md and retry — do not edit files under `.evolve/skills/` directly. + +### Step 6: Confirm + +After the script returns, list the destination directories with the `Glob` tool to confirm the files landed. Output a short summary: + +- The skill name and description. +- The destination paths. +- A one-line note on what future sessions should now be able to do that they couldn't before. + +## Best Practices + +1. **One skill per workflow.** If the trajectory contains two unrelated successful workflows, run synthesis twice with different names — do not pack them into one skill. +2. **Cite the trajectory.** Include the `--trajectory` flag so the audit log records provenance; future maintainers can trace the skill back to the session that produced it. +3. **Don't promote one-shots.** A skill is worth synthesizing only if the trigger is plausibly recurring. If the trajectory looks like a one-off, prefer the `learn` skill's guideline path instead. +4. **Don't paraphrase failure.** The skill describes what *worked*. 
If you find yourself writing "this skill avoids the problem where exiftool isn't installed," restate it as "uses Pillow to extract EXIF; works in environments without system EXIF tools." Triggers describe *when*, not *what failed*. +5. **Keep scripts minimal.** Strip incidental log lines, debug prints, and validation that wasn't actually exercised in the trajectory. If a feature wasn't validated, leave it out. diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py new file mode 100644 index 00000000..ef8273d7 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Synthesize-skill helper: validate and install a synthesized skill. + +The synthesize-skill skill (a subagent) is responsible for the *judgment* — +reading the trajectory, identifying the successful workflow, and writing +draft SKILL.md + supporting scripts into a temporary directory. + +This script is the *plumbing* — it validates the draft frontmatter, copies +the directory into both the canonical evolve-managed location and the +platform-specific skills loader location, and writes an audit-log entry. + +Usage: + synthesize.py finalize --src <draft_dir> --name <kebab-case-name> \ + [--trajectory <path>] [--workspace <path>] [--force] +""" + +from __future__ import annotations + +import argparse +import re +import shutil +import sys +from pathlib import Path + +# Reuse the plugin's lib helpers (audit-log writer + entities-dir locator). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): + if (_candidate / "audit.py").is_file(): + _lib = _candidate + break + if _lib is not None: + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from audit import append as audit_append # noqa: E402 + + +KEBAB_RE = re.compile(r"^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$") +FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) + + +def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: + """Minimal YAML-ish frontmatter parser. Supports `key: value` lines only. + + Returns (frontmatter_dict, body_text). + """ + text = skill_md.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(text) + if not match: + raise ValueError(f"{skill_md} has no frontmatter block") + fm: dict[str, str] = {} + for line in match.group(1).splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if ":" not in line: + raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") + key, _, value = line.partition(":") + fm[key.strip()] = value.strip().strip('"').strip("'") + return fm, text[match.end():] + + +def _validate_draft(src: Path, name: str) -> None: + if not KEBAB_RE.match(name): + raise SystemExit(f"--name {name!r} is not kebab-case") + if not src.is_dir(): + raise SystemExit(f"--src {src} is not a directory") + skill_md = src / "SKILL.md" + if not skill_md.is_file(): + raise SystemExit(f"missing SKILL.md in {src}") + + fm, body = _parse_frontmatter(skill_md) + if "name" not in fm or "description" not in fm: + raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") + if fm["name"] != name: + raise SystemExit(f"frontmatter name {fm['name']!r} does not match --name {name!r}") + if not fm["description"]: + raise SystemExit("SKILL.md description is 
empty") + if len(body.strip()) < 50: + raise SystemExit("SKILL.md body is suspiciously short — not enough instructions to be useful") + + +def _resolve_workspace(arg: str | None) -> Path: + if arg: + return Path(arg).resolve() + # Fall back to the current working directory; at runtime this is the + # workspace mounted into the sandbox (`/workspace`) or the host repo root. + return Path.cwd().resolve() + + +def _copy_into(src: Path, dst: Path, force: bool) -> None: + if dst.exists(): + if not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + shutil.rmtree(dst) + shutil.copytree(src, dst) + + +# Per-platform runtime mirror — the loader-discoverable directory the +# synthesized skill is copied into so the host agent picks it up +# automatically. Set to None where the platform doesn't have a runtime +# skills directory; only the canonical `.evolve/skills/<name>/` write +# happens in that case. +_RUNTIME_MIRROR_DIR: str | None = None + + +def cmd_finalize(args: argparse.Namespace) -> int: + src = Path(args.src).resolve() + name = args.name + workspace = _resolve_workspace(args.workspace) + + _validate_draft(src, name) + + evolve_dst = workspace / ".evolve" / "skills" / name + _copy_into(src, evolve_dst, args.force) + + runtime_dst: Path | None = None + if _RUNTIME_MIRROR_DIR is not None: + runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name + _copy_into(src, runtime_dst, args.force) + + audit_append( + project_root=str(workspace), + event="synthesize_skill", + skill=name, + evolve_path=str(evolve_dst.relative_to(workspace)), + runtime_path=str(runtime_dst.relative_to(workspace)) if runtime_dst else "", + trajectory=args.trajectory or "", + ) + + print(f"Installed skill {name!r}:") + print(f" evolve: {evolve_dst}") + if runtime_dst is not None: + print(f" runtime: {runtime_dst}") + return 0 + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", 
required=True) + + p_finalize = sub.add_parser( + "finalize", + help="Validate a draft skill directory and install it under .evolve/skills/ (and the platform's runtime skills dir, if any).", + ) + p_finalize.add_argument("--src", required=True, help="Draft directory containing SKILL.md and any scripts/") + p_finalize.add_argument("--name", required=True, help="Kebab-case skill name; must match SKILL.md frontmatter") + p_finalize.add_argument("--trajectory", default="", help="Source trajectory path (recorded in audit.log)") + p_finalize.add_argument("--workspace", default=None, help="Project root (defaults to CWD)") + p_finalize.add_argument("--force", action="store_true", help="Overwrite existing skill of the same name") + p_finalize.set_defaults(func=cmd_finalize) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 b/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 new file mode 100644 index 00000000..5389b373 --- /dev/null +++ b/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 @@ -0,0 +1,152 @@ +{%- from "_macros.j2" import invoke, skill_ref with context -%} +--- +name: {% if platform == "bob" %}evolve-lite:{% endif %}synthesize-skill +description: Convert a saved trajectory into a reusable agent skill (SKILL.md + supporting scripts) that future agents can invoke to skip rediscovered work. Use when a session captured a non-trivial workflow worth promoting from a free-text guideline to an executable skill. +{% if platform == "claude" -%} +context: fork +{% endif -%} +--- + +# Skill Synthesizer + +## Overview + +This skill reads a saved trajectory and produces a **reusable agent skill** — a `SKILL.md` plus any supporting scripts — that captures the *successful* workflow the session discovered. 
The output goes to `.evolve/skills/<skill-name>/` (canonical, evolve-managed){% if platform == "claude" %} and to `.claude/skills/<skill-name>/` (so Claude Code's skills loader picks it up automatically){% endif %}. Future sessions on the same project can then invoke the skill directly instead of re-deriving the workflow. + +This is the **executable** counterpart to the `learn` skill's free-text guidelines: `learn` writes Markdown the next agent has to *read and decide what to do*; `synthesize-skill` writes a skill the next agent can simply *call*. + +## When To Use + +Use this skill when a trajectory captured: + +- A **non-trivial workflow** that succeeded after trial-and-error (the eventual happy path is worth promoting from free-text advice to an invocable artifact). +- A **reusable script or command sequence** the model wrote during the session — particularly one the agent had to reconstruct over multiple attempts. +- An environment-specific workaround (a missing system tool, a permissions wrinkle, a fallback pipeline) that future sessions in the same project will hit. + +Skip this skill — and let `learn` cover the case with a guideline alone — when: + +- The successful path was a single trivial command. +- The workflow embeds secrets, tokens, or one-off user inputs that can't be safely generalized. +- A skill with the same trigger already exists in `.evolve/skills/` (use `learn`'s guideline path to refine the existing skill instead of creating a duplicate). + +## Workflow + +### Step 0: Locate the Trajectory + +This skill runs in a forked context. **You cannot see the parent conversation directly** — read the trajectory the parent passed in via `args` or via the `Run {{ skill_ref('synthesize-skill') }} on <path>` instruction. 
+ +The trajectory path is either: + +- supplied directly as `args` to the skill invocation, or +- stated in the parent's invocation message as `The saved trajectory path is: <path>` — take everything after the colon, strip surrounding whitespace and quotes. + +If neither is present, scan `.evolve/trajectories/` for the most recently modified `claude-transcript_<session-id>.jsonl` and use that. If `.evolve/trajectories/` does not exist or is empty, output zero artifacts and exit — do not invent a trajectory. + +**Read the trajectory with the `Read` tool — do NOT shell out.** The transcript is JSONL: one JSON object per line. Filter for `"type": "assistant"` and `"type": "human"` records and reconstruct the flow from `message.content`. + +### Step 1: Identify the Successful Workflow + +Walk the trajectory and locate the **final, working** tool sequence — the one that actually produced the answer. Distinguish it from the trial-and-error leading up to it. + +Capture: + +- **What the user asked** (the original prompt). +- **What ultimately worked** — the exact tool calls, scripts, or command sequences that produced the answer. Quote them verbatim from the trajectory. +- **What didn't work** — the dead-ends. You will use these to write a `Triggers` section so the future agent knows when to reach for this skill *instead of* the failing approaches. +- **Environment assumptions** — what was missing or had to be installed (e.g. "no exiftool, pip install Pillow needed"). + +If no clearly successful workflow is in the trajectory (the session ended without reaching an answer, or the answer came from a single trivial call), output zero artifacts and exit. 
+ +### Step 2: Decide a Skill Name and Trigger + +The skill **name** must be: + +- kebab-case, action-oriented (`extract-exif-metadata`, `parse-cloudwatch-logs`, `restart-stuck-deploy`) +- specific enough that a future agent reading just the name can guess what it does +- not a duplicate of any existing entry under `.evolve/skills/` + +The skill **description** (one line, in the SKILL.md frontmatter) should describe the *task* the skill solves, not the trajectory it came from. Bad: "Solves the focal-length question from session abc123." Good: "Extract EXIF metadata (focal length, GPS, lens, timestamps) from JPEG/HEIC images using Pillow when system EXIF tools are unavailable." + +The **trigger** (in the SKILL.md body, under `## When To Use`) should describe the broad task context, not the narrow original request — same rule as the `learn` skill's guidelines. + +Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls`) and confirm your chosen name does not collide with an existing skill. + +### Step 3: Draft the SKILL.md + +Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: + +``` +--- +name: <kebab-case-name> +description: <one-line task description> +--- + +# <Title Case Name> + +## Overview +<1–2 sentences: what the skill does and when to use it> + +## When To Use +- <trigger 1> +- <trigger 2> + +## Workflow +<step-by-step instructions for the agent> +``` + +Notes: + +- `context: fork` is **omitted** for synthesized skills. They run in the parent context so they can write files into the workspace and report back. +- Do NOT inline the full successful script into the SKILL.md if it's more than ~10 lines — put it in a sibling `scripts/` file (Step 4) and reference it from the SKILL.md. +- The Workflow section should describe what to do *to solve the task*, not retell the original session. A future agent reading this should be able to act without ever seeing the trajectory. 
+ +### Step 4: Emit Supporting Scripts + +If the successful workflow used a non-trivial script (more than a one-liner), write it as a sibling file under `scripts/` of your draft skill directory. Use the **already-validated code from the trajectory** — do not invent variations. Strip incidental one-off inputs (literal file names, IDs, hard-coded outputs) and replace with arguments or stdin where appropriate. + +Common shape: + +``` +.evolve/skills/<name>/ +├── SKILL.md +└── scripts/ + └── <action>.py # callable as `python3 scripts/<action>.py <args>` +``` + +If the workflow was a sequence of shell commands rather than a script, encode it as an executable shell script (`scripts/<action>.sh`) so future agents can invoke it as a single unit instead of replaying each command. + +If no non-trivial script is needed (the workflow is a sequence of standard tool calls), skip this step — the SKILL.md alone is the skill. + +### Step 5: Finalize + +Place your draft files (SKILL.md and any scripts) in a temporary draft directory that is not under `.evolve/skills/`, e.g. `/tmp/synthesized-<name>/`, then call: + +```bash +{{ invoke('synthesize-skill', 'synthesize.py', args=['finalize', '--src /tmp/synthesized-<name>/', '--name <kebab-case-name>', '--trajectory <saved_trajectory_path>']) }} +``` + +The script will: + +- Validate the SKILL.md frontmatter (`name` and `description` required; `name` must match `--name`). +- Reject the skill if a same-named skill already exists in `.evolve/skills/` (overwriting requires `--force`). +- Copy the directory into `.evolve/skills/<name>/` (canonical){% if platform == "claude" %}, then mirror it into `.claude/skills/<name>/` so Claude Code's skill loader sees it{% endif %}. +- Append a `synthesize_skill` event to `.evolve/audit.log` recording the new skill, the source trajectory, and the timestamp. +- Print the destination path(s). 
+ +If the validator rejects the draft, fix the SKILL.md and retry — do not edit files under `.evolve/skills/`{% if platform == "claude" %} or `.claude/skills/`{% endif %} directly. + +### Step 6: Confirm + +After the script returns, list the destination directories with the `Glob` tool to confirm the files landed. Output a short summary: + +- The skill name and description. +- The destination paths. +- A one-line note on what future sessions should now be able to do that they couldn't before. + +## Best Practices + +1. **One skill per workflow.** If the trajectory contains two unrelated successful workflows, run synthesis twice with different names — do not pack them into one skill. +2. **Cite the trajectory.** Include the `--trajectory` flag so the audit log records provenance; future maintainers can trace the skill back to the session that produced it. +3. **Don't promote one-shots.** A skill is worth synthesizing only if the trigger is plausibly recurring. If the trajectory looks like a one-off, prefer the `learn` skill's guideline path instead. +4. **Don't paraphrase failure.** The skill describes what *worked*. If you find yourself writing "this skill avoids the problem where exiftool isn't installed," restate it as "uses Pillow to extract EXIF; works in environments without system EXIF tools." Triggers describe *when*, not *what failed*. +5. **Keep scripts minimal.** Strip incidental log lines, debug prints, and validation that wasn't actually exercised in the trajectory. If a feature wasn't validated, leave it out. diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 new file mode 100755 index 00000000..662ea9a4 --- /dev/null +++ b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +"""Synthesize-skill helper: validate and install a synthesized skill. 
# Locate the plugin's shared lib directory so the audit-log writer can be
# imported. Walk up from this script, nearest ancestor first; in each ancestor
# try `lib/` and then `evolve-lib/`, and take the first one holding `audit.py`.
_script = Path(__file__).resolve()
_lib = next(
    (
        _candidate
        for _ancestor in _script.parents
        for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib")
        if (_candidate / "audit.py").is_file()
    ),
    None,
)
if _lib is None:
    raise ImportError(f"Cannot find plugin lib directory above {_script}")
# Prepend so the `audit` import below resolves to the plugin's copy.
sys.path.insert(0, str(_lib))
from audit import append as audit_append  # noqa: E402
def _validate_draft(src: Path, name: str) -> None:
    """Check that *src* is an installable draft skill named *name*.

    Checks, in order: *name* is kebab-case; *src* is a directory containing
    ``SKILL.md``; the frontmatter parses and has ``name`` and ``description``;
    the frontmatter ``name`` equals *name*; the description is non-empty; and
    the body after the frontmatter has at least 50 non-whitespace-trimmed
    characters.

    Args:
        src: Draft directory to validate.
        name: Skill name passed on the command line via ``--name``.

    Raises:
        SystemExit: with a human-readable message on the first failed check,
            including unreadable or malformed frontmatter, so the CLI prints
            one clean line instead of a traceback.
    """
    # fullmatch: with `$`, plain match() would accept a trailing newline.
    if not KEBAB_RE.fullmatch(name):
        raise SystemExit(f"--name {name!r} is not kebab-case")
    if not src.is_dir():
        raise SystemExit(f"--src {src} is not a directory")
    skill_md = src / "SKILL.md"
    if not skill_md.is_file():
        raise SystemExit(f"missing SKILL.md in {src}")

    try:
        fm, body = _parse_frontmatter(skill_md)
    except UnicodeDecodeError as exc:
        # Subclass of ValueError, but its message lacks the file name.
        raise SystemExit(f"{skill_md} is not valid UTF-8: {exc}") from exc
    except ValueError as exc:
        raise SystemExit(str(exc)) from exc

    if "name" not in fm or "description" not in fm:
        raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})")
    if fm["name"] != name:
        raise SystemExit(f"frontmatter name {fm['name']!r} does not match --name {name!r}")
    if not fm["description"]:
        raise SystemExit("SKILL.md description is empty")
    if len(body.strip()) < 50:
        raise SystemExit("SKILL.md body is suspiciously short — not enough instructions to be useful")
def cmd_finalize(args: argparse.Namespace) -> int:
    """Validate a draft skill and install it into the workspace.

    The draft is copied to ``<workspace>/.evolve/skills/<name>`` and, when
    ``_RUNTIME_MIRROR_DIR`` is set, to ``<workspace>/<mirror>/<name>`` as well.
    An audit entry is appended afterwards and the destinations are printed.

    Every destination is checked before anything is written, so a
    pre-existing runtime mirror can no longer leave the canonical copy
    installed on its own (which would also block a plain retry).

    Args:
        args: Parsed ``finalize`` arguments (``src``, ``name``, ``workspace``,
            ``trajectory``, ``force``).

    Returns:
        0 on success.

    Raises:
        SystemExit: if validation fails, or a destination already exists and
            ``--force`` was not given.
    """
    src = Path(args.src).resolve()
    name = args.name
    workspace = _resolve_workspace(args.workspace)

    _validate_draft(src, name)

    evolve_dst = workspace / ".evolve" / "skills" / name
    runtime_dst: Path | None = None
    if _RUNTIME_MIRROR_DIR is not None:
        runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name
    destinations = [dst for dst in (evolve_dst, runtime_dst) if dst is not None]

    # Pre-flight: fail before the first copy rather than between copies.
    if not args.force:
        for dst in destinations:
            if dst.exists():
                raise SystemExit(f"{dst} already exists (use --force to overwrite)")

    for dst in destinations:
        _copy_into(src, dst, args.force)

    audit_append(
        project_root=str(workspace),
        event="synthesize_skill",
        skill=name,
        evolve_path=str(evolve_dst.relative_to(workspace)),
        runtime_path=str(runtime_dst.relative_to(workspace)) if runtime_dst else "",
        trajectory=args.trajectory or "",
    )

    print(f"Installed skill {name!r}:")
    print(f"  evolve:  {evolve_dst}")
    if runtime_dst is not None:
        print(f"  runtime: {runtime_dst}")
    return 0
directory containing SKILL.md and any scripts/") + p_finalize.add_argument("--name", required=True, help="Kebab-case skill name; must match SKILL.md frontmatter") + p_finalize.add_argument("--trajectory", default="", help="Source trajectory path (recorded in audit.log)") + p_finalize.add_argument("--workspace", default=None, help="Project root (defaults to CWD)") + p_finalize.add_argument("--force", action="store_true", help="Overwrite existing skill of the same name") + p_finalize.set_defaults(func=cmd_finalize) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) From 5191fd783af680919f39557751d8e7e4456cba04 Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:04:17 -0500 Subject: [PATCH 2/8] test(experiments): add skill-from-trajectory experiment runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three-way comparison driver (no_recall / guidelines / skill) that runs the seed → synthesize → measure flow per trial. Reuses helpers from experiments/token_savings.py. Captures token usage from --output-format json plus per-turn usage and tool-call summaries from saved transcripts. Supports --seed-utterances to test multi-utterance seeding (e.g. gps + focal_length, then measure on lens) for skill generalization. Also resolves /tmp -> /private/tmp on macOS (Docker bind-mount of /tmp subdirs doesn't follow the symlink, breaking the prior plumbing for hidden subdirs). --- experiments/skill_from_trajectory.py | 540 +++++++++++++++++++++++++++ 1 file changed, 540 insertions(+) create mode 100644 experiments/skill_from_trajectory.py diff --git a/experiments/skill_from_trajectory.py b/experiments/skill_from_trajectory.py new file mode 100644 index 00000000..9354ad3a --- /dev/null +++ b/experiments/skill_from_trajectory.py @@ -0,0 +1,540 @@ +"""Experiment: skill-from-trajectory vs guidelines vs no-recall. 
def _extract_usage(parsed: dict | None) -> dict:
    """Return the base usage summary, plus ``total_cost_usd`` when available.

    Delegates to the ``token_savings`` extractor. When *parsed* is not None,
    its ``total_cost_usd`` value (or None if the key is absent) is added to
    the result.
    """
    usage = _extract_usage_base(parsed)
    if parsed is None:
        return usage
    usage["total_cost_usd"] = parsed.get("total_cost_usd")
    return usage
The default seed is `gps` and the +# default measure set is all three; --seed-utterances can override the seed +# set (e.g. `gps,focal_length`) to test two-utterance seeding. +UTTERANCES: dict[str, str] = { + "gps": "where was the photo @sample.jpg taken. use exif metadata", + "focal_length": "what focal length was used to take the photo @sample.jpg. use exif metadata", + "lens": "what lens model was used for @sample.jpg. use exif metadata", +} + +# Default single-utterance seed (run A behavior). +DEFAULT_SEED_KEYS: list[str] = ["gps"] + +# Default measure set (kept here for back-compat with the report-builder). +MEASURE_UTTERANCES: dict[str, str] = dict(UTTERANCES) + +CONDITIONS = ("no_recall", "guidelines", "skill") + + +def _docker_path(p: Path) -> str: + """Resolve a path for Docker bind-mounting on macOS. + + Docker on macOS doesn't follow the /tmp -> /private/tmp symlink for + subdirectories: mounting /tmp/foo/bar lets the container see /tmp/foo + but not its contents. Resolve to the real path before mounting. + """ + return str(p.resolve()) + + +def _run_sandbox_prompt_json(workspace: Path, prompt: str) -> tuple[subprocess.CompletedProcess, dict | None]: + """Run a prompt with --output-format json and return (proc, parsed_json). + + Local copy of the helper from token_savings.py, but resolves the + workspace path before binding (see _docker_path). 
+ """ + plugins = REPO_ROOT / "platform-integrations" / "claude" / "plugins" + command = ( + "claude --plugin-dir /plugins/evolve-lite/ --dangerously-skip-permissions --output-format json -p " + + shlex.quote(prompt) + ) + cmd = ["docker", "run", "--rm"] + for var in FORWARDED_ENV_VARS: + if os.environ.get(var): + cmd += ["-e", var] + cmd += [ + "-e", + "EVOLVE_DEBUG=1", + "-v", + f"{_docker_path(workspace)}:/workspace", + "-v", + f"{_docker_path(plugins)}:/plugins", + SANDBOX_IMAGE, + "bash", + "-c", + command, + ] + proc = subprocess.run(cmd, capture_output=True, text=True, timeout=SESSION_TIMEOUT_SECONDS) + parsed: dict | None = None + if proc.returncode == 0 and proc.stdout.strip(): + try: + parsed = json.loads(proc.stdout) + except json.JSONDecodeError: + for line in reversed(proc.stdout.splitlines()): + line = line.strip() + if line.startswith("{") and line.endswith("}"): + try: + parsed = json.loads(line) + break + except json.JSONDecodeError: + continue + return proc, parsed + + +def _fresh_workspace(tmp_root: Path, label: str) -> Path: + """Copy demo/workspace into tmp_root/<label>, excluding .evolve/.""" + src = REPO_ROOT / "demo" / "workspace" + dst = tmp_root / label + if dst.exists(): + shutil.rmtree(dst) + shutil.copytree(src, dst, ignore=shutil.ignore_patterns(".evolve", ".claude", "backup", "sandbox-backup")) + return dst + + +def _copy_dir(src: Path, dst: Path) -> None: + if dst.exists(): + shutil.rmtree(dst) + shutil.copytree(src, dst) + + +def _list_paths(root: Path, pattern: str) -> list[str]: + if not root.is_dir(): + return [] + return sorted(str(p.relative_to(root)) for p in root.rglob(pattern)) + + +def _tool_calls_summary(transcript_path: Path | None) -> list[dict]: + """Compact list of tool calls from a saved transcript: name + brief input.""" + if transcript_path is None or not transcript_path.is_file(): + return [] + out: list[dict] = [] + for line in transcript_path.read_text().splitlines(): + line = line.strip() + if not line: + 
continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + message = record.get("message", {}) + content = message.get("content", []) + if not isinstance(content, list): + continue + for block in content: + if not isinstance(block, dict) or block.get("type") != "tool_use": + continue + inp = block.get("input") or {} + brief = ( + inp.get("command") + or inp.get("file_path") + or inp.get("skill") + or inp.get("path") + or inp.get("pattern") + or "" + ) + if isinstance(brief, str) and len(brief) > 200: + brief = brief[:197] + "..." + out.append({"tool": block.get("name"), "brief": brief}) + return out + + +def _seed_and_synthesize(tmp_root: Path, trial_idx: int, seed_keys: list[str]) -> dict: + """Run each seed utterance in turn, then the synthesize-skill skill. + + Synthesis runs against the *most recent* seed trajectory; the seed + workspace's `.evolve/entities/` accumulates guidelines from every + seed run (each one fires the learn Stop hook). + """ + label = f"trial_{trial_idx}_seed" + workspace = _fresh_workspace(tmp_root, label) + + seed_runs: list[dict] = [] + for n, key in enumerate(seed_keys, 1): + utt = UTTERANCES[key] + print(f" [{label}] seed {n}/{len(seed_keys)} ({key})...", flush=True) + t0 = time.time() + proc, parsed = _run_sandbox_prompt_json(workspace, utt) + print(f" [{label}] seed {n} done in {time.time() - t0:.0f}s rc={proc.returncode}", flush=True) + if proc.returncode != 0: + return { + "error": f"seed_failed_at_{n}", + "stderr": proc.stderr[-1000:], + "workspace": str(workspace), + } + seed_runs.append({"key": key, "usage": _extract_usage(parsed)}) + + seed_transcript = _newest_transcript(workspace, exclude=set()) + if seed_transcript is None: + return {"error": "seed_no_transcript", "workspace": str(workspace)} + seed_traj_rel = "/".join(seed_transcript.relative_to(workspace).parts) + + print(f" [{label}] synthesize-skill...", flush=True) + synth_prompt = ( + f"Run /evolve-lite:synthesize-skill on the saved 
def _build_condition_workspace(seed_workspace: Path, tmp_root: Path, trial_idx: int, condition: str) -> Path:
    """Create the measure workspace for one trial and condition.

    A fresh workspace named ``trial_<idx>_<condition>`` is created under
    *tmp_root*. ``no_recall`` gets nothing else. Any other condition gets an
    ``.evolve/`` directory, plus whichever of these exist in the seeded
    workspace:

    - ``guidelines``: ``.evolve/entities``
    - ``skill``: ``.claude`` and ``.evolve/skills``

    Returns the path of the new workspace.
    """
    measure_ws = _fresh_workspace(tmp_root, f"trial_{trial_idx}_{condition}")
    if condition == "no_recall":
        return measure_ws

    # Recall hooks need a writable .evolve/ (audit log, new trajectories).
    (measure_ws / ".evolve").mkdir(exist_ok=True)

    # Relative directories carried over from the seeded workspace.
    carry_over = {
        "guidelines": (Path(".evolve") / "entities",),
        "skill": (Path(".claude"), Path(".evolve") / "skills"),
    }
    for rel in carry_over.get(condition, ()):
        seeded = seed_workspace / rel
        if seeded.is_dir():
            _copy_dir(seeded, measure_ws / rel)
    return measure_ws
def _format_table(results: dict, utterance_keys: list[str] | None = None) -> str:
    """Build one markdown table per utterance: rows are metrics, columns are conditions.

    Each cell shows ``mean (min–max)`` over that condition's runs, or ``n/a``
    when no run has a value for the metric.

    Args:
        results: ``{condition: {utterance_key: [run_dict, ...]}}``.
            ``num_turns`` is read from each run's ``raw_json``; every other
            metric from its ``headline_usage``.
        utterance_keys: Utterances to tabulate; defaults to all keys of
            ``MEASURE_UTTERANCES``.

    Returns:
        The tables joined into a single markdown string.
    """
    metrics = [
        ("total_tokens", "total"),
        ("output_tokens", "output"),
        ("cache_read_input_tokens", "cache_read"),
        ("cache_creation_input_tokens", "cache_create"),
        ("duration_ms", "duration_ms"),
        ("num_turns", "num_turns"),
    ]
    keys = utterance_keys if utterance_keys is not None else list(MEASURE_UTTERANCES)
    # Header and separator are the same for every utterance; build them once.
    header = "| metric | " + " | ".join(CONDITIONS) + " |"
    sep = "| --- " + "| --- " * len(CONDITIONS) + "|"

    lines: list[str] = []
    for utt_key in keys:
        lines.append(f"\n### Utterance: `{utt_key}`")
        lines.append("")
        lines.append(header)
        lines.append(sep)
        for key, label in metrics:
            row = [label]
            for cond in CONDITIONS:
                runs = results.get(cond, {}).get(utt_key, [])
                if key == "num_turns":
                    # A run stores raw_json=None when the CLI output was not
                    # parseable JSON, so `or {}` is needed, not a .get default.
                    vals = [(r.get("raw_json") or {}).get("num_turns") for r in runs if "raw_json" in r]
                else:
                    vals = [(r.get("headline_usage") or {}).get(key) for r in runs if "headline_usage" in r]
                summary = _summarize([v for v in vals if v is not None])
                if not summary.get("n"):
                    row.append("n/a")
                else:
                    row.append(f"{summary['mean']:.0f} ({summary['min']:.0f}–{summary['max']:.0f})")
            lines.append("| " + " | ".join(row) + " |")
    return "\n".join(lines)
+ rows.append( + f"| {i} | {s.get('total_tokens', '?')} | {sy.get('total_tokens', '?')} | " + f"{sy.get('num_turns', '?')} | {cost_str} | " + f"{', '.join(seed.get('skills_synthesized', []))} | " + f"{seed.get('guideline_count', '?')} |" + ) + return "\n".join(rows) + + +def _write_report( + results_dir: Path, + seeds: list[dict], + results: dict, + utterance_keys: list[str], + seed_keys: list[str], +) -> Path: + lines = ["# Skill-from-trajectory experiment\n"] + lines.append(f"_Generated {datetime.now(timezone.utc).isoformat()}_\n") + if len(seed_keys) == 1: + lines.append(f"**Seed utterance** (`{seed_keys[0]}`): `{UTTERANCES[seed_keys[0]]}`") + else: + lines.append("**Seed utterances** (run sequentially in the seed workspace before synthesis):") + for k in seed_keys: + lines.append(f"- `{k}`: `{UTTERANCES[k]}`") + lines.append("\n**Conditions:**") + lines.append("- `no_recall` — fresh `demo/workspace`, no `.evolve/`, no `.claude/skills/`") + lines.append("- `guidelines` — fresh `demo/workspace` + seeded `.evolve/entities/`") + lines.append("- `skill` — fresh `demo/workspace` + seeded `.claude/skills/` and `.evolve/skills/`\n") + lines.append("**Measure utterances:**") + for k in utterance_keys: + lines.append(f"- `{k}`: `{UTTERANCES[k]}`") + lines.append("") + lines.append("## Synthesis cost (per-trial setup, NOT included in any condition)\n") + lines.append(_format_synth_costs(seeds)) + lines.append("\n## Comparison\n") + lines.append( + "Mean (range) across trials. `total` is the unweighted sum of input + output + cache_read + cache_create — " + "cache_read is ~10x cheaper per token than fresh input, so this overweights cache." 
+ ) + lines.append(_format_table(results, utterance_keys)) + path = results_dir / "report.md" + path.write_text("\n".join(lines) + "\n") + return path + + +def _save_synthesized_skills(seeds: list[dict], results_dir: Path) -> None: + out_root = results_dir / "synthesized_skills" + out_root.mkdir(parents=True, exist_ok=True) + for i, seed in enumerate(seeds, 1): + ws = seed.get("workspace") + if not ws: + continue + skills_dir = Path(ws) / ".evolve" / "skills" + if not skills_dir.is_dir(): + continue + for skill_dir in skills_dir.iterdir(): + if not skill_dir.is_dir(): + continue + dst = out_root / f"trial_{i}_{skill_dir.name}" + _copy_dir(skill_dir, dst) + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("--trials", type=int, default=5, help="trials per condition (default 5)") + parser.add_argument( + "--seed-utterances", + nargs="+", + choices=list(UTTERANCES.keys()), + default=DEFAULT_SEED_KEYS, + help="utterance keys to seed with, run sequentially (default: gps)", + ) + parser.add_argument( + "--utterances", + nargs="+", + choices=list(UTTERANCES.keys()), + default=None, + help=( + "which measure utterances to run. Default: when --seed-utterances is the default (gps), " + "measures all 3; when --seed-utterances is overridden, measures only the keys NOT in the seed set." 
+ ), + ) + parser.add_argument("--keep-workspaces", action="store_true", help="don't delete per-trial workspaces") + args = parser.parse_args() + + seed_keys: list[str] = list(args.seed_utterances) + if args.utterances is None: + if seed_keys == DEFAULT_SEED_KEYS: + measure_keys = list(UTTERANCES.keys()) + else: + measure_keys = [k for k in UTTERANCES.keys() if k not in seed_keys] + if not measure_keys: + parser.error("seed set covers every utterance — no measure utterances left; pass --utterances explicitly") + else: + measure_keys = list(args.utterances) + + _check_prerequisites() + + timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + results_dir = REPO_ROOT / "experiments" / "results" / f"skill_from_trajectory_{timestamp}" + results_dir.mkdir(parents=True, exist_ok=True) + workspace_root = results_dir / "workspaces" + workspace_root.mkdir(exist_ok=True) + print(f"Results dir: {results_dir}") + print(f"Trials: {args.trials}") + print(f"Seed utterances: {seed_keys}") + print(f"Measure utterances: {measure_keys}") + + seeds: list[dict] = [] + results: dict = {cond: {u: [] for u in measure_keys} for cond in CONDITIONS} + + for i in range(1, args.trials + 1): + print(f"\n=== trial {i}/{args.trials}: seed + synthesize ===") + seed = _seed_and_synthesize(workspace_root, i, seed_keys) + seeds.append(seed) + # Persist progressively in case we crash mid-run. 
+ (results_dir / "raw.json").write_text( + json.dumps({"seeds": seeds, "results": results}, indent=2, default=str) + ) + if "error" in seed: + print(f" [trial {i}] seed/synth FAILED: {seed['error']} — skipping measure runs") + continue + if not seed.get("skills_synthesized"): + print(f" [trial {i}] synthesize produced NO skill — skipping measure runs") + continue + seed_workspace = Path(seed["workspace"]) + + for cond in CONDITIONS: + cond_workspace = _build_condition_workspace(seed_workspace, workspace_root, i, cond) + for utt_key in measure_keys: + utt_text = UTTERANCES[utt_key] + label = f"trial_{i}_{cond}_{utt_key}" + run_result = _do_measure_run(cond_workspace, utt_text, label) + run_result["condition"] = cond + run_result["utterance"] = utt_key + run_result["trial"] = i + results[cond][utt_key].append(run_result) + (results_dir / "raw.json").write_text( + json.dumps({"seeds": seeds, "results": results}, indent=2, default=str) + ) + + _save_synthesized_skills(seeds, results_dir) + report_path = _write_report(results_dir, seeds, results, measure_keys, seed_keys) + + print("\n" + "=" * 60) + print(_format_table(results, measure_keys)) + print("=" * 60) + print(f"\nReport: {report_path}") + print(f"Raw: {results_dir / 'raw.json'}") + + if not args.keep_workspaces: + shutil.rmtree(workspace_root, ignore_errors=True) + + errors = [s for s in seeds if "error" in s] + [ + r + for cond in CONDITIONS + for u in measure_keys + for r in results[cond][u] + if "error" in r + ] + if errors: + print(f"\n{len(errors)} run(s) had errors — see raw.json") + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 1b8e484e3940164b9bc85130925c40bacddae695 Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:57:46 -0500 Subject: [PATCH 3/8] fix(claude-plugin): apply ruff format to synthesize.py Ruff format wants `text[match.end() :]` (slice spacing) instead of `text[match.end():]`. 
Apply to the .j2 source plus all four rendered outputs so re-running the renderer stays consistent. Fixes failing CI check: check-formatting (3.12) --- .../skills/evolve-lite-synthesize-skill/scripts/synthesize.py | 2 +- .../skills/evolve-lite/synthesize-skill/scripts/synthesize.py | 2 +- .../skills/evolve-lite/synthesize-skill/scripts/synthesize.py | 2 +- .../skills/evolve-lite/synthesize-skill/scripts/synthesize.py | 2 +- .../evolve-lite/synthesize-skill/scripts/synthesize.py.j2 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py index ef8273d7..ad05e48d 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py @@ -60,7 +60,7 @@ def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") key, _, value = line.partition(":") fm[key.strip()] = value.strip().strip('"').strip("'") - return fm, text[match.end():] + return fm, text[match.end() :] def _validate_draft(src: Path, name: str) -> None: diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index fafb5623..06a6f5b9 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -60,7 +60,7 @@ def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") key, 
_, value = line.partition(":") fm[key.strip()] = value.strip().strip('"').strip("'") - return fm, text[match.end():] + return fm, text[match.end() :] def _validate_draft(src: Path, name: str) -> None: diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index ef8273d7..ad05e48d 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -60,7 +60,7 @@ def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") key, _, value = line.partition(":") fm[key.strip()] = value.strip().strip('"').strip("'") - return fm, text[match.end():] + return fm, text[match.end() :] def _validate_draft(src: Path, name: str) -> None: diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index ef8273d7..ad05e48d 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -60,7 +60,7 @@ def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") key, _, value = line.partition(":") fm[key.strip()] = value.strip().strip('"').strip("'") - return fm, text[match.end():] + return fm, text[match.end() :] def _validate_draft(src: Path, name: str) -> None: diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 
b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 index 662ea9a4..4a0829c4 100755 --- a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 +++ b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 @@ -60,7 +60,7 @@ def _parse_frontmatter(skill_md: Path) -> tuple[dict[str, str], str]: raise ValueError(f"{skill_md}: malformed frontmatter line: {line!r}") key, _, value = line.partition(":") fm[key.strip()] = value.strip().strip('"').strip("'") - return fm, text[match.end():] + return fm, text[match.end() :] def _validate_draft(src: Path, name: str) -> None: From cab53015a9cf30e5587827a537fec0ac65fe7b2e Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:02:04 -0500 Subject: [PATCH 4/8] fix(experiments): apply ruff format to skill_from_trajectory.py Fixes failing CI check: check-formatting (3.12) --- experiments/skill_from_trajectory.py | 39 +++++----------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/experiments/skill_from_trajectory.py b/experiments/skill_from_trajectory.py index 9354ad3a..4456792f 100644 --- a/experiments/skill_from_trajectory.py +++ b/experiments/skill_from_trajectory.py @@ -94,10 +94,7 @@ def _run_sandbox_prompt_json(workspace: Path, prompt: str) -> tuple[subprocess.C workspace path before binding (see _docker_path). 
""" plugins = REPO_ROOT / "platform-integrations" / "claude" / "plugins" - command = ( - "claude --plugin-dir /plugins/evolve-lite/ --dangerously-skip-permissions --output-format json -p " - + shlex.quote(prompt) - ) + command = "claude --plugin-dir /plugins/evolve-lite/ --dangerously-skip-permissions --output-format json -p " + shlex.quote(prompt) cmd = ["docker", "run", "--rm"] for var in FORWARDED_ENV_VARS: if os.environ.get(var): @@ -174,14 +171,7 @@ def _tool_calls_summary(transcript_path: Path | None) -> list[dict]: if not isinstance(block, dict) or block.get("type") != "tool_use": continue inp = block.get("input") or {} - brief = ( - inp.get("command") - or inp.get("file_path") - or inp.get("skill") - or inp.get("path") - or inp.get("pattern") - or "" - ) + brief = inp.get("command") or inp.get("file_path") or inp.get("skill") or inp.get("path") or inp.get("pattern") or "" if isinstance(brief, str) and len(brief) > 200: brief = brief[:197] + "..." out.append({"tool": block.get("name"), "brief": brief}) @@ -219,10 +209,7 @@ def _seed_and_synthesize(tmp_root: Path, trial_idx: int, seed_keys: list[str]) - seed_traj_rel = "/".join(seed_transcript.relative_to(workspace).parts) print(f" [{label}] synthesize-skill...", flush=True) - synth_prompt = ( - f"Run /evolve-lite:synthesize-skill on the saved trajectory. " - f"The saved trajectory path is: {seed_traj_rel}" - ) + synth_prompt = f"Run /evolve-lite:synthesize-skill on the saved trajectory. 
The saved trajectory path is: {seed_traj_rel}" t1 = time.time() synth_proc, synth_parsed = _run_sandbox_prompt_json(workspace, synth_prompt) print(f" [{label}] synth done in {time.time() - t1:.0f}s rc={synth_proc.returncode}", flush=True) @@ -288,9 +275,7 @@ def _do_measure_run( print(f" [{label}] measure...", flush=True) t0 = time.time() pre_transcripts = ( - set((workspace / ".evolve" / "trajectories").glob("*.jsonl")) - if (workspace / ".evolve" / "trajectories").is_dir() - else set() + set((workspace / ".evolve" / "trajectories").glob("*.jsonl")) if (workspace / ".evolve" / "trajectories").is_dir() else set() ) proc, parsed = _run_sandbox_prompt_json(workspace, utterance) print(f" [{label}] done in {time.time() - t0:.0f}s rc={proc.returncode}", flush=True) @@ -486,9 +471,7 @@ def main() -> int: seed = _seed_and_synthesize(workspace_root, i, seed_keys) seeds.append(seed) # Persist progressively in case we crash mid-run. - (results_dir / "raw.json").write_text( - json.dumps({"seeds": seeds, "results": results}, indent=2, default=str) - ) + (results_dir / "raw.json").write_text(json.dumps({"seeds": seeds, "results": results}, indent=2, default=str)) if "error" in seed: print(f" [trial {i}] seed/synth FAILED: {seed['error']} — skipping measure runs") continue @@ -507,9 +490,7 @@ def main() -> int: run_result["utterance"] = utt_key run_result["trial"] = i results[cond][utt_key].append(run_result) - (results_dir / "raw.json").write_text( - json.dumps({"seeds": seeds, "results": results}, indent=2, default=str) - ) + (results_dir / "raw.json").write_text(json.dumps({"seeds": seeds, "results": results}, indent=2, default=str)) _save_synthesized_skills(seeds, results_dir) report_path = _write_report(results_dir, seeds, results, measure_keys, seed_keys) @@ -523,13 +504,7 @@ def main() -> int: if not args.keep_workspaces: shutil.rmtree(workspace_root, ignore_errors=True) - errors = [s for s in seeds if "error" in s] + [ - r - for cond in CONDITIONS - for u in measure_keys 
- for r in results[cond][u] - if "error" in r - ] + errors = [s for s in seeds if "error" in s] + [r for cond in CONDITIONS for u in measure_keys for r in results[cond][u] if "error" in r] if errors: print(f"\n{len(errors)} run(s) had errors — see raw.json") return 1 From 054a61635dfc8d5ed744504c66bc0a280b9acb5b Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:07:40 -0500 Subject: [PATCH 5/8] fix(experiments): silence mypy on sys.path-hack import The token_savings import goes through sys.path.insert so mypy can't resolve the module. Add `# type: ignore[import-not-found]` and explicitly annotate the wrapper return so the no-any-return error is also resolved. Fixes failing CI check: check-typing (3.12) --- experiments/skill_from_trajectory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/skill_from_trajectory.py b/experiments/skill_from_trajectory.py index 4456792f..77d2a952 100644 --- a/experiments/skill_from_trajectory.py +++ b/experiments/skill_from_trajectory.py @@ -39,7 +39,7 @@ # Reuse helpers from the existing token-savings experiment. 
sys.path.insert(0, str(Path(__file__).resolve().parent)) -from token_savings import ( # noqa: E402 +from token_savings import ( # type: ignore[import-not-found] # noqa: E402 FORWARDED_ENV_VARS, REPO_ROOT, SANDBOX_IMAGE, @@ -53,7 +53,7 @@ def _extract_usage(parsed: dict | None) -> dict: """Extend the base extractor with total_cost_usd, which we report per-trial.""" - out = _extract_usage_base(parsed) + out: dict = _extract_usage_base(parsed) if parsed is not None: out["total_cost_usd"] = parsed.get("total_cost_usd") return out From 367bf89e1b300859becdbfe3db8e04bb762ef454 Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:01:51 -0500 Subject: [PATCH 6/8] fix(claude-plugin): tag SKILL.md fenced blocks with language identifiers Markdownlint MD040 wants every fenced code block to declare a language. The frontmatter example fence becomes ```yaml; the directory-tree fence becomes ```text. Edit the .j2 source so re-rendering propagates to all four platforms. 
Addresses CodeRabbit review findings: - "Add language identifier to fenced code block" (frontmatter) - "Add language identifier to fenced code block" (directory tree) - "Add language identifiers to fenced code blocks (root cause for deployed copies)" --- .../evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md | 4 ++-- .../evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md | 4 ++-- .../evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md | 4 ++-- .../evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md | 4 ++-- plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md index f7df57d7..c5d5db35 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/SKILL.md @@ -71,7 +71,7 @@ Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: -``` +```yaml --- name: <kebab-case-name> description: <one-line task description> @@ -102,7 +102,7 @@ If the successful workflow used a non-trivial script (more than a one-liner), wr Common shape: -``` +```text .evolve/skills/<name>/ ├── SKILL.md └── scripts/ diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md index c9cf62fc..1ea2a172 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -72,7 +72,7 @@ Before continuing, list 
`.evolve/skills/` (use the `Glob` tool, not `find` / `ls Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: -``` +```yaml --- name: <kebab-case-name> description: <one-line task description> @@ -103,7 +103,7 @@ If the successful workflow used a non-trivial script (more than a one-liner), wr Common shape: -``` +```text .evolve/skills/<name>/ ├── SKILL.md └── scripts/ diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md index 8f466f42..368ec0be 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -71,7 +71,7 @@ Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: -``` +```yaml --- name: <kebab-case-name> description: <one-line task description> @@ -102,7 +102,7 @@ If the successful workflow used a non-trivial script (more than a one-liner), wr Common shape: -``` +```text .evolve/skills/<name>/ ├── SKILL.md └── scripts/ diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md index b1b0730c..80c482ef 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/SKILL.md @@ -71,7 +71,7 @@ Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: -``` +```yaml --- name: 
<kebab-case-name> description: <one-line task description> @@ -102,7 +102,7 @@ If the successful workflow used a non-trivial script (more than a one-liner), wr Common shape: -``` +```text .evolve/skills/<name>/ ├── SKILL.md └── scripts/ diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 b/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 index 5389b373..db4e02ec 100644 --- a/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 +++ b/plugin-source/skills/evolve-lite/synthesize-skill/SKILL.md.j2 @@ -75,7 +75,7 @@ Before continuing, list `.evolve/skills/` (use the `Glob` tool, not `find` / `ls Author a SKILL.md with this exact frontmatter shape — the validator in Step 5 will reject it otherwise: -``` +```yaml --- name: <kebab-case-name> description: <one-line task description> @@ -106,7 +106,7 @@ If the successful workflow used a non-trivial script (more than a one-liner), wr Common shape: -``` +```text .evolve/skills/<name>/ ├── SKILL.md └── scripts/ From 17adb8aef14109278173dd5186fc1b40929fdfa4 Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:02:48 -0500 Subject: [PATCH 7/8] fix(claude-plugin): harden synthesize.py finalize against partial installs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two robustness fixes in cmd_finalize: 1. Wrap the _parse_frontmatter call in a try/except ValueError so malformed SKILL.md files exit cleanly (SystemExit with the parser's message) instead of bubbling up a traceback. 2. Pre-check both destinations (.evolve/skills/<name>/ and the platform-specific runtime mirror, when set) before performing any copy. Previously, if the runtime-mirror destination already existed and --force was off, evolve_dst would already have been written — leaving a partial install on disk. 
Refactor: extract _check_dest() (existence guard) from _copy_into() (actual write), and call _check_dest on both targets before either _copy_into. Also collapse the platform-specific _RUNTIME_MIRROR_DIR declaration to a single line so the rendered output matches ruff format directly (no post-render reformatting cycle). Addresses CodeRabbit review findings: - "Frontmatter parse errors escape as tracebacks instead of clean CLI errors" - "Pre-check both destinations before copying to avoid a partial install" - "Partial install on runtime-mirror failure (claude variant)" --- .../scripts/synthesize.py | 30 +++++++++++----- .../synthesize-skill/scripts/synthesize.py | 30 +++++++++++----- .../synthesize-skill/scripts/synthesize.py | 30 +++++++++++----- .../synthesize-skill/scripts/synthesize.py | 30 +++++++++++----- .../synthesize-skill/scripts/synthesize.py.j2 | 36 +++++++++++-------- 5 files changed, 106 insertions(+), 50 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py index ad05e48d..6b56f31c 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py @@ -72,7 +72,10 @@ def _validate_draft(src: Path, name: str) -> None: if not skill_md.is_file(): raise SystemExit(f"missing SKILL.md in {src}") - fm, body = _parse_frontmatter(skill_md) + try: + fm, body = _parse_frontmatter(skill_md) + except ValueError as exc: + raise SystemExit(f"SKILL.md frontmatter is malformed: {exc}") from exc if "name" not in fm or "description" not in fm: raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") if fm["name"] != name: @@ -91,10 +94,14 @@ def _resolve_workspace(arg: str | None) -> Path: return Path.cwd().resolve() 
-def _copy_into(src: Path, dst: Path, force: bool) -> None: +def _check_dest(dst: Path, force: bool) -> None: + """Reject the install if dst would block it; let _copy_into do the actual write.""" + if dst.exists() and not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + + +def _copy_into(src: Path, dst: Path) -> None: if dst.exists(): - if not force: - raise SystemExit(f"{dst} already exists (use --force to overwrite)") shutil.rmtree(dst) shutil.copytree(src, dst) @@ -115,12 +122,17 @@ def cmd_finalize(args: argparse.Namespace) -> int: _validate_draft(src, name) evolve_dst = workspace / ".evolve" / "skills" / name - _copy_into(src, evolve_dst, args.force) + runtime_dst: Path | None = workspace / _RUNTIME_MIRROR_DIR / name if _RUNTIME_MIRROR_DIR is not None else None - runtime_dst: Path | None = None - if _RUNTIME_MIRROR_DIR is not None: - runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name - _copy_into(src, runtime_dst, args.force) + # Pre-check both destinations before any copy so a blocked second + # write doesn't leave the first half of the install on disk. 
+ _check_dest(evolve_dst, args.force) + if runtime_dst is not None: + _check_dest(runtime_dst, args.force) + + _copy_into(src, evolve_dst) + if runtime_dst is not None: + _copy_into(src, runtime_dst) audit_append( project_root=str(workspace), diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index 06a6f5b9..09a8d62d 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -72,7 +72,10 @@ def _validate_draft(src: Path, name: str) -> None: if not skill_md.is_file(): raise SystemExit(f"missing SKILL.md in {src}") - fm, body = _parse_frontmatter(skill_md) + try: + fm, body = _parse_frontmatter(skill_md) + except ValueError as exc: + raise SystemExit(f"SKILL.md frontmatter is malformed: {exc}") from exc if "name" not in fm or "description" not in fm: raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") if fm["name"] != name: @@ -91,10 +94,14 @@ def _resolve_workspace(arg: str | None) -> Path: return Path.cwd().resolve() -def _copy_into(src: Path, dst: Path, force: bool) -> None: +def _check_dest(dst: Path, force: bool) -> None: + """Reject the install if dst would block it; let _copy_into do the actual write.""" + if dst.exists() and not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + + +def _copy_into(src: Path, dst: Path) -> None: if dst.exists(): - if not force: - raise SystemExit(f"{dst} already exists (use --force to overwrite)") shutil.rmtree(dst) shutil.copytree(src, dst) @@ -115,12 +122,17 @@ def cmd_finalize(args: argparse.Namespace) -> int: _validate_draft(src, name) evolve_dst = workspace / ".evolve" / "skills" / name - 
_copy_into(src, evolve_dst, args.force) + runtime_dst: Path | None = workspace / _RUNTIME_MIRROR_DIR / name if _RUNTIME_MIRROR_DIR is not None else None - runtime_dst: Path | None = None - if _RUNTIME_MIRROR_DIR is not None: - runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name - _copy_into(src, runtime_dst, args.force) + # Pre-check both destinations before any copy so a blocked second + # write doesn't leave the first half of the install on disk. + _check_dest(evolve_dst, args.force) + if runtime_dst is not None: + _check_dest(runtime_dst, args.force) + + _copy_into(src, evolve_dst) + if runtime_dst is not None: + _copy_into(src, runtime_dst) audit_append( project_root=str(workspace), diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index ad05e48d..6b56f31c 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -72,7 +72,10 @@ def _validate_draft(src: Path, name: str) -> None: if not skill_md.is_file(): raise SystemExit(f"missing SKILL.md in {src}") - fm, body = _parse_frontmatter(skill_md) + try: + fm, body = _parse_frontmatter(skill_md) + except ValueError as exc: + raise SystemExit(f"SKILL.md frontmatter is malformed: {exc}") from exc if "name" not in fm or "description" not in fm: raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") if fm["name"] != name: @@ -91,10 +94,14 @@ def _resolve_workspace(arg: str | None) -> Path: return Path.cwd().resolve() -def _copy_into(src: Path, dst: Path, force: bool) -> None: +def _check_dest(dst: Path, force: bool) -> None: + """Reject the install if dst would block it; let _copy_into do the actual 
write.""" + if dst.exists() and not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + + +def _copy_into(src: Path, dst: Path) -> None: if dst.exists(): - if not force: - raise SystemExit(f"{dst} already exists (use --force to overwrite)") shutil.rmtree(dst) shutil.copytree(src, dst) @@ -115,12 +122,17 @@ def cmd_finalize(args: argparse.Namespace) -> int: _validate_draft(src, name) evolve_dst = workspace / ".evolve" / "skills" / name - _copy_into(src, evolve_dst, args.force) + runtime_dst: Path | None = workspace / _RUNTIME_MIRROR_DIR / name if _RUNTIME_MIRROR_DIR is not None else None - runtime_dst: Path | None = None - if _RUNTIME_MIRROR_DIR is not None: - runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name - _copy_into(src, runtime_dst, args.force) + # Pre-check both destinations before any copy so a blocked second + # write doesn't leave the first half of the install on disk. + _check_dest(evolve_dst, args.force) + if runtime_dst is not None: + _check_dest(runtime_dst, args.force) + + _copy_into(src, evolve_dst) + if runtime_dst is not None: + _copy_into(src, runtime_dst) audit_append( project_root=str(workspace), diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index ad05e48d..6b56f31c 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -72,7 +72,10 @@ def _validate_draft(src: Path, name: str) -> None: if not skill_md.is_file(): raise SystemExit(f"missing SKILL.md in {src}") - fm, body = _parse_frontmatter(skill_md) + try: + fm, body = _parse_frontmatter(skill_md) + except ValueError as exc: + raise SystemExit(f"SKILL.md frontmatter is malformed: {exc}") from exc if "name" not 
in fm or "description" not in fm: raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") if fm["name"] != name: @@ -91,10 +94,14 @@ def _resolve_workspace(arg: str | None) -> Path: return Path.cwd().resolve() -def _copy_into(src: Path, dst: Path, force: bool) -> None: +def _check_dest(dst: Path, force: bool) -> None: + """Reject the install if dst would block it; let _copy_into do the actual write.""" + if dst.exists() and not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + + +def _copy_into(src: Path, dst: Path) -> None: if dst.exists(): - if not force: - raise SystemExit(f"{dst} already exists (use --force to overwrite)") shutil.rmtree(dst) shutil.copytree(src, dst) @@ -115,12 +122,17 @@ def cmd_finalize(args: argparse.Namespace) -> int: _validate_draft(src, name) evolve_dst = workspace / ".evolve" / "skills" / name - _copy_into(src, evolve_dst, args.force) + runtime_dst: Path | None = workspace / _RUNTIME_MIRROR_DIR / name if _RUNTIME_MIRROR_DIR is not None else None - runtime_dst: Path | None = None - if _RUNTIME_MIRROR_DIR is not None: - runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name - _copy_into(src, runtime_dst, args.force) + # Pre-check both destinations before any copy so a blocked second + # write doesn't leave the first half of the install on disk. 
+ _check_dest(evolve_dst, args.force) + if runtime_dst is not None: + _check_dest(runtime_dst, args.force) + + _copy_into(src, evolve_dst) + if runtime_dst is not None: + _copy_into(src, runtime_dst) audit_append( project_root=str(workspace), diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 index 4a0829c4..e8a4a4ec 100755 --- a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 +++ b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 @@ -72,7 +72,10 @@ def _validate_draft(src: Path, name: str) -> None: if not skill_md.is_file(): raise SystemExit(f"missing SKILL.md in {src}") - fm, body = _parse_frontmatter(skill_md) + try: + fm, body = _parse_frontmatter(skill_md) + except ValueError as exc: + raise SystemExit(f"SKILL.md frontmatter is malformed: {exc}") from exc if "name" not in fm or "description" not in fm: raise SystemExit(f"SKILL.md frontmatter must include `name` and `description` (got: {sorted(fm.keys())})") if fm["name"] != name: @@ -91,10 +94,14 @@ def _resolve_workspace(arg: str | None) -> Path: return Path.cwd().resolve() -def _copy_into(src: Path, dst: Path, force: bool) -> None: +def _check_dest(dst: Path, force: bool) -> None: + """Reject the install if dst would block it; let _copy_into do the actual write.""" + if dst.exists() and not force: + raise SystemExit(f"{dst} already exists (use --force to overwrite)") + + +def _copy_into(src: Path, dst: Path) -> None: if dst.exists(): - if not force: - raise SystemExit(f"{dst} already exists (use --force to overwrite)") shutil.rmtree(dst) shutil.copytree(src, dst) @@ -104,11 +111,7 @@ def _copy_into(src: Path, dst: Path, force: bool) -> None: # automatically. Set to None where the platform doesn't have a runtime # skills directory; only the canonical `.evolve/skills/<name>/` write # happens in that case. 
-{%- if platform == "claude" %} -_RUNTIME_MIRROR_DIR: str | None = ".claude/skills" -{%- else %} -_RUNTIME_MIRROR_DIR: str | None = None -{%- endif %} +_RUNTIME_MIRROR_DIR: str | None = {% if platform == "claude" %}".claude/skills"{% else %}None{% endif %} def cmd_finalize(args: argparse.Namespace) -> int: @@ -119,12 +122,17 @@ def cmd_finalize(args: argparse.Namespace) -> int: _validate_draft(src, name) evolve_dst = workspace / ".evolve" / "skills" / name - _copy_into(src, evolve_dst, args.force) + runtime_dst: Path | None = workspace / _RUNTIME_MIRROR_DIR / name if _RUNTIME_MIRROR_DIR is not None else None - runtime_dst: Path | None = None - if _RUNTIME_MIRROR_DIR is not None: - runtime_dst = workspace / _RUNTIME_MIRROR_DIR / name - _copy_into(src, runtime_dst, args.force) + # Pre-check both destinations before any copy so a blocked second + # write doesn't leave the first half of the install on disk. + _check_dest(evolve_dst, args.force) + if runtime_dst is not None: + _check_dest(runtime_dst, args.force) + + _copy_into(src, evolve_dst) + if runtime_dst is not None: + _copy_into(src, runtime_dst) audit_append( project_root=str(workspace), From 2e1b86cfbead84c800612029fbb0b14da325491a Mon Sep 17 00:00:00 2001 From: Vinod Muthusamy <vinodmut@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:27:40 -0500 Subject: [PATCH 8/8] fix(claude-plugin): adapt synthesize.py walker to the namespaced lib/ PR #258 moved each platform's shared lib from `lib/` to `lib/evolve-lite/`. My _lib walker was looking for `lib/audit.py` and `evolve-lib/audit.py` and would fail to find the helpers post-merge. Update the walker to match the simplified pattern used by save_entities and other recall scripts: a single `lib/evolve-lite/` candidate. 
--- .../evolve-lite-synthesize-skill/scripts/synthesize.py | 8 +++----- .../evolve-lite/synthesize-skill/scripts/synthesize.py | 8 +++----- .../evolve-lite/synthesize-skill/scripts/synthesize.py | 8 +++----- .../evolve-lite/synthesize-skill/scripts/synthesize.py | 8 +++----- .../evolve-lite/synthesize-skill/scripts/synthesize.py.j2 | 8 +++----- 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py index 6b56f31c..80cce7d1 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-synthesize-skill/scripts/synthesize.py @@ -26,11 +26,9 @@ _script = Path(__file__).resolve() _lib = None for _ancestor in _script.parents: - for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): - if (_candidate / "audit.py").is_file(): - _lib = _candidate - break - if _lib is not None: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "audit.py").is_file(): + _lib = _candidate break if _lib is None: raise ImportError(f"Cannot find plugin lib directory above {_script}") diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index 09a8d62d..05b642eb 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -26,11 +26,9 @@ _script = Path(__file__).resolve() _lib = None for _ancestor in _script.parents: - for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): - if (_candidate / 
"audit.py").is_file(): - _lib = _candidate - break - if _lib is not None: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "audit.py").is_file(): + _lib = _candidate break if _lib is None: raise ImportError(f"Cannot find plugin lib directory above {_script}") diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index 6b56f31c..80cce7d1 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -26,11 +26,9 @@ _script = Path(__file__).resolve() _lib = None for _ancestor in _script.parents: - for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): - if (_candidate / "audit.py").is_file(): - _lib = _candidate - break - if _lib is not None: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "audit.py").is_file(): + _lib = _candidate break if _lib is None: raise ImportError(f"Cannot find plugin lib directory above {_script}") diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py index 6b56f31c..80cce7d1 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/synthesize-skill/scripts/synthesize.py @@ -26,11 +26,9 @@ _script = Path(__file__).resolve() _lib = None for _ancestor in _script.parents: - for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): - if (_candidate / "audit.py").is_file(): - _lib = _candidate - break - if _lib is not None: + _candidate 
= _ancestor / "lib" / "evolve-lite" + if (_candidate / "audit.py").is_file(): + _lib = _candidate break if _lib is None: raise ImportError(f"Cannot find plugin lib directory above {_script}") diff --git a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 index e8a4a4ec..755691de 100755 --- a/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 +++ b/plugin-source/skills/evolve-lite/synthesize-skill/scripts/synthesize.py.j2 @@ -26,11 +26,9 @@ from pathlib import Path _script = Path(__file__).resolve() _lib = None for _ancestor in _script.parents: - for _candidate in (_ancestor / "lib", _ancestor / "evolve-lib"): - if (_candidate / "audit.py").is_file(): - _lib = _candidate - break - if _lib is not None: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "audit.py").is_file(): + _lib = _candidate break if _lib is None: raise ImportError(f"Cannot find plugin lib directory above {_script}")