From f410309e3eb7e3023df3f682c447de176af8ca71 Mon Sep 17 00:00:00 2001 From: Thomas Thornton Date: Tue, 19 May 2026 21:01:06 +0100 Subject: [PATCH] feat: add skill creator functionality with scaffolding and validation scripts --- .github/skills/skill-creator/SKILL.md | 340 ++++++++++++++++++ .../references/output-patterns.md | 108 ++++++ .../references/quality-checklist.md | 85 +++++ .../skill-creator/references/workflows.md | 184 ++++++++++ .../skill-creator/scripts/init_skill.py | 154 ++++++++ .../skill-creator/scripts/quick_validate.py | 207 +++++++++++ README.md | 1 + 7 files changed, 1079 insertions(+) create mode 100644 .github/skills/skill-creator/SKILL.md create mode 100644 .github/skills/skill-creator/references/output-patterns.md create mode 100644 .github/skills/skill-creator/references/quality-checklist.md create mode 100644 .github/skills/skill-creator/references/workflows.md create mode 100644 .github/skills/skill-creator/scripts/init_skill.py create mode 100644 .github/skills/skill-creator/scripts/quick_validate.py diff --git a/.github/skills/skill-creator/SKILL.md b/.github/skills/skill-creator/SKILL.md new file mode 100644 index 0000000..8b65379 --- /dev/null +++ b/.github/skills/skill-creator/SKILL.md @@ -0,0 +1,340 @@ +--- +name: skill-creator +description: Create, update, review, and validate GitHub Copilot agent skills (SKILL.md files). Use this skill whenever someone wants to create a new skill, build a skill from scratch, package domain knowledge into a reusable agent skill, turn a workflow into a skill, or asks "how do I teach Copilot to do X consistently". Also use when updating or improving an existing SKILL.md, writing the description field for better triggering, or designing the folder structure and bundled resources for a skill. Do NOT use for general coding questions, runtime debugging, or MCP server configuration. 
+metadata: + author: Thomas Thornton + version: "1.0.0" + last-updated: "2026-05-19" +--- + +# Skill Creator + +Create high-quality GitHub Copilot agent skills (SKILL.md) that transform the general-purpose agent into a specialized expert. + +## What Is a Skill + +A skill is a modular, self-contained knowledge package stored in `.github/skills/<skill-name>/SKILL.md`. It provides: + +- **Specialized workflows** — Multi-step procedures for a specific domain +- **Domain expertise** — Company-specific schemas, patterns, business logic +- **Tool integrations** — Instructions for APIs, file formats, or services +- **Bundled resources** — Scripts, reference docs, and templates for reuse + +Skills load in three levels (progressive disclosure): + +1. **Metadata** (`name` + `description`) — Always in context (~100 words). This is the trigger mechanism. +2. **SKILL.md body** — Loaded when the skill triggers. Keep under 500 lines. +3. **Bundled resources** — Loaded on demand (scripts/, references/, assets/). + +## Skill Anatomy + +``` +skill-name/ +├── SKILL.md (required) +│ ├── YAML frontmatter name + description (required) +│ └── Markdown instructions +└── Bundled Resources (optional) + ├── scripts/ Executable code for deterministic/repetitive tasks + ├── references/ Docs loaded into context as needed + └── assets/ Output files (templates, icons, boilerplate) +``` + +**Do NOT include:** setup guides, changelogs, or user-facing documentation — skills are instructions for the agent, not onboarding docs for humans. + +--- + +## Is a Skill the Right Vehicle? + +Not every workflow needs a skill. 
Before creating one, choose the right tool: + +| Situation | Use instead | +|---|---| +| A rule that applies to ALL Copilot interactions in this repo | `copilot-instructions.md` entry | +| A rule scoped to specific file types (e.g., always use `kebab-case` for Bicep variable names) | `.github/instructions/*.instructions.md` with `applyTo` glob | +| A one-off prompt you run occasionally | `.github/prompts/*.prompt.md` | +| A multi-step workflow with domain knowledge that benefits from on-demand loading | **Skill** | +| A complex workflow with a dedicated agent persona | `.github/agents/*.agent.md` | + +A skill is the right choice when: the workflow is too detailed for `copilot-instructions.md`, it should only load for relevant requests (not every conversation), and it encapsulates reusable domain knowledge or a repeatable process. + +If the user's need is better served by a simpler vehicle, say so and use that instead. Not everything needs to be a skill. + +--- + +## Creation Process + +### Phase 1 — Discovery + +Understand the problem before writing a single line. Ask conversationally: + +- What workflow do you want to make consistent? Walk through the steps you do today. +- What goes wrong without the skill? (Inconsistency, forgotten steps, repeated explanation, wrong outputs) +- Who will use this skill? (Just you, your team, public) +- What tools or services are involved? + +Collect 2–3 concrete use cases. For each, capture: + +``` +Trigger: What the user says/does +Steps: Sequence of actions +Tools: Built-in or MCP tools needed +Result: What success looks like (specific output) +``` + +Exit criteria: 2–3 use cases defined, success criteria agreed, tools/dependencies identified. + +### Phase 2 — Architecture + +Make structural decisions before writing: + +1. **Choose the primary pattern** — Sequential workflow, conditional/branching workflow, iterative refinement, domain-specific intelligence, or multi-tool coordination. 
Read [references/workflows.md](references/workflows.md) for structure templates and the pattern-selection guide — load it now if you are unsure which pattern fits. +2. **Plan the folder structure** — Only add `scripts/`, `references/`, or `assets/` when there is a clear reason: + - Same code rewritten repeatedly → `scripts/` + - Reference material > ~100 lines → `references/` + - Output uses templates/images → `assets/` +3. **Draft the description** — This is the most important piece. See [Writing the Description](#writing-the-description) below. +4. **Map content to disclosure levels** — What goes in SKILL.md body vs. reference files? + +Exit criteria: Pattern selected, folder structure planned, description drafted, content mapped. + +### Phase 3 — Craft + +Write SKILL.md with precision. + +**Frontmatter rules:** + +```yaml +--- +name: kebab-case-name +description: [What + When + optional Not-when — single line, under 200 words] +--- +``` + +- `name`: kebab-case only, matches the folder name exactly +- `description`: primary trigger mechanism — include trigger phrases, what it does, what it does NOT do +- No other frontmatter fields needed + +**Body writing guidelines:** + +- Use imperative form: "Search for...", "Create the...", "Validate..." +- Explain WHY behind instructions rather than just MUST/NEVER +- Include 2–3 realistic examples of user inputs and expected outputs +- Put critical instructions at the top, not buried in the middle +- Never wrap prose lines at arbitrary column widths — let paragraphs flow naturally +- Reference bundled files clearly and state exactly WHEN the agent should read them +- **Write for coexistence** — this skill loads alongside other skills in `.github/skills/`. Never assume it is the only skill in context. Avoid generic section headings like "## Overview" that could conflict, and don't claim to handle tasks that belong to another skill in this repo. 
+ +Read [references/output-patterns.md](references/output-patterns.md) for patterns on specifying output format (Template, Examples, Scope Communication, Validation Gate) — load it when deciding how to structure the skill's output expectations or examples. + +### Phase 4 — Validate + +Run the automated validator first: + +```bash +python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/<skill-name> +``` + +Then work through the full [references/quality-checklist.md](references/quality-checklist.md) for description quality scoring, instruction quality scoring, trigger testing, and final sign-off. + +**Quick structure checks** (also caught by the script): +- [ ] SKILL.md exists with correct casing (not skill.md or SKILL.MD) +- [ ] Frontmatter has `name` and `description`, correct YAML delimiters (`---`) +- [ ] Folder name is kebab-case matching `name` field +- [ ] No README.md or extra docs in the skill folder +- [ ] Description does NOT contain XML angle brackets `< >` + +**Trigger checks** — propose 3–5 test phrases and verify mentally: +- Should trigger: obvious requests, paraphrased versions, informal requests +- Should NOT trigger: unrelated topics, tasks better handled by other skills + +**Quality checks:** +- [ ] Every instruction is unambiguous — an agent reading it fresh can follow it without guessing +- [ ] Examples are realistic and complete +- [ ] Referenced files have clear load conditions stated in SKILL.md +- [ ] SKILL.md body is under 500 lines + +### Phase 5 — Deliver + +If starting a new skill from scratch, scaffold the folder first: + +```bash +python .github/skills/skill-creator/scripts/init_skill.py <skill-name> +``` + +This creates the folder and a template SKILL.md with TODO placeholders. Then fill in the skill content and run the validator before presenting to the user. + +Place the completed skill at `.github/skills/<skill-name>/SKILL.md`. 
+ +Present a brief summary: +- What the skill does +- Suggested test phrase to try first +- Any bundled resources and when they load + +--- + +## Writing the Description + +The `description` field is the primary mechanism that determines whether Copilot invokes the skill. A well-written description is specific, includes trigger phrases, and leans slightly "pushy" — agents tend to undertrigger. + +**Structure:** `[What it does] + [When to use it — include actual phrases users would say] + [What NOT to use it for if overlap risk exists]` + +**Good example:** +``` +Analyze Azure architectures for cost optimization opportunities and provide savings recommendations. Use when reviewing Azure spending, asked to reduce costs, optimize resources, right-size VMs, or find savings across subscriptions. Do NOT use for general architecture design (use architecture-design skill instead). +``` + +**Bad example:** +``` +Helps with Azure cost analysis. +``` + +**Rules:** +- Include actual phrases users would say, including variations ("create skill", "build a skill", "turn this into a skill", "teach Copilot to do X") +- Include relevant file types or formats if applicable +- Add negative triggers (`Do NOT use for...`) when overlap with other skills is likely +- Keep under 200 words — it loads in every conversation + +--- + +## Progressive Disclosure Patterns + +### Pattern 1 — High-level guide with references + +Keep core workflow in SKILL.md; move detailed docs to `references/`: + +```markdown +## Advanced Features +- **Streaming**: See [references/streaming.md](references/streaming.md) — read when user requests real-time output +- **Error handling**: See [references/errors.md](references/errors.md) — read when diagnosing failures +``` + +### Pattern 2 — Domain-specific organization + +Organize by domain to avoid loading irrelevant context. 
When a skill spans multiple environments or tool variants, keep only the selection logic in SKILL.md and move per-variant detail into separate files: + +``` +iac-generator/ +├── SKILL.md (format selection + shared standards) +└── references/ + ├── bicep.md (read when user chooses Bicep) + ├── terraform.md (read when user chooses Terraform) + ├── arm.md (read when user chooses ARM templates) + └── pulumi.md (read when user chooses Pulumi) +``` + +Another example — an APIM skill covering multiple environments: + +``` +apim-deployment/ +├── SKILL.md (shared pipeline and APIOps steps) +└── references/ + ├── github-actions.md (read when deploying via GitHub Actions) + └── azure-devops.md (read when deploying via Azure DevOps) +``` + +### Pattern 3 — Conditional details + +Show core content, reference advanced content: + +```markdown +## Basic usage +[Core instructions here] + +## Advanced: tracked changes +See [references/redlining.md](references/redlining.md) — read only for tracked-change workflows. +``` + +**Key rule:** Keep reference links one level deep from SKILL.md. For files over 100 lines, add a table of contents at the top. + +--- + +## Bundled Resources Guide + +### scripts/ + +Use when the same code is written repeatedly across invocations, or when deterministic reliability is critical. + +- Token-efficient: scripts can be executed without loading into context +- Test scripts by actually running them — don't assume they work +- This skill bundles: `scripts/init_skill.py` (scaffold a new skill folder from template) and `scripts/quick_validate.py` (validate structure, frontmatter, and body against this repo's conventions) + +### references/ + +Use for domain knowledge, API specs, schemas, or detailed guides that exceed what fits cleanly in SKILL.md. 
+ +- Load only when needed — always state the condition in SKILL.md +- Avoid duplicating content between SKILL.md and reference files +- This skill bundles: `references/workflows.md` (workflow pattern templates — read during Phase 2), `references/output-patterns.md` (output formatting patterns — read during Phase 3), and `references/quality-checklist.md` (pre-delivery quality checks — read during Phase 4) + +### assets/ + +Use for files that appear in the output the agent produces (not loaded into context, but used in final output). + +- Example: `assets/template.docx`, `assets/logo.png`, `assets/hello-world/` + +--- + +## Anti-Patterns + +| Anti-pattern | Why it hurts | +|---|---| +| Vague description ("helps with X") | Undertriggering — Copilot won't invoke the skill | +| "When to use" in the body | Body only loads AFTER triggering — too late | +| Wall-of-text instructions | Agent skims and misses critical steps | +| No examples | Agents need concrete input/output pairs | +| README.md inside skill folder | Clutter — agents don't need meta-docs | +| Hardcoded credentials in scripts | Security risk | +| Deeply nested references | Increases cognitive load; keep one level deep | +| SKILL.md over 500 lines | Context bloat on every invocation | +| Overly rigid MUST/NEVER rules | Explain the WHY instead; agents respond better | + +--- + +## Extracting a Skill from an Existing Conversation + +When the user says "turn this into a skill" or "capture what we just did", the conversation history is the primary source — mine it before asking a single question. 
+ +**Step 1 — Mine the history first.** Read back through the conversation and extract: +- Every tool invoked and in what order +- Each correction or course-change the user made (these are the most valuable signal — they reveal where a naive agent would go wrong) +- Inputs provided and the final output format the user accepted +- Anything the agent had to discover or infer mid-conversation that a fresh agent starting cold would not know + +**Step 2 — Identify what's missing.** After mining, you'll have gaps. Common ones: +- Trigger phrases: what would someone say to invoke this workflow? +- Edge cases: what variations of the input exist that the conversation didn't cover? +- Success criteria: how does the user know the output is correct? +- Scope boundaries: what should this skill explicitly NOT handle? + +**Step 3 — Ask only targeted gap-filling questions.** Don't dump a full Discovery interview on the user — they just finished the work and want it captured. Ask one or two focused questions maximum, with a suggested default for each: "I'm going to use X as the trigger phrase — does that sound right, or would you phrase it differently?" + +**Step 4 — Confirm before writing.** Present a concise summary of what you're going to encode: the workflow steps, the trigger phrases, the success criteria, any bundled resources you plan to create. Get a yes before writing the SKILL.md. + +**Step 5 — Generalise, don't transcribe.** The biggest risk here is writing a skill that only works for the exact example in the conversation. Before writing each instruction, ask: "If a different user gives a slightly different input, does this still hold?" Strip out specifics (file names, literal values, one-off workarounds) unless they're universally needed. Encode the pattern, not the instance. + +**Step 6 — Proceed through Phases 2–5** — architecture, craft, validate, deliver as normal. + +--- + +## Updating an Existing Skill + +When improving rather than creating: + +1. 
Preserve the original `name` field and folder name — do not rename +2. Identify what failed: wrong triggers, missing steps, incorrect outputs, outdated info +3. Edit the minimum necessary — don't refactor surrounding content +4. Generalize from failures rather than adding narrow fixes (avoid overfitting to one example) +5. Re-validate trigger phrases after editing the description + +--- + +## Principles for Writing Effective Instructions + +Drawn from AI coding agent best practices: + +- **Smallest change that works** — don't add steps or context that don't pull their weight +- **Explain the why** — "Use `DefaultAzureCredential` so credentials are never hardcoded" beats "ALWAYS use `DefaultAzureCredential`" +- **Concrete over abstract** — give exact file paths, command names, expected outputs +- **Prove it works** — after writing a skill, mentally walk through it with a real user prompt +- **Incremental delivery** — if a skill is complex, split it with clear references rather than one monolithic file +- **Read before write** — if the skill involves a codebase or service, instruct the agent to locate existing patterns first diff --git a/.github/skills/skill-creator/references/output-patterns.md b/.github/skills/skill-creator/references/output-patterns.md new file mode 100644 index 0000000..dffbfc5 --- /dev/null +++ b/.github/skills/skill-creator/references/output-patterns.md @@ -0,0 +1,108 @@ +# Output Patterns + +Patterns for producing consistent, structured output in GitHub Copilot agent skills. +Read this during Phase 3 — Craft — when deciding how to specify output format or examples in the skill you are writing. + +--- + +## Template Pattern + +Use when the skill must produce output in a predictable shape — config files, policy XML, IaC templates, structured reports, etc. 
+ +Distinguish between strict (must match exactly) and flexible (sensible default, adapt as needed): + +**Strict — use ALWAYS language:** + +```markdown +## Output format + +ALWAYS use this exact structure. Do not deviate. + +[Insert literal template here, with placeholder tokens like {resource_name}] + +If the user's request cannot fit the template, ask for clarification rather than improvising a different format. +``` + +**Flexible — use "sensible default" language:** + +```markdown +## Output format + +Default structure below; adapt sections based on what the request requires: + +[Insert default structure here] + +Add or remove sections as the specific context warrants. The goal is a useful document, not strict adherence to the template. +``` + +**When to use which:** Strict belongs in skills that produce machine-consumed output (IaC files, API policies, CI/CD pipeline YAML). Flexible belongs in skills that produce human-read output (architecture designs, reports, meeting summaries). + +--- + +## Examples Pattern + +Use when the skill's output quality depends on matching a style or level of detail that is hard to describe in prose alone. Showing is almost always clearer than describing. + +Provide 2–3 input/output pairs that represent the range of requests the skill will receive: + +```markdown +## Examples + +### Example 1: [Representative request type] +User says: "[realistic user message]" + +Output: +[concrete, realistic output — not a placeholder like "result goes here"] + +### Example 2: [Different request type or edge case] +User says: "[slightly different request]" + +Output: +[concrete output] + +### What NOT to produce +User says: "[request that is out of scope or often confused with this skill]" +→ Do not [specific wrong output]. Instead, [correct response]. +``` + +The "what NOT to produce" example is especially useful when the skill sits near other skills with similar trigger phrases. 
+ +--- + +## Scope Communication Pattern + +Use when the skill needs to handle out-of-scope requests gracefully rather than silently producing wrong output. Include an explicit boundary statement near the top of the skill: + +```markdown +## Scope + +This skill handles: [specific list of what it covers] + +If the request falls outside this scope, state clearly what you can and cannot do: +- "This is a [X] question, not a [Y] question — [suggest appropriate approach or skill]." +- "Generating [format] is outside this skill. The [other-skill] skill handles that." + +Never silently produce output for a request type the skill was not designed for. +``` + +This prevents the skill from confidently generating incorrect output when it encounters an edge case it wasn't designed for. + +--- + +## Validation Gate Pattern + +Use for skills with workflows that should check prerequisites before starting work—particularly any skill that creates or modifies files, calls external services, or produces output that is hard to undo. + +```markdown +## Before starting + +Verify the following before writing any output: + +1. [Prerequisite 1] — if missing or unclear: ask "[specific question]" before proceeding +2. [Prerequisite 2] — if missing: state "[what is needed and why]" +3. [Prerequisite 3] — if ambiguous: offer a sensible default and confirm + +Only proceed once all prerequisites are confirmed or the user has acknowledged the default. +``` + +The questions should be targeted — don't dump a discovery interview on the user. One or two focused questions with suggested defaults get answers faster than an open-ended list. 
diff --git a/.github/skills/skill-creator/references/quality-checklist.md b/.github/skills/skill-creator/references/quality-checklist.md new file mode 100644 index 0000000..d1f804d --- /dev/null +++ b/.github/skills/skill-creator/references/quality-checklist.md @@ -0,0 +1,85 @@ +# Quality Checklist + +Use at the end of Phase 4 — Validate — before delivering any skill. + +--- + +## Structural Checks (Pass / Fail) + +Hard requirements. Fix every failure before delivery. + +- [ ] Folder name is kebab-case (lowercase letters, digits, and hyphens only — no spaces, underscores, or capitals) +- [ ] `SKILL.md` exists with exact casing (not `skill.md`, `Skill.md`, or `SKILL.MD`) +- [ ] YAML frontmatter is present with `---` as both opening and closing delimiter +- [ ] `name` field is present and kebab-case +- [ ] `name` value matches the folder name exactly +- [ ] `description` field is present +- [ ] `description` is under 1024 characters +- [ ] `description` contains no XML angle brackets (`<` or `>`) +- [ ] No `README.md` or `CHANGELOG.md` inside the skill folder + +Run `scripts/quick_validate.py` to check these automatically. + +--- + +## Description Quality (Score 1–5, target 4+ on all) + +| Dimension | 1 (poor) | 5 (excellent) | +|---|---|---| +| **Specificity** | "Helps with things" | Names exact workflows, file types, or tools | +| **Trigger clarity** | Vague — agent can't tell when to load it | Includes the actual phrases a user would type | +| **User language** | Internal jargon or technical terms | Words a user would naturally say in a chat | +| **Scope boundary** | No boundary stated | Explicit "Do NOT use for..." where overlap exists | +| **Assertiveness** | Passive ("can be used to…") | Direct ("Use when…", "Use for…") | + +A description scoring below 4 on trigger clarity will undertrigger — the agent simply won't invoke the skill when it should. 
+ +--- + +## Instruction Quality (Score 1–5, target 4+ on all) + +| Dimension | 1 (poor) | 5 (excellent) | +|---|---|---| +| **Actionability** | Vague direction ("validate properly") | Exact command, path, or tool call | +| **Examples** | None | 2–3 concrete examples with realistic inputs and outputs | +| **Error handling** | Silent on failure | Specific failure modes named with recovery steps | +| **Progressive disclosure** | Wall of text in SKILL.md | Focused body, heavy detail in `references/` | +| **Coexistence** | Claims ownership of broad tasks | Scoped clearly; no overlap with other skills in the repo | + +--- + +## Trigger Testing + +Test the description before delivery. Propose 3–5 phrases for each category and verify mentally whether the skill would load. + +### Should trigger +1. [ ] `[Most obvious phrasing of the request]` → triggers? Y / N +2. [ ] `[Paraphrased version]` → triggers? Y / N +3. [ ] `[Informal or abbreviated version]` → triggers? Y / N +4. [ ] `[Version that mentions a specific file type or tool]` → triggers? Y / N + +### Should NOT trigger +1. [ ] `[Clearly unrelated task]` → stays silent? Y / N +2. [ ] `[Task that belongs to a different skill in this repo]` → stays silent? Y / N +3. [ ] `[Generic question the default agent should handle]` → stays silent? Y / N + +If a "should NOT trigger" phrase would activate this skill, the description is too broad. Add a negative scope clause or narrow the trigger wording. + +--- + +## Bundled Resources Check + +If the skill includes `references/`, `scripts/`, or `assets/`: + +- [ ] Every file in `references/` is linked from the SKILL.md body with a clear "read when..." 
condition +- [ ] Every script in `scripts/` has been run locally at least once (don't assume it works) +- [ ] No bundled file duplicates content already in the SKILL.md body + +--- + +## Final Sign-Off + +- [ ] User has reviewed and confirmed the skill captures their intent correctly +- [ ] Test phrases above produce expected triggering behaviour +- [ ] SKILL.md body line count is under 500 +- [ ] Skill is placed at `.github/skills/<skill-name>/SKILL.md` and is discoverable via the skills index diff --git a/.github/skills/skill-creator/references/workflows.md b/.github/skills/skill-creator/references/workflows.md new file mode 100644 index 0000000..29ced08 --- /dev/null +++ b/.github/skills/skill-creator/references/workflows.md @@ -0,0 +1,184 @@ +# Workflow Patterns + +Use during Phase 2 — Architecture — when deciding how to structure a skill's instructions. + +Five patterns cover most real skills. Most complex skills combine a primary and one or two secondary patterns. + +--- + +## 1. Sequential Workflow + +**Choose when:** The task has a natural linear order where each step produces input for the next. Skipping or reordering steps would break the outcome. + +**Signals:** User says "do X, then Y"; steps share data; there is a clear start and end state. + +**Structure:** + +Include an overview near the top of SKILL.md so the agent knows the full shape of the work before starting step 1: + +```markdown +This workflow involves four steps: +1. [Step name] — [one-line description] +2. [Step name] — [one-line description] +3. [Step name] — [one-line description] +4. [Step name] — [one-line description] +``` + +Then give each step its own section: + +```markdown +### Step 1 — [Name] +[What to do] +Validation: [how to know this step succeeded before moving on] +On failure: [what to do rather than silently continuing] +``` + +**Watch out for:** Missing on-failure handling (what if step 3 fails after steps 1–2 completed?); rigid ordering when some steps could safely run in parallel. 
+ +--- + +## 2. Conditional / Branching Workflow + +**Choose when:** The right path depends on a property of the input — file type, tool available, environment, or the user's answer to a question. + +**Signals:** "Handle X differently from Y"; "if the user has Z available, do A, otherwise do B". + +**Structure:** + +```markdown +### Identify the input type +- [Type A]: follow the [Name A] path below +- [Type B]: follow the [Name B] path below +- [Neither]: state clearly that this falls outside the skill's scope and suggest an alternative + +### [Name A] path +[Steps] + +### [Name B] path +[Steps] +``` + +Be explicit about what triggers each branch. State the fallback — never leave the agent to guess what to do with an unexpected input. + +**Watch out for:** Overlapping branch conditions (two branches could match the same input); missing catchall for unexpected cases. + +--- + +## 3. Iterative Refinement + +**Choose when:** Output quality improves through review-and-fix cycles. First drafts are usually close but not right; explicit criteria exist to check against. + +**Signals:** The skill produces documents, designs, or generated content that needs review; quality is multi-dimensional; users historically ask for revisions after first output. + +**Structure:** + +```markdown +### Draft +Generate initial output based on gathered requirements. + +### Review +Check against these criteria: +- [Criterion 1]: [how to verify — must be checkable, not vague] +- [Criterion 2]: [how to verify] + +### Refine +For each issue found: identify the specific problem → fix it → re-check that criterion. + +### Stop when +- All criteria pass, OR +- 3 iterations completed (returns diminish after this), OR +- User explicitly approves the output +``` + +The stopping condition is mandatory — without it the loop runs indefinitely or the agent decides arbitrarily when to stop. 
+ +**Watch out for:** Vague criteria ("make it better" is not a criterion — "description is under 1024 characters" is); no stopping condition. + +--- + +## 4. Multi-Tool Coordination + +**Choose when:** The workflow crosses tool or service boundaries. Data must flow from one tool into a second, possibly into a third. The overall result depends on all phases completing successfully. + +**Signals:** "Get data from X, update Y, then notify Z"; involves multiple MCP servers or file system + API combinations; partial completion has consequences. + +**Structure:** + +```markdown +### Phase 1 — [Service / tool name] +1. [Action] +2. [Action] +Output: [exactly what this phase produces, named clearly] + +### Phase 2 — [Service / tool name] +Input: [output from Phase 1] +1. [Action using Phase 1 data] +2. [Action] +Output: [what this phase produces] + +### Phase 3 — [Service / tool name] +Input: [output from Phase 2] +[Steps] + +### Error handling +- If Phase 1 fails: [action] +- If Phase 2 fails after Phase 1 succeeded: [action — Phase 1 may need to be rolled back or flagged] +``` + +Make data flow explicit. State exactly what output from Phase N becomes input to Phase N+1. Always handle partial failure — "Phase 2 succeeded but Phase 3 failed" is a real state that needs an instruction. + +**Watch out for:** Assuming all tools/MCPs are available; tight coupling where any phase failure silently corrupts output; not cleaning up if a later phase fails. + +--- + +## 5. Domain Intelligence + +**Choose when:** The skill's value is specialised knowledge — compliance rules, security standards, quality criteria — not just procedure execution. The agent needs to *reason from rules*, not just follow steps. + +**Signals:** "Apply our standards"; getting it wrong has consequences; rules must be enforced before action rather than checked after; users benefit from the skill's expertise more than its automation. 
+ +**Structure:** + +```markdown +### Pre-checks +Before producing any output, verify: +1. [Rule 1]: [how to check] — if it fails: [stop and explain, don't continue] +2. [Rule 2]: [how to check] — if it fails: [specific guidance] + +Only proceed if all pre-checks pass. + +### [Main action] +[Steps, with the domain rules embedded as constraints] + +### Reasoning +For each significant decision, state why: "[choice] because [rule or constraint it satisfies]." +This makes the output reviewable and the reasoning auditable. +``` + +**Watch out for:** Hardcoding rules that change frequently — reference an external doc instead; not explaining the reasoning behind enforcement (agents and users both respond better to *why* than to *MUST*). + +--- + +## Choosing a Pattern + +| Signal from the workflow | Primary pattern | +|---|---| +| Steps depend on each other in order | Sequential | +| Different inputs need different paths | Conditional | +| Output improves through review cycles | Iterative Refinement | +| Workflow crosses multiple tools or services | Multi-Tool Coordination | +| Expert rules must be applied before action | Domain Intelligence | +| Some steps are independent of each other | Sequential with parallel notes | +| "It depends" comes up frequently in discovery | Conditional | +| User asks for revisions after first output | Iterative Refinement | + +## Combining Patterns + +Most real skills use one primary pattern and one or two secondary patterns within specific steps: + +- **Sequential + Domain Intelligence:** Follow steps in order, but embed expert pre-checks at the steps with consequences. +- **Conditional + Sequential:** Choose the right path first, then follow a sequential workflow along that path. +- **Multi-Tool + Iterative:** Coordinate across services, then refine the combined output. +- **Domain Intelligence + Iterative:** Apply rules to generate the first draft, then review against those same rules and refine. 
# Closing line of references/workflows.md, which precedes this script in the
# patch (kept verbatim so no content is dropped):
#   Identify the **primary** pattern (shapes the overall flow) and note secondary
#   patterns in Phase 2 Architecture before writing.
#
# File: .github/skills/skill-creator/scripts/init_skill.py (new file, mode 100644)
#!/usr/bin/env python3
"""Scaffold a new skill folder with a template SKILL.md.

Usage:
    python init_skill.py <skill-name> [--path <dir>]

Arguments:
    skill-name    Kebab-case identifier for the skill (e.g. 'my-new-skill')

Options:
    --path <dir>  Directory to create the skill in
                  Default: .github/skills (relative to current working directory)

Examples:
    python init_skill.py code-review-assistant
    python init_skill.py git-workflow-helper --path .github/skills
"""

import re
import sys
from pathlib import Path


# One or more lowercase alphanumeric words joined by single hyphens.
KEBAB = re.compile(r'^[a-z0-9]+(-[a-z0-9]+)*$')

# Longest skill name accepted by validate_name().
MAX_NAME_LENGTH = 64

# Used when --path is not given; relative to the current working directory.
DEFAULT_BASE_PATH = '.github/skills'

# Rendered with str.format(); the only placeholders are {skill_name} and
# {skill_title}, so the text must not contain any other braces.
SKILL_TEMPLATE = """\
---
name: {skill_name}
description: >-
  [TODO: What does this skill do? When should Copilot use it?
  Include the actual phrases a user would say to trigger it.
  Add "Do NOT use for..." if there is overlap risk with other
  skills in this repo. Keep under 200 words.]
---

# {skill_title}

[TODO: 1–2 sentences describing what this skill enables. Be concrete — name the
tools, file formats, or workflows it works with.]

---

## Workflow

[TODO: Replace this section with the actual workflow. Choose one primary pattern
from references/workflows.md:

  - Sequential: numbered steps with validation gates between them
  - Conditional: decision tree based on input type or available tools
  - Iterative Refinement: draft → review against criteria → refine → stop condition
  - Multi-Tool Coordination: phase-by-phase with explicit data flow between phases
  - Domain Intelligence: expert pre-checks → action → reasoning

Delete this comment block when done.]

---

## Examples

[TODO: Add 2–3 realistic examples. Each example should show exactly what the user
says and what the agent produces. See references/output-patterns.md for the
Examples Pattern. Concrete examples beat prose descriptions of what output
"should look like". Delete this comment block when done.]

---

## Scope

[TODO: State what this skill does NOT handle if there is any risk of overlap with
other skills in this repo. If no overlap risk exists, delete this section entirely.]
"""


def validate_name(name):
    """Return an error string if *name* is not a usable skill name, else None.

    A valid name is kebab-case (lowercase letters and digits, words separated
    by single hyphens) and at most MAX_NAME_LENGTH characters long.
    """
    # fullmatch, not match: with match() the pattern's '$' would also accept a
    # name that ends in a newline.
    if not KEBAB.fullmatch(name):
        return (
            f"'{name}' is not valid kebab-case. "
            f"Use lowercase letters, digits, and hyphens only (e.g. 'my-skill')."
        )
    if len(name) > MAX_NAME_LENGTH:
        return (
            f"'{name}' is too long ({len(name)} chars). "
            f"Maximum is {MAX_NAME_LENGTH}."
        )
    return None


def title_from_name(name):
    """Convert 'my-skill-name' to 'My Skill Name'."""
    return ' '.join(word.capitalize() for word in name.split('-'))


def init_skill(name, base_path):
    """Create ``<base_path>/<name>/SKILL.md`` from SKILL_TEMPLATE.

    Args:
        name: Skill identifier; must pass validate_name().
        base_path: Directory that will contain the new skill folder. Missing
            parent directories are created.

    Returns:
        0 on success. 1 if the name is invalid, the target already exists, or
        the folder/file could not be created; a message is printed in each case.
    """
    err = validate_name(name)
    if err:
        print(f"Error: {err}")
        return 1

    skill_dir = Path(base_path) / name

    if skill_dir.exists():
        print(f"Error: '{skill_dir}' already exists.")
        return 1

    # Filesystem failures (permissions, a file where a directory is expected,
    # a concurrent creation) are reported as a normal error, not a traceback.
    try:
        skill_dir.mkdir(parents=True)
    except OSError as exc:
        print(f"Error: could not create '{skill_dir}': {exc}")
        return 1
    print(f"Created: {skill_dir}/")

    skill_md = skill_dir / 'SKILL.md'
    try:
        skill_md.write_text(
            SKILL_TEMPLATE.format(
                skill_name=name,
                skill_title=title_from_name(name),
            ),
            encoding='utf-8',
        )
    except OSError as exc:
        print(f"Error: could not write '{skill_md}': {exc}")
        return 1
    print(f"Created: {skill_md}")

    validate_cmd = (
        f"python .github/skills/skill-creator/scripts/quick_validate.py {skill_dir}"
    )

    print(f"""
Next steps:
  1. Edit {skill_dir}/SKILL.md — complete the TODO sections
  2. Run validation when ready:
       {validate_cmd}
  3. Add subdirectories as needed:
       references/  — detailed docs loaded on demand
       scripts/     — executable utilities
       assets/      — templates or output files
""")
    return 0


def _parse_args(args):
    """Split CLI arguments into ``(skill_name, base_path, error)``.

    ``--path <dir>`` and ``--path=<dir>`` may appear before or after the skill
    name. On success *error* is None; otherwise it is a message and the other
    two items are None.
    """
    base_path = DEFAULT_BASE_PATH
    positionals = []
    remaining = iter(args)
    for arg in remaining:
        if arg == '--path':
            base_path = next(remaining, None)
        elif arg.startswith('--path='):
            base_path = arg.split('=', 1)[1]
        else:
            positionals.append(arg)
            continue
        if not base_path:
            return None, None, "--path requires a directory argument"

    if not positionals:
        return None, None, "missing skill name"
    if len(positionals) > 1:
        extra = ' '.join(positionals[1:])
        return None, None, f"unexpected extra argument(s): {extra}"
    return positionals[0], base_path, None


def main():
    """Command-line entry point; exits 0 on success or help, 1 on any error."""
    args = sys.argv[1:]

    if not args or any(arg in ('-h', '--help') for arg in args):
        print(__doc__)
        sys.exit(0)

    skill_name, base_path, error = _parse_args(args)
    if error:
        print(f"Error: {error}")
        sys.exit(1)

    sys.exit(init_skill(skill_name, base_path))


if __name__ == '__main__':
    main()
# File: .github/skills/skill-creator/scripts/quick_validate.py (new file, mode 100644)
#!/usr/bin/env python3
"""Validate a skill folder against this repo's SKILL.md conventions.

Usage:
    python quick_validate.py <skill-folder>

Exit codes:
    0  pass (warnings are allowed)
    1  fail (at least one structural error, or wrong usage)
"""

import re
import sys
from pathlib import Path

# One or more lowercase alphanumeric words joined by single hyphens.
KEBAB = re.compile(r'^[a-z0-9]+(-[a-z0-9]+)*$')

# Limits enforced on the frontmatter and body.
MAX_NAME_LENGTH = 64          # same limit init_skill.py applies when scaffolding
MAX_DESCRIPTION_LENGTH = 1024
MAX_BODY_LINES = 500

# Phrases (lowercase) that indicate the description says when to use the skill.
TRIGGER_KEYWORDS = (
    'use when', 'use for', 'use this', 'triggers on',
    'asks to', 'asks for', 'says',
)

# Phrases (lowercase) that indicate the description states what is out of scope.
NEGATIVE_KEYWORDS = (
    'do not use', "don't use", 'not for', 'not intended',
)

# Opening '---' line, lazily captured content, closing '---' line. The closing
# delimiter may be the last thing in the file; CRLF line endings are accepted.
_FRONTMATTER = re.compile(
    r'\A---[ \t]*\r?\n(.*?)^---[ \t]*(?:\r?\n|\Z)',
    re.DOTALL | re.MULTILINE,
)

# 'key: rest' starting in column 0 (a top-level mapping entry).
_KEY_LINE = re.compile(r'^([A-Za-z0-9_-]+)\s*:(.*)')

# YAML block scalar header: '|' or '>' with optional chomping (+/-) and
# indentation (1-9) indicators in either order, optionally followed by a comment.
_BLOCK_HEADER = re.compile(r'[|>](?:[+-][1-9]?|[1-9][+-]?)?(?:\s+#.*)?')

# A fully quoted scalar, optionally followed by a comment.
_QUOTED_SCALAR = re.compile(r'''(["'])(.*?)\1(?:\s+#.*)?''')

# In a plain scalar, '#' at the start or after whitespace begins a comment.
_INLINE_COMMENT = re.compile(r'(?:^|\s+)#.*$')


def _extract_frontmatter(content):
    """Return (frontmatter_raw, body) or (None, None) if absent/malformed.

    A leading UTF-8 byte-order mark is ignored. *body* is everything after the
    closing ``---`` line and is '' when the file ends there.
    """
    if content.startswith('\ufeff'):
        content = content[1:]
    m = _FRONTMATTER.match(content)
    if not m:
        return None, None
    return m.group(1).rstrip('\r\n'), content[m.end():]


def _parse_frontmatter(fm_raw):
    """Parse top-level ``key: value`` pairs into a dict of strings.

    This is a deliberately small subset of YAML, not a full parser:
      * quoted scalars lose their surrounding quotes;
      * plain scalars lose a trailing ``# comment``;
      * block scalars (``|``/``>`` headers) and keys with an empty value
        collect the following indented lines, joined with single spaces.
    Lines that are not a top-level key (comments, indented children, list
    items) are skipped.
    """
    result = {}
    lines = fm_raw.splitlines()
    i = 0
    while i < len(lines):
        m = _KEY_LINE.match(lines[i])
        i += 1
        if not m:
            continue
        key, rest = m.group(1), m.group(2).strip()

        quoted = _QUOTED_SCALAR.fullmatch(rest)
        if quoted:
            result[key] = quoted.group(2)
            continue

        plain = _INLINE_COMMENT.sub('', rest)
        if plain and not _BLOCK_HEADER.fullmatch(rest):
            result[key] = plain
            continue

        # Block scalar or nested value: consume the indented (or blank) lines
        # that follow. Blank lines are dropped so they do not turn into runs
        # of spaces that inflate the measured length.
        parts = []
        while i < len(lines) and (not lines[i] or lines[i][0] in ' \t'):
            parts.append(lines[i].strip())
            i += 1
        result[key] = ' '.join(part for part in parts if part)
    return result


def _check_description(desc):
    """Return (errors, warnings) for a non-empty description string."""
    errors = []
    warnings = []
    if '<' in desc or '>' in desc:
        errors.append(
            "description contains XML angle brackets (< >) — "
            "this prevents the skill from loading correctly"
        )
    if len(desc) > MAX_DESCRIPTION_LENGTH:
        errors.append(
            f"description is {len(desc)} characters — "
            f"maximum is {MAX_DESCRIPTION_LENGTH}"
        )

    lowered = desc.lower()
    # Remove negative phrases before looking for triggers; otherwise
    # "Do NOT use for ..." would satisfy the 'use for' trigger keyword.
    positive = lowered
    for kw in NEGATIVE_KEYWORDS:
        positive = positive.replace(kw, ' ')
    if not any(kw in positive for kw in TRIGGER_KEYWORDS):
        warnings.append(
            "description may be missing trigger guidance — "
            "consider adding 'Use when...' so Copilot knows when to invoke this skill"
        )
    if not any(kw in lowered for kw in NEGATIVE_KEYWORDS):
        warnings.append(
            "description has no negative scope — "
            "consider adding 'Do NOT use for...' if overlap with other skills is a risk"
        )
    return errors, warnings


def validate(skill_path):
    """Return (errors, warnings). errors is empty on pass.

    Args:
        skill_path: Path (str or Path) to the skill folder; '.' is accepted
            when run from inside the folder.

    Returns:
        Two lists of human-readable strings. Errors are structural problems;
        warnings are recommendations and do not fail validation.
    """
    errors = []
    warnings = []
    p = Path(skill_path)

    # 1. Folder must exist
    if not p.is_dir():
        return [f"Not a directory: {skill_path}"], []

    # 2. Folder name must be kebab-case
    # Path('.').name is '' and Path('..').name is '..', so those are resolved
    # to obtain the real folder name; other paths keep the name as given.
    folder = p.name if p.name not in ('', '..') else p.resolve().name
    if not KEBAB.fullmatch(folder):
        errors.append(
            f"Folder name '{folder}' must be kebab-case "
            f"(lowercase letters, digits, hyphens only)"
        )

    entries = {e.name for e in p.iterdir()}

    # 3. SKILL.md must exist with exact casing
    if 'SKILL.md' not in entries:
        wrong = [n for n in entries if n.lower() == 'skill.md']
        if wrong:
            errors.append(f"Found '{wrong[0]}' — file must be named exactly 'SKILL.md'")
        else:
            errors.append("SKILL.md not found in skill folder")
        return errors, warnings  # nothing more to check

    # 4. No README.md inside the skill folder
    if any(n.lower() == 'readme.md' for n in entries):
        errors.append(
            "README.md found inside the skill folder — remove it "
            "(skills are agent instructions, not human docs)"
        )

    # 5. Read and parse SKILL.md
    try:
        content = (p / 'SKILL.md').read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        errors.append(f"Could not read SKILL.md: {exc}")
        return errors, warnings
    fm_raw, body = _extract_frontmatter(content)

    if fm_raw is None:
        errors.append(
            "Missing or malformed YAML frontmatter "
            "(expected opening and closing --- delimiters)"
        )
        return errors, warnings

    fm = _parse_frontmatter(fm_raw)

    # 6. name field
    name = fm.get('name', '').strip()
    if not name:
        errors.append("Frontmatter is missing 'name'")
    else:
        if not KEBAB.fullmatch(name):
            errors.append(f"name '{name}' must be kebab-case")
        if len(name) > MAX_NAME_LENGTH:
            errors.append(
                f"name is {len(name)} characters — maximum is {MAX_NAME_LENGTH}"
            )
        if name != folder:
            warnings.append(
                f"name '{name}' does not match folder name '{folder}' — they should be identical"
            )

    # 7. description field
    desc = fm.get('description', '').strip()
    if not desc:
        errors.append("Frontmatter is missing 'description'")
    else:
        desc_errors, desc_warnings = _check_description(desc)
        errors.extend(desc_errors)
        warnings.extend(desc_warnings)

    # 8. Body line count
    if body:
        line_count = len(body.strip().splitlines())
        if line_count > MAX_BODY_LINES:
            warnings.append(
                f"SKILL.md body is {line_count} lines — "
                f"target is under {MAX_BODY_LINES} (move detail to references/)"
            )

    # 9. Reference files should be mentioned in the body
    refs_dir = p / 'references'
    if refs_dir.is_dir() and body:
        for ref in sorted(refs_dir.iterdir()):
            if ref.is_file() and not ref.name.startswith('.'):
                # A mention of 'references/<name>' necessarily contains <name>,
                # so one substring test covers both spellings.
                if ref.name not in body:
                    warnings.append(
                        f"references/{ref.name} is not mentioned in SKILL.md — "
                        f"add a link with a 'read when...' condition"
                    )

    return errors, warnings


def main():
    """Command-line entry point: validate one folder and print a report.

    Exits 0 when there are no errors (or help was requested with -h/--help),
    1 on validation errors or wrong usage.
    """
    args = sys.argv[1:]
    wants_help = any(arg in ('-h', '--help') for arg in args)
    if wants_help or len(args) != 1:
        print(__doc__)
        sys.exit(0 if wants_help else 1)

    errors, warnings = validate(args[0])

    print(f"\nValidating: {args[0]}")
    print('─' * 55)

    for e in errors:
        print(f"  ❌ {e}")
    for w in warnings:
        print(f"  ⚠️  {w}")

    if not errors and not warnings:
        print("  ✅ All checks passed")
    elif not errors:
        n = len(warnings)
        print(f"\n  ✅ Passed with {n} warning{'s' if n != 1 else ''}")
    else:
        e_n, w_n = len(errors), len(warnings)
        print(
            f"\n  ❌ Failed: {e_n} error{'s' if e_n != 1 else ''}, "
            f"{w_n} warning{'s' if w_n != 1 else ''}"
        )

    print()
    sys.exit(0 if not errors else 1)


if __name__ == '__main__':
    main()

# README.md hunk that follows this script in the patch (kept verbatim so no
# content is dropped; the table continues on the next source line):
#   diff --git a/README.md b/README.md
#   index a05e29f..feb7f9b 100644
#   --- a/README.md
#   +++ b/README.md
#   @@ -82,6 +82,7 @@ Skills are invoked automatically by Copilot based on relevance, or explicitly by
#    | Skill | Description |
#    |---|---|
#   +| `skill-creator` | Creates, updates, reviews, and validates GitHub Copilot agent skills (`SKILL.md` files). Guides through a 5-phase workflow (Discovery → Architecture → Craft → Validate → Deliver), writes effective descriptions for reliable triggering, and designs folder structures with bundled references and scripts.
| | `gh-aw-operations` | Comprehensive knowledge for creating, debugging, and managing GitHub Agentic Workflows (gh-aw) — frontmatter spec, MCP wiring, safe-outputs, and common patterns | | `apm-package-author` | Creates and maintains [APM (Agent Package Manager)](https://microsoft.github.io/apm/) manifests for distributing GitHub Copilot skills, agents, and MCP servers as installable packages. Covers `apm.yml` authoring, package structure, MCP dependency wiring, branch-based installs, and troubleshooting. |