diff --git a/README.md b/README.md index 8cd9d5a..a3dc020 100644 --- a/README.md +++ b/README.md @@ -39,17 +39,19 @@ provider individually. The installer also asks whether to install optional `llm-wiki-*` skills. These skills are tracked in `core/skills/`, but they are not installed by default. -Provider permission config is merged with existing settings, and changed -provider files are backed up with an `.agentspec-backup-*` suffix. +All installed skills are written canonically to `~/.agents/skills`. Providers +that do not read that directory natively get copied mirrors in their own home +directories. Provider permission config is merged with existing settings, and +changed provider files are backed up with an `.agentspec-backup-*` suffix. This installs to: | CLI | Instructions | Skills | Auto-approval config | |-------------|---------------------------------------------|----------------------|------------------| -| Claude Code | shared core instructions + Claude addendum | shared + Claude-only | `~/.claude/settings.json` | -| Codex CLI | shared core instructions + Codex addendum | shared + Codex-only | `~/.codex/config.toml` + `~/.codex/rules/` | -| Gemini CLI | shared core instructions + Gemini addendum | shared + Gemini-only | `~/.gemini/settings.json` + `~/.gemini/bin/gemini-auto` + `~/.gemini/policies/` | -| Copilot CLI | shared core instructions + Copilot addendum | shared + Copilot-only | `~/.copilot/settings.json` + `~/.copilot/bin/copilot-auto` | +| Claude Code | shared core instructions + Claude addendum | `~/.agents/skills` + `~/.claude/skills` mirror | `~/.claude/settings.json` | +| Codex CLI | shared core instructions + Codex addendum | `~/.agents/skills` + `~/.codex/skills` mirror | `~/.codex/config.toml` + `~/.codex/rules/` | +| Gemini CLI | shared core instructions + Gemini addendum | `~/.agents/skills` + `~/.gemini/skills` mirror | `~/.gemini/settings.json` + `~/.gemini/bin/gemini-auto` + `~/.gemini/policies/` | +| Copilot CLI | shared core instructions + Copilot addendum | `~/.agents/skills` | `~/.copilot/settings.json` + `~/.copilot/bin/copilot-auto` | Re-run after updating agentspecs. @@ -102,7 +104,6 @@ Provider behavior is configured during setup: | `/spec-review [name]` | Draft logical commit plan and draft PR docs | | `/handoff` | Capture session context before ending | | `python-code` | Python conventions (auto-loads when writing Python) | -| `subagent-orchestrator` | Provider-specific delegation and fallback skill (Codex, Claude) | Skills follow the [agentskills.io specification](https://agentskills.io/specification). diff --git a/providers/claude/instructions.md b/providers/claude/instructions.md index f61b2d3..4fe7708 100644 --- a/providers/claude/instructions.md +++ b/providers/claude/instructions.md @@ -2,8 +2,9 @@ - This installation targets Claude Code. - Install the shared instructions into `~/.claude/CLAUDE.md`. -- Copy skills into `~/.claude/skills/`. -- Provider-specific Claude skills may be installed from `providers/claude/skills/`. +- Store canonical skills in `~/.agents/skills/`. +- Mirror skills into `~/.claude/skills/` because Claude Code reads that + provider-local directory. - Keep Claude-specific instructions short and layered on top of the shared workflow. - Auto-approval setup uses Claude `permissions.defaultMode = "auto"`. diff --git a/providers/claude/skills/subagent-orchestrator/SKILL.md b/providers/claude/skills/subagent-orchestrator/SKILL.md deleted file mode 100644 index 72d7328..0000000 --- a/providers/claude/skills/subagent-orchestrator/SKILL.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -name: subagent-orchestrator -description: Teach Claude Code to delegate bounded coding tasks to other AI CLIs such as Codex CLI, Gemini CLI, and Copilot CLI, then review the results fairly, retry on provider limits, and fall back cleanly while Claude remains the orchestrator. ---- - -# Subagent Orchestrator - -Use this skill when Claude should offload bounded work to another AI CLI to save -Claude usage, get a second opinion, or parallelize repo exploration while still -keeping Claude in charge. - -Claude remains the orchestrator. The external model is a worker, not the final -authority. - -Load these files only as needed: - -- `references/provider-ranking.md` -- `references/context-assembly.md` -- `references/codex-cli.md` -- `references/gemini-cli.md` -- `references/copilot-cli.md` -- `scripts/provider_state.py` - -## Default Recommendation - -Start with one Claude-specific skill, not a generalized delegate-everyone-to- -everyone system. - -The first-pass provider order is: - -1. Codex CLI -2. Gemini CLI -3. Copilot CLI - -That ordering is a practical recommendation for Claude subagent work, not a -universal benchmark. Load `references/provider-ranking.md` when you need the -research notes, citations, or task-routing detail. - -## Good Delegation Targets - -- repo exploration -- architecture summary -- code review -- draft implementation plan -- writing code and files -- test ideas -- second-opinion analysis - -Avoid delegating the final decision, final merge judgment, or open-ended product -ownership. Claude should keep those. - -## Default Workflow - -### 1. Bound the task - -Give the subagent a narrow ask with an explicit output shape. Good prompts ask -for a summary, risk list, draft patch plan, or focused review. - -### 2. Enrich the prompt - -Do not pass thin prompts through unchanged. - -Turn something like: - -```text -Review this repository and summarize the main risks. -``` - -into a delegation packet that states: - -- the goal of the task -- what success looks like -- what failure looks like -- how the subagent should report progress while it works -- the exact output shape you want back -- which relevant skills the subagent should use -- which files or directories to read first -- any small file chunks or notes that should be attached inline - -Load `references/context-assembly.md` when building the delegation packet. - -At minimum, enrich the prompt with: - -- task goal: why Claude is delegating this -- success criteria: what would make the answer useful -- failure criteria: what would make the answer unusable -- progress reporting: how the subagent should expose progress during longer work, - for example stdout checkpoints or a `/tmp` log file if the CLI supports it -- domain hints: repo type, framework, vault, spec workflow, or review stance -- relevant skills: for example, use Obsidian-related skills if the repo is an - Obsidian vault -- context to inspect first: `AGENTS.md`, `README.md`, `specs/`, or a short file - list chosen by Claude - -Prefer short curated context over dumping the entire repo. If a few files define -the problem, name those files first. If a file contains one relevant section, -quote or summarize only that chunk instead of pasting the whole file. - -If the repo has a `specs/` directory or spec-linked workflow, tell the -subagent where the relevant spec lives and whether it should read spec files -before touching code. - -### 3. Pick a provider - -Check the current provider state: - -```bash -python3 skills/subagent-orchestrator/scripts/provider_state.py pick -``` - -Use the default routing unless there is a strong reason not to: - -- Codex CLI for the best first-pass coding work and strong model reasoning -- Gemini CLI for low-cost broad scans, long-context reading, and cheap retries -- Copilot CLI when GitHub-native context or existing Copilot budget makes it a - better fit - -### 4. Run the provider non-interactively - -Claude delegates by running the external CLI through its Bash tool. - -Use the provider reference file for the exact flags. Prefer read-only or -planning modes for exploration work. Only use the provider's dangerous mode -inside a trusted sandbox and only when the task truly needs edits or shell -actions. - -### 5. Review the result fairly - -Start from the assumption that the subagent may have done good work. - -- Accept it when it is solid. `LGTM` is allowed. -- Lightly edit when the answer is mostly right. -- Retry when the prompt was underspecified. -- Reject or fall back only when the result is materially wrong, risky, or the - provider is unavailable. - -Do not be pedantic for its own sake. Be critical only where it changes the -outcome. - -### 6. Handle provider failures - -If the CLI indicates rate limits, usage caps, temporary overload, or "try again -later", cool that provider down and pick another one. - -Typical cooldown flow: - -```bash -python3 skills/subagent-orchestrator/scripts/provider_state.py mark-failure \ - --provider codex-cli \ - --reason "rate limit exceeded" \ - --cooldown-minutes 30 - -python3 skills/subagent-orchestrator/scripts/provider_state.py pick -``` - -If the failure is auth, bad config, or a missing executable, do not treat that -as a transient provider outage. Surface the setup issue instead. - -### 7. Record the verdict - -When Claude reports back, include: - -- provider and model used -- delegated task -- verdict: accepted, edited, retried, or rejected -- any follow-up action - -## Prompt-Building Rules - -When preparing a subagent prompt, Claude should gather and pass only the context -that materially improves the result. - -- Always mention the task goal explicitly. -- Always define what success looks like and what failure looks like. -- Always tell the subagent how to report progress during longer work. Prefer a - simple mechanism such as periodic stdout updates or a `/tmp` log file. -- Name the relevant skills the subagent should use when the repo has a known - domain. Example: use obsidian-related skills when working in an Obsidian - vault. -- Point the subagent to the files that matter first. Usually that means a short - ordered list, not a giant dump. -- If `AGENTS.md`, `README.md`, or `specs/` are relevant, say so directly. -- Attach short chunks only when they remove ambiguity. Prefer 10-40 important - lines over whole-file paste. - -Useful context sources: - -- repo instructions such as `AGENTS.md` -- `README.md` -- relevant `specs/` files -- files named by the user -- a short handpicked list of code files that define the task - -If the task is "review this repo," Claude should still state what kind of review -it wants: correctness risks, architecture risks, code quality issues, test -gaps, or implementation plan. "Review" without a frame is too vague. - -## Delegation Packet Shape - -Use a shape like this: - -```text -Goal: -Success looks like: -Failure looks like: -Progress reporting: -Return format: -Relevant skills or domain hints: -Files to read first: -Attached context: -Task: -``` - -Claude can keep the packet brief, but it should not omit those fields when the -task would benefit from them. - -## Failure Heuristics - -Treat these as likely transient and eligible for cooldown: - -- `rate limit` -- `usage limit` -- `quota` -- `overloaded` -- `capacity` -- `try again later` -- `temporarily unavailable` - -Treat these as setup problems, not cooldown signals: - -- `not authenticated` -- `command not found` -- `invalid api key` -- `permission denied` -- malformed CLI arguments - -## Output Expectations - -The point of delegation is to save Claude effort without lowering standards. -Claude should use the subagent output as input to judgment, not as a substitute -for judgment. diff --git a/providers/claude/skills/subagent-orchestrator/references/codex-cli.md b/providers/claude/skills/subagent-orchestrator/references/codex-cli.md deleted file mode 100644 index 14504a8..0000000 --- a/providers/claude/skills/subagent-orchestrator/references/codex-cli.md +++ /dev/null @@ -1,93 +0,0 @@ -# Codex CLI - -Use Codex CLI as the first-choice delegate when the task needs strong coding -judgment or difficult reasoning. - -## Default Models - -- Preferred default: `gpt-5.4-med` (moderate tasks) -- Upgrade for harder tasks: `gpt-5.4-high` -- Optional coding specialist: `5.3-codex-high` (strong at hard/moderate coding) - -## Non-Interactive Pattern - -Read-only exploration: - -```bash -codex exec \ - "Review this repository and return the top 10 architecture risks." \ - -m gpt-5.4-med \ - -s read-only \ - -o /tmp/codex-result.txt -``` - -Higher-end fallback for hard analysis: - -```bash -codex exec \ - "Trace the data flow for authentication and call out likely failure modes." \ - -m gpt-5.4-high \ - -s read-only \ - -o /tmp/codex-result.txt -``` - -Auto-write sandbox for implementation work: - -```bash -codex exec \ - "Implement the changes described in specs/feature/design.md." \ - -m gpt-5.4-med \ - --full-auto -``` - -## Dangerous Mode Equivalent - -Codex provides: - -- `--dangerously-bypass-approvals-and-sandbox` - -Use this only in trusted, externally sandboxed environments. - -## Useful Flags - -- `exec`: non-interactive subcommand (NOT `-p`, which means `--profile`) -- `-m`, `--model`: explicit model selection -- `-s`, `--sandbox`: sandbox policy (`read-only`, `workspace-write`, - `danger-full-access`) -- `--full-auto`: convenience alias for `-a on-request --sandbox workspace-write` -- `-o`, `--output-last-message`: write the final agent message to a file -- `--json`: JSONL event stream to stdout (streaming, not a single JSON blob) -- `-C`, `--cd`: set working directory -- `--add-dir`: additional writable directories -- `--ephemeral`: no session persistence -- `--skip-git-repo-check`: allow running outside a git repo -- `--output-schema`: path to JSON Schema for structured output - -## Dedicated Review - -Codex has a built-in code review mode: - -```bash -codex exec review -``` - -This runs a non-interactive code review against the current repository. - -## Quirks - -- `codex exec` is the non-interactive entry point, not `-p`. The `-p` flag on - the main `codex` command means `--profile` (config profile), not print mode. -- There is no `--output-format json` flag. Use `--json` for JSONL streaming - events or `-o FILE` to capture just the final message. For subagent use, - prefer `-o` since it gives you the answer without parsing a stream. -- `--full-auto` combines `-a on-request` with `--sandbox workspace-write`. - Good for implementation tasks in a sandboxed environment. -- Stdin is supported: if no prompt argument is given (or `-` is used), - instructions are read from stdin. If stdin is piped and a prompt is also - provided, stdin is appended as a `` block. - -## Good Delegation Prompts - -- "Review this diff and list only material correctness risks." -- "Implement the function described in this spec, writing files as needed." -- "Draft an implementation plan for this bug fix without editing files." diff --git a/providers/claude/skills/subagent-orchestrator/references/context-assembly.md b/providers/claude/skills/subagent-orchestrator/references/context-assembly.md deleted file mode 100644 index 994aa65..0000000 --- a/providers/claude/skills/subagent-orchestrator/references/context-assembly.md +++ /dev/null @@ -1,185 +0,0 @@ -# Context Assembly - -Use this file when a thin delegation prompt needs to become a useful subagent -packet. - -## Thin prompt - -```text -Review this repository and summarize the main risks. -``` - -That is not enough on its own. It says what to do, but not why, how to judge -the answer, or where to look first. - -## Delegation packet - -Build a prompt with these fields: - -```text -Goal: -Success looks like: -Failure looks like: -Progress reporting: -Return format: -Relevant skills or domain hints: -Files to read first: -Attached context: -Task: -``` - -## What to Fill In - -### Goal - -State why Claude is delegating the task. - -Examples: - -- Find the highest-risk correctness issues before I spend Claude time reviewing details. -- Produce a cheap first-pass repo map so Claude can focus on judgment instead of discovery. - -### Success looks like - -State what a useful answer must contain. - -Examples: - -- A ranked list of the top five risks with file references and short rationale. -- A concise architecture summary that names the modules worth deeper review. - -### Failure looks like - -State what would make the result unusable. - -Examples: - -- Generic observations without file references. -- Style-only feedback when Claude asked for correctness or architecture risks. - -### Return format - -Keep it strict when you want easy review. - -Examples: - -- Five bullets, each with `risk`, `why`, and `files`. -- Markdown sections: `Summary`, `Risks`, `Open questions`. - -### Progress reporting - -Tell the subagent how you want to observe progress while it works. - -Examples: - -- Print a short stdout update after each major step so Claude can tell the work - is moving. -- Append checkpoints to `/tmp/subagent-progress.log` if the CLI and environment - make that practical. -- If the task is short, say that no intermediate progress updates are needed. - -### Relevant skills or domain hints - -Tell the subagent which domain lens to use. - -Examples: - -- If this is an Obsidian vault, use obsidian-related skills and pay attention to wikilinks, frontmatter, and vault conventions. -- If the repo uses specs, read the relevant `specs/` files before proposing changes. -- If the task is Python-heavy, use the repo's Python conventions. - -### Files to read first - -Give an ordered short list. - -Good candidates: - -- `AGENTS.md` -- `README.md` -- `specs//AGENTS.md` -- `specs//design.md` -- the specific source files that define the behavior under review - -### Attached context - -Attach only the chunks that remove ambiguity. - -Good attachments: - -- the relevant paragraph from `AGENTS.md` -- the exact acceptance criteria from a spec -- a short excerpt from a configuration file - -Avoid pasting long files when a path plus a one-line instruction is enough. - -## Example: Repo Risk Review - -```text -Goal: -Give Claude a cheap first-pass map of the highest-risk architecture and correctness issues in this repo. - -Success looks like: -Return the top 5 material risks, each with a file path, why it matters, and what Claude should inspect next. - -Failure looks like: -Generic repo summary, style nits, or risks without file references. - -Progress reporting: -Print a brief stdout update after reading repo instructions and after finishing the ranked risk list. - -Return format: -Markdown with sections: Summary, Top Risks, Follow-up Files. - -Relevant skills or domain hints: -Read repo instructions first. If there is a spec for the current feature, use the spec workflow and read those files before judging implementation risk. - -Files to read first: -- AGENTS.md -- README.md -- specs/current-feature/AGENTS.md -- specs/current-feature/design.md -- src/auth.py -- src/session.py - -Attached context: -- Current task: review correctness and architecture risk, not style. -- Spec note: feature is still in implementation phase. - -Task: -Review this repository and summarize the main risks. -``` - -## Example: Obsidian Vault Task - -```text -Goal: -Help Claude quickly assess whether this Obsidian vault automation will break links or metadata. - -Success looks like: -Call out link, frontmatter, template, or base-view risks with concrete note paths. - -Failure looks like: -Generic markdown advice that ignores Obsidian-specific behavior. - -Progress reporting: -Append note-path checkpoints to `/tmp/subagent-progress.log` or print equivalent stdout updates if file logging is awkward. - -Return format: -Bullet list of risks plus a short verdict. - -Relevant skills or domain hints: -Use obsidian-related skills. Pay attention to wikilinks, embeds, Bases files, and vault conventions. - -Files to read first: -- AGENTS.md -- README.md -- vault/Templates/Daily Note.md -- vault/Bases/Projects.base -- scripts/sync_notes.py - -Attached context: -- This repo is an Obsidian vault. Link integrity matters more than generic markdown style. - -Task: -Review this repository and summarize the main risks. -``` diff --git a/providers/claude/skills/subagent-orchestrator/references/copilot-cli.md b/providers/claude/skills/subagent-orchestrator/references/copilot-cli.md deleted file mode 100644 index c6f3ea7..0000000 --- a/providers/claude/skills/subagent-orchestrator/references/copilot-cli.md +++ /dev/null @@ -1,75 +0,0 @@ -# Copilot CLI - -Use Copilot CLI when GitHub-native context or existing Copilot budget makes it a -natural delegate, not as the default first pick for cheap subagent work. - -## Default Models - -Strong models exposed by current local help include: - -- `claude-sonnet-4.6` -- `claude-opus-4.6` -- `gpt-5.4` -- `gpt-5.4-mini` - -Practical default: - -- Preferred default: `claude-sonnet-4.6` -- Cheap fallback: `gpt-5.4` (high or medium) - -## Non-Interactive Pattern - -Read-only-ish analysis in the current repo: - -```bash -copilot -p "Review this repository and summarize the main risks." \ - --model claude-sonnet-4.6 \ - --output-format json \ - --allow-all-tools \ - --no-ask-user -``` - -If you want the fully permissive equivalent: - -```bash -copilot -p "Review this repository and summarize the main risks." \ - --model claude-sonnet-4.6 \ - --output-format json \ - --allow-all -``` - -## Yolo Mode - -Copilot has explicit permissive shortcuts: - -- `--allow-all` -- `--yolo` - -Both map to: - -- `--allow-all-tools` -- `--allow-all-paths` -- `--allow-all-urls` - -## Useful Flags - -- `-p`, `--prompt`: non-interactive mode -- `--model`: explicit model selection -- `--output-format json`: JSONL output -- `--allow-all-tools`: required for smooth non-interactive tool use -- `--no-ask-user`: keep the agent autonomous -- `--add-dir`: expand path access - -## Quirks - -- Permission controls are explicit and a bit more verbose than Gemini's. -- Premium request usage can make Copilot a less predictable default "money - saver" than Gemini's published free tier or a direct Claude subscription. -- Copilot can still be excellent when the surrounding workflow is already - GitHub-centric. - -## Good Delegation Prompts - -- "Review this repository with GitHub workflow assumptions in mind." -- "Summarize the likely CI and PR risks in this codebase." -- "Give me a second opinion using Copilot's current model stack." diff --git a/providers/claude/skills/subagent-orchestrator/references/gemini-cli.md b/providers/claude/skills/subagent-orchestrator/references/gemini-cli.md deleted file mode 100644 index 213c301..0000000 --- a/providers/claude/skills/subagent-orchestrator/references/gemini-cli.md +++ /dev/null @@ -1,66 +0,0 @@ -# Gemini CLI - -Use Gemini CLI as the best value delegate and the default fallback when Codex -is capped or when you want broad, cheap exploration. - -## Default Models - -- Preferred default: `gemini-2.5-pro` -- Cheaper fast pass: `gemini-2.5-flash` - -## Non-Interactive Pattern - -Read-only / planning: - -```bash -gemini -p \ - "Review this repository and summarize the architecture in 10 bullets." \ - -m gemini-2.5-pro \ - --approval-mode plan \ - --output-format json -``` - -Cheap wide scan: - -```bash -gemini -p \ - "Scan this repository and list the most suspicious files for auth bugs." \ - -m gemini-2.5-flash \ - --approval-mode plan \ - --output-format json -``` - -## Yolo Mode - -Gemini has an explicit yolo mode: - -- `--yolo` -- `--approval-mode yolo` - -It also supports: - -- `--approval-mode auto_edit` -- `--approval-mode plan` - -Gemini's docs note that sandboxing is enabled by default when using yolo mode. - -## Useful Flags - -- `-p`, `--prompt`: non-interactive mode -- `-m`, `--model`: explicit model selection -- `--output-format json`: structured output -- `--approval-mode plan`: read-only planning mode -- `--include-directories`: expand workspace scope - -## Quirks - -- Google login is the cleanest path for generous default usage, but API key mode - is the clearest path when you need strict model control. -- Gemini is especially attractive for large-context reads because the official - docs emphasize Gemini 2.5 Pro with a 1M-token context window. - -## Good Delegation Prompts - -- "Read the whole repo and group modules by responsibility." -- "Give me a low-cost second opinion on whether this refactor is safe." -- "List likely bug clusters without proposing a patch yet." diff --git a/providers/claude/skills/subagent-orchestrator/references/provider-ranking.md b/providers/claude/skills/subagent-orchestrator/references/provider-ranking.md deleted file mode 100644 index 7d949bd..0000000 --- a/providers/claude/skills/subagent-orchestrator/references/provider-ranking.md +++ /dev/null @@ -1,121 +0,0 @@ -# Provider Ranking - -Research date: April 6, 2026. - -This ordering is a recommendation for Claude-as-orchestrator workflows. It is -based on current official docs, local CLI help in this environment, and -practical tradeoffs across quality, CLI ergonomics, cost/value, speed, and -session-limit behavior. - -## Ranking - -### 1. Codex CLI - -Best default choice for delegated coding and reasoning work. - -Why it ranks first: - -- Strong 5.x model lineup: `gpt-5.4-med` for moderate tasks, `gpt-5.4-high` - for hard reasoning, and the optional `5.3-codex-high` coding specialist. -- Mature non-interactive flow via `codex exec` with explicit sandbox policies, - model selection, and output capture. -- Built-in code review subcommand (`codex exec review`) for dedicated review - tasks. - -Best use: - -- implementation and code writing -- code review -- difficult reasoning and analysis -- draft implementation plans - -Watch-outs: - -- `codex exec` is the non-interactive entry, not `-p` (which means `--profile`) -- no `--output-format json`; use `-o FILE` for final message capture -- `5.3-codex-high` is optional and coding-focused; default to `gpt-5.4-med` - for general work -- usage limits and rate limits still apply - -### 2. Gemini CLI - -Best value fallback and strong default for cheap, broad, long-context work. - -Why it ranks second: - -- Official docs emphasize Gemini 2.5 Pro, a 1M-token context window, and a free - tier with 60 requests per minute and 1,000 requests per day for personal - Google accounts. -- Good non-interactive scripting support with `-p`, JSON output, explicit model - selection, and a true plan mode. -- Excellent fit for wide repository summaries, docs digestion, and low-cost - retries when Codex is capped. - -Best use: - -- long-context reading -- repo scans -- cheap retries -- draft summaries -- second-opinion analysis - -Watch-outs: - -- model control depends on auth method; API key mode is the clearest path when - you need exact model selection -- quality is strong, but Codex is still preferred for the most subtle review - tasks - -### 3. Copilot CLI - -Useful situational delegate, not the default cheap subagent. - -Why it ranks third: - -- It exposes strong models and good CLI controls, but its best fit is often - "already inside a GitHub-centric workflow" rather than "lowest-friction cheap - delegate from Claude." -- Official docs and local help show strong model optionality, but the premium - request and entitlement story is more variable than Gemini's published free - tier or a direct Codex subscription. -- Best when GitHub context, MCP tooling, or existing Copilot spend makes it the - natural choice. - -Best use: - -- GitHub-heavy workflows -- cases where Copilot is already the paid path you are using -- situations where you want model choice inside one GitHub-managed tool - -Watch-outs: - -- premium-request budgeting can make it less predictable as the default - "savings" delegate -- permission setup for non-interactive runs is more explicit than Gemini or - Codex - -## Task Routing - -- Use Codex CLI first for difficult reasoning, coding tasks, and subtle code - judgment. -- Use Gemini CLI first when you want low-cost breadth, very large context, or a - cheap retry path. -- Use Copilot CLI first only when GitHub-native context or an existing Copilot - budget is the main constraint. - -## Sources - -- Codex CLI help: `codex exec --help` (local, April 6, 2026) -- Gemini CLI overview: -- Gemini CLI configuration: -- GitHub Copilot CLI docs hub: -- GitHub Copilot CLI command reference: -- GitHub Copilot premium request management: -- GitHub Agentic Workflows engine reference: - -## Notes - -- The ordering above is an inference from the sources, not a vendor-published - benchmark. -- Local CLI help on April 6, 2026 matched the flags documented for the - installed `codex`, `gemini`, and `copilot` binaries in this environment. diff --git a/providers/claude/skills/subagent-orchestrator/scripts/provider_state.py b/providers/claude/skills/subagent-orchestrator/scripts/provider_state.py deleted file mode 100644 index 4a3d76e..0000000 --- a/providers/claude/skills/subagent-orchestrator/scripts/provider_state.py +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -from argparse import ArgumentParser -from copy import deepcopy -from datetime import datetime, timedelta, timezone -from pathlib import Path -import json -import os -import sys - - -DEFAULT_PROVIDERS = [ - { - "provider": "codex-cli", - "rank": 1, - "preferred_model": "gpt-5.4-med", - "fallback_model": "gpt-5.4-high", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, - { - "provider": "gemini-cli", - "rank": 2, - "preferred_model": "gemini-2.5-pro", - "fallback_model": "gemini-2.5-flash", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, - { - "provider": "copilot-cli", - "rank": 3, - "preferred_model": "claude-sonnet-4.6", - "fallback_model": "gpt-5.4-mini", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, -] - - -def utc_now() -> datetime: - return datetime.now(timezone.utc) - - -def isoformat(value: datetime | None) -> str | None: - if value is None: - return None - return value.isoformat().replace("+00:00", "Z") - - -def parse_timestamp(value: str | None) -> datetime | None: - if not value: - return None - return datetime.fromisoformat(value.replace("Z", "+00:00")) - - -def default_state_path() -> Path: - override = os.environ.get("CLAUDE_SUBAGENT_STATE_PATH") - if override: - return Path(override).expanduser() - return Path.home() / ".claude" / "subagent-orchestrator" / "provider-state.json" - - -def build_default_state() -> dict: - return {"providers": deepcopy(DEFAULT_PROVIDERS)} - - -def load_state(path: Path) -> dict: - if not path.exists(): - return reconcile_state(build_default_state()) - - payload = json.loads(path.read_text()) - if not isinstance(payload, dict) or not isinstance(payload.get("providers"), list): - raise ValueError(f"invalid provider state file: {path}") - - return reconcile_state(payload) - - -def reconcile_state(payload: dict) -> dict: - by_name = { - item["provider"]: item - for item in payload.get("providers", []) - if isinstance(item, dict) and "provider" in item - } - now = utc_now() - providers = [] - - for template in DEFAULT_PROVIDERS: - item = deepcopy(template) - item.update(by_name.get(template["provider"], {})) - cooldown_until = parse_timestamp(item.get("cooldown_until")) - - if cooldown_until and cooldown_until <= now: - item["is_active"] = True - item["cooldown_until"] = None - item["failure_reason"] = None - elif cooldown_until: - item["is_active"] = False - item["cooldown_until"] = isoformat(cooldown_until) - else: - item["cooldown_until"] = None - item["is_active"] = bool(item.get("is_active", True)) - - last_failure_at = parse_timestamp(item.get("last_failure_at")) - item["last_failure_at"] = isoformat(last_failure_at) - providers.append(item) - - providers.sort(key=lambda item: item["rank"]) - return {"providers": providers} - - -def save_state(path: Path, payload: dict) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(payload, indent=2, sort_keys=False) + "\n") - - -def get_provider(payload: dict, provider: str) -> dict: - for item in payload["providers"]: - if item["provider"] == provider: - return item - known = ", ".join(entry["provider"] for entry in payload["providers"]) - raise ValueError(f"unknown provider {provider!r}; expected one of: {known}") - - -def cmd_status(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - save_state(state_path, payload) - print(json.dumps(payload, indent=2)) - return 0 - - -def cmd_rank(args) -> int: - return cmd_status(args) - - -def cmd_pick(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - save_state(state_path, payload) - - for item in payload["providers"]: - if item["is_active"]: - print(json.dumps(item, indent=2)) - return 0 - - next_provider = min( - payload["providers"], - key=lambda item: item["cooldown_until"] or "9999-12-31T00:00:00Z", - ) - print( - json.dumps( - { - "provider": None, - "reason": "no_active_provider", - "next_provider": next_provider["provider"], - "cooldown_until": next_provider["cooldown_until"], - }, - indent=2, - ) - ) - return 1 - - -def cmd_mark_failure(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - provider = get_provider(payload, args.provider) - now = utc_now() - provider["is_active"] = False - provider["last_failure_at"] = isoformat(now) - provider["cooldown_until"] = isoformat(now + timedelta(minutes=args.cooldown_minutes)) - provider["failure_reason"] = args.reason - save_state(state_path, payload) - print(json.dumps(provider, indent=2)) - return 0 - - -def cmd_mark_success(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - provider = get_provider(payload, args.provider) - provider["is_active"] = True - provider["cooldown_until"] = None - provider["failure_reason"] = None - save_state(state_path, payload) - print(json.dumps(provider, indent=2)) - return 0 - - -def build_parser() -> ArgumentParser: - default_state = str(default_state_path()) - parser = ArgumentParser( - description="Track Claude subagent provider ranking and temporary cooldowns." - ) - parser.add_argument( - "--state", - default=default_state, - help="Path to the provider state JSON file.", - ) - - subparsers = parser.add_subparsers(dest="command", required=True) - - status_parser = subparsers.add_parser("status", help="Show provider state.") - status_parser.add_argument("--state", default=default_state) - status_parser.set_defaults(func=cmd_status) - - rank_parser = subparsers.add_parser("rank", help="Show ranked provider state.") - rank_parser.add_argument("--state", default=default_state) - rank_parser.set_defaults(func=cmd_rank) - - pick_parser = subparsers.add_parser("pick", help="Pick the best active provider.") - pick_parser.add_argument("--state", default=default_state) - pick_parser.set_defaults(func=cmd_pick) - - failure_parser = subparsers.add_parser( - "mark-failure", help="Cool down a provider after a transient failure." - ) - failure_parser.add_argument("--state", default=default_state) - failure_parser.add_argument("--provider", required=True) - failure_parser.add_argument("--reason", required=True) - failure_parser.add_argument("--cooldown-minutes", type=int, default=30) - failure_parser.set_defaults(func=cmd_mark_failure) - - success_parser = subparsers.add_parser( - "mark-success", help="Reactivate a provider after a successful run." - ) - success_parser.add_argument("--state", default=default_state) - success_parser.add_argument("--provider", required=True) - success_parser.set_defaults(func=cmd_mark_success) - - return parser - - -def main() -> int: - parser = build_parser() - args = parser.parse_args() - return args.func(args) - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except ValueError as exc: - print(f"ERROR: {exc}", file=sys.stderr) - raise SystemExit(2) diff --git a/providers/codex/instructions.md b/providers/codex/instructions.md index 6460ab8..07aec93 100644 --- a/providers/codex/instructions.md +++ b/providers/codex/instructions.md @@ -2,8 +2,9 @@ - This installation targets Codex CLI. - Install the shared instructions into `~/.codex/AGENTS.md`. +- Store canonical skills in `~/.agents/skills/`. +- Mirror skills into `~/.codex/skills/` for Codex CLI reliability. - Copy skills rather than symlinking them. -- Provider-specific Codex skills may be installed from `providers/codex/skills/`. - Auto-approval setup uses native `approval_policy = "on-request"` with `sandbox_mode = "workspace-write"` and a compact Codex `.rules` file for destructive shell prompts. diff --git a/providers/codex/skills/subagent-orchestrator/SKILL.md b/providers/codex/skills/subagent-orchestrator/SKILL.md deleted file mode 100644 index db5ac6a..0000000 --- a/providers/codex/skills/subagent-orchestrator/SKILL.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -name: subagent-orchestrator -description: Teach Codex to delegate bounded coding tasks to other AI CLIs such as Claude Code, Gemini CLI, and Copilot CLI, then review the results fairly, retry on provider limits, and fall back cleanly while Codex remains the orchestrator. ---- - -# Subagent Orchestrator - -Use this skill when Codex should offload bounded work to another AI CLI to save -Codex usage, get a second opinion, or parallelize repo exploration while still -keeping Codex in charge. - -Codex remains the orchestrator. The external model is a worker, not the final -authority. - -Load these files only as needed: - -- `references/provider-ranking.md` -- `references/context-assembly.md` -- `references/claude-code.md` -- `references/gemini-cli.md` -- `references/copilot-cli.md` -- `scripts/provider_state.py` - -## Default Recommendation - -Start with one Codex-specific skill, not a generalized delegate-everyone-to- -everyone system. - -The first-pass provider order is: - -1. Claude Code -2. Gemini CLI -3. Copilot CLI - -That ordering is a practical recommendation for Codex subagent work, not a -universal benchmark. Load `references/provider-ranking.md` when you need the -research notes, citations, or task-routing detail. - -## Good Delegation Targets - -- repo exploration -- architecture summary -- code review -- draft implementation plan -- test ideas -- second-opinion analysis - -Avoid delegating the final decision, final merge judgment, or open-ended product -ownership. Codex should keep those. - -## Default Workflow - -### 1. Bound the task - -Give the subagent a narrow ask with an explicit output shape. Good prompts ask -for a summary, risk list, draft patch plan, or focused review. - -### 2. Enrich the prompt - -Do not pass thin prompts through unchanged. - -Turn something like: - -```text -Review this repository and summarize the main risks. -``` - -into a delegation packet that states: - -- the goal of the task -- what success looks like -- what failure looks like -- how the subagent should report progress while it works -- the exact output shape you want back -- which relevant skills the subagent should use -- which files or directories to read first -- any small file chunks or notes that should be attached inline - -Load `references/context-assembly.md` when building the delegation packet. - -At minimum, enrich the prompt with: - -- task goal: why Codex is delegating this -- success criteria: what would make the answer useful -- failure criteria: what would make the answer unusable -- progress reporting: how the subagent should expose progress during longer work, - for example stdout checkpoints or a `/tmp` log file if the CLI supports it -- domain hints: repo type, framework, vault, spec workflow, or review stance -- relevant skills: for example, use Obsidian-related skills if the repo is an - Obsidian vault -- context to inspect first: `AGENTS.md`, `README.md`, `specs/`, or a short file - list chosen by Codex - -Prefer short curated context over dumping the entire repo. If a few files define -the problem, name those files first. If a file contains one relevant section, -quote or summarize only that chunk instead of pasting the whole file. - -If the repo has a `specs/` directory or spec-linked workflow, tell the -subagent where the relevant spec lives and whether it should read spec files -before touching code. - -### 3. Pick a provider - -Check the current provider state: - -```bash -python3 skills/subagent-orchestrator/scripts/provider_state.py pick -``` - -Use the default routing unless there is a strong reason not to: - -- Claude Code for the best first-pass repo analysis and nuanced code review -- Gemini CLI for low-cost broad scans, long-context reading, and cheap retries -- Copilot CLI when GitHub-native context or existing Copilot budget makes it a - better fit - -### 4. Run the provider non-interactively - -Use the provider reference file for the exact flags. Prefer read-only or -planning modes for exploration work. Only use the provider's yolo or dangerous -mode inside a trusted sandbox and only when the task truly needs edits or shell -actions. - -### 5. Review the result fairly - -Start from the assumption that the subagent may have done good work. - -- Accept it when it is solid. `LGTM` is allowed. -- Lightly edit when the answer is mostly right. -- Retry when the prompt was underspecified. -- Reject or fall back only when the result is materially wrong, risky, or the - provider is unavailable. - -Do not be pedantic for its own sake. Be critical only where it changes the -outcome. - -### 6. Handle provider failures - -If the CLI indicates rate limits, usage caps, temporary overload, or "try again -later", cool that provider down and pick another one. - -Typical cooldown flow: - -```bash -python3 skills/subagent-orchestrator/scripts/provider_state.py mark-failure \ - --provider claude-code \ - --reason "rate limit exceeded" \ - --cooldown-minutes 30 - -python3 skills/subagent-orchestrator/scripts/provider_state.py pick -``` - -If the failure is auth, bad config, or a missing executable, do not treat that -as a transient provider outage. Surface the setup issue instead. - -### 7. Record the verdict - -When Codex reports back, include: - -- provider and model used -- delegated task -- verdict: accepted, edited, retried, or rejected -- any follow-up action - -## Prompt-Building Rules - -When preparing a subagent prompt, Codex should gather and pass only the context -that materially improves the result. - -- Always mention the task goal explicitly. -- Always define what success looks like and what failure looks like. -- Always tell the subagent how to report progress during longer work. Prefer a - simple mechanism such as periodic stdout updates or a `/tmp` log file. -- Name the relevant skills the subagent should use when the repo has a known - domain. Example: use obsidian-related skills when working in an Obsidian - vault. -- Point the subagent to the files that matter first. Usually that means a short - ordered list, not a giant dump. -- If `AGENTS.md`, `README.md`, or `specs/` are relevant, say so directly. -- Attach short chunks only when they remove ambiguity. Prefer 10-40 important - lines over whole-file paste. - -Useful context sources: - -- repo instructions such as `AGENTS.md` -- `README.md` -- relevant `specs/` files -- files named by the user -- a short handpicked list of code files that define the task - -If the task is "review this repo," Codex should still state what kind of review -it wants: correctness risks, architecture risks, code quality issues, test -gaps, or implementation plan. "Review" without a frame is too vague. - -## Delegation Packet Shape - -Use a shape like this: - -```text -Goal: -Success looks like: -Failure looks like: -Progress reporting: -Return format: -Relevant skills or domain hints: -Files to read first: -Attached context: -Task: -``` - -Codex can keep the packet brief, but it should not omit those fields when the -task would benefit from them. - -## Failure Heuristics - -Treat these as likely transient and eligible for cooldown: - -- `rate limit` -- `usage limit` -- `quota` -- `overloaded` -- `capacity` -- `try again later` -- `temporarily unavailable` - -Treat these as setup problems, not cooldown signals: - -- `not authenticated` -- `command not found` -- `invalid api key` -- `permission denied` -- malformed CLI arguments - -## Output Expectations - -The point of delegation is to save Codex effort without lowering standards. -Codex should use the subagent output as input to judgment, not as a substitute -for judgment. diff --git a/providers/codex/skills/subagent-orchestrator/references/claude-code.md b/providers/codex/skills/subagent-orchestrator/references/claude-code.md deleted file mode 100644 index dd9516b..0000000 --- a/providers/codex/skills/subagent-orchestrator/references/claude-code.md +++ /dev/null @@ -1,65 +0,0 @@ -# Claude Code - -Use Claude Code as the first-choice delegate when the task needs the best -overall coding judgment. - -## Default Models - -- Preferred default: `claude-sonnet-4-6` -- Upgrade for harder tasks: `claude-opus-4-6` -- Built-in alias examples from local help: `sonnet`, `opus` - -## Non-Interactive Pattern - -Read-only / planning: - -```bash -claude -p \ - --model claude-sonnet-4-6 \ - --permission-mode plan \ - --output-format json \ - "Review this repository and return the top 10 architecture risks." -``` - -Higher-end fallback for hard analysis: - -```bash -claude -p \ - --model claude-sonnet-4-6 \ - --fallback-model claude-opus-4-6 \ - --permission-mode plan \ - --output-format json \ - "Trace the data flow for authentication and call out likely failure modes." -``` - -## Dangerous Mode Equivalent - -Claude does not present this as "yolo mode." The closest equivalents are: - -- `--dangerously-skip-permissions` -- `--permission-mode bypassPermissions` - -Use these only in trusted sandboxes. - -## Useful Flags - -- `-p`, `--print`: non-interactive mode -- `--output-format json`: structured output for scripting -- `--permission-mode plan`: read-only planning mode -- `--permission-mode acceptEdits`: allow edits without full bypass -- `--add-dir`: expand workspace access -- `--fallback-model`: automatic model fallback in `--print` mode - -## Quirks - -- `--fallback-model` only works with `--print`. -- `-p` skips the workspace trust dialog, so only use it in directories you - trust. -- Claude has strong permission controls, but the "dangerous" flags are truly - broad; keep them rare. - -## Good Delegation Prompts - -- "Summarize the repo structure in 12 bullets and name the files that matter." -- "Review this diff and list only material correctness risks." -- "Draft an implementation plan for this bug fix without editing files." diff --git a/providers/codex/skills/subagent-orchestrator/references/context-assembly.md b/providers/codex/skills/subagent-orchestrator/references/context-assembly.md deleted file mode 100644 index 0dd0bb8..0000000 --- a/providers/codex/skills/subagent-orchestrator/references/context-assembly.md +++ /dev/null @@ -1,185 +0,0 @@ -# Context Assembly - -Use this file when a thin delegation prompt needs to become a useful subagent -packet. - -## Thin prompt - -```text -Review this repository and summarize the main risks. -``` - -That is not enough on its own. It says what to do, but not why, how to judge -the answer, or where to look first. - -## Delegation packet - -Build a prompt with these fields: - -```text -Goal: -Success looks like: -Failure looks like: -Progress reporting: -Return format: -Relevant skills or domain hints: -Files to read first: -Attached context: -Task: -``` - -## What to Fill In - -### Goal - -State why Codex is delegating the task. - -Examples: - -- Find the highest-risk correctness issues before I spend Codex time reviewing details. -- Produce a cheap first-pass repo map so Codex can focus on judgment instead of discovery. - -### Success looks like - -State what a useful answer must contain. - -Examples: - -- A ranked list of the top five risks with file references and short rationale. -- A concise architecture summary that names the modules worth deeper review. - -### Failure looks like - -State what would make the result unusable. - -Examples: - -- Generic observations without file references. -- Style-only feedback when Codex asked for correctness or architecture risks. - -### Return format - -Keep it strict when you want easy review. - -Examples: - -- Five bullets, each with `risk`, `why`, and `files`. -- Markdown sections: `Summary`, `Risks`, `Open questions`. - -### Progress reporting - -Tell the subagent how you want to observe progress while it works. - -Examples: - -- Print a short stdout update after each major step so Codex can tell the work - is moving. -- Append checkpoints to `/tmp/subagent-progress.log` if the CLI and environment - make that practical. -- If the task is short, say that no intermediate progress updates are needed. - -### Relevant skills or domain hints - -Tell the subagent which domain lens to use. - -Examples: - -- If this is an Obsidian vault, use obsidian-related skills and pay attention to wikilinks, frontmatter, and vault conventions. -- If the repo uses specs, read the relevant `specs/` files before proposing changes. -- If the task is Python-heavy, use the repo's Python conventions. - -### Files to read first - -Give an ordered short list. - -Good candidates: - -- `AGENTS.md` -- `README.md` -- `specs//AGENTS.md` -- `specs//design.md` -- the specific source files that define the behavior under review - -### Attached context - -Attach only the chunks that remove ambiguity. - -Good attachments: - -- the relevant paragraph from `AGENTS.md` -- the exact acceptance criteria from a spec -- a short excerpt from a configuration file - -Avoid pasting long files when a path plus a one-line instruction is enough. - -## Example: Repo Risk Review - -```text -Goal: -Give Codex a cheap first-pass map of the highest-risk architecture and correctness issues in this repo. - -Success looks like: -Return the top 5 material risks, each with a file path, why it matters, and what Codex should inspect next. - -Failure looks like: -Generic repo summary, style nits, or risks without file references. - -Progress reporting: -Print a brief stdout update after reading repo instructions and after finishing the ranked risk list. - -Return format: -Markdown with sections: Summary, Top Risks, Follow-up Files. - -Relevant skills or domain hints: -Read repo instructions first. If there is a spec for the current feature, use the spec workflow and read those files before judging implementation risk. - -Files to read first: -- AGENTS.md -- README.md -- specs/current-feature/AGENTS.md -- specs/current-feature/design.md -- src/auth.py -- src/session.py - -Attached context: -- Current task: review correctness and architecture risk, not style. -- Spec note: feature is still in implementation phase. - -Task: -Review this repository and summarize the main risks. -``` - -## Example: Obsidian Vault Task - -```text -Goal: -Help Codex quickly assess whether this Obsidian vault automation will break links or metadata. - -Success looks like: -Call out link, frontmatter, template, or base-view risks with concrete note paths. - -Failure looks like: -Generic markdown advice that ignores Obsidian-specific behavior. - -Progress reporting: -Append note-path checkpoints to `/tmp/subagent-progress.log` or print equivalent stdout updates if file logging is awkward. - -Return format: -Bullet list of risks plus a short verdict. - -Relevant skills or domain hints: -Use obsidian-related skills. Pay attention to wikilinks, embeds, Bases files, and vault conventions. - -Files to read first: -- AGENTS.md -- README.md -- vault/Templates/Daily Note.md -- vault/Bases/Projects.base -- scripts/sync_notes.py - -Attached context: -- This repo is an Obsidian vault. Link integrity matters more than generic markdown style. - -Task: -Review this repository and summarize the main risks. -``` diff --git a/providers/codex/skills/subagent-orchestrator/references/copilot-cli.md b/providers/codex/skills/subagent-orchestrator/references/copilot-cli.md deleted file mode 100644 index c6f3ea7..0000000 --- a/providers/codex/skills/subagent-orchestrator/references/copilot-cli.md +++ /dev/null @@ -1,75 +0,0 @@ -# Copilot CLI - -Use Copilot CLI when GitHub-native context or existing Copilot budget makes it a -natural delegate, not as the default first pick for cheap subagent work. - -## Default Models - -Strong models exposed by current local help include: - -- `claude-sonnet-4.6` -- `claude-opus-4.6` -- `gpt-5.4` -- `gpt-5.4-mini` - -Practical default: - -- Preferred default: `claude-sonnet-4.6` -- Cheap fallback: `gpt-5.4` (high or medium) - -## Non-Interactive Pattern - -Read-only-ish analysis in the current repo: - -```bash -copilot -p "Review this repository and summarize the main risks." \ - --model claude-sonnet-4.6 \ - --output-format json \ - --allow-all-tools \ - --no-ask-user -``` - -If you want the fully permissive equivalent: - -```bash -copilot -p "Review this repository and summarize the main risks." \ - --model claude-sonnet-4.6 \ - --output-format json \ - --allow-all -``` - -## Yolo Mode - -Copilot has explicit permissive shortcuts: - -- `--allow-all` -- `--yolo` - -Both map to: - -- `--allow-all-tools` -- `--allow-all-paths` -- `--allow-all-urls` - -## Useful Flags - -- `-p`, `--prompt`: non-interactive mode -- `--model`: explicit model selection -- `--output-format json`: JSONL output -- `--allow-all-tools`: required for smooth non-interactive tool use -- `--no-ask-user`: keep the agent autonomous -- `--add-dir`: expand path access - -## Quirks - -- Permission controls are explicit and a bit more verbose than Gemini's. -- Premium request usage can make Copilot a less predictable default "money - saver" than Gemini's published free tier or a direct Claude subscription. -- Copilot can still be excellent when the surrounding workflow is already - GitHub-centric. - -## Good Delegation Prompts - -- "Review this repository with GitHub workflow assumptions in mind." -- "Summarize the likely CI and PR risks in this codebase." -- "Give me a second opinion using Copilot's current model stack." diff --git a/providers/codex/skills/subagent-orchestrator/references/gemini-cli.md b/providers/codex/skills/subagent-orchestrator/references/gemini-cli.md deleted file mode 100644 index f3e8991..0000000 --- a/providers/codex/skills/subagent-orchestrator/references/gemini-cli.md +++ /dev/null @@ -1,66 +0,0 @@ -# Gemini CLI - -Use Gemini CLI as the best value delegate and the default fallback when Claude -is capped or when you want broad, cheap exploration. - -## Default Models - -- Preferred default: `gemini-2.5-pro` -- Cheaper fast pass: `gemini-2.5-flash` - -## Non-Interactive Pattern - -Read-only / planning: - -```bash -gemini -p \ - "Review this repository and summarize the architecture in 10 bullets." \ - -m gemini-2.5-pro \ - --approval-mode plan \ - --output-format json -``` - -Cheap wide scan: - -```bash -gemini -p \ - "Scan this repository and list the most suspicious files for auth bugs." \ - -m gemini-2.5-flash \ - --approval-mode plan \ - --output-format json -``` - -## Yolo Mode - -Gemini has an explicit yolo mode: - -- `--yolo` -- `--approval-mode yolo` - -It also supports: - -- `--approval-mode auto_edit` -- `--approval-mode plan` - -Gemini's docs note that sandboxing is enabled by default when using yolo mode. - -## Useful Flags - -- `-p`, `--prompt`: non-interactive mode -- `-m`, `--model`: explicit model selection -- `--output-format json`: structured output -- `--approval-mode plan`: read-only planning mode -- `--include-directories`: expand workspace scope - -## Quirks - -- Google login is the cleanest path for generous default usage, but API key mode - is the clearest path when you need strict model control. -- Gemini is especially attractive for large-context reads because the official - docs emphasize Gemini 2.5 Pro with a 1M-token context window. - -## Good Delegation Prompts - -- "Read the whole repo and group modules by responsibility." -- "Give me a low-cost second opinion on whether this refactor is safe." -- "List likely bug clusters without proposing a patch yet." diff --git a/providers/codex/skills/subagent-orchestrator/references/provider-ranking.md b/providers/codex/skills/subagent-orchestrator/references/provider-ranking.md deleted file mode 100644 index 48dd63b..0000000 --- a/providers/codex/skills/subagent-orchestrator/references/provider-ranking.md +++ /dev/null @@ -1,119 +0,0 @@ -# Provider Ranking - -Research date: April 3, 2026. - -This ordering is a recommendation for Codex-as-orchestrator workflows. It is -based on current official docs, local CLI help in this environment, and -practical tradeoffs across quality, CLI ergonomics, cost/value, speed, and -session-limit behavior. - -## Ranking - -### 1. Claude Code - -Best default choice for high-value delegated coding work. - -Why it ranks first: - -- Strongest default for deep repo exploration, architecture reading, and - nuanced code review. -- Mature non-interactive CLI flow with explicit `--print`, - `--output-format json`, `--permission-mode`, and `--fallback-model`. -- Model naming is clear for subagent use: `claude-sonnet-4-6` and - `claude-opus-4-6` are straightforward targets. - -Best use: - -- repo exploration -- architecture review -- code review -- difficult implementation planning - -Watch-outs: - -- usage limits and temporary overload still happen -- dangerous-mode equivalents should stay sandbox-only - -### 2. Gemini CLI - -Best value fallback and strong default for cheap, broad, long-context work. - -Why it ranks second: - -- Official docs emphasize Gemini 2.5 Pro, a 1M-token context window, and a free - tier with 60 requests per minute and 1,000 requests per day for personal - Google accounts. -- Good non-interactive scripting support with `-p`, JSON output, explicit model - selection, and a true plan mode. -- Excellent fit for wide repository summaries, docs digestion, and low-cost - retries when Claude is capped. - -Best use: - -- long-context reading -- repo scans -- cheap retries -- draft summaries -- second-opinion analysis - -Watch-outs: - -- model control depends on auth method; API key mode is the clearest path when - you need exact model selection -- quality is strong, but I would still prefer Claude first for the most subtle - review tasks - -### 3. Copilot CLI - -Useful situational delegate, not my default cheap subagent. - -Why it ranks third: - -- It exposes strong models and good CLI controls, but its best fit is often - "already inside a GitHub-centric workflow" rather than "lowest-friction cheap - delegate from Codex." -- Official docs and local help show strong model optionality, but the premium - request and entitlement story is more variable than Gemini's published free - tier or a direct Claude subscription. -- Best when GitHub context, MCP tooling, or existing Copilot spend makes it the - natural choice. - -Best use: - -- GitHub-heavy workflows -- cases where Copilot is already the paid path you are using -- situations where you want model choice inside one GitHub-managed tool - -Watch-outs: - -- premium-request budgeting can make it less predictable as the default - "savings" delegate -- permission setup for non-interactive runs is more explicit than Gemini or - Claude - -## Task Routing - -- Use Claude Code first for difficult reasoning, repo review, and subtle code - judgment. -- Use Gemini CLI first when you want low-cost breadth, very large context, or a - cheap retry path. -- Use Copilot CLI first only when GitHub-native context or an existing Copilot - budget is the main constraint. - -## Sources - -- Anthropic Claude Code settings: -- Anthropic Claude Code security: -- Gemini CLI overview: -- Gemini CLI configuration: -- GitHub Copilot CLI docs hub: -- GitHub Copilot CLI command reference: -- GitHub Copilot premium request management: -- GitHub Agentic Workflows engine reference: - -## Notes - -- The ordering above is an inference from the sources, not a vendor-published - benchmark. -- Local CLI help on April 3, 2026 matched the flags documented for the - installed `claude`, `gemini`, and `copilot` binaries in this environment. diff --git a/providers/codex/skills/subagent-orchestrator/scripts/provider_state.py b/providers/codex/skills/subagent-orchestrator/scripts/provider_state.py deleted file mode 100755 index 8dbdf51..0000000 --- a/providers/codex/skills/subagent-orchestrator/scripts/provider_state.py +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -from argparse import ArgumentParser -from copy import deepcopy -from datetime import datetime, timedelta, timezone -from pathlib import Path -import json -import os -import sys - - -DEFAULT_PROVIDERS = [ - { - "provider": "claude-code", - "rank": 1, - "preferred_model": "claude-sonnet-4-6", - "fallback_model": "claude-opus-4-6", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, - { - "provider": "gemini-cli", - "rank": 2, - "preferred_model": "gemini-2.5-pro", - "fallback_model": "gemini-2.5-flash", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, - { - "provider": "copilot-cli", - "rank": 3, - "preferred_model": "claude-sonnet-4.6", - "fallback_model": "gpt-5.4-mini", - "is_active": True, - "last_failure_at": None, - "cooldown_until": None, - "failure_reason": None, - }, -] - - -def utc_now() -> datetime: - return datetime.now(timezone.utc) - - -def isoformat(value: datetime | None) -> str | None: - if value is None: - return None - return value.isoformat().replace("+00:00", "Z") - - -def parse_timestamp(value: str | None) -> datetime | None: - if not value: - return None - return datetime.fromisoformat(value.replace("Z", "+00:00")) - - -def default_state_path() -> Path: - override = os.environ.get("CODEX_SUBAGENT_STATE_PATH") - if override: - return Path(override).expanduser() - return Path.home() / ".codex" / "subagent-orchestrator" / "provider-state.json" - - -def build_default_state() -> dict: - return {"providers": deepcopy(DEFAULT_PROVIDERS)} - - -def load_state(path: Path) -> dict: - if not path.exists(): - return reconcile_state(build_default_state()) - - payload = json.loads(path.read_text()) - if not isinstance(payload, dict) or not isinstance(payload.get("providers"), list): - raise ValueError(f"invalid provider state file: {path}") - - return reconcile_state(payload) - - -def reconcile_state(payload: dict) -> dict: - by_name = { - item["provider"]: item - for item in payload.get("providers", []) - if isinstance(item, dict) and "provider" in item - } - now = utc_now() - providers = [] - - for template in DEFAULT_PROVIDERS: - item = deepcopy(template) - item.update(by_name.get(template["provider"], {})) - cooldown_until = parse_timestamp(item.get("cooldown_until")) - - if cooldown_until and cooldown_until <= now: - item["is_active"] = True - item["cooldown_until"] = None - item["failure_reason"] = None - elif cooldown_until: - item["is_active"] = False - item["cooldown_until"] = isoformat(cooldown_until) - else: - item["cooldown_until"] = None - item["is_active"] = bool(item.get("is_active", True)) - - last_failure_at = parse_timestamp(item.get("last_failure_at")) - item["last_failure_at"] = isoformat(last_failure_at) - providers.append(item) - - providers.sort(key=lambda item: item["rank"]) - return {"providers": providers} - - -def save_state(path: Path, payload: dict) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(payload, indent=2, sort_keys=False) + "\n") - - -def get_provider(payload: dict, provider: str) -> dict: - for item in payload["providers"]: - if item["provider"] == provider: - return item - known = ", ".join(entry["provider"] for entry in payload["providers"]) - raise ValueError(f"unknown provider {provider!r}; expected one of: {known}") - - -def cmd_status(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - save_state(state_path, payload) - print(json.dumps(payload, indent=2)) - return 0 - - -def cmd_rank(args) -> int: - return cmd_status(args) - - -def cmd_pick(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - save_state(state_path, payload) - - for item in payload["providers"]: - if item["is_active"]: - print(json.dumps(item, indent=2)) - return 0 - - next_provider = min( - payload["providers"], - key=lambda item: item["cooldown_until"] or "9999-12-31T00:00:00Z", - ) - print( - json.dumps( - { - "provider": None, - "reason": "no_active_provider", - "next_provider": next_provider["provider"], - "cooldown_until": next_provider["cooldown_until"], - }, - indent=2, - ) - ) - return 1 - - -def cmd_mark_failure(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - provider = get_provider(payload, args.provider) - now = utc_now() - provider["is_active"] = False - provider["last_failure_at"] = isoformat(now) - provider["cooldown_until"] = isoformat(now + timedelta(minutes=args.cooldown_minutes)) - provider["failure_reason"] = args.reason - save_state(state_path, payload) - print(json.dumps(provider, indent=2)) - return 0 - - -def cmd_mark_success(args) -> int: - state_path = Path(args.state).expanduser() - payload = load_state(state_path) - provider = get_provider(payload, args.provider) - provider["is_active"] = True - provider["cooldown_until"] = None - provider["failure_reason"] = None - save_state(state_path, payload) - print(json.dumps(provider, indent=2)) - return 0 - - -def build_parser() -> ArgumentParser: - default_state = str(default_state_path()) - parser = ArgumentParser( - description="Track Codex subagent provider ranking and temporary cooldowns." - ) - parser.add_argument( - "--state", - default=default_state, - help="Path to the provider state JSON file.", - ) - - subparsers = parser.add_subparsers(dest="command", required=True) - - status_parser = subparsers.add_parser("status", help="Show provider state.") - status_parser.add_argument("--state", default=default_state) - status_parser.set_defaults(func=cmd_status) - - rank_parser = subparsers.add_parser("rank", help="Show ranked provider state.") - rank_parser.add_argument("--state", default=default_state) - rank_parser.set_defaults(func=cmd_rank) - - pick_parser = subparsers.add_parser("pick", help="Pick the best active provider.") - pick_parser.add_argument("--state", default=default_state) - pick_parser.set_defaults(func=cmd_pick) - - failure_parser = subparsers.add_parser( - "mark-failure", help="Cool down a provider after a transient failure." - ) - failure_parser.add_argument("--state", default=default_state) - failure_parser.add_argument("--provider", required=True) - failure_parser.add_argument("--reason", required=True) - failure_parser.add_argument("--cooldown-minutes", type=int, default=30) - failure_parser.set_defaults(func=cmd_mark_failure) - - success_parser = subparsers.add_parser( - "mark-success", help="Reactivate a provider after a successful run." - ) - success_parser.add_argument("--state", default=default_state) - success_parser.add_argument("--provider", required=True) - success_parser.set_defaults(func=cmd_mark_success) - - return parser - - -def main() -> int: - parser = build_parser() - args = parser.parse_args() - return args.func(args) - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except ValueError as exc: - print(f"ERROR: {exc}", file=sys.stderr) - raise SystemExit(2) diff --git a/providers/copilot/instructions.md b/providers/copilot/instructions.md index 3ab9726..27c38b1 100644 --- a/providers/copilot/instructions.md +++ b/providers/copilot/instructions.md @@ -2,7 +2,8 @@ - This installation targets GitHub Copilot CLI. - Install the shared instructions into `~/.copilot/copilot-instructions.md`. -- Copy skills into `~/.copilot/skills/`. +- Store canonical skills in `~/.agents/skills/`; Copilot reads this personal + skills directory directly. - Keep Copilot-specific instructions short and layered on top of the shared workflow. - Auto-approval setup installs `~/.copilot/bin/copilot-auto`, which uses diff --git a/providers/gemini/instructions.md b/providers/gemini/instructions.md index 1966bf3..e65b8e5 100644 --- a/providers/gemini/instructions.md +++ b/providers/gemini/instructions.md @@ -2,7 +2,8 @@ - This installation targets Gemini CLI. - Install the shared instructions into `~/.gemini/GEMINI.md`. -- Copy skills into `~/.gemini/skills/`. +- Store canonical skills in `~/.agents/skills/`. +- Mirror skills into `~/.gemini/skills/` for Gemini CLI reliability. - Keep Gemini-specific instructions short and layered on top of the shared workflow. - Auto-approval setup uses `auto_edit` for direct `gemini` launches and diff --git a/scripts/setup-agent.sh b/scripts/setup-agent.sh index 6ff1df3..e15b331 100755 --- a/scripts/setup-agent.sh +++ b/scripts/setup-agent.sh @@ -3,6 +3,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +CANONICAL_SKILLS_DIR="$HOME/.agents/skills" GREEN='\033[0;32m' CYAN='\033[0;36m' @@ -60,6 +61,12 @@ copy_core_skills() { done } +provider_uses_canonical_skills() { + local provider="$1" + + [[ "$provider" == "copilot" ]] +} + apply_auto_approval_config() { local provider="$1" @@ -79,8 +86,10 @@ usage() { cat <<'EOF' Usage: ./scripts/setup-agent.sh [codex|claude|gemini|copilot|all|auto] -Installs shared core assets plus provider-specific overlays into the selected -agent home directory. With no argument, auto-detect installed providers. +Installs shared instructions and skills for the selected agent. Skills are +installed canonically into ~/.agents/skills and mirrored only for providers that +require provider-local skill directories. With no argument, auto-detect +installed providers. EOF } @@ -133,10 +142,9 @@ install_provider() { local provider_dir="$ROOT_DIR/providers/$provider" local instruction_path="$home_dir/$instruction_name" local skills_dir="$home_dir/skills" - local provider_skills_dir="$provider_dir/skills" local shared_skill_count="" - local provider_skill_count="" local skill_note="" + local skill_destination="" if [[ ! -f "$provider_dir/instructions.md" ]]; then echo "Error: missing provider instructions at $provider_dir/instructions.md" @@ -144,13 +152,14 @@ install_provider() { fi shared_skill_count="$(skill_count "$ROOT_DIR/core/skills" default)" - provider_skill_count="$(skill_count "$provider_skills_dir")" skill_note="$shared_skill_count shared" if [[ "$include_llm_wiki" == "yes" ]]; then skill_note="$skill_note + $(skill_count "$ROOT_DIR/core/skills" llm-wiki) llm-wiki" fi - if [[ "$provider_skill_count" != "0" ]]; then - skill_note="$skill_note + $provider_skill_count provider-specific" + if provider_uses_canonical_skills "$provider"; then + skill_destination="$CANONICAL_SKILLS_DIR" + else + skill_destination="$CANONICAL_SKILLS_DIR + $skills_dir mirror" fi header "Agent Setup" @@ -158,6 +167,7 @@ install_provider() { field "Destination:" "$home_dir" field "Instructions:" "$instruction_name" field "Skills:" "$skill_note" + field "Skill path:" "$skill_destination" mkdir -p "$home_dir" cat \ @@ -167,17 +177,24 @@ install_provider() { > "$instruction_path" log "Wrote $instruction_path" - rm -rf "$skills_dir" - mkdir -p "$skills_dir" - copy_core_skills "$skills_dir" "$include_llm_wiki" - log "Copied $shared_skill_count shared skills to $skills_dir" + rm -rf "$CANONICAL_SKILLS_DIR" + mkdir -p "$CANONICAL_SKILLS_DIR" + copy_core_skills "$CANONICAL_SKILLS_DIR" "$include_llm_wiki" + log "Copied $shared_skill_count shared skills to $CANONICAL_SKILLS_DIR" if [[ "$include_llm_wiki" == "yes" ]]; then - log "Copied $(skill_count "$ROOT_DIR/core/skills" llm-wiki) llm-wiki skills to $skills_dir" + log "Copied $(skill_count "$ROOT_DIR/core/skills" llm-wiki) llm-wiki skills to $CANONICAL_SKILLS_DIR" fi - if [[ "$provider_skill_count" != "0" ]]; then - cp -R "$provider_skills_dir/." "$skills_dir/" - log "Copied $provider_skill_count provider-specific skills to $skills_dir" + if provider_uses_canonical_skills "$provider"; then + if [[ -d "$skills_dir" ]]; then + rm -rf "$skills_dir" + log "Removed provider-local skills mirror at $skills_dir" + fi + else + rm -rf "$skills_dir" + mkdir -p "$skills_dir" + cp -R "$CANONICAL_SKILLS_DIR/." "$skills_dir/" + log "Mirrored skills to $skills_dir" fi apply_auto_approval_config "$provider"