diff --git a/README.md b/README.md index fdff77f..c6572d2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ pytest-codingagents gives you a complete **test→optimize→test loop** for Git 4. **A/B confirm** — use `ab_run` to prove the change actually helps 5. **Ship it** — you now have evidence, not vibes -Currently supports **GitHub Copilot** via [copilot-sdk](https://www.npmjs.com/package/github-copilot-sdk). More agents (Claude Code, etc.) coming soon. +Currently supports **GitHub Copilot** via [copilot-sdk](https://www.npmjs.com/package/github-copilot-sdk) with **IDE personas** for VS Code, Claude Code, and Copilot CLI environments. ```python from pytest_codingagents import CopilotAgent, optimize_instruction diff --git a/docs/how-to/copilot-config.md b/docs/how-to/copilot-config.md index 0ed74ae..52f9d5f 100644 --- a/docs/how-to/copilot-config.md +++ b/docs/how-to/copilot-config.md @@ -9,7 +9,9 @@ test fixture project, a shared team config repo, or anything else. | Source | Path (relative to the root you point at) | Maps to | |--------|------------------------------------------|---------| | Instructions | `.github/copilot-instructions.md` | `instructions` | -| Custom agents | `.github/agents/*.agent.md` | `custom_agents` | +| Custom agents | `.github/agents/**/*.agent.md` (recursive) | `custom_agents` | + +Agent files are discovered recursively — agents in `subagents/` subdirectories (e.g. `.github/agents/hve-core/subagents/`) are included automatically. ## Basic usage @@ -102,6 +104,7 @@ The Markdown body becomes the agent's prompt. 
## See also +- [IDE Personas Guide](ide-personas.md) — Simulate VS Code, Claude Code, or Copilot CLI environments - [A/B Testing Guide](ab-testing.md) - [GitHub Copilot custom agents docs](https://docs.github.com/en/copilot/how-tos/copilot-cli/customize-copilot/create-custom-agents-for-cli) - [Custom agents configuration reference](https://docs.github.com/en/copilot/reference/custom-agents-configuration) diff --git a/docs/how-to/ide-personas.md b/docs/how-to/ide-personas.md new file mode 100644 index 0000000..6185bef --- /dev/null +++ b/docs/how-to/ide-personas.md @@ -0,0 +1,108 @@ +# IDE Personas + +Agents written for VS Code, Claude Code, or the Copilot CLI each expect a +different native tool set. A `Persona` tells `pytest-codingagents` which +runtime environment to simulate so your tests run the agent the same way +the IDE would. + +## The problem + +An agent like `rpi-agent` is written for VS Code, where `runSubagent` is a +native tool. In the Copilot SDK headless mode `runSubagent` does not exist, +so the agent silently falls back to direct implementation — the RPI pipeline +never fires, and the test proves nothing. + +A persona solves this by: + +1. **Injecting polyfill tools** — e.g. a Python-side `runSubagent` that + dispatches registered custom agents as nested SDK runs. +2. **Auto-loading custom instructions** — VS Code and Copilot CLI read + `.github/copilot-instructions.md`; Claude Code reads `CLAUDE.md`. The + persona does the same, prepending the file to the session's system + message when `working_directory` is set. +3. **Setting IDE context** — adds a system-message fragment so the model + knows which environment it is in. 
+ +## Built-in personas + +| Persona | Auto-loaded file | Polyfilled tools | Use for | +|---|---|---|---| +| `VSCodePersona` *(default)* | `.github/copilot-instructions.md` | `runSubagent` | VS Code Copilot agents | +| `CopilotCLIPersona` | `.github/copilot-instructions.md` | none — `task` + `skill` are native | Copilot terminal agents | +| `ClaudeCodePersona` | `CLAUDE.md` | `task`-dispatch | Claude Code agents | +| `HeadlessPersona` | nothing | none | Raw SDK baseline | + +## Usage + +```python +from pytest_codingagents import CopilotAgent, VSCodePersona, CopilotCLIPersona, ClaudeCodePersona, HeadlessPersona + +# VS Code agent — auto-loads .github/copilot-instructions.md, polyfills runSubagent +agent = CopilotAgent( + persona=VSCodePersona(), + working_directory=str(workspace), + custom_agents=my_agents, +) + +# Default — VSCodePersona is used automatically +agent = CopilotAgent(custom_agents=my_agents) + +# Copilot CLI — same instructions file; task+skill already native, no polyfill needed +agent = CopilotAgent(persona=CopilotCLIPersona(), working_directory=str(workspace)) + +# Claude Code — loads CLAUDE.md, polyfills task-dispatch +agent = CopilotAgent( + persona=ClaudeCodePersona(), + working_directory=str(workspace), + custom_agents=my_agents, +) + +# Headless baseline — no IDE context, no file loaded, no polyfills +agent = CopilotAgent(persona=HeadlessPersona()) +``` + +## Custom instructions loading + +Custom instructions loading is **automatic and additive**: + +- Fires only when `agent.working_directory` is set +- Fires only when the target file exists in that directory and is not empty +- Prepends the file content to the session system message (before any + `instructions` you set on the agent) +- If the file is absent, nothing is loaded and the persona otherwise behaves the same + +This means the same test works against a workspace that has +`.github/copilot-instructions.md` and one that does not — the persona +adapts silently. 
+ +## `runSubagent` polyfill + +`VSCodePersona` injects `runSubagent` as a Python-side tool when +`agent.custom_agents` is non-empty. The tool dispatches the named agent +as a nested `run_copilot` call, so the model's sub-agent invocations +produce real results — not stub responses. + +The polyfill is not injected when `custom_agents` is empty. + +## Extending personas + +Subclass `Persona` and override `apply()`: + +```python +from pytest_codingagents import Persona, CopilotAgent + +class MyPersona(Persona): + def apply(self, agent, session_config, mapper): + # Add your tool polyfills or system message additions here + session_config.setdefault("system_message", {})["content"] = ( + "Custom context. " + + session_config.get("system_message", {}).get("content", "") + ) + +agent = CopilotAgent(persona=MyPersona()) +``` + +## See also + +- [Load from Copilot Config](copilot-config.md) +- [Tool Control](tool-control.md) diff --git a/docs/how-to/index.md b/docs/how-to/index.md index b1840ea..2ad9ce9 100644 --- a/docs/how-to/index.md +++ b/docs/how-to/index.md @@ -6,6 +6,7 @@ Practical guides for common tasks. 
- [Optimize Instructions](optimize.md) — Use AI to turn test failures into actionable instruction improvements - [Assertions](assertions.md) — File helpers and semantic assertions with `llm_assert` - [Load from Copilot Config](copilot-config.md) — Build a `CopilotAgent` from your real `.github/` config files +- [IDE Personas](ide-personas.md) — Simulate VS Code, Claude Code, or Copilot CLI tool environments - [Skill Testing](skills.md) — Measure the impact of domain knowledge - [MCP Server Testing](mcp-servers.md) — Test that the agent uses your custom tools - [CLI Tool Testing](cli-tools.md) — Verify the agent operates CLI tools correctly diff --git a/docs/reference/api.md b/docs/reference/api.md index 8f16a53..8e1f381 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -15,3 +15,25 @@ ::: pytest_codingagents.InstructionSuggestion options: show_source: false + +## IDE Personas + +::: pytest_codingagents.Persona + options: + show_source: false + +::: pytest_codingagents.VSCodePersona + options: + show_source: false + +::: pytest_codingagents.CopilotCLIPersona + options: + show_source: false + +::: pytest_codingagents.ClaudeCodePersona + options: + show_source: false + +::: pytest_codingagents.HeadlessPersona + options: + show_source: false diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index d103819..32f8f2f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -4,6 +4,7 @@ | Field | Type | Default | Description | |-------|------|---------|-------------| +| `persona` | `Persona` | `VSCodePersona()` | IDE runtime persona — controls polyfill tools and auto-loads IDE-specific custom instructions. 
See [IDE Personas](../how-to/ide-personas.md) | | `name` | `str` | `"copilot"` | Agent identifier for reports | | `model` | `str \| None` | `None` | Model to use (e.g., `claude-sonnet-4`) | | `instructions` | `str \| None` | `None` | Instructions for the agent | diff --git a/pyproject.toml b/pyproject.toml index d7f4de9..2e53803 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "pytest-codingagents" -version = "0.2.0" +version = "0.2.1" description = "Pytest plugin for testing real coding agents via their SDK" readme = "README.md" license = { text = "MIT" } diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py index 182169f..828c111 100644 --- a/src/pytest_codingagents/__init__.py +++ b/src/pytest_codingagents/__init__.py @@ -8,12 +8,24 @@ InstructionSuggestion, optimize_instruction, ) +from pytest_codingagents.copilot.personas import ( + ClaudeCodePersona, + CopilotCLIPersona, + HeadlessPersona, + Persona, + VSCodePersona, +) from pytest_codingagents.copilot.result import CopilotResult __all__ = [ "CopilotAgent", "CopilotResult", "InstructionSuggestion", + "ClaudeCodePersona", + "CopilotCLIPersona", + "HeadlessPersona", + "Persona", + "VSCodePersona", "load_custom_agent", "load_custom_agents", "optimize_instruction", diff --git a/src/pytest_codingagents/copilot/agent.py b/src/pytest_codingagents/copilot/agent.py index 9521c24..306c8c9 100644 --- a/src/pytest_codingagents/copilot/agent.py +++ b/src/pytest_codingagents/copilot/agent.py @@ -4,10 +4,13 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import yaml +if TYPE_CHECKING: + from pytest_codingagents.copilot.personas import Persona + def _parse_agent_file(path: Path) -> dict[str, Any]: """Parse a ``.agent.md`` file into a ``CustomAgentConfig`` dict. 
@@ -130,6 +133,12 @@ class CopilotAgent: # SDK passthrough for unmapped fields extra_config: dict[str, Any] = field(default_factory=dict) + # IDE persona — controls which polyfill tools are injected to simulate + # the target runtime environment (VS Code, Claude Code, Copilot CLI, etc.) + # VSCodePersona is the default: it polyfills runSubagent when custom_agents + # are present, matching VS Code's native behaviour. + persona: "Persona" = field(default_factory=lambda: _default_persona()) + def build_session_config(self) -> dict[str, Any]: """Build a SessionConfig dict for the Copilot SDK. @@ -243,11 +252,11 @@ def from_copilot_config( if instructions_file.exists(): instructions = instructions_file.read_text(encoding="utf-8").strip() or None - # Load custom agents + # Load custom agents — recursive so subagents/ subdirectories are included agents: list[dict[str, Any]] = [] agents_dir = github_dir / "agents" if agents_dir.exists(): - for agent_file in sorted(agents_dir.glob("*.agent.md")): + for agent_file in sorted(agents_dir.rglob("*.agent.md")): agents.append(_parse_agent_file(agent_file)) config: dict[str, Any] = { @@ -256,3 +265,14 @@ def from_copilot_config( } config.update(overrides) return cls(**config) + + +def _default_persona() -> "Persona": + """Return the default persona (VSCodePersona). + + Defined as a function to avoid a circular-import at module level: + ``personas.py`` imports ``agent.py``, so we defer the import. 
+ """ + from pytest_codingagents.copilot.personas import VSCodePersona # noqa: PLC0415 + + return VSCodePersona() diff --git a/src/pytest_codingagents/copilot/events.py b/src/pytest_codingagents/copilot/events.py index 9298c52..514d381 100644 --- a/src/pytest_codingagents/copilot/events.py +++ b/src/pytest_codingagents/copilot/events.py @@ -274,6 +274,31 @@ def _handle_tool_execution_complete(self, event: SessionEvent) -> None: result_text = tc.result if tc else str(result_data) self._turns.append(Turn(role="tool", content=f"[{tool_name}] {result_text or ''}")) + # ── Subagent recording (used by the runSubagent / task polyfill handlers) ── + + def record_subagent_start(self, name: str) -> None: + """Record a subagent invocation dispatched via a polyfill tool (runSubagent / task).""" + self._subagent_start_times[name] = time.monotonic() + self._subagents.append(SubagentInvocation(name=name, status="started")) + + def record_subagent_complete(self, name: str) -> None: + """Mark the first "started" invocation of *name* as completed; store its duration in ms.""" + start = self._subagent_start_times.pop(name, None) + duration = (time.monotonic() - start) * 1000 if start is not None else None + for sa in self._subagents: + if sa.name == name and sa.status == "started": + sa.status = "completed" + sa.duration_ms = duration + return + + def record_subagent_failed(self, name: str) -> None: + """Mark the first "started" invocation of *name* as failed and drop its start time.""" + self._subagent_start_times.pop(name, None) + for sa in self._subagents: + if sa.name == name and sa.status == "started": + sa.status = "failed" + return + # ── Subagent events ── def _handle_subagent_selected(self, event: SessionEvent) -> None: diff --git a/src/pytest_codingagents/copilot/personas.py b/src/pytest_codingagents/copilot/personas.py new file mode 100644 index 0000000..501142c --- /dev/null +++ b/src/pytest_codingagents/copilot/personas.py @@ -0,0 +1,440 @@ +"""IDE Personas for pytest-codingagents. 
+ +A ``Persona`` defines the runtime environment in which an agent under test +is expected to run. Each persona ensures the agent has the correct tool set +for its target IDE by injecting polyfill tools and adding a system-message +fragment that sets context. + +Built-in personas +----------------- +``VSCodePersona`` (default) + Simulates the VS Code Copilot extension. Polyfills ``runSubagent`` so + that agents written for VS Code dispatch sub-agents correctly. + +``ClaudeCodePersona`` + Simulates Claude Code. Polyfills a ``task``-dispatch tool (same + mechanism as ``runSubagent``, named ``task`` to match Claude Code's + native API). + +``CopilotCLIPersona`` + Simulates the GitHub Copilot terminal agent. No polyfills are needed — + ``task`` and ``skill`` are already in the SDK's native 16-tool set. + Adds a system-message fragment so the model knows its environment. + +``HeadlessPersona`` + Raw SDK headless mode — no polyfills, no extra system message. Use + when you want to test exactly what the SDK exposes with no IDE context. + +Usage:: + + from pytest_codingagents import CopilotAgent, VSCodePersona, ClaudeCodePersona + + # Explicit — recommended for clarity + agent = CopilotAgent(persona=VSCodePersona(), custom_agents=[...]) + + # Default — VSCodePersona is used automatically + agent = CopilotAgent(custom_agents=[...]) + + # Headless — no IDE context, no polyfills + agent = CopilotAgent(persona=HeadlessPersona()) +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from copilot.types import Tool, ToolInvocation, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent + from pytest_codingagents.copilot.events import EventMapper + + +# --------------------------------------------------------------------------- +# Base class +# --------------------------------------------------------------------------- + + +class Persona: + """Base class for IDE runtime personas. 
+ + Override ``apply()`` to inject polyfill tools and system-message + additions that match your target IDE's native tool set. + + The ``apply()`` method is called by the runner *after* + ``agent.build_session_config()`` and *before* the session is created, + so modifications to ``session_config`` take effect immediately. + + Phase-2 extension point: override ``create_client()`` to swap the + underlying SDK backend (e.g. Anthropic SDK for Claude Code). + """ + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + """Modify *session_config* in-place to match this persona's environment. + + Args: + agent: The ``CopilotAgent`` being executed (read-only). + session_config: The session config dict built from ``agent``. + Mutate this to inject tools, update system_message, etc. + mapper: The ``EventMapper`` for the current run. Pass to + tool handlers that need to record subagent events. + """ + + # ------------------------------------------------------------------ + # Phase-2 extension point (not yet used) + # ------------------------------------------------------------------ + + # async def create_client(self, agent: CopilotAgent) -> CopilotClient: + # """Override to swap the SDK backend for this persona.""" + # from copilot import CopilotClient + # return CopilotClient(...) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" + + +# --------------------------------------------------------------------------- +# Headless (raw SDK baseline) +# --------------------------------------------------------------------------- + + +class HeadlessPersona(Persona): + """Raw SDK headless mode — no polyfills, no IDE system message. + + Use this when you want to test exactly what the Copilot SDK exposes + with no runtime context added. This is the minimal baseline. 
+ """ + + +# --------------------------------------------------------------------------- +# GitHub Copilot CLI +# --------------------------------------------------------------------------- + + +class CopilotCLIPersona(Persona): + """GitHub Copilot terminal agent persona. + + ``task`` and ``skill`` are already in the SDK's native 16-tool set, so + no polyfills are needed. This persona adds a system-message fragment + naming the Copilot CLI and, when ``working_directory`` is set, prepends + ``.github/copilot-instructions.md`` if that file exists and is non-empty. + """ + + _SYSTEM_MSG = "You are running inside GitHub Copilot CLI." + _INSTRUCTIONS_FILE = Path(".github") / "copilot-instructions.md" + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + # Prepended after the IDE fragment, so the file's text ends up first. + _prepend_system_message(session_config, custom) + + +# --------------------------------------------------------------------------- +# VS Code +# --------------------------------------------------------------------------- + + +class VSCodePersona(Persona): + """VS Code Copilot extension persona. + + Polyfills ``runSubagent`` so agents written for VS Code (where + ``runSubagent`` is a native tool) can dispatch custom sub-agents + correctly during testing. + + The polyfill is only injected when ``agent.custom_agents`` is non-empty. + The system-message fragment is always added, and the instructions file is + still loaded whenever ``working_directory`` is set and the file exists. + """ + + _SYSTEM_MSG = "You are running inside VS Code." 
+ _INSTRUCTIONS_FILE = Path(".github") / "copilot-instructions.md" + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + _prepend_system_message(session_config, custom) + if agent.custom_agents: + tool = _make_runsubagent_tool(agent, agent.custom_agents, mapper) + _inject_tool(session_config, tool) + + +# --------------------------------------------------------------------------- +# Claude Code +# --------------------------------------------------------------------------- + + +class ClaudeCodePersona(Persona): + """Claude Code persona. + + Polyfills a ``task``-dispatch tool (same dispatch mechanism as + ``runSubagent``, named ``task`` to match Claude Code's native API) so + agents written for Claude Code can dispatch sub-agents during testing. + + The polyfill is only injected when ``agent.custom_agents`` is non-empty. + """ + + _SYSTEM_MSG = "You are running inside Claude Code." 
+ _INSTRUCTIONS_FILE = Path("CLAUDE.md") + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + _prepend_system_message(session_config, custom) + if agent.custom_agents: + tool = _make_task_tool(agent, agent.custom_agents, mapper) + _inject_tool(session_config, tool) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _load_custom_instructions_file(file_path: Path) -> str | None: + """Read a custom instructions file and return its content, or None if absent.""" + if file_path.exists(): + content = file_path.read_text(encoding="utf-8").strip() + return content or None + return None + + +def _prepend_system_message(session_config: dict[str, Any], message: str) -> None: + """Prepend *message* to the system_message in *session_config*. + + If no system_message is set, creates one in "append" mode so it is + added to the CLI's built-in system message rather than replacing it. 
+ """ + existing = session_config.get("system_message") or {} + existing_content: str = existing.get("content") or "" + mode: str = existing.get("mode") or "append" + combined = f"{message}\n\n{existing_content}".strip() + session_config["system_message"] = {"mode": mode, "content": combined} + + +def _inject_tool(session_config: dict[str, Any], tool: "Tool") -> None: + """Append *tool* to the tools list in *session_config*.""" + existing: list[Any] = list(session_config.get("tools") or []) + session_config["tools"] = existing + [tool] + + +def _make_runsubagent_tool( + parent_agent: "CopilotAgent", + custom_agents: list[dict[str, Any]], + mapper: "EventMapper", +) -> "Tool": + """Build a ``runSubagent`` polyfill tool for the VS Code persona. + + The Copilot CLI does not natively expose ``runSubagent`` in SDK headless + mode. This factory creates a Python-side ``Tool`` that dispatches + registered custom agents as nested ``run_copilot`` calls. + """ + from copilot.types import Tool, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent + from pytest_codingagents.copilot.runner import run_copilot + + agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents} + + async def _handler(invocation: "ToolInvocation") -> "ToolResult": + args: dict[str, Any] = invocation.get("arguments") or {} # type: ignore[assignment] + + agent_name: str | None = ( + args.get("agent_name") or args.get("agent") or args.get("agentName") + ) + prompt_text: str = args.get("prompt") or args.get("message") or args.get("task") or "" + + if not agent_name: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"), + resultType="failure", + ) + + agent_cfg = agent_map.get(agent_name) + if agent_cfg is None: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent '{agent_name}' not found. 
Available: {available}"), + resultType="failure", + ) + + mapper.record_subagent_start(agent_name) + + sub_agent = _CopilotAgent( + name=agent_name, + model=parent_agent.model, + instructions=agent_cfg.get("prompt"), + working_directory=parent_agent.working_directory, + timeout_s=min(parent_agent.timeout_s, 600.0), + max_turns=min(parent_agent.max_turns, 30), + auto_confirm=True, + # Reuse the parent's persona so the sub-agent runs in the same + # simulated IDE instead of silently falling back to the default. + persona=parent_agent.persona, + ) + + try: + sub_result = await run_copilot(sub_agent, prompt_text) + except Exception: + # Close out the invocation record before propagating; otherwise + # it would stay in the "started" state for the rest of the run. + mapper.record_subagent_failed(agent_name) + raise + + if sub_result.success: + mapper.record_subagent_complete(agent_name) + return ToolResult( + textResultForLlm=sub_result.final_response or "Sub-agent completed.", + resultType="success", + ) + + mapper.record_subagent_failed(agent_name) + return ToolResult( + textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}", + resultType="failure", + ) + + return Tool( + name="runSubagent", + description=( + "Dispatch a named custom agent to perform a task. " + "The agent runs with its own instructions and returns its " + "final response. " + f"Available agents: {sorted(agent_map)}" + ), + handler=_handler, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Name of the agent to dispatch.", + "enum": sorted(agent_map), + }, + "prompt": { + "type": "string", + "description": "Task or message to send to the agent.", + }, + }, + "required": ["agent_name", "prompt"], + }, + ) + + +def _make_task_tool( + parent_agent: "CopilotAgent", + custom_agents: list[dict[str, Any]], + mapper: "EventMapper", +) -> "Tool": + """Build a ``task`` polyfill tool for the Claude Code persona. + + Identical dispatch mechanism to ``_make_runsubagent_tool`` but named + ``task`` to match Claude Code's native sub-agent dispatch API. 
+ """ + from copilot.types import Tool, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent + from pytest_codingagents.copilot.runner import run_copilot + + agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents} + + async def _handler(invocation: "ToolInvocation") -> "ToolResult": + args: dict[str, Any] = invocation.get("arguments") or {} # type: ignore[assignment] + + agent_name: str | None = ( + args.get("agent_name") or args.get("agent") or args.get("agentName") + ) + prompt_text: str = ( + args.get("prompt") or args.get("message") or args.get("description") or "" + ) + + if not agent_name: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"), + resultType="failure", + ) + + agent_cfg = agent_map.get(agent_name) + if agent_cfg is None: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent '{agent_name}' not found. Available: {available}"), + resultType="failure", + ) + + mapper.record_subagent_start(agent_name) + + sub_agent = _CopilotAgent( + name=agent_name, + model=parent_agent.model, + instructions=agent_cfg.get("prompt"), + working_directory=parent_agent.working_directory, + timeout_s=min(parent_agent.timeout_s, 600.0), + max_turns=min(parent_agent.max_turns, 30), + auto_confirm=True, + # Reuse the parent's persona: without this the sub-agent gets the + # default persona rather than the one the parent is running under. + persona=parent_agent.persona, + ) + + try: + sub_result = await run_copilot(sub_agent, prompt_text) + except Exception: + # Close out the invocation record before propagating; otherwise + # it would stay in the "started" state for the rest of the run. + mapper.record_subagent_failed(agent_name) + raise + + if sub_result.success: + mapper.record_subagent_complete(agent_name) + return ToolResult( + textResultForLlm=sub_result.final_response or "Sub-agent completed.", + resultType="success", + ) + + mapper.record_subagent_failed(agent_name) + return ToolResult( + textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}", + resultType="failure", + ) + + return Tool( + name="task", + description=( + "Dispatch a named agent to perform a task. " + "The agent runs with its own instructions and returns its " + "final response. 
" + f"Available agents: {sorted(agent_map)}" + ), + handler=_handler, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Name of the agent to dispatch.", + "enum": sorted(agent_map), + }, + "prompt": { + "type": "string", + "description": "Task or message to send to the agent.", + }, + }, + "required": ["agent_name", "prompt"], + }, + ) diff --git a/src/pytest_codingagents/copilot/runner.py b/src/pytest_codingagents/copilot/runner.py index 813205a..6eafe99 100644 --- a/src/pytest_codingagents/copilot/runner.py +++ b/src/pytest_codingagents/copilot/runner.py @@ -100,7 +100,7 @@ def _is_transient_error(error: str | None) -> bool: return any(pattern in error for pattern in _TRANSIENT_PATTERNS) -async def _run_copilot_once(agent: CopilotAgent, prompt: str) -> CopilotResult: +async def _run_copilot_once(agent: "CopilotAgent", prompt: str) -> "CopilotResult": """Execute a single attempt of a prompt against GitHub Copilot.""" client_options: dict[str, Any] = { "cwd": agent.working_directory or ".", @@ -127,6 +127,10 @@ async def _run_copilot_once(agent: CopilotAgent, prompt: str) -> CopilotResult: # Build session config from agent session_config = agent.build_session_config() + # Apply the persona: injects polyfill tools and system-message + # additions that match the target IDE environment. + agent.persona.apply(agent, session_config, mapper) + # Install permission handler if auto_confirm is enabled if agent.auto_confirm: session_config["on_permission_request"] = _auto_approve_handler