From 59f5d7007dda283d53c472c643d1ed157b52470b Mon Sep 17 00:00:00 2001 From: Stefan Broenner Date: Fri, 20 Feb 2026 16:33:47 +0100 Subject: [PATCH] Move optimizer and SubagentInvocation to pytest-aitest; consolidate subagent tool factories - Remove copilot/optimizer.py (InstructionSuggestion and optimize_instruction now live in pytest_aitest.execution.optimizer; re-exported from pytest_codingagents.__init__) - result.py: remove SubagentInvocation from __all__; import from pytest_aitest directly - events.py: import SubagentInvocation directly from pytest_aitest.core.result - personas.py: consolidate _make_runsubagent_tool and _make_task_tool into shared _make_subagent_dispatch_tool(tool_name, ...) factory - Raise pytest-aitest lower bound to >=0.5.7 - Update docs: fix autodoc references, add correct import examples - Update tests: import optimizer types from pytest_aitest directly Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 114 +++++++++++++ docs/how-to/optimize.md | 4 +- docs/reference/api.md | 4 +- docs/reference/result.md | 14 +- pyproject.toml | 2 +- src/pytest_codingagents/__init__.py | 6 +- src/pytest_codingagents/copilot/events.py | 2 +- src/pytest_codingagents/copilot/optimizer.py | 161 ------------------- src/pytest_codingagents/copilot/personas.py | 127 ++++----------- src/pytest_codingagents/copilot/result.py | 22 +-- tests/test_optimizer_integration.py | 2 +- tests/test_subagents.py | 142 ++++++++++++++++ tests/unit/test_optimizer.py | 9 +- 13 files changed, 316 insertions(+), 293 deletions(-) create mode 100644 .github/copilot-instructions.md delete mode 100644 src/pytest_codingagents/copilot/optimizer.py create mode 100644 tests/test_subagents.py diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..00b5ac1 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,114 @@ +# Copilot Instructions for pytest-codingagents + +## 
Build, Test & Lint Commands + +```bash +# Install all dependencies (including dev and docs extras) +uv sync --all-extras + +# Unit tests (fast, no credentials needed) +uv run pytest tests/unit/ -v + +# Run a single unit test file +uv run pytest tests/unit/test_event_mapper.py -v + +# Run a single test by name +uv run pytest tests/unit/test_result.py::test_name -v + +# Integration tests (require GitHub Copilot credentials via GITHUB_TOKEN or `gh` CLI auth) +uv run pytest tests/ -v -m copilot + +# Run one integration test file for a specific model +uv run pytest tests/test_basic.py -k "gpt-5.2" -v + +# Lint +uv run ruff check src tests + +# Format +uv run ruff format src tests + +# Type check +uv run pyright src + +# Multi-file integration run with per-file HTML reports +uv run python scripts/run_all.py +``` + +## Architecture + +This is a **pytest plugin** (`pytest11` entry point) that provides a test harness for empirically validating GitHub Copilot agent configurations. + +### Data Flow + +``` +CopilotAgent (frozen config dataclass) + → runner.run_copilot(agent, prompt) + → GitHub Copilot SDK client + session + → SDK SessionEvent stream + → EventMapper.process_event() (38+ event types → structured data) + → Turn / ToolCall accumulation + → CopilotResult (turns, success, usage, reasoning, subagents) + → copilot_run fixture stashes result for pytest-aitest + → HTML report with AI-powered insights +``` + +### Key Modules (`src/pytest_codingagents/`) + +| Module | Role | +|--------|------| +| `plugin.py` | Pytest plugin entry point; registers fixtures and `pytest_aitest_analysis_prompt` hook | +| `copilot/agent.py` | `CopilotAgent` frozen dataclass; `build_session_config()` maps user fields → SDK TypedDict | +| `copilot/runner.py` | `run_copilot()` — manages SDK client lifecycle, streams events, returns `CopilotResult` | +| `copilot/events.py` | `EventMapper` — translates raw SDK events into `Turn`/`ToolCall` objects | +| `copilot/result.py` | `CopilotResult`, 
`UsageInfo`; re-exports `Turn`/`ToolCall` from `pytest_aitest` (`SubagentInvocation` now lives in `pytest_aitest.core.result`) | +| `copilot/fixtures.py` | `copilot_run` and `ab_run` pytest fixtures | +| `copilot/agents.py` | `load_custom_agent()` — parses `.agent.md` YAML frontmatter files | +| `__init__.py` | re-exports `optimize_instruction()` and `InstructionSuggestion` from `pytest_aitest.execution.optimizer` — uses pydantic-ai to suggest instruction improvements | +| `copilot/personas.py` | `VSCodePersona`, `ClaudeCodePersona`, `CopilotCLIPersona`, `HeadlessPersona` — inject IDE context | + +### Two Core Fixtures + +**`copilot_run(agent, prompt)`** — Executes a single agent run, auto-stashes result for aitest reporting. + +**`ab_run(baseline_agent, treatment_agent, task)`** — Runs two agents in isolated `tmp_path` directories and returns `(baseline_result, treatment_result)` for direct comparison. + +## Key Conventions + +### Every module uses `from __future__ import annotations` +Required for forward references and PEP 563 deferred evaluation. Add it to every new module. + +### `CopilotAgent` is a frozen dataclass +It is immutable and safe to share across parametrized tests. User-friendly field names (e.g., `instructions`) are mapped to SDK internals in `build_session_config()`. Unknown SDK fields go in `extra_config: dict`. + +### Async-first +All SDK interactions are async. Test functions using `copilot_run` or `ab_run` must be `async def`. `asyncio_mode = "auto"` is set in `pyproject.toml`, so no `@pytest.mark.asyncio` decorator is needed. + +### Integration tests are parametrized over models +```python +from tests.conftest import MODELS + +@pytest.mark.parametrize("model", MODELS) +async def test_something(copilot_run, model): + agent = CopilotAgent(model=model, ...) +``` +`MODELS = ["gpt-5.2", "claude-opus-4.5"]` is defined in `tests/conftest.py`. 
+ +### Result introspection methods +Prefer the typed helper methods over raw field access: +- `result.success` / `result.error` +- `result.tool_was_called("create_file")` +- `result.all_tool_calls` / `result.final_response` +- `result.file(path)` — reads a file from the agent's working directory +- `result.usage` — `UsageInfo` with token counts and estimated cost + +### Personas inject IDE context post-config +Apply a persona to a `CopilotAgent` before running to simulate a specific IDE environment (e.g., `VSCodePersona` polyfills `runSubagent`). This is separate from the agent config. + +### Custom agents use `.agent.md` files +YAML frontmatter + Markdown body. Parsed by `load_custom_agent(path)`. The `mode` frontmatter field controls agent type. + +### Ruff rules: E, F, B, I — 100 char line length, double quotes +Enforced by pre-commit hooks and CI. Run `uv run ruff check --fix src tests` before committing. + +### Pyright type checking is `basic` mode, scoped to `src/` only +Tests directory is not type-checked by pyright. Type annotations in `src/` should be complete and valid. 
diff --git a/docs/how-to/optimize.md b/docs/how-to/optimize.md index 964c9c2..1ca6cfc 100644 --- a/docs/how-to/optimize.md +++ b/docs/how-to/optimize.md @@ -87,11 +87,11 @@ async def test_docstring_instruction_iterates(ab_run, tmp_path): ## API Reference -::: pytest_codingagents.copilot.optimizer.optimize_instruction +::: pytest_aitest.execution.optimizer.optimize_instruction --- -::: pytest_codingagents.copilot.optimizer.InstructionSuggestion +::: pytest_aitest.execution.optimizer.InstructionSuggestion ## Choosing a Model diff --git a/docs/reference/api.md b/docs/reference/api.md index 8e1f381..c007431 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -8,11 +8,11 @@ options: show_source: false -::: pytest_codingagents.optimize_instruction +::: pytest_aitest.execution.optimizer.optimize_instruction options: show_source: false -::: pytest_codingagents.InstructionSuggestion +::: pytest_aitest.execution.optimizer.InstructionSuggestion options: show_source: false diff --git a/docs/reference/result.md b/docs/reference/result.md index c6a3e4a..a58d806 100644 --- a/docs/reference/result.md +++ b/docs/reference/result.md @@ -8,14 +8,22 @@ options: show_source: false -::: pytest_codingagents.copilot.result.SubagentInvocation +## SubagentInvocation + +`SubagentInvocation` is defined in [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) and available as: + +```python +from pytest_aitest import SubagentInvocation +``` + +::: pytest_aitest.core.result.SubagentInvocation options: show_source: false ## Turn and ToolCall -`Turn` and `ToolCall` are re-exported from [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) for convenience. See the pytest-aitest documentation for their full API. 
+`Turn` and `ToolCall` are defined in [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) and available as: ```python -from pytest_codingagents.copilot.result import Turn, ToolCall +from pytest_aitest import Turn, ToolCall ``` diff --git a/pyproject.toml b/pyproject.toml index e2ce4aa..eb85570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ dependencies = [ "pytest>=9.0", "github-copilot-sdk>=0.1.25", - "pytest-aitest>=0.5.6", + "pytest-aitest>=0.5.7", "azure-identity>=1.25.2", "pyyaml>=6.0", "pydantic-ai>=1.0", diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py index 828c111..d43588e 100644 --- a/src/pytest_codingagents/__init__.py +++ b/src/pytest_codingagents/__init__.py @@ -2,12 +2,10 @@ from __future__ import annotations +from pytest_aitest.execution.optimizer import InstructionSuggestion, optimize_instruction + from pytest_codingagents.copilot.agent import CopilotAgent from pytest_codingagents.copilot.agents import load_custom_agent, load_custom_agents -from pytest_codingagents.copilot.optimizer import ( - InstructionSuggestion, - optimize_instruction, -) from pytest_codingagents.copilot.personas import ( ClaudeCodePersona, CopilotCLIPersona, diff --git a/src/pytest_codingagents/copilot/events.py b/src/pytest_codingagents/copilot/events.py index 514d381..ab1342d 100644 --- a/src/pytest_codingagents/copilot/events.py +++ b/src/pytest_codingagents/copilot/events.py @@ -60,11 +60,11 @@ import time from typing import TYPE_CHECKING, Any +from pytest_aitest.core.result import SubagentInvocation from pytest_aitest.execution.cost import estimate_cost from pytest_codingagents.copilot.result import ( CopilotResult, - SubagentInvocation, ToolCall, Turn, UsageInfo, diff --git a/src/pytest_codingagents/copilot/optimizer.py b/src/pytest_codingagents/copilot/optimizer.py deleted file mode 100644 index 2c5bb20..0000000 --- 
a/src/pytest_codingagents/copilot/optimizer.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Instruction optimizer for test-driven prompt engineering. - -Provides :func:`optimize_instruction`, which uses an LLM to analyze the gap -between a current agent instruction and the observed behavior, and suggests a -concrete improvement. - -Model strings follow the same ``provider/model`` format used by -``pytest-aitest`` (e.g. ``"azure/gpt-5.2-chat"``, ``"openai/gpt-4o-mini"``). -Azure Entra ID authentication is handled automatically when -``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set. - -Example:: - - suggestion = await optimize_instruction( - agent.instructions or "", - result, - "Agent should add docstrings.", - ) -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from pydantic import BaseModel -from pydantic_ai import Agent as PydanticAgent -from pydantic_ai.models import Model -from pytest_aitest.execution.pydantic_adapter import build_model_from_string - -if TYPE_CHECKING: - from pytest_codingagents.copilot.result import CopilotResult - -__all__ = ["InstructionSuggestion", "optimize_instruction"] - - -@dataclass -class InstructionSuggestion: - """A suggested improvement to a Copilot agent instruction. - - Returned by :func:`optimize_instruction`. Designed to drop into - ``pytest.fail()`` so the failure message includes an actionable fix. - - Attributes: - instruction: The improved instruction text to use instead. - reasoning: Explanation of why this change would close the gap. - changes: Short description of what was changed (one sentence). 
- - Example:: - - suggestion = await optimize_instruction( - agent.instructions, - result, - "Agent should add docstrings to all functions.", - ) - pytest.fail(f"No docstrings found.\\n\\n{suggestion}") - """ - - instruction: str - reasoning: str - changes: str - - def __str__(self) -> str: - return ( - f"💡 Suggested instruction:\n\n" - f" {self.instruction}\n\n" - f" Changes: {self.changes}\n" - f" Reasoning: {self.reasoning}" - ) - - -class _OptimizationOutput(BaseModel): - """Structured output schema for the optimizer LLM call.""" - - instruction: str - reasoning: str - changes: str - - -async def optimize_instruction( - current_instruction: str, - result: CopilotResult, - criterion: str, - *, - model: str | Model = "azure/gpt-5.2-chat", -) -> InstructionSuggestion: - """Analyze a result and suggest an improved instruction. - - Uses pydantic-ai structured output to analyze the gap between a - current instruction and the agent's observed behavior, returning a - concrete, actionable improvement. - - Designed to drop into ``pytest.fail()`` so the failure message - contains a ready-to-use fix. - - Model strings follow the same ``provider/model`` format used by - ``pytest-aitest``. Azure Entra ID auth is handled automatically - when ``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set. - - Example:: - - result = await copilot_run(agent, task) - if '\"\"\"' not in result.file("main.py"): - suggestion = await optimize_instruction( - agent.instructions or "", - result, - "Agent should add docstrings to all functions.", - ) - pytest.fail(f"No docstrings found.\\n\\n{suggestion}") - - Args: - current_instruction: The agent's current instruction text. - result: The ``CopilotResult`` from the (failed) run. - criterion: What the agent *should* have done — the test expectation - in plain English (e.g. ``"Always write docstrings"``). - model: Provider/model string (e.g. ``"azure/gpt-5.2-chat"``, - ``"openai/gpt-4o-mini"``) or a pre-configured pydantic-ai - ``Model`` object. 
Defaults to ``"azure/gpt-5.2-chat"``. - - Returns: - An :class:`InstructionSuggestion` with the improved instruction. - """ - resolved_model: str | Model = ( - build_model_from_string(model) if isinstance(model, str) else model - ) - final_output = result.final_response or "(no response)" - tool_calls = ", ".join(sorted(result.tool_names_called)) or "none" - - prompt = f"""You are helping improve a GitHub Copilot agent instruction. - -## Current instruction -{current_instruction or "(no instruction)"} - -## Task the agent performed -{criterion} - -## What actually happened -The agent produced: -{final_output[:1500]} - -Tools called: {tool_calls} -Run succeeded: {result.success} - -## Expected criterion -The agent SHOULD have satisfied this criterion: -{criterion} - -Analyze the gap between the instruction and the observed behaviour. -Suggest a specific, concise, directive improvement to the instruction -that would make the agent satisfy the criterion. -Keep the instruction under 200 words. Do not add unrelated rules.""" - - optimizer_agent = PydanticAgent(resolved_model, output_type=_OptimizationOutput) - run_result = await optimizer_agent.run(prompt) - output = run_result.output - - return InstructionSuggestion( - instruction=output.instruction, - reasoning=output.reasoning, - changes=output.changes, - ) diff --git a/src/pytest_codingagents/copilot/personas.py b/src/pytest_codingagents/copilot/personas.py index f5e2979..c9b425c 100644 --- a/src/pytest_codingagents/copilot/personas.py +++ b/src/pytest_codingagents/copilot/personas.py @@ -302,105 +302,39 @@ def _make_runsubagent_tool( custom_agents: list[dict[str, Any]], mapper: "EventMapper", ) -> "Tool": - """Build a ``runSubagent`` polyfill tool for the VS Code persona. + """Build a ``runSubagent`` polyfill tool for the VS Code persona.""" + return _make_subagent_dispatch_tool("runSubagent", parent_agent, custom_agents, mapper) - The Copilot CLI does not natively expose ``runSubagent`` in SDK headless - mode. 
This factory creates a Python-side ``Tool`` that dispatches - registered custom agents as nested ``run_copilot`` calls. - """ - from copilot.types import Tool, ToolResult - - from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent - from pytest_codingagents.copilot.runner import run_copilot - - agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents} - - async def _handler(invocation: "ToolInvocation") -> "ToolResult": - args: dict[str, Any] = invocation.get("arguments") or {} # type: ignore[assignment] - - agent_name: str | None = ( - args.get("agent_name") or args.get("agent") or args.get("agentName") - ) - prompt_text: str = args.get("prompt") or args.get("message") or args.get("task") or "" - - if not agent_name: - available = sorted(agent_map) - return ToolResult( - textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"), - resultType="failure", - ) - - agent_cfg = agent_map.get(agent_name) - if agent_cfg is None: - available = sorted(agent_map) - return ToolResult( - textResultForLlm=(f"Error: agent '{agent_name}' not found. 
Available: {available}"), - resultType="failure", - ) - - mapper.record_subagent_start(agent_name) - - sub_agent = _CopilotAgent( - name=agent_name, - model=parent_agent.model, - instructions=agent_cfg.get("prompt"), - working_directory=parent_agent.working_directory, - timeout_s=min(parent_agent.timeout_s, 600.0), - max_turns=min(parent_agent.max_turns, 30), - auto_confirm=True, - ) - - sub_result = await run_copilot(sub_agent, prompt_text) - if sub_result.success: - mapper.record_subagent_complete(agent_name) - return ToolResult( - textResultForLlm=sub_result.final_response or "Sub-agent completed.", - resultType="success", - ) - - mapper.record_subagent_failed(agent_name) - return ToolResult( - textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}", - resultType="failure", - ) - - return Tool( - name="runSubagent", - description=( - "Dispatch a named custom agent to perform a task. " - "The agent runs with its own instructions and returns its " - "final response. " - f"Available agents: {sorted(agent_map)}" - ), - handler=_handler, - parameters={ - "type": "object", - "properties": { - "agent_name": { - "type": "string", - "description": "Name of the agent to dispatch.", - "enum": sorted(agent_map), - }, - "prompt": { - "type": "string", - "description": "Task or message to send to the agent.", - }, - }, - "required": ["agent_name", "prompt"], - }, - ) +def _make_task_tool( + parent_agent: "CopilotAgent", + custom_agents: list[dict[str, Any]], + mapper: "EventMapper", +) -> "Tool": + """Build a ``task`` polyfill tool for the Claude Code persona.""" + return _make_subagent_dispatch_tool("task", parent_agent, custom_agents, mapper) -def _make_task_tool( +def _make_subagent_dispatch_tool( + tool_name: str, parent_agent: "CopilotAgent", custom_agents: list[dict[str, Any]], mapper: "EventMapper", ) -> "Tool": - """Build a ``task`` polyfill tool for the Claude Code persona. + """Build a subagent dispatch polyfill tool. 
+ + The Copilot CLI does not natively expose ``runSubagent`` or ``task`` in + SDK headless mode. This factory creates a Python-side ``Tool`` that + dispatches registered custom agents as nested ``run_copilot`` calls. - Identical dispatch mechanism to ``_make_runsubagent_tool`` but named - ``task`` to match Claude Code's native sub-agent dispatch API. + Args: + tool_name: Name to register the tool as (``"runSubagent"`` for VS Code, + ``"task"`` for Claude Code). + parent_agent: The orchestrator ``CopilotAgent`` being executed. + custom_agents: List of custom agent config dicts (each with at least + a ``name`` key, optionally ``prompt``, ``description``). + mapper: The ``EventMapper`` for the current run, used to record + subagent lifecycle events. """ from copilot.types import Tool, ToolResult @@ -416,7 +350,11 @@ async def _handler(invocation: "ToolInvocation") -> "ToolResult": args.get("agent_name") or args.get("agent") or args.get("agentName") ) prompt_text: str = ( - args.get("prompt") or args.get("message") or args.get("description") or "" + args.get("prompt") + or args.get("message") + or args.get("task") + or args.get("description") + or "" ) if not agent_name: @@ -462,11 +400,10 @@ async def _handler(invocation: "ToolInvocation") -> "ToolResult": ) return Tool( - name="task", + name=tool_name, description=( - "Dispatch a named agent to perform a task. " - "The agent runs with its own instructions and returns its " - "final response. " + f"Dispatch a named agent to perform a task using the {tool_name} tool. " + "The agent runs with its own instructions and returns its final response. " f"Available agents: {sorted(agent_map)}" ), handler=_handler, diff --git a/src/pytest_codingagents/copilot/result.py b/src/pytest_codingagents/copilot/result.py index 7b4ccf1..5e342c7 100644 --- a/src/pytest_codingagents/copilot/result.py +++ b/src/pytest_codingagents/copilot/result.py @@ -1,8 +1,7 @@ """Result types for Copilot agent execution. 
-Turn and ToolCall are re-exported from pytest-aitest's core.result module -to avoid duplication. Copilot-specific types (SubagentInvocation, UsageInfo, -CopilotResult) remain here. +Turn, ToolCall, and SubagentInvocation are imported from pytest-aitest. +Copilot-specific types (UsageInfo, CopilotResult) are defined here. """ from __future__ import annotations @@ -11,34 +10,19 @@ from pathlib import Path from typing import TYPE_CHECKING, Any -# Re-export shared types from pytest-aitest so existing imports keep working: -# from pytest_codingagents.copilot.result import Turn, ToolCall -from pytest_aitest.core.result import ToolCall, Turn +from pytest_aitest.core.result import SubagentInvocation, ToolCall, Turn # noqa: F401 if TYPE_CHECKING: from pytest_codingagents.copilot.agent import CopilotAgent __all__ = [ "CopilotResult", - "SubagentInvocation", "ToolCall", "Turn", "UsageInfo", ] -@dataclass(slots=True) -class SubagentInvocation: - """A subagent invocation observed during execution.""" - - name: str - status: str # "selected", "started", "completed", "failed" - duration_ms: float | None = None - - def __repr__(self) -> str: - return f"SubagentInvocation({self.name}, {self.status})" - - @dataclass(slots=True) class UsageInfo: """Token usage and cost from a single model turn.""" diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py index 6642053..479cdbd 100644 --- a/tests/test_optimizer_integration.py +++ b/tests/test_optimizer_integration.py @@ -12,9 +12,9 @@ import os import pytest +from pytest_aitest import InstructionSuggestion, optimize_instruction from pytest_codingagents.copilot.agent import CopilotAgent -from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction @pytest.mark.copilot diff --git a/tests/test_subagents.py b/tests/test_subagents.py new file mode 100644 index 0000000..6e0296f --- /dev/null +++ b/tests/test_subagents.py @@ -0,0 +1,142 @@ +"""Subagent dispatch tests. 
+ +Proves that the subagent dispatch mechanism works reliably when the +orchestrator cannot implement directly (write tools excluded). + +When the orchestrator has no write tools, it *must* route to a subagent +to produce file output. This makes dispatch deterministic and asserts: +- ``result.subagent_invocations`` is non-empty +- The subagent actually created the expected file +- ``SubagentInvocation`` objects have valid name/status fields +""" + +from __future__ import annotations + +import pytest + +from pytest_codingagents.copilot.agent import CopilotAgent + +# Tools that let the orchestrator write files directly. +# Excluding these forces the orchestrator to delegate. +_WRITE_TOOLS = [ + "create_file", + "replace_string_in_file", + "multi_replace_string_in_file", + "insert_edit_into_file", + "run_in_terminal", + "create_directory", +] + + +@pytest.mark.copilot +class TestForcedSubagentDispatch: + """When write tools are excluded, the orchestrator must use runSubagent. + + These tests are deterministic: the orchestrator physically cannot create + files, so it has no choice but to dispatch to the subagent that can. + """ + + async def test_subagent_invocations_non_empty(self, copilot_run, tmp_path): + """Orchestrator with excluded write tools dispatches to a subagent. + + With no write tools available, the orchestrator cannot create the + requested file itself and must invoke the file-writer subagent. + Asserts that at least one subagent invocation is recorded. + """ + agent = CopilotAgent( + name="forced-orchestrator", + instructions=( + "You are an orchestrator. You MUST delegate all file creation " + "to the file-writer agent via runSubagent. " + "Do not attempt to create files yourself." + ), + working_directory=str(tmp_path), + timeout_s=300.0, + max_turns=20, + excluded_tools=_WRITE_TOOLS, + custom_agents=[ + { + "name": "file-writer", + "prompt": ( + "You create Python files. 
When asked to create a file, " + "write it to disk using your file creation tools." + ), + "description": "Creates Python source files on disk.", + } + ], + ) + result = await copilot_run( + agent, + "Use the file-writer agent to create hello.py containing: print('hello world')", + ) + assert result.success, f"Run failed: {result.error}" + assert result.subagent_invocations, ( + "No subagent invocations recorded — orchestrator may have attempted " + "to implement directly despite excluded write tools" + ) + + async def test_subagent_file_created(self, copilot_run, tmp_path): + """File created by subagent exists in the workspace. + + Complements test_subagent_invocations_non_empty by verifying the + subagent actually produced the expected artifact. + """ + agent = CopilotAgent( + name="forced-orchestrator-file", + instructions=( + "You are an orchestrator. Delegate all file creation to the " + "file-writer agent via runSubagent." + ), + working_directory=str(tmp_path), + timeout_s=300.0, + max_turns=20, + excluded_tools=_WRITE_TOOLS, + custom_agents=[ + { + "name": "file-writer", + "prompt": ("You create Python files. Write requested files to disk."), + "description": "Creates Python source files on disk.", + } + ], + ) + result = await copilot_run( + agent, + "Use the file-writer agent to create output.py containing: x = 42", + ) + assert result.success, f"Run failed: {result.error}" + assert (tmp_path / "output.py").exists(), ( + "output.py not created — subagent did not write the file" + ) + + async def test_subagent_invocation_fields(self, copilot_run, tmp_path): + """SubagentInvocation objects have valid name and status fields.""" + agent = CopilotAgent( + name="forced-orchestrator-fields", + instructions=( + "You are an orchestrator. Delegate file creation to the " + "file-writer agent via runSubagent." 
+ ), + working_directory=str(tmp_path), + timeout_s=300.0, + max_turns=20, + excluded_tools=_WRITE_TOOLS, + custom_agents=[ + { + "name": "file-writer", + "prompt": "You create Python files on disk.", + "description": "Creates Python source files.", + } + ], + ) + result = await copilot_run( + agent, + "Use the file-writer agent to create result.py containing: done = True", + ) + assert result.success, f"Run failed: {result.error}" + assert result.subagent_invocations, "No subagent invocations recorded" + + for inv in result.subagent_invocations: + assert inv.name, "SubagentInvocation.name must not be empty" + assert inv.status in ("selected", "started", "completed", "failed"), ( + f"Unexpected SubagentInvocation.status: {inv.status!r}" + ) diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py index fd551fd..e5d4690 100644 --- a/tests/unit/test_optimizer.py +++ b/tests/unit/test_optimizer.py @@ -4,12 +4,13 @@ from unittest.mock import AsyncMock, MagicMock, patch -from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction +from pytest_aitest import InstructionSuggestion, optimize_instruction + from pytest_codingagents.copilot.result import CopilotResult, ToolCall, Turn -# Patch targets -_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent" -_BUILD_MODEL_PATCH = "pytest_codingagents.copilot.optimizer.build_model_from_string" +# Patch targets — the optimizer now lives in pytest_aitest +_AGENT_PATCH = "pytest_aitest.execution.optimizer.PydanticAgent" +_BUILD_MODEL_PATCH = "pytest_aitest.execution.optimizer.build_model_from_string" _FAKE_MODEL = MagicMock(name="fake-model")