From 59f5d7007dda283d53c472c643d1ed157b52470b Mon Sep 17 00:00:00 2001
From: Stefan Broenner <stefan.broenner@microsoft.com>
Date: Fri, 20 Feb 2026 16:33:47 +0100
Subject: [PATCH] Move optimizer and SubagentInvocation to pytest-aitest;
 consolidate subagent tool factories

- Remove copilot/optimizer.py (InstructionSuggestion and optimize_instruction now live
  in pytest_aitest.execution.optimizer; re-exported from pytest_codingagents.__init__)
- result.py: remove SubagentInvocation from __all__; import from pytest_aitest directly
- events.py: import SubagentInvocation directly from pytest_aitest.core.result
- personas.py: consolidate _make_runsubagent_tool and _make_task_tool into shared
  _make_subagent_dispatch_tool(tool_name, ...) factory
- Raise pytest-aitest lower bound to >=0.5.7
- Update docs: fix autodoc references, add correct import examples
- Update tests: import optimizer types from pytest_aitest directly

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/copilot-instructions.md              | 114 +++++++++++++
 docs/how-to/optimize.md                      |   4 +-
 docs/reference/api.md                        |   4 +-
 docs/reference/result.md                     |  14 +-
 pyproject.toml                               |   2 +-
 src/pytest_codingagents/__init__.py          |   6 +-
 src/pytest_codingagents/copilot/events.py    |   2 +-
 src/pytest_codingagents/copilot/optimizer.py | 161 -------------------
 src/pytest_codingagents/copilot/personas.py  | 127 ++++-----------
 src/pytest_codingagents/copilot/result.py    |  22 +--
 tests/test_optimizer_integration.py          |   2 +-
 tests/test_subagents.py                      | 142 ++++++++++++++++
 tests/unit/test_optimizer.py                 |   9 +-
 13 files changed, 316 insertions(+), 293 deletions(-)
 create mode 100644 .github/copilot-instructions.md
 delete mode 100644 src/pytest_codingagents/copilot/optimizer.py
 create mode 100644 tests/test_subagents.py

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..00b5ac1
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,114 @@
+# Copilot Instructions for pytest-codingagents
+
+## Build, Test & Lint Commands
+
+```bash
+# Install all dependencies (including dev and docs extras)
+uv sync --all-extras
+
+# Unit tests (fast, no credentials needed)
+uv run pytest tests/unit/ -v
+
+# Run a single unit test file
+uv run pytest tests/unit/test_event_mapper.py -v
+
+# Run a single test by name
+uv run pytest tests/unit/test_result.py::test_name -v
+
+# Integration tests (require GitHub Copilot credentials via GITHUB_TOKEN or `gh` CLI auth)
+uv run pytest tests/ -v -m copilot
+
+# Run one integration test file for a specific model
+uv run pytest tests/test_basic.py -k "gpt-5.2" -v
+
+# Lint
+uv run ruff check src tests
+
+# Format
+uv run ruff format src tests
+
+# Type check
+uv run pyright src
+
+# Multi-file integration run with per-file HTML reports
+uv run python scripts/run_all.py
+```
+
+## Architecture
+
+This is a **pytest plugin** (`pytest11` entry point) that provides a test harness for empirically validating GitHub Copilot agent configurations.
+
+### Data Flow
+
+```
+CopilotAgent (frozen config dataclass)
+  → runner.run_copilot(agent, prompt)
+    → GitHub Copilot SDK client + session
+      → SDK SessionEvent stream
+        → EventMapper.process_event()  (38+ event types → structured data)
+          → Turn / ToolCall accumulation
+            → CopilotResult (turns, success, usage, reasoning, subagents)
+              → copilot_run fixture stashes result for pytest-aitest
+                → HTML report with AI-powered insights
+```
+
+### Key Modules (`src/pytest_codingagents/`)
+
+| Module | Role |
+|--------|------|
+| `plugin.py` | Pytest plugin entry point; registers fixtures and `pytest_aitest_analysis_prompt` hook |
+| `copilot/agent.py` | `CopilotAgent` frozen dataclass; `build_session_config()` maps user fields → SDK TypedDict |
+| `copilot/runner.py` | `run_copilot()` — manages SDK client lifecycle, streams events, returns `CopilotResult` |
+| `copilot/events.py` | `EventMapper` — translates raw SDK events into `Turn`/`ToolCall` objects |
+| `copilot/result.py` | `CopilotResult`, `UsageInfo`; re-exports `Turn`/`ToolCall` from `pytest_aitest` (`SubagentInvocation` also lives in `pytest_aitest` and is imported from there) |
+| `copilot/fixtures.py` | `copilot_run` and `ab_run` pytest fixtures |
+| `copilot/agents.py` | `load_custom_agent()` — parses `.agent.md` YAML frontmatter files |
+| `__init__.py` | Public API; re-exports `optimize_instruction` and `InstructionSuggestion` from `pytest_aitest.execution.optimizer` |
+| `copilot/personas.py` | `VSCodePersona`, `ClaudeCodePersona`, `CopilotCLIPersona`, `HeadlessPersona` — inject IDE context |
+
+### Two Core Fixtures
+
+**`copilot_run(agent, prompt)`** — Executes a single agent run, auto-stashes result for aitest reporting.
+
+**`ab_run(baseline_agent, treatment_agent, task)`** — Runs two agents in isolated `tmp_path` directories and returns `(baseline_result, treatment_result)` for direct comparison.
+
+## Key Conventions
+
+### Every module uses `from __future__ import annotations`
+Required for forward references and PEP 563 deferred evaluation. Add it to every new module.
+
+### `CopilotAgent` is a frozen dataclass
+It is immutable and safe to share across parametrized tests. User-friendly field names (e.g., `instructions`) are mapped to SDK internals in `build_session_config()`. Unknown SDK fields go in `extra_config: dict`.
+
+### Async-first
+All SDK interactions are async. Test functions using `copilot_run` or `ab_run` must be `async def`. `asyncio_mode = "auto"` is set in `pyproject.toml`, so no `@pytest.mark.asyncio` decorator is needed.
+
+### Integration tests are parametrized over models
+```python
+from tests.conftest import MODELS
+
+@pytest.mark.parametrize("model", MODELS)
+async def test_something(copilot_run, model):
+    agent = CopilotAgent(model=model, ...)
+```
+`MODELS = ["gpt-5.2", "claude-opus-4.5"]` is defined in `tests/conftest.py`.
+
+### Result introspection methods
+Prefer the typed helper methods over raw field access:
+- `result.success` / `result.error`
+- `result.tool_was_called("create_file")` 
+- `result.all_tool_calls` / `result.final_response`
+- `result.file(path)` — reads a file from the agent's working directory
+- `result.usage` — `UsageInfo` with token counts and estimated cost
+
+### Personas inject IDE context post-config
+Apply a persona to a `CopilotAgent` before running to simulate a specific IDE environment (e.g., `VSCodePersona` polyfills `runSubagent`). This is separate from the agent config.
+
+### Custom agents use `.agent.md` files
+YAML frontmatter + Markdown body. Parsed by `load_custom_agent(path)`. The `mode` frontmatter field controls agent type.
+
+### Ruff rules: E, F, B, I — 100 char line length, double quotes
+Enforced by pre-commit hooks and CI. Run `uv run ruff check --fix src tests` before committing.
+
+### Pyright type checking is `basic` mode, scoped to `src/` only
+Tests directory is not type-checked by pyright. Type annotations in `src/` should be complete and valid.
diff --git a/docs/how-to/optimize.md b/docs/how-to/optimize.md
index 964c9c2..1ca6cfc 100644
--- a/docs/how-to/optimize.md
+++ b/docs/how-to/optimize.md
@@ -87,11 +87,11 @@ async def test_docstring_instruction_iterates(ab_run, tmp_path):
 
 ## API Reference
 
-::: pytest_codingagents.copilot.optimizer.optimize_instruction
+::: pytest_aitest.execution.optimizer.optimize_instruction
 
 ---
 
-::: pytest_codingagents.copilot.optimizer.InstructionSuggestion
+::: pytest_aitest.execution.optimizer.InstructionSuggestion
 
 ## Choosing a Model
 
diff --git a/docs/reference/api.md b/docs/reference/api.md
index 8e1f381..c007431 100644
--- a/docs/reference/api.md
+++ b/docs/reference/api.md
@@ -8,11 +8,11 @@
     options:
       show_source: false
 
-::: pytest_codingagents.optimize_instruction
+::: pytest_aitest.execution.optimizer.optimize_instruction
     options:
       show_source: false
 
-::: pytest_codingagents.InstructionSuggestion
+::: pytest_aitest.execution.optimizer.InstructionSuggestion
     options:
       show_source: false
 
diff --git a/docs/reference/result.md b/docs/reference/result.md
index c6a3e4a..a58d806 100644
--- a/docs/reference/result.md
+++ b/docs/reference/result.md
@@ -8,14 +8,22 @@
     options:
       show_source: false
 
-::: pytest_codingagents.copilot.result.SubagentInvocation
+## SubagentInvocation
+
+`SubagentInvocation` is defined in [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) and available as:
+
+```python
+from pytest_aitest import SubagentInvocation
+```
+
+::: pytest_aitest.core.result.SubagentInvocation
     options:
       show_source: false
 
 ## Turn and ToolCall
 
-`Turn` and `ToolCall` are re-exported from [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) for convenience. See the pytest-aitest documentation for their full API.
+`Turn` and `ToolCall` are defined in [`pytest_aitest.core.result`](https://sbroenne.github.io/pytest-aitest/reference/result/) and available as:
 
 ```python
-from pytest_codingagents.copilot.result import Turn, ToolCall
+from pytest_aitest import Turn, ToolCall
 ```
diff --git a/pyproject.toml b/pyproject.toml
index e2ce4aa..eb85570 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ classifiers = [
 dependencies = [
     "pytest>=9.0",
     "github-copilot-sdk>=0.1.25",
-    "pytest-aitest>=0.5.6",
+    "pytest-aitest>=0.5.7",
     "azure-identity>=1.25.2",
     "pyyaml>=6.0",
     "pydantic-ai>=1.0",
diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py
index 828c111..d43588e 100644
--- a/src/pytest_codingagents/__init__.py
+++ b/src/pytest_codingagents/__init__.py
@@ -2,12 +2,10 @@
 
 from __future__ import annotations
 
+from pytest_aitest.execution.optimizer import InstructionSuggestion, optimize_instruction
+
 from pytest_codingagents.copilot.agent import CopilotAgent
 from pytest_codingagents.copilot.agents import load_custom_agent, load_custom_agents
-from pytest_codingagents.copilot.optimizer import (
-    InstructionSuggestion,
-    optimize_instruction,
-)
 from pytest_codingagents.copilot.personas import (
     ClaudeCodePersona,
     CopilotCLIPersona,
diff --git a/src/pytest_codingagents/copilot/events.py b/src/pytest_codingagents/copilot/events.py
index 514d381..ab1342d 100644
--- a/src/pytest_codingagents/copilot/events.py
+++ b/src/pytest_codingagents/copilot/events.py
@@ -60,11 +60,11 @@
 import time
 from typing import TYPE_CHECKING, Any
 
+from pytest_aitest.core.result import SubagentInvocation
 from pytest_aitest.execution.cost import estimate_cost
 
 from pytest_codingagents.copilot.result import (
     CopilotResult,
-    SubagentInvocation,
     ToolCall,
     Turn,
     UsageInfo,
diff --git a/src/pytest_codingagents/copilot/optimizer.py b/src/pytest_codingagents/copilot/optimizer.py
deleted file mode 100644
index 2c5bb20..0000000
--- a/src/pytest_codingagents/copilot/optimizer.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""Instruction optimizer for test-driven prompt engineering.
-
-Provides :func:`optimize_instruction`, which uses an LLM to analyze the gap
-between a current agent instruction and the observed behavior, and suggests a
-concrete improvement.
-
-Model strings follow the same ``provider/model`` format used by
-``pytest-aitest`` (e.g. ``"azure/gpt-5.2-chat"``, ``"openai/gpt-4o-mini"``).
-Azure Entra ID authentication is handled automatically when
-``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set.
-
-Example::
-
-    suggestion = await optimize_instruction(
-        agent.instructions or "",
-        result,
-        "Agent should add docstrings.",
-    )
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-from pydantic import BaseModel
-from pydantic_ai import Agent as PydanticAgent
-from pydantic_ai.models import Model
-from pytest_aitest.execution.pydantic_adapter import build_model_from_string
-
-if TYPE_CHECKING:
-    from pytest_codingagents.copilot.result import CopilotResult
-
-__all__ = ["InstructionSuggestion", "optimize_instruction"]
-
-
-@dataclass
-class InstructionSuggestion:
-    """A suggested improvement to a Copilot agent instruction.
-
-    Returned by :func:`optimize_instruction`. Designed to drop into
-    ``pytest.fail()`` so the failure message includes an actionable fix.
-
-    Attributes:
-        instruction: The improved instruction text to use instead.
-        reasoning: Explanation of why this change would close the gap.
-        changes: Short description of what was changed (one sentence).
-
-    Example::
-
-        suggestion = await optimize_instruction(
-            agent.instructions,
-            result,
-            "Agent should add docstrings to all functions.",
-        )
-        pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
-    """
-
-    instruction: str
-    reasoning: str
-    changes: str
-
-    def __str__(self) -> str:
-        return (
-            f"💡 Suggested instruction:\n\n"
-            f"  {self.instruction}\n\n"
-            f"  Changes: {self.changes}\n"
-            f"  Reasoning: {self.reasoning}"
-        )
-
-
-class _OptimizationOutput(BaseModel):
-    """Structured output schema for the optimizer LLM call."""
-
-    instruction: str
-    reasoning: str
-    changes: str
-
-
-async def optimize_instruction(
-    current_instruction: str,
-    result: CopilotResult,
-    criterion: str,
-    *,
-    model: str | Model = "azure/gpt-5.2-chat",
-) -> InstructionSuggestion:
-    """Analyze a result and suggest an improved instruction.
-
-    Uses pydantic-ai structured output to analyze the gap between a
-    current instruction and the agent's observed behavior, returning a
-    concrete, actionable improvement.
-
-    Designed to drop into ``pytest.fail()`` so the failure message
-    contains a ready-to-use fix.
-
-    Model strings follow the same ``provider/model`` format used by
-    ``pytest-aitest``. Azure Entra ID auth is handled automatically
-    when ``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set.
-
-    Example::
-
-        result = await copilot_run(agent, task)
-        if '\"\"\"' not in result.file("main.py"):
-            suggestion = await optimize_instruction(
-                agent.instructions or "",
-                result,
-                "Agent should add docstrings to all functions.",
-            )
-            pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
-
-    Args:
-        current_instruction: The agent's current instruction text.
-        result: The ``CopilotResult`` from the (failed) run.
-        criterion: What the agent *should* have done — the test expectation
-            in plain English (e.g. ``"Always write docstrings"``).
-        model: Provider/model string (e.g. ``"azure/gpt-5.2-chat"``,
-            ``"openai/gpt-4o-mini"``) or a pre-configured pydantic-ai
-            ``Model`` object. Defaults to ``"azure/gpt-5.2-chat"``.
-
-    Returns:
-        An :class:`InstructionSuggestion` with the improved instruction.
-    """
-    resolved_model: str | Model = (
-        build_model_from_string(model) if isinstance(model, str) else model
-    )
-    final_output = result.final_response or "(no response)"
-    tool_calls = ", ".join(sorted(result.tool_names_called)) or "none"
-
-    prompt = f"""You are helping improve a GitHub Copilot agent instruction.
-
-## Current instruction
-{current_instruction or "(no instruction)"}
-
-## Task the agent performed
-{criterion}
-
-## What actually happened
-The agent produced:
-{final_output[:1500]}
-
-Tools called: {tool_calls}
-Run succeeded: {result.success}
-
-## Expected criterion
-The agent SHOULD have satisfied this criterion:
-{criterion}
-
-Analyze the gap between the instruction and the observed behaviour.
-Suggest a specific, concise, directive improvement to the instruction
-that would make the agent satisfy the criterion.
-Keep the instruction under 200 words. Do not add unrelated rules."""
-
-    optimizer_agent = PydanticAgent(resolved_model, output_type=_OptimizationOutput)
-    run_result = await optimizer_agent.run(prompt)
-    output = run_result.output
-
-    return InstructionSuggestion(
-        instruction=output.instruction,
-        reasoning=output.reasoning,
-        changes=output.changes,
-    )
diff --git a/src/pytest_codingagents/copilot/personas.py b/src/pytest_codingagents/copilot/personas.py
index f5e2979..c9b425c 100644
--- a/src/pytest_codingagents/copilot/personas.py
+++ b/src/pytest_codingagents/copilot/personas.py
@@ -302,105 +302,39 @@ def _make_runsubagent_tool(
     custom_agents: list[dict[str, Any]],
     mapper: "EventMapper",
 ) -> "Tool":
-    """Build a ``runSubagent`` polyfill tool for the VS Code persona.
+    """Build a ``runSubagent`` polyfill tool for the VS Code persona."""
+    return _make_subagent_dispatch_tool("runSubagent", parent_agent, custom_agents, mapper)
 
-    The Copilot CLI does not natively expose ``runSubagent`` in SDK headless
-    mode.  This factory creates a Python-side ``Tool`` that dispatches
-    registered custom agents as nested ``run_copilot`` calls.
-    """
-    from copilot.types import Tool, ToolResult
-
-    from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent
-    from pytest_codingagents.copilot.runner import run_copilot
-
-    agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents}
-
-    async def _handler(invocation: "ToolInvocation") -> "ToolResult":
-        args: dict[str, Any] = invocation.get("arguments") or {}  # type: ignore[assignment]
-
-        agent_name: str | None = (
-            args.get("agent_name") or args.get("agent") or args.get("agentName")
-        )
-        prompt_text: str = args.get("prompt") or args.get("message") or args.get("task") or ""
-
-        if not agent_name:
-            available = sorted(agent_map)
-            return ToolResult(
-                textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"),
-                resultType="failure",
-            )
-
-        agent_cfg = agent_map.get(agent_name)
-        if agent_cfg is None:
-            available = sorted(agent_map)
-            return ToolResult(
-                textResultForLlm=(f"Error: agent '{agent_name}' not found. Available: {available}"),
-                resultType="failure",
-            )
-
-        mapper.record_subagent_start(agent_name)
-
-        sub_agent = _CopilotAgent(
-            name=agent_name,
-            model=parent_agent.model,
-            instructions=agent_cfg.get("prompt"),
-            working_directory=parent_agent.working_directory,
-            timeout_s=min(parent_agent.timeout_s, 600.0),
-            max_turns=min(parent_agent.max_turns, 30),
-            auto_confirm=True,
-        )
-
-        sub_result = await run_copilot(sub_agent, prompt_text)
 
-        if sub_result.success:
-            mapper.record_subagent_complete(agent_name)
-            return ToolResult(
-                textResultForLlm=sub_result.final_response or "Sub-agent completed.",
-                resultType="success",
-            )
-
-        mapper.record_subagent_failed(agent_name)
-        return ToolResult(
-            textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}",
-            resultType="failure",
-        )
-
-    return Tool(
-        name="runSubagent",
-        description=(
-            "Dispatch a named custom agent to perform a task. "
-            "The agent runs with its own instructions and returns its "
-            "final response. "
-            f"Available agents: {sorted(agent_map)}"
-        ),
-        handler=_handler,
-        parameters={
-            "type": "object",
-            "properties": {
-                "agent_name": {
-                    "type": "string",
-                    "description": "Name of the agent to dispatch.",
-                    "enum": sorted(agent_map),
-                },
-                "prompt": {
-                    "type": "string",
-                    "description": "Task or message to send to the agent.",
-                },
-            },
-            "required": ["agent_name", "prompt"],
-        },
-    )
+def _make_task_tool(
+    parent_agent: "CopilotAgent",
+    custom_agents: list[dict[str, Any]],
+    mapper: "EventMapper",
+) -> "Tool":
+    """Build a ``task`` polyfill tool for the Claude Code persona."""
+    return _make_subagent_dispatch_tool("task", parent_agent, custom_agents, mapper)
 
 
-def _make_task_tool(
+def _make_subagent_dispatch_tool(
+    tool_name: str,
     parent_agent: "CopilotAgent",
     custom_agents: list[dict[str, Any]],
     mapper: "EventMapper",
 ) -> "Tool":
-    """Build a ``task`` polyfill tool for the Claude Code persona.
+    """Build a subagent dispatch polyfill tool.
+
+    The Copilot CLI does not natively expose ``runSubagent`` or ``task`` in
+    SDK headless mode.  This factory creates a Python-side ``Tool`` that
+    dispatches registered custom agents as nested ``run_copilot`` calls.
 
-    Identical dispatch mechanism to ``_make_runsubagent_tool`` but named
-    ``task`` to match Claude Code's native sub-agent dispatch API.
+    Args:
+        tool_name: Name to register the tool as (``"runSubagent"`` for VS Code,
+            ``"task"`` for Claude Code).
+        parent_agent: The orchestrator ``CopilotAgent`` being executed.
+        custom_agents: List of custom agent config dicts (each with at least
+            a ``name`` key, optionally ``prompt``, ``description``).
+        mapper: The ``EventMapper`` for the current run, used to record
+            subagent lifecycle events.
     """
     from copilot.types import Tool, ToolResult
 
@@ -416,7 +350,11 @@ async def _handler(invocation: "ToolInvocation") -> "ToolResult":
             args.get("agent_name") or args.get("agent") or args.get("agentName")
         )
         prompt_text: str = (
-            args.get("prompt") or args.get("message") or args.get("description") or ""
+            args.get("prompt")
+            or args.get("message")
+            or args.get("task")
+            or args.get("description")
+            or ""
         )
 
         if not agent_name:
@@ -462,11 +400,10 @@ async def _handler(invocation: "ToolInvocation") -> "ToolResult":
         )
 
     return Tool(
-        name="task",
+        name=tool_name,
         description=(
-            "Dispatch a named agent to perform a task. "
-            "The agent runs with its own instructions and returns its "
-            "final response. "
+            f"Dispatch a named agent to perform a task using the {tool_name} tool. "
+            "The agent runs with its own instructions and returns its final response. "
             f"Available agents: {sorted(agent_map)}"
         ),
         handler=_handler,
diff --git a/src/pytest_codingagents/copilot/result.py b/src/pytest_codingagents/copilot/result.py
index 7b4ccf1..5e342c7 100644
--- a/src/pytest_codingagents/copilot/result.py
+++ b/src/pytest_codingagents/copilot/result.py
@@ -1,8 +1,7 @@
 """Result types for Copilot agent execution.
 
-Turn and ToolCall are re-exported from pytest-aitest's core.result module
-to avoid duplication. Copilot-specific types (SubagentInvocation, UsageInfo,
-CopilotResult) remain here.
+Turn, ToolCall, and SubagentInvocation are imported from pytest-aitest.
+Copilot-specific types (UsageInfo, CopilotResult) are defined here.
 """
 
 from __future__ import annotations
@@ -11,34 +10,19 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
-# Re-export shared types from pytest-aitest so existing imports keep working:
-#   from pytest_codingagents.copilot.result import Turn, ToolCall
-from pytest_aitest.core.result import ToolCall, Turn
+from pytest_aitest.core.result import SubagentInvocation, ToolCall, Turn  # noqa: F401
 
 if TYPE_CHECKING:
     from pytest_codingagents.copilot.agent import CopilotAgent
 
 __all__ = [
     "CopilotResult",
-    "SubagentInvocation",
     "ToolCall",
     "Turn",
     "UsageInfo",
 ]
 
 
-@dataclass(slots=True)
-class SubagentInvocation:
-    """A subagent invocation observed during execution."""
-
-    name: str
-    status: str  # "selected", "started", "completed", "failed"
-    duration_ms: float | None = None
-
-    def __repr__(self) -> str:
-        return f"SubagentInvocation({self.name}, {self.status})"
-
-
 @dataclass(slots=True)
 class UsageInfo:
     """Token usage and cost from a single model turn."""
diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py
index 6642053..479cdbd 100644
--- a/tests/test_optimizer_integration.py
+++ b/tests/test_optimizer_integration.py
@@ -12,9 +12,9 @@
 import os
 
 import pytest
+from pytest_aitest import InstructionSuggestion, optimize_instruction
 
 from pytest_codingagents.copilot.agent import CopilotAgent
-from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
 
 
 @pytest.mark.copilot
diff --git a/tests/test_subagents.py b/tests/test_subagents.py
new file mode 100644
index 0000000..6e0296f
--- /dev/null
+++ b/tests/test_subagents.py
@@ -0,0 +1,142 @@
+"""Subagent dispatch tests.
+
+Proves that the subagent dispatch mechanism works reliably when the
+orchestrator cannot implement directly (write tools excluded).
+
+When the orchestrator has no write tools, it *must* route to a subagent
+to produce file output. This makes dispatch deterministic and asserts:
+- ``result.subagent_invocations`` is non-empty
+- The subagent actually created the expected file
+- ``SubagentInvocation`` objects have valid name/status fields
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from pytest_codingagents.copilot.agent import CopilotAgent
+
+# Tools that let the orchestrator write files directly.
+# Excluding these forces the orchestrator to delegate.
+_WRITE_TOOLS = [
+    "create_file",
+    "replace_string_in_file",
+    "multi_replace_string_in_file",
+    "insert_edit_into_file",
+    "run_in_terminal",
+    "create_directory",
+]
+
+
+@pytest.mark.copilot
+class TestForcedSubagentDispatch:
+    """When write tools are excluded, the orchestrator must use runSubagent.
+
+    These tests are deterministic: the orchestrator physically cannot create
+    files, so it has no choice but to dispatch to the subagent that can.
+    """
+
+    async def test_subagent_invocations_non_empty(self, copilot_run, tmp_path):
+        """Orchestrator with excluded write tools dispatches to a subagent.
+
+        With no write tools available, the orchestrator cannot create the
+        requested file itself and must invoke the file-writer subagent.
+        Asserts that at least one subagent invocation is recorded.
+        """
+        agent = CopilotAgent(
+            name="forced-orchestrator",
+            instructions=(
+                "You are an orchestrator. You MUST delegate all file creation "
+                "to the file-writer agent via runSubagent. "
+                "Do not attempt to create files yourself."
+            ),
+            working_directory=str(tmp_path),
+            timeout_s=300.0,
+            max_turns=20,
+            excluded_tools=_WRITE_TOOLS,
+            custom_agents=[
+                {
+                    "name": "file-writer",
+                    "prompt": (
+                        "You create Python files. When asked to create a file, "
+                        "write it to disk using your file creation tools."
+                    ),
+                    "description": "Creates Python source files on disk.",
+                }
+            ],
+        )
+        result = await copilot_run(
+            agent,
+            "Use the file-writer agent to create hello.py containing: print('hello world')",
+        )
+        assert result.success, f"Run failed: {result.error}"
+        assert result.subagent_invocations, (
+            "No subagent invocations recorded — orchestrator may have attempted "
+            "to implement directly despite excluded write tools"
+        )
+
+    async def test_subagent_file_created(self, copilot_run, tmp_path):
+        """File created by subagent exists in the workspace.
+
+        Complements test_subagent_invocations_non_empty by verifying the
+        subagent actually produced the expected artifact.
+        """
+        agent = CopilotAgent(
+            name="forced-orchestrator-file",
+            instructions=(
+                "You are an orchestrator. Delegate all file creation to the "
+                "file-writer agent via runSubagent."
+            ),
+            working_directory=str(tmp_path),
+            timeout_s=300.0,
+            max_turns=20,
+            excluded_tools=_WRITE_TOOLS,
+            custom_agents=[
+                {
+                    "name": "file-writer",
+                    "prompt": ("You create Python files. Write requested files to disk."),
+                    "description": "Creates Python source files on disk.",
+                }
+            ],
+        )
+        result = await copilot_run(
+            agent,
+            "Use the file-writer agent to create output.py containing: x = 42",
+        )
+        assert result.success, f"Run failed: {result.error}"
+        assert (tmp_path / "output.py").exists(), (
+            "output.py not created — subagent did not write the file"
+        )
+
+    async def test_subagent_invocation_fields(self, copilot_run, tmp_path):
+        """SubagentInvocation objects have valid name and status fields."""
+        agent = CopilotAgent(
+            name="forced-orchestrator-fields",
+            instructions=(
+                "You are an orchestrator. Delegate file creation to the "
+                "file-writer agent via runSubagent."
+            ),
+            working_directory=str(tmp_path),
+            timeout_s=300.0,
+            max_turns=20,
+            excluded_tools=_WRITE_TOOLS,
+            custom_agents=[
+                {
+                    "name": "file-writer",
+                    "prompt": "You create Python files on disk.",
+                    "description": "Creates Python source files.",
+                }
+            ],
+        )
+        result = await copilot_run(
+            agent,
+            "Use the file-writer agent to create result.py containing: done = True",
+        )
+        assert result.success, f"Run failed: {result.error}"
+        assert result.subagent_invocations, "No subagent invocations recorded"
+
+        for inv in result.subagent_invocations:
+            assert inv.name, "SubagentInvocation.name must not be empty"
+            assert inv.status in ("selected", "started", "completed", "failed"), (
+                f"Unexpected SubagentInvocation.status: {inv.status!r}"
+            )
diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py
index fd551fd..e5d4690 100644
--- a/tests/unit/test_optimizer.py
+++ b/tests/unit/test_optimizer.py
@@ -4,12 +4,13 @@
 
 from unittest.mock import AsyncMock, MagicMock, patch
 
-from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
+from pytest_aitest import InstructionSuggestion, optimize_instruction
+
 from pytest_codingagents.copilot.result import CopilotResult, ToolCall, Turn
 
-# Patch targets
-_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent"
-_BUILD_MODEL_PATCH = "pytest_codingagents.copilot.optimizer.build_model_from_string"
+# Patch targets — the optimizer now lives in pytest_aitest
+_AGENT_PATCH = "pytest_aitest.execution.optimizer.PydanticAgent"
+_BUILD_MODEL_PATCH = "pytest_aitest.execution.optimizer.build_model_from_string"
 _FAKE_MODEL = MagicMock(name="fake-model")