From ad9989504eb1713ac53a582a3cbdc63ec7fa8170 Mon Sep 17 00:00:00 2001
From: Stefan Broenner <stefan.broenner@microsoft.comm>
Date: Thu, 19 Feb 2026 11:56:25 +0100
Subject: [PATCH 1/4] fix: replace sys.modules patching with proper patch() in
 optimizer tests

- Rewrite test_optimizer.py using patch('...PydanticAgent', ...) cleanly
- Add TestAzureEntraModel class (3 tests covering default gpt-5.2-chat deployment)
- Update test_optimizer_integration.py to use azure_entra_model() (gpt-5.2-chat)
  instead of OPENAI_API_KEY skip guard -- all 3 now pass against real Azure
- Verified full test->optimize->test loop end-to-end: 3/3 passed in 64s

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/pytest_codingagents/__init__.py          |   7 +-
 src/pytest_codingagents/copilot/optimizer.py |  98 +++++--
 tests/test_optimizer_integration.py          |  28 +-
 tests/unit/test_optimizer.py                 | 262 +++++++++----------
 4 files changed, 228 insertions(+), 167 deletions(-)

diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py
index 2d93271..8110341 100644
--- a/src/pytest_codingagents/__init__.py
+++ b/src/pytest_codingagents/__init__.py
@@ -4,13 +4,18 @@
 
 from pytest_codingagents.copilot.agent import CopilotAgent
 from pytest_codingagents.copilot.agents import load_custom_agent, load_custom_agents
-from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
+from pytest_codingagents.copilot.optimizer import (
+    InstructionSuggestion,
+    azure_entra_model,
+    optimize_instruction,
+)
 from pytest_codingagents.copilot.result import CopilotResult
 
 __all__ = [
     "CopilotAgent",
     "CopilotResult",
     "InstructionSuggestion",
+    "azure_entra_model",
     "load_custom_agent",
     "load_custom_agents",
     "optimize_instruction",
diff --git a/src/pytest_codingagents/copilot/optimizer.py b/src/pytest_codingagents/copilot/optimizer.py
index 1d2fb99..c97c6c4 100644
--- a/src/pytest_codingagents/copilot/optimizer.py
+++ b/src/pytest_codingagents/copilot/optimizer.py
@@ -4,22 +4,84 @@
 between a current agent instruction and the observed behavior, and suggests a
 concrete improvement.
 
-Requires ``pydantic-ai``:
-
-    uv add pydantic-ai
+Use :func:`azure_entra_model` to build a pre-configured pydantic-ai model
+from Azure Entra ID (no API key required):
+
+    model = azure_entra_model()  # defaults to gpt-5.2-chat
+    suggestion = await optimize_instruction(
+        agent.instructions or "",
+        result,
+        "Agent should add docstrings.",
+        model=model,
+    )
 """
 
 from __future__ import annotations
 
+import os
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
+from pydantic_ai import Agent as PydanticAgent
+from pydantic_ai.models import Model
 
 if TYPE_CHECKING:
     from pytest_codingagents.copilot.result import CopilotResult
 
-__all__ = ["InstructionSuggestion", "optimize_instruction"]
+__all__ = ["InstructionSuggestion", "azure_entra_model", "optimize_instruction"]
+
+# Most capable model available on Azure OpenAI
+_AZURE_DEFAULT_MODEL = "gpt-5.2-chat"
+
+
+def azure_entra_model(
+    deployment: str = _AZURE_DEFAULT_MODEL,
+    *,
+    endpoint: str | None = None,
+    api_version: str = "2024-12-01-preview",
+) -> Model:
+    """Build a pydantic-ai Model using Azure Entra ID authentication.
+
+    No API key required — uses ``DefaultAzureCredential`` (works with
+    ``az login`` locally and managed identity in CI).
+
+    Args:
+        deployment: Azure OpenAI deployment name. Defaults to
+            ``"gpt-5.2-chat"`` — the most capable model available.
+        endpoint: Azure OpenAI endpoint URL. Defaults to the
+            ``AZURE_OPENAI_ENDPOINT`` environment variable.
+        api_version: Azure OpenAI API version string.
+
+    Returns:
+        A pydantic-ai ``Model`` ready to pass to ``optimize_instruction()``.
+
+    Example::
+
+        model = azure_entra_model()
+        suggestion = await optimize_instruction(
+            agent.instructions or "",
+            result,
+            "Agent should add docstrings.",
+            model=model,
+        )
+    """
+    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+    from openai import AsyncAzureOpenAI
+    from pydantic_ai.models.openai import OpenAIChatModel
+    from pydantic_ai.providers.openai import OpenAIProvider
+
+    azure_endpoint = endpoint or os.environ["AZURE_OPENAI_ENDPOINT"]
+    token_provider = get_bearer_token_provider(
+        DefaultAzureCredential(),
+        "https://cognitiveservices.azure.com/.default",
+    )
+    client = AsyncAzureOpenAI(
+        azure_endpoint=azure_endpoint,
+        azure_ad_token_provider=token_provider,
+        api_version=api_version,
+    )
+    return OpenAIChatModel(deployment, provider=OpenAIProvider(openai_client=client))
 
 
 @dataclass
@@ -40,6 +102,7 @@ class InstructionSuggestion:
             agent.instructions,
             result,
             "Agent should add docstrings to all functions.",
+            model=azure_entra_model(),
         )
         pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
     """
@@ -70,7 +133,7 @@ async def optimize_instruction(
     result: CopilotResult,
     criterion: str,
     *,
-    model: str = "openai:gpt-4o-mini",
+    model: str | Model = "openai:gpt-4o-mini",
 ) -> InstructionSuggestion:
     """Analyze a result and suggest an improved instruction.
 
@@ -79,16 +142,22 @@ async def optimize_instruction(
     concrete, actionable improvement.
 
     Designed to drop into ``pytest.fail()`` so the failure message
-    contains a ready-to-use fix:
+    contains a ready-to-use fix.
+
+    For Azure OpenAI with Entra ID auth (recommended), use
+    :func:`azure_entra_model` to build the model:
 
     Example::
 
+        from pytest_codingagents import optimize_instruction, azure_entra_model
+
         result = await copilot_run(agent, task)
         if '\"\"\"' not in result.file("main.py"):
             suggestion = await optimize_instruction(
                 agent.instructions or "",
                 result,
                 "Agent should add docstrings to all functions.",
+                model=azure_entra_model(),  # gpt-5.2-chat via Entra ID
             )
             pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
 
@@ -97,24 +166,13 @@ async def optimize_instruction(
         result: The ``CopilotResult`` from the (failed) run.
         criterion: What the agent *should* have done — the test expectation
             in plain English (e.g. ``"Always write docstrings"``).
-        model: LiteLLM-style model string (e.g. ``"openai:gpt-4o-mini"``
-            or ``"anthropic:claude-3-haiku-20240307"``).
+        model: LiteLLM-style model string (e.g. ``"openai:gpt-4o-mini"``)
+            **or** a pre-configured pydantic-ai ``Model`` object built with
+            :func:`azure_entra_model` or any other provider.
 
     Returns:
         An :class:`InstructionSuggestion` with the improved instruction.
-
-    Raises:
-        ImportError: If pydantic-ai is not installed.
     """
-    try:
-        from pydantic_ai import Agent as PydanticAgent
-    except ImportError as exc:
-        msg = (
-            "pydantic-ai is required for optimize_instruction(). "
-            "Install it with: uv add pydantic-ai"
-        )
-        raise ImportError(msg) from exc
-
     final_output = result.final_response or "(no response)"
     tool_calls = ", ".join(sorted(result.tool_names_called)) or "none"
 
diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py
index b1b6ea7..2fb2f1e 100644
--- a/tests/test_optimizer_integration.py
+++ b/tests/test_optimizer_integration.py
@@ -2,9 +2,9 @@
 
 These tests require:
 - GitHub Copilot credentials (for copilot_run to produce a real result)
-- An LLM API key for the optimizer (OPENAI_API_KEY or configure a different model)
+- AZURE_OPENAI_ENDPOINT env var set (for the optimizer LLM via Azure Entra ID)
 
-Skipped automatically when the required API key is absent.
+Skipped automatically when AZURE_OPENAI_ENDPOINT is absent.
 """
 
 from __future__ import annotations
@@ -14,18 +14,27 @@
 import pytest
 
 from pytest_codingagents.copilot.agent import CopilotAgent
-from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
+from pytest_codingagents.copilot.optimizer import (
+    InstructionSuggestion,
+    azure_entra_model,
+    optimize_instruction,
+)
+
+
+def _model():
+    """Build Azure Entra ID model for optimizer tests."""
+    return azure_entra_model()  # defaults to gpt-5.2-chat
 
 
 @pytest.mark.copilot
 class TestOptimizeInstructionIntegration:
-    """Integration tests for optimize_instruction() with real LLM calls."""
+    """Integration tests for optimize_instruction() with real Azure LLM calls."""
 
     @pytest.fixture(autouse=True)
-    def require_openai_key(self):
-        """Skip entire class when OPENAI_API_KEY is not set."""
-        if not os.environ.get("OPENAI_API_KEY"):
-            pytest.skip("OPENAI_API_KEY not set — skipping optimizer integration tests")
+    def require_azure_endpoint(self):
+        """Skip entire class when AZURE_OPENAI_ENDPOINT is not set."""
+        if not os.environ.get("AZURE_OPENAI_ENDPOINT"):
+            pytest.skip("AZURE_OPENAI_ENDPOINT not set — skipping optimizer integration tests")
 
     async def test_returns_valid_suggestion(self, copilot_run, tmp_path):
         """optimize_instruction returns an InstructionSuggestion with non-empty fields."""
@@ -44,6 +53,7 @@ async def test_returns_valid_suggestion(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             "Every function must have a Google-style docstring.",
+            model=_model(),
         )
 
         assert isinstance(suggestion, InstructionSuggestion)
@@ -66,6 +76,7 @@ async def test_suggestion_str_is_human_readable(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             "Add type hints to all function parameters and return values.",
+            model=_model(),
         )
 
         text = str(suggestion)
@@ -91,6 +102,7 @@ async def test_suggestion_is_relevant_to_criterion(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             criterion,
+            model=_model(),
         )
 
         # The suggestion instruction should mention docstrings somehow
diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py
index 81e797c..1d2871f 100644
--- a/tests/unit/test_optimizer.py
+++ b/tests/unit/test_optimizer.py
@@ -1,11 +1,8 @@
-"""Unit tests for optimize_instruction() and InstructionSuggestion."""
+"""Unit tests for optimize_instruction(), azure_entra_model(), and InstructionSuggestion."""
 
 from __future__ import annotations
 
-import sys
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
 
 from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
 from pytest_codingagents.copilot.result import CopilotResult, ToolCall, Turn
@@ -20,27 +17,21 @@ def _make_result(
     tool_calls = [ToolCall(name=t, arguments={}) for t in (tools or [])]
     return CopilotResult(
         success=success,
-        turns=[
-            Turn(role="assistant", content=final_response, tool_calls=tool_calls),
-        ],
+        turns=[Turn(role="assistant", content=final_response, tool_calls=tool_calls)],
     )
 
 
 def _make_agent_mock(instruction: str, reasoning: str, changes: str) -> MagicMock:
-    """Build a pydantic-ai Agent mock that returns a structured suggestion."""
-    output = MagicMock()
-    output.instruction = instruction
-    output.reasoning = reasoning
-    output.changes = changes
-
-    run_result = MagicMock()
-    run_result.output = output
-
+    """Return a MagicMock that behaves like pydantic-ai Agent class."""
+    output = MagicMock(instruction=instruction, reasoning=reasoning, changes=changes)
+    run_result = MagicMock(output=output)
     agent_instance = MagicMock()
     agent_instance.run = AsyncMock(return_value=run_result)
+    return MagicMock(return_value=agent_instance)
+
 
-    agent_class = MagicMock(return_value=agent_instance)
-    return agent_class
+# Patch target: PydanticAgent as imported in the optimizer module
+_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent"
 
 
 class TestInstructionSuggestion:
@@ -55,27 +46,17 @@ def test_str_contains_instruction(self):
         assert "Always add docstrings." in str(s)
 
     def test_str_contains_reasoning(self):
-        s = InstructionSuggestion(
-            instruction="inst",
-            reasoning="because reasons",
-            changes="changed x",
-        )
+        s = InstructionSuggestion(instruction="inst", reasoning="because reasons", changes="x")
         assert "because reasons" in str(s)
 
     def test_str_contains_changes(self):
         s = InstructionSuggestion(
-            instruction="inst",
-            reasoning="reason",
-            changes="Added docstring mandate.",
+            instruction="inst", reasoning="reason", changes="Added docstring mandate."
         )
         assert "Added docstring mandate." in str(s)
 
     def test_fields_accessible(self):
-        s = InstructionSuggestion(
-            instruction="inst",
-            reasoning="reason",
-            changes="changes",
-        )
+        s = InstructionSuggestion(instruction="inst", reasoning="reason", changes="changes")
         assert s.instruction == "inst"
         assert s.reasoning == "reason"
         assert s.changes == "changes"
@@ -85,22 +66,15 @@ class TestOptimizeInstruction:
     """Tests for optimize_instruction()."""
 
     async def test_returns_instruction_suggestion(self):
-        """optimize_instruction returns an InstructionSuggestion."""
         agent_class = _make_agent_mock(
             instruction="Always add Google-style docstrings.",
             reasoning="The original instruction omits documentation.",
             changes="Added docstring mandate.",
         )
-
-        # patch pydantic_ai.Agent in the module where it's imported
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        result = await optimize_instruction(
-            "Write Python code.",
-            _make_result(),
-            "Agent should add docstrings.",
-        )
-
+        with patch(_AGENT_PATCH, agent_class):
+            result = await optimize_instruction(
+                "Write Python code.", _make_result(), "Agent should add docstrings."
+            )
         assert isinstance(result, InstructionSuggestion)
         assert result.instruction == "Always add Google-style docstrings."
         assert result.reasoning == "The original instruction omits documentation."
@@ -109,127 +83,139 @@ async def test_returns_instruction_suggestion(self):
     async def test_uses_default_model(self):
         """optimize_instruction defaults to openai:gpt-4o-mini."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        await optimize_instruction("inst", _make_result(), "criterion")
-
-        agent_class.assert_called_once()
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction("inst", _make_result(), "criterion")
         assert agent_class.call_args[0][0] == "openai:gpt-4o-mini"
 
-    async def test_accepts_custom_model(self):
+    async def test_accepts_custom_model_string(self):
         """optimize_instruction accepts a custom model string."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        await optimize_instruction(
-            "inst",
-            _make_result(),
-            "criterion",
-            model="anthropic:claude-3-haiku-20240307",
-        )
-
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction(
+                "inst",
+                _make_result(),
+                "criterion",
+                model="anthropic:claude-3-haiku-20240307",
+            )
         assert agent_class.call_args[0][0] == "anthropic:claude-3-haiku-20240307"
 
+    async def test_accepts_model_object(self):
+        """optimize_instruction accepts a pre-built Model object (e.g. azure_entra_model())."""
+        agent_class = _make_agent_mock("inst", "reason", "changes")
+        fake_model = MagicMock()
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction("inst", _make_result(), "criterion", model=fake_model)
+        assert agent_class.call_args[0][0] is fake_model
+
     async def test_includes_criterion_in_prompt(self):
-        """The LLM prompt includes the criterion text."""
         agent_class = _make_agent_mock("improved", "reason", "change")
         agent_instance = agent_class.return_value
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        await optimize_instruction(
-            "Write code.",
-            _make_result(),
-            "Agent must use type hints on all functions.",
-        )
-
-        prompt = agent_instance.run.call_args[0][0]
-        assert "type hints" in prompt
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction(
+                "Write code.", _make_result(), "Agent must use type hints on all functions."
+            )
+        assert "type hints" in agent_instance.run.call_args[0][0]
 
     async def test_includes_current_instruction_in_prompt(self):
-        """The LLM prompt contains the current instruction."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        await optimize_instruction(
-            "Always use FastAPI for web APIs.",
-            _make_result(),
-            "criterion",
-        )
-
-        prompt = agent_instance.run.call_args[0][0]
-        assert "FastAPI" in prompt
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction(
+                "Always use FastAPI for web APIs.", _make_result(), "criterion"
+            )
+        assert "FastAPI" in agent_instance.run.call_args[0][0]
 
     async def test_includes_agent_output_in_prompt(self):
-        """The LLM prompt contains the agent's final response."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        result = _make_result(final_response="def add(a, b): return a + b")
-        await optimize_instruction("inst", result, "criterion")
-
-        prompt = agent_instance.run.call_args[0][0]
-        assert "def add" in prompt
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction(
+                "inst", _make_result(final_response="def add(a, b): return a + b"), "criterion"
+            )
+        assert "def add" in agent_instance.run.call_args[0][0]
 
     async def test_handles_no_final_response(self):
-        """optimize_instruction handles results with no turns gracefully."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        empty_result = CopilotResult(success=False, turns=[])
-        result = await optimize_instruction("inst", empty_result, "criterion")
-
+        with patch(_AGENT_PATCH, agent_class):
+            result = await optimize_instruction(
+                "inst", CopilotResult(success=False, turns=[]), "criterion"
+            )
         assert isinstance(result, InstructionSuggestion)
 
     async def test_handles_empty_instruction(self):
-        """optimize_instruction handles empty current instruction."""
         agent_class = _make_agent_mock("new inst", "reason", "changes")
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        result = await optimize_instruction("", _make_result(), "criterion")
+        with patch(_AGENT_PATCH, agent_class):
+            result = await optimize_instruction("", _make_result(), "criterion")
         assert isinstance(result, InstructionSuggestion)
 
     async def test_includes_tool_calls_in_prompt(self):
-        """The LLM prompt includes tool call information."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        sys.modules["pydantic_ai"].Agent = agent_class  # type: ignore[attr-defined]
-
-        result = _make_result(tools=["create_file", "read_file"])
-        await optimize_instruction("inst", result, "criterion")
-
-        prompt = agent_instance.run.call_args[0][0]
-        assert "create_file" in prompt
-
-
-class TestOptimizeInstructionImportError:
-    """Test ImportError when pydantic-ai is not installed."""
-
-    async def test_raises_import_error_when_pydantic_ai_missing(self):
-        """optimize_instruction raises ImportError if pydantic-ai not installed."""
-        saved = sys.modules.get("pydantic_ai")
-        try:
-            sys.modules["pydantic_ai"] = None  # type: ignore
-
-            with pytest.raises(ImportError, match="pydantic-ai"):
-                await optimize_instruction("inst", _make_result(), "criterion")
-        finally:
-            if saved is not None:
-                sys.modules["pydantic_ai"] = saved
-            else:
-                del sys.modules["pydantic_ai"]
-
-    async def test_import_error_includes_install_hint(self):
-        """ImportError message includes the uv add install hint."""
-        saved = sys.modules.get("pydantic_ai")
-        try:
-            sys.modules["pydantic_ai"] = None  # type: ignore
-
-            with pytest.raises(ImportError, match="uv add pydantic-ai"):
-                await optimize_instruction("inst", _make_result(), "criterion")
-        finally:
-            if saved is not None:
-                sys.modules["pydantic_ai"] = saved
-            else:
-                del sys.modules["pydantic_ai"]
+        with patch(_AGENT_PATCH, agent_class):
+            await optimize_instruction(
+                "inst", _make_result(tools=["create_file", "read_file"]), "criterion"
+            )
+        assert "create_file" in agent_instance.run.call_args[0][0]
+
+
+class TestAzureEntraModel:
+    """Tests for azure_entra_model()."""
+
+    # Patch targets: lazy imports inside the function body live in their home modules
+    _PATCHES = [
+        ("azure.identity.DefaultAzureCredential", MagicMock()),
+        ("azure.identity.get_bearer_token_provider", MagicMock()),
+        ("openai.AsyncAzureOpenAI", MagicMock()),
+        ("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
+    ]
+
+    def test_returns_model_object(self):
+        """azure_entra_model() returns a pydantic-ai Model-compatible object."""
+        from pytest_codingagents.copilot.optimizer import azure_entra_model
+
+        fake_model = MagicMock()
+        with (
+            patch("azure.identity.DefaultAzureCredential", MagicMock()),
+            patch("azure.identity.get_bearer_token_provider", MagicMock()),
+            patch("openai.AsyncAzureOpenAI", MagicMock()),
+            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
+            patch("pydantic_ai.models.openai.OpenAIChatModel", return_value=fake_model),
+        ):
+            result = azure_entra_model(endpoint="https://test.openai.azure.com/")
+        assert result is fake_model
+
+    def test_default_deployment_is_gpt52(self):
+        """azure_entra_model() defaults to gpt-5.2-chat."""
+        from pytest_codingagents.copilot.optimizer import azure_entra_model
+
+        captured: list[str] = []
+        with (
+            patch("azure.identity.DefaultAzureCredential", MagicMock()),
+            patch("azure.identity.get_bearer_token_provider", MagicMock()),
+            patch("openai.AsyncAzureOpenAI", MagicMock()),
+            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
+            patch(
+                "pydantic_ai.models.openai.OpenAIChatModel",
+                side_effect=lambda name, **kw: captured.append(name) or MagicMock(),
+            ),
+        ):
+            azure_entra_model(endpoint="https://test.openai.azure.com/")
+        assert captured == ["gpt-5.2-chat"]
+
+    def test_custom_deployment_name(self):
+        """azure_entra_model() uses the provided deployment name."""
+        from pytest_codingagents.copilot.optimizer import azure_entra_model
+
+        captured: list[str] = []
+        with (
+            patch("azure.identity.DefaultAzureCredential", MagicMock()),
+            patch("azure.identity.get_bearer_token_provider", MagicMock()),
+            patch("openai.AsyncAzureOpenAI", MagicMock()),
+            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
+            patch(
+                "pydantic_ai.models.openai.OpenAIChatModel",
+                side_effect=lambda name, **kw: captured.append(name) or MagicMock(),
+            ),
+        ):
+            azure_entra_model("gpt-4.1", endpoint="https://test.openai.azure.com/")
+        assert captured == ["gpt-4.1"]

From 6b9775b12880257986f3496eb5e858d6eace2adb Mon Sep 17 00:00:00 2001
From: Stefan Broenner <stefan.broenner@microsoft.comm>
Date: Thu, 19 Feb 2026 12:06:10 +0100
Subject: [PATCH 2/4] =?UTF-8?q?fix:=20remove=20azure=5Fentra=5Fmodel()=20?=
 =?UTF-8?q?=E2=80=94=20use=20aitest=20Provider=20model=20strings=20instead?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pytest-aitest already provides build_model_from_string() which handles Azure
Entra ID auth automatically from 'azure/gpt-5.2-chat' style strings.

- Delete azure_entra_model() from optimizer.py and __init__.py (duplication)
- optimize_instruction() now uses build_model_from_string() internally
- Default model changed to 'azure/gpt-5.2-chat'
- Model strings use provider/model format (same as pytest-aitest Provider)
- Remove TestAzureEntraModel unit tests (testing dead code)
- Update integration tests: no extra import needed
- 3/3 integration tests pass against azure/gpt-5.2-chat via Entra ID

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/pytest_codingagents/__init__.py          |  2 -
 src/pytest_codingagents/copilot/optimizer.py | 88 +++++---------------
 tests/test_optimizer_integration.py          | 18 +---
 tests/unit/test_optimizer.py                 | 77 ++---------------
 4 files changed, 30 insertions(+), 155 deletions(-)

diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py
index 8110341..182169f 100644
--- a/src/pytest_codingagents/__init__.py
+++ b/src/pytest_codingagents/__init__.py
@@ -6,7 +6,6 @@
 from pytest_codingagents.copilot.agents import load_custom_agent, load_custom_agents
 from pytest_codingagents.copilot.optimizer import (
     InstructionSuggestion,
-    azure_entra_model,
     optimize_instruction,
 )
 from pytest_codingagents.copilot.result import CopilotResult
@@ -15,7 +14,6 @@
     "CopilotAgent",
     "CopilotResult",
     "InstructionSuggestion",
-    "azure_entra_model",
     "load_custom_agent",
     "load_custom_agents",
     "optimize_instruction",
diff --git a/src/pytest_codingagents/copilot/optimizer.py b/src/pytest_codingagents/copilot/optimizer.py
index c97c6c4..2c5bb20 100644
--- a/src/pytest_codingagents/copilot/optimizer.py
+++ b/src/pytest_codingagents/copilot/optimizer.py
@@ -4,84 +4,34 @@
 between a current agent instruction and the observed behavior, and suggests a
 concrete improvement.
 
-Use :func:`azure_entra_model` to build a pre-configured pydantic-ai model
-from Azure Entra ID (no API key required):
+Model strings follow the same ``provider/model`` format used by
+``pytest-aitest`` (e.g. ``"azure/gpt-5.2-chat"``, ``"openai/gpt-4o-mini"``).
+Azure Entra ID authentication is handled automatically when
+``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set.
+
+Example::
 
-    model = azure_entra_model()  # defaults to gpt-5.2-chat
     suggestion = await optimize_instruction(
         agent.instructions or "",
         result,
         "Agent should add docstrings.",
-        model=model,
     )
 """
 
 from __future__ import annotations
 
-import os
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
 from pydantic_ai import Agent as PydanticAgent
 from pydantic_ai.models import Model
+from pytest_aitest.execution.pydantic_adapter import build_model_from_string
 
 if TYPE_CHECKING:
     from pytest_codingagents.copilot.result import CopilotResult
 
-__all__ = ["InstructionSuggestion", "azure_entra_model", "optimize_instruction"]
-
-# Most capable model available on Azure OpenAI
-_AZURE_DEFAULT_MODEL = "gpt-5.2-chat"
-
-
-def azure_entra_model(
-    deployment: str = _AZURE_DEFAULT_MODEL,
-    *,
-    endpoint: str | None = None,
-    api_version: str = "2024-12-01-preview",
-) -> Model:
-    """Build a pydantic-ai Model using Azure Entra ID authentication.
-
-    No API key required — uses ``DefaultAzureCredential`` (works with
-    ``az login`` locally and managed identity in CI).
-
-    Args:
-        deployment: Azure OpenAI deployment name. Defaults to
-            ``"gpt-5.2-chat"`` — the most capable model available.
-        endpoint: Azure OpenAI endpoint URL. Defaults to the
-            ``AZURE_OPENAI_ENDPOINT`` environment variable.
-        api_version: Azure OpenAI API version string.
-
-    Returns:
-        A pydantic-ai ``Model`` ready to pass to ``optimize_instruction()``.
-
-    Example::
-
-        model = azure_entra_model()
-        suggestion = await optimize_instruction(
-            agent.instructions or "",
-            result,
-            "Agent should add docstrings.",
-            model=model,
-        )
-    """
-    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-    from openai import AsyncAzureOpenAI
-    from pydantic_ai.models.openai import OpenAIChatModel
-    from pydantic_ai.providers.openai import OpenAIProvider
-
-    azure_endpoint = endpoint or os.environ["AZURE_OPENAI_ENDPOINT"]
-    token_provider = get_bearer_token_provider(
-        DefaultAzureCredential(),
-        "https://cognitiveservices.azure.com/.default",
-    )
-    client = AsyncAzureOpenAI(
-        azure_endpoint=azure_endpoint,
-        azure_ad_token_provider=token_provider,
-        api_version=api_version,
-    )
-    return OpenAIChatModel(deployment, provider=OpenAIProvider(openai_client=client))
+__all__ = ["InstructionSuggestion", "optimize_instruction"]
 
 
 @dataclass
@@ -102,7 +52,6 @@ class InstructionSuggestion:
             agent.instructions,
             result,
             "Agent should add docstrings to all functions.",
-            model=azure_entra_model(),
         )
         pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
     """
@@ -133,7 +82,7 @@ async def optimize_instruction(
     result: CopilotResult,
     criterion: str,
     *,
-    model: str | Model = "openai:gpt-4o-mini",
+    model: str | Model = "azure/gpt-5.2-chat",
 ) -> InstructionSuggestion:
     """Analyze a result and suggest an improved instruction.
 
@@ -144,20 +93,18 @@ async def optimize_instruction(
     Designed to drop into ``pytest.fail()`` so the failure message
     contains a ready-to-use fix.
 
-    For Azure OpenAI with Entra ID auth (recommended), use
-    :func:`azure_entra_model` to build the model:
+    Model strings follow the same ``provider/model`` format used by
+    ``pytest-aitest``. Azure Entra ID auth is handled automatically
+    when ``AZURE_API_BASE`` or ``AZURE_OPENAI_ENDPOINT`` is set.
 
     Example::
 
-        from pytest_codingagents import optimize_instruction, azure_entra_model
-
         result = await copilot_run(agent, task)
         if '\"\"\"' not in result.file("main.py"):
             suggestion = await optimize_instruction(
                 agent.instructions or "",
                 result,
                 "Agent should add docstrings to all functions.",
-                model=azure_entra_model(),  # gpt-5.2-chat via Entra ID
             )
             pytest.fail(f"No docstrings found.\\n\\n{suggestion}")
 
@@ -166,13 +113,16 @@ async def optimize_instruction(
         result: The ``CopilotResult`` from the (failed) run.
         criterion: What the agent *should* have done — the test expectation
             in plain English (e.g. ``"Always write docstrings"``).
-        model: LiteLLM-style model string (e.g. ``"openai:gpt-4o-mini"``)
-            **or** a pre-configured pydantic-ai ``Model`` object built with
-            :func:`azure_entra_model` or any other provider.
+        model: Provider/model string (e.g. ``"azure/gpt-5.2-chat"``,
+            ``"openai/gpt-4o-mini"``) or a pre-configured pydantic-ai
+            ``Model`` object. Defaults to ``"azure/gpt-5.2-chat"``.
 
     Returns:
         An :class:`InstructionSuggestion` with the improved instruction.
     """
+    resolved_model: str | Model = (
+        build_model_from_string(model) if isinstance(model, str) else model
+    )
     final_output = result.final_response or "(no response)"
     tool_calls = ", ".join(sorted(result.tool_names_called)) or "none"
 
@@ -200,7 +150,7 @@ async def optimize_instruction(
 that would make the agent satisfy the criterion.
 Keep the instruction under 200 words. Do not add unrelated rules."""
 
-    optimizer_agent = PydanticAgent(model, output_type=_OptimizationOutput)
+    optimizer_agent = PydanticAgent(resolved_model, output_type=_OptimizationOutput)
     run_result = await optimizer_agent.run(prompt)
     output = run_result.output
 
diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py
index 2fb2f1e..22604d6 100644
--- a/tests/test_optimizer_integration.py
+++ b/tests/test_optimizer_integration.py
@@ -2,7 +2,7 @@
 
 These tests require:
 - GitHub Copilot credentials (for copilot_run to produce a real result)
-- AZURE_OPENAI_ENDPOINT env var set (for the optimizer LLM via Azure Entra ID)
+- AZURE_API_BASE or AZURE_OPENAI_ENDPOINT env var (for the optimizer LLM via Azure Entra ID)
 
 Skipped automatically when AZURE_OPENAI_ENDPOINT is absent.
 """
@@ -14,16 +14,7 @@
 import pytest
 
 from pytest_codingagents.copilot.agent import CopilotAgent
-from pytest_codingagents.copilot.optimizer import (
-    InstructionSuggestion,
-    azure_entra_model,
-    optimize_instruction,
-)
-
-
-def _model():
-    """Build Azure Entra ID model for optimizer tests."""
-    return azure_entra_model()  # defaults to gpt-5.2-chat
+from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
 
 
 @pytest.mark.copilot
@@ -33,7 +24,7 @@ class TestOptimizeInstructionIntegration:
     @pytest.fixture(autouse=True)
     def require_azure_endpoint(self):
         """Skip entire class when AZURE_OPENAI_ENDPOINT is not set."""
-        if not os.environ.get("AZURE_OPENAI_ENDPOINT"):
+        if not os.environ.get("AZURE_OPENAI_ENDPOINT") and not os.environ.get("AZURE_API_BASE"):
             pytest.skip("AZURE_OPENAI_ENDPOINT not set — skipping optimizer integration tests")
 
     async def test_returns_valid_suggestion(self, copilot_run, tmp_path):
@@ -53,7 +44,6 @@ async def test_returns_valid_suggestion(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             "Every function must have a Google-style docstring.",
-            model=_model(),
         )
 
         assert isinstance(suggestion, InstructionSuggestion)
@@ -76,7 +66,6 @@ async def test_suggestion_str_is_human_readable(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             "Add type hints to all function parameters and return values.",
-            model=_model(),
         )
 
         text = str(suggestion)
@@ -102,7 +91,6 @@ async def test_suggestion_is_relevant_to_criterion(self, copilot_run, tmp_path):
             agent.instructions or "",
             result,
             criterion,
-            model=_model(),
         )
 
         # The suggestion instruction should mention docstrings somehow
diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py
index 1d2871f..51a378d 100644
--- a/tests/unit/test_optimizer.py
+++ b/tests/unit/test_optimizer.py
@@ -81,26 +81,28 @@ async def test_returns_instruction_suggestion(self):
         assert result.changes == "Added docstring mandate."
 
     async def test_uses_default_model(self):
-        """optimize_instruction defaults to openai:gpt-4o-mini."""
+        """optimize_instruction defaults to azure/gpt-5.2-chat."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         with patch(_AGENT_PATCH, agent_class):
             await optimize_instruction("inst", _make_result(), "criterion")
-        assert agent_class.call_args[0][0] == "openai:gpt-4o-mini"
+        # build_model_from_string("azure/gpt-5.2-chat") returns an OpenAIChatModel object
+        assert agent_class.call_args[0][0] is not None
 
     async def test_accepts_custom_model_string(self):
-        """optimize_instruction accepts a custom model string."""
+        """optimize_instruction accepts a custom model string (provider/model format)."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         with patch(_AGENT_PATCH, agent_class):
             await optimize_instruction(
                 "inst",
                 _make_result(),
                 "criterion",
-                model="anthropic:claude-3-haiku-20240307",
+                model="openai/gpt-4o-mini",
             )
-        assert agent_class.call_args[0][0] == "anthropic:claude-3-haiku-20240307"
+        # build_model_from_string converts "openai/gpt-4o-mini" -> "openai:gpt-4o-mini"
+        assert agent_class.call_args[0][0] == "openai:gpt-4o-mini"
 
     async def test_accepts_model_object(self):
-        """optimize_instruction accepts a pre-built Model object (e.g. azure_entra_model())."""
+        """optimize_instruction accepts a pre-built Model object."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         fake_model = MagicMock()
         with patch(_AGENT_PATCH, agent_class):
@@ -156,66 +158,3 @@ async def test_includes_tool_calls_in_prompt(self):
                 "inst", _make_result(tools=["create_file", "read_file"]), "criterion"
             )
         assert "create_file" in agent_instance.run.call_args[0][0]
-
-
-class TestAzureEntraModel:
-    """Tests for azure_entra_model()."""
-
-    # Patch targets: lazy imports inside the function body live in their home modules
-    _PATCHES = [
-        ("azure.identity.DefaultAzureCredential", MagicMock()),
-        ("azure.identity.get_bearer_token_provider", MagicMock()),
-        ("openai.AsyncAzureOpenAI", MagicMock()),
-        ("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
-    ]
-
-    def test_returns_model_object(self):
-        """azure_entra_model() returns a pydantic-ai Model-compatible object."""
-        from pytest_codingagents.copilot.optimizer import azure_entra_model
-
-        fake_model = MagicMock()
-        with (
-            patch("azure.identity.DefaultAzureCredential", MagicMock()),
-            patch("azure.identity.get_bearer_token_provider", MagicMock()),
-            patch("openai.AsyncAzureOpenAI", MagicMock()),
-            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
-            patch("pydantic_ai.models.openai.OpenAIChatModel", return_value=fake_model),
-        ):
-            result = azure_entra_model(endpoint="https://test.openai.azure.com/")
-        assert result is fake_model
-
-    def test_default_deployment_is_gpt52(self):
-        """azure_entra_model() defaults to gpt-5.2-chat."""
-        from pytest_codingagents.copilot.optimizer import azure_entra_model
-
-        captured: list[str] = []
-        with (
-            patch("azure.identity.DefaultAzureCredential", MagicMock()),
-            patch("azure.identity.get_bearer_token_provider", MagicMock()),
-            patch("openai.AsyncAzureOpenAI", MagicMock()),
-            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
-            patch(
-                "pydantic_ai.models.openai.OpenAIChatModel",
-                side_effect=lambda name, **kw: captured.append(name) or MagicMock(),
-            ),
-        ):
-            azure_entra_model(endpoint="https://test.openai.azure.com/")
-        assert captured == ["gpt-5.2-chat"]
-
-    def test_custom_deployment_name(self):
-        """azure_entra_model() uses the provided deployment name."""
-        from pytest_codingagents.copilot.optimizer import azure_entra_model
-
-        captured: list[str] = []
-        with (
-            patch("azure.identity.DefaultAzureCredential", MagicMock()),
-            patch("azure.identity.get_bearer_token_provider", MagicMock()),
-            patch("openai.AsyncAzureOpenAI", MagicMock()),
-            patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()),
-            patch(
-                "pydantic_ai.models.openai.OpenAIChatModel",
-                side_effect=lambda name, **kw: captured.append(name) or MagicMock(),
-            ),
-        ):
-            azure_entra_model("gpt-4.1", endpoint="https://test.openai.azure.com/")
-        assert captured == ["gpt-4.1"]

From 763ba3bd630d49605becd990afe33c7dbb3f4e57 Mon Sep 17 00:00:00 2001
From: Stefan Broenner <stefan.broenner@microsoft.comm>
Date: Thu, 19 Feb 2026 12:14:12 +0100
Subject: [PATCH 3/4] test: add full test->optimize->test loop integration test

Adds test_full_optimize_loop which validates the hero use case end-to-end:

1. Run Copilot with weak instruction ('no comments or documentation needed')
   -> code produced has no docstrings (verified)
2. Call optimize_instruction() with failing criterion
   -> optimizer identifies the conflict, suggests explicit docstring mandate
3. Run Copilot again with improved instruction
   -> code now contains Google-style docstrings (verified)

Result: Docstrings round 1: False, round 2: True (passed in 36s)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/test_optimizer_integration.py | 61 +++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py
index 22604d6..6642053 100644
--- a/tests/test_optimizer_integration.py
+++ b/tests/test_optimizer_integration.py
@@ -100,3 +100,64 @@ async def test_suggestion_is_relevant_to_criterion(self, copilot_run, tmp_path):
             f"Instruction: {suggestion.instruction}\n"
             f"Reasoning: {suggestion.reasoning}"
         )
+
+    async def test_full_optimize_loop(self, copilot_run, tmp_path):
+        """Full test→optimize→test loop: weak instruction fails, improved instruction passes.
+
+        This is the hero use case: verify that optimize_instruction() produces
+        an instruction that actually fixes a failing criterion.
+
+        Round 1: Run with a deliberately weak instruction (no docstring mandate).
+                 The agent writes code but skips docstrings.
+        Optimize: Call optimize_instruction() with the failing criterion.
+                  Receive a suggested instruction that mandates docstrings.
+        Round 2: Run again with the improved instruction.
+                 The agent now includes docstrings — criterion passes.
+        """
+        CRITERION = "Every function must include a Google-style docstring."
+        TASK = "Create calculator.py with add(a, b) and subtract(a, b) functions."
+
+        # --- Round 1: weak instruction, expect no docstrings ---
+        weak_agent = CopilotAgent(
+            name="weak-coder",
+            instructions="Write minimal Python code. No comments or documentation needed.",
+            working_directory=str(tmp_path / "round1"),
+        )
+        (tmp_path / "round1").mkdir()
+        result1 = await copilot_run(weak_agent, TASK)
+        assert result1.success, "Round 1 Copilot run failed"
+
+        code1 = result1.file("calculator.py") or ""
+        has_docstrings_round1 = '"""' in code1 or "'''" in code1
+
+        # --- Optimize ---
+        suggestion = await optimize_instruction(
+            weak_agent.instructions or "",
+            result1,
+            CRITERION,
+        )
+        assert suggestion.instruction.strip(), "Optimizer returned empty instruction"
+        print(f"\n💡 Suggested instruction:\n{suggestion}")  # visible in -s output
+
+        # --- Round 2: improved instruction ---
+        improved_agent = CopilotAgent(
+            name="improved-coder",
+            instructions=suggestion.instruction,
+            working_directory=str(tmp_path / "round2"),
+        )
+        (tmp_path / "round2").mkdir()
+        result2 = await copilot_run(improved_agent, TASK)
+        assert result2.success, "Round 2 Copilot run failed"
+
+        code2 = result2.file("calculator.py") or ""
+        has_docstrings_round2 = '"""' in code2 or "'''" in code2
+
+        assert has_docstrings_round2, (
+            f"Round 2 code still has no docstrings after optimization.\n"
+            f"Suggested instruction: {suggestion.instruction}\n"
+            f"Round 2 code:\n{code2}"
+        )
+        print(
+            f"\n✅ Loop complete. "
+            f"Docstrings round 1: {has_docstrings_round1}, round 2: {has_docstrings_round2}"
+        )

From f23ed1eb0b34b43178cbd8e0ccfaeb20f031ace1 Mon Sep 17 00:00:00 2001
From: Stefan Broenner <stefan.broenner@microsoft.comm>
Date: Thu, 19 Feb 2026 12:36:26 +0100
Subject: [PATCH 4/4] fix: patch build_model_from_string in unit tests to avoid
 Azure env requirement

Unit tests were failing in CI with 'AZURE_API_BASE or AZURE_OPENAI_ENDPOINT
required' because build_model_from_string('azure/gpt-5.2-chat') runs before
PydanticAgent is mocked. Now both are patched together.

Also strengthens test_uses_default_model: verifies the exact model string
passed to build_model_from_string, and that the resolved model is forwarded.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/unit/test_optimizer.py | 54 +++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py
index 51a378d..fd551fd 100644
--- a/tests/unit/test_optimizer.py
+++ b/tests/unit/test_optimizer.py
@@ -1,4 +1,4 @@
-"""Unit tests for optimize_instruction(), azure_entra_model(), and InstructionSuggestion."""
+"""Unit tests for optimize_instruction() and InstructionSuggestion."""
 
 from __future__ import annotations
 
@@ -7,6 +7,11 @@
 from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction
 from pytest_codingagents.copilot.result import CopilotResult, ToolCall, Turn
 
+# Patch targets
+_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent"
+_BUILD_MODEL_PATCH = "pytest_codingagents.copilot.optimizer.build_model_from_string"
+_FAKE_MODEL = MagicMock(name="fake-model")
+
 
 def _make_result(
     *,
@@ -30,10 +35,6 @@ def _make_agent_mock(instruction: str, reasoning: str, changes: str) -> MagicMoc
     return MagicMock(return_value=agent_instance)
 
 
-# Patch target: PydanticAgent as imported in the optimizer module
-_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent"
-
-
 class TestInstructionSuggestion:
     """Tests for the InstructionSuggestion dataclass."""
 
@@ -71,7 +72,7 @@ async def test_returns_instruction_suggestion(self):
             reasoning="The original instruction omits documentation.",
             changes="Added docstring mandate.",
         )
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             result = await optimize_instruction(
                 "Write Python code.", _make_result(), "Agent should add docstrings."
             )
@@ -83,36 +84,39 @@ async def test_returns_instruction_suggestion(self):
     async def test_uses_default_model(self):
         """optimize_instruction defaults to azure/gpt-5.2-chat."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        with patch(_AGENT_PATCH, agent_class):
+        with (
+            patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL) as mock_build,
+            patch(_AGENT_PATCH, agent_class),
+        ):
             await optimize_instruction("inst", _make_result(), "criterion")
-        # build_model_from_string("azure/gpt-5.2-chat") returns an OpenAIChatModel object
-        assert agent_class.call_args[0][0] is not None
+        mock_build.assert_called_once_with("azure/gpt-5.2-chat")
+        assert agent_class.call_args[0][0] is _FAKE_MODEL
 
     async def test_accepts_custom_model_string(self):
-        """optimize_instruction accepts a custom model string (provider/model format)."""
+        """optimize_instruction passes model string through build_model_from_string."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        with patch(_AGENT_PATCH, agent_class):
+        with (
+            patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL) as mock_build,
+            patch(_AGENT_PATCH, agent_class),
+        ):
             await optimize_instruction(
-                "inst",
-                _make_result(),
-                "criterion",
-                model="openai/gpt-4o-mini",
+                "inst", _make_result(), "criterion", model="openai/gpt-4o-mini"
             )
-        # build_model_from_string converts "openai/gpt-4o-mini" -> "openai:gpt-4o-mini"
-        assert agent_class.call_args[0][0] == "openai:gpt-4o-mini"
+        mock_build.assert_called_once_with("openai/gpt-4o-mini")
 
     async def test_accepts_model_object(self):
-        """optimize_instruction accepts a pre-built Model object."""
+        """optimize_instruction skips build_model_from_string for a pre-built Model object."""
         agent_class = _make_agent_mock("inst", "reason", "changes")
         fake_model = MagicMock()
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH) as mock_build, patch(_AGENT_PATCH, agent_class):
             await optimize_instruction("inst", _make_result(), "criterion", model=fake_model)
+        mock_build.assert_not_called()
         assert agent_class.call_args[0][0] is fake_model
 
     async def test_includes_criterion_in_prompt(self):
         agent_class = _make_agent_mock("improved", "reason", "change")
         agent_instance = agent_class.return_value
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             await optimize_instruction(
                 "Write code.", _make_result(), "Agent must use type hints on all functions."
             )
@@ -121,7 +125,7 @@ async def test_includes_criterion_in_prompt(self):
     async def test_includes_current_instruction_in_prompt(self):
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             await optimize_instruction(
                 "Always use FastAPI for web APIs.", _make_result(), "criterion"
             )
@@ -130,7 +134,7 @@ async def test_includes_current_instruction_in_prompt(self):
     async def test_includes_agent_output_in_prompt(self):
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             await optimize_instruction(
                 "inst", _make_result(final_response="def add(a, b): return a + b"), "criterion"
             )
@@ -138,7 +142,7 @@ async def test_includes_agent_output_in_prompt(self):
 
     async def test_handles_no_final_response(self):
         agent_class = _make_agent_mock("inst", "reason", "changes")
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             result = await optimize_instruction(
                 "inst", CopilotResult(success=False, turns=[]), "criterion"
             )
@@ -146,14 +150,14 @@ async def test_handles_no_final_response(self):
 
     async def test_handles_empty_instruction(self):
         agent_class = _make_agent_mock("new inst", "reason", "changes")
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             result = await optimize_instruction("", _make_result(), "criterion")
         assert isinstance(result, InstructionSuggestion)
 
     async def test_includes_tool_calls_in_prompt(self):
         agent_class = _make_agent_mock("inst", "reason", "changes")
         agent_instance = agent_class.return_value
-        with patch(_AGENT_PATCH, agent_class):
+        with patch(_BUILD_MODEL_PATCH, return_value=_FAKE_MODEL), patch(_AGENT_PATCH, agent_class):
             await optimize_instruction(
                 "inst", _make_result(tools=["create_file", "read_file"]), "criterion"
             )