|
1 | 1 | """ |
2 | 2 | Copied and modified for eval-protocol from https://ai.pydantic.dev/multi-agent-applications/#agent-delegation |
3 | 3 |
|
4 | | -To test your Pydantic AI multi-agent application, you can pass a function that |
5 | | -sets up the agents and their tools. The function should accept parameters that |
6 | | -map a model to each agent. In completion_params, you can provide mappings of |
7 | | -model to agent based on key. |
| 4 | +To test your Pydantic AI multi-agent application, you can pass a factory that |
| 5 | +sets up the agent based on the completion_params. The factory should accept a |
| 6 | +RolloutProcessorConfig. In completion_params, the "model" entry maps each |
| 7 | +agent's key to the model that agent should use. |
8 | 8 | """ |
9 | 9 |
|
| 10 | +from pydantic_ai.models.openai import OpenAIModel |
10 | 11 | import pytest |
11 | 12 |
|
12 | 13 | from eval_protocol.models import EvaluationRow, Message |
|
18 | 19 | from pydantic_ai.models import Model |
19 | 20 | from pydantic_ai.usage import UsageLimits |
20 | 21 |
|
| 22 | +from eval_protocol.pytest.types import RolloutProcessorConfig |
| 23 | + |
21 | 24 |
|
22 | 25 | def setup_agent(joke_generation_model: Model, joke_selection_model: Model) -> Agent: |
23 | 26 | """ |
@@ -45,22 +48,31 @@ async def joke_factory(ctx: RunContext[None], count: int) -> list[str]: # pyrig |
45 | 48 | return joke_selection_agent |
46 | 49 |
|
47 | 50 |
|
| 51 | +def agent_factory(config: RolloutProcessorConfig) -> Agent: |
| 52 | + joke_generation_model = OpenAIModel( |
| 53 | + config.completion_params["model"]["joke_generation_model"], provider="fireworks" |
| 54 | + ) |
| 55 | + joke_selection_model = OpenAIModel(config.completion_params["model"]["joke_selection_model"], provider="fireworks") |
| 56 | + return setup_agent( |
| 57 | + joke_generation_model, |
| 58 | + joke_selection_model, |
| 59 | + ) |
| 60 | + |
| 61 | + |
48 | 62 | @pytest.mark.asyncio |
49 | 63 | @evaluation_test( |
50 | 64 | input_messages=[[[Message(role="user", content="Tell me a joke.")]]], |
51 | 65 | completion_params=[ |
52 | 66 | # multi-agent |
53 | 67 | { |
54 | | - "joke_generation_model": { |
55 | | - "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", |
56 | | - }, |
57 | | - "joke_selection_model": { |
58 | | - "model": "fireworks_ai/accounts/fireworks/models/deepseek-v3p1", |
59 | | - }, |
| 68 | + "model": { |
| 69 | + "joke_generation_model": "accounts/fireworks/models/kimi-k2-instruct", |
| 70 | + "joke_selection_model": "accounts/fireworks/models/deepseek-v3p1", |
| 71 | + } |
60 | 72 | }, |
61 | 73 | ], |
62 | 74 | rollout_processor=PydanticAgentRolloutProcessor( |
63 | | - setup_agent, UsageLimits(request_limit=5, total_tokens_limit=1000) |
| 75 | + agent_factory, UsageLimits(request_limit=5, total_tokens_limit=1000) |
64 | 76 | ), |
65 | 77 | mode="pointwise", |
66 | 78 | ) |
|
0 commit comments