Skip to content

Commit c63ad89

Browse files
author
Dylan Huang
authored
simple pydantic multi agent example (#120)
* support model as JSON * add JSONTooltip * TODO: make test_pydantic_multi_agent work * vite build * simple test_pydantic_multi_agent
1 parent a53f31c commit c63ad89

14 files changed

Lines changed: 399 additions & 168 deletions

eval_protocol/pytest/default_pydantic_ai_rollout_processor.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import os
21
import asyncio
32
import logging
3+
import types
44
from typing import List
55

66
from openai.types.chat.chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam
@@ -23,7 +23,6 @@
2323
UserPromptPart,
2424
)
2525
from pydantic_ai.providers.openai import OpenAIProvider
26-
from pydantic_ai.providers.fireworks import FireworksProvider
2726

2827
logger = logging.getLogger(__name__)
2928

@@ -45,20 +44,40 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) ->
4544
# validate that config.kwargs contains an "agent" field holding either a Pydantic AI Agent instance or a function that returns one
4645
if "agent" not in config.kwargs:
4746
raise ValueError("kwargs must contain an 'agent' field with a valid Pydantic AI Agent instance")
48-
if not isinstance(config.kwargs["agent"], Agent):
49-
raise ValueError("kwargs['agent'] must be a valid Pydantic AI Agent instance")
50-
51-
agent: Agent = config.kwargs["agent"]
47+
if not isinstance(config.kwargs["agent"], Agent) and not isinstance(
48+
config.kwargs["agent"], types.FunctionType
49+
):
50+
raise ValueError(
51+
"kwargs['agent'] must be a valid Pydantic AI Agent instance or a function that returns an Agent"
52+
)
5253

53-
model = OpenAIModel(
54-
config.completion_params["model"],
55-
provider=config.completion_params["provider"],
56-
)
54+
if isinstance(config.kwargs["agent"], types.FunctionType):
55+
setup_agent = config.kwargs["agent"]
56+
if not isinstance(config.completion_params["model"], dict):
57+
raise ValueError(
58+
"completion_params['model'] must be a dict mapping agent argument names to model config dicts (with 'model' and 'provider' keys)"
59+
)
60+
kwargs = {}
61+
for model_name, model_config in config.completion_params["model"].items():
62+
kwargs[model_name] = OpenAIModel(
63+
model_config["model"],
64+
provider=model_config["provider"],
65+
)
66+
agent = setup_agent(**kwargs)
67+
model = None
68+
else:
69+
agent = config.kwargs["agent"]
70+
model = OpenAIModel(
71+
config.completion_params["model"],
72+
provider=config.completion_params["provider"],
73+
)
5774

5875
async def process_row(row: EvaluationRow) -> EvaluationRow:
5976
"""Process a single row with agent rollout."""
6077
model_messages = [self.convert_ep_message_to_pyd_message(m, row) for m in row.messages]
61-
response = await agent.run(message_history=model_messages, model=model)
78+
response = await agent.run(
79+
message_history=model_messages, model=model, usage_limits=config.kwargs.get("usage_limits")
80+
)
6281
row.messages = await self.convert_pyd_message_to_ep_message(response.all_messages())
6382
return row
6483

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""
2+
Copied and modified for eval-protocol from https://ai.pydantic.dev/multi-agent-applications/#agent-delegation
3+
4+
To test your Pydantic AI multi-agent application, you can pass a function that
5+
sets up the agents and their tools. The function should accept parameters that
6+
map a model to each agent. In completion_params, you can provide mappings of
7+
model to agent, keyed by the setup function's parameter names.
8+
"""
9+
10+
import pytest
11+
12+
from eval_protocol.models import EvaluationRow, Message
13+
from eval_protocol.pytest import evaluation_test
14+
from pydantic_ai import Agent
15+
16+
from eval_protocol.pytest.default_pydantic_ai_rollout_processor import PydanticAgentRolloutProcessor
17+
from pydantic_ai import RunContext
18+
from pydantic_ai.models import Model
19+
from pydantic_ai.usage import UsageLimits
20+
21+
22+
def setup_agent(joke_generation_model: Model, joke_selection_model: Model) -> Agent:
    """
    Build the two-agent joke application with a caller-chosen model per agent.

    Accepting the models as parameters is the extra step an application takes
    so that the model behind each agent can be selected from outside.

    Args:
        joke_generation_model: Model given to the agent that writes candidate jokes.
        joke_selection_model: Model given to the agent that picks the final joke.

    Returns:
        The joke selection agent, with `joke_factory` registered on it as a tool
        that delegates to the joke generation agent.
    """
    joke_generation_agent = Agent(joke_generation_model, output_type=list[str])

    selection_prompt = (
        "Use the `joke_factory` to generate some jokes, then choose the best. You must return just a single joke."
    )
    joke_selection_agent = Agent(model=joke_selection_model, system_prompt=selection_prompt)

    # Tool on the selection agent: asks the generation agent for `count` jokes,
    # handing the current run's usage object (ctx.usage) to the nested run.
    @joke_selection_agent.tool
    async def joke_factory(ctx: RunContext[None], count: int) -> list[str]:
        generation_run = await joke_generation_agent.run(f"Please generate {count} jokes.", usage=ctx.usage)
        return generation_run.output

    return joke_selection_agent
46+
47+
48+
@pytest.mark.asyncio
@evaluation_test(
    input_messages=[Message(role="user", content="Tell me a joke.")],
    completion_params=[
        {
            # Each key under "model" is the name of a setup_agent parameter;
            # each value is the model config for that parameter.
            "model": {
                "joke_generation_model": {"model": "accounts/fireworks/models/kimi-k2-instruct", "provider": "fireworks"},
                "joke_selection_model": {
                    "model": "accounts/fireworks/models/deepseek-v3p1",
                    "provider": "fireworks",
                },
            }
        },
    ],
    rollout_processor=PydanticAgentRolloutProcessor(),
    rollout_processor_kwargs={
        # A function is passed here instead of an Agent instance.
        "agent": setup_agent,
        # Forwarded by PydanticAgentRolloutProcessor into the agent's "run" call.
        "usage_limits": UsageLimits(request_limit=5, total_tokens_limit=1000),
    },
    mode="pointwise",
)
async def test_pydantic_multi_agent(row: EvaluationRow) -> EvaluationRow:
    """
    Simple multi-agent example for Pydantic AI.

    The test body performs no checks of its own: it returns the row it
    receives unchanged.
    """
    return row

vite-app/dist/assets/index-Bw6MHHaR.js

Lines changed: 136 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-Bw6MHHaR.js.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-BxZNbf6w.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-Bxmt9iUR.css

Lines changed: 0 additions & 1 deletion
This file was deleted.

vite-app/dist/assets/index-DbgWqpuZ.js

Lines changed: 0 additions & 131 deletions
This file was deleted.

vite-app/dist/assets/index-DbgWqpuZ.js.map

Lines changed: 0 additions & 1 deletion
This file was deleted.

vite-app/dist/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
66
<title>EP | Log Viewer</title>
77
<link rel="icon" href="/assets/favicon-BkAAWQga.png" />
8-
<script type="module" crossorigin src="/assets/index-DbgWqpuZ.js"></script>
9-
<link rel="stylesheet" crossorigin href="/assets/index-Bxmt9iUR.css">
8+
<script type="module" crossorigin src="/assets/index-Bw6MHHaR.js"></script>
9+
<link rel="stylesheet" crossorigin href="/assets/index-BxZNbf6w.css">
1010
</head>
1111
<body>
1212
<div id="root"></div>

vite-app/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"react-chartjs-2": "^5.3.0",
2222
"react-dom": "^19.1.0",
2323
"react-router-dom": "^7.7.1",
24+
"react-tooltip": "^5.29.1",
2425
"zod": "^4.0.14"
2526
},
2627
"devDependencies": {

0 commit comments

Comments
 (0)