Skip to content

Commit 7307c11

Browse files
RKest authored and copybara-github committed
test(otel): Add functional test for telemetry with MCP toolset
Co-authored-by: Max Ind <maxind@google.com>
PiperOrigin-RevId: 933053369
1 parent 23c0826 commit 7307c11

2 files changed

Lines changed: 310 additions & 2 deletions

File tree

tests/unittests/telemetry/functional_test_cases.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,144 @@
11281128
)
11291129

11301130

1131+
# ---------------------------------------------------------------------------
1132+
# MCP-integration single-turn shape (experimental semconv only).
1133+
#
1134+
# Used by ``test_functional.py``'s MCP integration test. The scenario is
1135+
# a single-turn agent (``MockModel`` returns text immediately) whose only
1136+
# tool source is an ``McpToolset`` whose underlying session exposes one
1137+
# ``mcp_echo`` tool. ``McpToolset`` calls ``list_tools()`` once per agent
1138+
# invocation and materializes the result into a ``FunctionDeclaration``;
1139+
# the experimental semconv builder reads that declaration straight from
1140+
# ``llm_request.config.tools`` without ever talking to the MCP server
1141+
# itself.
1142+
#
1143+
# Only the experimental path needs a dedicated shape: stable semconv
1144+
# doesn't emit ``gen_ai.tool.definitions`` at all, so the MCP integration
1145+
# would be indistinguishable from any other tool-bearing agent under
1146+
# stable semconv.
1147+
#
1148+
# In ``EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP``, the MCP-resolved
1149+
# ``mcp_echo`` definition surfaces in both ``gen_ai.tool.definitions``
1150+
# (span attribute) and the same key on the completion-details log
1151+
# record. The ``parameters`` block uses standard JSON Schema vocabulary
1152+
# (``object``, ``string``) because ``McpTool._get_declaration`` passes
1153+
# the MCP ``inputSchema`` through ``parameters_json_schema`` when the
1154+
# ``JSON_SCHEMA_FOR_FUNC_DECL`` feature is enabled.
1155+
# ---------------------------------------------------------------------------
1156+
1157+
def _mcp_text_message(role, content, **extra):
  """Returns one message dict holding a single text part.

  Keys are emitted in the order ``role``, ``parts``, then any ``extra``
  entries (for example ``finish_reason``).
  """
  return {
      "role": role,
      "parts": [{"content": content, "type": "text"}],
      **extra,
  }


# Name and description of the single MCP tool used by the MCP scenario.
_MCP_TOOL_NAME = "mcp_echo"
_MCP_TOOL_DESCRIPTION = "Echoes back its input."

# Expected ``gen_ai.tool.definitions`` entry for that tool. ``parameters``
# is plain JSON Schema: an object with one required string field, ``text``.
_MCP_TOOL_DEFINITION_FULL = {
    "name": _MCP_TOOL_NAME,
    "description": _MCP_TOOL_DESCRIPTION,
    "parameters": {
        "properties": {"text": {"type": "string"}},
        "required": ["text"],
        "type": "object",
    },
    "type": "function",
}

# Expected ``gen_ai.input.messages``: the lone user prompt.
_MCP_TURN_INPUT_MESSAGES = [_mcp_text_message("user", USER_PROMPT)]

# Expected ``gen_ai.output.messages``: the model's single text reply.
# ``MockModel`` does not populate ``finish_reason``; it surfaces here as
# the empty string from ``_to_finish_reason(None)``.
_MCP_TURN_OUTPUT_MESSAGES = [
    _mcp_text_message("assistant", FINAL_TEXT, finish_reason=""),
]
1182+
1183+
1184+
def _build_expected_mcp_digest() -> SpanDigest:
  """Assembles the expected digest for the single-turn MCP scenario.

  The tree is built innermost-first:
  ``invocation`` > ``invoke_agent some_root_agent`` > ``call_llm`` >
  ``generate_content mock``, with one completion-details log record
  attached to the innermost span.
  """

  def content_attributes():
    # Message / instruction / tool-definition attributes that appear on both
    # the ``generate_content`` span and its completion-details log record.
    # A fresh dict (with fresh wrapper lists) is returned on every call so
    # the span and log digests do not share containers beyond the
    # module-level message constants.
    return {
        "gen_ai.input.messages": _MCP_TURN_INPUT_MESSAGES,
        "gen_ai.system_instructions": [{
            "content": FULL_SYSTEM_INSTRUCTION,
            "type": "text",
        }],
        "gen_ai.tool.definitions": [_MCP_TOOL_DEFINITION_FULL],
        "gen_ai.output.messages": _MCP_TURN_OUTPUT_MESSAGES,
    }

  completion_details_log = LogDigest(
      event_name=GEN_AI_COMPLETION_DETAILS_EVENT,
      body=None,
      attributes={
          "gen_ai.agent.name": AGENT_NAME,
          "gen_ai.conversation.id": PRESENT,
          "user.id": "test_user",
          "gcp.vertex.agent.event_id": PRESENT,
          "gcp.vertex.agent.invocation_id": PRESENT,
          **content_attributes(),
      },
  )

  generate_content_span = SpanDigest(
      name="generate_content mock",
      attributes={
          "gen_ai.operation.name": "generate_content",
          "gen_ai.request.model": "mock",
          "gen_ai.agent.name": AGENT_NAME,
          "gen_ai.conversation.id": PRESENT,
          "gcp.vertex.agent.event_id": PRESENT,
          "gcp.vertex.agent.invocation_id": PRESENT,
          **content_attributes(),
      },
      logs=[completion_details_log],
  )

  call_llm_span = SpanDigest(
      name="call_llm",
      attributes={
          "gen_ai.system": "gcp.vertex.agent",
          "gen_ai.request.model": "mock",
          "gcp.vertex.agent.invocation_id": PRESENT,
          "gcp.vertex.agent.session_id": PRESENT,
          "gcp.vertex.agent.event_id": PRESENT,
          "gcp.vertex.agent.llm_request": "{}",
          "gcp.vertex.agent.llm_response": "{}",
      },
      children=[generate_content_span],
  )

  invoke_agent_span = SpanDigest(
      name="invoke_agent some_root_agent",
      attributes={
          "gen_ai.operation.name": "invoke_agent",
          "gen_ai.agent.description": AGENT_DESCRIPTION,
          "gen_ai.agent.name": AGENT_NAME,
          "gen_ai.conversation.id": PRESENT,
      },
      children=[call_llm_span],
  )

  return SpanDigest(
      name="invocation",
      attributes={},
      children=[invoke_agent_span],
  )


# Expected span/log tree for the MCP integration test under experimental
# semconv with ``span_and_event`` content capture.
EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP = _build_expected_mcp_digest()
1267+
1268+
11311269
# ---------------------------------------------------------------------------
11321270
# Parametrization list.
11331271
# ---------------------------------------------------------------------------

tests/unittests/telemetry/test_functional.py

Lines changed: 172 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,41 @@
2121
from google.adk.telemetry import _metrics
2222
from google.adk.telemetry import tracing
2323
from google.adk.tools import FunctionTool
24+
from google.adk.tools.base_tool import BaseTool
25+
from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
26+
from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
27+
from google.adk.tools.tool_context import ToolContext
2428
from google.genai import types
2529
from google.genai.types import Part
30+
from mcp import ClientSession as McpClientSession
31+
from mcp import StdioServerParameters
32+
from mcp.types import ListToolsResult
33+
from mcp.types import PaginatedRequestParams
34+
from mcp.types import Tool as McpTool
35+
from opentelemetry import trace
2636
from opentelemetry.instrumentation.google_genai import GoogleGenAiSdkInstrumentor
2737
from opentelemetry.sdk._logs.export import InMemoryLogRecordExporter
2838
from opentelemetry.sdk.metrics import MeterProvider
2939
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
3040
from opentelemetry.sdk.metrics.export import Metric
3141
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
3242
import pytest
43+
from typing_extensions import override
3344

3445
from ..testing_utils import InMemoryRunner
3546
from ..testing_utils import MockModel
47+
from ..testing_utils import TestInMemoryRunner
3648
from .functional_test_cases import ALL_CASES
49+
from .functional_test_cases import EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP
3750
from .functional_test_helpers import aclosing_wrapping_assertions
3851
from .functional_test_helpers import AGENT_NAME
3952
from .functional_test_helpers import build_test_agent
4053
from .functional_test_helpers import build_test_runner
54+
from .functional_test_helpers import CAPTURE_CONTENT
55+
from .functional_test_helpers import EXPERIMENTAL_OPT_IN
4156
from .functional_test_helpers import FunctionalTestCase
4257
from .functional_test_helpers import install_telemetry
58+
from .functional_test_helpers import OTEL_OPT_IN
4359
from .functional_test_helpers import run_agent_scenario
4460
from .functional_test_helpers import SpanDigest
4561
from .functional_test_helpers import TOOL_NAME
@@ -420,6 +436,160 @@ async def failing_tool():
420436
),
421437
]
422438

423-
got.sort(key=lambda p: p.attributes.get("gen_ai.tool.name", ""))
424-
want.sort(key=lambda p: p.attributes.get("gen_ai.tool.name", ""))
439+
got.sort(key=lambda p: str(p.attributes.get("gen_ai.tool.name", "")))
440+
want.sort(key=lambda p: str(p.attributes.get("gen_ai.tool.name", "")))
425441
assert got == want
442+
443+
444+
# ---------------------------------------------------------------------------
445+
# MCP integration: telemetry adds zero ``list_tools()`` calls of its own.
446+
#
447+
# The standard ADK ↔ MCP integration path is:
448+
#
449+
# Agent(tools=[McpToolset(...)])
450+
# → McpToolset.get_tools() ─ calls list_tools() ONCE, caches MCPTool list
451+
# → BaseLlmFlow loop calls each MCPTool.process_llm_request, which
452+
# materializes the tool's FunctionDeclaration into
453+
# llm_request.config.tools.
454+
#
455+
# By the time the experimental semconv builder reads
456+
# ``llm_request.config.tools``, MCP tools are ALREADY ``types.Tool``
457+
# entries with ``function_declarations``. Because the builder is fully
458+
# synchronous (it never calls ``list_tools()`` itself), the MCP server is
459+
# queried EXACTLY ONCE per agent invocation regardless of which semconv
460+
# (or capture mode) is active. These tests pin that contract AND verify
461+
# the resolved tool definitions surface intact in the experimental
462+
# telemetry.
463+
#
464+
# A ``_FakeMcpSession`` substitutes the live ``McpClientSession`` so the
465+
# test doesn't need a running MCP server. ``create_session`` on the toolset's
466+
# ``MCPSessionManager`` is patched to hand it out instead of dialing ``StdioServerParameters``.
467+
# ---------------------------------------------------------------------------
468+
469+
470+
class _FakeMcpSession(McpClientSession):
  """In-memory ``McpClientSession`` substitute that counts ``list_tools()``.

  It inherits from ``McpClientSession`` purely so that
  ``isinstance(x, McpClientSession)`` checks keep passing; the parent
  initializer is intentionally never run, so no anyio memory streams or
  peer process have to be wired up.

  Attributes:
    list_tools_call_count: Number of times ``list_tools()`` has been awaited.
  """

  def __init__(  # pyright: ignore[reportMissingSuperCall]
      self, *, tools: list[McpTool]
  ) -> None:
    # ``McpClientSession.__init__`` is skipped on purpose: the real one wants
    # live anyio streams + a peer process, none of which exist in this test.
    self.list_tools_call_count: int = 0
    self._tools: list[McpTool] = tools

  @override
  async def list_tools(
      self,
      cursor: str | None = None,
      *,
      params: PaginatedRequestParams | None = None,
  ) -> ListToolsResult:
    """Records the call and returns the configured tools.

    ``cursor`` and ``params`` are accepted only to match the parent
    signature; both are ignored and every call returns the full tool list.
    """
    self.list_tools_call_count += 1
    # Hand back a copy so callers cannot mutate the fake's own list.
    return ListToolsResult(tools=[*self._tools])
497+
498+
499+
def _make_fake_mcp_toolset(
    monkeypatch: pytest.MonkeyPatch, fake_session: _FakeMcpSession
) -> McpToolset:
  """Builds an ``McpToolset`` that is served entirely by ``fake_session``.

  Two methods on the toolset's ``MCPSessionManager`` are monkeypatched:

  * ``create_session`` hands back ``fake_session`` (no socket / subprocess).
  * ``close`` does nothing (the fake holds no resources).

  Args:
    monkeypatch: Pytest fixture used to install the two replacements.
    fake_session: Session returned by the patched ``create_session``.

  Returns:
    The patched toolset.
  """
  # Nominally a stdio command, but never actually invoked because
  # ``create_session`` is replaced below.
  stdio_params = StdioConnectionParams(
      server_params=StdioServerParameters(command="unused-by-test"),
  )
  toolset = McpToolset(connection_params=stdio_params)

  async def _hand_out_fake_session(*_args, **_kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
    return fake_session

  async def _noop_close(*_args, **_kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
    return None

  session_manager = toolset._mcp_session_manager  # pyright: ignore[reportPrivateUsage]
  monkeypatch.setattr(
      session_manager, "create_session", _hand_out_fake_session  # pyright: ignore[reportUnknownArgumentType]
  )
  monkeypatch.setattr(session_manager, "close", _noop_close)  # pyright: ignore[reportUnknownArgumentType]
  return toolset
528+
529+
530+
def _build_mcp_test_runner(toolset: McpToolset) -> TestInMemoryRunner:
  """Wraps ``toolset`` in a single-turn agent and returns a runner for it.

  The mock model is primed with exactly one text response, so a run performs
  exactly one agent invocation and the assertion on
  ``list_tools_call_count`` is unambiguous.

  Args:
    toolset: The agent's only tool source.

  Returns:
    A ``TestInMemoryRunner`` whose node is the freshly built agent.
  """
  single_text_reply = [Part.from_text(text="text response")]
  root_agent = Agent(
      name="some_root_agent",
      description="A sample root agent.",
      instruction="you are helpful",
      model=MockModel.create(responses=single_text_reply),
      tools=[toolset],
  )
  return TestInMemoryRunner(node=root_agent)
548+
549+
550+
@pytest.mark.asyncio
async def test_mcp_list_tools_called_once_under_experimental_semconv(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
  """Experimental semconv: exactly one ``list_tools()`` call per invocation.

  ``McpToolset`` has already turned each MCP tool into a
  ``FunctionDeclaration`` by the time the experimental semconv builder looks
  at ``llm_request.config.tools``, so the synchronous builder never has to
  (and never does) talk to the MCP server. The test checks two things:

  * the fake session's ``list_tools()`` was awaited exactly once, and
  * the exported spans and logs match
    ``EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP``, i.e. the MCP-resolved
    tool definition still surfaces in the telemetry intact.
  """
  # Environment for this scenario: experimental opt-in, ``span_and_event``
  # content capture, and ADK's own in-span message capture switched off.
  for env_name, env_value in (
      (OTEL_OPT_IN, EXPERIMENTAL_OPT_IN),
      (CAPTURE_CONTENT, "span_and_event"),
      ("ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS", "false"),
  ):
    monkeypatch.setenv(env_name, env_value)

  spans = InMemorySpanExporter()
  logs = InMemoryLogRecordExporter()
  install_telemetry(monkeypatch, spans, logs)

  # The one tool the fake MCP server advertises.
  echo_tool = McpTool(
      name="mcp_echo",
      description="Echoes back its input.",
      inputSchema={
          "type": "object",
          "properties": {"text": {"type": "string"}},
          "required": ["text"],
      },
  )
  session = _FakeMcpSession(tools=[echo_tool])
  runner = _build_mcp_test_runner(_make_fake_mcp_toolset(monkeypatch, session))

  await run_agent_scenario(runner)

  assert session.list_tools_call_count == 1

  observed = SpanDigest.build(
      spans.get_finished_spans(),
      logs.get_finished_logs(),
  )
  assert observed == EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP

0 commit comments

Comments
 (0)