test(otel): Add functional test for telemetry with MCP toolset

RKest · copybara-github · commit 7307c11bf6e8 · 2026-06-16T06:19:41.000-07:00
Co-authored-by: Max Ind <maxind@google.com>
PiperOrigin-RevId: 933053369
diff --git a/tests/unittests/telemetry/functional_test_cases.py b/tests/unittests/telemetry/functional_test_cases.py
@@ -1128,6 +1128,144 @@
 )
 
 
+# ---------------------------------------------------------------------------
+# MCP-integration single-turn shape (experimental semconv only).
+#
+# Used by ``test_functional.py``'s MCP integration test. The scenario is
+# a single-turn agent (``MockModel`` returns text immediately) whose only
+# tool source is an ``McpToolset`` whose underlying session exposes one
+# ``mcp_echo`` tool. ``McpToolset`` calls ``list_tools()`` once per agent
+# invocation and materializes the result into a ``FunctionDeclaration``;
+# the experimental semconv builder reads that declaration straight from
+# ``llm_request.config.tools`` without ever talking to the MCP server
+# itself.
+#
+# Only the experimental path needs a dedicated shape: stable semconv
+# doesn't emit ``gen_ai.tool.definitions`` at all, so the MCP integration
+# would be indistinguishable from any other tool-bearing agent under
+# stable semconv.
+#
+# In ``EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP``, the MCP-resolved
+# ``mcp_echo`` definition surfaces under ``gen_ai.tool.definitions`` twice:
+# as a span attribute and as an attribute of the completion-details log
+# record. The ``parameters`` block uses standard JSON Schema vocabulary
+# (``object``, ``string``) because ``McpTool._get_declaration`` passes
+# the MCP ``inputSchema`` through ``parameters_json_schema`` when the
+# ``JSON_SCHEMA_FOR_FUNC_DECL`` feature is enabled.
+# ---------------------------------------------------------------------------
+
+_MCP_TOOL_NAME = "mcp_echo"  # Name/description mirror the test's fake tool.
+_MCP_TOOL_DESCRIPTION = "Echoes back its input."
+_MCP_TOOL_DEFINITION_FULL = {  # Expected ``gen_ai.tool.definitions`` entry.
+    "name": _MCP_TOOL_NAME,
+    "description": _MCP_TOOL_DESCRIPTION,
+    "parameters": {  # Same content as the fake tool's ``inputSchema``.
+        "properties": {"text": {"type": "string"}},
+        "required": ["text"],
+        "type": "object",
+    },
+    "type": "function",
+}
+
+_MCP_TURN_INPUT_MESSAGES = [{  # Expected ``gen_ai.input.messages`` value.
+    "role": "user",
+    "parts": [{"content": USER_PROMPT, "type": "text"}],
+}]
+
+_MCP_TURN_OUTPUT_MESSAGES = [{  # Expected ``gen_ai.output.messages`` value.
+    "role": "assistant",
+    "parts": [{"content": FINAL_TEXT, "type": "text"}],
+    # ``MockModel`` does not populate ``finish_reason``; it surfaces here as
+    # the empty string from ``_to_finish_reason(None)``.
+    "finish_reason": "",
+}]
+
+
+EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP = SpanDigest(
+    name="invocation",
+    attributes={},
+    children=[
+        SpanDigest(
+            name="invoke_agent some_root_agent",
+            attributes={
+                "gen_ai.operation.name": "invoke_agent",
+                "gen_ai.agent.description": AGENT_DESCRIPTION,
+                "gen_ai.agent.name": AGENT_NAME,
+                "gen_ai.conversation.id": PRESENT,
+            },
+            children=[
+                SpanDigest(
+                    name="call_llm",
+                    attributes={
+                        "gen_ai.system": "gcp.vertex.agent",
+                        "gen_ai.request.model": "mock",
+                        "gcp.vertex.agent.invocation_id": PRESENT,
+                        "gcp.vertex.agent.session_id": PRESENT,
+                        "gcp.vertex.agent.event_id": PRESENT,
+                        "gcp.vertex.agent.llm_request": "{}",
+                        "gcp.vertex.agent.llm_response": "{}",
+                    },
+                    children=[
+                        SpanDigest(
+                            name="generate_content mock",
+                            attributes={
+                                "gen_ai.operation.name": "generate_content",
+                                "gen_ai.request.model": "mock",
+                                "gen_ai.agent.name": AGENT_NAME,
+                                "gen_ai.conversation.id": PRESENT,
+                                "gcp.vertex.agent.event_id": PRESENT,
+                                "gcp.vertex.agent.invocation_id": PRESENT,
+                                "gen_ai.input.messages": (
+                                    _MCP_TURN_INPUT_MESSAGES
+                                ),
+                                "gen_ai.system_instructions": [{
+                                    "content": FULL_SYSTEM_INSTRUCTION,
+                                    "type": "text",
+                                }],
+                                "gen_ai.tool.definitions": [
+                                    _MCP_TOOL_DEFINITION_FULL
+                                ],
+                                "gen_ai.output.messages": (
+                                    _MCP_TURN_OUTPUT_MESSAGES
+                                ),
+                            },
+                            logs=[  # Repeats the span's content attributes.
+                                LogDigest(
+                                    event_name=GEN_AI_COMPLETION_DETAILS_EVENT,
+                                    body=None,
+                                    attributes={
+                                        "gen_ai.agent.name": AGENT_NAME,
+                                        "gen_ai.conversation.id": PRESENT,
+                                        "user.id": "test_user",
+                                        "gcp.vertex.agent.event_id": PRESENT,
+                                        "gcp.vertex.agent.invocation_id": (
+                                            PRESENT
+                                        ),
+                                        "gen_ai.input.messages": (
+                                            _MCP_TURN_INPUT_MESSAGES
+                                        ),
+                                        "gen_ai.system_instructions": [{
+                                            "content": FULL_SYSTEM_INSTRUCTION,
+                                            "type": "text",
+                                        }],
+                                        "gen_ai.tool.definitions": [
+                                            _MCP_TOOL_DEFINITION_FULL
+                                        ],
+                                        "gen_ai.output.messages": (
+                                            _MCP_TURN_OUTPUT_MESSAGES
+                                        ),
+                                    },
+                                ),
+                            ],
+                        ),
+                    ],
+                ),
+            ],
+        ),
+    ],
+)
+
+
 # ---------------------------------------------------------------------------
 # Parametrization list.
 # ---------------------------------------------------------------------------
diff --git a/tests/unittests/telemetry/test_functional.py b/tests/unittests/telemetry/test_functional.py
@@ -21,25 +21,41 @@
 from google.adk.telemetry import _metrics
 from google.adk.telemetry import tracing
 from google.adk.tools import FunctionTool
+from google.adk.tools.base_tool import BaseTool
+from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
+from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
+from google.adk.tools.tool_context import ToolContext
 from google.genai import types
 from google.genai.types import Part
+from mcp import ClientSession as McpClientSession
+from mcp import StdioServerParameters
+from mcp.types import ListToolsResult
+from mcp.types import PaginatedRequestParams
+from mcp.types import Tool as McpTool
+from opentelemetry import trace
 from opentelemetry.instrumentation.google_genai import GoogleGenAiSdkInstrumentor
 from opentelemetry.sdk._logs.export import InMemoryLogRecordExporter
 from opentelemetry.sdk.metrics import MeterProvider
 from opentelemetry.sdk.metrics.export import InMemoryMetricReader
 from opentelemetry.sdk.metrics.export import Metric
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 import pytest
+from typing_extensions import override
 
 from ..testing_utils import InMemoryRunner
 from ..testing_utils import MockModel
+from ..testing_utils import TestInMemoryRunner
 from .functional_test_cases import ALL_CASES
+from .functional_test_cases import EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP
 from .functional_test_helpers import aclosing_wrapping_assertions
 from .functional_test_helpers import AGENT_NAME
 from .functional_test_helpers import build_test_agent
 from .functional_test_helpers import build_test_runner
+from .functional_test_helpers import CAPTURE_CONTENT
+from .functional_test_helpers import EXPERIMENTAL_OPT_IN
 from .functional_test_helpers import FunctionalTestCase
 from .functional_test_helpers import install_telemetry
+from .functional_test_helpers import OTEL_OPT_IN
 from .functional_test_helpers import run_agent_scenario
 from .functional_test_helpers import SpanDigest
 from .functional_test_helpers import TOOL_NAME
@@ -420,6 +436,160 @@ async def failing_tool():
       ),
   ]
 
-  got.sort(key=lambda p: p.attributes.get("gen_ai.tool.name", ""))
-  want.sort(key=lambda p: p.attributes.get("gen_ai.tool.name", ""))
+  got.sort(key=lambda p: str(p.attributes.get("gen_ai.tool.name", "")))
+  want.sort(key=lambda p: str(p.attributes.get("gen_ai.tool.name", "")))
   assert got == want
+
+
+# ---------------------------------------------------------------------------
+# MCP integration: telemetry adds zero ``list_tools()`` calls of its own.
+#
+# The standard ADK ↔ MCP integration path is:
+#
+#   Agent(tools=[McpToolset(...)])
+#     → McpToolset.get_tools()  ─ calls list_tools() ONCE, caches MCPTool list
+#     → BaseLlmFlow loop calls each MCPTool.process_llm_request, which
+#       materializes the tool's FunctionDeclaration into
+#       llm_request.config.tools.
+#
+# By the time the experimental semconv builder reads
+# ``llm_request.config.tools``, MCP tools are ALREADY ``types.Tool``
+# entries with ``function_declarations``. Because the builder is fully
+# synchronous (it never calls ``list_tools()`` itself), the MCP server is
+# queried EXACTLY ONCE per agent invocation regardless of which semconv
+# (or capture mode) is active. The test below pins that contract for the
+# experimental semconv AND verifies that the resolved tool definition
+# surfaces intact in the experimental telemetry.
+#
+# A ``_FakeMcpSession`` replaces the live ``McpClientSession`` so no MCP
+# server is needed: ``create_session`` on the toolset's session manager is
+# patched to hand it out instead of dialing ``StdioServerParameters``.
+# ---------------------------------------------------------------------------
+
+
+class _FakeMcpSession(McpClientSession):
+  """Minimal ``McpClientSession`` stand-in with a counted ``list_tools()``.
+
+  Subclasses ``McpClientSession`` (and skips its real ``__init__``) so that
+  every ``isinstance(x, McpClientSession)`` check in ADK and in the MCP
+  Python client passes, without needing to wire up the underlying anyio
+  memory streams + peer process.
+  """
+
+  def __init__(  # pyright: ignore[reportMissingSuperCall]
+      self, *, tools: list[McpTool]
+  ) -> None:
+    # Deliberately skip ``McpClientSession.__init__``: the real one wants
+    # live anyio streams + a peer process. ``isinstance`` checks still
+    # succeed, which is all ADK's MCP plumbing requires.
+    self._tools: list[McpTool] = tools
+    self.list_tools_call_count: int = 0  # Bumped by every ``list_tools()``.
+
+  @override
+  async def list_tools(
+      self,
+      cursor: str | None = None,  # Ignored by the fake.
+      *,
+      params: PaginatedRequestParams | None = None,  # Ignored too.
+  ) -> ListToolsResult:
+    self.list_tools_call_count += 1
+    return ListToolsResult(tools=list(self._tools))  # Copy per call.
+
+
+def _make_fake_mcp_toolset(
+    monkeypatch: pytest.MonkeyPatch, fake_session: _FakeMcpSession
+) -> McpToolset:
+  """Returns an ``McpToolset`` whose session manager hands out ``fake_session``.
+
+  Patches the toolset's ``MCPSessionManager`` instance (not the class) so:
+    * ``create_session`` returns the fake (no socket / subprocess).
+    * ``close`` is a no-op (the fake holds no resources).
+
+  Connection params are nominally a stdio command but never actually
+  invoked because ``create_session`` is overridden.
+  """
+  toolset = McpToolset(
+      connection_params=StdioConnectionParams(
+          server_params=StdioServerParameters(command="unused-by-test"),
+      )
+  )
+
+  async def _create_session(*_args, **_kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+    return fake_session
+
+  async def _close(*_args, **_kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+    return None
+
+  monkeypatch.setattr(
+      toolset._mcp_session_manager, "create_session", _create_session  # pyright: ignore[reportPrivateUsage, reportUnknownArgumentType]
+  )
+  monkeypatch.setattr(toolset._mcp_session_manager, "close", _close)  # pyright: ignore[reportPrivateUsage, reportUnknownArgumentType]
+  return toolset
+
+
+def _build_mcp_test_runner(toolset: McpToolset) -> TestInMemoryRunner:
+  """Builds a single-turn agent runner whose only tool source is ``toolset``.
+
+  Single-turn (one ``Part.from_text`` response) so the assertion on
+  ``list_tools_call_count`` is unambiguous: exactly one agent invocation
+  is performed.
+  """
+  mock_model = MockModel.create(
+      responses=[Part.from_text(text="text response")]  # Digest: FINAL_TEXT.
+  )
+  test_agent = Agent(
+      name=AGENT_NAME,  # Same constant the expected digest asserts on.
+      description="A sample root agent.",  # Digest expects AGENT_DESCRIPTION.
+      instruction="you are helpful",
+      model=mock_model,
+      tools=[toolset],
+  )
+  return TestInMemoryRunner(node=test_agent)
+
+
+@pytest.mark.asyncio
+async def test_mcp_list_tools_called_once_under_experimental_semconv(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+  """Experimental semconv: exactly one ``list_tools()`` call per invocation.
+
+  By the time the experimental semconv builder inspects
+  ``llm_request.config.tools``, ``McpToolset`` has already materialized
+  each MCP tool into a ``FunctionDeclaration`` — so the synchronous
+  builder never has to (and never does) talk to the MCP server. The
+  MCP-resolved tool definition still surfaces in the experimental
+  telemetry intact, sourced from the ``FunctionDeclaration`` rather than
+  from a fresh ``list_tools()`` call.
+  """
+  monkeypatch.setenv(OTEL_OPT_IN, EXPERIMENTAL_OPT_IN)
+  monkeypatch.setenv(CAPTURE_CONTENT, "span_and_event")  # Spans and logs.
+  monkeypatch.setenv("ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS", "false")
+
+  span_exporter = InMemorySpanExporter()
+  log_exporter = InMemoryLogRecordExporter()
+  install_telemetry(monkeypatch, span_exporter, log_exporter)
+
+  fake_session = _FakeMcpSession(
+      tools=[
+          McpTool(
+              name="mcp_echo",
+              description="Echoes back its input.",
+              inputSchema={
+                  "type": "object",
+                  "properties": {"text": {"type": "string"}},
+                  "required": ["text"],
+              },
+          )
+      ]
+  )
+  toolset = _make_fake_mcp_toolset(monkeypatch, fake_session)
+
+  await run_agent_scenario(_build_mcp_test_runner(toolset))
+
+  assert fake_session.list_tools_call_count == 1  # Telemetry added none.
+
+  digest = SpanDigest.build(
+      span_exporter.get_finished_spans(),
+      log_exporter.get_finished_logs(),
+  )
+  assert digest == EXPECTED_EXPERIMENTAL_SPAN_AND_EVENT_WITH_MCP