From f506006f5fc932cf4d36e957ef4eca573968c079 Mon Sep 17 00:00:00 2001 From: hawktang Date: Wed, 24 Jun 2026 03:35:50 +0800 Subject: [PATCH] fix(skills): isolate per-toolset failures in SkillToolset resolution SkillToolset._resolve_additional_tools_from_state gathered get_tools_with_prefix() from every provided toolset in a single asyncio.gather() with no error isolation. One toolset that raises while listing its tools -- e.g. an McpToolset whose remote MCP server is restarting or returns 503 -- aborted the whole gather, so no skill-gated additional tools resolved, including unrelated individual function tools that needed zero I/O. Use return_exceptions=True and skip (with a warning) any toolset that fails, re-raising CancelledError so cooperative cancellation is preserved. A failing toolset now contributes no tools and recovers on the next invocation. Adds a regression test: a failing toolset in the additional_tools pool no longer prevents the healthy tools and core skill tools from resolving. Co-Authored-By: Claude Opus 4.8 --- src/google/adk/tools/skill_toolset.py | 25 ++++++++--- tests/unittests/tools/test_skill_toolset.py | 46 +++++++++++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py index 4b4fd069d7..0c0e6a5ba3 100644 --- a/src/google/adk/tools/skill_toolset.py +++ b/src/google/adk/tools/skill_toolset.py @@ -1080,11 +1080,26 @@ async def _resolve_additional_tools_from_state( # Collect all candidate tools from both individual tools and toolsets candidate_tools = self._provided_tools_by_name.copy() if self._provided_toolsets: - ts_results = await asyncio.gather(*( - ts.get_tools_with_prefix(readonly_context) - for ts in self._provided_toolsets - )) - for ts_tools in ts_results: + # Isolate per-toolset failures: a single toolset that raises while listing + # its tools (e.g. an unreachable MCP server) must not abort resolution of + # the remaining toolsets and individual tools. + ts_results = await asyncio.gather( + *( + ts.get_tools_with_prefix(readonly_context) + for ts in self._provided_toolsets + ), + return_exceptions=True, + ) + for toolset, ts_tools in zip(self._provided_toolsets, ts_results): + if isinstance(ts_tools, asyncio.CancelledError): + raise ts_tools + if isinstance(ts_tools, BaseException): + logger.warning( + "Skipping toolset %s while resolving skill additional tools: %s", + type(toolset).__name__, + ts_tools, + ) + continue for t in ts_tools: candidate_tools[t.name] = t diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py index e9386bb063..3093102367 100644 --- a/tests/unittests/tools/test_skill_toolset.py +++ b/tests/unittests/tools/test_skill_toolset.py @@ -2052,6 +2052,52 @@ async def test_skill_toolset_resolution_error_handling(mock_skill1, caplog): assert len(tools) == 4 +@pytest.mark.asyncio +async def test_skill_toolset_resolution_isolates_failing_toolset( + mock_skill1, caplog +): + """A provided toolset that raises while listing its tools (e.g. an + unreachable MCP server) must not abort resolution of the other additional + tools.""" + mock_skill1.frontmatter.metadata = { + "adk_additional_tools": ["good_tool", "from_failing_toolset"] + } + mock_skill1.name = "skill1" + + # Healthy individual tool that must still resolve. + good_tool = mock.create_autospec(skill_toolset.BaseTool, instance=True) + good_tool.name = "good_tool" + + # Toolset whose listing fails (simulates a down / unreachable MCP server). + failing_toolset = mock.create_autospec( + skill_toolset.BaseToolset, instance=True + ) + failing_toolset.get_tools_with_prefix.side_effect = RuntimeError( + "MCP server unreachable" + ) + + toolset = skill_toolset.SkillToolset( + [mock_skill1], additional_tools=[good_tool, failing_toolset] + ) + ctx = _make_tool_context_with_agent() + + # Activate skill + load_tool = skill_toolset.LoadSkillTool(toolset) + await load_tool.run_async(args={"skill_name": "skill1"}, tool_context=ctx) + + with caplog.at_level(logging.WARNING): + tools = await toolset.get_tools(readonly_context=ctx) + + tool_names = {t.name for t in tools} + # Healthy individual tool and core skill tools still resolve. + assert "good_tool" in tool_names + assert "list_skills" in tool_names + # The failing toolset contributes nothing instead of breaking everything. + assert "from_failing_toolset" not in tool_names + # And the failure is surfaced via a warning, not silently swallowed. + assert "Skipping toolset" in caplog.text + + @pytest.fixture(name="mock_registry") def _mock_registry(): """Fixture for mock SkillRegistry."""