From f506006f5fc932cf4d36e957ef4eca573968c079 Mon Sep 17 00:00:00 2001
From: hawktang <hawkang@gmail.com>
Date: Wed, 24 Jun 2026 03:35:50 +0800
Subject: [PATCH] fix(skills): isolate per-toolset failures in SkillToolset
 resolution

SkillToolset._resolve_additional_tools_from_state gathered
get_tools_with_prefix() from every provided toolset in a single
asyncio.gather() with no error isolation. One toolset that raises while
listing its tools -- e.g. an McpToolset whose remote MCP server is
restarting or returns 503 -- aborted the whole gather, so no skill-gated
additional tools resolved, including unrelated individual function tools
that needed zero I/O.

Use return_exceptions=True and skip (with a warning) any toolset that
fails, re-raising CancelledError so cooperative cancellation is
preserved. A failing toolset now contributes no tools and recovers on the
next invocation.

Adds a regression test: a failing toolset in the additional_tools pool no
longer prevents the healthy tools and core skill tools from resolving.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/google/adk/tools/skill_toolset.py       | 25 ++++++++---
 tests/unittests/tools/test_skill_toolset.py | 46 +++++++++++++++++++++
 2 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py
index 4b4fd069d7..0c0e6a5ba3 100644
--- a/src/google/adk/tools/skill_toolset.py
+++ b/src/google/adk/tools/skill_toolset.py
@@ -1080,11 +1080,26 @@ async def _resolve_additional_tools_from_state(
     # Collect all candidate tools from both individual tools and toolsets
     candidate_tools = self._provided_tools_by_name.copy()
     if self._provided_toolsets:
-      ts_results = await asyncio.gather(*(
-          ts.get_tools_with_prefix(readonly_context)
-          for ts in self._provided_toolsets
-      ))
-      for ts_tools in ts_results:
+      # Isolate per-toolset failures: a single toolset that raises while listing
+      # its tools (e.g. an unreachable MCP server) must not abort resolution of
+      # the remaining toolsets and individual tools.
+      ts_results = await asyncio.gather(
+          *(
+              ts.get_tools_with_prefix(readonly_context)
+              for ts in self._provided_toolsets
+          ),
+          return_exceptions=True,
+      )
+      for toolset, ts_tools in zip(self._provided_toolsets, ts_results):
+        if isinstance(ts_tools, asyncio.CancelledError):
+          raise ts_tools
+        if isinstance(ts_tools, BaseException):
+          logger.warning(
+              "Skipping toolset %s while resolving skill additional tools: %s",
+              type(toolset).__name__,
+              ts_tools,
+          )
+          continue
         for t in ts_tools:
           candidate_tools[t.name] = t
 
diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py
index e9386bb063..3093102367 100644
--- a/tests/unittests/tools/test_skill_toolset.py
+++ b/tests/unittests/tools/test_skill_toolset.py
@@ -2052,6 +2052,52 @@ async def test_skill_toolset_resolution_error_handling(mock_skill1, caplog):
   assert len(tools) == 4
 
 
+@pytest.mark.asyncio
+async def test_skill_toolset_resolution_isolates_failing_toolset(
+    mock_skill1, caplog
+):
+  """A provided toolset that raises while listing its tools (e.g. an
+  unreachable MCP server) must not abort resolution of the other additional
+  tools."""
+  mock_skill1.frontmatter.metadata = {
+      "adk_additional_tools": ["good_tool", "from_failing_toolset"]
+  }
+  mock_skill1.name = "skill1"
+
+  # Healthy individual tool that must still resolve.
+  good_tool = mock.create_autospec(skill_toolset.BaseTool, instance=True)
+  good_tool.name = "good_tool"
+
+  # Toolset whose listing fails (simulates a down / unreachable MCP server).
+  failing_toolset = mock.create_autospec(
+      skill_toolset.BaseToolset, instance=True
+  )
+  failing_toolset.get_tools_with_prefix.side_effect = RuntimeError(
+      "MCP server unreachable"
+  )
+
+  toolset = skill_toolset.SkillToolset(
+      [mock_skill1], additional_tools=[good_tool, failing_toolset]
+  )
+  ctx = _make_tool_context_with_agent()
+
+  # Activate skill
+  load_tool = skill_toolset.LoadSkillTool(toolset)
+  await load_tool.run_async(args={"skill_name": "skill1"}, tool_context=ctx)
+
+  with caplog.at_level(logging.WARNING):
+    tools = await toolset.get_tools(readonly_context=ctx)
+
+  tool_names = {t.name for t in tools}
+  # Healthy individual tool and core skill tools still resolve.
+  assert "good_tool" in tool_names
+  assert "list_skills" in tool_names
+  # The failing toolset contributes nothing instead of breaking everything.
+  assert "from_failing_toolset" not in tool_names
+  # And the failure is surfaced via a warning, not silently swallowed.
+  assert "Skipping toolset" in caplog.text
+
+
 @pytest.fixture(name="mock_registry")
 def _mock_registry():
   """Fixture for mock SkillRegistry."""