From 5fce6322587a667ce1c58c24fa211590e57438a6 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Thu, 25 Jun 2026 18:56:11 -0700
Subject: [PATCH 1/2] Python: Add include_detailed_errors option for skill
 script execution

Port the .NET fix from #6680. SkillsProvider previously swallowed
exceptions from skill script execution and resource reading, returning a
generic error string so the model could not self-correct.

- Add an include_detailed_errors option to SkillsProvider.__init__ and
  from_paths. When True, script-execution failures return an error string
  with the exception message appended; when False (default), the exception
  is logged and re-raised, delegating to the function-invocation pipeline's
  own include_detailed_errors policy.
- _read_skill_resource now logs and re-raises instead of returning a
  generic error string. Resources take no model arguments, so a swallowed
  generic error is not actionable by the model.
- Update and add tests covering the new propagation and detailed-error
  behavior.

Fixes #6681

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../packages/core/agent_framework/_skills.py  | 60 +++++++++++++++--
 .../packages/core/tests/core/test_skills.py   | 67 +++++++++++--------
 2 files changed, 91 insertions(+), 36 deletions(-)

diff --git a/python/packages/core/agent_framework/_skills.py b/python/packages/core/agent_framework/_skills.py
index aeee8df6476..dbf8fc085d3 100644
--- a/python/packages/core/agent_framework/_skills.py
+++ b/python/packages/core/agent_framework/_skills.py
@@ -1843,6 +1843,7 @@ def __init__(
         *,
         instruction_template: str | None = None,
         require_script_approval: bool = False,
+        include_detailed_errors: bool = False,
         disable_caching: bool = False,
         source_id: str | None = None,
     ) -> None:
@@ -1881,6 +1882,21 @@ def __init__(
                 the user declined. Defaults to ``False``.  See
                 ``samples/02-agents/skills/script_approval/script_approval.py``
                 for the full approval loop pattern.
+            include_detailed_errors: Controls how script-execution failures are
+                surfaced to the model. When ``False`` (the default), the
+                exception is logged and re-raised, delegating error handling to
+                the function-invocation pipeline (which applies its own
+                ``include_detailed_errors`` policy). When ``True``, the
+                exception message is appended to the error string returned
+                directly to the model, enabling it to retry with different
+                arguments. This may disclose raw exception details to the
+                model, so exercise particular caution when enabling it for
+                skills whose scripts originate from untrusted or third-party
+                sources: a maliciously crafted script could throw an exception
+                whose message embeds a prompt-injection payload, which would
+                then be fed back to the model. Only enable this when the skills
+                and their scripts come from a trusted source. Defaults to
+                ``False``.
             disable_caching: When ``True``, rebuilds tools and instructions
                 from the source on every invocation instead of caching
                 after the first build.  Defaults to ``False``.
@@ -1904,6 +1920,7 @@ def __init__(
         self._source = source
         self._instruction_template = instruction_template
         self._require_script_approval = require_script_approval
+        self._include_detailed_errors = include_detailed_errors
         self._disable_caching = disable_caching
 
         # Lazy-initialized via _get_or_create_context / _create_context
@@ -1922,6 +1939,7 @@ def from_paths(
         resource_filter: Callable[[str, str], bool] | None = None,
         instruction_template: str | None = None,
         require_script_approval: bool = False,
+        include_detailed_errors: bool = False,
         disable_caching: bool = False,
         source_id: str | None = None,
     ) -> _TSkillsProvider:
@@ -1969,6 +1987,16 @@ def from_paths(
                 the user declined. Defaults to ``False``.  See
                 ``samples/02-agents/skills/script_approval/script_approval.py``
                 for the full approval loop pattern.
+            include_detailed_errors: Controls how script-execution failures are
+                surfaced to the model. When ``False`` (the default), the
+                exception is logged and re-raised, delegating error handling to
+                the function-invocation pipeline (which applies its own
+                ``include_detailed_errors`` policy). When ``True``, the
+                exception message is appended to the error string returned
+                directly to the model, enabling it to retry with different
+                arguments. This may disclose raw exception details to the
+                model, so only enable it when the skills and their scripts come
+                from a trusted source. Defaults to ``False``.
             disable_caching: When ``True``, rebuilds tools and instructions
                 from the source on every invocation instead of caching
                 after the first build.
@@ -1992,6 +2020,7 @@ def from_paths(
             source,
             instruction_template=instruction_template,
             require_script_approval=require_script_approval,
+            include_detailed_errors=include_detailed_errors,
             disable_caching=disable_caching,
             source_id=source_id,
         )
@@ -2318,8 +2347,15 @@ async def _run_skill_script(
                 ``agent.run(user_id="123")``).
 
         Returns:
-            The result, or a user-facing error message on
-            failure.
+            The script result. Returns a user-facing error string for
+            validation failures (empty or unknown skill/script name) and,
+            when ``include_detailed_errors`` is ``True``, for execution
+            failures.
+
+        Raises:
+            Exception: Re-raises any exception raised while running the
+                script when ``include_detailed_errors`` is ``False``,
+                delegating error handling to the function-invocation pipeline.
         """
         if not skill_name or not skill_name.strip():
             return "Error: Skill name cannot be empty."
@@ -2337,9 +2373,11 @@ async def _run_skill_script(
 
         try:
             return await script.run(skill, args, **kwargs)
-        except Exception:
+        except Exception as ex:
             logger.exception("Error running script '%s' in skill '%s'", script_name, skill_name)
-            return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'."
+            if self._include_detailed_errors:
+                return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'. Exception: {ex}"
+            raise
 
     async def _read_skill_resource(
         self, skills: Sequence[Skill], skill_name: str, resource_name: str, **kwargs: Any
@@ -2359,8 +2397,16 @@ async def _read_skill_resource(
                 ``agent.run(user_id="123")``).
 
         Returns:
-            The resource content (any type), or a user-facing error message on
-            failure.
+            The resource content (any type). Returns a user-facing error
+            string for validation failures (empty or unknown skill/resource
+            name).
+
+        Raises:
+            Exception: Re-raises any exception raised while reading the
+                resource. Resources take no model-supplied arguments, so a
+                swallowed generic error is not actionable by the model;
+                re-raising lets the function-invocation pipeline decide how to
+                surface it.
         """
         if not skill_name or not skill_name.strip():
             return "Error: Skill name cannot be empty."
@@ -2380,7 +2426,7 @@ async def _read_skill_resource(
             return await resource.read(**kwargs)
         except Exception:
             logger.exception("Failed to read resource '%s' from skill '%s'", resource_name, skill_name)
-            return f"Error: Failed to read resource '{resource_name}' from skill '{skill_name}'."
+            raise
 
 
 # endregion
diff --git a/python/packages/core/tests/core/test_skills.py b/python/packages/core/tests/core/test_skills.py
index d9789709f77..d35180d486b 100644
--- a/python/packages/core/tests/core/test_skills.py
+++ b/python/packages/core/tests/core/test_skills.py
@@ -2802,7 +2802,7 @@ async def test_read_skill_resource_whitespace_resource_name_returns_error(self)
         assert result.startswith("Error:")
         assert "empty" in result
 
-    async def test_read_callable_resource_exception_returns_error(self) -> None:
+    async def test_read_callable_resource_exception_propagates(self) -> None:
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="A skill."), instructions="Body")
 
         @skill.resource
@@ -2811,11 +2811,10 @@ def exploding_resource() -> Any:
 
         provider = SkillsProvider([skill])
         await _init_provider(provider)
-        result = await provider._read_skill_resource(_raw_skills(provider), "my-skill", "exploding_resource")
-        assert result.startswith("Error:")
-        assert "Failed to read resource" in result
+        with pytest.raises(RuntimeError, match="boom"):
+            await provider._read_skill_resource(_raw_skills(provider), "my-skill", "exploding_resource")
 
-    async def test_read_async_callable_resource_exception_returns_error(self) -> None:
+    async def test_read_async_callable_resource_exception_propagates(self) -> None:
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="A skill."), instructions="Body")
 
         @skill.resource
@@ -2824,8 +2823,8 @@ async def async_exploding() -> Any:
 
         provider = SkillsProvider([skill])
         await _init_provider(provider)
-        result = await provider._read_skill_resource(_raw_skills(provider), "my-skill", "async_exploding")
-        assert result.startswith("Error:")
+        with pytest.raises(ValueError, match="async boom"):
+            await provider._read_skill_resource(_raw_skills(provider), "my-skill", "async_exploding")
 
     async def test_load_code_skill_xml_escapes_metadata(self) -> None:
         skill = InlineSkill(
@@ -3588,10 +3587,9 @@ async def test_file_script_error_without_runner(self) -> None:
         result = await run_tool.func(skill_name="my-skill", script_name="code-s")
         assert result == "ok"
 
-        # File script without runner returns error
-        result = await run_tool.func(skill_name="my-skill", script_name="file-s")
-        assert "Error" in result
-        assert "Failed to run" in result
+        # File script without runner propagates an error by default
+        with pytest.raises(TypeError, match="requires a FileSkill"):
+            await run_tool.func(skill_name="my-skill", script_name="file-s")
 
     async def test_async_code_script_runs_directly(self) -> None:
         async def async_func(x: int = 0) -> str:
@@ -3646,10 +3644,9 @@ async def test_script_with_path_errors_without_runner(self) -> None:
         result = await run_tool.func(skill_name="my-skill", script_name="code-s")
         assert result == "ok"
 
-        # Path+function script without runner returns error
-        result = await run_tool.func(skill_name="my-skill", script_name="path-s")
-        assert "Error" in result
-        assert "script_runner" in result or "Failed to run" in result
+        # Path+function script without runner propagates an error by default
+        with pytest.raises(TypeError, match="requires a FileSkill"):
+            await run_tool.func(skill_name="my-skill", script_name="path-s")
 
     async def test_run_skill_script_error_on_missing_skill(self) -> None:
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")
@@ -3717,10 +3714,10 @@ def process(**kwargs: Any) -> str:
 
         provider = SkillsProvider([skill])
         await _init_provider(provider)
-        result = await provider._run_skill_script(
-            _raw_skills(provider), "my-skill", "process", args={"mode": "llm-value"}, mode="runtime-value"
-        )
-        assert "Error" in result
+        with pytest.raises(TypeError):
+            await provider._run_skill_script(
+                _raw_skills(provider), "my-skill", "process", args={"mode": "llm-value"}, mode="runtime-value"
+            )
 
     async def test_run_skill_script_error_on_missing_script(self) -> None:
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")
@@ -3804,8 +3801,8 @@ async def test_require_script_approval_does_not_affect_other_tools(self) -> None
         for t in other_tools:
             assert t.approval_mode == "never_require"
 
-    async def test_code_script_exception_returns_error(self) -> None:
-        """A code script function that raises should return an error string."""
+    async def test_code_script_exception_propagates_by_default(self) -> None:
+        """A code script function that raises should propagate by default."""
 
         def failing_script() -> str:
             raise RuntimeError("Something went wrong")
@@ -3816,10 +3813,23 @@ def failing_script() -> str:
         provider = SkillsProvider([skill])
         await _init_provider(provider)
         run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script")
+        with pytest.raises(RuntimeError, match="Something went wrong"):
+            await run_tool.func(skill_name="my-skill", script_name="boom")
+
+    async def test_code_script_exception_includes_details_when_enabled(self) -> None:
+        """When include_detailed_errors=True, the script error string includes the exception message."""
+
+        def failing_script() -> str:
+            raise RuntimeError("Something went wrong")
+
+        skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")
+        skill._scripts.append(InlineSkillScript(name="boom", function=failing_script))
+
+        provider = SkillsProvider([skill], include_detailed_errors=True)
+        await _init_provider(provider)
+        run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script")
         result = await run_tool.func(skill_name="my-skill", script_name="boom")
-        assert "Error" in result
-        assert "boom" in result
-        assert "Something went wrong" not in result
+        assert result == "Error: Failed to run script 'boom' in skill 'my-skill'. Exception: Something went wrong"
 
     async def test_custom_template_without_runner_placeholder_raises(self) -> None:
         """Providers accept custom templates without {runner_instructions}."""
@@ -5799,17 +5809,16 @@ def runner(skill: Any, script: Any, args: Any = None) -> str:
         assert result == "list_result"
         assert captured["args"] == ["input.docx", "--verbose"]
 
-    async def test_run_skill_script_inline_with_list_args_returns_error(self) -> None:
-        """Inline script called with list args through provider returns error (TypeError caught)."""
+    async def test_run_skill_script_inline_with_list_args_propagates_error(self) -> None:
+        """Inline script called with list args through provider propagates the TypeError by default."""
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")
         skill._scripts.append(InlineSkillScript(name="s1", function=lambda: "ok"))
 
         provider = SkillsProvider([skill])
         await _init_provider(provider)
         run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script")
-        result = await run_tool.func(skill_name="my-skill", script_name="s1", args=["arg1"])
-        assert "Error" in result
-        assert "Failed to run" in result
+        with pytest.raises(TypeError, match="requires keyword arguments"):
+            await run_tool.func(skill_name="my-skill", script_name="s1", args=["arg1"])
 
     async def test_file_skill_content_includes_scripts_block(self) -> None:
         """FileSkill.content appends an <available_scripts> block when scripts are present."""

From c32df6195cb8f92774892dd0d8f0dc4097e6bce6 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Fri, 26 Jun 2026 14:04:28 -0700
Subject: [PATCH 2/2] Re-raise skill script/resource errors instead of adding a
 provider option

Address PR review: returning a plain error string from the skill provider
bypassed the shared tool-error contract (no exception metadata, not counted
toward consecutive-error limits), risking infinite retries.

Instead of porting the .NET provider-level IncludeDetailedErrors option,
_run_skill_script and _read_skill_resource now always log and re-raise on
failure. This delegates error handling to the function-invocation pipeline,
whose existing include_detailed_errors policy is the Python equivalent of
.NET's FunctionInvokingChatClient.IncludeDetailedErrors and correctly
preserves exception metadata and consecutive-error counting.

Validation failures (empty/unknown skill, script, or resource names) still
return user-facing error strings. Tests updated accordingly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../packages/core/agent_framework/_skills.py  | 43 +++----------------
 .../packages/core/tests/core/test_skills.py   | 15 -------
 2 files changed, 5 insertions(+), 53 deletions(-)

diff --git a/python/packages/core/agent_framework/_skills.py b/python/packages/core/agent_framework/_skills.py
index dbf8fc085d3..24f534fb692 100644
--- a/python/packages/core/agent_framework/_skills.py
+++ b/python/packages/core/agent_framework/_skills.py
@@ -1843,7 +1843,6 @@ def __init__(
         *,
         instruction_template: str | None = None,
         require_script_approval: bool = False,
-        include_detailed_errors: bool = False,
         disable_caching: bool = False,
         source_id: str | None = None,
     ) -> None:
@@ -1882,21 +1881,6 @@ def __init__(
                 the user declined. Defaults to ``False``.  See
                 ``samples/02-agents/skills/script_approval/script_approval.py``
                 for the full approval loop pattern.
-            include_detailed_errors: Controls how script-execution failures are
-                surfaced to the model. When ``False`` (the default), the
-                exception is logged and re-raised, delegating error handling to
-                the function-invocation pipeline (which applies its own
-                ``include_detailed_errors`` policy). When ``True``, the
-                exception message is appended to the error string returned
-                directly to the model, enabling it to retry with different
-                arguments. This may disclose raw exception details to the
-                model, so exercise particular caution when enabling it for
-                skills whose scripts originate from untrusted or third-party
-                sources: a maliciously crafted script could throw an exception
-                whose message embeds a prompt-injection payload, which would
-                then be fed back to the model. Only enable this when the skills
-                and their scripts come from a trusted source. Defaults to
-                ``False``.
             disable_caching: When ``True``, rebuilds tools and instructions
                 from the source on every invocation instead of caching
                 after the first build.  Defaults to ``False``.
@@ -1920,7 +1904,6 @@ def __init__(
         self._source = source
         self._instruction_template = instruction_template
         self._require_script_approval = require_script_approval
-        self._include_detailed_errors = include_detailed_errors
         self._disable_caching = disable_caching
 
         # Lazy-initialized via _get_or_create_context / _create_context
@@ -1939,7 +1922,6 @@ def from_paths(
         resource_filter: Callable[[str, str], bool] | None = None,
         instruction_template: str | None = None,
         require_script_approval: bool = False,
-        include_detailed_errors: bool = False,
         disable_caching: bool = False,
         source_id: str | None = None,
     ) -> _TSkillsProvider:
@@ -1987,16 +1969,6 @@ def from_paths(
                 the user declined. Defaults to ``False``.  See
                 ``samples/02-agents/skills/script_approval/script_approval.py``
                 for the full approval loop pattern.
-            include_detailed_errors: Controls how script-execution failures are
-                surfaced to the model. When ``False`` (the default), the
-                exception is logged and re-raised, delegating error handling to
-                the function-invocation pipeline (which applies its own
-                ``include_detailed_errors`` policy). When ``True``, the
-                exception message is appended to the error string returned
-                directly to the model, enabling it to retry with different
-                arguments. This may disclose raw exception details to the
-                model, so only enable it when the skills and their scripts come
-                from a trusted source. Defaults to ``False``.
             disable_caching: When ``True``, rebuilds tools and instructions
                 from the source on every invocation instead of caching
                 after the first build.
@@ -2020,7 +1992,6 @@ def from_paths(
             source,
             instruction_template=instruction_template,
             require_script_approval=require_script_approval,
-            include_detailed_errors=include_detailed_errors,
             disable_caching=disable_caching,
             source_id=source_id,
         )
@@ -2348,14 +2319,12 @@ async def _run_skill_script(
 
         Returns:
             The script result. Returns a user-facing error string for
-            validation failures (empty or unknown skill/script name) and,
-            when ``include_detailed_errors`` is ``True``, for execution
-            failures.
+            validation failures (empty or unknown skill/script name).
 
         Raises:
-            Exception: Re-raises any exception raised while running the
-                script when ``include_detailed_errors`` is ``False``,
-                delegating error handling to the function-invocation pipeline.
+            Exception: Re-raises any exception raised while running the script,
+                delegating error handling to the function-invocation pipeline
+                (which applies its own ``include_detailed_errors`` policy).
         """
         if not skill_name or not skill_name.strip():
             return "Error: Skill name cannot be empty."
@@ -2373,10 +2342,8 @@ async def _run_skill_script(
 
         try:
             return await script.run(skill, args, **kwargs)
-        except Exception as ex:
+        except Exception:
             logger.exception("Error running script '%s' in skill '%s'", script_name, skill_name)
-            if self._include_detailed_errors:
-                return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'. Exception: {ex}"
             raise
 
     async def _read_skill_resource(
diff --git a/python/packages/core/tests/core/test_skills.py b/python/packages/core/tests/core/test_skills.py
index d35180d486b..63936e15220 100644
--- a/python/packages/core/tests/core/test_skills.py
+++ b/python/packages/core/tests/core/test_skills.py
@@ -3816,21 +3816,6 @@ def failing_script() -> str:
         with pytest.raises(RuntimeError, match="Something went wrong"):
             await run_tool.func(skill_name="my-skill", script_name="boom")
 
-    async def test_code_script_exception_includes_details_when_enabled(self) -> None:
-        """When include_detailed_errors=True, the script error string includes the exception message."""
-
-        def failing_script() -> str:
-            raise RuntimeError("Something went wrong")
-
-        skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")
-        skill._scripts.append(InlineSkillScript(name="boom", function=failing_script))
-
-        provider = SkillsProvider([skill], include_detailed_errors=True)
-        await _init_provider(provider)
-        run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script")
-        result = await run_tool.func(skill_name="my-skill", script_name="boom")
-        assert result == "Error: Failed to run script 'boom' in skill 'my-skill'. Exception: Something went wrong"
-
     async def test_custom_template_without_runner_placeholder_raises(self) -> None:
         """Providers accept custom templates without {runner_instructions}."""
         skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")