From 5fce6322587a667ce1c58c24fa211590e57438a6 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Thu, 25 Jun 2026 18:56:11 -0700 Subject: [PATCH 1/2] Python: Add include_detailed_errors option for skill script execution Port the .NET fix from #6680. SkillsProvider previously swallowed exceptions from skill script execution and resource reading, returning a generic error string so the model could not self-correct. - Add an include_detailed_errors option to SkillsProvider.__init__ and from_paths. When True, script-execution failures return an error string with the exception message appended; when False (default), the exception is logged and re-raised, delegating to the function-invocation pipeline's own include_detailed_errors policy. - _read_skill_resource now logs and re-raises instead of returning a generic error string. Resources take no model arguments, so a swallowed generic error is not actionable by the model. - Update and add tests covering the new propagation and detailed-error behavior. Fixes #6681 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_skills.py | 60 +++++++++++++++-- .../packages/core/tests/core/test_skills.py | 67 +++++++++++-------- 2 files changed, 91 insertions(+), 36 deletions(-) diff --git a/python/packages/core/agent_framework/_skills.py b/python/packages/core/agent_framework/_skills.py index aeee8df6476..dbf8fc085d3 100644 --- a/python/packages/core/agent_framework/_skills.py +++ b/python/packages/core/agent_framework/_skills.py @@ -1843,6 +1843,7 @@ def __init__( *, instruction_template: str | None = None, require_script_approval: bool = False, + include_detailed_errors: bool = False, disable_caching: bool = False, source_id: str | None = None, ) -> None: @@ -1881,6 +1882,21 @@ def __init__( the user declined. Defaults to ``False``. See ``samples/02-agents/skills/script_approval/script_approval.py`` for the full approval loop pattern. + include_detailed_errors: Controls how script-execution failures are + surfaced to the model. When ``False`` (the default), the + exception is logged and re-raised, delegating error handling to + the function-invocation pipeline (which applies its own + ``include_detailed_errors`` policy). When ``True``, the + exception message is appended to the error string returned + directly to the model, enabling it to retry with different + arguments. This may disclose raw exception details to the + model, so exercise particular caution when enabling it for + skills whose scripts originate from untrusted or third-party + sources: a maliciously crafted script could throw an exception + whose message embeds a prompt-injection payload, which would + then be fed back to the model. Only enable this when the skills + and their scripts come from a trusted source. Defaults to + ``False``. disable_caching: When ``True``, rebuilds tools and instructions from the source on every invocation instead of caching after the first build. Defaults to ``False``. @@ -1904,6 +1920,7 @@ def __init__( self._source = source self._instruction_template = instruction_template self._require_script_approval = require_script_approval + self._include_detailed_errors = include_detailed_errors self._disable_caching = disable_caching # Lazy-initialized via _get_or_create_context / _create_context @@ -1922,6 +1939,7 @@ def from_paths( resource_filter: Callable[[str, str], bool] | None = None, instruction_template: str | None = None, require_script_approval: bool = False, + include_detailed_errors: bool = False, disable_caching: bool = False, source_id: str | None = None, ) -> _TSkillsProvider: @@ -1969,6 +1987,16 @@ def from_paths( the user declined. Defaults to ``False``. See ``samples/02-agents/skills/script_approval/script_approval.py`` for the full approval loop pattern. + include_detailed_errors: Controls how script-execution failures are + surfaced to the model. When ``False`` (the default), the + exception is logged and re-raised, delegating error handling to + the function-invocation pipeline (which applies its own + ``include_detailed_errors`` policy). When ``True``, the + exception message is appended to the error string returned + directly to the model, enabling it to retry with different + arguments. This may disclose raw exception details to the + model, so only enable it when the skills and their scripts come + from a trusted source. Defaults to ``False``. disable_caching: When ``True``, rebuilds tools and instructions from the source on every invocation instead of caching after the first build. @@ -1992,6 +2020,7 @@ def from_paths( source, instruction_template=instruction_template, require_script_approval=require_script_approval, + include_detailed_errors=include_detailed_errors, disable_caching=disable_caching, source_id=source_id, ) @@ -2318,8 +2347,15 @@ async def _run_skill_script( ``agent.run(user_id="123")``). Returns: - The result, or a user-facing error message on - failure. + The script result. Returns a user-facing error string for + validation failures (empty or unknown skill/script name) and, + when ``include_detailed_errors`` is ``True``, for execution + failures. + + Raises: + Exception: Re-raises any exception raised while running the + script when ``include_detailed_errors`` is ``False``, + delegating error handling to the function-invocation pipeline. """ if not skill_name or not skill_name.strip(): return "Error: Skill name cannot be empty." @@ -2337,9 +2373,11 @@ async def _run_skill_script( try: return await script.run(skill, args, **kwargs) - except Exception: + except Exception as ex: logger.exception("Error running script '%s' in skill '%s'", script_name, skill_name) - return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'." + if self._include_detailed_errors: + return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'. Exception: {ex}" + raise async def _read_skill_resource( self, skills: Sequence[Skill], skill_name: str, resource_name: str, **kwargs: Any @@ -2359,8 +2397,16 @@ async def _read_skill_resource( ``agent.run(user_id="123")``). Returns: - The resource content (any type), or a user-facing error message on - failure. + The resource content (any type). Returns a user-facing error + string for validation failures (empty or unknown skill/resource + name). + + Raises: + Exception: Re-raises any exception raised while reading the + resource. Resources take no model-supplied arguments, so a + swallowed generic error is not actionable by the model; + re-raising lets the function-invocation pipeline decide how to + surface it. """ if not skill_name or not skill_name.strip(): return "Error: Skill name cannot be empty." @@ -2380,7 +2426,7 @@ async def _read_skill_resource( return await resource.read(**kwargs) except Exception: logger.exception("Failed to read resource '%s' from skill '%s'", resource_name, skill_name) - return f"Error: Failed to read resource '{resource_name}' from skill '{skill_name}'." + raise # endregion diff --git a/python/packages/core/tests/core/test_skills.py b/python/packages/core/tests/core/test_skills.py index d9789709f77..d35180d486b 100644 --- a/python/packages/core/tests/core/test_skills.py +++ b/python/packages/core/tests/core/test_skills.py @@ -2802,7 +2802,7 @@ async def test_read_skill_resource_whitespace_resource_name_returns_error(self) assert result.startswith("Error:") assert "empty" in result - async def test_read_callable_resource_exception_returns_error(self) -> None: + async def test_read_callable_resource_exception_propagates(self) -> None: skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="A skill."), instructions="Body") @skill.resource @@ -2811,11 +2811,10 @@ def exploding_resource() -> Any: provider = SkillsProvider([skill]) await _init_provider(provider) - result = await provider._read_skill_resource(_raw_skills(provider), "my-skill", "exploding_resource") - assert result.startswith("Error:") - assert "Failed to read resource" in result + with pytest.raises(RuntimeError, match="boom"): + await provider._read_skill_resource(_raw_skills(provider), "my-skill", "exploding_resource") - async def test_read_async_callable_resource_exception_returns_error(self) -> None: + async def test_read_async_callable_resource_exception_propagates(self) -> None: skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="A skill."), instructions="Body") @skill.resource @@ -2824,8 +2823,8 @@ async def async_exploding() -> Any: provider = SkillsProvider([skill]) await _init_provider(provider) - result = await provider._read_skill_resource(_raw_skills(provider), "my-skill", "async_exploding") - assert result.startswith("Error:") + with pytest.raises(ValueError, match="async boom"): + await provider._read_skill_resource(_raw_skills(provider), "my-skill", "async_exploding") async def test_load_code_skill_xml_escapes_metadata(self) -> None: skill = InlineSkill( @@ -3588,10 +3587,9 @@ async def test_file_script_error_without_runner(self) -> None: result = await run_tool.func(skill_name="my-skill", script_name="code-s") assert result == "ok" - # File script without runner returns error - result = await run_tool.func(skill_name="my-skill", script_name="file-s") - assert "Error" in result - assert "Failed to run" in result + # File script without runner propagates an error by default + with pytest.raises(TypeError, match="requires a FileSkill"): + await run_tool.func(skill_name="my-skill", script_name="file-s") async def test_async_code_script_runs_directly(self) -> None: async def async_func(x: int = 0) -> str: @@ -3646,10 +3644,9 @@ async def test_script_with_path_errors_without_runner(self) -> None: result = await run_tool.func(skill_name="my-skill", script_name="code-s") assert result == "ok" - # Path+function script without runner returns error - result = await run_tool.func(skill_name="my-skill", script_name="path-s") - assert "Error" in result - assert "script_runner" in result or "Failed to run" in result + # Path+function script without runner propagates an error by default + with pytest.raises(TypeError, match="requires a FileSkill"): + await run_tool.func(skill_name="my-skill", script_name="path-s") async def test_run_skill_script_error_on_missing_skill(self) -> None: skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body") @@ -3717,10 +3714,10 @@ def process(**kwargs: Any) -> str: provider = SkillsProvider([skill]) await _init_provider(provider) - result = await provider._run_skill_script( - _raw_skills(provider), "my-skill", "process", args={"mode": "llm-value"}, mode="runtime-value" - ) - assert "Error" in result + with pytest.raises(TypeError): + await provider._run_skill_script( + _raw_skills(provider), "my-skill", "process", args={"mode": "llm-value"}, mode="runtime-value" + ) async def test_run_skill_script_error_on_missing_script(self) -> None: skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body") @@ -3804,8 +3801,8 @@ async def test_require_script_approval_does_not_affect_other_tools(self) -> None for t in other_tools: assert t.approval_mode == "never_require" - async def test_code_script_exception_returns_error(self) -> None: - """A code script function that raises should return an error string.""" + async def test_code_script_exception_propagates_by_default(self) -> None: + """A code script function that raises should propagate by default.""" def failing_script() -> str: raise RuntimeError("Something went wrong") @@ -3816,10 +3813,23 @@ def failing_script() -> str: provider = SkillsProvider([skill]) await _init_provider(provider) run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script") + with pytest.raises(RuntimeError, match="Something went wrong"): + await run_tool.func(skill_name="my-skill", script_name="boom") + + async def test_code_script_exception_includes_details_when_enabled(self) -> None: + """When include_detailed_errors=True, the script error string includes the exception message.""" + + def failing_script() -> str: + raise RuntimeError("Something went wrong") + + skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body") + skill._scripts.append(InlineSkillScript(name="boom", function=failing_script)) + + provider = SkillsProvider([skill], include_detailed_errors=True) + await _init_provider(provider) + run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script") result = await run_tool.func(skill_name="my-skill", script_name="boom") - assert "Error" in result - assert "boom" in result - assert "Something went wrong" not in result + assert result == "Error: Failed to run script 'boom' in skill 'my-skill'. Exception: Something went wrong" async def test_custom_template_without_runner_placeholder_raises(self) -> None: """Providers accept custom templates without {runner_instructions}.""" @@ -5799,17 +5809,16 @@ def runner(skill: Any, script: Any, args: Any = None) -> str: assert result == "list_result" assert captured["args"] == ["input.docx", "--verbose"] - async def test_run_skill_script_inline_with_list_args_returns_error(self) -> None: - """Inline script called with list args through provider returns error (TypeError caught).""" + async def test_run_skill_script_inline_with_list_args_propagates_error(self) -> None: + """Inline script called with list args through provider propagates the TypeError by default.""" skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body") skill._scripts.append(InlineSkillScript(name="s1", function=lambda: "ok")) provider = SkillsProvider([skill]) await _init_provider(provider) run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script") - result = await run_tool.func(skill_name="my-skill", script_name="s1", args=["arg1"]) - assert "Error" in result - assert "Failed to run" in result + with pytest.raises(TypeError, match="requires keyword arguments"): + await run_tool.func(skill_name="my-skill", script_name="s1", args=["arg1"]) async def test_file_skill_content_includes_scripts_block(self) -> None: """FileSkill.content appends an block when scripts are present.""" From c32df6195cb8f92774892dd0d8f0dc4097e6bce6 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Fri, 26 Jun 2026 14:04:28 -0700 Subject: [PATCH 2/2] Re-raise skill script/resource errors instead of adding a provider option Address PR review: returning a plain error string from the skill provider bypassed the shared tool-error contract (no exception metadata, not counted toward consecutive-error limits), risking infinite retries. Instead of porting the .NET provider-level IncludeDetailedErrors option, _run_skill_script and _read_skill_resource now always log and re-raise on failure. This delegates error handling to the function-invocation pipeline, whose existing include_detailed_errors policy is the Python equivalent of .NET's FunctionInvokingChatClient.IncludeDetailedErrors and correctly preserves exception metadata and consecutive-error counting. Validation failures (empty/unknown skill, script, or resource names) still return user-facing error strings. Tests updated accordingly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_skills.py | 43 +++---------------- .../packages/core/tests/core/test_skills.py | 15 ------- 2 files changed, 5 insertions(+), 53 deletions(-) diff --git a/python/packages/core/agent_framework/_skills.py b/python/packages/core/agent_framework/_skills.py index dbf8fc085d3..24f534fb692 100644 --- a/python/packages/core/agent_framework/_skills.py +++ b/python/packages/core/agent_framework/_skills.py @@ -1843,7 +1843,6 @@ def __init__( *, instruction_template: str | None = None, require_script_approval: bool = False, - include_detailed_errors: bool = False, disable_caching: bool = False, source_id: str | None = None, ) -> None: @@ -1882,21 +1881,6 @@ def __init__( the user declined. Defaults to ``False``. See ``samples/02-agents/skills/script_approval/script_approval.py`` for the full approval loop pattern. - include_detailed_errors: Controls how script-execution failures are - surfaced to the model. When ``False`` (the default), the - exception is logged and re-raised, delegating error handling to - the function-invocation pipeline (which applies its own - ``include_detailed_errors`` policy). When ``True``, the - exception message is appended to the error string returned - directly to the model, enabling it to retry with different - arguments. This may disclose raw exception details to the - model, so exercise particular caution when enabling it for - skills whose scripts originate from untrusted or third-party - sources: a maliciously crafted script could throw an exception - whose message embeds a prompt-injection payload, which would - then be fed back to the model. Only enable this when the skills - and their scripts come from a trusted source. Defaults to - ``False``. disable_caching: When ``True``, rebuilds tools and instructions from the source on every invocation instead of caching after the first build. Defaults to ``False``. @@ -1920,7 +1904,6 @@ def __init__( self._source = source self._instruction_template = instruction_template self._require_script_approval = require_script_approval - self._include_detailed_errors = include_detailed_errors self._disable_caching = disable_caching # Lazy-initialized via _get_or_create_context / _create_context @@ -1939,7 +1922,6 @@ def from_paths( resource_filter: Callable[[str, str], bool] | None = None, instruction_template: str | None = None, require_script_approval: bool = False, - include_detailed_errors: bool = False, disable_caching: bool = False, source_id: str | None = None, ) -> _TSkillsProvider: @@ -1987,16 +1969,6 @@ def from_paths( the user declined. Defaults to ``False``. See ``samples/02-agents/skills/script_approval/script_approval.py`` for the full approval loop pattern. - include_detailed_errors: Controls how script-execution failures are - surfaced to the model. When ``False`` (the default), the - exception is logged and re-raised, delegating error handling to - the function-invocation pipeline (which applies its own - ``include_detailed_errors`` policy). When ``True``, the - exception message is appended to the error string returned - directly to the model, enabling it to retry with different - arguments. This may disclose raw exception details to the - model, so only enable it when the skills and their scripts come - from a trusted source. Defaults to ``False``. disable_caching: When ``True``, rebuilds tools and instructions from the source on every invocation instead of caching after the first build. @@ -2020,7 +1992,6 @@ def from_paths( source, instruction_template=instruction_template, require_script_approval=require_script_approval, - include_detailed_errors=include_detailed_errors, disable_caching=disable_caching, source_id=source_id, ) @@ -2348,14 +2319,12 @@ async def _run_skill_script( Returns: The script result. Returns a user-facing error string for - validation failures (empty or unknown skill/script name) and, - when ``include_detailed_errors`` is ``True``, for execution - failures. + validation failures (empty or unknown skill/script name). Raises: - Exception: Re-raises any exception raised while running the - script when ``include_detailed_errors`` is ``False``, - delegating error handling to the function-invocation pipeline. + Exception: Re-raises any exception raised while running the script, + delegating error handling to the function-invocation pipeline + (which applies its own ``include_detailed_errors`` policy). """ if not skill_name or not skill_name.strip(): return "Error: Skill name cannot be empty." @@ -2373,10 +2342,8 @@ async def _run_skill_script( try: return await script.run(skill, args, **kwargs) - except Exception as ex: + except Exception: logger.exception("Error running script '%s' in skill '%s'", script_name, skill_name) - if self._include_detailed_errors: - return f"Error: Failed to run script '{script_name}' in skill '{skill_name}'. Exception: {ex}" raise async def _read_skill_resource( diff --git a/python/packages/core/tests/core/test_skills.py b/python/packages/core/tests/core/test_skills.py index d35180d486b..63936e15220 100644 --- a/python/packages/core/tests/core/test_skills.py +++ b/python/packages/core/tests/core/test_skills.py @@ -3816,21 +3816,6 @@ def failing_script() -> str: with pytest.raises(RuntimeError, match="Something went wrong"): await run_tool.func(skill_name="my-skill", script_name="boom") - async def test_code_script_exception_includes_details_when_enabled(self) -> None: - """When include_detailed_errors=True, the script error string includes the exception message.""" - - def failing_script() -> str: - raise RuntimeError("Something went wrong") - - skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body") - skill._scripts.append(InlineSkillScript(name="boom", function=failing_script)) - - provider = SkillsProvider([skill], include_detailed_errors=True) - await _init_provider(provider) - run_tool = next(t for t in _ctx(provider)[2] if hasattr(t, "name") and t.name == "run_skill_script") - result = await run_tool.func(skill_name="my-skill", script_name="boom") - assert result == "Error: Failed to run script 'boom' in skill 'my-skill'. Exception: Something went wrong" - async def test_custom_template_without_runner_placeholder_raises(self) -> None: """Providers accept custom templates without {runner_instructions}.""" skill = InlineSkill(frontmatter=SkillFrontmatter(name="my-skill", description="test"), instructions="body")