From 27f6db974d15603963f28bd30766a0913f55dd46 Mon Sep 17 00:00:00 2001
From: Jonathan Wrede <wrede.jonathan00@gmail.com>
Date: Sat, 30 May 2026 12:42:00 +0000
Subject: [PATCH 1/2] Serialize tool call output items as structured parts

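ResponseFunctionToolCall objects in response.output were being
stringified as text parts instead of being serialized as structured
tool_call parts with id, name, and arguments fields. This adds
duck-typed detection of tool call items (objects exposing call_id,
name, and arguments whose type is "function_call") and serializes
them following the GenAI semantic conventions.

String arguments are decoded as JSON when they parse and are kept as
the raw string otherwise. When sensitive data capture is disabled,
the arguments are replaced with the existing redaction placeholder
while id and name are still reported.

When no finish_reason has been captured yet, a tool call item whose
status is "completed" or "incomplete" now sets it to "tool_calls";
other items keep using their own finish_reason attribute.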

Assisted-by: Claude Opus 4.6
---
 .../.changelog/0.fixed                        |   1 +
 .../genai/openai_agents/span_processor.py     |  94 +++++++----
 .../tests/test_tracer.py                      | 151 ++++++++++++++++++
 3 files changed, 218 insertions(+), 28 deletions(-)
 create mode 100644 instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed

diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed
new file mode 100644
index 00000000..20a2cbc2
--- /dev/null
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed
@@ -0,0 +1 @@
+Serialize ResponseFunctionToolCall output items as structured tool_call parts instead of stringified text
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
index f78552f1..bb960362 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py
@@ -20,6 +20,7 @@
 from __future__ import annotations
 
 import importlib
+import json
 import logging
 from dataclasses import dataclass
 from datetime import datetime, timezone
@@ -877,6 +878,60 @@ def _normalize_messages_to_role_parts(
 
         return normalized
 
+    @staticmethod
+    def _is_tool_call_item(item: Any) -> bool:
+        """Check if an output item represents a function tool call."""
+        return (
+            hasattr(item, "call_id")
+            and hasattr(item, "name")
+            and hasattr(item, "arguments")
+            and getattr(item, "type", None) == "function_call"
+        )
+
+    def _output_item_to_part(self, item: Any) -> dict[str, Any]:
+        """Convert a single response output item to a normalized part dict.
+
+        Recognizes function tool call objects (e.g. ResponseFunctionToolCall)
+        and serializes them as tool_call parts per the GenAI semantic
+        conventions instead of falling back to str().
+        """
+        if self._is_tool_call_item(item):
+            if not self.include_sensitive_data:
+                return {
+                    "type": "tool_call",
+                    "id": getattr(item, "call_id", None),
+                    "name": getattr(item, "name", None),
+                    "arguments": "readacted",
+                }
+            arguments = getattr(item, "arguments", None)
+            if isinstance(arguments, str):
+                try:
+                    arguments = json.loads(arguments)
+                except (TypeError, ValueError):
+                    pass
+            return {
+                "type": "tool_call",
+                "id": getattr(item, "call_id", None),
+                "name": getattr(item, "name", None),
+                "arguments": arguments,
+            }
+
+        txt = getattr(item, "content", None)
+        if isinstance(txt, str) and txt:
+            return {
+                "type": "text",
+                "content": (
+                    "readacted" if not self.include_sensitive_data else txt
+                ),
+            }
+
+        return {
+            "type": "text",
+            "content": (
+                "readacted" if not self.include_sensitive_data else str(item)
+            ),
+        }
+
     def _normalize_output_messages_to_role_parts(
         self, span_data: Any
     ) -> list[dict[str, Any]]:
@@ -909,35 +964,18 @@ def _normalize_output_messages_to_role_parts(
                 output = getattr(response, "output", None)
                 if isinstance(output, Sequence):
                     for item in output:
-                        # ResponseOutputMessage may have a string representation
-                        txt = getattr(item, "content", None)
-                        if isinstance(txt, str) and txt:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "content": (
-                                        "readacted"
-                                        if not self.include_sensitive_data
-                                        else txt
-                                    ),
-                                }
-                            )
-                        else:
-                            # Fallback: stringified
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "content": (
-                                        "readacted"
-                                        if not self.include_sensitive_data
-                                        else str(item)
-                                    ),
-                                }
-                            )
+                        part = self._output_item_to_part(item)
+                        parts.append(part)
                         # Capture finish_reason from parts when present
-                        fr = getattr(item, "finish_reason", None)
-                        if isinstance(fr, str) and not finish_reason:
-                            finish_reason = fr
+                        if not finish_reason:
+                            if self._is_tool_call_item(item):
+                                status = getattr(item, "status", None)
+                                if status in {"completed", "incomplete"}:
+                                    finish_reason = "tool_calls"
+                            else:
+                                fr = getattr(item, "finish_reason", None)
+                                if isinstance(fr, str):
+                                    finish_reason = fr
 
         # Generation span: use span_data.output
         if not parts:
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
index 4d83d1c8..4b6e24cf 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py
@@ -559,3 +559,154 @@ def __init__(self) -> None:
     finally:
         instrumentor.uninstrument()
         exporter.clear()
+
+
+def test_response_span_tool_call_output_serialized_as_tool_call():
+    """ResponseFunctionToolCall objects in response.output are serialized as
+    tool_call parts with id, name, and arguments -- not stringified as text."""
+    instrumentor, exporter = _instrument_with_provider()
+
+    class _Usage:
+        def __init__(self, input_tokens: int, output_tokens: int) -> None:
+            self.input_tokens = input_tokens
+            self.output_tokens = output_tokens
+
+    class _ToolCall:
+        def __init__(self) -> None:
+            self.arguments = '{"city": "Barcelona"}'
+            self.call_id = "call_abc123"
+            self.name = "get_weather"
+            self.type = "function_call"
+            self.id = "fc_def456"
+            self.status = "completed"
+
+    class _Response:
+        def __init__(self) -> None:
+            self.id = "resp-tool"
+            self.instructions = None
+            self.model = "gpt-4o-mini"
+            self.usage = _Usage(10, 5)
+            self.tools = []
+            self.output = [_ToolCall()]
+            self.output_text = None
+
+    try:
+        with trace("workflow"):
+            with response_span(response=_Response()):
+                pass
+
+        spans = exporter.get_finished_spans()
+        chat_span = next(
+            span
+            for span in spans
+            if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME)
+            == GenAI.GenAiOperationNameValues.CHAT.value
+        )
+
+        output_messages = json.loads(
+            chat_span.attributes[GEN_AI_OUTPUT_MESSAGES]
+        )
+        assert len(output_messages) == 1
+        msg = output_messages[0]
+        assert msg["role"] == "assistant"
+        assert msg.get("finish_reason") == "tool_calls"
+
+        parts = msg["parts"]
+        assert len(parts) == 1
+        part = parts[0]
+        assert part["type"] == "tool_call"
+        assert part["id"] == "call_abc123"
+        assert part["name"] == "get_weather"
+        assert part["arguments"] == {"city": "Barcelona"}
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+
+
+def test_response_span_mixed_output_text_and_tool_call():
+    """When response.output contains both message and tool call items,
+    both are serialized correctly."""
+    instrumentor, exporter = _instrument_with_provider()
+
+    class _Usage:
+        def __init__(self) -> None:
+            self.input_tokens = 20
+            self.output_tokens = 15
+
+    class _OutputMessage:
+        def __init__(self) -> None:
+            self.content = "Let me check the weather for you."
+            self.finish_reason = None
+            self.status = "completed"
+
+    class _ToolCall:
+        def __init__(self) -> None:
+            self.arguments = '{"city": "Paris"}'
+            self.call_id = "call_xyz"
+            self.name = "get_weather"
+            self.type = "function_call"
+            self.id = "fc_789"
+            self.status = "completed"
+
+    class _Response:
+        def __init__(self) -> None:
+            self.id = "resp-mixed"
+            self.instructions = None
+            self.model = "gpt-4o-mini"
+            self.usage = _Usage()
+            self.tools = []
+            self.output = [_OutputMessage(), _ToolCall()]
+            self.output_text = None
+
+    try:
+        with trace("workflow"):
+            with response_span(response=_Response()):
+                pass
+
+        spans = exporter.get_finished_spans()
+        chat_span = next(
+            span
+            for span in spans
+            if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME)
+            == GenAI.GenAiOperationNameValues.CHAT.value
+        )
+
+        output_messages = json.loads(
+            chat_span.attributes[GEN_AI_OUTPUT_MESSAGES]
+        )
+        assert len(output_messages) == 1
+        parts = output_messages[0]["parts"]
+        assert len(parts) == 2
+
+        assert parts[0]["type"] == "text"
+        assert parts[0]["content"] == "Let me check the weather for you."
+
+        assert parts[1]["type"] == "tool_call"
+        assert parts[1]["id"] == "call_xyz"
+        assert parts[1]["name"] == "get_weather"
+        assert parts[1]["arguments"] == {"city": "Paris"}
+    finally:
+        instrumentor.uninstrument()
+        exporter.clear()
+
+
+def test_response_span_tool_call_redacted_when_sensitive_disabled():
+    """Tool call arguments are redacted when sensitive data capture is off."""
+    processor = GenAISemanticProcessor(
+        include_sensitive_data=False, metrics_enabled=False
+    )
+
+    class _ToolCall:
+        def __init__(self) -> None:
+            self.arguments = '{"secret": "value"}'
+            self.call_id = "call_redact"
+            self.name = "secret_tool"
+            self.type = "function_call"
+            self.id = "fc_redact"
+            self.status = "completed"
+
+    part = processor._output_item_to_part(_ToolCall())
+    assert part["type"] == "tool_call"
+    assert part["id"] == "call_redact"
+    assert part["name"] == "secret_tool"
+    assert part["arguments"] == "readacted"

From 22d5b0896597f94853e6d691b1223635f8915da8 Mon Sep 17 00:00:00 2001
From: Jonathan Wrede <wrede.jonathan00@gmail.com>
Date: Sat, 30 May 2026 12:45:10 +0000
Subject: [PATCH 2/2] chore: rename changelog fragment to match PR number

Assisted-by: Claude Opus 4.6
---
 .../.changelog/{0.fixed => 101.fixed}                             | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/{0.fixed => 101.fixed} (100%)

diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/101.fixed
similarity index 100%
rename from instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed
rename to instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/101.fixed