From 27f6db974d15603963f28bd30766a0913f55dd46 Mon Sep 17 00:00:00 2001 From: Jonathan Wrede Date: Sat, 30 May 2026 12:42:00 +0000 Subject: [PATCH 1/2] Serialize tool call output items as structured parts ResponseFunctionToolCall objects in response.output were being stringified as text parts instead of serialized as structured tool_call parts with id, name, and arguments fields. This adds duck-type detection for tool call items and proper serialization following the GenAI semantic conventions. Assisted-by: Claude Opus 4.6 --- .../.changelog/0.fixed | 1 + .../genai/openai_agents/span_processor.py | 94 +++++++---- .../tests/test_tracer.py | 151 ++++++++++++++++++ 3 files changed, 218 insertions(+), 28 deletions(-) create mode 100644 instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed new file mode 100644 index 00000000..20a2cbc2 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed @@ -0,0 +1 @@ +Serialize ResponseFunctionToolCall output items as structured tool_call parts instead of stringified text diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py index f78552f1..bb960362 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py @@ -20,6 +20,7 @@ from __future__ import annotations import importlib +import json import logging from dataclasses import dataclass from datetime import datetime, timezone @@ -877,6 +878,60 @@ def _normalize_messages_to_role_parts( return normalized + @staticmethod + def _is_tool_call_item(item: Any) -> bool: + """Check if an output item represents a function tool call.""" + return ( + hasattr(item, "call_id") + and hasattr(item, "name") + and hasattr(item, "arguments") + and getattr(item, "type", None) == "function_call" + ) + + def _output_item_to_part(self, item: Any) -> dict[str, Any]: + """Convert a single response output item to a normalized part dict. + + Recognizes function tool call objects (e.g. ResponseFunctionToolCall) + and serializes them as tool_call parts per the GenAI semantic + conventions instead of falling back to str(). + """ + if self._is_tool_call_item(item): + if not self.include_sensitive_data: + return { + "type": "tool_call", + "id": getattr(item, "call_id", None), + "name": getattr(item, "name", None), + "arguments": "readacted", + } + arguments = getattr(item, "arguments", None) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except (TypeError, ValueError): + pass + return { + "type": "tool_call", + "id": getattr(item, "call_id", None), + "name": getattr(item, "name", None), + "arguments": arguments, + } + + txt = getattr(item, "content", None) + if isinstance(txt, str) and txt: + return { + "type": "text", + "content": ( + "readacted" if not self.include_sensitive_data else txt + ), + } + + return { + "type": "text", + "content": ( + "readacted" if not self.include_sensitive_data else str(item) + ), + } + def _normalize_output_messages_to_role_parts( self, span_data: Any ) -> list[dict[str, Any]]: @@ -909,35 +964,18 @@ def _normalize_output_messages_to_role_parts( output = getattr(response, "output", None) if isinstance(output, Sequence): for item in output: - # ResponseOutputMessage may have a string representation - txt = getattr(item, "content", None) - if isinstance(txt, str) and txt: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else txt - ), - } - ) - else: - # Fallback: stringified - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else str(item) - ), - } - ) + part = self._output_item_to_part(item) + parts.append(part) # Capture finish_reason from parts when present - fr = getattr(item, "finish_reason", None) - if isinstance(fr, str) and not finish_reason: - finish_reason = fr + if not finish_reason: + if self._is_tool_call_item(item): + status = getattr(item, "status", None) + if status in {"completed", "incomplete"}: + finish_reason = "tool_calls" + else: + fr = getattr(item, "finish_reason", None) + if isinstance(fr, str): + finish_reason = fr # Generation span: use span_data.output if not parts: diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py index 4d83d1c8..4b6e24cf 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py @@ -559,3 +559,154 @@ def __init__(self) -> None: finally: instrumentor.uninstrument() exporter.clear() + + +def test_response_span_tool_call_output_serialized_as_tool_call(): + """ResponseFunctionToolCall objects in response.output are serialized as + tool_call parts with id, name, and arguments -- not stringified as text.""" + instrumentor, exporter = _instrument_with_provider() + + class _Usage: + def __init__(self, input_tokens: int, output_tokens: int) -> None: + self.input_tokens = input_tokens + self.output_tokens = output_tokens + + class _ToolCall: + def __init__(self) -> None: + self.arguments = '{"city": "Barcelona"}' + self.call_id = "call_abc123" + self.name = "get_weather" + self.type = "function_call" + self.id = "fc_def456" + self.status = "completed" + + class _Response: + def __init__(self) -> None: + self.id = "resp-tool" + self.instructions = None + self.model = "gpt-4o-mini" + self.usage = _Usage(10, 5) + self.tools = [] + self.output = [_ToolCall()] + self.output_text = None + + try: + with trace("workflow"): + with response_span(response=_Response()): + pass + + spans = exporter.get_finished_spans() + chat_span = next( + span + for span in spans + if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME) + == GenAI.GenAiOperationNameValues.CHAT.value + ) + + output_messages = json.loads( + chat_span.attributes[GEN_AI_OUTPUT_MESSAGES] + ) + assert len(output_messages) == 1 + msg = output_messages[0] + assert msg["role"] == "assistant" + assert msg.get("finish_reason") == "tool_calls" + + parts = msg["parts"] + assert len(parts) == 1 + part = parts[0] + assert part["type"] == "tool_call" + assert part["id"] == "call_abc123" + assert part["name"] == "get_weather" + assert part["arguments"] == {"city": "Barcelona"} + finally: + instrumentor.uninstrument() + exporter.clear() + + +def test_response_span_mixed_output_text_and_tool_call(): + """When response.output contains both message and tool call items, + both are serialized correctly.""" + instrumentor, exporter = _instrument_with_provider() + + class _Usage: + def __init__(self) -> None: + self.input_tokens = 20 + self.output_tokens = 15 + + class _OutputMessage: + def __init__(self) -> None: + self.content = "Let me check the weather for you." + self.finish_reason = None + self.status = "completed" + + class _ToolCall: + def __init__(self) -> None: + self.arguments = '{"city": "Paris"}' + self.call_id = "call_xyz" + self.name = "get_weather" + self.type = "function_call" + self.id = "fc_789" + self.status = "completed" + + class _Response: + def __init__(self) -> None: + self.id = "resp-mixed" + self.instructions = None + self.model = "gpt-4o-mini" + self.usage = _Usage() + self.tools = [] + self.output = [_OutputMessage(), _ToolCall()] + self.output_text = None + + try: + with trace("workflow"): + with response_span(response=_Response()): + pass + + spans = exporter.get_finished_spans() + chat_span = next( + span + for span in spans + if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME) + == GenAI.GenAiOperationNameValues.CHAT.value + ) + + output_messages = json.loads( + chat_span.attributes[GEN_AI_OUTPUT_MESSAGES] + ) + assert len(output_messages) == 1 + parts = output_messages[0]["parts"] + assert len(parts) == 2 + + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Let me check the weather for you." + + assert parts[1]["type"] == "tool_call" + assert parts[1]["id"] == "call_xyz" + assert parts[1]["name"] == "get_weather" + assert parts[1]["arguments"] == {"city": "Paris"} + finally: + instrumentor.uninstrument() + exporter.clear() + + +def test_response_span_tool_call_redacted_when_sensitive_disabled(): + """Tool call arguments are redacted when sensitive data capture is off.""" + processor = GenAISemanticProcessor( + include_sensitive_data=False, metrics_enabled=False + ) + + class _ToolCall: + def __init__(self) -> None: + self.arguments = '{"secret": "value"}' + self.call_id = "call_redact" + self.name = "secret_tool" + self.type = "function_call" + self.id = "fc_redact" + self.status = "completed" + + part = processor._output_item_to_part(_ToolCall()) + assert part["type"] == "tool_call" + assert part["id"] == "call_redact" + assert part["name"] == "secret_tool" + assert part["arguments"] == "readacted" From 22d5b0896597f94853e6d691b1223635f8915da8 Mon Sep 17 00:00:00 2001 From: Jonathan Wrede Date: Sat, 30 May 2026 12:45:10 +0000 Subject: [PATCH 2/2] chore: rename changelog fragment to match PR number Assisted-by: Claude Opus 4.6 --- .../.changelog/{0.fixed => 101.fixed} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/{0.fixed => 101.fixed} (100%) diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/101.fixed similarity index 100% rename from instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/0.fixed rename to instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/101.fixed