Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Serialize ResponseFunctionToolCall output items as structured tool_call parts instead of stringified text
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from __future__ import annotations

import importlib
import json
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
Expand Down Expand Up @@ -877,6 +878,60 @@ def _normalize_messages_to_role_parts(

return normalized

@staticmethod
def _is_tool_call_item(item: Any) -> bool:
"""Check if an output item represents a function tool call."""
return (
hasattr(item, "call_id")
and hasattr(item, "name")
and hasattr(item, "arguments")
and getattr(item, "type", None) == "function_call"
)

def _output_item_to_part(self, item: Any) -> dict[str, Any]:
"""Convert a single response output item to a normalized part dict.

Recognizes function tool call objects (e.g. ResponseFunctionToolCall)
and serializes them as tool_call parts per the GenAI semantic
conventions instead of falling back to str().
"""
if self._is_tool_call_item(item):
if not self.include_sensitive_data:
return {
"type": "tool_call",
"id": getattr(item, "call_id", None),
"name": getattr(item, "name", None),
"arguments": "readacted",
}
arguments = getattr(item, "arguments", None)
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except (TypeError, ValueError):
pass
return {
"type": "tool_call",
"id": getattr(item, "call_id", None),
"name": getattr(item, "name", None),
"arguments": arguments,
}

txt = getattr(item, "content", None)
if isinstance(txt, str) and txt:
return {
"type": "text",
"content": (
"readacted" if not self.include_sensitive_data else txt
),
}

return {
"type": "text",
"content": (
"readacted" if not self.include_sensitive_data else str(item)
),
}

def _normalize_output_messages_to_role_parts(
self, span_data: Any
) -> list[dict[str, Any]]:
Expand Down Expand Up @@ -909,35 +964,18 @@ def _normalize_output_messages_to_role_parts(
output = getattr(response, "output", None)
if isinstance(output, Sequence):
for item in output:
# ResponseOutputMessage may have a string representation
txt = getattr(item, "content", None)
if isinstance(txt, str) and txt:
parts.append(
{
"type": "text",
"content": (
"readacted"
if not self.include_sensitive_data
else txt
),
}
)
else:
# Fallback: stringified
parts.append(
{
"type": "text",
"content": (
"readacted"
if not self.include_sensitive_data
else str(item)
),
}
)
part = self._output_item_to_part(item)
parts.append(part)
# Capture finish_reason from parts when present
fr = getattr(item, "finish_reason", None)
if isinstance(fr, str) and not finish_reason:
finish_reason = fr
if not finish_reason:
if self._is_tool_call_item(item):
status = getattr(item, "status", None)
if status in {"completed", "incomplete"}:
finish_reason = "tool_calls"
Comment on lines +970 to +974
else:
fr = getattr(item, "finish_reason", None)
if isinstance(fr, str):
finish_reason = fr

# Generation span: use span_data.output
if not parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,154 @@ def __init__(self) -> None:
finally:
instrumentor.uninstrument()
exporter.clear()


def test_response_span_tool_call_output_serialized_as_tool_call():
    """A function tool call in ``response.output`` is exported as a
    structured ``tool_call`` part (id, name, parsed arguments) with a
    ``tool_calls`` finish reason, rather than as stringified text."""
    instrumentor, exporter = _instrument_with_provider()

    class _Usage:
        def __init__(self, input_tokens: int, output_tokens: int) -> None:
            self.input_tokens = input_tokens
            self.output_tokens = output_tokens

    class _ToolCall:
        # Minimal stand-in exposing the attributes the test sets on a
        # function tool call output item.
        def __init__(self) -> None:
            self.arguments = '{"city": "Barcelona"}'
            self.call_id = "call_abc123"
            self.name = "get_weather"
            self.type = "function_call"
            self.id = "fc_def456"
            self.status = "completed"

    class _Response:
        def __init__(self) -> None:
            self.id = "resp-tool"
            self.instructions = None
            self.model = "gpt-4o-mini"
            self.usage = _Usage(10, 5)
            self.tools = []
            self.output = [_ToolCall()]
            self.output_text = None

    def _is_chat_span(span) -> bool:
        operation = span.attributes.get(GenAI.GEN_AI_OPERATION_NAME)
        return operation == GenAI.GenAiOperationNameValues.CHAT.value

    try:
        with trace("workflow"):
            with response_span(response=_Response()):
                pass

        finished = exporter.get_finished_spans()
        chat = next(span for span in finished if _is_chat_span(span))

        messages = json.loads(chat.attributes[GEN_AI_OUTPUT_MESSAGES])
        assert len(messages) == 1
        message = messages[0]
        assert message["role"] == "assistant"
        assert message.get("finish_reason") == "tool_calls"

        message_parts = message["parts"]
        assert len(message_parts) == 1
        tool_part = message_parts[0]
        assert tool_part["type"] == "tool_call"
        assert tool_part["id"] == "call_abc123"
        assert tool_part["name"] == "get_weather"
        # The JSON string arguments must come back as a parsed object.
        assert tool_part["arguments"] == {"city": "Barcelona"}
    finally:
        instrumentor.uninstrument()
        exporter.clear()


def test_response_span_mixed_output_text_and_tool_call():
    """A response whose output holds a text message followed by a tool
    call yields one output message with a ``text`` part and a
    ``tool_call`` part, in that order."""
    instrumentor, exporter = _instrument_with_provider()

    class _Usage:
        def __init__(self) -> None:
            self.input_tokens = 20
            self.output_tokens = 15

    class _OutputMessage:
        # Plain message item: string content and no finish reason.
        def __init__(self) -> None:
            self.content = "Let me check the weather for you."
            self.finish_reason = None
            self.status = "completed"

    class _ToolCall:
        def __init__(self) -> None:
            self.arguments = '{"city": "Paris"}'
            self.call_id = "call_xyz"
            self.name = "get_weather"
            self.type = "function_call"
            self.id = "fc_789"
            self.status = "completed"

    class _Response:
        def __init__(self) -> None:
            self.id = "resp-mixed"
            self.instructions = None
            self.model = "gpt-4o-mini"
            self.usage = _Usage()
            self.tools = []
            self.output = [_OutputMessage(), _ToolCall()]
            self.output_text = None

    def _is_chat_span(span) -> bool:
        operation = span.attributes.get(GenAI.GEN_AI_OPERATION_NAME)
        return operation == GenAI.GenAiOperationNameValues.CHAT.value

    try:
        with trace("workflow"):
            with response_span(response=_Response()):
                pass

        finished = exporter.get_finished_spans()
        chat = next(span for span in finished if _is_chat_span(span))

        messages = json.loads(chat.attributes[GEN_AI_OUTPUT_MESSAGES])
        assert len(messages) == 1
        message_parts = messages[0]["parts"]
        assert len(message_parts) == 2

        text_part = message_parts[0]
        assert text_part["type"] == "text"
        assert text_part["content"] == "Let me check the weather for you."

        tool_part = message_parts[1]
        assert tool_part["type"] == "tool_call"
        assert tool_part["id"] == "call_xyz"
        assert tool_part["name"] == "get_weather"
        assert tool_part["arguments"] == {"city": "Paris"}
    finally:
        instrumentor.uninstrument()
        exporter.clear()


def test_response_span_tool_call_redacted_when_sensitive_disabled():
    """With sensitive data capture off, a tool call part keeps its type,
    id and name but its arguments are replaced by the placeholder."""

    class _ToolCall:
        def __init__(self) -> None:
            self.arguments = '{"secret": "value"}'
            self.call_id = "call_redact"
            self.name = "secret_tool"
            self.type = "function_call"
            self.id = "fc_redact"
            self.status = "completed"

    processor = GenAISemanticProcessor(
        include_sensitive_data=False, metrics_enabled=False
    )
    redacted_part = processor._output_item_to_part(_ToolCall())

    expected_fields = (
        ("type", "tool_call"),
        ("id", "call_redact"),
        ("name", "secret_tool"),
        ("arguments", "readacted"),
    )
    for key, expected in expected_fields:
        assert redacted_part[key] == expected