diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md index 95f69d6ded..550c74b9d0 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +- Align AgentSpanData test stubs and span processor with real OpenAI Agents SDK; + remove non-existent `operation`, `description`, `agent_id`, and `model` fields. + ([#4229](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4229)) - Document official package metadata and README for the OpenAI Agents instrumentation. ([#3859](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3859)) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py index d1dce8ec5e..629e7ba54c 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py @@ -1519,17 +1519,8 @@ def _get_operation_name(self, span_data: Any) -> str: return GenAIOperationName.CHAT return GenAIOperationName.TEXT_COMPLETION if _is_instance_of(span_data, AgentSpanData): - # Could be create_agent or invoke_agent based on context - operation = getattr(span_data, "operation", None) - normalized = ( - operation.strip().lower() - if isinstance(operation, str) - else None - ) - if normalized in {"create", "create_agent"}: - return GenAIOperationName.CREATE_AGENT - if normalized in {"invoke", "invoke_agent"}: - return GenAIOperationName.INVOKE_AGENT + # The real SDK AgentSpanData has no "operation" field; + # agent spans always represent invoke_agent. return GenAIOperationName.INVOKE_AGENT if _is_instance_of(span_data, FunctionSpanData): return GenAIOperationName.EXECUTE_TOOL @@ -1831,24 +1822,20 @@ def _get_attributes_from_agent_span_data( if name: yield GEN_AI_AGENT_NAME, name - agent_id = ( - self.agent_id - or getattr(span_data, "agent_id", None) - or self._agent_id_default - ) + # agent_id and description are not available on the real SDK + # AgentSpanData; only use user-configured overrides. + agent_id = self.agent_id or self._agent_id_default if agent_id: yield GEN_AI_AGENT_ID, agent_id - description = ( - self.agent_description - or getattr(span_data, "description", None) - or self._agent_description_default - ) + description = self.agent_description or self._agent_description_default if description: yield GEN_AI_AGENT_DESCRIPTION, description - model = getattr(span_data, "model", None) - if not model and agent_content: + # The real SDK AgentSpanData has no "model" field; fall back to + # the model aggregated from child generation/response spans. + model = None + if agent_content: model = agent_content.get("request_model") if model: yield GEN_AI_REQUEST_MODEL, model diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/stubs/agents/tracing/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/stubs/agents/tracing/__init__.py index 4ed06c8977..509fd537b3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/stubs/agents/tracing/__init__.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/stubs/agents/tracing/__init__.py @@ -35,12 +35,9 @@ @dataclass class AgentSpanData: name: str | None = None + handoffs: list[str] | None = None tools: list[str] | None = None output_type: str | None = None - description: str | None = None - agent_id: str | None = None - model: str | None = None - operation: str | None = None @property def type(self) -> str: @@ -200,8 +197,16 @@ def generation_span(**kwargs: Any): @contextmanager -def agent_span(**kwargs: Any): - data = AgentSpanData(**kwargs) +def agent_span( + name: str, + handoffs: list[str] | None = None, + tools: list[str] | None = None, + output_type: str | None = None, + **kwargs: Any, +): + data = AgentSpanData( + name=name, handoffs=handoffs, tools=tools, output_type=output_type + ) span = _PROVIDER.create_span(data, parent=_CURRENT_TRACE) span.start() try: diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_tracer.py b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_tracer.py index 1f21ab25c0..a2e8143b0d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_tracer.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_tracer.py @@ -171,40 +171,31 @@ def test_function_span_records_tool_attributes(): exporter.clear() -def test_agent_create_span_records_attributes(): +def test_agent_invoke_span_records_attributes(): instrumentor, exporter = _instrument_with_provider() try: with trace("workflow"): with agent_span( - operation="create", name="support_bot", - description="Answers support questions", - agent_id="agt_123", - model="gpt-4o-mini", + handoffs=["escalation_bot"], + tools=["search"], + output_type="str", ): pass spans = exporter.get_finished_spans() - create_span = next( + invoke_span = next( span for span in spans if span.attributes[GenAI.GEN_AI_OPERATION_NAME] - == GenAI.GenAiOperationNameValues.CREATE_AGENT.value + == GenAI.GenAiOperationNameValues.INVOKE_AGENT.value ) - assert create_span.kind is SpanKind.CLIENT - assert create_span.name == "create_agent support_bot" - assert create_span.attributes[GEN_AI_PROVIDER_NAME] == "openai" - assert create_span.attributes[GenAI.GEN_AI_AGENT_NAME] == "support_bot" - assert ( - create_span.attributes[GenAI.GEN_AI_AGENT_DESCRIPTION] - == "Answers support questions" - ) - assert create_span.attributes[GenAI.GEN_AI_AGENT_ID] == "agt_123" - assert ( - create_span.attributes[GenAI.GEN_AI_REQUEST_MODEL] == "gpt-4o-mini" - ) + assert invoke_span.kind is SpanKind.CLIENT + assert invoke_span.name == "invoke_agent support_bot" + assert invoke_span.attributes[GEN_AI_PROVIDER_NAME] == "openai" + assert invoke_span.attributes[GenAI.GEN_AI_AGENT_NAME] == "support_bot" finally: instrumentor.uninstrument() exporter.clear() @@ -425,7 +416,7 @@ def test_agent_name_override_applied_to_agent_spans(): try: with trace("workflow"): - with agent_span(operation="invoke", name="support_bot"): + with agent_span(name="support_bot"): pass spans = exporter.get_finished_spans() diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_z_span_processor_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_z_span_processor_unit.py index b2c8c7c8f3..49603a52eb 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_z_span_processor_unit.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/tests/test_z_span_processor_unit.py @@ -156,19 +156,14 @@ def test_operation_and_span_naming(processor_setup): == sp.GenAIOperationName.EMBEDDINGS ) - agent_create = AgentSpanData(operation=" CREATE ") + # AgentSpanData always maps to invoke_agent (no operation field in real SDK) + agent_data = AgentSpanData(name="bot") assert ( - processor._get_operation_name(agent_create) - == sp.GenAIOperationName.CREATE_AGENT - ) - - agent_invoke = AgentSpanData(operation="invoke_agent") - assert ( - processor._get_operation_name(agent_invoke) + processor._get_operation_name(agent_data) == sp.GenAIOperationName.INVOKE_AGENT ) - agent_default = AgentSpanData(operation=None) + agent_default = AgentSpanData() assert ( processor._get_operation_name(agent_default) == sp.GenAIOperationName.INVOKE_AGENT @@ -315,26 +310,19 @@ def __init__(self) -> None: agent_span = AgentSpanData( name="helper", output_type="json", - description="desc", - agent_id="agent-123", - model="model-x", - operation="invoke_agent", ) agent_attrs = _collect( processor._get_attributes_from_agent_span_data(agent_span, None) ) assert agent_attrs[sp.GEN_AI_AGENT_NAME] == "helper" - assert agent_attrs[sp.GEN_AI_AGENT_ID] == "agent-123" - assert agent_attrs[sp.GEN_AI_REQUEST_MODEL] == "model-x" + assert sp.GEN_AI_AGENT_ID not in agent_attrs + assert sp.GEN_AI_REQUEST_MODEL not in agent_attrs assert agent_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.TEXT # Fallback to aggregated model when span data lacks it agent_span_no_model = AgentSpanData( name="helper-2", output_type="json", - description="desc", - agent_id="agent-456", - operation="invoke_agent", ) agent_content = { "input_messages": [], @@ -435,9 +423,7 @@ def test_span_lifecycle_and_shutdown(processor_setup): parent_span = FakeSpan( trace_id="trace-1", span_id="span-1", - span_data=AgentSpanData( - operation="invoke", name="agent", model="gpt-4o" - ), + span_data=AgentSpanData(name="agent"), started_at="2024-01-01T00:00:00Z", ended_at="2024-01-01T00:00:02Z", ) @@ -476,7 +462,7 @@ def test_span_lifecycle_and_shutdown(processor_setup): linger_span = FakeSpan( trace_id="trace-2", span_id="span-3", - span_data=AgentSpanData(operation=None), + span_data=AgentSpanData(), started_at="2024-01-01T00:00:06Z", ) processor.on_span_start(linger_span) @@ -518,7 +504,6 @@ def test_chat_span_renamed_with_model(processor_setup): trace_id=trace.trace_id, span_id="agent-span", span_data=AgentSpanData( - operation="invoke_agent", name="Agent", ), started_at="2025-01-01T00:00:00Z",