diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/90.changed b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/90.changed new file mode 100644 index 00000000..7e88259b --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/.changelog/90.changed @@ -0,0 +1,4 @@ +Switch instrumentation to use util-genai instead of hand-rolled signals. +Stop capturing chat, embeddings, speech, and transcription spans — those are covered by the OpenAI instrumentation. +Remove handoff and guardrail spans (not yet defined in the semantic conventions and not implemented +by util-genai). diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/main.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/main.py index 5aae8b11..b510528a 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/main.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/main.py @@ -6,28 +6,69 @@ from __future__ import annotations -from agents import Agent, Runner, function_tool +from agents import ( + Agent, + RunConfig, + Runner, + function_tool, +) from dotenv import load_dotenv from opentelemetry import trace +from opentelemetry._logs import set_logger_provider +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( + OTLPLogExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter, +) from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( OTLPSpanExporter, ) +from opentelemetry.instrumentation.genai.openai import OpenAIInstrumentor from opentelemetry.instrumentation.genai.openai_agents import ( OpenAIAgentsInstrumentor, ) +from opentelemetry.metrics import set_meter_provider +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from 
opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor -def configure_otel() -> None: - """Configure the OpenTelemetry SDK for exporting spans.""" +def configure_otel() -> tuple[TracerProvider, MeterProvider, LoggerProvider]: + """Configure OpenTelemetry providers and install the instrumentor.""" - provider = TracerProvider() - provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) - trace.set_tracer_provider(provider) + tracer_provider = TracerProvider() + tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) + trace.set_tracer_provider(tracer_provider) - OpenAIAgentsInstrumentor().instrument(tracer_provider=provider) + meter_provider = MeterProvider( + metric_readers=[ + PeriodicExportingMetricReader(OTLPMetricExporter()), + ], + ) + set_meter_provider(meter_provider) + + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + BatchLogRecordProcessor(OTLPLogExporter()) + ) + set_logger_provider(logger_provider) + + OpenAIInstrumentor().instrument( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + ) + OpenAIAgentsInstrumentor().instrument( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + ) + return tracer_provider, meter_provider, logger_provider @function_tool @@ -39,7 +80,7 @@ def get_weather(city: str) -> str: def main() -> None: load_dotenv() - configure_otel() + tracer_provider, meter_provider, logger_provider = configure_otel() weather_specialist = Agent( name="weather_specialist", instructions=( @@ -60,13 +101,22 @@ def main() -> None: model="gpt-4o-mini", ) - result = Runner.run_sync( - triage_agent, - "I'm visiting Barcelona this weekend. 
How should I pack?", - ) - - print("Agent response:") - print(result.final_output) + try: + # ``RunConfig.workflow_name`` is the agents library's own knob for + # naming the workflow. The instrumentation reads it and emits the + # value as the ``gen_ai.workflow.name`` attribute on the workflow + # span — without it, the default "Agent workflow" is used. + result = Runner.run_sync( + triage_agent, + "I'm visiting Barcelona this weekend. How should I pack?", + run_config=RunConfig(workflow_name="weather-triage"), + ) + print("Agent response:") + print(result.final_output) + finally: + tracer_provider.shutdown() + meter_provider.shutdown() + logger_provider.shutdown() if __name__ == "__main__": diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/requirements.txt b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/requirements.txt index 9cab84dc..cc1447ff 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/requirements.txt +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/manual/requirements.txt @@ -3,4 +3,5 @@ python-dotenv~=1.0 opentelemetry-sdk~=1.42.0 opentelemetry-exporter-otlp-proto-grpc~=1.42.0 +-e ../../../opentelemetry-instrumentation-genai-openai -e ../.. 
\ No newline at end of file diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py index 955c3f1a..b1b33e34 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/__init__.py @@ -1,213 +1,128 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 -"""OpenAI Agents instrumentation for OpenTelemetry.""" +"""OpenAI Agents instrumentation for OpenTelemetry. -# pylint: disable=too-many-locals +Registers a :class:`GenAITracingProcessor` with the agents library's +public ``add_trace_processor`` extension API. The processor reacts +synchronously to the agents library's own ``Trace`` / ``AgentSpan`` / +``FunctionSpan`` start/end callbacks and turns them +into ``invoke_workflow`` / ``invoke_agent`` / ``execute_tool`` spans via +``opentelemetry-util-genai``. + +LLM-level spans (``chat`` / ``embeddings``) are produced +by ``opentelemetry-instrumentation-genai-openai`` when both packages are +installed; this instrumentation does not emit them. + +Usage +----- + +.. code:: python + + from opentelemetry.instrumentation.genai.openai_agents import ( + OpenAIAgentsInstrumentor, + ) + + # Default: keep the OpenAI native trace exporter; add our OTel emission. + OpenAIAgentsInstrumentor().instrument() + + # Replace the default exporter so traces are only sent via OTel. 
+ OpenAIAgentsInstrumentor().instrument(disable_openai_trace_export=True) +""" from __future__ import annotations -import importlib import logging -import os from typing import Any, Collection +from agents.tracing import ( + add_trace_processor, + get_trace_provider, + set_trace_processors, +) + from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) -from opentelemetry.semconv.schemas import Schemas -from opentelemetry.trace import get_tracer +from opentelemetry.util.genai.handler import ( + TelemetryHandler, + get_telemetry_handler, +) from .package import _instruments -from .span_processor import ( - ContentCaptureMode, - GenAIEvaluationAttributes, - GenAIOperationName, - GenAIOutputType, - GenAIProvider, - GenAISemanticProcessor, - GenAIToolType, -) +from .processor import GenAITracingProcessor -__all__ = [ - "OpenAIAgentsInstrumentor", - "GenAIProvider", - "GenAIOperationName", - "GenAIToolType", - "GenAIOutputType", - "GenAIEvaluationAttributes", -] +__all__ = ["OpenAIAgentsInstrumentor"] logger = logging.getLogger(__name__) -_CONTENT_CAPTURE_ENV = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" -_SYSTEM_OVERRIDE_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_SYSTEM" -_CAPTURE_CONTENT_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_CONTENT" -_CAPTURE_METRICS_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_METRICS" - - -def _load_tracing_module(): # pragma: no cover - exercised via tests - return importlib.import_module("agents.tracing") - - -def _get_registered_processors(provider) -> list: - multi = getattr(provider, "_multi_processor", None) - processors = getattr(multi, "_processors", ()) - return list(processors) - - -def _resolve_system(value: str | None) -> str: - if not value: - return GenAI.GenAiSystemValues.OPENAI.value - - normalized = value.strip().lower() - for member in GenAI.GenAiSystemValues: - if normalized == member.value: - return 
member.value - if normalized == member.name.lower(): - return member.value - return value +class OpenAIAgentsInstrumentor(BaseInstrumentor): + """Instrument the openai-agents library. -def _resolve_content_mode(value: Any) -> ContentCaptureMode: - if isinstance(value, ContentCaptureMode): - return value - if isinstance(value, bool): - return ( - ContentCaptureMode.SPAN_AND_EVENT - if value - else ContentCaptureMode.NO_CONTENT - ) + Constructor takes no arguments. Configure behavior via ``instrument()``: - if value is None: - return ContentCaptureMode.SPAN_AND_EVENT - - text = str(value).strip().lower() - if not text: - return ContentCaptureMode.SPAN_AND_EVENT - - mapping = { - "span_only": ContentCaptureMode.SPAN_ONLY, - "span-only": ContentCaptureMode.SPAN_ONLY, - "span": ContentCaptureMode.SPAN_ONLY, - "event_only": ContentCaptureMode.EVENT_ONLY, - "event-only": ContentCaptureMode.EVENT_ONLY, - "event": ContentCaptureMode.EVENT_ONLY, - "span_and_event": ContentCaptureMode.SPAN_AND_EVENT, - "span-and-event": ContentCaptureMode.SPAN_AND_EVENT, - "span_and_events": ContentCaptureMode.SPAN_AND_EVENT, - "all": ContentCaptureMode.SPAN_AND_EVENT, - "true": ContentCaptureMode.SPAN_AND_EVENT, - "1": ContentCaptureMode.SPAN_AND_EVENT, - "yes": ContentCaptureMode.SPAN_AND_EVENT, - "no_content": ContentCaptureMode.NO_CONTENT, - "false": ContentCaptureMode.NO_CONTENT, - "0": ContentCaptureMode.NO_CONTENT, - "no": ContentCaptureMode.NO_CONTENT, - "none": ContentCaptureMode.NO_CONTENT, - } - - return mapping.get(text, ContentCaptureMode.SPAN_AND_EVENT) - - -def _resolve_bool(value: Any, default: bool) -> bool: - if value is None: - return default - if isinstance(value, bool): - return value - text = str(value).strip().lower() - if text in {"true", "1", "yes", "on"}: - return True - if text in {"false", "0", "no", "off"}: - return False - return default + ``disable_openai_trace_export`` (default ``False``) + When ``False`` (default), the agents library's built-in trace + 
exporter (which sends traces to OpenAI's hosted tracing backend + when ``OPENAI_API_KEY`` is set) remains active alongside our OTel + emission. - -class OpenAIAgentsInstrumentor(BaseInstrumentor): - """Instrumentation that bridges OpenAI Agents tracing to OpenTelemetry.""" + When ``True``, the default exporter is removed via + ``agents.tracing.set_trace_processors`` so traces flow only through + OpenTelemetry while this instrumentor is active. Previously registered + processors are restored on ``uninstrument()``. + """ def __init__(self) -> None: super().__init__() - self._processor: GenAISemanticProcessor | None = None + self._processor: GenAITracingProcessor | None = None + self._previous_processors: tuple[Any, ...] | None = None - def _instrument(self, **kwargs) -> None: + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs: Any) -> None: if self._processor is not None: return - tracer_provider = kwargs.get("tracer_provider") - tracer = get_tracer( - __name__, - "", - tracer_provider, - schema_url=Schemas.V1_28_0.value, - ) - - system_override = kwargs.get("system") or os.getenv( - _SYSTEM_OVERRIDE_ENV + handler: TelemetryHandler = get_telemetry_handler( + tracer_provider=kwargs.get("tracer_provider"), + meter_provider=kwargs.get("meter_provider"), + logger_provider=kwargs.get("logger_provider"), + completion_hook=kwargs.get("completion_hook"), ) - system = _resolve_system(system_override) - - content_override = kwargs.get("capture_message_content") - if content_override is None: - content_override = os.getenv(_CONTENT_CAPTURE_ENV) or os.getenv( - _CAPTURE_CONTENT_ENV + provider = GenAI.GenAiProviderNameValues.OPENAI.value + self._processor = GenAITracingProcessor(handler, provider) + + if kwargs.get("disable_openai_trace_export"): + trace_provider = get_trace_provider() + current = getattr( + getattr(trace_provider, "_multi_processor", None), + "_processors", + (), ) - content_mode = 
_resolve_content_mode(content_override) - - metrics_override = kwargs.get("capture_metrics") - if metrics_override is None: - metrics_override = os.getenv(_CAPTURE_METRICS_ENV) - metrics_enabled = _resolve_bool(metrics_override, default=True) - - agent_name = kwargs.get("agent_name") - agent_id = kwargs.get("agent_id") - agent_description = kwargs.get("agent_description") - base_url = kwargs.get("base_url") - server_address = kwargs.get("server_address") - server_port = kwargs.get("server_port") - - processor = GenAISemanticProcessor( - tracer=tracer, - system_name=system, - include_sensitive_data=content_mode - != ContentCaptureMode.NO_CONTENT, - content_mode=content_mode, - metrics_enabled=metrics_enabled, - agent_name=agent_name, - agent_id=agent_id, - agent_description=agent_description, - base_url=base_url, - server_address=server_address, - server_port=server_port, - agent_name_default="OpenAI Agent", - agent_id_default="agent", - agent_description_default="OpenAI Agents instrumentation", - base_url_default="https://api.openai.com", - server_address_default="api.openai.com", - server_port_default=443, - ) + self._previous_processors = tuple(current) + set_trace_processors([self._processor]) + else: + add_trace_processor(self._processor) - tracing = _load_tracing_module() - provider = tracing.get_trace_provider() - existing = _get_registered_processors(provider) - provider.set_processors([*existing, processor]) - self._processor = processor - - def _uninstrument(self, **kwargs) -> None: + def _uninstrument(self, **kwargs: Any) -> None: if self._processor is None: return - tracing = _load_tracing_module() - provider = tracing.get_trace_provider() - current = _get_registered_processors(provider) - filtered = [proc for proc in current if proc is not self._processor] - provider.set_processors(filtered) - + if self._previous_processors is not None: + set_trace_processors(list(self._previous_processors)) + else: + provider = get_trace_provider() + current = getattr( 
+ getattr(provider, "_multi_processor", None), "_processors", () + ) + filtered = [p for p in current if p is not self._processor] + set_trace_processors(filtered) try: self._processor.shutdown() finally: self._processor = None - - def instrumentation_dependencies(self) -> Collection[str]: - return _instruments + self._previous_processors = None diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/processor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/processor.py new file mode 100644 index 00000000..2e674186 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/processor.py @@ -0,0 +1,145 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Bridges agents-library tracing callbacks to opentelemetry-util-genai. + +The agents library exposes a public extension API +(:func:`agents.tracing.add_trace_processor`) for plugging custom +:class:`TracingProcessor` implementations into its own tracing system. +``Trace.start()`` / ``Span.start()`` invoke the registered processors' +``on_*_start`` callbacks *synchronously* on whichever asyncio task +started the agents-library span: + +* Workflow (``Trace``) and agent (``AgentSpanData``) spans start in the + ``Runner.run`` task itself. +* Function tool (``FunctionSpanData``) spans start inside the per-tool + ``asyncio.Task`` the agents library creates for tool dispatch. That + sub-task inherits a snapshot of the run-loop context (so workflow + + agent are already active in OTel contextvars). + +Because every ``*_end`` callback fires on the same task as its +matching ``*_start``, util-genai's auto-``attach()`` / ``detach()`` of +OTel context is balanced and no context tokens leak across tasks. 
+OTel's natural parent tracking nests the tree: + + workflow > invoke_agent > [chat from openai instrumentation, + execute_tool] + +LLM-level spans (``chat`` / ``responses`` / ``embeddings``) are not +emitted here — ``opentelemetry-instrumentation-genai-openai`` patches +the openai SDK directly and produces those. +""" + +from __future__ import annotations + +import weakref +from typing import Any + +from agents.tracing import Span, Trace, TracingProcessor +from agents.tracing.span_data import ( + AgentSpanData, + FunctionSpanData, +) + +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) +from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.invocation import ( + GenAIInvocation, + ToolInvocation, +) + +# Non-semconv attribute: surfaces the workflow name on the workflow span +# so callers can query/filter by it. util-genai's WorkflowInvocation +# only puts the name in the span name, not as an attribute. +_WORKFLOW_NAME_ATTR = "gen_ai.workflow.name" + + +class GenAITracingProcessor(TracingProcessor): + """Translate agents-library tracing into util-genai invocations. + + Stateful only for span lifetime: each in-flight Trace/Span has one + entry in a :class:`weakref.WeakKeyDictionary` keyed by the + agents-library object itself. Entries are removed on ``*_end`` or + garbage-collected with the agents-library span/trace if the library + drops it before ``end`` (which it shouldn't, but the weak reference + is belt-and-suspenders against any future leak). + """ + + def __init__(self, handler: TelemetryHandler, provider: str) -> None: + self._handler = handler + self._provider = provider + self._invocations: weakref.WeakKeyDictionary[Any, GenAIInvocation] = ( + weakref.WeakKeyDictionary() + ) + + def on_trace_start(self, trace: Trace) -> None: + # ``trace.name`` comes from ``RunConfig.workflow_name`` (default + # "Agent workflow"). 
Callers customize it via the agents library's + # own ``Runner.run(..., run_config=RunConfig(workflow_name=...))``; + # we don't expose a second knob. + invocation = self._handler.workflow(name=trace.name) + if trace.name: + invocation.attributes[_WORKFLOW_NAME_ATTR] = trace.name + self._invocations[trace] = invocation + + def on_trace_end(self, trace: Trace) -> None: + invocation = self._invocations.pop(trace, None) + if invocation is not None: + invocation.stop() + + def on_span_start(self, span: Span[Any]) -> None: + span_data = span.span_data + if isinstance(span_data, AgentSpanData): + invocation = self._handler.invoke_local_agent( + provider=self._provider, + agent_name=span_data.name, + ) + self._invocations[span] = invocation + return + if isinstance(span_data, FunctionSpanData): + invocation = self._handler.tool( + name=span_data.name, + tool_type="function", + ) + + invocation.arguments = span_data.input + + # ToolInvocation does not include provider in metric attributes + # by default; set it so gen_ai.client.operation.duration carries + # the required gen_ai.provider.name attribute. + invocation.metric_attributes[GenAI.GEN_AI_PROVIDER_NAME] = ( + self._provider + ) + self._invocations[span] = invocation + return + # Other span_data types (GenerationSpanData, ResponseSpanData, + # HandoffSpanData, GuardrailSpanData, Speech/TranscriptionSpanData) + # are intentionally ignored. LLM-level spans come from the openai + # instrumentation; the rest have no semconv yet. 
+ + def on_span_end(self, span: Span[Any]) -> None: + invocation = self._invocations.pop(span, None) + if invocation is None: + return + if isinstance(invocation, ToolInvocation) and isinstance( + span.span_data, FunctionSpanData + ): + output = span.span_data.output + if output is not None: + invocation.tool_result = ( + output if isinstance(output, str) else str(output) + ) + invocation.stop() + + def shutdown(self) -> None: + for invocation in list(self._invocations.values()): + try: + invocation.stop() + except Exception: # pylint: disable=broad-except + pass + self._invocations.clear() + + def force_flush(self) -> None: # pragma: no cover - nothing to flush + pass diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py deleted file mode 100644 index f78552f1..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/src/opentelemetry/instrumentation/genai/openai_agents/span_processor.py +++ /dev/null @@ -1,2206 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -""" -GenAI Semantic Convention Trace Processor - -This module implements a custom trace processor that enriches spans with -OpenTelemetry GenAI semantic conventions attributes following the -OpenInference processor pattern. It adds standardized attributes for -generative AI operations using iterator-based attribute extraction. 
- -References: -- OpenTelemetry GenAI Semantic Conventions: - https://opentelemetry.io/docs/specs/semconv/gen-ai/ -- OpenInference Pattern: https://github.com/Arize-ai/openinference -""" - -# pylint: disable=too-many-lines,invalid-name,too-many-locals,too-many-branches,too-many-statements,too-many-return-statements,too-many-nested-blocks,too-many-arguments,too-many-instance-attributes,broad-exception-caught,no-self-use,consider-iterating-dictionary,unused-variable,unnecessary-pass - -from __future__ import annotations - -import importlib -import logging -from dataclasses import dataclass -from datetime import datetime, timezone -from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Sequence -from urllib.parse import urlparse - -from opentelemetry.util.genai.utils import gen_ai_json_dumps - -try: - from agents.tracing import Span, Trace, TracingProcessor - from agents.tracing.span_data import ( - AgentSpanData, - FunctionSpanData, - GenerationSpanData, - GuardrailSpanData, - HandoffSpanData, - ResponseSpanData, - SpeechSpanData, - TranscriptionSpanData, - ) -except ModuleNotFoundError: # pragma: no cover - test stubs - tracing_module = importlib.import_module("agents.tracing") - Span = getattr(tracing_module, "Span") - Trace = getattr(tracing_module, "Trace") - TracingProcessor = getattr(tracing_module, "TracingProcessor") - AgentSpanData = getattr(tracing_module, "AgentSpanData", Any) # type: ignore[assignment] - FunctionSpanData = getattr(tracing_module, "FunctionSpanData", Any) # type: ignore[assignment] - GenerationSpanData = getattr(tracing_module, "GenerationSpanData", Any) # type: ignore[assignment] - GuardrailSpanData = getattr(tracing_module, "GuardrailSpanData", Any) # type: ignore[assignment] - HandoffSpanData = getattr(tracing_module, "HandoffSpanData", Any) # type: ignore[assignment] - ResponseSpanData = getattr(tracing_module, "ResponseSpanData", Any) # type: ignore[assignment] - SpeechSpanData = 
getattr(tracing_module, "SpeechSpanData", Any) # type: ignore[assignment] - TranscriptionSpanData = getattr( - tracing_module, "TranscriptionSpanData", Any - ) # type: ignore[assignment] - -from opentelemetry.context import attach, detach -from opentelemetry.metrics import Histogram, get_meter -from opentelemetry.semconv._incubating.attributes import ( - gen_ai_attributes as GenAIAttributes, -) -from opentelemetry.semconv._incubating.attributes import ( - server_attributes as ServerAttributes, -) -from opentelemetry.trace import Span as OtelSpan -from opentelemetry.trace import ( - SpanKind, - Status, - StatusCode, - Tracer, - set_span_in_context, -) -from opentelemetry.util.types import AttributeValue - -# Import all semantic convention constants -# ---- GenAI semantic convention helpers (embedded from constants.py) ---- - - -def _enum_values(enum_cls) -> dict[str, str]: - """Return mapping of enum member name to value.""" - return {member.name: member.value for member in enum_cls} - - -_PROVIDER_VALUES = _enum_values(GenAIAttributes.GenAiProviderNameValues) - - -class GenAIProvider: - OPENAI = _PROVIDER_VALUES["OPENAI"] - GCP_GEN_AI = _PROVIDER_VALUES["GCP_GEN_AI"] - GCP_VERTEX_AI = _PROVIDER_VALUES["GCP_VERTEX_AI"] - GCP_GEMINI = _PROVIDER_VALUES["GCP_GEMINI"] - ANTHROPIC = _PROVIDER_VALUES["ANTHROPIC"] - COHERE = _PROVIDER_VALUES["COHERE"] - AZURE_AI_INFERENCE = _PROVIDER_VALUES["AZURE_AI_INFERENCE"] - AZURE_AI_OPENAI = _PROVIDER_VALUES["AZURE_AI_OPENAI"] - IBM_WATSONX_AI = _PROVIDER_VALUES["IBM_WATSONX_AI"] - AWS_BEDROCK = _PROVIDER_VALUES["AWS_BEDROCK"] - PERPLEXITY = _PROVIDER_VALUES["PERPLEXITY"] - X_AI = _PROVIDER_VALUES["X_AI"] - DEEPSEEK = _PROVIDER_VALUES["DEEPSEEK"] - GROQ = _PROVIDER_VALUES["GROQ"] - MISTRAL_AI = _PROVIDER_VALUES["MISTRAL_AI"] - - ALL = set(_PROVIDER_VALUES.values()) - - -_OPERATION_VALUES = _enum_values(GenAIAttributes.GenAiOperationNameValues) - - -class GenAIOperationName: - CHAT = _OPERATION_VALUES["CHAT"] - GENERATE_CONTENT = 
_OPERATION_VALUES["GENERATE_CONTENT"] - TEXT_COMPLETION = _OPERATION_VALUES["TEXT_COMPLETION"] - EMBEDDINGS = _OPERATION_VALUES["EMBEDDINGS"] - CREATE_AGENT = _OPERATION_VALUES["CREATE_AGENT"] - INVOKE_AGENT = _OPERATION_VALUES["INVOKE_AGENT"] - EXECUTE_TOOL = _OPERATION_VALUES["EXECUTE_TOOL"] - # Operations below are not yet covered by the spec but remain for backwards compatibility - TRANSCRIPTION = "transcription" - SPEECH = "speech_generation" - GUARDRAIL = "guardrail_check" - HANDOFF = "agent_handoff" - RESPONSE = "response" # internal aggregator in current processor - - CLASS_FALLBACK = { - "generationspan": CHAT, - "responsespan": RESPONSE, - "functionspan": EXECUTE_TOOL, - "agentspan": INVOKE_AGENT, - } - - -_OUTPUT_VALUES = _enum_values(GenAIAttributes.GenAiOutputTypeValues) - - -class GenAIOutputType: - TEXT = _OUTPUT_VALUES["TEXT"] - JSON = _OUTPUT_VALUES["JSON"] - IMAGE = _OUTPUT_VALUES["IMAGE"] - SPEECH = _OUTPUT_VALUES["SPEECH"] - - -class GenAIToolType: - FUNCTION = "function" - EXTENSION = "extension" - DATASTORE = "datastore" - - ALL = {FUNCTION, EXTENSION, DATASTORE} - - -class GenAIEvaluationAttributes: - NAME = "gen_ai.evaluation.name" - SCORE_VALUE = "gen_ai.evaluation.score.value" - SCORE_LABEL = "gen_ai.evaluation.score.label" - EXPLANATION = "gen_ai.evaluation.explanation" - - -def _attr(name: str, fallback: str) -> str: - return getattr(GenAIAttributes, name, fallback) - - -GEN_AI_PROVIDER_NAME = _attr("GEN_AI_PROVIDER_NAME", "gen_ai.provider.name") -GEN_AI_OPERATION_NAME = _attr("GEN_AI_OPERATION_NAME", "gen_ai.operation.name") -GEN_AI_REQUEST_MODEL = _attr("GEN_AI_REQUEST_MODEL", "gen_ai.request.model") -GEN_AI_REQUEST_MAX_TOKENS = _attr( - "GEN_AI_REQUEST_MAX_TOKENS", "gen_ai.request.max_tokens" -) -GEN_AI_REQUEST_TEMPERATURE = _attr( - "GEN_AI_REQUEST_TEMPERATURE", "gen_ai.request.temperature" -) -GEN_AI_REQUEST_TOP_P = _attr("GEN_AI_REQUEST_TOP_P", "gen_ai.request.top_p") -GEN_AI_REQUEST_TOP_K = _attr("GEN_AI_REQUEST_TOP_K", 
"gen_ai.request.top_k") -GEN_AI_REQUEST_FREQUENCY_PENALTY = _attr( - "GEN_AI_REQUEST_FREQUENCY_PENALTY", "gen_ai.request.frequency_penalty" -) -GEN_AI_REQUEST_PRESENCE_PENALTY = _attr( - "GEN_AI_REQUEST_PRESENCE_PENALTY", "gen_ai.request.presence_penalty" -) -GEN_AI_REQUEST_CHOICE_COUNT = _attr( - "GEN_AI_REQUEST_CHOICE_COUNT", "gen_ai.request.choice.count" -) -GEN_AI_REQUEST_STOP_SEQUENCES = _attr( - "GEN_AI_REQUEST_STOP_SEQUENCES", "gen_ai.request.stop_sequences" -) -GEN_AI_REQUEST_ENCODING_FORMATS = _attr( - "GEN_AI_REQUEST_ENCODING_FORMATS", "gen_ai.request.encoding_formats" -) -GEN_AI_REQUEST_SEED = _attr("GEN_AI_REQUEST_SEED", "gen_ai.request.seed") -GEN_AI_RESPONSE_ID = _attr("GEN_AI_RESPONSE_ID", "gen_ai.response.id") -GEN_AI_RESPONSE_MODEL = _attr("GEN_AI_RESPONSE_MODEL", "gen_ai.response.model") -GEN_AI_RESPONSE_FINISH_REASONS = _attr( - "GEN_AI_RESPONSE_FINISH_REASONS", "gen_ai.response.finish_reasons" -) -GEN_AI_USAGE_INPUT_TOKENS = _attr( - "GEN_AI_USAGE_INPUT_TOKENS", "gen_ai.usage.input_tokens" -) -GEN_AI_USAGE_OUTPUT_TOKENS = _attr( - "GEN_AI_USAGE_OUTPUT_TOKENS", "gen_ai.usage.output_tokens" -) -GEN_AI_CONVERSATION_ID = _attr( - "GEN_AI_CONVERSATION_ID", "gen_ai.conversation.id" -) -GEN_AI_AGENT_ID = _attr("GEN_AI_AGENT_ID", "gen_ai.agent.id") -GEN_AI_AGENT_NAME = _attr("GEN_AI_AGENT_NAME", "gen_ai.agent.name") -GEN_AI_AGENT_DESCRIPTION = _attr( - "GEN_AI_AGENT_DESCRIPTION", "gen_ai.agent.description" -) -GEN_AI_TOOL_NAME = _attr("GEN_AI_TOOL_NAME", "gen_ai.tool.name") -GEN_AI_TOOL_TYPE = _attr("GEN_AI_TOOL_TYPE", "gen_ai.tool.type") -GEN_AI_TOOL_CALL_ID = _attr("GEN_AI_TOOL_CALL_ID", "gen_ai.tool.call.id") -GEN_AI_TOOL_DESCRIPTION = _attr( - "GEN_AI_TOOL_DESCRIPTION", "gen_ai.tool.description" -) -GEN_AI_OUTPUT_TYPE = _attr("GEN_AI_OUTPUT_TYPE", "gen_ai.output.type") -GEN_AI_SYSTEM_INSTRUCTIONS = _attr( - "GEN_AI_SYSTEM_INSTRUCTIONS", "gen_ai.system_instructions" -) -GEN_AI_INPUT_MESSAGES = _attr("GEN_AI_INPUT_MESSAGES", "gen_ai.input.messages") 
-GEN_AI_OUTPUT_MESSAGES = _attr( - "GEN_AI_OUTPUT_MESSAGES", "gen_ai.output.messages" -) -GEN_AI_DATA_SOURCE_ID = _attr("GEN_AI_DATA_SOURCE_ID", "gen_ai.data_source.id") - -# The semantic conventions currently expose multiple usage token attributes; we retain the -# completion/prompt aliases for backwards compatibility where used. -GEN_AI_USAGE_PROMPT_TOKENS = _attr( - "GEN_AI_USAGE_PROMPT_TOKENS", "gen_ai.usage.prompt_tokens" -) -GEN_AI_USAGE_COMPLETION_TOKENS = _attr( - "GEN_AI_USAGE_COMPLETION_TOKENS", "gen_ai.usage.completion_tokens" -) - -# Attributes not (yet) defined in the spec retain their literal values. -GEN_AI_TOOL_CALL_ARGUMENTS = "gen_ai.tool.call.arguments" -GEN_AI_TOOL_CALL_RESULT = "gen_ai.tool.call.result" -GEN_AI_TOOL_DEFINITIONS = "gen_ai.tool.definitions" -GEN_AI_ORCHESTRATOR_AGENT_DEFINITIONS = "gen_ai.orchestrator.agent.definitions" -GEN_AI_GUARDRAIL_NAME = "gen_ai.guardrail.name" -GEN_AI_GUARDRAIL_TRIGGERED = "gen_ai.guardrail.triggered" -GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent" -GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent" -GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count" -GEN_AI_TOKEN_TYPE = _attr("GEN_AI_TOKEN_TYPE", "gen_ai.token.type") - -# ---- Normalization utilities (embedded from utils.py) ---- - - -def normalize_provider(provider: Optional[str]) -> Optional[str]: - """Normalize provider name to spec-compliant value.""" - if not provider: - return None - normalized = provider.strip().lower() - if normalized in GenAIProvider.ALL: - return normalized - return provider # passthrough if unknown (forward compat) - - -def validate_tool_type(tool_type: Optional[str]) -> str: - """Validate and normalize tool type.""" - if not tool_type: - return GenAIToolType.FUNCTION # default - normalized = tool_type.strip().lower() - return ( - normalized - if normalized in GenAIToolType.ALL - else GenAIToolType.FUNCTION - ) - - -def normalize_output_type(output_type: Optional[str]) -> str: - """Normalize 
output type to spec-compliant value.""" - if not output_type: - return GenAIOutputType.TEXT # default - normalized = output_type.strip().lower() - base_map = { - "json_object": GenAIOutputType.JSON, - "jsonschema": GenAIOutputType.JSON, - "speech_audio": GenAIOutputType.SPEECH, - "audio_speech": GenAIOutputType.SPEECH, - "image_png": GenAIOutputType.IMAGE, - "function_arguments_json": GenAIOutputType.JSON, - "tool_call": GenAIOutputType.JSON, - "transcription_json": GenAIOutputType.JSON, - } - if normalized in base_map: - return base_map[normalized] - if normalized in { - GenAIOutputType.TEXT, - GenAIOutputType.JSON, - GenAIOutputType.IMAGE, - GenAIOutputType.SPEECH, - }: - return normalized - return GenAIOutputType.TEXT # default for unknown - - -if TYPE_CHECKING: - pass - -# Legacy attributes removed - -logger = logging.getLogger(__name__) - -GEN_AI_SYSTEM_KEY = getattr(GenAIAttributes, "GEN_AI_SYSTEM", "gen_ai.system") - - -class ContentCaptureMode(Enum): - """Controls whether sensitive content is recorded on spans, events, or both.""" - - NO_CONTENT = "no_content" - SPAN_ONLY = "span_only" - EVENT_ONLY = "event_only" - SPAN_AND_EVENT = "span_and_event" - - @property - def capture_in_span(self) -> bool: - return self in ( - ContentCaptureMode.SPAN_ONLY, - ContentCaptureMode.SPAN_AND_EVENT, - ) - - @property - def capture_in_event(self) -> bool: - return self in ( - ContentCaptureMode.EVENT_ONLY, - ContentCaptureMode.SPAN_AND_EVENT, - ) - - -@dataclass -class ContentPayload: - """Container for normalized content associated with a span.""" - - input_messages: Optional[list[dict[str, Any]]] = None - output_messages: Optional[list[dict[str, Any]]] = None - system_instructions: Optional[list[dict[str, str]]] = None - tool_arguments: Any = None - tool_result: Any = None - - -def _is_instance_of(value: Any, classes: Any) -> bool: - """Safe isinstance that tolerates typing.Any placeholders.""" - if not isinstance(classes, tuple): - classes = (classes,) - for cls in 
classes: - try: - if isinstance(value, cls): - return True - except TypeError: - continue - return False - - -def _infer_server_attributes(base_url: Optional[str]) -> dict[str, Any]: - """Return server.address / server.port attributes if base_url provided.""" - out: dict[str, Any] = {} - if not base_url: - return out - try: - parsed = urlparse(base_url) - if parsed.hostname: - out[ServerAttributes.SERVER_ADDRESS] = parsed.hostname - if parsed.port: - out[ServerAttributes.SERVER_PORT] = parsed.port - except Exception: - return out - return out - - -def safe_json_dumps(obj: Any) -> str: - """Safely convert object to JSON string (fallback to str).""" - try: - return gen_ai_json_dumps(obj) - except (TypeError, ValueError): - return str(obj) - - -def _as_utc_nano(dt: datetime) -> int: - """Convert datetime to UTC nanoseconds timestamp.""" - return int(dt.astimezone(timezone.utc).timestamp() * 1_000_000_000) - - -def _get_span_status(span: Span[Any]) -> Status: - """Get OpenTelemetry span status from agent span.""" - if error := getattr(span, "error", None): - return Status( - status_code=StatusCode.ERROR, - description=f"{error.get('message', '')}: {error.get('data', '')}", - ) - return Status(StatusCode.OK) - - -def get_span_name( - operation_name: str, - model: Optional[str] = None, - agent_name: Optional[str] = None, - tool_name: Optional[str] = None, -) -> str: - """Generate spec-compliant span name based on operation type.""" - base_name = operation_name - - if operation_name in { - GenAIOperationName.CHAT, - GenAIOperationName.TEXT_COMPLETION, - GenAIOperationName.EMBEDDINGS, - GenAIOperationName.TRANSCRIPTION, - GenAIOperationName.SPEECH, - }: - return f"{base_name} {model}" if model else base_name - - if operation_name == GenAIOperationName.CREATE_AGENT: - return f"{base_name} {agent_name}" if agent_name else base_name - - if operation_name == GenAIOperationName.INVOKE_AGENT: - return f"{base_name} {agent_name}" if agent_name else base_name - - if operation_name 
== GenAIOperationName.EXECUTE_TOOL: - return f"{base_name} {tool_name}" if tool_name else base_name - - if operation_name == GenAIOperationName.HANDOFF: - return f"{base_name} {agent_name}" if agent_name else base_name - - return base_name - - -class GenAISemanticProcessor(TracingProcessor): - """Trace processor adding GenAI semantic convention attributes with metrics.""" - - # pylint: disable=too-many-positional-arguments - def __init__( - self, - tracer: Optional[Tracer] = None, - system_name: str = "openai", - include_sensitive_data: bool = True, - content_mode: ContentCaptureMode = ContentCaptureMode.SPAN_AND_EVENT, - base_url: Optional[str] = None, - agent_name: Optional[str] = None, - agent_id: Optional[str] = None, - agent_description: Optional[str] = None, - server_address: Optional[str] = None, - server_port: Optional[int] = None, - metrics_enabled: bool = True, - agent_name_default: Optional[str] = None, - agent_id_default: Optional[str] = None, - agent_description_default: Optional[str] = None, - base_url_default: Optional[str] = None, - server_address_default: Optional[str] = None, - server_port_default: Optional[int] = None, - ): - """Initialize processor with metrics support. - - Args: - tracer: Optional OpenTelemetry tracer - system_name: Provider name (openai/azure.ai.inference/etc.) 
- include_sensitive_data: Include model/tool IO when True - base_url: API endpoint for server.address/port - agent_name: Name of the agent (can be overridden by env var) - agent_id: ID of the agent (can be overridden by env var) - agent_description: Description of the agent (can be overridden by env var) - server_address: Server address (can be overridden by env var or base_url) - server_port: Server port (can be overridden by env var or base_url) - """ - self._tracer = tracer - self.system_name = normalize_provider(system_name) or system_name - self._content_mode = content_mode - self.include_sensitive_data = include_sensitive_data and ( - content_mode.capture_in_span or content_mode.capture_in_event - ) - effective_base_url = base_url or base_url_default - self.base_url = effective_base_url - - # Agent information - prefer explicit overrides; otherwise defer to span data - self.agent_name = agent_name - self.agent_id = agent_id - self.agent_description = agent_description - self._agent_name_default = agent_name_default - self._agent_id_default = agent_id_default - self._agent_description_default = agent_description_default - - # Server information - use init parameters, then base_url inference - self.server_address = server_address or server_address_default - resolved_port = ( - server_port if server_port is not None else server_port_default - ) - self.server_port = resolved_port - - # If server info not provided, try to extract from base_url - if ( - not self.server_address or not self.server_port - ) and effective_base_url: - server_attrs = _infer_server_attributes(effective_base_url) - if not self.server_address: - self.server_address = server_attrs.get( - ServerAttributes.SERVER_ADDRESS - ) - if not self.server_port: - self.server_port = server_attrs.get( - ServerAttributes.SERVER_PORT - ) - - # Content capture configuration - self._capture_messages = ( - content_mode.capture_in_span or content_mode.capture_in_event - ) - self._capture_system_instructions = 
True - self._capture_tool_definitions = True - - # Span tracking - self._root_spans: dict[str, OtelSpan] = {} - self._otel_spans: dict[str, OtelSpan] = {} - self._tokens: dict[str, object] = {} - self._span_parents: dict[str, Optional[str]] = {} - self._agent_content: dict[str, Dict[str, list[Any]]] = {} - - # Metrics configuration - self._metrics_enabled = metrics_enabled - self._meter = None - self._duration_histogram: Optional[Histogram] = None - self._token_usage_histogram: Optional[Histogram] = None - if self._metrics_enabled: - self._init_metrics() - - def _get_server_attributes(self) -> dict[str, Any]: - """Get server attributes from configured values.""" - attrs = {} - if self.server_address: - attrs[ServerAttributes.SERVER_ADDRESS] = self.server_address - if self.server_port: - attrs[ServerAttributes.SERVER_PORT] = self.server_port - return attrs - - def _init_metrics(self): - """Initialize metric instruments.""" - self._meter = get_meter( - "opentelemetry.instrumentation.genai.openai_agents", "0.1.0" - ) - - # Operation duration histogram - self._duration_histogram = self._meter.create_histogram( - name="gen_ai.client.operation.duration", - description="GenAI operation duration", - unit="s", - ) - - # Token usage histogram - self._token_usage_histogram = self._meter.create_histogram( - name="gen_ai.client.token.usage", - description="Number of input and output tokens used", - unit="{token}", - ) - - def _record_metrics( - self, span: Span[Any], attributes: dict[str, AttributeValue] - ) -> None: - """Record metrics for the span.""" - if not self._metrics_enabled or ( - self._duration_histogram is None - and self._token_usage_histogram is None - ): - return - - try: - # Calculate duration - duration = None - if hasattr(span, "started_at") and hasattr(span, "ended_at"): - try: - start = datetime.fromisoformat(span.started_at) - end = datetime.fromisoformat(span.ended_at) - duration = (end - start).total_seconds() - except Exception: - pass - - # Build metric 
attributes - metric_attrs = { - GEN_AI_PROVIDER_NAME: attributes.get(GEN_AI_PROVIDER_NAME), - GEN_AI_OPERATION_NAME: attributes.get(GEN_AI_OPERATION_NAME), - GEN_AI_REQUEST_MODEL: ( - attributes.get(GEN_AI_REQUEST_MODEL) - or attributes.get(GEN_AI_RESPONSE_MODEL) - ), - ServerAttributes.SERVER_ADDRESS: attributes.get( - ServerAttributes.SERVER_ADDRESS - ), - ServerAttributes.SERVER_PORT: attributes.get( - ServerAttributes.SERVER_PORT - ), - } - - # Add error type if present - if error := getattr(span, "error", None): - error_type = error.get("type") or error.get("name") - if error_type: - metric_attrs["error.type"] = error_type - - # Remove None values - metric_attrs = { - k: v for k, v in metric_attrs.items() if v is not None - } - - # Record duration - if duration is not None and self._duration_histogram is not None: - self._duration_histogram.record(duration, metric_attrs) - - # Record token usage - if self._token_usage_histogram: - input_tokens = attributes.get(GEN_AI_USAGE_INPUT_TOKENS) - if isinstance(input_tokens, (int, float)): - token_attrs = dict(metric_attrs) - token_attrs[GEN_AI_TOKEN_TYPE] = "input" - self._token_usage_histogram.record( - input_tokens, token_attrs - ) - - output_tokens = attributes.get(GEN_AI_USAGE_OUTPUT_TOKENS) - if isinstance(output_tokens, (int, float)): - token_attrs = dict(metric_attrs) - token_attrs[GEN_AI_TOKEN_TYPE] = "output" - self._token_usage_histogram.record( - output_tokens, token_attrs - ) - - except Exception as e: - logger.debug("Failed to record metrics: %s", e) - - def _emit_content_events( - self, - span: Span[Any], - otel_span: OtelSpan, - payload: ContentPayload, - agent_content: Optional[Dict[str, list[Any]]] = None, - ) -> None: - """Intentionally skip emitting gen_ai.* events to avoid payload duplication.""" - if ( - not self.include_sensitive_data - or not self._content_mode.capture_in_event - or not otel_span.is_recording() - ): - return - - logger.debug( - "Event capture requested for span %s but is 
currently disabled", - getattr(span, "span_id", ""), - ) - return - - def _collect_system_instructions( - self, messages: Sequence[Any] | None - ) -> list[dict[str, str]]: - """Return system/ai role instructions as typed text objects. - - Enforces format: [{"type": "text", "content": "..."}]. - Handles message content that may be a string, list of parts, - or a dict with text/content fields. - """ - if not messages: - return [] - out: list[dict[str, str]] = [] - for m in messages: - if not isinstance(m, dict): - continue - role = m.get("role") - if role in {"system", "ai"}: - content = m.get("content") - out.extend(self._normalize_to_text_parts(content)) - return out - - def _normalize_to_text_parts(self, content: Any) -> list[dict[str, str]]: - """Normalize arbitrary content into typed text parts. - - - String -> [{type: text, content: }] - - List/Tuple -> map each item to a text part (string/dict supported) - - Dict -> use 'text' or 'content' field when available; else str(dict) - - Other -> str(value) - """ - parts: list[dict[str, str]] = [] - if content is None: - return parts - if isinstance(content, str): - parts.append({"type": "text", "content": content}) - return parts - if isinstance(content, (list, tuple)): - for item in content: - if isinstance(item, str): - parts.append({"type": "text", "content": item}) - elif isinstance(item, dict): - txt = item.get("text") or item.get("content") - if isinstance(txt, str) and txt: - parts.append({"type": "text", "content": txt}) - else: - parts.append({"type": "text", "content": str(item)}) - else: - parts.append({"type": "text", "content": str(item)}) - return parts - if isinstance(content, dict): - txt = content.get("text") or content.get("content") - if isinstance(txt, str) and txt: - parts.append({"type": "text", "content": txt}) - else: - parts.append({"type": "text", "content": str(content)}) - return parts - # Fallback for other types - parts.append({"type": "text", "content": str(content)}) - return parts - - 
def _redacted_text_parts(self) -> list[dict[str, str]]: - """Return a single redacted text part for system instructions.""" - return [{"type": "text", "content": "readacted"}] - - def _normalize_messages_to_role_parts( - self, messages: Sequence[Any] | None - ) -> list[dict[str, Any]]: - """Normalize input messages to enforced role+parts schema. - - Each message becomes: {"role": , "parts": [ {"type": ..., ...} ]} - Redaction: when include_sensitive_data is False, replace text content, - tool_call arguments, and tool_call_response result with "readacted". - """ - if not messages: - return [] - normalized: list[dict[str, Any]] = [] - for m in messages: - if not isinstance(m, dict): - # Fallback: treat as user text - normalized.append( - { - "role": "user", - "parts": [ - { - "type": "text", - "content": "readacted" - if not self.include_sensitive_data - else str(m), - } - ], - } - ) - continue - - role = m.get("role") or "user" - parts: list[dict[str, Any]] = [] - - # Existing parts array - if isinstance(m.get("parts"), (list, tuple)): - for p in m["parts"]: - if isinstance(p, dict): - ptype = p.get("type") or "text" - newp: dict[str, Any] = {"type": ptype} - if ptype == "text": - txt = p.get("content") or p.get("text") - newp["content"] = ( - "readacted" - if not self.include_sensitive_data - else (txt if isinstance(txt, str) else str(p)) - ) - elif ptype == "tool_call": - newp["id"] = p.get("id") - newp["name"] = p.get("name") - args = p.get("arguments") - newp["arguments"] = ( - "readacted" - if not self.include_sensitive_data - else args - ) - elif ptype == "tool_call_response": - newp["id"] = p.get("id") or m.get("tool_call_id") - result = p.get("result") or p.get("content") - newp["result"] = ( - "readacted" - if not self.include_sensitive_data - else result - ) - else: - newp["content"] = ( - "readacted" - if not self.include_sensitive_data - else str(p) - ) - parts.append(newp) - else: - parts.append( - { - "type": "text", - "content": "readacted" - if not 
self.include_sensitive_data - else str(p), - } - ) - - # OpenAI content - content = m.get("content") - if isinstance(content, str): - parts.append( - { - "type": "text", - "content": "readacted" - if not self.include_sensitive_data - else content, - } - ) - elif isinstance(content, (list, tuple)): - for item in content: - if isinstance(item, dict): - itype = item.get("type") or "text" - if itype == "text": - txt = item.get("text") or item.get("content") - parts.append( - { - "type": "text", - "content": "readacted" - if not self.include_sensitive_data - else ( - txt - if isinstance(txt, str) - else str(item) - ), - } - ) - else: - # Fallback for other part types - parts.append( - { - "type": "text", - "content": "readacted" - if not self.include_sensitive_data - else str(item), - } - ) - else: - parts.append( - { - "type": "text", - "content": "readacted" - if not self.include_sensitive_data - else str(item), - } - ) - - # Assistant tool_calls - if role == "assistant" and isinstance( - m.get("tool_calls"), (list, tuple) - ): - for tc in m["tool_calls"]: - if not isinstance(tc, dict): - continue - p = {"type": "tool_call"} - p["id"] = tc.get("id") - fn = tc.get("function") or {} - if isinstance(fn, dict): - p["name"] = fn.get("name") - args = fn.get("arguments") - p["arguments"] = ( - "readacted" - if not self.include_sensitive_data - else args - ) - parts.append(p) - - # Tool call response - if role in {"tool", "function"}: - p = {"type": "tool_call_response"} - p["id"] = m.get("tool_call_id") or m.get("id") - result = m.get("result") or m.get("content") - p["result"] = ( - "readacted" if not self.include_sensitive_data else result - ) - parts.append(p) - - if parts: - normalized.append({"role": role, "parts": parts}) - elif not self.include_sensitive_data: - normalized.append( - {"role": role, "parts": self._redacted_text_parts()} - ) - - return normalized - - def _normalize_output_messages_to_role_parts( - self, span_data: Any - ) -> list[dict[str, Any]]: - 
"""Normalize output messages to enforced role+parts schema. - - Produces: [{"role": "assistant", "parts": [{"type": "text", "content": "..."}], - optional "finish_reason": "..." }] - """ - messages: list[dict[str, Any]] = [] - parts: list[dict[str, Any]] = [] - finish_reason: Optional[str] = None - - # Response span: prefer consolidated output_text - response = getattr(span_data, "response", None) - if response is not None: - # Collect text content - output_text = getattr(response, "output_text", None) - if isinstance(output_text, str) and output_text: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else output_text - ), - } - ) - else: - output = getattr(response, "output", None) - if isinstance(output, Sequence): - for item in output: - # ResponseOutputMessage may have a string representation - txt = getattr(item, "content", None) - if isinstance(txt, str) and txt: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else txt - ), - } - ) - else: - # Fallback: stringified - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else str(item) - ), - } - ) - # Capture finish_reason from parts when present - fr = getattr(item, "finish_reason", None) - if isinstance(fr, str) and not finish_reason: - finish_reason = fr - - # Generation span: use span_data.output - if not parts: - output = getattr(span_data, "output", None) - if isinstance(output, Sequence): - for item in output: - if isinstance(item, dict): - if item.get("type") == "text": - txt = item.get("content") or item.get("text") - if isinstance(txt, str) and txt: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else txt - ), - } - ) - elif "content" in item and isinstance( - item["content"], str - ): - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not 
self.include_sensitive_data - else item["content"] - ), - } - ) - else: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else str(item) - ), - } - ) - if not finish_reason and isinstance( - item.get("finish_reason"), str - ): - finish_reason = item.get("finish_reason") - elif isinstance(item, str): - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else item - ), - } - ) - else: - parts.append( - { - "type": "text", - "content": ( - "readacted" - if not self.include_sensitive_data - else str(item) - ), - } - ) - - # Build assistant message - msg: dict[str, Any] = {"role": "assistant", "parts": parts} - if finish_reason: - msg["finish_reason"] = finish_reason - # Only include if there is content - if parts: - messages.append(msg) - return messages - - def _build_content_payload(self, span: Span[Any]) -> ContentPayload: - """Normalize content from span data for attribute/event capture.""" - payload = ContentPayload() - span_data = getattr(span, "span_data", None) - if span_data is None or not self.include_sensitive_data: - return payload - - capture_messages = self._capture_messages and ( - self._content_mode.capture_in_span - or self._content_mode.capture_in_event - ) - capture_system = self._capture_system_instructions and ( - self._content_mode.capture_in_span - or self._content_mode.capture_in_event - ) - capture_tools = self._content_mode.capture_in_span or ( - self._content_mode.capture_in_event - and _is_instance_of(span_data, FunctionSpanData) - ) - - if _is_instance_of(span_data, GenerationSpanData): - span_input = getattr(span_data, "input", None) - if capture_messages and span_input: - payload.input_messages = ( - self._normalize_messages_to_role_parts(span_input) - ) - if capture_system and span_input: - sys_instr = self._collect_system_instructions(span_input) - if sys_instr: - payload.system_instructions = sys_instr - if capture_messages and ( 
- getattr(span_data, "output", None) - or getattr(span_data, "response", None) - ): - normalized_out = self._normalize_output_messages_to_role_parts( - span_data - ) - if normalized_out: - payload.output_messages = normalized_out - - elif _is_instance_of(span_data, ResponseSpanData): - span_input = getattr(span_data, "input", None) - response_obj = getattr(span_data, "response", None) - if capture_messages and span_input: - payload.input_messages = ( - self._normalize_messages_to_role_parts(span_input) - ) - - if ( - capture_system - and response_obj - and hasattr(response_obj, "instructions") - ): - payload.system_instructions = self._normalize_to_text_parts( - response_obj.instructions - ) - if capture_system and span_input: - sys_instr = self._collect_system_instructions(span_input) - if sys_instr: - payload.system_instructions = sys_instr - if capture_messages: - normalized_out = self._normalize_output_messages_to_role_parts( - span_data - ) - if normalized_out: - payload.output_messages = normalized_out - - elif _is_instance_of(span_data, FunctionSpanData) and capture_tools: - - def _serialize_tool_value(value: Any) -> Optional[str]: - if value is None: - return None - if isinstance(value, (dict, list)): - return safe_json_dumps(value) - return str(value) - - payload.tool_arguments = _serialize_tool_value( - getattr(span_data, "input", None) - ) - payload.tool_result = _serialize_tool_value( - getattr(span_data, "output", None) - ) - - return payload - - def _find_agent_parent_span_id( - self, span_id: Optional[str] - ) -> Optional[str]: - """Return nearest ancestor span id that represents an agent.""" - current = span_id - visited: set[str] = set() - while current: - if current in visited: - break - visited.add(current) - if current in self._agent_content: - return current - current = self._span_parents.get(current) - return None - - def _update_agent_aggregate( - self, span: Span[Any], payload: ContentPayload - ) -> None: - """Accumulate child span content 
for parent agent span.""" - agent_id = self._find_agent_parent_span_id(span.parent_id) - if not agent_id: - return - entry = self._agent_content.setdefault( - agent_id, - { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - "request_model": None, - }, - ) - if payload.input_messages: - entry["input_messages"] = self._merge_content_sequence( - entry["input_messages"], payload.input_messages - ) - if payload.output_messages: - entry["output_messages"] = self._merge_content_sequence( - entry["output_messages"], payload.output_messages - ) - if payload.system_instructions: - entry["system_instructions"] = self._merge_content_sequence( - entry["system_instructions"], payload.system_instructions - ) - - if not entry.get("request_model"): - model = getattr(span.span_data, "model", None) - if not model: - response_obj = getattr(span.span_data, "response", None) - model = getattr(response_obj, "model", None) - if model: - entry["request_model"] = model - - def _infer_output_type(self, span_data: Any) -> str: - """Infer gen_ai.output.type for multiple span kinds.""" - if _is_instance_of(span_data, FunctionSpanData): - # Tool results are typically JSON - return GenAIOutputType.JSON - if _is_instance_of(span_data, TranscriptionSpanData): - return GenAIOutputType.TEXT - if _is_instance_of(span_data, SpeechSpanData): - return GenAIOutputType.SPEECH - if _is_instance_of(span_data, GuardrailSpanData): - return GenAIOutputType.TEXT - if _is_instance_of(span_data, HandoffSpanData): - return GenAIOutputType.TEXT - - # Check for embeddings operation - if _is_instance_of(span_data, GenerationSpanData): - if hasattr(span_data, "embedding_dimension"): - return ( - GenAIOutputType.TEXT - ) # Embeddings are numeric but represented as text - - # Generation/Response - check output structure - output = getattr(span_data, "output", None) or getattr( - getattr(span_data, "response", None), "output", None - ) - if isinstance(output, Sequence) and output: - first = 
output[0] - if isinstance(first, dict): - item_type = first.get("type") - if isinstance(item_type, str): - normalized = item_type.strip().lower() - if normalized in {"image", "image_url"}: - return GenAIOutputType.IMAGE - if normalized in {"audio", "speech", "audio_url"}: - return GenAIOutputType.SPEECH - if normalized in { - "json", - "json_object", - "jsonschema", - "function_call", - "tool_call", - "tool_result", - }: - return GenAIOutputType.JSON - if normalized in { - "text", - "output_text", - "message", - "assistant", - }: - return GenAIOutputType.TEXT - - # Conversation style payloads - if "role" in first: - parts = first.get("parts") - if isinstance(parts, Sequence) and parts: - # If all parts are textual (or missing explicit type), treat as text - textual = True - for part in parts: - if isinstance(part, dict): - part_type = str(part.get("type", "")).lower() - if part_type in {"image", "image_url"}: - return GenAIOutputType.IMAGE - if part_type in { - "audio", - "speech", - "audio_url", - }: - return GenAIOutputType.SPEECH - if part_type and part_type not in { - "text", - "output_text", - "assistant", - }: - textual = False - elif not isinstance(part, str): - textual = False - if textual: - return GenAIOutputType.TEXT - content_value = first.get("content") - if isinstance(content_value, str): - return GenAIOutputType.TEXT - - # Detect structured data without explicit type - json_like_keys = { - "schema", - "properties", - "arguments", - "result", - "data", - "json", - "output_json", - } - if json_like_keys.intersection(first.keys()): - return GenAIOutputType.JSON - - return GenAIOutputType.TEXT - - @staticmethod - def _sanitize_usage_payload(usage: Any) -> None: - """Remove non-spec usage fields (e.g., total tokens) in-place.""" - if not usage: - return - if isinstance(usage, dict): - usage.pop("total_tokens", None) - return - if hasattr(usage, "total_tokens"): - try: - setattr(usage, "total_tokens", None) - except Exception: # pragma: no cover - 
defensive - try: - delattr(usage, "total_tokens") - except Exception: # pragma: no cover - defensive - pass - - def _get_span_kind(self, span_data: Any) -> SpanKind: - """Determine appropriate span kind based on span data type.""" - if _is_instance_of(span_data, FunctionSpanData): - return SpanKind.INTERNAL # Tool execution is internal - if _is_instance_of( - span_data, - ( - GenerationSpanData, - ResponseSpanData, - TranscriptionSpanData, - SpeechSpanData, - ), - ): - return SpanKind.CLIENT # API calls to model providers - if _is_instance_of(span_data, AgentSpanData): - return SpanKind.CLIENT - if _is_instance_of(span_data, (GuardrailSpanData, HandoffSpanData)): - return SpanKind.INTERNAL # Agent operations are internal - return SpanKind.INTERNAL - - def on_trace_start(self, trace: Trace) -> None: - """Create root span when trace starts.""" - if self._tracer: - attributes = { - GEN_AI_PROVIDER_NAME: self.system_name, - GEN_AI_SYSTEM_KEY: self.system_name, - GEN_AI_OPERATION_NAME: GenAIOperationName.INVOKE_AGENT, - } - # Legacy emission removed - - # Add configured agent and server attributes - if self.agent_name: - attributes[GEN_AI_AGENT_NAME] = self.agent_name - if self.agent_id: - attributes[GEN_AI_AGENT_ID] = self.agent_id - if self.agent_description: - attributes[GEN_AI_AGENT_DESCRIPTION] = self.agent_description - attributes.update(self._get_server_attributes()) - - otel_span = self._tracer.start_span( - name=trace.name, - attributes=attributes, - kind=SpanKind.SERVER, # Root span is typically server - ) - self._root_spans[trace.trace_id] = otel_span - - def on_trace_end(self, trace: Trace) -> None: - """End root span when trace ends.""" - if root_span := self._root_spans.pop(trace.trace_id, None): - if root_span.is_recording(): - root_span.set_status(Status(StatusCode.OK)) - root_span.end() - self._cleanup_spans_for_trace(trace.trace_id) - - def on_span_start(self, span: Span[Any]) -> None: - """Start child span for agent span.""" - if not self._tracer or 
not span.started_at: - return - - self._span_parents[span.span_id] = span.parent_id - if ( - _is_instance_of(span.span_data, AgentSpanData) - and span.span_id not in self._agent_content - ): - self._agent_content[span.span_id] = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - "request_model": None, - } - - parent_span = ( - self._otel_spans.get(span.parent_id) - if span.parent_id - else self._root_spans.get(span.trace_id) - ) - context = set_span_in_context(parent_span) if parent_span else None - - # Get operation details for span naming - operation_name = self._get_operation_name(span.span_data) - model = getattr(span.span_data, "model", None) - if model is None: - response_obj = getattr(span.span_data, "response", None) - model = getattr(response_obj, "model", None) - - # Use configured agent name or get from span data - agent_name = self.agent_name - if not agent_name and _is_instance_of(span.span_data, AgentSpanData): - agent_name = getattr(span.span_data, "name", None) - if not agent_name: - agent_name = self._agent_name_default - - tool_name = ( - getattr(span.span_data, "name", None) - if _is_instance_of(span.span_data, FunctionSpanData) - else None - ) - - # Generate spec-compliant span name - span_name = get_span_name(operation_name, model, agent_name, tool_name) - - attributes = { - GEN_AI_PROVIDER_NAME: self.system_name, - GEN_AI_SYSTEM_KEY: self.system_name, - GEN_AI_OPERATION_NAME: operation_name, - } - # Legacy emission removed - - # Add configured agent and server attributes - agent_name_override = self.agent_name or self._agent_name_default - agent_id_override = self.agent_id or self._agent_id_default - agent_desc_override = ( - self.agent_description or self._agent_description_default - ) - if agent_name_override: - attributes[GEN_AI_AGENT_NAME] = agent_name_override - if agent_id_override: - attributes[GEN_AI_AGENT_ID] = agent_id_override - if agent_desc_override: - attributes[GEN_AI_AGENT_DESCRIPTION] = 
agent_desc_override - attributes.update(self._get_server_attributes()) - - otel_span = self._tracer.start_span( - name=span_name, - context=context, - attributes=attributes, - kind=self._get_span_kind(span.span_data), - ) - self._otel_spans[span.span_id] = otel_span - self._tokens[span.span_id] = attach(set_span_in_context(otel_span)) - - def on_span_end(self, span: Span[Any]) -> None: - """Finalize span with attributes, events, and metrics.""" - if token := self._tokens.pop(span.span_id, None): - detach(token) - - payload = self._build_content_payload(span) - self._update_agent_aggregate(span, payload) - agent_content = ( - self._agent_content.get(span.span_id) - if _is_instance_of(span.span_data, AgentSpanData) - else None - ) - - if not (otel_span := self._otel_spans.pop(span.span_id, None)): - # Log attributes even without OTel span - try: - attributes = dict( - self._extract_genai_attributes( - span, payload, agent_content - ) - ) - for key, value in attributes.items(): - logger.debug( - "GenAI attr span %s: %s=%s", span.span_id, key, value - ) - except Exception as e: - logger.warning( - "Failed to extract attributes for span %s: %s", - span.span_id, - e, - ) - if _is_instance_of(span.span_data, AgentSpanData): - self._agent_content.pop(span.span_id, None) - self._span_parents.pop(span.span_id, None) - return - - try: - # Extract and set attributes - attributes: dict[str, AttributeValue] = {} - # Optimize for non-sampled spans to avoid heavy work - if not otel_span.is_recording(): - otel_span.end() - return - for key, value in self._extract_genai_attributes( - span, payload, agent_content - ): - otel_span.set_attribute(key, value) - attributes[key] = value - - if _is_instance_of( - span.span_data, (GenerationSpanData, ResponseSpanData) - ): - operation_name = attributes.get(GEN_AI_OPERATION_NAME) - model_for_name = attributes.get(GEN_AI_REQUEST_MODEL) or ( - attributes.get(GEN_AI_RESPONSE_MODEL) - ) - if operation_name and model_for_name: - 
agent_name_for_name = attributes.get(GEN_AI_AGENT_NAME) - tool_name_for_name = attributes.get(GEN_AI_TOOL_NAME) - new_name = get_span_name( - operation_name, - model_for_name, - agent_name_for_name, - tool_name_for_name, - ) - if new_name != otel_span.name: - otel_span.update_name(new_name) - - # Emit span events for captured content when configured - self._emit_content_events(span, otel_span, payload, agent_content) - - # Emit operation details event if configured - # Set error status if applicable - otel_span.set_status(status=_get_span_status(span)) - if getattr(span, "error", None): - err_obj = span.error - err_type = err_obj.get("type") or err_obj.get("name") - if err_type: - otel_span.set_attribute("error.type", err_type) - - # Record metrics before ending span - self._record_metrics(span, attributes) - - # End the span - otel_span.end() - - except Exception as e: - logger.warning("Failed to enrich span %s: %s", span.span_id, e) - otel_span.set_status(Status(StatusCode.ERROR, str(e))) - otel_span.end() - finally: - if _is_instance_of(span.span_data, AgentSpanData): - self._agent_content.pop(span.span_id, None) - self._span_parents.pop(span.span_id, None) - - def shutdown(self) -> None: - """Clean up resources on shutdown.""" - for span_id, otel_span in list(self._otel_spans.items()): - otel_span.set_status( - Status(StatusCode.ERROR, "Application shutdown") - ) - otel_span.end() - - for trace_id, root_span in list(self._root_spans.items()): - root_span.set_status( - Status(StatusCode.ERROR, "Application shutdown") - ) - root_span.end() - - self._otel_spans.clear() - self._root_spans.clear() - self._tokens.clear() - self._span_parents.clear() - self._agent_content.clear() - - def force_flush(self) -> None: - """Force flush (no-op for this processor).""" - pass - - def _get_operation_name(self, span_data: Any) -> str: - """Determine operation name from span data type.""" - if _is_instance_of(span_data, GenerationSpanData): - # Check if it's embeddings - if 
hasattr(span_data, "embedding_dimension"): - return GenAIOperationName.EMBEDDINGS - # Check if it's chat or completion - if span_data.input: - first_input = span_data.input[0] if span_data.input else None - if isinstance(first_input, dict) and "role" in first_input: - return GenAIOperationName.CHAT - return GenAIOperationName.TEXT_COMPLETION - if _is_instance_of(span_data, AgentSpanData): - # The OpenAI Agents SDK AgentSpanData has no "operation" field; - # agent spans always represent invoke_agent. - return GenAIOperationName.INVOKE_AGENT - if _is_instance_of(span_data, FunctionSpanData): - return GenAIOperationName.EXECUTE_TOOL - if _is_instance_of(span_data, ResponseSpanData): - return GenAIOperationName.CHAT # Response typically from chat - if _is_instance_of(span_data, TranscriptionSpanData): - return GenAIOperationName.TRANSCRIPTION - if _is_instance_of(span_data, SpeechSpanData): - return GenAIOperationName.SPEECH - if _is_instance_of(span_data, GuardrailSpanData): - return GenAIOperationName.GUARDRAIL - if _is_instance_of(span_data, HandoffSpanData): - return GenAIOperationName.HANDOFF - return "unknown" - - def _extract_genai_attributes( - self, - span: Span[Any], - payload: ContentPayload, - agent_content: Optional[Dict[str, list[Any]]] = None, - ) -> Iterator[tuple[str, AttributeValue]]: - """Yield (attr, value) pairs for GenAI semantic conventions.""" - span_data = span.span_data - - # Base attributes - yield GEN_AI_PROVIDER_NAME, self.system_name - yield GEN_AI_SYSTEM_KEY, self.system_name - # Legacy emission removed - - # Add configured agent attributes (always include when set) - agent_name_override = self.agent_name or self._agent_name_default - agent_id_override = self.agent_id or self._agent_id_default - agent_desc_override = ( - self.agent_description or self._agent_description_default - ) - if agent_name_override: - yield GEN_AI_AGENT_NAME, agent_name_override - if agent_id_override: - yield GEN_AI_AGENT_ID, agent_id_override - if 
agent_desc_override: - yield GEN_AI_AGENT_DESCRIPTION, agent_desc_override - - # Server attributes - for key, value in self._get_server_attributes().items(): - yield key, value - - # Process different span types - if _is_instance_of(span_data, GenerationSpanData): - yield from self._get_attributes_from_generation_span_data( - span_data, payload - ) - elif _is_instance_of(span_data, AgentSpanData): - yield from self._get_attributes_from_agent_span_data( - span_data, agent_content - ) - elif _is_instance_of(span_data, FunctionSpanData): - yield from self._get_attributes_from_function_span_data( - span_data, payload - ) - elif _is_instance_of(span_data, ResponseSpanData): - yield from self._get_attributes_from_response_span_data( - span_data, payload - ) - elif _is_instance_of(span_data, TranscriptionSpanData): - yield from self._get_attributes_from_transcription_span_data( - span_data - ) - elif _is_instance_of(span_data, SpeechSpanData): - yield from self._get_attributes_from_speech_span_data(span_data) - elif _is_instance_of(span_data, GuardrailSpanData): - yield from self._get_attributes_from_guardrail_span_data(span_data) - elif _is_instance_of(span_data, HandoffSpanData): - yield from self._get_attributes_from_handoff_span_data(span_data) - - def _get_attributes_from_generation_span_data( - self, span_data: GenerationSpanData, payload: ContentPayload - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from generation span.""" - # Operation name - operation_name = self._get_operation_name(span_data) - yield GEN_AI_OPERATION_NAME, operation_name - - # Model information - if span_data.model: - yield GEN_AI_REQUEST_MODEL, span_data.model - - # Check for embeddings-specific attributes - if hasattr(span_data, "embedding_dimension"): - yield ( - GEN_AI_EMBEDDINGS_DIMENSION_COUNT, - span_data.embedding_dimension, - ) - - # Check for data source - if hasattr(span_data, "data_source_id"): - yield GEN_AI_DATA_SOURCE_ID, span_data.data_source_id - - 
finish_reasons: list[Any] = [] - if span_data.output: - for part in span_data.output: - if isinstance(part, dict): - fr = part.get("finish_reason") or part.get("stop_reason") - else: - fr = getattr(part, "finish_reason", None) - if fr: - finish_reasons.append( - fr if isinstance(fr, str) else str(fr) - ) - if finish_reasons: - yield GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons - - # Usage information - if span_data.usage: - usage = span_data.usage - self._sanitize_usage_payload(usage) - if "prompt_tokens" in usage or "input_tokens" in usage: - tokens = usage.get("prompt_tokens") or usage.get( - "input_tokens" - ) - if tokens is not None: - yield GEN_AI_USAGE_INPUT_TOKENS, tokens - if "completion_tokens" in usage or "output_tokens" in usage: - tokens = usage.get("completion_tokens") or usage.get( - "output_tokens" - ) - if tokens is not None: - yield GEN_AI_USAGE_OUTPUT_TOKENS, tokens - - # Model configuration - if span_data.model_config: - mc = span_data.model_config - param_map = { - "temperature": GEN_AI_REQUEST_TEMPERATURE, - "top_p": GEN_AI_REQUEST_TOP_P, - "top_k": GEN_AI_REQUEST_TOP_K, - "max_tokens": GEN_AI_REQUEST_MAX_TOKENS, - "presence_penalty": GEN_AI_REQUEST_PRESENCE_PENALTY, - "frequency_penalty": GEN_AI_REQUEST_FREQUENCY_PENALTY, - "seed": GEN_AI_REQUEST_SEED, - "n": GEN_AI_REQUEST_CHOICE_COUNT, - "stop": GEN_AI_REQUEST_STOP_SEQUENCES, - "encoding_formats": GEN_AI_REQUEST_ENCODING_FORMATS, - } - for k, attr in param_map.items(): - if hasattr(mc, "__contains__") and k in mc: - value = mc[k] - else: - value = getattr(mc, k, None) - if value is not None: - yield attr, value - - if hasattr(mc, "get"): - base_url = ( - mc.get("base_url") - or mc.get("baseUrl") - or mc.get("endpoint") - ) - else: - base_url = ( - getattr(mc, "base_url", None) - or getattr(mc, "baseUrl", None) - or getattr(mc, "endpoint", None) - ) - for key, value in _infer_server_attributes(base_url).items(): - yield key, value - - # Sensitive data capture - if ( - 
self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and payload.input_messages - ): - yield ( - GEN_AI_INPUT_MESSAGES, - safe_json_dumps(payload.input_messages), - ) - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_system_instructions - and payload.system_instructions - ): - yield ( - GEN_AI_SYSTEM_INSTRUCTIONS, - safe_json_dumps(payload.system_instructions), - ) - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and payload.output_messages - ): - yield ( - GEN_AI_OUTPUT_MESSAGES, - safe_json_dumps(payload.output_messages), - ) - - # Output type - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _merge_content_sequence( - self, - existing: list[Any], - incoming: Sequence[Any], - ) -> list[Any]: - """Merge normalized message/content lists without duplicating snapshots.""" - if not incoming: - return existing - - incoming_list = [self._clone_message(item) for item in incoming] - - if self.include_sensitive_data: - filtered = [ - msg - for msg in incoming_list - if not self._is_placeholder_message(msg) - ] - if filtered: - incoming_list = filtered - - if not existing: - return incoming_list - - result = [self._clone_message(item) for item in existing] - - for idx, new_msg in enumerate(incoming_list): - if idx < len(result): - if ( - self.include_sensitive_data - and self._is_placeholder_message(new_msg) - and not self._is_placeholder_message(result[idx]) - ): - continue - if result[idx] != new_msg: - result[idx] = self._clone_message(new_msg) - else: - if ( - self.include_sensitive_data - and self._is_placeholder_message(new_msg) - ): - if ( - any( - not self._is_placeholder_message(existing_msg) - for existing_msg in result - ) - or new_msg in result - ): - continue - result.append(self._clone_message(new_msg)) - - return result - - def _clone_message(self, 
message: Any) -> Any: - if isinstance(message, dict): - return { - key: self._clone_message(value) - if isinstance(value, (dict, list)) - else value - for key, value in message.items() - } - if isinstance(message, list): - return [self._clone_message(item) for item in message] - return message - - def _is_placeholder_message(self, message: Any) -> bool: - if not isinstance(message, dict): - return False - parts = message.get("parts") - if not isinstance(parts, list) or not parts: - return False - for part in parts: - if ( - not isinstance(part, dict) - or part.get("type") != "text" - or part.get("content") != "readacted" - ): - return False - return True - - def _get_attributes_from_agent_span_data( - self, - span_data: AgentSpanData, - agent_content: Optional[Dict[str, list[Any]]] = None, - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from agent span.""" - yield GEN_AI_OPERATION_NAME, self._get_operation_name(span_data) - - name = ( - self.agent_name - or getattr(span_data, "name", None) - or self._agent_name_default - ) - if name: - yield GEN_AI_AGENT_NAME, name - - # agent_id and description are not available on the OpenAI Agents SDK - # AgentSpanData; only use user-configured overrides. - agent_id = self.agent_id or self._agent_id_default - if agent_id: - yield GEN_AI_AGENT_ID, agent_id - - description = self.agent_description or self._agent_description_default - if description: - yield GEN_AI_AGENT_DESCRIPTION, description - - # The OpenAI Agents SDK AgentSpanData has no "model" field; fall back to - # the model aggregated from child generation/response spans. 
- model = None - if agent_content: - model = agent_content.get("request_model") - if model: - yield GEN_AI_REQUEST_MODEL, model - - if hasattr(span_data, "conversation_id") and span_data.conversation_id: - yield GEN_AI_CONVERSATION_ID, span_data.conversation_id - - # Agent definitions - if self._capture_tool_definitions and hasattr( - span_data, "agent_definitions" - ): - yield ( - GEN_AI_ORCHESTRATOR_AGENT_DEFINITIONS, - safe_json_dumps(span_data.agent_definitions), - ) - - # System instructions from agent definitions - if self._capture_system_instructions and hasattr( - span_data, "agent_definitions" - ): - try: - defs = span_data.agent_definitions - if isinstance(defs, (list, tuple)): - collected: list[dict[str, str]] = [] - for d in defs: - if isinstance(d, dict): - msgs = d.get("messages") or d.get( - "system_messages" - ) - if isinstance(msgs, (list, tuple)): - collected.extend( - self._collect_system_instructions(msgs) - ) - if collected: - yield ( - GEN_AI_SYSTEM_INSTRUCTIONS, - safe_json_dumps(collected), - ) - except Exception: - pass - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and agent_content - ): - if agent_content.get("input_messages"): - yield ( - GEN_AI_INPUT_MESSAGES, - safe_json_dumps(agent_content["input_messages"]), - ) - if agent_content.get("output_messages"): - yield ( - GEN_AI_OUTPUT_MESSAGES, - safe_json_dumps(agent_content["output_messages"]), - ) - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_system_instructions - and agent_content - and agent_content.get("system_instructions") - ): - yield ( - GEN_AI_SYSTEM_INSTRUCTIONS, - safe_json_dumps(agent_content["system_instructions"]), - ) - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_function_span_data( - self, span_data: FunctionSpanData, payload: ContentPayload - ) -> Iterator[tuple[str, 
AttributeValue]]: - """Extract attributes from function/tool span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.EXECUTE_TOOL - - if span_data.name: - yield GEN_AI_TOOL_NAME, span_data.name - - # Tool type - validate and normalize - tool_type = "function" # Default for function spans - if hasattr(span_data, "tool_type"): - tool_type = span_data.tool_type - yield GEN_AI_TOOL_TYPE, validate_tool_type(tool_type) - - if hasattr(span_data, "call_id") and span_data.call_id: - yield GEN_AI_TOOL_CALL_ID, span_data.call_id - if hasattr(span_data, "description") and span_data.description: - yield GEN_AI_TOOL_DESCRIPTION, span_data.description - - # Tool definitions - if self._capture_tool_definitions and hasattr( - span_data, "tool_definitions" - ): - yield ( - GEN_AI_TOOL_DEFINITIONS, - safe_json_dumps(span_data.tool_definitions), - ) - - # Tool input/output (sensitive) - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and payload.tool_arguments is not None - ): - yield GEN_AI_TOOL_CALL_ARGUMENTS, payload.tool_arguments - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and payload.tool_result is not None - ): - yield GEN_AI_TOOL_CALL_RESULT, payload.tool_result - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_response_span_data( - self, span_data: ResponseSpanData, payload: ContentPayload - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from response span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.CHAT - - # Response information - if span_data.response: - if hasattr(span_data.response, "id") and span_data.response.id: - yield GEN_AI_RESPONSE_ID, span_data.response.id - - # Model from response - if ( - hasattr(span_data.response, "model") - and span_data.response.model - ): - yield GEN_AI_RESPONSE_MODEL, span_data.response.model - if not getattr(span_data, "model", None): - yield 
GEN_AI_REQUEST_MODEL, span_data.response.model - - # Finish reasons - finish_reasons = [] - if ( - hasattr(span_data.response, "output") - and span_data.response.output - ): - for part in span_data.response.output: - if isinstance(part, dict): - fr = part.get("finish_reason") or part.get( - "stop_reason" - ) - else: - fr = getattr(part, "finish_reason", None) - if fr: - finish_reasons.append(fr) - if finish_reasons: - yield GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons - - # Usage from response - if ( - hasattr(span_data.response, "usage") - and span_data.response.usage - ): - usage = span_data.response.usage - self._sanitize_usage_payload(usage) - input_tokens = getattr(usage, "input_tokens", None) - if input_tokens is None: - input_tokens = getattr(usage, "prompt_tokens", None) - if input_tokens is not None: - yield GEN_AI_USAGE_INPUT_TOKENS, input_tokens - - output_tokens = getattr(usage, "output_tokens", None) - if output_tokens is None: - output_tokens = getattr(usage, "completion_tokens", None) - if output_tokens is not None: - yield GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens - - # Tool definitions from response - if self._capture_tool_definitions and hasattr( - span_data.response, "tools" - ): - yield ( - GEN_AI_TOOL_DEFINITIONS, - safe_json_dumps( - list( - map( - lambda tool: tool.to_dict(), - span_data.response.tools, - ) - ) - ), - ) - - # Input/output messages - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and payload.input_messages - ): - yield ( - GEN_AI_INPUT_MESSAGES, - safe_json_dumps(payload.input_messages), - ) - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_system_instructions - and payload.system_instructions - ): - yield ( - GEN_AI_SYSTEM_INSTRUCTIONS, - safe_json_dumps(payload.system_instructions), - ) - - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and 
payload.output_messages - ): - yield ( - GEN_AI_OUTPUT_MESSAGES, - safe_json_dumps(payload.output_messages), - ) - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_transcription_span_data( - self, span_data: TranscriptionSpanData - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from transcription span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.TRANSCRIPTION - - if hasattr(span_data, "model") and span_data.model: - yield GEN_AI_REQUEST_MODEL, span_data.model - - # Audio format - if hasattr(span_data, "format") and span_data.format: - yield "gen_ai.audio.input.format", span_data.format - - # Transcript (sensitive) - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and hasattr(span_data, "transcript") - ): - yield "gen_ai.transcription.text", span_data.transcript - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_speech_span_data( - self, span_data: SpeechSpanData - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from speech span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.SPEECH - - if hasattr(span_data, "model") and span_data.model: - yield GEN_AI_REQUEST_MODEL, span_data.model - - if hasattr(span_data, "voice") and span_data.voice: - yield "gen_ai.speech.voice", span_data.voice - - if hasattr(span_data, "format") and span_data.format: - yield "gen_ai.audio.output.format", span_data.format - - # Input text (sensitive) - if ( - self.include_sensitive_data - and self._content_mode.capture_in_span - and self._capture_messages - and hasattr(span_data, "input_text") - ): - yield "gen_ai.speech.input_text", span_data.input_text - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_guardrail_span_data( - self, span_data: 
GuardrailSpanData - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from guardrail span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.GUARDRAIL - - if span_data.name: - yield GEN_AI_GUARDRAIL_NAME, span_data.name - - yield GEN_AI_GUARDRAIL_TRIGGERED, span_data.triggered - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _get_attributes_from_handoff_span_data( - self, span_data: HandoffSpanData - ) -> Iterator[tuple[str, AttributeValue]]: - """Extract attributes from handoff span.""" - yield GEN_AI_OPERATION_NAME, GenAIOperationName.HANDOFF - - if span_data.from_agent: - yield GEN_AI_HANDOFF_FROM_AGENT, span_data.from_agent - - if span_data.to_agent: - yield GEN_AI_HANDOFF_TO_AGENT, span_data.to_agent - - yield ( - GEN_AI_OUTPUT_TYPE, - normalize_output_type(self._infer_output_type(span_data)), - ) - - def _cleanup_spans_for_trace(self, trace_id: str) -> None: - """Clean up spans for a trace to prevent memory leaks.""" - spans_to_remove = [ - span_id - for span_id in self._otel_spans.keys() - if span_id.startswith(trace_id) - ] - for span_id in spans_to_remove: - if otel_span := self._otel_spans.pop(span_id, None): - otel_span.set_status( - Status( - StatusCode.ERROR, "Trace ended before span completion" - ) - ) - otel_span.end() - self._tokens.pop(span_id, None) - - -__all__ = [ - "GenAIProvider", - "GenAIOperationName", - "GenAIToolType", - "GenAIOutputType", - "GenAIEvaluationAttributes", - "ContentCaptureMode", - "ContentPayload", - "GenAISemanticProcessor", - "normalize_provider", - "normalize_output_type", - "validate_tool_type", -] diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conformance/orchestration.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conformance/orchestration.py index b045d20e..5b5517fa 100644 --- 
a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conformance/orchestration.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conformance/orchestration.py @@ -3,11 +3,16 @@ """Conformance scenario: triage agent hands off to a specialist that uses a tool. -Exercises the three common agent shapes in a single ``Runner.run``: +Exercises the agent-orchestration shapes this instrumentation owns in a +single ``Runner.run``: -- Basic agent invocation (``invoke_agent`` + ``chat`` from the Responses API). +- Basic agent invocation (``invoke_agent``). - Multi-agent handoff (a second ``invoke_agent`` after the triage step). - Function tool execution (``execute_tool``) inside the specialist agent. + +The underlying ``chat`` / ``responses`` spans for the LLM calls are +produced by ``opentelemetry-instrumentation-genai-openai`` when it is +installed and is not exercised here. """ from __future__ import annotations @@ -26,7 +31,10 @@ from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.trace import TracerProvider from opentelemetry.test.weaver_live_check import LiveCheckReport -from opentelemetry.test_util_genai.conformance import Scenario +from opentelemetry.test_util_genai.conformance import ( + ExpectedViolation, + Scenario, +) from opentelemetry.test_util_genai.instrumentor import instrument DEFAULT_MODEL = "gpt-4o-mini" @@ -63,13 +71,20 @@ def _build_triage_agent() -> Agent: class OrchestrationScenario(Scenario): expected_spans = ( + "invoke_workflow", "invoke_agent", - "chat", "execute_tool", ) - expected_metrics = ( - "gen_ai.client.operation.duration", - "gen_ai.client.token.usage", + expected_metrics = ("gen_ai.client.operation.duration",) + expected_violations = ( + # `FunctionSpanData` in the openai-agents library doesn't expose + # `tool_call_id`, so our `execute_tool` spans can't set + # `gen_ai.tool.call.id`. 
Tracked in + # https://github.com/open-telemetry/opentelemetry-python-genai/issues/86 + ExpectedViolation( + advice_id="genai_expected_attribute_missing", + message_substring="gen_ai.tool.call.id", + ), ) def run( @@ -124,10 +139,6 @@ def validate(self, report: LiveCheckReport) -> None: "Orchestration involves a triage agent handing off to a specialist; " f"expected at least two invoke_agent spans, saw {operations}" ) - assert operations.count("chat") >= 2, ( - "Each agent issues at least one Responses-API call; " - f"expected at least two chat spans, saw {operations}" - ) assert operations.count("execute_tool") >= 1, ( "Specialist agent calls the get_weather function tool; " f"expected at least one execute_tool span, saw {operations}" diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conftest.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conftest.py index 4f834bd4..76ddaae4 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conftest.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/conftest.py @@ -5,7 +5,11 @@ import pytest -from opentelemetry.test_util_genai.vcr import scrub_response_headers_overwrite +pytest.register_assert_rewrite("opentelemetry.test_util_genai.vcr") + +from opentelemetry.test_util_genai.vcr import ( # noqa: E402 + scrub_response_headers_overwrite, +) pytest_plugins = [ "opentelemetry.test_util_genai.fixtures", diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py index 6a38496b..e26c783e 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_conformance.py @@ -25,18 +25,7 @@ @pytest.mark.parametrize( "scenario", - [ - pytest.param( - 
OrchestrationScenario(), - marks=pytest.mark.skip( - reason=( - "openai-agents instrumentation has multiple semconv gaps " - "surfaced by this scenario; tracked in " - "https://github.com/open-telemetry/opentelemetry-python-genai/issues/86" - ) - ), - ), - ], + [pytest.param(OrchestrationScenario())], ids=lambda s: type(s).__name__, ) def test_conformance( diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_instrumentor.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_instrumentor.py new file mode 100644 index 00000000..d1a6f2bf --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_instrumentor.py @@ -0,0 +1,107 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import agents.tracing + +from opentelemetry.instrumentation.genai.openai_agents import ( + OpenAIAgentsInstrumentor, +) +from opentelemetry.instrumentation.genai.openai_agents.package import ( + _instruments, +) +from opentelemetry.instrumentation.genai.openai_agents.processor import ( + GenAITracingProcessor, +) + + +def _registered_processors() -> tuple: + provider = agents.tracing.get_trace_provider() + multi = getattr(provider, "_multi_processor", None) + return tuple(getattr(multi, "_processors", ())) + + +def _our_processors(): + return [ + p + for p in _registered_processors() + if isinstance(p, GenAITracingProcessor) + ] + + +def test_instrumentation_dependencies_exposed() -> None: + instrumentor = OpenAIAgentsInstrumentor() + assert instrumentor.instrumentation_dependencies() == _instruments + + +def test_instrument_adds_processor_alongside_default() -> None: + instrumentor = OpenAIAgentsInstrumentor() + pre_count = len(_registered_processors()) + try: + instrumentor.instrument() + post = _registered_processors() + # Default processor stays in place, ours is appended. 
+ assert len(post) == pre_count + 1 + assert len(_our_processors()) == 1 + finally: + instrumentor.uninstrument() + assert len(_our_processors()) == 0 + + +def test_instrument_with_disable_openai_trace_export_replaces_processors() -> ( + None +): + # Make sure the default processor is registered before we start, + # so the "replace" behavior is observable. + agents.tracing.set_trace_processors( + [agents.tracing.processors.default_processor()] + ) + instrumentor = OpenAIAgentsInstrumentor() + try: + instrumentor.instrument(disable_openai_trace_export=True) + post = _registered_processors() + assert len(post) == 1 + assert isinstance(post[0], GenAITracingProcessor) + finally: + instrumentor.uninstrument() + + +def test_uninstrument_restores_processors_in_replace_mode() -> None: + baseline = [ + agents.tracing.processors.default_processor(), + agents.tracing.processors.default_processor(), + ] + agents.tracing.set_trace_processors(baseline) + + instrumentor = OpenAIAgentsInstrumentor() + try: + instrumentor.instrument(disable_openai_trace_export=True) + replaced = _registered_processors() + assert len(replaced) == 1 + assert isinstance(replaced[0], GenAITracingProcessor) + finally: + instrumentor.uninstrument() + + restored = _registered_processors() + assert restored == tuple(baseline) + + +def test_double_instrument_is_noop() -> None: + instrumentor = OpenAIAgentsInstrumentor() + try: + instrumentor.instrument() + first = _our_processors() + instrumentor.instrument() + second = _our_processors() + assert len(first) == 1 and len(second) == 1 + assert first[0] is second[0] + finally: + instrumentor.uninstrument() + + +def test_double_uninstrument_is_noop() -> None: + instrumentor = OpenAIAgentsInstrumentor() + instrumentor.instrument() + instrumentor.uninstrument() + instrumentor.uninstrument() # must not raise diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_processor.py 
b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_processor.py new file mode 100644 index 00000000..7b493bd5 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_processor.py @@ -0,0 +1,196 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import gc +from typing import Any +from unittest.mock import MagicMock + +from agents.tracing.span_data import ( + AgentSpanData, + FunctionSpanData, + GenerationSpanData, + HandoffSpanData, + ResponseSpanData, +) + +from opentelemetry.instrumentation.genai.openai_agents.processor import ( + GenAITracingProcessor, +) +from opentelemetry.util.genai.invocation import ToolInvocation + + +class _Span: + """Minimal stand-in for agents-library Span (must be weakref-able).""" + + def __init__(self, span_data: Any) -> None: + self.span_data = span_data + # Span objects in tests don't need a span_id since the processor + # keys its WeakKeyDictionary by the Span instance itself, but + # keep one around for parity with the real class. 
+ self.span_id = f"span-{id(self)}" + + +class _Trace: + """Minimal stand-in for agents-library Trace.""" + + def __init__(self, trace_id: str, name: str) -> None: + self.trace_id = trace_id + self.name = name + + +def _build_handler() -> MagicMock: + return MagicMock() + + +def test_trace_start_end_creates_and_stops_workflow() -> None: + handler = _build_handler() + handler.workflow.return_value = MagicMock(attributes={}) + processor = GenAITracingProcessor(handler, provider="openai") + trace = _Trace("trace-1", "Agent workflow") + + processor.on_trace_start(trace) + handler.workflow.assert_called_once_with(name="Agent workflow") + workflow_invocation = handler.workflow.return_value + assert ( + workflow_invocation.attributes["gen_ai.workflow.name"] + == "Agent workflow" + ) + + processor.on_trace_end(trace) + workflow_invocation.stop.assert_called_once_with() + + +def test_agent_span_creates_invoke_local_agent() -> None: + handler = _build_handler() + processor = GenAITracingProcessor(handler, provider="openai") + span = _Span(AgentSpanData(name="triage")) + + processor.on_span_start(span) + handler.invoke_local_agent.assert_called_once_with( + provider="openai", agent_name="triage" + ) + + processor.on_span_end(span) + handler.invoke_local_agent.return_value.stop.assert_called_once_with() + + +def test_function_span_creates_tool_invocation_and_sets_provider_metric() -> ( + None +): + handler = _build_handler() + handler.tool.return_value = MagicMock( + spec=ToolInvocation, metric_attributes={} + ) + processor = GenAITracingProcessor(handler, provider="openai") + span = _Span( + FunctionSpanData( + name="get_weather", + input='{"city":"BCN"}', + output=None, + ) + ) + + processor.on_span_start(span) + handler.tool.assert_called_once_with( + name="get_weather", + tool_type="function", + ) + tool_invocation = handler.tool.return_value + + assert tool_invocation.arguments == '{"city":"BCN"}' + assert ( + tool_invocation.metric_attributes["gen_ai.provider.name"] == 
"openai" + ) + + # Output gets populated on the agents library span_data after the + # tool runs; our on_span_end reads it. + span.span_data.output = "sunny" + processor.on_span_end(span) + assert tool_invocation.tool_result == "sunny" + tool_invocation.stop.assert_called_once_with() + + +def test_function_span_without_output_still_stops() -> None: + handler = _build_handler() + handler.tool.return_value = MagicMock( + spec=ToolInvocation, metric_attributes={} + ) + processor = GenAITracingProcessor(handler, provider="openai") + span = _Span(FunctionSpanData(name="noop", input=None, output=None)) + + processor.on_span_start(span) + processor.on_span_end(span) + tool_invocation = handler.tool.return_value + # tool_result stays as whatever MagicMock default; what matters is + # we didn't crash and we stopped. + tool_invocation.stop.assert_called_once_with() + + +def test_generation_and_response_spans_ignored() -> None: + handler = _build_handler() + processor = GenAITracingProcessor(handler, provider="openai") + + for span_data in ( + GenerationSpanData(model="gpt-4o-mini"), + ResponseSpanData(), + ): + span = _Span(span_data) + processor.on_span_start(span) + processor.on_span_end(span) + + handler.invoke_local_agent.assert_not_called() + handler.tool.assert_not_called() + handler.inference.assert_not_called() + + +def test_handoff_emits_raw_span() -> None: + handler = _build_handler() + processor = GenAITracingProcessor(handler, provider="openai") + span = _Span( + HandoffSpanData(from_agent="triage", to_agent="weather_specialist") + ) + # Doesn't raise; the actual OTel span emission is verified end-to-end + # by the conformance scenario. 
+ processor.on_span_start(span) + processor.on_span_end(span) + + +def test_shutdown_stops_open_invocations() -> None: + handler = _build_handler() + handler.tool.return_value = MagicMock( + spec=ToolInvocation, metric_attributes={} + ) + processor = GenAITracingProcessor(handler, provider="openai") + # Hold strong references to the trace / span objects so the + # WeakKeyDictionary entries survive until shutdown runs. + trace = _Trace("t", "wf") + agent_span = _Span(AgentSpanData(name="agent")) + tool_span = _Span( + FunctionSpanData(name="get_weather", input=None, output=None) + ) + processor.on_trace_start(trace) + processor.on_span_start(agent_span) + processor.on_span_start(tool_span) + assert len(processor._invocations) == 3 + + processor.shutdown() + + handler.workflow.return_value.stop.assert_called_once_with() + handler.invoke_local_agent.return_value.stop.assert_called_once_with() + handler.tool.return_value.stop.assert_called_once_with() + assert len(processor._invocations) == 0 + + +def test_state_uses_weakref_so_dropped_spans_are_collected() -> None: + handler = _build_handler() + processor = GenAITracingProcessor(handler, provider="openai") + span: _Span | None = _Span(AgentSpanData(name="triage")) + processor.on_span_start(span) + assert len(processor._invocations) == 1 + + span = None # drop the only strong reference + gc.collect() + + assert len(processor._invocations) == 0 diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py deleted file mode 100644 index 72379388..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_tracer.py +++ /dev/null @@ -1,551 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -# pylint: 
disable=wrong-import-position,wrong-import-order,import-error,no-name-in-module,unexpected-keyword-arg,no-value-for-parameter,redefined-outer-name - -from __future__ import annotations - -import json -from types import SimpleNamespace -from typing import Any - -import agents.tracing as agents_tracing -from agents.tracing import ( - agent_span, - function_span, - generation_span, - response_span, - set_trace_processors, - trace, -) -from openai.types.responses import FunctionTool # noqa: E402 - -from opentelemetry.instrumentation.genai.openai_agents import ( # noqa: E402 - OpenAIAgentsInstrumentor, -) -from opentelemetry.instrumentation.genai.openai_agents.span_processor import ( # noqa: E402 - ContentPayload, - GenAISemanticProcessor, -) -from opentelemetry.sdk.trace import TracerProvider # noqa: E402 - -try: - from opentelemetry.sdk.trace.export import ( # type: ignore[attr-defined] - InMemorySpanExporter, - SimpleSpanProcessor, - ) -except ImportError: # pragma: no cover - support older/newer SDK layouts - from opentelemetry.sdk.trace.export import ( - SimpleSpanProcessor, # noqa: E402 - ) - from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 - InMemorySpanExporter, - ) -from opentelemetry.semconv._incubating.attributes import ( # noqa: E402 - gen_ai_attributes as GenAI, -) -from opentelemetry.semconv._incubating.attributes import ( # noqa: E402 - server_attributes as ServerAttributes, -) -from opentelemetry.trace import SpanKind # noqa: E402 - -GEN_AI_PROVIDER_NAME = GenAI.GEN_AI_PROVIDER_NAME -GEN_AI_INPUT_MESSAGES = getattr( - GenAI, "GEN_AI_INPUT_MESSAGES", "gen_ai.input.messages" -) -GEN_AI_OUTPUT_MESSAGES = getattr( - GenAI, "GEN_AI_OUTPUT_MESSAGES", "gen_ai.output.messages" -) -GEN_AI_TOOL_DEFINITIONS = getattr( - GenAI, "GEN_AI_TOOL_DEFINITIONS", "gen_ai.tool.definitions" -) - - -def _instrument_with_provider(**instrument_kwargs): - set_trace_processors([]) - provider = TracerProvider() - exporter = InMemorySpanExporter() - 
provider.add_span_processor(SimpleSpanProcessor(exporter)) - - instrumentor = OpenAIAgentsInstrumentor() - instrumentor.instrument(tracer_provider=provider, **instrument_kwargs) - - return instrumentor, exporter - - -def test_generation_span_creates_client_span(): - instrumentor, exporter = _instrument_with_provider() - - try: - with trace("workflow"): - with generation_span( - input=[{"role": "user", "content": "hi"}], - model="gpt-4o-mini", - model_config={ - "temperature": 0.2, - "base_url": "https://api.openai.com", - }, - usage={"input_tokens": 12, "output_tokens": 3}, - ): - pass - - spans = exporter.get_finished_spans() - client_span = next( - span for span in spans if span.kind is SpanKind.CLIENT - ) - - assert client_span.attributes[GEN_AI_PROVIDER_NAME] == "openai" - assert client_span.attributes[GenAI.GEN_AI_OPERATION_NAME] == "chat" - assert ( - client_span.attributes[GenAI.GEN_AI_REQUEST_MODEL] == "gpt-4o-mini" - ) - assert client_span.name == "chat gpt-4o-mini" - assert client_span.attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] == 12 - assert client_span.attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] == 3 - assert ( - client_span.attributes[ServerAttributes.SERVER_ADDRESS] - == "api.openai.com" - ) - finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_generation_span_without_roles_uses_text_completion(): - instrumentor, exporter = _instrument_with_provider() - - try: - with trace("workflow"): - with generation_span( - input=[{"content": "tell me a joke"}], - model="gpt-4o-mini", - model_config={"temperature": 0.7}, - ): - pass - - spans = exporter.get_finished_spans() - completion_span = next( - span - for span in spans - if span.attributes[GenAI.GEN_AI_OPERATION_NAME] - == GenAI.GenAiOperationNameValues.TEXT_COMPLETION.value - ) - - assert completion_span.kind is SpanKind.CLIENT - assert completion_span.name == "text_completion gpt-4o-mini" - assert ( - completion_span.attributes[GenAI.GEN_AI_REQUEST_MODEL] - == "gpt-4o-mini" - ) - 
finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_function_span_records_tool_attributes(): - instrumentor, exporter = _instrument_with_provider() - - try: - with trace("workflow"): - with function_span( - name="fetch_weather", input='{"city": "Paris"}' - ): - pass - - spans = exporter.get_finished_spans() - tool_span = next( - span for span in spans if span.kind is SpanKind.INTERNAL - ) - - assert ( - tool_span.attributes[GenAI.GEN_AI_OPERATION_NAME] == "execute_tool" - ) - assert tool_span.attributes[GenAI.GEN_AI_TOOL_NAME] == "fetch_weather" - assert tool_span.attributes[GenAI.GEN_AI_TOOL_TYPE] == "function" - assert tool_span.attributes[GEN_AI_PROVIDER_NAME] == "openai" - finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_agent_invoke_span_records_attributes(): - instrumentor, exporter = _instrument_with_provider() - - try: - with trace("workflow"): - with agent_span( - name="support_bot", - handoffs=["escalation_bot"], - tools=["search"], - output_type="str", - ): - pass - - spans = exporter.get_finished_spans() - invoke_span = next( - span - for span in spans - if span.attributes[GenAI.GEN_AI_OPERATION_NAME] - == GenAI.GenAiOperationNameValues.INVOKE_AGENT.value - ) - - assert invoke_span.kind is SpanKind.CLIENT - assert invoke_span.name == "invoke_agent support_bot" - assert invoke_span.attributes[GEN_AI_PROVIDER_NAME] == "openai" - assert invoke_span.attributes[GenAI.GEN_AI_AGENT_NAME] == "support_bot" - finally: - instrumentor.uninstrument() - exporter.clear() - - -def _placeholder_message() -> dict[str, Any]: - return { - "role": "user", - "parts": [{"type": "text", "content": "readacted"}], - } - - -def test_normalize_messages_skips_empty_when_sensitive_enabled(): - processor = GenAISemanticProcessor(metrics_enabled=False) - normalized = processor._normalize_messages_to_role_parts( - [{"role": "user", "content": None}] - ) - assert not normalized - - -def 
test_normalize_messages_emits_placeholder_when_sensitive_disabled(): - processor = GenAISemanticProcessor( - include_sensitive_data=False, metrics_enabled=False - ) - normalized = processor._normalize_messages_to_role_parts( - [{"role": "user", "content": None}] - ) - assert normalized == [_placeholder_message()] - - -def test_agent_content_aggregation_skips_duplicate_snapshots(): - processor = GenAISemanticProcessor(metrics_enabled=False) - agent_id = "agent-span" - processor._agent_content[agent_id] = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - } - - payload = ContentPayload( - input_messages=[ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]}, - { - "role": "user", - "parts": [{"type": "text", "content": "readacted"}], - }, - ] - ) - - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-1", parent_id=agent_id, span_data=None), - payload, - ) - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-2", parent_id=agent_id, span_data=None), - payload, - ) - - aggregated = processor._agent_content[agent_id]["input_messages"] - assert aggregated == [ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]} - ] - # ensure data copied rather than reused to prevent accidental mutation - assert aggregated is not payload.input_messages - - -def test_agent_content_aggregation_filters_placeholder_append_when_sensitive(): - processor = GenAISemanticProcessor(metrics_enabled=False) - agent_id = "agent-span" - processor._agent_content[agent_id] = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - } - - initial_payload = ContentPayload( - input_messages=[ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]} - ] - ) - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-1", parent_id=agent_id, span_data=None), - initial_payload, - ) - - placeholder_payload = ContentPayload( - input_messages=[_placeholder_message()] - ) - 
processor._update_agent_aggregate( - SimpleNamespace(span_id="child-2", parent_id=agent_id, span_data=None), - placeholder_payload, - ) - - aggregated = processor._agent_content[agent_id]["input_messages"] - assert aggregated == [ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]} - ] - - -def test_agent_content_aggregation_retains_placeholder_when_sensitive_disabled(): - processor = GenAISemanticProcessor( - include_sensitive_data=False, metrics_enabled=False - ) - agent_id = "agent-span" - processor._agent_content[agent_id] = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - } - - placeholder_payload = ContentPayload( - input_messages=[_placeholder_message()] - ) - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-1", parent_id=agent_id, span_data=None), - placeholder_payload, - ) - - aggregated = processor._agent_content[agent_id]["input_messages"] - assert aggregated == [_placeholder_message()] - - -def test_agent_content_aggregation_appends_new_messages_once(): - processor = GenAISemanticProcessor(metrics_enabled=False) - agent_id = "agent-span" - processor._agent_content[agent_id] = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - } - - initial_payload = ContentPayload( - input_messages=[ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]} - ] - ) - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-1", parent_id=agent_id, span_data=None), - initial_payload, - ) - - extended_messages = [ - {"role": "user", "parts": [{"type": "text", "content": "hello"}]}, - { - "role": "assistant", - "parts": [{"type": "text", "content": "hi there"}], - }, - ] - extended_payload = ContentPayload(input_messages=extended_messages) - processor._update_agent_aggregate( - SimpleNamespace(span_id="child-2", parent_id=agent_id, span_data=None), - extended_payload, - ) - - aggregated = processor._agent_content[agent_id]["input_messages"] - assert 
aggregated == extended_messages - assert extended_payload.input_messages == extended_messages - - -def test_agent_span_collects_child_messages(): - instrumentor, exporter = _instrument_with_provider() - - try: - provider = agents_tracing.get_trace_provider() - - with trace("workflow") as workflow: - agent_span_obj = provider.create_span( - agents_tracing.AgentSpanData(name="helper"), - parent=workflow, - ) - agent_span_obj.start() - - generation = agents_tracing.GenerationSpanData( - input=[{"role": "user", "content": "hi"}], - output=[{"type": "text", "content": "hello"}], - model="gpt-4o-mini", - ) - gen_span = provider.create_span(generation, parent=agent_span_obj) - gen_span.start() - gen_span.finish() - - agent_span_obj.finish() - - spans = exporter.get_finished_spans() - agent_span = next( - span - for span in spans - if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME) - == GenAI.GenAiOperationNameValues.INVOKE_AGENT.value - ) - - prompt = json.loads(agent_span.attributes[GEN_AI_INPUT_MESSAGES]) - completion = json.loads(agent_span.attributes[GEN_AI_OUTPUT_MESSAGES]) - - assert prompt == [ - { - "role": "user", - "parts": [{"type": "text", "content": "hi"}], - } - ] - assert completion == [ - { - "role": "assistant", - "parts": [{"type": "text", "content": "hello"}], - } - ] - - assert not agent_span.events - finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_agent_name_override_applied_to_agent_spans(): - instrumentor, exporter = _instrument_with_provider( - agent_name="Travel Concierge" - ) - - try: - with trace("workflow"): - with agent_span(name="support_bot"): - pass - - spans = exporter.get_finished_spans() - agent_span_record = next( - span - for span in spans - if span.attributes[GenAI.GEN_AI_OPERATION_NAME] - == GenAI.GenAiOperationNameValues.INVOKE_AGENT.value - ) - - assert agent_span_record.kind is SpanKind.CLIENT - assert agent_span_record.name == "invoke_agent Travel Concierge" - assert ( - 
agent_span_record.attributes[GenAI.GEN_AI_AGENT_NAME] - == "Travel Concierge" - ) - finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_capture_mode_can_be_disabled(): - instrumentor, exporter = _instrument_with_provider( - capture_message_content="no_content" - ) - - try: - with trace("workflow"): - with generation_span( - input=[{"role": "user", "content": "hi"}], - output=[{"role": "assistant", "content": "hello"}], - model="gpt-4o-mini", - ): - pass - - spans = exporter.get_finished_spans() - client_span = next( - span for span in spans if span.kind is SpanKind.CLIENT - ) - - assert GEN_AI_INPUT_MESSAGES not in client_span.attributes - assert GEN_AI_OUTPUT_MESSAGES not in client_span.attributes - for event in client_span.events: - assert GEN_AI_INPUT_MESSAGES not in event.attributes - assert GEN_AI_OUTPUT_MESSAGES not in event.attributes - finally: - instrumentor.uninstrument() - exporter.clear() - - -def test_response_span_records_response_attributes(): - instrumentor, exporter = _instrument_with_provider() - - class _Usage: - def __init__(self, input_tokens: int, output_tokens: int) -> None: - self.input_tokens = input_tokens - self.output_tokens = output_tokens - - class _Response: - def __init__(self) -> None: - self.id = "resp-123" - self.instructions = "You are a helpful assistant." 
- self.model = "gpt-4o-mini" - self.usage = _Usage(42, 9) - self.tools = [ - FunctionTool( - name="get_current_weather", - type="function", - description="Get the current weather in a given location", - parameters={ - "type": "object", - "properties": { - "location": { - "title": "Location", - "type": "string", - }, - }, - "required": ["location"], - }, - ) - ] - self.output = [{"finish_reason": "stop"}] - - try: - with trace("workflow"): - with response_span(response=_Response()): - pass - - spans = exporter.get_finished_spans() - response = next( - span - for span in spans - if span.attributes[GenAI.GEN_AI_OPERATION_NAME] - == GenAI.GenAiOperationNameValues.CHAT.value - ) - - assert response.kind is SpanKind.CLIENT - assert response.name == "chat gpt-4o-mini" - assert response.attributes[GEN_AI_PROVIDER_NAME] == "openai" - assert response.attributes[GenAI.GEN_AI_RESPONSE_ID] == "resp-123" - assert ( - response.attributes[GenAI.GEN_AI_RESPONSE_MODEL] == "gpt-4o-mini" - ) - assert response.attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] == 42 - assert response.attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] == 9 - assert response.attributes[GenAI.GEN_AI_RESPONSE_FINISH_REASONS] == ( - "stop", - ) - - system_instructions = json.loads( - response.attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] - tool_definitions = json.loads( - response.attributes[GEN_AI_TOOL_DEFINITIONS] - ) - assert tool_definitions == [ - { - "type": "function", - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": {"title": "Location", "type": "string"}, - }, - "required": ["location"], - }, - } - ] - finally: - instrumentor.uninstrument() - exporter.clear() diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_instrumentor_behaviors.py 
b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_instrumentor_behaviors.py deleted file mode 100644 index 33aec0f4..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_instrumentor_behaviors.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from agents.tracing import ( - get_trace_provider, - set_trace_processors, -) - -from opentelemetry.instrumentation.genai.openai_agents import ( - OpenAIAgentsInstrumentor, -) -from opentelemetry.instrumentation.genai.openai_agents.package import ( - _instruments, -) -from opentelemetry.sdk.trace import TracerProvider - - -def test_double_instrument_is_noop(): - set_trace_processors([]) - provider = TracerProvider() - instrumentor = OpenAIAgentsInstrumentor() - - instrumentor.instrument(tracer_provider=provider) - trace_provider = get_trace_provider() - assert len(trace_provider._multi_processor._processors) == 1 - - instrumentor.instrument(tracer_provider=provider) - assert len(trace_provider._multi_processor._processors) == 1 - - instrumentor.uninstrument() - instrumentor.uninstrument() - set_trace_processors([]) - - -def test_instrumentation_dependencies_exposed(): - instrumentor = OpenAIAgentsInstrumentor() - assert instrumentor.instrumentation_dependencies() == _instruments - - -def test_default_agent_configuration(): - set_trace_processors([]) - provider = TracerProvider() - instrumentor = OpenAIAgentsInstrumentor() - - try: - instrumentor.instrument(tracer_provider=provider) - processor = instrumentor._processor - assert processor is not None - assert getattr(processor, "_agent_name_default") == "OpenAI Agent" - assert getattr(processor, "_agent_id_default") == "agent" - assert ( - getattr(processor, "_agent_description_default") - == "OpenAI Agents instrumentation" - ) - assert processor.base_url == "https://api.openai.com" - assert 
processor.server_address == "api.openai.com" - assert processor.server_port == 443 - finally: - instrumentor.uninstrument() - set_trace_processors([]) diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py deleted file mode 100644 index 0a2d2cda..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_z_span_processor_unit.py +++ /dev/null @@ -1,547 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -# pylint: disable=wrong-import-position,wrong-import-order,import-error,no-name-in-module,unexpected-keyword-arg,no-value-for-parameter,redefined-outer-name,too-many-locals,too-many-statements,too-many-branches - -from __future__ import annotations - -import importlib -import json -from dataclasses import dataclass -from datetime import datetime, timezone -from enum import Enum -from types import SimpleNamespace -from typing import Any - -import pytest -from agents.tracing import ( - AgentSpanData, - FunctionSpanData, - GenerationSpanData, - ResponseSpanData, -) - -import opentelemetry.semconv._incubating.attributes.gen_ai_attributes as _gen_ai_attributes -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.semconv._incubating.attributes import ( - server_attributes as _server_attributes, -) -from opentelemetry.trace import SpanKind -from opentelemetry.trace.status import StatusCode - - -def _ensure_semconv_enums() -> None: - if not hasattr(_gen_ai_attributes, "GenAiProviderNameValues"): - - class _GenAiProviderNameValues(Enum): - OPENAI = "openai" - GCP_GEN_AI = "gcp.gen_ai" - GCP_VERTEX_AI = "gcp.vertex_ai" - GCP_GEMINI = "gcp.gemini" - ANTHROPIC = "anthropic" - COHERE = "cohere" - AZURE_AI_INFERENCE = "azure.ai.inference" - AZURE_AI_OPENAI = "azure.ai.openai" - IBM_WATSONX_AI = "ibm.watsonx.ai" - AWS_BEDROCK = 
"aws.bedrock" - PERPLEXITY = "perplexity" - X_AI = "x.ai" - DEEPSEEK = "deepseek" - GROQ = "groq" - MISTRAL_AI = "mistral.ai" - - class _GenAiOperationNameValues(Enum): - CHAT = "chat" - GENERATE_CONTENT = "generate_content" - TEXT_COMPLETION = "text_completion" - EMBEDDINGS = "embeddings" - CREATE_AGENT = "create_agent" - INVOKE_AGENT = "invoke_agent" - EXECUTE_TOOL = "execute_tool" - - class _GenAiOutputTypeValues(Enum): - TEXT = "text" - JSON = "json" - IMAGE = "image" - SPEECH = "speech" - - _gen_ai_attributes.GenAiProviderNameValues = _GenAiProviderNameValues - _gen_ai_attributes.GenAiOperationNameValues = _GenAiOperationNameValues - _gen_ai_attributes.GenAiOutputTypeValues = _GenAiOutputTypeValues - - if not hasattr(_server_attributes, "SERVER_ADDRESS"): - _server_attributes.SERVER_ADDRESS = "server.address" - if not hasattr(_server_attributes, "SERVER_PORT"): - _server_attributes.SERVER_PORT = "server.port" - - -_ensure_semconv_enums() - -ServerAttributes = _server_attributes - -sp = importlib.import_module( - "opentelemetry.instrumentation.genai.openai_agents.span_processor" -) - -try: - from opentelemetry.sdk.trace.export import ( # type: ignore[attr-defined] - InMemorySpanExporter, - SimpleSpanProcessor, - ) -except ImportError: # pragma: no cover - from opentelemetry.sdk.trace.export import SimpleSpanProcessor - from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, - ) - - -def _collect(iterator) -> dict[str, Any]: - return dict(iterator) - - -@pytest.fixture -def processor_setup(): - provider = TracerProvider() - exporter = InMemorySpanExporter() - provider.add_span_processor(SimpleSpanProcessor(exporter)) - tracer = provider.get_tracer(__name__) - processor = sp.GenAISemanticProcessor(tracer=tracer, system_name="openai") - yield processor, exporter - processor.shutdown() - exporter.clear() - - -def test_time_helpers(): - dt = datetime(2024, 1, 1, tzinfo=timezone.utc) - assert sp._as_utc_nano(dt) == 1704067200 * 
1_000_000_000 - - class Fallback: - def __str__(self) -> str: - return "fallback" - - assert sp.safe_json_dumps({"foo": "bar"}) == '{"foo":"bar"}' - assert sp.safe_json_dumps(Fallback()) == "fallback" - - -def test_infer_server_attributes_variants(monkeypatch): - assert sp._infer_server_attributes(None) == {} - assert sp._infer_server_attributes(123) == {} - - attrs = sp._infer_server_attributes("https://api.example.com:8080/v1") - assert attrs[ServerAttributes.SERVER_ADDRESS] == "api.example.com" - assert attrs[ServerAttributes.SERVER_PORT] == 8080 - - def boom(_: str): - raise ValueError("unparsable url") - - monkeypatch.setattr(sp, "urlparse", boom) - assert sp._infer_server_attributes("bad") == {} - - -def test_operation_and_span_naming(processor_setup): - processor, _ = processor_setup - - generation = GenerationSpanData(input=[{"role": "user"}], model="gpt-4o") - assert ( - processor._get_operation_name(generation) == sp.GenAIOperationName.CHAT - ) - - completion = GenerationSpanData(input=[]) - assert ( - processor._get_operation_name(completion) - == sp.GenAIOperationName.TEXT_COMPLETION - ) - - embeddings = GenerationSpanData(input=None) - setattr(embeddings, "embedding_dimension", 128) - assert ( - processor._get_operation_name(embeddings) - == sp.GenAIOperationName.EMBEDDINGS - ) - - # AgentSpanData always maps to invoke_agent (no operation field in OpenAI Agents SDK) - agent_data = AgentSpanData(name="bot") - assert ( - processor._get_operation_name(agent_data) - == sp.GenAIOperationName.INVOKE_AGENT - ) - - agent_default = AgentSpanData(name="") - assert ( - processor._get_operation_name(agent_default) - == sp.GenAIOperationName.INVOKE_AGENT - ) - - function_data = FunctionSpanData(name="", input=None, output=None) - assert ( - processor._get_operation_name(function_data) - == sp.GenAIOperationName.EXECUTE_TOOL - ) - - response_data = ResponseSpanData() - assert ( - processor._get_operation_name(response_data) - == sp.GenAIOperationName.CHAT - ) - - 
class UnknownSpanData: - pass - - unknown = UnknownSpanData() - assert processor._get_operation_name(unknown) == "unknown" - - assert processor._get_span_kind(GenerationSpanData()) is SpanKind.CLIENT - assert ( - processor._get_span_kind( - FunctionSpanData(name="", input=None, output=None) - ) - is SpanKind.INTERNAL - ) - - assert ( - sp.get_span_name(sp.GenAIOperationName.CHAT, model="gpt-4o") - == "chat gpt-4o" - ) - assert ( - sp.get_span_name( - sp.GenAIOperationName.EXECUTE_TOOL, tool_name="weather" - ) - == "execute_tool weather" - ) - assert ( - sp.get_span_name(sp.GenAIOperationName.INVOKE_AGENT, agent_name=None) - == "invoke_agent" - ) - assert ( - sp.get_span_name(sp.GenAIOperationName.CREATE_AGENT, agent_name=None) - == "create_agent" - ) - - -def test_attribute_builders(processor_setup): - processor, _ = processor_setup - - payload = sp.ContentPayload( - input_messages=[ - { - "role": "user", - "parts": [{"type": "text", "content": "hi"}], - } - ], - output_messages=[ - { - "role": "assistant", - "parts": [{"type": "text", "content": "hello"}], - } - ], - system_instructions=[{"type": "text", "content": "be helpful"}], - ) - model_config = { - "base_url": "https://api.openai.com:443/v1", - "temperature": 0.2, - "top_p": 0.9, - "top_k": 3, - "frequency_penalty": 0.1, - "presence_penalty": 0.4, - "seed": 1234, - "n": 2, - "max_tokens": 128, - "stop": ["foo", None, "bar"], - } - generation_span = GenerationSpanData( - input=[{"role": "user"}], - output=[{"finish_reason": "stop"}], - model="gpt-4o", - model_config=model_config, - usage={ - "prompt_tokens": 10, - "completion_tokens": 3, - "total_tokens": 13, - }, - ) - gen_attrs = _collect( - processor._get_attributes_from_generation_span_data( - generation_span, payload - ) - ) - assert gen_attrs[sp.GEN_AI_REQUEST_MODEL] == "gpt-4o" - assert gen_attrs[sp.GEN_AI_REQUEST_MAX_TOKENS] == 128 - assert gen_attrs[sp.GEN_AI_REQUEST_STOP_SEQUENCES] == [ - "foo", - None, - "bar", - ] - assert 
gen_attrs[ServerAttributes.SERVER_ADDRESS] == "api.openai.com" - assert gen_attrs[ServerAttributes.SERVER_PORT] == 443 - assert gen_attrs[sp.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert gen_attrs[sp.GEN_AI_USAGE_OUTPUT_TOKENS] == 3 - assert gen_attrs[sp.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] - assert json.loads(gen_attrs[sp.GEN_AI_INPUT_MESSAGES])[0]["role"] == "user" - assert ( - json.loads(gen_attrs[sp.GEN_AI_OUTPUT_MESSAGES])[0]["role"] - == "assistant" - ) - assert ( - json.loads(gen_attrs[sp.GEN_AI_SYSTEM_INSTRUCTIONS])[0]["content"] - == "be helpful" - ) - assert gen_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.TEXT - - class _Usage: - def __init__(self) -> None: - self.input_tokens = None - self.prompt_tokens = 7 - self.output_tokens = None - self.completion_tokens = 2 - self.total_tokens = 9 - - class _Response: - def __init__(self) -> None: - self.id = "resp-1" - self.model = "gpt-4o" - self.usage = _Usage() - self.output = [{"finish_reason": "stop"}] - - response_span = ResponseSpanData(response=_Response()) - response_attrs = _collect( - processor._get_attributes_from_response_span_data( - response_span, sp.ContentPayload() - ) - ) - assert response_attrs[sp.GEN_AI_RESPONSE_ID] == "resp-1" - assert response_attrs[sp.GEN_AI_RESPONSE_MODEL] == "gpt-4o" - assert response_attrs[sp.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] - assert response_attrs[sp.GEN_AI_USAGE_INPUT_TOKENS] == 7 - assert response_attrs[sp.GEN_AI_USAGE_OUTPUT_TOKENS] == 2 - assert response_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.TEXT - - agent_span = AgentSpanData( - name="helper", - output_type="json", - ) - agent_attrs = _collect( - processor._get_attributes_from_agent_span_data(agent_span, None) - ) - assert agent_attrs[sp.GEN_AI_AGENT_NAME] == "helper" - assert sp.GEN_AI_AGENT_ID not in agent_attrs - assert sp.GEN_AI_REQUEST_MODEL not in agent_attrs - assert agent_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.TEXT - - # Fallback to aggregated model when span 
data lacks it - agent_span_no_model = AgentSpanData( - name="helper-2", - output_type="json", - ) - agent_content = { - "input_messages": [], - "output_messages": [], - "system_instructions": [], - "request_model": "gpt-fallback", - } - agent_attrs_fallback = _collect( - processor._get_attributes_from_agent_span_data( - agent_span_no_model, agent_content - ) - ) - assert agent_attrs_fallback[sp.GEN_AI_REQUEST_MODEL] == "gpt-fallback" - - function_span = FunctionSpanData( - name="lookup_weather", input=None, output=None - ) - function_span.tool_type = "extension" - function_span.call_id = "call-42" - function_span.description = "desc" - function_payload = sp.ContentPayload( - tool_arguments={"city": "seattle"}, - tool_result={"temperature": 70}, - ) - function_attrs = _collect( - processor._get_attributes_from_function_span_data( - function_span, function_payload - ) - ) - assert function_attrs[sp.GEN_AI_TOOL_NAME] == "lookup_weather" - assert function_attrs[sp.GEN_AI_TOOL_TYPE] == "extension" - assert function_attrs[sp.GEN_AI_TOOL_CALL_ID] == "call-42" - assert function_attrs[sp.GEN_AI_TOOL_DESCRIPTION] == "desc" - assert function_attrs[sp.GEN_AI_TOOL_CALL_ARGUMENTS] == {"city": "seattle"} - assert function_attrs[sp.GEN_AI_TOOL_CALL_RESULT] == {"temperature": 70} - assert function_attrs[sp.GEN_AI_OUTPUT_TYPE] == sp.GenAIOutputType.JSON - - -def test_extract_genai_attributes_unknown_type(processor_setup): - processor, _ = processor_setup - - class UnknownSpanData: - pass - - class StubSpan: - def __init__(self) -> None: - self.span_data = UnknownSpanData() - - attrs = _collect( - processor._extract_genai_attributes( - StubSpan(), sp.ContentPayload(), None - ) - ) - assert attrs[sp.GEN_AI_PROVIDER_NAME] == "openai" - assert attrs[sp.GEN_AI_SYSTEM_KEY] == "openai" - assert sp.GEN_AI_OPERATION_NAME not in attrs - - -def test_span_status_helper(): - status = sp._get_span_status( - SimpleNamespace(error={"message": "boom", "data": "bad"}) - ) - assert status.status_code 
is StatusCode.ERROR - assert status.description == "boom: bad" - - ok_status = sp._get_span_status(SimpleNamespace(error=None)) - assert ok_status.status_code is StatusCode.OK - - -@dataclass -class FakeTrace: - name: str - trace_id: str - started_at: str | None = None - ended_at: str | None = None - - -@dataclass -class FakeSpan: - trace_id: str - span_id: str - span_data: Any - parent_id: str | None = None - started_at: str | None = None - ended_at: str | None = None - error: dict[str, Any] | None = None - - -def test_span_lifecycle_and_shutdown(processor_setup): - processor, exporter = processor_setup - - trace = FakeTrace( - name="workflow", - trace_id="trace-1", - started_at="not-a-timestamp", - ended_at="2024-01-01T00:00:05Z", - ) - processor.on_trace_start(trace) - - parent_span = FakeSpan( - trace_id="trace-1", - span_id="span-1", - span_data=AgentSpanData(name="agent"), - started_at="2024-01-01T00:00:00Z", - ended_at="2024-01-01T00:00:02Z", - ) - processor.on_span_start(parent_span) - - missing_span = FakeSpan( - trace_id="trace-1", - span_id="missing", - span_data=FunctionSpanData(name="lookup", input=None, output=None), - started_at="2024-01-01T00:00:01Z", - ended_at="2024-01-01T00:00:02Z", - ) - processor.on_span_end(missing_span) - - child_span = FakeSpan( - trace_id="trace-1", - span_id="span-2", - parent_id="span-1", - span_data=FunctionSpanData(name="lookup", input=None, output=None), - started_at="2024-01-01T00:00:02Z", - ended_at="2024-01-01T00:00:03Z", - error={"message": "boom", "data": "bad"}, - ) - processor.on_span_start(child_span) - processor.on_span_end(child_span) - - processor.on_span_end(parent_span) - processor.on_trace_end(trace) - - linger_trace = FakeTrace( - name="linger", - trace_id="trace-2", - started_at="2024-01-01T00:00:06Z", - ) - processor.on_trace_start(linger_trace) - linger_span = FakeSpan( - trace_id="trace-2", - span_id="span-3", - span_data=AgentSpanData(name=""), - started_at="2024-01-01T00:00:06Z", - ) - 
processor.on_span_start(linger_span) - - assert processor.force_flush() is None - processor.shutdown() - - finished = exporter.get_finished_spans() - statuses = {span.name: span.status for span in finished} - - assert ( - statuses["execute_tool lookup"].status_code is StatusCode.ERROR - and statuses["execute_tool lookup"].description == "boom: bad" - ) - assert statuses["invoke_agent agent"].status_code is StatusCode.OK - assert statuses["workflow"].status_code is StatusCode.OK - assert ( - statuses["invoke_agent"].status_code is StatusCode.ERROR - and statuses["invoke_agent"].description == "Application shutdown" - ) - assert ( - statuses["linger"].status_code is StatusCode.ERROR - and statuses["linger"].description == "Application shutdown" - ) - workflow_span = next(span for span in finished if span.name == "workflow") - assert ( - workflow_span.attributes[sp.GEN_AI_OPERATION_NAME] - == sp.GenAIOperationName.INVOKE_AGENT - ) - - -def test_chat_span_renamed_with_model(processor_setup): - processor, exporter = processor_setup - - trace = FakeTrace(name="workflow", trace_id="trace-rename") - processor.on_trace_start(trace) - - agent = FakeSpan( - trace_id=trace.trace_id, - span_id="agent-span", - span_data=AgentSpanData( - name="Agent", - ), - started_at="2025-01-01T00:00:00Z", - ended_at="2025-01-01T00:00:02Z", - ) - processor.on_span_start(agent) - - generation_data = GenerationSpanData( - input=[{"role": "user", "content": "question"}], - output=[{"finish_reason": "stop"}], - usage={"prompt_tokens": 1, "completion_tokens": 1}, - ) - generation_span = FakeSpan( - trace_id=trace.trace_id, - span_id="child-span", - parent_id=agent.span_id, - span_data=generation_data, - started_at="2025-01-01T00:00:00Z", - ended_at="2025-01-01T00:00:01Z", - ) - processor.on_span_start(generation_span) - - # Model becomes available before span end (e.g., once response arrives) - generation_data.model = "gpt-4o" - - processor.on_span_end(generation_span) - 
processor.on_span_end(agent) - processor.on_trace_end(trace) - - span_names = {span.name for span in exporter.get_finished_spans()} - assert "chat gpt-4o" in span_names diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py b/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py deleted file mode 100644 index d9880806..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/test_zz_coverage_improvements.py +++ /dev/null @@ -1,898 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -# pylint: disable=wrong-import-position,wrong-import-order,import-error,no-name-in-module,unexpected-keyword-arg,no-value-for-parameter,redefined-outer-name,too-many-locals,too-many-statements,protected-access,import-outside-toplevel,no-self-use,invalid-name -# ruff: noqa: PLC0415 - -""" -Tests for improving coverage of OpenAI Agents instrumentation. - -This file targets uncovered lines identified in the coverage report: -- __init__.py: _resolve_system, _resolve_bool, _resolve_content_mode -- span_processor.py: normalization utilities, span data handlers, message normalization, - output type inference, metrics recording, and various edge cases. - -NOTE: Imports are done inside functions to avoid module-level import conflicts -that can occur when pytest collects tests across multiple files. 
-""" - -from __future__ import annotations - - -def _get_modules(): - """Lazy import to avoid module conflicts.""" - import importlib - - sp = importlib.import_module( - "opentelemetry.instrumentation.genai.openai_agents.span_processor" - ) - init_module = importlib.import_module( - "opentelemetry.instrumentation.genai.openai_agents" - ) - return sp, init_module - - -def _get_span_data_classes(): - """Lazy import span data classes.""" - from agents.tracing import ( - AgentSpanData, - FunctionSpanData, - GenerationSpanData, - ResponseSpanData, - ) - - return ( - AgentSpanData, - FunctionSpanData, - GenerationSpanData, - ResponseSpanData, - ) - - -def _get_otel_test_fixtures(): - """Get OpenTelemetry test fixtures.""" - from opentelemetry.sdk.trace import TracerProvider - - try: - from opentelemetry.sdk.trace.export import ( - InMemorySpanExporter, - SimpleSpanProcessor, - ) - except ImportError: - from opentelemetry.sdk.trace.export import SimpleSpanProcessor - from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, - ) - return TracerProvider, InMemorySpanExporter, SimpleSpanProcessor - - -# ============================================================================ -# Tests for __init__.py functions -# ============================================================================ - - -class TestResolveSystem: - """Tests for _resolve_system function.""" - - def test_resolve_system_returns_openai_for_none(self): - _, init_module = _get_modules() - result = init_module._resolve_system(None) - assert result == "openai" - - def test_resolve_system_returns_openai_for_empty_string(self): - _, init_module = _get_modules() - result = init_module._resolve_system("") - assert result == "openai" - - def test_resolve_system_matches_enum_value(self): - _, init_module = _get_modules() - result = init_module._resolve_system("openai") - assert result == "openai" - - def test_resolve_system_matches_enum_name(self): - _, init_module = 
_get_modules() - result = init_module._resolve_system("OPENAI") - assert result == "openai" - - def test_resolve_system_matches_enum_name_when_name_differs(self): - _, init_module = _get_modules() - from opentelemetry.semconv._incubating.attributes import ( - gen_ai_attributes as GenAI, - ) - - candidate = next( - ( - member - for member in GenAI.GenAiSystemValues - if member.name.lower() != member.value - ), - None, - ) - assert candidate is not None - assert ( - init_module._resolve_system(candidate.name.lower()) - == candidate.value - ) - - def test_resolve_system_matches_with_whitespace(self): - _, init_module = _get_modules() - result = init_module._resolve_system(" openai ") - assert result == "openai" - - def test_resolve_system_returns_custom_value_when_not_found(self): - _, init_module = _get_modules() - result = init_module._resolve_system("custom_provider") - assert result == "custom_provider" - - -class TestResolveBool: - """Tests for _resolve_bool function.""" - - def test_resolve_bool_returns_default_for_none(self): - _, init_module = _get_modules() - assert init_module._resolve_bool(None, True) is True - assert init_module._resolve_bool(None, False) is False - - def test_resolve_bool_returns_value_for_bool(self): - _, init_module = _get_modules() - assert init_module._resolve_bool(True, False) is True - assert init_module._resolve_bool(False, True) is False - - def test_resolve_bool_parses_true_strings(self): - _, init_module = _get_modules() - for value in ["true", "TRUE", "1", "yes", "on", " YES "]: - assert init_module._resolve_bool(value, False) is True, ( - f"Failed for {value}" - ) - - def test_resolve_bool_parses_false_strings(self): - _, init_module = _get_modules() - for value in ["false", "FALSE", "0", "no", "off", " NO "]: - assert init_module._resolve_bool(value, True) is False, ( - f"Failed for {value}" - ) - - def test_resolve_bool_returns_default_for_unknown(self): - _, init_module = _get_modules() - assert 
init_module._resolve_bool("maybe", True) is True - assert init_module._resolve_bool("maybe", False) is False - - -class TestResolveContentMode: - """Tests for _resolve_content_mode function.""" - - def test_resolve_content_mode_returns_value_for_enum(self): - sp, init_module = _get_modules() - result = init_module._resolve_content_mode( - sp.ContentCaptureMode.SPAN_ONLY - ) - assert result == sp.ContentCaptureMode.SPAN_ONLY - - def test_resolve_content_mode_for_bool_true(self): - sp, init_module = _get_modules() - result = init_module._resolve_content_mode(True) - assert result == sp.ContentCaptureMode.SPAN_AND_EVENT - - def test_resolve_content_mode_for_bool_false(self): - sp, init_module = _get_modules() - result = init_module._resolve_content_mode(False) - assert result == sp.ContentCaptureMode.NO_CONTENT - - def test_resolve_content_mode_for_none(self): - sp, init_module = _get_modules() - result = init_module._resolve_content_mode(None) - assert result == sp.ContentCaptureMode.SPAN_AND_EVENT - - def test_resolve_content_mode_for_empty_string(self): - sp, init_module = _get_modules() - result = init_module._resolve_content_mode("") - assert result == sp.ContentCaptureMode.SPAN_AND_EVENT - - def test_resolve_content_mode_span_only_variants(self): - sp, init_module = _get_modules() - for value in ["span_only", "span-only", "span"]: - result = init_module._resolve_content_mode(value) - assert result == sp.ContentCaptureMode.SPAN_ONLY, ( - f"Failed for {value}" - ) - - def test_resolve_content_mode_event_only_variants(self): - sp, init_module = _get_modules() - for value in ["event_only", "event-only", "event"]: - result = init_module._resolve_content_mode(value) - assert result == sp.ContentCaptureMode.EVENT_ONLY, ( - f"Failed for {value}" - ) - - def test_resolve_content_mode_span_and_event_variants(self): - sp, init_module = _get_modules() - for value in [ - "span_and_event", - "span-and-event", - "span_and_events", - "all", - "true", - "1", - "yes", - ]: - 
result = init_module._resolve_content_mode(value) - assert result == sp.ContentCaptureMode.SPAN_AND_EVENT, ( - f"Failed for {value}" - ) - - def test_resolve_content_mode_no_content_variants(self): - sp, init_module = _get_modules() - for value in ["no_content", "false", "0", "no", "none"]: - result = init_module._resolve_content_mode(value) - assert result == sp.ContentCaptureMode.NO_CONTENT, ( - f"Failed for {value}" - ) - - -# ============================================================================ -# Tests for span_processor.py normalization utilities -# ============================================================================ - - -class TestNormalizeProvider: - """Tests for normalize_provider function.""" - - def test_normalize_provider_returns_none_for_none(self): - sp, _ = _get_modules() - assert sp.normalize_provider(None) is None - - def test_normalize_provider_returns_none_for_empty(self): - sp, _ = _get_modules() - assert sp.normalize_provider("") is None - - def test_normalize_provider_normalizes_valid_provider(self): - sp, _ = _get_modules() - result = sp.normalize_provider("OPENAI") - assert result == "openai" - - def test_normalize_provider_passes_through_unknown(self): - sp, _ = _get_modules() - result = sp.normalize_provider("MyCustomProvider") - assert result == "MyCustomProvider" - - -class TestValidateToolType: - """Tests for validate_tool_type function.""" - - def test_validate_tool_type_returns_default_for_none(self): - sp, _ = _get_modules() - assert sp.validate_tool_type(None) == "function" - - def test_validate_tool_type_returns_default_for_empty(self): - sp, _ = _get_modules() - assert sp.validate_tool_type("") == "function" - - def test_validate_tool_type_normalizes_valid_types(self): - sp, _ = _get_modules() - assert sp.validate_tool_type("FUNCTION") == "function" - assert sp.validate_tool_type("extension") == "extension" - assert sp.validate_tool_type("DATASTORE") == "datastore" - - def 
test_validate_tool_type_returns_default_for_unknown(self): - sp, _ = _get_modules() - assert sp.validate_tool_type("unknown_type") == "function" - - -class TestNormalizeOutputType: - """Tests for normalize_output_type function.""" - - def test_normalize_output_type_returns_text_for_none(self): - sp, _ = _get_modules() - assert sp.normalize_output_type(None) == "text" - - def test_normalize_output_type_returns_text_for_empty(self): - sp, _ = _get_modules() - assert sp.normalize_output_type("") == "text" - - def test_normalize_output_type_normalizes_known_types(self): - sp, _ = _get_modules() - assert sp.normalize_output_type("TEXT") == "text" - assert sp.normalize_output_type("json") == "json" - assert sp.normalize_output_type("image") == "image" - assert sp.normalize_output_type("speech") == "speech" - - def test_normalize_output_type_handles_mappings(self): - sp, _ = _get_modules() - assert sp.normalize_output_type("json_object") == "json" - assert sp.normalize_output_type("jsonschema") == "json" - assert sp.normalize_output_type("speech_audio") == "speech" - assert sp.normalize_output_type("audio_speech") == "speech" - assert sp.normalize_output_type("image_png") == "image" - assert sp.normalize_output_type("function_arguments_json") == "json" - assert sp.normalize_output_type("tool_call") == "json" - assert sp.normalize_output_type("transcription_json") == "json" - - def test_normalize_output_type_returns_text_for_unknown(self): - sp, _ = _get_modules() - assert sp.normalize_output_type("unknown_type") == "text" - - -# ============================================================================ -# Tests for span naming and output type inference -# ============================================================================ - - -class TestGetSpanName: - """Tests for get_span_name function.""" - - def test_span_name_handoff(self): - sp, _ = _get_modules() - name = sp.get_span_name("agent_handoff", agent_name="target_agent") - assert name == "agent_handoff 
target_agent" - - def test_span_name_handoff_no_agent(self): - sp, _ = _get_modules() - name = sp.get_span_name("agent_handoff") - assert name == "agent_handoff" - - def test_span_name_transcription(self): - sp, _ = _get_modules() - name = sp.get_span_name("transcription", model="whisper-1") - assert name == "transcription whisper-1" - - def test_span_name_speech(self): - sp, _ = _get_modules() - name = sp.get_span_name("speech_generation", model="tts-1") - assert name == "speech_generation tts-1" - - def test_span_name_unknown_operation(self): - sp, _ = _get_modules() - name = sp.get_span_name("unknown_operation") - assert name == "unknown_operation" - - -class TestInferOutputType: - """Tests for _infer_output_type method.""" - - def _make_processor(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - return sp.GenAISemanticProcessor( - tracer=tracer, system_name="openai", metrics_enabled=False - ) - - def test_infer_output_type_function_span(self): - _, FunctionSpanData, _, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = FunctionSpanData(name="", input=None, output=None) - assert processor._infer_output_type(span_data) == "json" - - def test_infer_output_type_embeddings(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = GenerationSpanData() - span_data.embedding_dimension = 128 - assert processor._infer_output_type(span_data) == "text" - - def test_infer_output_type_image_output(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = GenerationSpanData(output=[{"type": "image"}]) - assert processor._infer_output_type(span_data) == "image" - - def test_infer_output_type_audio_output(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = 
GenerationSpanData(output=[{"type": "audio"}]) - assert processor._infer_output_type(span_data) == "speech" - - def test_infer_output_type_json_output(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = GenerationSpanData(output=[{"type": "json_object"}]) - assert processor._infer_output_type(span_data) == "json" - - def test_infer_output_type_text_output(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = GenerationSpanData(output=[{"type": "text"}]) - assert processor._infer_output_type(span_data) == "text" - - def test_infer_output_type_json_like_keys(self): - _, _, GenerationSpanData, _ = _get_span_data_classes() - processor = self._make_processor() - span_data = GenerationSpanData( - output=[{"schema": {}, "properties": {}}] - ) - assert processor._infer_output_type(span_data) == "json" - - -# ============================================================================ -# Tests for content capture mode properties -# ============================================================================ - - -class TestContentCaptureMode: - """Tests for ContentCaptureMode properties.""" - - def test_capture_in_span(self): - sp, _ = _get_modules() - assert sp.ContentCaptureMode.NO_CONTENT.capture_in_span is False - assert sp.ContentCaptureMode.SPAN_ONLY.capture_in_span is True - assert sp.ContentCaptureMode.EVENT_ONLY.capture_in_span is False - assert sp.ContentCaptureMode.SPAN_AND_EVENT.capture_in_span is True - - def test_capture_in_event(self): - sp, _ = _get_modules() - assert sp.ContentCaptureMode.NO_CONTENT.capture_in_event is False - assert sp.ContentCaptureMode.SPAN_ONLY.capture_in_event is False - assert sp.ContentCaptureMode.EVENT_ONLY.capture_in_event is True - assert sp.ContentCaptureMode.SPAN_AND_EVENT.capture_in_event is True - - -# ============================================================================ -# Tests for sanitize usage 
payload -# ============================================================================ - - -class TestSanitizeUsagePayload: - """Tests for _sanitize_usage_payload method.""" - - def test_sanitize_dict_usage(self): - sp, _ = _get_modules() - usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15} - sp.GenAISemanticProcessor._sanitize_usage_payload(usage) - assert "total_tokens" not in usage - assert usage["input_tokens"] == 10 - - def test_sanitize_object_usage(self): - sp, _ = _get_modules() - - class Usage: - def __init__(self): - self.input_tokens = 10 - self.output_tokens = 5 - self.total_tokens = 15 - - usage = Usage() - sp.GenAISemanticProcessor._sanitize_usage_payload(usage) - assert usage.total_tokens is None - - def test_sanitize_none_usage(self): - sp, _ = _get_modules() - # Should not raise - sp.GenAISemanticProcessor._sanitize_usage_payload(None) - - -# ============================================================================ -# Tests for message normalization -# ============================================================================ - - -class TestNormalizeMessagesToRoleParts: - """Tests for _normalize_messages_to_role_parts method.""" - - def _make_processor(self, sensitive=True): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - return sp.GenAISemanticProcessor( - tracer=tracer, - system_name="openai", - include_sensitive_data=sensitive, - metrics_enabled=False, - ) - - def test_normalize_non_dict_message(self): - processor = self._make_processor() - messages = ["plain text message"] - normalized = processor._normalize_messages_to_role_parts(messages) - assert len(normalized) == 1 - assert normalized[0]["role"] == "user" - assert normalized[0]["parts"][0]["type"] == "text" - - def test_normalize_with_redaction(self): - processor = self._make_processor(sensitive=False) - messages = [{"role": "user", "content": "Secret message"}] - 
normalized = processor._normalize_messages_to_role_parts(messages) - assert normalized[0]["parts"][0]["content"] == "readacted" - - def test_normalize_empty_messages_returns_empty_list(self): - processor = self._make_processor() - normalized = processor._normalize_messages_to_role_parts([]) - assert normalized == [] - - def test_normalize_parts_tool_calls_and_tool_responses(self): - processor = self._make_processor(sensitive=False) - messages = [ - { - "role": "assistant", - "parts": [ - {"type": "text", "content": "hello"}, - { - "type": "tool_call", - "id": "call_1", - "name": "weather", - "arguments": {"city": "Paris"}, - }, - { - "type": "tool_call_response", - "id": "call_1", - "result": {"temp": 70}, - }, - {"type": "weird", "payload": {"x": 1}}, - 123, - ], - "content": [ - {"type": "text", "text": "from-content"}, - {"type": "image", "url": "http://example.com"}, - "raw", - ], - "tool_calls": [ - { - "id": "call_2", - "function": {"name": "calc", "arguments": '{"x":1}'}, - }, - "not-dict", - ], - }, - {"role": "tool", "tool_call_id": "call_2", "content": {"x": 1}}, - ] - - normalized = processor._normalize_messages_to_role_parts(messages) - assert normalized[0]["role"] == "assistant" - assistant_part_types = {p["type"] for p in normalized[0]["parts"]} - assert "tool_call" in assistant_part_types - assert "tool_call_response" in assistant_part_types - - tool_call = next( - p for p in normalized[0]["parts"] if p["type"] == "tool_call" - ) - assert tool_call["arguments"] == "readacted" - - tool_response = next( - p - for p in normalized[0]["parts"] - if p["type"] == "tool_call_response" - ) - assert tool_response["result"] == "readacted" - - assert normalized[1]["role"] == "tool" - assert normalized[1]["parts"][0]["type"] == "tool_call_response" - assert normalized[1]["parts"][0]["result"] == "readacted" - - -# ============================================================================ -# Tests for helper functions -# 
============================================================================ - - -class TestHelperFunctions: - """Tests for various helper functions.""" - - def test_is_instance_of_single_class(self): - sp, _ = _get_modules() - assert sp._is_instance_of("hello", str) is True - assert sp._is_instance_of(123, str) is False - - def test_is_instance_of_tuple_classes(self): - sp, _ = _get_modules() - assert sp._is_instance_of("hello", (str, int)) is True - assert sp._is_instance_of(123, (str, int)) is True - assert sp._is_instance_of(3.14, (str, int)) is False - - def test_span_status_helper(self): - from types import SimpleNamespace - - from opentelemetry.trace.status import StatusCode - - sp, _ = _get_modules() - - status = sp._get_span_status( - SimpleNamespace(error={"message": "boom", "data": "bad"}) - ) - assert status.status_code is StatusCode.ERROR - assert status.description == "boom: bad" - - ok_status = sp._get_span_status(SimpleNamespace(error=None)) - assert ok_status.status_code is StatusCode.OK - - -# ============================================================================ -# Tests for processor initialization and configuration -# ============================================================================ - - -class TestProcessorConfiguration: - """Tests for GenAISemanticProcessor configuration.""" - - def test_processor_with_metrics_disabled(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - - processor = sp.GenAISemanticProcessor( - tracer=tracer, - system_name="openai", - metrics_enabled=False, - ) - - assert processor._metrics_enabled is False - assert processor._duration_histogram is None - assert processor._token_usage_histogram is None - processor.shutdown() - - def test_processor_with_custom_system_name(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = 
provider.get_tracer(__name__) - - processor = sp.GenAISemanticProcessor( - tracer=tracer, - system_name="azure.ai.inference", - metrics_enabled=False, - ) - - assert processor.system_name == "azure.ai.inference" - processor.shutdown() - - def test_processor_force_flush_is_noop(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - - processor = sp.GenAISemanticProcessor( - tracer=tracer, - system_name="openai", - metrics_enabled=False, - ) - - # Should not raise - result = processor.force_flush() - assert result is None - processor.shutdown() - - def test_processor_infers_server_from_base_url(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - - processor = sp.GenAISemanticProcessor( - tracer=tracer, - system_name="openai", - metrics_enabled=False, - base_url="https://api.example.com:8443/v1", - ) - - assert processor.server_address == "api.example.com" - assert processor.server_port == 8443 - server_attrs = processor._get_server_attributes() - assert ( - server_attrs[sp.ServerAttributes.SERVER_ADDRESS] - == "api.example.com" - ) - assert server_attrs[sp.ServerAttributes.SERVER_PORT] == 8443 - processor.shutdown() - - -class TestRecordMetrics: - def test_record_metrics_noop_when_disabled(self): - sp, _ = _get_modules() - processor = sp.GenAISemanticProcessor(metrics_enabled=False) - processor._record_metrics(object(), {}) - - def test_record_metrics_records_duration_and_tokens(self): - from types import SimpleNamespace - - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - - class _Histogram: - def __init__(self) -> None: - self.records = [] - - def record(self, value, attributes) -> None: - self.records.append((value, attributes)) - - duration_histogram = _Histogram() 
- token_histogram = _Histogram() - - processor = sp.GenAISemanticProcessor( - tracer=tracer, system_name="openai", metrics_enabled=False - ) - processor._metrics_enabled = True - processor._duration_histogram = duration_histogram - processor._token_usage_histogram = token_histogram - - span = SimpleNamespace( - started_at="2024-01-01T00:00:00+00:00", - ended_at="2024-01-01T00:00:02+00:00", - error={"type": "timeout"}, - ) - attributes = { - sp.GEN_AI_PROVIDER_NAME: "openai", - sp.GEN_AI_OPERATION_NAME: sp.GenAIOperationName.CHAT, - sp.GEN_AI_REQUEST_MODEL: "gpt-4o-mini", - sp.ServerAttributes.SERVER_ADDRESS: "api.example.com", - sp.ServerAttributes.SERVER_PORT: 443, - sp.GEN_AI_USAGE_INPUT_TOKENS: 2, - sp.GEN_AI_USAGE_OUTPUT_TOKENS: 3, - } - - processor._record_metrics(span, attributes) - - assert duration_histogram.records - duration_value, duration_attrs = duration_histogram.records[0] - assert duration_value == 2.0 - assert duration_attrs["error.type"] == "timeout" - - assert len(token_histogram.records) == 2 - token_types = { - token_attrs[sp.GEN_AI_TOKEN_TYPE] - for _, token_attrs in token_histogram.records - } - assert token_types == {"input", "output"} - - def test_record_metrics_swallows_exceptions(self): - from types import SimpleNamespace - - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - - class _BoomHistogram: - def record(self, value, attributes) -> None: - raise RuntimeError("boom") - - processor = sp.GenAISemanticProcessor( - tracer=tracer, system_name="openai", metrics_enabled=False - ) - processor._metrics_enabled = True - processor._duration_histogram = _BoomHistogram() - - span = SimpleNamespace( - started_at="2024-01-01T00:00:00+00:00", - ended_at="2024-01-01T00:00:02+00:00", - ) - attributes = { - sp.GEN_AI_PROVIDER_NAME: "openai", - sp.GEN_AI_OPERATION_NAME: sp.GenAIOperationName.CHAT, - sp.GEN_AI_REQUEST_MODEL: "gpt-4o-mini", - } - - 
processor._record_metrics(span, attributes) - - -class TestVersionModule: - def test_version_importable(self): - import importlib - - version_module = importlib.import_module( - "opentelemetry.instrumentation.genai.openai_agents.version" - ) - assert isinstance(version_module.__version__, str) - assert version_module.__version__ - - -# ============================================================================ -# Tests for normalize_to_text_parts -# ============================================================================ - - -class TestNormalizeToTextParts: - """Tests for _normalize_to_text_parts method.""" - - def _make_processor(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - return sp.GenAISemanticProcessor( - tracer=tracer, system_name="openai", metrics_enabled=False - ) - - def test_normalize_string_content(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts("Hello") - assert len(parts) == 1 - assert parts[0] == {"type": "text", "content": "Hello"} - - def test_normalize_list_content(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts(["Hello", "World"]) - assert len(parts) == 2 - assert parts[0]["content"] == "Hello" - assert parts[1]["content"] == "World" - - def test_normalize_list_with_dict_missing_text_and_other_types(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts( - [{"nope": "missing"}, 123, "ok"] - ) - assert [part["content"] for part in parts] == [ - "{'nope': 'missing'}", - "123", - "ok", - ] - - def test_normalize_dict_content_with_text(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts({"text": "From text key"}) - assert len(parts) == 1 - assert parts[0]["content"] == "From text key" - - def test_normalize_dict_without_text_or_content(self): - processor = self._make_processor() - parts 
= processor._normalize_to_text_parts({"nope": "missing"}) - assert parts[0]["content"] == "{'nope': 'missing'}" - - def test_normalize_none_content(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts(None) - assert parts == [] - - def test_normalize_other_type_content(self): - processor = self._make_processor() - parts = processor._normalize_to_text_parts(123) - assert len(parts) == 1 - assert parts[0]["content"] == "123" - - -# ============================================================================ -# Tests for collect_system_instructions -# ============================================================================ - - -class TestCollectSystemInstructions: - """Tests for _collect_system_instructions method.""" - - def _make_processor(self): - sp, _ = _get_modules() - TracerProvider, _, _ = _get_otel_test_fixtures() - provider = TracerProvider() - tracer = provider.get_tracer(__name__) - return sp.GenAISemanticProcessor( - tracer=tracer, system_name="openai", metrics_enabled=False - ) - - def test_collect_system_role_message(self): - processor = self._make_processor() - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello"}, - ] - collected = processor._collect_system_instructions(messages) - assert len(collected) == 1 - assert collected[0]["type"] == "text" - assert collected[0]["content"] == "You are a helpful assistant." - - def test_collect_ai_role_message(self): - processor = self._make_processor() - messages = [ - {"role": "ai", "content": "AI assistant instructions."}, - ] - collected = processor._collect_system_instructions(messages) - assert len(collected) == 1 - assert collected[0]["content"] == "AI assistant instructions." 
- - def test_collect_empty_messages(self): - processor = self._make_processor() - collected = processor._collect_system_instructions([]) - assert collected == [] - - def test_collect_none_messages(self): - processor = self._make_processor() - collected = processor._collect_system_instructions(None) - assert collected == [] - - def test_collect_skips_non_dict_messages(self): - processor = self._make_processor() - collected = processor._collect_system_instructions( - ["not-a-dict", {"role": "system", "content": "hi"}] - ) - assert collected[0]["content"] == "hi" diff --git a/pyproject.toml b/pyproject.toml index 537181ee..4be72e2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,7 @@ include = [ "instrumentation/opentelemetry-instrumentation-genai-anthropic", "instrumentation/opentelemetry-instrumentation-genai-claude-agent-sdk", "instrumentation/opentelemetry-instrumentation-genai-langchain", + "instrumentation/opentelemetry-instrumentation-genai-openai-agents", "instrumentation/opentelemetry-instrumentation-genai-weaviate-client", "util/opentelemetry-util-genai", ] @@ -108,6 +109,8 @@ exclude = [ "instrumentation/opentelemetry-instrumentation-genai-claude-agent-sdk/examples/**/*.py", "instrumentation/opentelemetry-instrumentation-genai-langchain/tests/**/*.py", "instrumentation/opentelemetry-instrumentation-genai-langchain/examples/**/*.py", + "instrumentation/opentelemetry-instrumentation-genai-openai-agents/tests/**/*.py", + "instrumentation/opentelemetry-instrumentation-genai-openai-agents/examples/**", "instrumentation/opentelemetry-instrumentation-genai-weaviate-client/tests/**/*.py", "instrumentation/opentelemetry-instrumentation-genai-weaviate-client/examples/**/*.py", "util/opentelemetry-util-genai/tests/**/*.py", diff --git a/tox.ini b/tox.ini index 6f1a4db9..5d632713 100644 --- a/tox.ini +++ b/tox.ini @@ -251,6 +251,7 @@ deps = {toxinidir}/instrumentation/opentelemetry-instrumentation-genai-anthropic[instruments] 
{toxinidir}/instrumentation/opentelemetry-instrumentation-genai-langchain[instruments] {toxinidir}/instrumentation/opentelemetry-instrumentation-genai-claude-agent-sdk[instruments] + {toxinidir}/instrumentation/opentelemetry-instrumentation-genai-openai-agents[instruments] {toxinidir}/instrumentation/opentelemetry-instrumentation-genai-weaviate-client[instruments] commands =