diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/129.changed b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/129.changed new file mode 100644 index 00000000..26854c82 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/129.changed @@ -0,0 +1 @@ +Update langchain instrumentation to use latest semantic conventions diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 8779b3d4..0746a268 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import Any, Optional, cast +from typing import Any, Optional from uuid import UUID from langchain_core.callbacks import BaseCallbackHandler @@ -21,6 +21,9 @@ from opentelemetry.instrumentation.genai.langchain.utils import ( make_input_message, make_last_output_message, + normalize_provider, + to_input_messages, + to_output_messages, ) from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.invocation import ( @@ -29,10 +32,7 @@ WorkflowInvocation, ) from opentelemetry.util.genai.types import ( - InputMessage, - MessagePart, OutputMessage, - Text, ) @@ -91,10 +91,9 @@ def on_chain_start( else None ) if suggested_agent_name_lower != agent_invocation_name_lower: + agent_provider = normalize_provider(metadata) or "unknown" agent = self._telemetry_handler.invoke_local_agent( - provider=metadata.get("ls_provider", "unknown") - if metadata - else "unknown", + provider=agent_provider, agent_name=suggested_agent_name, ) agent.input_messages = make_input_message(inputs) @@ -190,10 +189,6 @@ def on_chat_model_start( metadata: Optional[dict[str, Any]] = None, **kwargs: Any, ) -> None: - # Other providers/LLMs may be supported in the future and telemetry for them is skipped for now. - if serialized.get("name") not in ("ChatOpenAI", "ChatBedrock"): - return - if "invocation_params" in kwargs: params = ( kwargs["invocation_params"].get("params") @@ -202,20 +197,22 @@ def on_chat_model_start( else: params = kwargs - request_model = "unknown" + # Resolve request_model from common provider-specific keys. + request_model: Optional[str] = None for model_tag in ( - "model_name", # ChatOpenAI + "model_name", # ChatOpenAI / ChatAnthropic "model_id", # ChatBedrock + "model", # ChatGoogleGenerativeAI / ChatVertexAI / ChatGroq / ChatMistralAI / ChatCohere / ChatOllama / ChatDeepSeek / ChatXAI ): if (model := (params or {}).get(model_tag)) is not None: - request_model = model + request_model = str(model) break - elif (model := (metadata or {}).get(model_tag)) is not None: - request_model = model + if (model := (metadata or {}).get(model_tag)) is not None: + request_model = str(model) break # Skip telemetry for unsupported request models - if request_model == "unknown": + if request_model is None: return # Initialize variables with default values to avoid "possibly unbound" errors @@ -234,45 +231,28 @@ def on_chat_model_start( stop_sequences = params.get("stop") seed = params.get("seed") temperature = params.get("temperature") - max_tokens = params.get("max_completion_tokens") + # ``max_completion_tokens`` is OpenAI-specific; fall back to the + # generic ``max_tokens`` used by Anthropic, Mistral, Cohere, etc. + max_tokens = params.get("max_completion_tokens") or params.get( + "max_tokens" + ) - provider = "unknown" + provider = normalize_provider(metadata) or "unknown" if metadata is not None: - provider = metadata.get("ls_provider", "unknown") - # Override with ChatBedrock values if present if "ls_temperature" in metadata: temperature = metadata.get("ls_temperature") if "ls_max_tokens" in metadata: max_tokens = metadata.get("ls_max_tokens") - input_messages: list[InputMessage] = [] - for sub_messages in messages: - for message in sub_messages: - # Cast to Any to avoid type checking issues with LangChain's complex content type - raw_content: Any = message.content - role = message.type - parts: list[Text] = [] - - if isinstance(raw_content, str): - parts = [Text(content=raw_content, type="text")] - elif isinstance(raw_content, list): - for item in raw_content: # type: ignore[misc] - if isinstance(item, str): - parts.append(Text(content=item, type="text")) - elif isinstance(item, dict): - # Safely extract text content from dict - text_value = item.get("text") # type: ignore[misc] - if isinstance(text_value, str) and text_value: - parts.append( - Text(content=text_value, type="text") - ) - - input_messages.append( - InputMessage( - parts=cast(list[MessagePart], parts), role=role - ) - ) + # ``messages`` from on_chat_model_start is ``list[list[BaseMessage]]`` + # (one inner list per generation request). Flatten and let + # :func:`to_input_messages` produce spec-conformant ``InputMessage`` s + # with proper roles, tool-call requests, tool results, and reasoning. + flattened: list[BaseMessage] = [ + msg for sub in messages for msg in sub + ] + input_messages = to_input_messages(flattened) llm_invocation = self._telemetry_handler.inference( provider, @@ -311,57 +291,50 @@ def on_llm_end( output_messages: list[OutputMessage] = [] for generation in getattr(response, "generations", []): for chat_generation in generation: - # Get finish reason - finish_reason = "unknown" # Default value + message = chat_generation.message + if message is None: + continue + + # Resolve finish_reason from generation_info or response + # metadata. Modern langchain-aws (>= 0.2) emits ``stop_reason`` + # (snake_case); older versions used ``stopReason``. Empty + # values are filtered out by util-genai when emitting + # ``gen_ai.response.finish_reasons``. + finish_reason = "" generation_info = getattr( chat_generation, "generation_info", None ) if generation_info is not None: - finish_reason = generation_info.get( - "finish_reason", "unknown" + finish_reason = generation_info.get("finish_reason", "") + if not finish_reason and message.response_metadata: + response_metadata = message.response_metadata + finish_reason = ( + response_metadata.get("stop_reason") + or response_metadata.get("stopReason") + or "" ) - if chat_generation.message: - # Get finish reason if generation_info is None above - if ( - generation_info is None - and chat_generation.message.response_metadata - ): - finish_reason = ( - chat_generation.message.response_metadata.get( - "stopReason", "unknown" - ) - ) - - # Get message content - parts = [ - Text( - content=chat_generation.message.content, - type="text", - ) - ] - role = chat_generation.message.type - output_message = OutputMessage( - role=role, - parts=cast(list[MessagePart], parts), - finish_reason=finish_reason, - ) - output_messages.append(output_message) - - # Get token usage if available - if chat_generation.message.usage_metadata: - input_tokens = ( - chat_generation.message.usage_metadata.get( - "input_tokens", 0 - ) - ) + # Convert via message_conversion so AIMessage tool calls, + # reasoning blocks, and structured content are preserved. + converted = to_output_messages( + [message], finish_reason=finish_reason + ) + output_messages.extend(converted) + + # Token usage (extracted regardless of whether the message + # produced output parts — some tool-call-only responses can + # still report token counts). Only set the counts when the + # provider actually reported them; defaulting to 0 would + # fabricate telemetry when the keys are absent. + if message.usage_metadata: + input_tokens = message.usage_metadata.get("input_tokens") + if input_tokens is not None: llm_invocation.input_tokens = input_tokens - output_tokens = ( - chat_generation.message.usage_metadata.get( - "output_tokens", 0 - ) - ) + output_tokens = message.usage_metadata.get( + "output_tokens" + ) + if output_tokens is not None: llm_invocation.output_tokens = output_tokens llm_invocation.output_messages = output_messages diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/span_manager.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/span_manager.py deleted file mode 100644 index 98fade9b..00000000 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/span_manager.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -from dataclasses import dataclass, field -from typing import Dict, List, Optional -from uuid import UUID - -from opentelemetry.semconv._incubating.attributes import ( - gen_ai_attributes as GenAI, -) -from opentelemetry.semconv.attributes import ( - error_attributes, -) -from opentelemetry.trace import Span, SpanKind, Tracer, set_span_in_context -from opentelemetry.trace.status import Status, StatusCode - -__all__ = ["_SpanManager"] - - -@dataclass -class _SpanState: - span: Span - children: List[UUID] = field(default_factory=lambda: list()) - - -class _SpanManager: - def __init__( - self, - tracer: Tracer, - ) -> None: - self._tracer = tracer - - # Map from run_id -> _SpanState, to keep track of spans and parent/child relationships - # TODO: Use weak references or a TTL cache to avoid memory leaks in long-running processes. See #3735 - self.spans: Dict[UUID, _SpanState] = {} - - def _create_span( - self, - run_id: UUID, - parent_run_id: Optional[UUID], - span_name: str, - kind: SpanKind = SpanKind.INTERNAL, - ) -> Span: - if parent_run_id is not None and parent_run_id in self.spans: - parent_state = self.spans[parent_run_id] - parent_span = parent_state.span - ctx = set_span_in_context(parent_span) - span = self._tracer.start_span( - name=span_name, kind=kind, context=ctx - ) - parent_state.children.append(run_id) - else: - # top-level or missing parent - span = self._tracer.start_span(name=span_name, kind=kind) - set_span_in_context(span) - - span_state = _SpanState(span=span) - self.spans[run_id] = span_state - - return span - - def create_chat_span( - self, - run_id: UUID, - parent_run_id: Optional[UUID], - request_model: str, - ) -> Span: - span = self._create_span( - run_id=run_id, - parent_run_id=parent_run_id, - span_name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {request_model}", - kind=SpanKind.CLIENT, - ) - span.set_attribute( - GenAI.GEN_AI_OPERATION_NAME, - GenAI.GenAiOperationNameValues.CHAT.value, - ) - if request_model: - span.set_attribute(GenAI.GEN_AI_REQUEST_MODEL, request_model) - - return span - - def end_span(self, run_id: UUID) -> None: - state = self.spans[run_id] - for child_id in state.children: - child_state = self.spans.get(child_id) - if child_state: - child_state.span.end() - del self.spans[child_id] - state.span.end() - del self.spans[run_id] - - def get_span(self, run_id: UUID) -> Optional[Span]: - state = self.spans.get(run_id) - return state.span if state else None - - def handle_error(self, error: BaseException, run_id: UUID): - span = self.get_span(run_id) - if span is None: - # If the span does not exist, we cannot set the error status - return - span.set_status(Status(StatusCode.ERROR, str(error))) - span.set_attribute( - error_attributes.ERROR_TYPE, type(error).__qualname__ - ) - self.end_span(run_id) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py index 1636a1ec..5de54e1d 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py @@ -1,34 +1,228 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + import json +from collections.abc import Iterable from typing import Any, Optional, cast -from langchain_core.messages import AIMessage +from langchain_core.messages import ( + AIMessage, + BaseMessage, + ToolMessage, +) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) from opentelemetry.util.genai.types import ( InputMessage, + MessagePart, OutputMessage, + Reasoning, Text, + ToolCallRequest, + ToolCallResponse, ) +# Mapping from LangChain ``ls_provider`` metadata values to the well-known +# ``gen_ai.provider.name`` values defined by the GenAI semantic conventions. +_PROVIDER_NAME_OVERRIDES: dict[str, str] = { + "amazon_bedrock": GenAIAttributes.GenAiProviderNameValues.AWS_BEDROCK.value, + "bedrock": GenAIAttributes.GenAiProviderNameValues.AWS_BEDROCK.value, + "bedrock_converse": GenAIAttributes.GenAiProviderNameValues.AWS_BEDROCK.value, + "azure_openai": GenAIAttributes.GenAiProviderNameValues.AZURE_AI_OPENAI.value, + "azure": GenAIAttributes.GenAiProviderNameValues.AZURE_AI_INFERENCE.value, + "vertexai": GenAIAttributes.GenAiProviderNameValues.GCP_VERTEX_AI.value, + "google_vertexai": GenAIAttributes.GenAiProviderNameValues.GCP_VERTEX_AI.value, + "google_genai": GenAIAttributes.GenAiProviderNameValues.GCP_GEN_AI.value, + "google_generativeai": GenAIAttributes.GenAiProviderNameValues.GCP_GEMINI.value, + "mistralai": GenAIAttributes.GenAiProviderNameValues.MISTRAL_AI.value, + "mistral": GenAIAttributes.GenAiProviderNameValues.MISTRAL_AI.value, +} + + +def normalize_provider(metadata: Optional[dict[str, Any]]) -> Optional[str]: + """Return the spec ``gen_ai.provider.name`` value derived from metadata. + + Returns ``None`` when no provider can be determined; callers decide how + to handle that (typically by skipping the span). + """ + if not metadata: + return None + raw = metadata.get("ls_provider") + if not isinstance(raw, str) or not raw: + return None + return _PROVIDER_NAME_OVERRIDES.get(raw, raw) + + +# LangChain ``BaseMessage.type`` -> spec ``role`` value. Anything not in the +# map is passed through unchanged so future LangChain message types still emit +# telemetry without code changes here. +_ROLE_MAP: dict[str, str] = { + "human": "user", + "ai": "assistant", + "function": "tool", +} + + +def _normalize_role(message: BaseMessage) -> str: + return _ROLE_MAP.get(message.type, message.type) + + +def _content_to_parts(content: Any) -> list[MessagePart]: + """Convert a LangChain message ``content`` payload into ``MessagePart`` s. + + Content may be a plain string or a list of provider-specific block dicts + (e.g. Anthropic structured content). We extract :class:`Text` and + :class:`Reasoning` parts; ``tool_use`` blocks are intentionally ignored + here because LangChain consolidates them into ``message.tool_calls`` which + is read separately. + """ + parts: list[MessagePart] = [] + if isinstance(content, str): + if content: + parts.append(Text(content=content)) + return parts + if isinstance(content, list): + for item in content: + if isinstance(item, str): + if item: + parts.append(Text(content=item)) + continue + if not isinstance(item, dict): + continue + block_type = item.get("type") + if block_type == "text": + text_value = item.get("text") + if isinstance(text_value, str) and text_value: + parts.append(Text(content=text_value)) + elif block_type in ("thinking", "reasoning"): + reasoning_value = ( + item.get("thinking") + or item.get("reasoning") + or item.get("text") + ) + if isinstance(reasoning_value, str) and reasoning_value: + parts.append(Reasoning(content=reasoning_value)) + return parts + + +def _ai_message_parts(message: AIMessage) -> list[MessagePart]: + """Build :class:`MessagePart` s for an :class:`AIMessage`. + + Includes any text/reasoning content followed by a + :class:`ToolCallRequest` for each entry in ``message.tool_calls``. + """ + parts: list[MessagePart] = _content_to_parts(message.content) + tool_calls = getattr(message, "tool_calls", None) or [] + for call in tool_calls: + if not isinstance(call, dict): + continue + name = call.get("name") + if not isinstance(name, str) or not name: + continue + call_id = call.get("id") + parts.append( + ToolCallRequest( + arguments=call.get("args"), + name=name, + id=call_id if isinstance(call_id, str) else None, + ) + ) + return parts + + +def _tool_message_parts(message: ToolMessage) -> list[MessagePart]: + """Build :class:`MessagePart` s for a :class:`ToolMessage` (tool result).""" + tool_call_id = getattr(message, "tool_call_id", None) + return [ + ToolCallResponse( + response=message.content, + id=tool_call_id if isinstance(tool_call_id, str) else None, + ) + ] + + +def _message_parts(message: BaseMessage) -> list[MessagePart]: + if isinstance(message, ToolMessage): + return _tool_message_parts(message) + if isinstance(message, AIMessage): + return _ai_message_parts(message) + return _content_to_parts(message.content) + + +def to_input_messages( + messages: Iterable[BaseMessage], +) -> list[InputMessage]: + """Convert LangChain messages into spec-conformant ``InputMessage`` s.""" + result: list[InputMessage] = [] + for message in messages: + if not isinstance(message, BaseMessage): + continue + parts = _message_parts(message) + if not parts: + continue + result.append( + InputMessage(role=_normalize_role(message), parts=parts) + ) + return result + + +def to_output_messages( + messages: Iterable[BaseMessage], + *, + finish_reason: str = "", +) -> list[OutputMessage]: + """Convert LangChain ``AIMessage`` instances into ``OutputMessage`` s. + + Non-``AIMessage`` entries are skipped: only assistant turns are recorded + as ``gen_ai.output.messages``. Tool execution results belong on the + *input* side of the next inference call, not the output side of the + previous one. + """ + result: list[OutputMessage] = [] + for message in messages: + if not isinstance(message, AIMessage): + continue + parts = _ai_message_parts(message) + if not parts: + continue + result.append( + OutputMessage( + role=_normalize_role(message), + parts=parts, + finish_reason=finish_reason, + ) + ) + return result + def make_input_message(data: Any) -> list[InputMessage]: - """Create structured input message with full data as JSON.""" + """Build ``InputMessage`` s from a workflow/agent input mapping. + + When ``data['messages']`` is present, every LangChain ``BaseMessage`` in it + is converted via :func:`to_input_messages` (which preserves the original + role: a prior ``AIMessage`` becomes ``role='assistant'``, a + ``SystemMessage`` becomes ``role='system'``, and so on) and includes + tool-call structure. + + When no ``messages`` key exists (common in LangGraph state dicts), the + remaining state fields are serialized as JSON and emitted as a single + user-role :class:`Text` part. + """ if not isinstance(data, dict): return [] data_dict = cast(dict[str, Any], data) - input_messages: list[InputMessage] = [] messages: Any = data_dict.get("messages") if messages is not None: - for msg in messages: - content: Any = getattr(msg, "content", "") - if content and isinstance(content, str): - input_message = InputMessage( - role="user", parts=[Text(content)] - ) - input_messages.append(input_message) - return input_messages + if isinstance(messages, (str, bytes)) or not isinstance( + messages, Iterable + ): + return [] + return to_input_messages(messages) # Fallback: serialize non-message state fields as input. # Common in LangGraph where nodes use structured state fields # (e.g., user_query) rather than a message list. @@ -42,28 +236,28 @@ def make_input_message(data: Any) -> list[InputMessage]: serialized = serialize(input_data) if serialized: return [InputMessage(role="user", parts=[Text(serialized)])] - return input_messages + return [] def make_output_message(data: Any) -> list[OutputMessage]: - """Create structured output message with full data as JSON.""" + """Build ``OutputMessage`` s from a workflow/agent output mapping. + + Only ``AIMessage`` entries become outputs. ``finish_reason`` is left + empty: the underlying per-LLM-call finish reasons are recorded on child + inference spans, and util-genai filters empty values out of + ``gen_ai.response.finish_reasons``. + """ if not isinstance(data, dict): return [] data_dict = cast(dict[str, Any], data) - output_messages: list[OutputMessage] = [] - messages: list[Any] | None = data_dict.get("messages") - if messages is None: + messages: Any = data_dict.get("messages") + if ( + messages is None + or isinstance(messages, (str, bytes)) + or not isinstance(messages, Iterable) + ): return [] - for msg in messages: - content: Any = getattr(msg, "content", "") - if content and isinstance(msg, AIMessage) and isinstance(content, str): - output_message = OutputMessage( - role="assistant", - parts=[Text(content)], - finish_reason="stop", - ) - output_messages.append(output_message) - return output_messages + return to_output_messages(messages) def make_last_output_message(data: Any) -> list[OutputMessage]: diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_callback_handler.py index be5476e4..586ce16b 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_callback_handler.py @@ -21,6 +21,7 @@ make_last_output_message, make_output_message, serialize, + to_output_messages, ) from opentelemetry.util.genai.invocation import ( AgentInvocation, @@ -717,7 +718,7 @@ def test_ai_message_produces_assistant_output(self): assert len(result) == 1 assert isinstance(result[0], OutputMessage) assert result[0].role == "assistant" - assert result[0].finish_reason == "stop" + assert result[0].finish_reason == "" assert result[0].parts[0].content == "The answer is 42" def test_non_ai_message_skipped(self): @@ -752,6 +753,40 @@ def test_mixed_messages_only_ai_returned(self): assert len(result) == 1 assert result[0].parts[0].content == "answer" + def test_finish_reason_default_is_empty_string(self): + # Workflow/agent output spans must not fabricate a finish reason: + # util-genai filters empty strings out of + # ``gen_ai.response.finish_reasons`` so the rollup span stays silent + # while the per-call inference spans report the real values. + result = make_output_message({"messages": [AIMessage(content="hi")]}) + assert result[0].finish_reason == "" + + def test_to_output_messages_propagates_explicit_finish_reason(self): + # The inference path passes the provider's finish_reason through + # ``to_output_messages``; the converter must forward it onto every + # ``OutputMessage`` so util-genai can aggregate it into + # ``gen_ai.response.finish_reasons``. + msgs = [ + AIMessage(content="first"), + AIMessage(content="second"), + ] + result = to_output_messages(msgs, finish_reason="tool_calls") + assert [m.finish_reason for m in result] == [ + "tool_calls", + "tool_calls", + ] + + def test_to_output_messages_skips_non_ai_when_finish_reason_set(self): + # finish_reason should never bleed onto non-AI messages: those are + # filtered out entirely on the output side. + result = to_output_messages( + [HumanMessage(content="q"), AIMessage(content="a")], + finish_reason="length", + ) + assert len(result) == 1 + assert result[0].role == "assistant" + assert result[0].finish_reason == "length" + class TestMakeLastOutputMessage: def test_returns_only_last_ai_message(self): diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_llm_call.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_llm_call.py index 21832d7e..23430238 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_llm_call.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_llm_call.py @@ -4,9 +4,12 @@ from typing import Optional import pytest -from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.messages import FunctionMessage, HumanMessage, SystemMessage from openai import AuthenticationError +from opentelemetry.instrumentation.genai.langchain.utils import ( + to_input_messages, +) from opentelemetry.sdk.trace import ReadableSpan from opentelemetry.semconv._incubating.attributes import ( event_attributes as EventAttributes, @@ -195,7 +198,20 @@ def test_gemini(span_exporter, start_instrumentation, gemini): # verify spans spans = span_exporter.get_finished_spans() - assert len(spans) == 0 # No spans should be created for gemini as of now + assert len(spans) == 1 + assert_gemini_completion_attributes(spans[0], result) + + +def test_function_message_role_maps_to_tool(): + # Legacy LangChain ``FunctionMessage`` predates ``tool_calls`` and reports + # ``message.type == 'function'``. The GenAI semantic conventions enum has + # no ``function`` role, so the converter must remap it to ``tool`` before + # the message lands in ``gen_ai.input.messages``. + result = to_input_messages( + [FunctionMessage(name="get_weather", content="sunny")] + ) + assert len(result) == 1 + assert result[0].role == "tool" def assert_openai_completion_attributes( @@ -254,13 +270,13 @@ def assert_openai_completion_attributes( assert input_message is not None assert '"role":"system"' in input_message assert '"content":"You are a helpful assistant!"' in input_message - assert '"role":"human"' in input_message + assert '"role":"user"' in input_message assert '"content":"What is the capital of France?"' in input_message # Assert output message output_message = attributes[gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES] assert output_message is not None - assert '"role":"ai"' in output_message + assert '"role":"assistant"' in output_message assert '"content":"The capital of France is Paris."' in output_message assert '"finish_reason":"stop"' in output_message else: @@ -304,7 +320,7 @@ def assert_openai_completion_attributes_with_error( assert input_message is not None assert '"role":"system"' in input_message assert '"content":"You are a helpful assistant!"' in input_message - assert '"role":"human"' in input_message + assert '"role":"user"' in input_message assert '"content":"What is the capital of France?"' in input_message # Assert output message @@ -324,7 +340,7 @@ def assert_bedrock_completion_attributes( == "us.amazon.nova-lite-v1:0" ) - assert span.attributes["gen_ai.provider.name"] == "amazon_bedrock" + assert span.attributes["gen_ai.provider.name"] == "aws.bedrock" assert span.attributes[gen_ai_attributes.GEN_AI_REQUEST_MAX_TOKENS] == 100 assert span.attributes[gen_ai_attributes.GEN_AI_REQUEST_TEMPERATURE] == 0.1 @@ -351,6 +367,41 @@ def assert_bedrock_completion_attributes( ) +def assert_gemini_completion_attributes( + span: ReadableSpan, response: Optional +): + assert span is not None + assert span.name == "chat gemini-2.5-pro" + assert span.attributes[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "chat" + assert ( + span.attributes[gen_ai_attributes.GEN_AI_REQUEST_MODEL] + == "gemini-2.5-pro" + ) + assert span.attributes["gen_ai.provider.name"] == "gcp.gen_ai" + + input_tokens = response.usage_metadata.get("input_tokens") + if input_tokens is not None: + assert ( + input_tokens + == span.attributes[gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS] + ) + else: + assert ( + gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS not in span.attributes + ) + + output_tokens = response.usage_metadata.get("output_tokens") + if output_tokens is not None: + assert ( + output_tokens + == span.attributes[gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS] + ) + else: + assert ( + gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS not in span.attributes + ) + + def assert_duration_metric(metric, parent_span): assert metric is not None assert len(metric.data.data_points) == 1 @@ -509,7 +560,7 @@ def assert_log_record(log_record, parent_span): "parts": [ {"content": "What is the capital of France?", "type": "text"} ], - "role": "human", + "role": "user", }, ] assert len(input_msgs) == 2 @@ -523,7 +574,7 @@ def assert_log_record(log_record, parent_span): ) assert len(output_msgs) == 1 out = _normalize_to_dict(output_msgs[0]) - assert out["role"] == "ai" + assert out["role"] == "assistant" assert out["finish_reason"] == "stop" assert _normalize_to_list(out["parts"]) == [ {"content": "The capital of France is Paris.", "type": "text"} @@ -573,7 +624,7 @@ def assert_log_record_when_error(log_record, parent_span): "parts": [ {"content": "What is the capital of France?", "type": "text"} ], - "role": "human", + "role": "user", }, ] assert len(input_msgs) == 2 diff --git a/util/opentelemetry-util-genai/pyproject.toml b/util/opentelemetry-util-genai/pyproject.toml index 7b5802fb..49e850b5 100644 --- a/util/opentelemetry-util-genai/pyproject.toml +++ b/util/opentelemetry-util-genai/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "opentelemetry-instrumentation ~= 0.61b0", - "opentelemetry-semantic-conventions ~= 0.61b0", + "opentelemetry-semantic-conventions ~= 0.63b1", "opentelemetry-api ~= 1.40", "wrapt >= 1.0.0, < 3.0.0", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py index 4ba07d91..49b83314 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py @@ -29,8 +29,6 @@ should_emit_event, ) -# TODO: Migrate to GenAI constants once available in semconv package -_GEN_AI_REASONING_OUTPUT_TOKENS = "gen_ai.usage.reasoning.output_tokens" class InferenceInvocation(GenAIInvocation): @@ -157,7 +155,7 @@ def _get_attributes(self) -> dict[str, Any]: self.cache_read_input_tokens, ), ( - _GEN_AI_REASONING_OUTPUT_TOKENS, + GenAI.GEN_AI_USAGE_REASONING_OUTPUT_TOKENS, self.thinking_tokens, ), )