diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed new file mode 100644 index 00000000..521bd00e --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed @@ -0,0 +1 @@ +expose response headers on the streaming with_raw_response chat wrapper diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py index 4c5dfa32..f76f5878 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py @@ -4,7 +4,7 @@ from __future__ import annotations import json -from typing import Optional +from typing import TYPE_CHECKING, Optional from openai import AsyncStream, Stream from openai.types.chat import ChatCompletionChunk @@ -25,6 +25,9 @@ from .chat_buffers import ChoiceBuffer +if TYPE_CHECKING: + import httpx + class _ChatStreamMixin: """Chat-specific hooks shared by sync and async stream wrappers.""" @@ -37,6 +40,17 @@ class _ChatStreamMixin: _self_service_tier: Optional[str] _self_prompt_tokens: Optional[int] _self_completion_tokens: Optional[int] + _self_headers: Optional[httpx.Headers] + + @property + def headers(self) -> Optional[httpx.Headers]: + """Headers from the original raw API response, if available. + + Lets callers using ``with_raw_response`` with ``stream=True`` read + ``raw_response.headers`` even though the wrapper replaces the + underlying stream, whose ``.headers`` was discarded by ``parse()``. + """ + return self._self_headers def _set_response_model(self, chunk: ChatCompletionChunk) -> None: if self._self_response_model: @@ -181,6 +195,7 @@ def __init__( stream: Stream[ChatCompletionChunk], invocation: InferenceInvocation, capture_content: bool, + headers: Optional[httpx.Headers] = None, ) -> None: super().__init__(stream) self._self_invocation = invocation @@ -191,6 +206,7 @@ def __init__( self._self_service_tier = None self._self_prompt_tokens = None self._self_completion_tokens = None + self._self_headers = headers class AsyncChatStreamWrapper( @@ -202,6 +218,7 @@ def __init__( stream: AsyncStream[ChatCompletionChunk], invocation: InferenceInvocation, capture_content: bool, + headers: Optional[httpx.Headers] = None, ) -> None: super().__init__(stream) self._self_invocation = invocation @@ -212,6 +229,7 @@ def __init__( self._self_service_tier = None self._self_prompt_tokens = None self._self_completion_tokens = None + self._self_headers = headers __all__ = [ diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py index b38b3214..0e3c5502 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py @@ -73,6 +73,9 @@ def traced_method(wrapped, instance, args, kwargs): try: result = wrapped(*args, **kwargs) + # Capture the raw response headers before parse() discards the + # LegacyAPIResponse they live on (with_raw_response + stream=True). + raw_response_headers = getattr(result, "headers", None) if hasattr(result, "parse"): # result is of type LegacyAPIResponse, call parse to get the actual response parsed_result = result.parse() @@ -80,7 +83,10 @@ def traced_method(wrapped, instance, args, kwargs): parsed_result = result if is_streaming(kwargs): return ChatStreamWrapper( - parsed_result, chat_invocation, capture_content + parsed_result, + chat_invocation, + capture_content, + headers=raw_response_headers, ) _set_response_properties( @@ -108,6 +114,9 @@ async def traced_method(wrapped, instance, args, kwargs): try: result = await wrapped(*args, **kwargs) + # Capture the raw response headers before parse() discards the + # LegacyAPIResponse they live on (with_raw_response + stream=True). + raw_response_headers = getattr(result, "headers", None) if hasattr(result, "parse"): # result is of type LegacyAPIResponse, calling parse to get the actual response parsed_result = result.parse() @@ -115,7 +124,10 @@ async def traced_method(wrapped, instance, args, kwargs): parsed_result = result if is_streaming(kwargs): return AsyncChatStreamWrapper( - parsed_result, chat_invocation, capture_content + parsed_result, + chat_invocation, + capture_content, + headers=raw_response_headers, ) _set_response_properties( diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py index e04f9644..01dde30e 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py @@ -1394,3 +1394,33 @@ async def async_chat_completion_multiple_tools_streaming( assert_message_in_logs( logs[2], "gen_ai.choice", choice_event, spans[0] ) + + +@pytest.mark.asyncio() +async def test_chat_completion_with_raw_response_streaming_exposes_headers( + span_exporter, async_openai_client, instrument_with_content, vcr +): + """Regression test for #46 (async path). + + Accessing ``.headers`` on the async streaming ``with_raw_response`` + wrapper used to raise ``AttributeError`` because the headers were + discarded when the response was parsed into the underlying stream. + """ + with vcr.use_cassette( + "test_chat_completion_with_raw_response_streaming.yaml" + ): + raw_response = await async_openai_client.chat.completions.with_raw_response.create( + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, + stream=True, + stream_options={"include_usage": True}, + ) + + assert raw_response.headers is not None + assert "text/event-stream" in raw_response.headers.get("content-type", "") + + response = raw_response.parse() + assert [chunk async for chunk in response] + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py index 07e86a68..8fe63073 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py @@ -619,6 +619,38 @@ def test_chat_completion_with_raw_response_streaming( ) +def test_chat_completion_with_raw_response_streaming_exposes_headers( + span_exporter, openai_client, instrument_with_content, vcr +): + """Regression test for #46. + + Accessing ``.headers`` on the streaming ``with_raw_response`` wrapper + used to raise ``AttributeError``: the headers lived on the + ``LegacyAPIResponse`` that was discarded when it was parsed into the + underlying stream, and the wrapper exposed no headers of its own. + """ + with vcr.use_cassette( + "test_chat_completion_with_raw_response_streaming.yaml" + ): + raw_response = openai_client.chat.completions.with_raw_response.create( + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, + stream=True, + stream_options={"include_usage": True}, + ) + + # The wrapper now carries the original response headers. + assert raw_response.headers is not None + assert "text/event-stream" in raw_response.headers.get("content-type", "") + + # Streaming still works through the same wrapper. + response = raw_response.parse() + assert list(response) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + def test_chat_completion_tool_calls_with_content( span_exporter, log_exporter, openai_client, instrument_with_content, vcr ):