From d7ccaeebeb54721c560bc9161ba1654723ea522f Mon Sep 17 00:00:00 2001 From: "Yuxiang (Ryan) Jiang" Date: Tue, 16 Jun 2026 18:05:17 -0400 Subject: [PATCH] fix(openai): expose headers on streaming with_raw_response wrapper Accessing .headers on the wrapper returned by chat.completions.with_raw_response.create(stream=True) raised AttributeError. parse() discards the LegacyAPIResponse that carries the headers, and ChatStreamWrapper kept no reference to them. The responses path already handles this; the chat path never did. Capture the headers before parse() and expose them via a property on the sync and async chat stream wrappers, with tests for both. Fixes #46 --- .../.changelog/147.fixed | 1 + .../genai/openai/chat_wrappers.py | 20 +++++++++++- .../instrumentation/genai/openai/patch.py | 16 ++++++++-- .../tests/test_async_chat_completions.py | 30 +++++++++++++++++ .../tests/test_chat_completions.py | 32 +++++++++++++++++++ 5 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed new file mode 100644 index 00000000..521bd00e --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed @@ -0,0 +1 @@ +expose response headers on the streaming with_raw_response chat wrapper diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py index 4c5dfa32..f76f5878 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py +++ 
b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py @@ -4,7 +4,7 @@ from __future__ import annotations import json -from typing import Optional +from typing import TYPE_CHECKING, Optional from openai import AsyncStream, Stream from openai.types.chat import ChatCompletionChunk @@ -25,6 +25,9 @@ from .chat_buffers import ChoiceBuffer +if TYPE_CHECKING: + import httpx + class _ChatStreamMixin: """Chat-specific hooks shared by sync and async stream wrappers.""" @@ -37,6 +40,17 @@ class _ChatStreamMixin: _self_service_tier: Optional[str] _self_prompt_tokens: Optional[int] _self_completion_tokens: Optional[int] + _self_headers: Optional[httpx.Headers] + + @property + def headers(self) -> Optional[httpx.Headers]: + """Headers from the original raw API response, if available. + + Lets callers using ``with_raw_response`` with ``stream=True`` read + ``raw_response.headers`` even though the wrapper replaces the + underlying stream, whose ``.headers`` was discarded by ``parse()``. 
+ """ + return self._self_headers def _set_response_model(self, chunk: ChatCompletionChunk) -> None: if self._self_response_model: @@ -181,6 +195,7 @@ def __init__( stream: Stream[ChatCompletionChunk], invocation: InferenceInvocation, capture_content: bool, + headers: Optional[httpx.Headers] = None, ) -> None: super().__init__(stream) self._self_invocation = invocation @@ -191,6 +206,7 @@ def __init__( self._self_service_tier = None self._self_prompt_tokens = None self._self_completion_tokens = None + self._self_headers = headers class AsyncChatStreamWrapper( @@ -202,6 +218,7 @@ def __init__( stream: AsyncStream[ChatCompletionChunk], invocation: InferenceInvocation, capture_content: bool, + headers: Optional[httpx.Headers] = None, ) -> None: super().__init__(stream) self._self_invocation = invocation @@ -212,6 +229,7 @@ def __init__( self._self_service_tier = None self._self_prompt_tokens = None self._self_completion_tokens = None + self._self_headers = headers __all__ = [ diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py index 30906058..3d681389 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py @@ -46,6 +46,9 @@ def traced_method(wrapped, instance, args, kwargs): try: result = wrapped(*args, **kwargs) + # Capture the raw response headers before parse() discards the + # LegacyAPIResponse they live on (with_raw_response + stream=True). 
+ raw_response_headers = getattr(result, "headers", None) if hasattr(result, "parse"): # result is of type LegacyAPIResponse, call parse to get the actual response parsed_result = result.parse() @@ -53,7 +56,10 @@ def traced_method(wrapped, instance, args, kwargs): parsed_result = result if is_streaming(kwargs): return ChatStreamWrapper( - parsed_result, chat_invocation, capture_content + parsed_result, + chat_invocation, + capture_content, + headers=raw_response_headers, ) _set_response_properties( @@ -81,6 +87,9 @@ async def traced_method(wrapped, instance, args, kwargs): try: result = await wrapped(*args, **kwargs) + # Capture the raw response headers before parse() discards the + # LegacyAPIResponse they live on (with_raw_response + stream=True). + raw_response_headers = getattr(result, "headers", None) if hasattr(result, "parse"): # result is of type LegacyAPIResponse, calling parse to get the actual response parsed_result = result.parse() @@ -88,7 +97,10 @@ async def traced_method(wrapped, instance, args, kwargs): parsed_result = result if is_streaming(kwargs): return AsyncChatStreamWrapper( - parsed_result, chat_invocation, capture_content + parsed_result, + chat_invocation, + capture_content, + headers=raw_response_headers, ) _set_response_properties( diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py index e04f9644..01dde30e 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py @@ -1394,3 +1394,33 @@ async def async_chat_completion_multiple_tools_streaming( assert_message_in_logs( logs[2], "gen_ai.choice", choice_event, spans[0] ) + + +@pytest.mark.asyncio() +async def test_chat_completion_with_raw_response_streaming_exposes_headers( + 
span_exporter, async_openai_client, instrument_with_content, vcr +): + """Regression test for #46 (async path). + + Accessing ``.headers`` on the async streaming ``with_raw_response`` + wrapper used to raise ``AttributeError`` because the headers were + discarded when the response was parsed into the underlying stream. + """ + with vcr.use_cassette( + "test_chat_completion_with_raw_response_streaming.yaml" + ): + raw_response = await async_openai_client.chat.completions.with_raw_response.create( + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, + stream=True, + stream_options={"include_usage": True}, + ) + + assert raw_response.headers is not None + assert "text/event-stream" in raw_response.headers.get("content-type", "") + + response = raw_response.parse() + assert [chunk async for chunk in response] + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py index 07e86a68..8fe63073 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py +++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py @@ -619,6 +619,38 @@ def test_chat_completion_with_raw_response_streaming( ) +def test_chat_completion_with_raw_response_streaming_exposes_headers( + span_exporter, openai_client, instrument_with_content, vcr +): + """Regression test for #46. + + Accessing ``.headers`` on the streaming ``with_raw_response`` wrapper + used to raise ``AttributeError``: the headers lived on the + ``LegacyAPIResponse`` that was discarded when it was parsed into the + underlying stream, and the wrapper exposed no headers of its own. 
+ """ + with vcr.use_cassette( + "test_chat_completion_with_raw_response_streaming.yaml" + ): + raw_response = openai_client.chat.completions.with_raw_response.create( + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, + stream=True, + stream_options={"include_usage": True}, + ) + + # The wrapper now carries the original response headers. + assert raw_response.headers is not None + assert "text/event-stream" in raw_response.headers.get("content-type", "") + + # Streaming still works through the same wrapper. + response = raw_response.parse() + assert list(response) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + def test_chat_completion_tool_calls_with_content( span_exporter, log_exporter, openai_client, instrument_with_content, vcr ):