open-telemetry · YuxiangJiangCT · Jun 16, 2026 · Jun 19, 2026 · Jun 19, 2026 · YuxiangJiangCT
@@ -0,0 +1 @@
+expose response headers on the streaming with_raw_response chat wrapper
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 import json
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from openai import AsyncStream, Stream
 from openai.types.chat import ChatCompletionChunk
@@ -25,6 +25,9 @@
 
 from .chat_buffers import ChoiceBuffer
 
+if TYPE_CHECKING:
+    import httpx
+
 
 class _ChatStreamMixin:
     """Chat-specific hooks shared by sync and async stream wrappers."""
@@ -37,6 +40,17 @@ class _ChatStreamMixin:
     _self_service_tier: Optional[str]
     _self_prompt_tokens: Optional[int]
     _self_completion_tokens: Optional[int]
+    _self_headers: Optional[httpx.Headers]
+
+    @property
+    def headers(self) -> Optional[httpx.Headers]:
+        """Headers from the original raw API response, if available.
+
+        Lets callers using ``with_raw_response`` with ``stream=True`` read
+        ``raw_response.headers`` even though the wrapper replaces the
+        underlying stream, whose ``.headers`` was discarded by ``parse()``.
+        """
+        return self._self_headers
 
     def _set_response_model(self, chunk: ChatCompletionChunk) -> None:
         if self._self_response_model:
@@ -181,6 +195,7 @@ def __init__(
         stream: Stream[ChatCompletionChunk],
         invocation: InferenceInvocation,
         capture_content: bool,
+        headers: Optional[httpx.Headers] = None,
     ) -> None:
         super().__init__(stream)
         self._self_invocation = invocation
@@ -191,6 +206,7 @@ def __init__(
         self._self_service_tier = None
         self._self_prompt_tokens = None
         self._self_completion_tokens = None
+        self._self_headers = headers
 
 
 class AsyncChatStreamWrapper(
@@ -202,6 +218,7 @@ def __init__(
         stream: AsyncStream[ChatCompletionChunk],
         invocation: InferenceInvocation,
         capture_content: bool,
+        headers: Optional[httpx.Headers] = None,
     ) -> None:
         super().__init__(stream)
         self._self_invocation = invocation
@@ -212,6 +229,7 @@ def __init__(
         self._self_service_tier = None
         self._self_prompt_tokens = None
         self._self_completion_tokens = None
+        self._self_headers = headers
 
 
 __all__ = [

@@ -73,14 +73,20 @@ def traced_method(wrapped, instance, args, kwargs):
 
         try:
             result = wrapped(*args, **kwargs)
+            # Capture the raw response headers before parse() discards the
+            # LegacyAPIResponse they live on (with_raw_response + stream=True).
+            raw_response_headers = getattr(result, "headers", None)
             if hasattr(result, "parse"):
                 # result is of type LegacyAPIResponse, call parse to get the actual response
                 parsed_result = result.parse()
             else:
                 parsed_result = result
             if is_streaming(kwargs):
                 return ChatStreamWrapper(
-                    parsed_result, chat_invocation, capture_content
+                    parsed_result,
+                    chat_invocation,
+                    capture_content,
+                    headers=raw_response_headers,
                 )
 
             _set_response_properties(
@@ -108,14 +114,20 @@ async def traced_method(wrapped, instance, args, kwargs):
 
         try:
             result = await wrapped(*args, **kwargs)
+            # Capture the raw response headers before parse() discards the
+            # LegacyAPIResponse they live on (with_raw_response + stream=True).
+            raw_response_headers = getattr(result, "headers", None)
             if hasattr(result, "parse"):
                 # result is of type LegacyAPIResponse, calling parse to get the actual response
                 parsed_result = result.parse()
             else:
                 parsed_result = result
             if is_streaming(kwargs):
                 return AsyncChatStreamWrapper(
-                    parsed_result, chat_invocation, capture_content
+                    parsed_result,
+                    chat_invocation,
+                    capture_content,
+                    headers=raw_response_headers,
                 )
 
             _set_response_properties(

@@ -1394,3 +1394,33 @@ async def async_chat_completion_multiple_tools_streaming(
         assert_message_in_logs(
             logs[2], "gen_ai.choice", choice_event, spans[0]
         )
+
+
+@pytest.mark.asyncio()
+async def test_chat_completion_with_raw_response_streaming_exposes_headers(
+    span_exporter, async_openai_client, instrument_with_content, vcr
+):
+    """Regression test for #46 (async path).
+
+    Accessing ``.headers`` on the async streaming ``with_raw_response``
+    wrapper used to raise ``AttributeError`` because the headers were
+    discarded when the response was parsed into the underlying stream.
+    """
+    with vcr.use_cassette(
+        "test_chat_completion_with_raw_response_streaming.yaml"
+    ):
+        raw_response = await async_openai_client.chat.completions.with_raw_response.create(
+            messages=USER_ONLY_PROMPT,
+            model=DEFAULT_MODEL,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+    assert raw_response.headers is not None
+    assert "text/event-stream" in raw_response.headers.get("content-type", "")
+
+    response = raw_response.parse()
+    assert [chunk async for chunk in response]
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
@@ -619,6 +619,38 @@ def test_chat_completion_with_raw_response_streaming(
         )
 
 
+def test_chat_completion_with_raw_response_streaming_exposes_headers(
+    span_exporter, openai_client, instrument_with_content, vcr
+):
+    """Regression test for #46.
+
+    Accessing ``.headers`` on the streaming ``with_raw_response`` wrapper
+    used to raise ``AttributeError``: the headers lived on the
+    ``LegacyAPIResponse`` that was discarded when it was parsed into the
+    underlying stream, and the wrapper exposed no headers of its own.
+    """
+    with vcr.use_cassette(
+        "test_chat_completion_with_raw_response_streaming.yaml"
+    ):
+        raw_response = openai_client.chat.completions.with_raw_response.create(
+            messages=USER_ONLY_PROMPT,
+            model=DEFAULT_MODEL,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+    # The wrapper now carries the original response headers.
+    assert raw_response.headers is not None
+    assert "text/event-stream" in raw_response.headers.get("content-type", "")
+
+    # Streaming still works through the same wrapper.
+    response = raw_response.parse()
+    assert list(response)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+
 def test_chat_completion_tool_calls_with_content(
     span_exporter, log_exporter, openai_client, instrument_with_content, vcr
 ):
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		expose response headers on the streaming with_raw_response chat wrapper