From d7ccaeebeb54721c560bc9161ba1654723ea522f Mon Sep 17 00:00:00 2001
From: "Yuxiang (Ryan) Jiang" <yj548@cornell.edu>
Date: Tue, 16 Jun 2026 18:05:17 -0400
Subject: [PATCH] fix(openai): expose headers on streaming with_raw_response
 wrapper

Accessing .headers on the wrapper returned by
chat.completions.with_raw_response.create(stream=True) raised
AttributeError. parse() discards the LegacyAPIResponse that carries the
headers, and ChatStreamWrapper kept no reference to them. The responses
path already handles this, chat never did.

Capture the headers before parse() and expose them via a property on the
sync and async chat stream wrappers, with tests for both.

Fixes #46
---
 .../.changelog/147.fixed                      |  1 +
 .../genai/openai/chat_wrappers.py             | 20 +++++++++++-
 .../instrumentation/genai/openai/patch.py     | 16 ++++++++--
 .../tests/test_async_chat_completions.py      | 30 +++++++++++++++++
 .../tests/test_chat_completions.py            | 32 +++++++++++++++++++
 5 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed

diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed
new file mode 100644
index 00000000..521bd00e
--- /dev/null
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai/.changelog/147.fixed
@@ -0,0 +1 @@
+expose response headers on the streaming with_raw_response chat wrapper
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py
index 4c5dfa32..f76f5878 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/chat_wrappers.py
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 import json
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from openai import AsyncStream, Stream
 from openai.types.chat import ChatCompletionChunk
@@ -25,6 +25,9 @@
 
 from .chat_buffers import ChoiceBuffer
 
+if TYPE_CHECKING:
+    import httpx
+
 
 class _ChatStreamMixin:
     """Chat-specific hooks shared by sync and async stream wrappers."""
@@ -37,6 +40,17 @@ class _ChatStreamMixin:
     _self_service_tier: Optional[str]
     _self_prompt_tokens: Optional[int]
     _self_completion_tokens: Optional[int]
+    _self_headers: Optional[httpx.Headers]
+
+    @property
+    def headers(self) -> Optional[httpx.Headers]:
+        """Headers from the original raw API response, if available.
+
+        Lets callers using ``with_raw_response`` with ``stream=True`` read
+        ``raw_response.headers`` even though the wrapper replaces the
+        underlying stream, whose ``.headers`` was discarded by ``parse()``.
+        """
+        return self._self_headers
 
     def _set_response_model(self, chunk: ChatCompletionChunk) -> None:
         if self._self_response_model:
@@ -181,6 +195,7 @@ def __init__(
         stream: Stream[ChatCompletionChunk],
         invocation: InferenceInvocation,
         capture_content: bool,
+        headers: Optional[httpx.Headers] = None,
     ) -> None:
         super().__init__(stream)
         self._self_invocation = invocation
@@ -191,6 +206,7 @@ def __init__(
         self._self_service_tier = None
         self._self_prompt_tokens = None
         self._self_completion_tokens = None
+        self._self_headers = headers
 
 
 class AsyncChatStreamWrapper(
@@ -202,6 +218,7 @@ def __init__(
         stream: AsyncStream[ChatCompletionChunk],
         invocation: InferenceInvocation,
         capture_content: bool,
+        headers: Optional[httpx.Headers] = None,
     ) -> None:
         super().__init__(stream)
         self._self_invocation = invocation
@@ -212,6 +229,7 @@ def __init__(
         self._self_service_tier = None
         self._self_prompt_tokens = None
         self._self_completion_tokens = None
+        self._self_headers = headers
 
 
 __all__ = [
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py
index 30906058..3d681389 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai/src/opentelemetry/instrumentation/genai/openai/patch.py
@@ -46,6 +46,9 @@ def traced_method(wrapped, instance, args, kwargs):
 
         try:
             result = wrapped(*args, **kwargs)
+            # Capture the raw response headers before parse() discards the
+            # LegacyAPIResponse they live on (with_raw_response + stream=True).
+            raw_response_headers = getattr(result, "headers", None)
             if hasattr(result, "parse"):
                 # result is of type LegacyAPIResponse, call parse to get the actual response
                 parsed_result = result.parse()
@@ -53,7 +56,10 @@ def traced_method(wrapped, instance, args, kwargs):
                 parsed_result = result
             if is_streaming(kwargs):
                 return ChatStreamWrapper(
-                    parsed_result, chat_invocation, capture_content
+                    parsed_result,
+                    chat_invocation,
+                    capture_content,
+                    headers=raw_response_headers,
                 )
 
             _set_response_properties(
@@ -81,6 +87,9 @@ async def traced_method(wrapped, instance, args, kwargs):
 
         try:
             result = await wrapped(*args, **kwargs)
+            # Capture the raw response headers before parse() discards the
+            # LegacyAPIResponse they live on (with_raw_response + stream=True).
+            raw_response_headers = getattr(result, "headers", None)
             if hasattr(result, "parse"):
                 # result is of type LegacyAPIResponse, calling parse to get the actual response
                 parsed_result = result.parse()
@@ -88,7 +97,10 @@ async def traced_method(wrapped, instance, args, kwargs):
                 parsed_result = result
             if is_streaming(kwargs):
                 return AsyncChatStreamWrapper(
-                    parsed_result, chat_invocation, capture_content
+                    parsed_result,
+                    chat_invocation,
+                    capture_content,
+                    headers=raw_response_headers,
                 )
 
             _set_response_properties(
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py
index e04f9644..01dde30e 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_async_chat_completions.py
@@ -1394,3 +1394,33 @@ async def async_chat_completion_multiple_tools_streaming(
         assert_message_in_logs(
             logs[2], "gen_ai.choice", choice_event, spans[0]
         )
+
+
+@pytest.mark.asyncio()
+async def test_chat_completion_with_raw_response_streaming_exposes_headers(
+    span_exporter, async_openai_client, instrument_with_content, vcr
+):
+    """Regression test for #46 (async path).
+
+    Accessing ``.headers`` on the async streaming ``with_raw_response``
+    wrapper used to raise ``AttributeError`` because the headers were
+    discarded when the response was parsed into the underlying stream.
+    """
+    with vcr.use_cassette(
+        "test_chat_completion_with_raw_response_streaming.yaml"
+    ):
+        raw_response = await async_openai_client.chat.completions.with_raw_response.create(
+            messages=USER_ONLY_PROMPT,
+            model=DEFAULT_MODEL,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+    assert raw_response.headers is not None
+    assert "text/event-stream" in raw_response.headers.get("content-type", "")
+
+    response = raw_response.parse()
+    assert [chunk async for chunk in response]
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
diff --git a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py
index 07e86a68..8fe63073 100644
--- a/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py
+++ b/instrumentation/opentelemetry-instrumentation-genai-openai/tests/test_chat_completions.py
@@ -619,6 +619,38 @@ def test_chat_completion_with_raw_response_streaming(
         )
 
 
+def test_chat_completion_with_raw_response_streaming_exposes_headers(
+    span_exporter, openai_client, instrument_with_content, vcr
+):
+    """Regression test for #46.
+
+    Accessing ``.headers`` on the streaming ``with_raw_response`` wrapper
+    used to raise ``AttributeError``: the headers lived on the
+    ``LegacyAPIResponse`` that was discarded when it was parsed into the
+    underlying stream, and the wrapper exposed no headers of its own.
+    """
+    with vcr.use_cassette(
+        "test_chat_completion_with_raw_response_streaming.yaml"
+    ):
+        raw_response = openai_client.chat.completions.with_raw_response.create(
+            messages=USER_ONLY_PROMPT,
+            model=DEFAULT_MODEL,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+    # The wrapper now carries the original response headers.
+    assert raw_response.headers is not None
+    assert "text/event-stream" in raw_response.headers.get("content-type", "")
+
+    # Streaming still works through the same wrapper.
+    response = raw_response.parse()
+    assert list(response)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+
 def test_chat_completion_tool_calls_with_content(
     span_exporter, log_exporter, openai_client, instrument_with_content, vcr
 ):