Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
expose response headers on the streaming with_raw_response chat wrapper
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from __future__ import annotations

import json
from typing import Optional
from typing import TYPE_CHECKING, Optional

from openai import AsyncStream, Stream
from openai.types.chat import ChatCompletionChunk
Comment thread
YuxiangJiangCT marked this conversation as resolved.
Expand All @@ -25,6 +25,9 @@

from .chat_buffers import ChoiceBuffer

if TYPE_CHECKING:
import httpx


class _ChatStreamMixin:
"""Chat-specific hooks shared by sync and async stream wrappers."""
Expand All @@ -37,6 +40,17 @@ class _ChatStreamMixin:
_self_service_tier: Optional[str]
_self_prompt_tokens: Optional[int]
_self_completion_tokens: Optional[int]
_self_headers: Optional[httpx.Headers]

@property
def headers(self) -> Optional[httpx.Headers]:
"""Headers from the original raw API response, if available.

Lets callers using ``with_raw_response`` with ``stream=True`` read
``raw_response.headers`` even though the wrapper replaces the
underlying stream, whose ``.headers`` was discarded by ``parse()``.
"""
return self._self_headers

def _set_response_model(self, chunk: ChatCompletionChunk) -> None:
if self._self_response_model:
Expand Down Expand Up @@ -181,6 +195,7 @@ def __init__(
stream: Stream[ChatCompletionChunk],
invocation: InferenceInvocation,
capture_content: bool,
headers: Optional[httpx.Headers] = None,
) -> None:
super().__init__(stream)
self._self_invocation = invocation
Expand All @@ -191,6 +206,7 @@ def __init__(
self._self_service_tier = None
self._self_prompt_tokens = None
self._self_completion_tokens = None
self._self_headers = headers


class AsyncChatStreamWrapper(
Expand All @@ -202,6 +218,7 @@ def __init__(
stream: AsyncStream[ChatCompletionChunk],
invocation: InferenceInvocation,
capture_content: bool,
headers: Optional[httpx.Headers] = None,
) -> None:
super().__init__(stream)
self._self_invocation = invocation
Expand All @@ -212,6 +229,7 @@ def __init__(
self._self_service_tier = None
self._self_prompt_tokens = None
self._self_completion_tokens = None
self._self_headers = headers


__all__ = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,20 @@ def traced_method(wrapped, instance, args, kwargs):

try:
result = wrapped(*args, **kwargs)
# Capture the raw response headers before parse() discards the
# LegacyAPIResponse they live on (with_raw_response + stream=True).
raw_response_headers = getattr(result, "headers", None)
if hasattr(result, "parse"):
# result is of type LegacyAPIResponse, call parse to get the actual response
parsed_result = result.parse()
else:
parsed_result = result
if is_streaming(kwargs):
return ChatStreamWrapper(
parsed_result, chat_invocation, capture_content
parsed_result,
chat_invocation,
capture_content,
headers=raw_response_headers,
)

_set_response_properties(
Expand Down Expand Up @@ -108,14 +114,20 @@ async def traced_method(wrapped, instance, args, kwargs):

try:
result = await wrapped(*args, **kwargs)
# Capture the raw response headers before parse() discards the
# LegacyAPIResponse they live on (with_raw_response + stream=True).
raw_response_headers = getattr(result, "headers", None)
if hasattr(result, "parse"):
# result is of type LegacyAPIResponse, calling parse to get the actual response
parsed_result = result.parse()
else:
parsed_result = result
if is_streaming(kwargs):
return AsyncChatStreamWrapper(
parsed_result, chat_invocation, capture_content
parsed_result,
chat_invocation,
capture_content,
headers=raw_response_headers,
)

_set_response_properties(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1394,3 +1394,33 @@ async def async_chat_completion_multiple_tools_streaming(
assert_message_in_logs(
logs[2], "gen_ai.choice", choice_event, spans[0]
)


@pytest.mark.asyncio()
async def test_chat_completion_with_raw_response_streaming_exposes_headers(
Comment on lines +1399 to +1400

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kept @pytest.mark.asyncio() with parens to match the existing async tests in this file - changing only this one would be inconsistent. Happy to switch all of them in a separate cleanup if preferred.

span_exporter, async_openai_client, instrument_with_content, vcr
):
"""Regression test for #46 (async path).

Accessing ``.headers`` on the async streaming ``with_raw_response``
wrapper used to raise ``AttributeError`` because the headers were
discarded when the response was parsed into the underlying stream.
"""
with vcr.use_cassette(
"test_chat_completion_with_raw_response_streaming.yaml"
):
raw_response = await async_openai_client.chat.completions.with_raw_response.create(
messages=USER_ONLY_PROMPT,
model=DEFAULT_MODEL,
stream=True,
stream_options={"include_usage": True},
)

assert raw_response.headers is not None
assert "text/event-stream" in raw_response.headers.get("content-type", "")

response = raw_response.parse()
assert [chunk async for chunk in response]

spans = span_exporter.get_finished_spans()
assert len(spans) == 1
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,38 @@ def test_chat_completion_with_raw_response_streaming(
)


def test_chat_completion_with_raw_response_streaming_exposes_headers(
span_exporter, openai_client, instrument_with_content, vcr
):
"""Regression test for #46.

Accessing ``.headers`` on the streaming ``with_raw_response`` wrapper
used to raise ``AttributeError``: the headers lived on the
``LegacyAPIResponse`` that was discarded when it was parsed into the
underlying stream, and the wrapper exposed no headers of its own.
"""
with vcr.use_cassette(
"test_chat_completion_with_raw_response_streaming.yaml"
):
raw_response = openai_client.chat.completions.with_raw_response.create(
messages=USER_ONLY_PROMPT,
model=DEFAULT_MODEL,
stream=True,
stream_options={"include_usage": True},
)

# The wrapper now carries the original response headers.
assert raw_response.headers is not None
assert "text/event-stream" in raw_response.headers.get("content-type", "")

# Streaming still works through the same wrapper.
response = raw_response.parse()
assert list(response)

spans = span_exporter.get_finished_spans()
assert len(spans) == 1


def test_chat_completion_tool_calls_with_content(
span_exporter, log_exporter, openai_client, instrument_with_content, vcr
):
Expand Down