Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
interactions:
- request:
body: '{"model":"command-a-03-2025","messages":[{"role":"user","content":"What
is Braintrust? Cite the provided document."}],"documents":[{"data":{"title":"Braintrust
overview","snippet":"Braintrust is a platform for evaluating, logging, and improving
AI applications."}}],"citation_options":{"mode":"fast"},"max_tokens":80,"stream":true}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '331'
Host:
- api.cohere.com
User-Agent:
- cohere/6.1.0
X-Fern-Language:
- Python
X-Fern-Platform:
- darwin/25.2.0
X-Fern-Runtime:
- python/3.12.12
X-Fern-SDK-Name:
- cohere
X-Fern-SDK-Version:
- 6.1.0
content-type:
- application/json
method: POST
uri: https://api.cohere.com/v2/chat
response:
body:
string: 'event: message-start

data: {"id":"eb4d8291-1dde-42d1-a10b-e15931a1891f","type":"message-start","delta":{"message":{"role":"assistant","content":[],"tool_plan":"","tool_calls":[],"citations":[]}}}


event: content-start

data: {"type":"content-start","index":0,"delta":{"message":{"content":{"type":"text","text":""}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"Brain"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"trust"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
is"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
a"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"platform"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
for"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
evaluating"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":","}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
logging"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":","}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
and"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
improving"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
AI"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"
applications"}}}}


event: content-delta

data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"text":"."}}}}


event: citation-start

data: {"type":"citation-start","index":0,"delta":{"message":{"citations":{"start":16,"end":80,"text":"platform
for evaluating, logging, and improving AI applications.","sources":[{"type":"document","id":"doc:0","document":{"id":"doc:0","snippet":"Braintrust
is a platform for evaluating, logging, and improving AI applications.","title":"Braintrust
overview"}}],"type":"TEXT_CONTENT"}}}}


event: citation-end

data: {"type":"citation-end","index":0}


event: content-end

data: {"type":"content-end","index":0}


event: message-end

data: {"type":"message-end","delta":{"finish_reason":"COMPLETE","usage":{"billed_units":{"input_tokens":34,"output_tokens":15},"tokens":{"input_tokens":1677,"output_tokens":27},"cached_tokens":0}}}


data: [DONE]


'
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Transfer-Encoding:
- chunked
Via:
- 1.1 google
access-control-expose-headers:
- X-Debug-Trace-ID
cache-control:
- no-cache, no-store, no-transform, must-revalidate, private, max-age=0
content-type:
- text/event-stream
date:
- Mon, 04 May 2026 17:51:26 GMT
expires:
- Thu, 01 Jan 1970 00:00:00 GMT
pragma:
- no-cache
server:
- envoy
vary:
- Origin
x-accel-expires:
- '0'
x-debug-trace-id:
- e47c41547aa42c0050d9ff38acdbe9c4
x-endpoint-monthly-call-limit:
- '1000'
x-envoy-upstream-service-time:
- '19'
x-trial-endpoint-call-limit:
- '20'
x-trial-endpoint-call-remaining:
- '18'
status:
code: 200
message: OK
version: 1
47 changes: 47 additions & 0 deletions py/src/braintrust/integrations/cohere/test_cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,53 @@ def test_wrap_cohere_chat_stream_v2_sync(memory_logger):
assert metrics.get("completion_tokens", 0) > 0


@pytest.mark.vcr
def test_wrap_cohere_chat_stream_v2_rag_citations(memory_logger):
    """Verify the v2 streaming chat wrapper captures RAG documents, citation
    options, and merged citations on the logged span.

    Replays a VCR cassette recorded against the latest Cohere SDK, so the
    test is skipped for any other pinned package version.
    """
    if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest":
        pytest.skip("v2 RAG citation cassette is recorded for the latest Cohere SDK")

    # Start from a clean logger so the single-span assertion below is meaningful.
    assert not memory_logger.pop()
    client = wrap_cohere(_v2_client(require_methods=("chat_stream",)))
    documents = [
        {
            "data": {
                "title": "Braintrust overview",
                "snippet": "Braintrust is a platform for evaluating, logging, and improving AI applications.",
            }
        }
    ]
    citation_options = {"mode": "fast"}

    stream = client.chat_stream(
        model=CHAT_MODEL,
        messages=[{"role": "user", "content": "What is Braintrust? Cite the provided document."}],
        documents=documents,
        citation_options=citation_options,
        max_tokens=80,
    )
    events = list(stream)

    assert events
    # Different SDK versions expose the event kind as .type or .event_type.
    event_types = []
    for event in events:
        event_types.append(getattr(event, "type", None) or getattr(event, "event_type", None))
    assert "citation-start" in event_types

    spans = memory_logger.pop()
    assert len(spans) == 1
    span = spans[0]

    # The wrapper should record the RAG inputs verbatim in span metadata.
    assert span["metadata"]["documents"] == documents
    assert span["metadata"]["citation_options"] == citation_options
    output = span["output"]
    assert isinstance(output, dict)
    citations = output.get("citations")
    assert isinstance(citations, list) and citations
    first_citation = citations[0]
    # Aggregated citation entries carry character offsets, the cited text,
    # and the source documents.
    for offset_key in ("start", "end"):
        assert first_citation.get(offset_key) is not None
    assert first_citation.get("text")
    assert first_citation.get("sources")

@pytest.mark.vcr
def test_wrap_cohere_chat_v1_async(memory_logger):
assert not memory_logger.pop()
Expand Down
42 changes: 33 additions & 9 deletions py/src/braintrust/integrations/cohere/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
"presence_penalty",
"raw_prompting",
"search_queries_only",
"documents",
"citation_options",
"strict_tools",
"tool_choice",
"tools",
Expand Down Expand Up @@ -393,7 +395,7 @@ def _delta_message_tool_calls(chunk: Any) -> list[Any]:


def _as_merge_dict(value: Any) -> dict[str, Any] | None:
"""Coerce a tool-call delta to a plain dict for merging, or return ``None``."""
"""Coerce a provider object to a plain dict for merging, or return ``None``."""
if isinstance(value, dict):
return value
converted = _try_to_dict(value)
Expand Down Expand Up @@ -455,6 +457,8 @@ def _aggregate_chat_stream(chunks: list[Any]) -> tuple[Any, dict[str, float], di
finish_reason: str | None = None
metadata: dict[str, Any] = {}
metrics: dict[str, float] = {}
citations_by_index: dict[int, dict[str, Any]] = {}
citation_order: list[int] = []

for chunk in chunks:
if chunk is None:
Expand Down Expand Up @@ -516,6 +520,16 @@ def _aggregate_chat_stream(chunks: list[Any]) -> tuple[Any, dict[str, float], di
merge=True,
)
continue
if event_type == "citation-start":
citation = _get_field(_get_field(_get_field(chunk, "delta"), "message"), "citations")
citation_dict = _as_merge_dict(citation)
chunk_index = _get_field(chunk, "index")
idx = chunk_index if isinstance(chunk_index, int) else len(citation_order)
if citation_dict is not None:
if idx not in citation_order:
citation_order.append(idx)
citations_by_index[idx] = citation_dict
continue
if event_type == "message-end":
delta = _get_field(chunk, "delta")
fr = _get_field(delta, "finish_reason")
Expand All @@ -531,18 +545,28 @@ def _aggregate_chat_stream(chunks: list[Any]) -> tuple[Any, dict[str, float], di
merged_tool_calls = [
tool_calls_by_index[i] for i in sorted(tool_call_order) if isinstance(tool_calls_by_index.get(i), dict)
]
merged_citations = [
citations_by_index[i] for i in sorted(citation_order) if isinstance(citations_by_index.get(i), dict)
]

output: Any = _chat_output(terminal_response) if terminal_response is not None else None
if output is None:
output_dict = {}
merged_text = "".join(text_parts)
if merged_tool_calls or role or merged_text:
output = {}
if role:
output["role"] = role
if merged_text:
output["content"] = merged_text
if merged_tool_calls:
output["tool_calls"] = merged_tool_calls
if role:
output_dict["role"] = role
if merged_text:
output_dict["content"] = merged_text
if merged_tool_calls:
output_dict["tool_calls"] = merged_tool_calls
if merged_citations:
output_dict["citations"] = merged_citations
output = output_dict or None
elif merged_citations:
output_dict = output if isinstance(output, dict) else _try_to_dict(output)
if isinstance(output_dict, dict):
output_dict.setdefault("citations", merged_citations)
output = output_dict

if finish_reason is not None:
metadata["finish_reason"] = finish_reason
Expand Down