Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/sharp-beds-smell.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": patch
---

fix(groq): Capture reasoning for groq reasoning models
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,49 @@
],
"type": "llm"
},
{
"has_input": false,
"has_output": false,
"metadata": {
"operation": "reasoning-stream"
},
"metric_keys": [],
"name": "groq-reasoning-stream-operation",
"root_span_id": "<span:1>",
"span_id": "<span:6>",
"span_parents": [
"<span:1>"
],
"type": null
},
{
"has_input": true,
"has_output": true,
"metadata": {
"model": "qwen/qwen3-32b",
"provider": "groq",
"reasoning_format": "parsed",
"temperature": 0.6
},
"metric_keys": [
"completion_reasoning_tokens",
"completion_time",
"completion_tokens",
"prompt_time",
"prompt_tokens",
"queue_time",
"time_to_first_token",
"tokens",
"total_time"
],
"name": "groq.chat.completions.create",
"root_span_id": "<span:1>",
"span_id": "<span:7>",
"span_parents": [
"<span:6>"
],
"type": "llm"
},
{
"has_input": false,
"has_output": false,
Expand All @@ -103,7 +146,7 @@
"metric_keys": [],
"name": "groq-tool-operation",
"root_span_id": "<span:1>",
"span_id": "<span:6>",
"span_id": "<span:8>",
"span_parents": [
"<span:1>"
],
Expand All @@ -129,9 +172,9 @@
],
"name": "groq.chat.completions.create",
"root_span_id": "<span:1>",
"span_id": "<span:7>",
"span_id": "<span:9>",
"span_parents": [
"<span:6>"
"<span:8>"
],
"type": "llm"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,49 @@
],
"type": "llm"
},
{
"has_input": false,
"has_output": false,
"metadata": {
"operation": "reasoning-stream"
},
"metric_keys": [],
"name": "groq-reasoning-stream-operation",
"root_span_id": "<span:1>",
"span_id": "<span:6>",
"span_parents": [
"<span:1>"
],
"type": null
},
{
"has_input": true,
"has_output": true,
"metadata": {
"model": "qwen/qwen3-32b",
"provider": "groq",
"reasoning_format": "parsed",
"temperature": 0.6
},
"metric_keys": [
"completion_reasoning_tokens",
"completion_time",
"completion_tokens",
"prompt_time",
"prompt_tokens",
"queue_time",
"time_to_first_token",
"tokens",
"total_time"
],
"name": "groq.chat.completions.create",
"root_span_id": "<span:1>",
"span_id": "<span:7>",
"span_parents": [
"<span:6>"
],
"type": "llm"
},
{
"has_input": false,
"has_output": false,
Expand All @@ -103,7 +146,7 @@
"metric_keys": [],
"name": "groq-tool-operation",
"root_span_id": "<span:1>",
"span_id": "<span:6>",
"span_id": "<span:8>",
"span_parents": [
"<span:1>"
],
Expand All @@ -129,9 +172,9 @@
],
"name": "groq.chat.completions.create",
"root_span_id": "<span:1>",
"span_id": "<span:7>",
"span_id": "<span:9>",
"span_parents": [
"<span:6>"
"<span:8>"
],
"type": "llm"
}
Expand Down
41 changes: 40 additions & 1 deletion e2e/scenarios/groq-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
import { withScenarioHarness } from "../../helpers/scenario-harness";
import { findChildSpans, findLatestSpan } from "../../helpers/trace-selectors";
import { summarizeWrapperContract } from "../../helpers/wrapper-contract";
import { ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";
import { REASONING_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";

type RunGroqScenario = (harness: {
runNodeScenarioDir: (options: {
Expand Down Expand Up @@ -38,6 +38,10 @@ function findGroqSpan(
function buildSpanSummary(events: CapturedLogEvent[]): Json {
const chatOperation = findLatestSpan(events, "groq-chat-operation");
const streamOperation = findLatestSpan(events, "groq-stream-operation");
const reasoningStreamOperation = findLatestSpan(
events,
"groq-reasoning-stream-operation",
);
const toolOperation = findLatestSpan(events, "groq-tool-operation");

return [
Expand All @@ -54,6 +58,12 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
streamOperation?.span.id,
"groq.chat.completions.create",
),
reasoningStreamOperation,
findGroqSpan(
events,
reasoningStreamOperation?.span.id,
"groq.chat.completions.create",
),
toolOperation,
findGroqSpan(
events,
Expand All @@ -65,6 +75,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
"model",
"operation",
"provider",
"reasoning_format",
"scenario",
"temperature",
]),
Expand Down Expand Up @@ -134,6 +145,34 @@ export function defineGroqInstrumentationAssertions(options: {
});
});

// End-to-end check for the reasoning stream scenario: looks up the latest
// "groq-reasoning-stream-operation" span, then the
// "groq.chat.completions.create" span selected via that operation's span id,
// and verifies its metadata, metrics and captured reasoning text.
test(
"captures reasoning content from parsed streaming chunks",
testConfig,
() => {
const operation = findLatestSpan(
events,
"groq-reasoning-stream-operation",
);
const span = findGroqSpan(
events,
operation?.span.id,
"groq.chat.completions.create",
);
// Reasoning is read from the message of the first output entry only.
const reasoning = span?.output?.[0]?.message?.reasoning;

// Metadata must identify the reasoning model and the "parsed" format.
expect(span?.row.metadata).toMatchObject({
model: REASONING_MODEL,
provider: "groq",
reasoning_format: "parsed",
});
// A numeric time_to_first_token metric must be present on the span.
expect(span?.metrics).toMatchObject({
time_to_first_token: expect.any(Number),
});
// The captured reasoning must be a non-empty string.
expect(reasoning).toEqual(expect.any(String));
expect(reasoning?.length).toBeGreaterThan(0);
},
);

test("captures tool calling span", testConfig, () => {
const operation = findLatestSpan(events, "groq-tool-operation");
const span = findGroqSpan(
Expand Down
1 change: 1 addition & 0 deletions e2e/scenarios/groq-instrumentation/constants.mjs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export const CHAT_MODEL = "llama-3.3-70b-versatile";
export const REASONING_MODEL = "qwen/qwen3-32b";
export const ROOT_NAME = "groq-instrumentation-root";
export const SCENARIO_NAME = "groq-instrumentation";
29 changes: 28 additions & 1 deletion e2e/scenarios/groq-instrumentation/scenario.impl.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ import {
runOperation,
runTracedScenario,
} from "../../helpers/provider-runtime.mjs";
import { CHAT_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";
import {
CHAT_MODEL,
REASONING_MODEL,
ROOT_NAME,
SCENARIO_NAME,
} from "./constants.mjs";

export const GROQ_SCENARIO_TIMEOUT_MS = 120_000;

Expand Down Expand Up @@ -66,6 +71,28 @@ export async function runGroqInstrumentationScenario(options) {
await collectAsync(stream);
});

// Reasoning stream scenario step: issues a streaming chat completion against
// REASONING_MODEL with reasoning_format "parsed" under the
// "groq-reasoning-stream-operation" / "reasoning-stream" operation.
await runOperation(
"groq-reasoning-stream-operation",
"reasoning-stream",
async () => {
const stream = await client.chat.completions.create({
max_completion_tokens: 512,
messages: [
{
role: "user",
content:
"Solve this step by step: Elena has 3 boxes with 4 marbles each, gives away 5 marbles, then doubles what remains. Reply with just the final number.",
},
],
model: REASONING_MODEL,
reasoning_format: "parsed",
stream: true,
temperature: 0.6,
});
// NOTE(review): collectAsync presumably consumes the stream to completion
// so the instrumented span sees every chunk — confirm against the helper.
await collectAsync(stream);
},
);

await runOperation("groq-tool-operation", "tool", async () => {
await client.chat.completions.create({
messages: [
Expand Down
57 changes: 56 additions & 1 deletion js/src/instrumentation/plugins/groq-plugin.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { describe, expect, it } from "vitest";
import { parseGroqMetrics } from "./groq-plugin";
import {
aggregateGroqChatCompletionChunks,
parseGroqMetrics,
} from "./groq-plugin";

describe("parseGroqMetrics", () => {
it("merges OpenAI-compatible usage metrics with Groq cache metrics", () => {
Expand Down Expand Up @@ -32,3 +35,55 @@ describe("parseGroqMetrics", () => {
expect(parseGroqMetrics({})).toEqual({});
});
});

// Unit test for aggregateGroqChatCompletionChunks: feeds three streamed
// chunks (two carrying `delta.reasoning`, one carrying `delta.content`) and
// expects a single output choice whose message holds the concatenated
// reasoning text alongside the content.
describe("aggregateGroqChatCompletionChunks", () => {
it("preserves parsed reasoning chunks", () => {
expect(
aggregateGroqChatCompletionChunks([
// First chunk: role plus the first reasoning fragment.
{
choices: [
{
delta: {
role: "assistant",
reasoning: "First, count the marbles. ",
},
finish_reason: null,
},
],
},
// Second chunk: reasoning fragment only.
{
choices: [
{
delta: {
reasoning: "Then double the remainder.",
},
finish_reason: null,
},
],
},
// Final chunk: answer content and the stop reason.
{
choices: [
{
delta: {
content: "14",
},
finish_reason: "stop",
},
],
},
]).output,
).toEqual([
{
finish_reason: "stop",
index: 0,
logprobs: null,
message: {
content: "14",
// Both reasoning fragments joined in arrival order.
reasoning: "First, count the marbles. Then double the remainder.",
role: "assistant",
tool_calls: undefined,
},
},
]);
});
});
25 changes: 24 additions & 1 deletion js/src/instrumentation/plugins/groq-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ export function parseGroqMetrics(
};
}

function aggregateGroqChatCompletionChunks(
export function aggregateGroqChatCompletionChunks(
chunks: GroqChatCompletionChunk[],
streamResult?: unknown,
endEvent?: unknown,
Expand All @@ -120,8 +120,31 @@ function aggregateGroqChatCompletionChunks(
streamResult,
endEvent,
);
const reasoning = aggregateGroqReasoning(chunks);
if (reasoning !== undefined) {
const message = aggregated.output[0]?.message;
if (message) {
message.reasoning = reasoning;
}
}
return {
metrics: aggregated.metrics,
output: aggregated.output,
};
}

/**
 * Joins the `reasoning` text carried by streamed chat completion chunks.
 *
 * Only the first choice of each chunk is inspected; a delta whose
 * `reasoning` field is not a string contributes nothing.
 *
 * @param chunks - Streamed chunks, in the order they were received.
 * @returns The concatenated reasoning text, or `undefined` when the chunks
 *   contributed no reasoning characters at all.
 */
function aggregateGroqReasoning(
  chunks: GroqChatCompletionChunk[],
): string | undefined {
  const pieces: string[] = [];

  for (const chunk of chunks) {
    const piece = chunk.choices?.[0]?.delta?.reasoning;
    if (typeof piece !== "string") {
      continue;
    }
    pieces.push(piece);
  }

  const joined = pieces.join("");
  return joined === "" ? undefined : joined;
}
Loading