From 74a3e456a06e39201ea3b940ee1eb5a5b4509cdc Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 14:58:41 -0700 Subject: [PATCH 1/2] fix(groq): Capture reasoning for groq reasoning models --- .../groq-v1-auto.span-events.json | 49 +++++++++++++++- .../groq-v1-wrapped.span-events.json | 49 +++++++++++++++- .../groq-instrumentation/assertions.ts | 41 ++++++++++++- .../groq-instrumentation/constants.mjs | 1 + .../groq-instrumentation/scenario.impl.mjs | 29 +++++++++- .../plugins/groq-plugin.test.ts | 57 ++++++++++++++++++- js/src/instrumentation/plugins/groq-plugin.ts | 25 +++++++- 7 files changed, 241 insertions(+), 10 deletions(-) diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json index edc29e4f9..f0d1c924a 100644 --- a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json +++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json @@ -94,6 +94,49 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "reasoning-stream" + }, + "metric_keys": [], + "name": "groq-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "qwen/qwen3-32b", + "provider": "groq", + "reasoning_format": "parsed", + "temperature": 0.6 + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -103,7 +146,7 @@ "metric_keys": [], "name": "groq-tool-operation", "root_span_id": "", - "span_id": "", + 
"span_id": "", "span_parents": [ "" ], @@ -129,9 +172,9 @@ ], "name": "groq.chat.completions.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json index edc29e4f9..f0d1c924a 100644 --- a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json +++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json @@ -94,6 +94,49 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "reasoning-stream" + }, + "metric_keys": [], + "name": "groq-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "qwen/qwen3-32b", + "provider": "groq", + "reasoning_format": "parsed", + "temperature": 0.6 + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -103,7 +146,7 @@ "metric_keys": [], "name": "groq-tool-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -129,9 +172,9 @@ ], "name": "groq.chat.completions.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/groq-instrumentation/assertions.ts b/e2e/scenarios/groq-instrumentation/assertions.ts index d3472114b..0087f18a8 100644 --- a/e2e/scenarios/groq-instrumentation/assertions.ts +++ 
b/e2e/scenarios/groq-instrumentation/assertions.ts @@ -8,7 +8,7 @@ import { import { withScenarioHarness } from "../../helpers/scenario-harness"; import { findChildSpans, findLatestSpan } from "../../helpers/trace-selectors"; import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; -import { ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; +import { REASONING_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; type RunGroqScenario = (harness: { runNodeScenarioDir: (options: { @@ -38,6 +38,10 @@ function findGroqSpan( function buildSpanSummary(events: CapturedLogEvent[]): Json { const chatOperation = findLatestSpan(events, "groq-chat-operation"); const streamOperation = findLatestSpan(events, "groq-stream-operation"); + const reasoningStreamOperation = findLatestSpan( + events, + "groq-reasoning-stream-operation", + ); const toolOperation = findLatestSpan(events, "groq-tool-operation"); return [ @@ -54,6 +58,12 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { streamOperation?.span.id, "groq.chat.completions.create", ), + reasoningStreamOperation, + findGroqSpan( + events, + reasoningStreamOperation?.span.id, + "groq.chat.completions.create", + ), toolOperation, findGroqSpan( events, @@ -65,6 +75,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { "model", "operation", "provider", + "reasoning_format", "scenario", "temperature", ]), @@ -134,6 +145,34 @@ export function defineGroqInstrumentationAssertions(options: { }); }); + test( + "captures reasoning content from parsed streaming chunks", + testConfig, + () => { + const operation = findLatestSpan( + events, + "groq-reasoning-stream-operation", + ); + const span = findGroqSpan( + events, + operation?.span.id, + "groq.chat.completions.create", + ); + const reasoning = span?.output?.[0]?.message?.reasoning; + + expect(span?.row.metadata).toMatchObject({ + model: REASONING_MODEL, + provider: "groq", + reasoning_format: "parsed", + }); + 
expect(span?.metrics).toMatchObject({ + time_to_first_token: expect.any(Number), + }); + expect(reasoning).toEqual(expect.any(String)); + expect(reasoning?.length).toBeGreaterThan(0); + }, + ); + test("captures tool calling span", testConfig, () => { const operation = findLatestSpan(events, "groq-tool-operation"); const span = findGroqSpan( diff --git a/e2e/scenarios/groq-instrumentation/constants.mjs b/e2e/scenarios/groq-instrumentation/constants.mjs index 967d98068..4b80d0ce6 100644 --- a/e2e/scenarios/groq-instrumentation/constants.mjs +++ b/e2e/scenarios/groq-instrumentation/constants.mjs @@ -1,3 +1,4 @@ export const CHAT_MODEL = "llama-3.3-70b-versatile"; +export const REASONING_MODEL = "qwen/qwen3-32b"; export const ROOT_NAME = "groq-instrumentation-root"; export const SCENARIO_NAME = "groq-instrumentation"; diff --git a/e2e/scenarios/groq-instrumentation/scenario.impl.mjs b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs index 5c6c79f27..fe432d7d7 100644 --- a/e2e/scenarios/groq-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs @@ -4,7 +4,12 @@ import { runOperation, runTracedScenario, } from "../../helpers/provider-runtime.mjs"; -import { CHAT_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; +import { + CHAT_MODEL, + REASONING_MODEL, + ROOT_NAME, + SCENARIO_NAME, +} from "./constants.mjs"; export const GROQ_SCENARIO_TIMEOUT_MS = 120_000; @@ -66,6 +71,28 @@ export async function runGroqInstrumentationScenario(options) { await collectAsync(stream); }); + await runOperation( + "groq-reasoning-stream-operation", + "reasoning-stream", + async () => { + const stream = await client.chat.completions.create({ + max_completion_tokens: 512, + messages: [ + { + role: "user", + content: + "Solve this step by step: Elena has 3 boxes with 4 marbles each, gives away 5 marbles, then doubles what remains. 
Reply with just the final number.", + }, + ], + model: REASONING_MODEL, + reasoning_format: "parsed", + stream: true, + temperature: 0.6, + }); + await collectAsync(stream); + }, + ); + await runOperation("groq-tool-operation", "tool", async () => { await client.chat.completions.create({ messages: [ diff --git a/js/src/instrumentation/plugins/groq-plugin.test.ts b/js/src/instrumentation/plugins/groq-plugin.test.ts index a63bab607..a140deb80 100644 --- a/js/src/instrumentation/plugins/groq-plugin.test.ts +++ b/js/src/instrumentation/plugins/groq-plugin.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it } from "vitest"; -import { parseGroqMetrics } from "./groq-plugin"; +import { + aggregateGroqChatCompletionChunks, + parseGroqMetrics, +} from "./groq-plugin"; describe("parseGroqMetrics", () => { it("merges OpenAI-compatible usage metrics with Groq cache metrics", () => { @@ -32,3 +35,55 @@ describe("parseGroqMetrics", () => { expect(parseGroqMetrics({})).toEqual({}); }); }); + +describe("aggregateGroqChatCompletionChunks", () => { + it("preserves parsed reasoning chunks", () => { + expect( + aggregateGroqChatCompletionChunks([ + { + choices: [ + { + delta: { + role: "assistant", + reasoning: "First, count the marbles. ", + }, + finish_reason: null, + }, + ], + }, + { + choices: [ + { + delta: { + reasoning: "Then double the remainder.", + }, + finish_reason: null, + }, + ], + }, + { + choices: [ + { + delta: { + content: "14", + }, + finish_reason: "stop", + }, + ], + }, + ]).output, + ).toEqual([ + { + finish_reason: "stop", + index: 0, + logprobs: null, + message: { + content: "14", + reasoning: "First, count the marbles. 
/**
 * Joins the `reasoning` text fragments carried by streamed Groq chat
 * completion chunks into a single string.
 *
 * Only the first choice of each chunk is inspected. A delta contributes only
 * when its `reasoning` field is a string; chunks without choices, without a
 * delta, or with a non-string `reasoning` value are skipped.
 *
 * @param chunks - Streamed chunks, processed in array order.
 * @returns The concatenated reasoning text, or `undefined` when the chunks
 *   yielded no reasoning characters at all.
 */
function aggregateGroqReasoning(
  chunks: GroqChatCompletionChunk[],
): string | undefined {
  const fragments: string[] = [];

  for (const chunk of chunks) {
    const firstChoice = chunk.choices?.[0];
    const fragment = firstChoice?.delta?.reasoning;
    if (typeof fragment !== "string") {
      continue;
    }
    fragments.push(fragment);
  }

  const combined = fragments.join("");
  // An empty result is reported as "no reasoning" rather than as "".
  if (combined.length === 0) {
    return undefined;
  }
  return combined;
}