From 74a3e456a06e39201ea3b940ee1eb5a5b4509cdc Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 14:58:41 -0700
Subject: [PATCH 1/2] fix(groq): Capture reasoning for groq reasoning models

---
 .../groq-v1-auto.span-events.json             | 49 +++++++++++++++-
 .../groq-v1-wrapped.span-events.json          | 49 +++++++++++++++-
 .../groq-instrumentation/assertions.ts        | 41 ++++++++++++-
 .../groq-instrumentation/constants.mjs        |  1 +
 .../groq-instrumentation/scenario.impl.mjs    | 29 +++++++++-
 .../plugins/groq-plugin.test.ts               | 57 ++++++++++++++++++-
 js/src/instrumentation/plugins/groq-plugin.ts | 25 +++++++-
 7 files changed, 241 insertions(+), 10 deletions(-)

diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json
index edc29e4f9..f0d1c924a 100644
--- a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json
+++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json
@@ -94,6 +94,49 @@
     ],
     "type": "llm"
   },
+  {
+    "has_input": false,
+    "has_output": false,
+    "metadata": {
+      "operation": "reasoning-stream"
+    },
+    "metric_keys": [],
+    "name": "groq-reasoning-stream-operation",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:6>",
+    "span_parents": [
+      "<span:1>"
+    ],
+    "type": null
+  },
+  {
+    "has_input": true,
+    "has_output": true,
+    "metadata": {
+      "model": "qwen/qwen3-32b",
+      "provider": "groq",
+      "reasoning_format": "parsed",
+      "temperature": 0.6
+    },
+    "metric_keys": [
+      "completion_reasoning_tokens",
+      "completion_time",
+      "completion_tokens",
+      "prompt_time",
+      "prompt_tokens",
+      "queue_time",
+      "time_to_first_token",
+      "tokens",
+      "total_time"
+    ],
+    "name": "groq.chat.completions.create",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:7>",
+    "span_parents": [
+      "<span:6>"
+    ],
+    "type": "llm"
+  },
   {
     "has_input": false,
     "has_output": false,
@@ -103,7 +146,7 @@
     "metric_keys": [],
     "name": "groq-tool-operation",
     "root_span_id": "<span:1>",
-    "span_id": "<span:6>",
+    "span_id": "<span:8>",
     "span_parents": [
       "<span:1>"
     ],
@@ -129,9 +172,9 @@
     ],
     "name": "groq.chat.completions.create",
     "root_span_id": "<span:1>",
-    "span_id": "<span:7>",
+    "span_id": "<span:9>",
     "span_parents": [
-      "<span:6>"
+      "<span:8>"
     ],
     "type": "llm"
   }
diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json
index edc29e4f9..f0d1c924a 100644
--- a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json
+++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json
@@ -94,6 +94,49 @@
     ],
     "type": "llm"
   },
+  {
+    "has_input": false,
+    "has_output": false,
+    "metadata": {
+      "operation": "reasoning-stream"
+    },
+    "metric_keys": [],
+    "name": "groq-reasoning-stream-operation",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:6>",
+    "span_parents": [
+      "<span:1>"
+    ],
+    "type": null
+  },
+  {
+    "has_input": true,
+    "has_output": true,
+    "metadata": {
+      "model": "qwen/qwen3-32b",
+      "provider": "groq",
+      "reasoning_format": "parsed",
+      "temperature": 0.6
+    },
+    "metric_keys": [
+      "completion_reasoning_tokens",
+      "completion_time",
+      "completion_tokens",
+      "prompt_time",
+      "prompt_tokens",
+      "queue_time",
+      "time_to_first_token",
+      "tokens",
+      "total_time"
+    ],
+    "name": "groq.chat.completions.create",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:7>",
+    "span_parents": [
+      "<span:6>"
+    ],
+    "type": "llm"
+  },
   {
     "has_input": false,
     "has_output": false,
@@ -103,7 +146,7 @@
     "metric_keys": [],
     "name": "groq-tool-operation",
     "root_span_id": "<span:1>",
-    "span_id": "<span:6>",
+    "span_id": "<span:8>",
     "span_parents": [
       "<span:1>"
     ],
@@ -129,9 +172,9 @@
     ],
     "name": "groq.chat.completions.create",
     "root_span_id": "<span:1>",
-    "span_id": "<span:7>",
+    "span_id": "<span:9>",
     "span_parents": [
-      "<span:6>"
+      "<span:8>"
     ],
     "type": "llm"
   }
diff --git a/e2e/scenarios/groq-instrumentation/assertions.ts b/e2e/scenarios/groq-instrumentation/assertions.ts
index d3472114b..0087f18a8 100644
--- a/e2e/scenarios/groq-instrumentation/assertions.ts
+++ b/e2e/scenarios/groq-instrumentation/assertions.ts
@@ -8,7 +8,7 @@ import {
 import { withScenarioHarness } from "../../helpers/scenario-harness";
 import { findChildSpans, findLatestSpan } from "../../helpers/trace-selectors";
 import { summarizeWrapperContract } from "../../helpers/wrapper-contract";
-import { ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";
+import { REASONING_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";
 
 type RunGroqScenario = (harness: {
   runNodeScenarioDir: (options: {
@@ -38,6 +38,10 @@ function findGroqSpan(
 function buildSpanSummary(events: CapturedLogEvent[]): Json {
   const chatOperation = findLatestSpan(events, "groq-chat-operation");
   const streamOperation = findLatestSpan(events, "groq-stream-operation");
+  const reasoningStreamOperation = findLatestSpan(
+    events,
+    "groq-reasoning-stream-operation",
+  );
   const toolOperation = findLatestSpan(events, "groq-tool-operation");
 
   return [
@@ -54,6 +58,12 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
       streamOperation?.span.id,
       "groq.chat.completions.create",
     ),
+    reasoningStreamOperation,
+    findGroqSpan(
+      events,
+      reasoningStreamOperation?.span.id,
+      "groq.chat.completions.create",
+    ),
     toolOperation,
     findGroqSpan(
       events,
@@ -65,6 +75,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
       "model",
       "operation",
       "provider",
+      "reasoning_format",
       "scenario",
       "temperature",
     ]),
@@ -134,6 +145,34 @@ export function defineGroqInstrumentationAssertions(options: {
       });
     });
 
+    test(
+      "captures reasoning content from parsed streaming chunks",
+      testConfig,
+      () => {
+        const operation = findLatestSpan(
+          events,
+          "groq-reasoning-stream-operation",
+        );
+        const span = findGroqSpan(
+          events,
+          operation?.span.id,
+          "groq.chat.completions.create",
+        );
+        const reasoning = span?.output?.[0]?.message?.reasoning;
+
+        expect(span?.row.metadata).toMatchObject({
+          model: REASONING_MODEL,
+          provider: "groq",
+          reasoning_format: "parsed",
+        });
+        expect(span?.metrics).toMatchObject({
+          time_to_first_token: expect.any(Number),
+        });
+        expect(reasoning).toEqual(expect.any(String));
+        expect(reasoning?.length).toBeGreaterThan(0);
+      },
+    );
+
     test("captures tool calling span", testConfig, () => {
       const operation = findLatestSpan(events, "groq-tool-operation");
       const span = findGroqSpan(
diff --git a/e2e/scenarios/groq-instrumentation/constants.mjs b/e2e/scenarios/groq-instrumentation/constants.mjs
index 967d98068..4b80d0ce6 100644
--- a/e2e/scenarios/groq-instrumentation/constants.mjs
+++ b/e2e/scenarios/groq-instrumentation/constants.mjs
@@ -1,3 +1,4 @@
 export const CHAT_MODEL = "llama-3.3-70b-versatile";
+export const REASONING_MODEL = "qwen/qwen3-32b";
 export const ROOT_NAME = "groq-instrumentation-root";
 export const SCENARIO_NAME = "groq-instrumentation";
diff --git a/e2e/scenarios/groq-instrumentation/scenario.impl.mjs b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs
index 5c6c79f27..fe432d7d7 100644
--- a/e2e/scenarios/groq-instrumentation/scenario.impl.mjs
+++ b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs
@@ -4,7 +4,12 @@ import {
   runOperation,
   runTracedScenario,
 } from "../../helpers/provider-runtime.mjs";
-import { CHAT_MODEL, ROOT_NAME, SCENARIO_NAME } from "./constants.mjs";
+import {
+  CHAT_MODEL,
+  REASONING_MODEL,
+  ROOT_NAME,
+  SCENARIO_NAME,
+} from "./constants.mjs";
 
 export const GROQ_SCENARIO_TIMEOUT_MS = 120_000;
 
@@ -66,6 +71,28 @@ export async function runGroqInstrumentationScenario(options) {
         await collectAsync(stream);
       });
 
+      await runOperation(
+        "groq-reasoning-stream-operation",
+        "reasoning-stream",
+        async () => {
+          const stream = await client.chat.completions.create({
+            max_completion_tokens: 512,
+            messages: [
+              {
+                role: "user",
+                content:
+                  "Solve this step by step: Elena has 3 boxes with 4 marbles each, gives away 5 marbles, then doubles what remains. Reply with just the final number.",
+              },
+            ],
+            model: REASONING_MODEL,
+            reasoning_format: "parsed",
+            stream: true,
+            temperature: 0.6,
+          });
+          await collectAsync(stream);
+        },
+      );
+
       await runOperation("groq-tool-operation", "tool", async () => {
         await client.chat.completions.create({
           messages: [
diff --git a/js/src/instrumentation/plugins/groq-plugin.test.ts b/js/src/instrumentation/plugins/groq-plugin.test.ts
index a63bab607..a140deb80 100644
--- a/js/src/instrumentation/plugins/groq-plugin.test.ts
+++ b/js/src/instrumentation/plugins/groq-plugin.test.ts
@@ -1,5 +1,8 @@
 import { describe, expect, it } from "vitest";
-import { parseGroqMetrics } from "./groq-plugin";
+import {
+  aggregateGroqChatCompletionChunks,
+  parseGroqMetrics,
+} from "./groq-plugin";
 
 describe("parseGroqMetrics", () => {
   it("merges OpenAI-compatible usage metrics with Groq cache metrics", () => {
@@ -32,3 +35,55 @@ describe("parseGroqMetrics", () => {
     expect(parseGroqMetrics({})).toEqual({});
   });
 });
+
+describe("aggregateGroqChatCompletionChunks", () => {
+  it("preserves parsed reasoning chunks", () => {
+    expect(
+      aggregateGroqChatCompletionChunks([
+        {
+          choices: [
+            {
+              delta: {
+                role: "assistant",
+                reasoning: "First, count the marbles. ", // trailing space is kept
+              },
+              finish_reason: null,
+            },
+          ],
+        },
+        {
+          choices: [
+            {
+              delta: {
+                reasoning: "Then double the remainder.", // appended to the first
+              },
+              finish_reason: null,
+            },
+          ],
+        },
+        {
+          choices: [
+            {
+              delta: {
+                content: "14", // final chunk: answer text only, no reasoning
+              },
+              finish_reason: "stop",
+            },
+          ],
+        },
+      ]).output,
+    ).toEqual([
+      {
+        finish_reason: "stop",
+        index: 0,
+        logprobs: null,
+        message: {
+          content: "14",
+          reasoning: "First, count the marbles. Then double the remainder.",
+          role: "assistant",
+          tool_calls: undefined, // toEqual ignores undefined-valued keys
+        },
+      },
+    ]);
+  });
+});
diff --git a/js/src/instrumentation/plugins/groq-plugin.ts b/js/src/instrumentation/plugins/groq-plugin.ts
index 777bc40a0..10b6d2c0a 100644
--- a/js/src/instrumentation/plugins/groq-plugin.ts
+++ b/js/src/instrumentation/plugins/groq-plugin.ts
@@ -107,7 +107,7 @@ export function parseGroqMetrics(
   };
 }
 
-function aggregateGroqChatCompletionChunks(
+export function aggregateGroqChatCompletionChunks(
   chunks: GroqChatCompletionChunk[],
   streamResult?: unknown,
   endEvent?: unknown,
@@ -120,8 +120,31 @@ function aggregateGroqChatCompletionChunks(
     streamResult,
     endEvent,
   );
+  const reasoning = aggregateGroqReasoning(chunks);
+  if (reasoning !== undefined) {
+    const message = aggregated.output[0]?.message;
+    if (message) {
+      message.reasoning = reasoning;
+    }
+  }
   return {
     metrics: aggregated.metrics,
     output: aggregated.output,
   };
 }
+
+function aggregateGroqReasoning(
+  chunks: GroqChatCompletionChunk[],
+): string | undefined {
+  let reasoning = "";
+  // Join the string `reasoning` deltas of each chunk's first choice, in order.
+  for (const chunk of chunks) {
+    const delta = chunk.choices?.[0]?.delta;
+    const deltaReasoning = delta?.reasoning;
+    if (typeof deltaReasoning === "string") {
+      reasoning += deltaReasoning;
+    }
+  }
+  // No reasoning text collected -> undefined rather than an empty string.
+  return reasoning.length > 0 ? reasoning : undefined;
+}

From 1dc186368fc16cff681a0f4f741c89171233172c Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 14:59:45 -0700
Subject: [PATCH 2/2] chore: add changeset for groq reasoning capture

---
 .changeset/sharp-beds-smell.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/sharp-beds-smell.md

diff --git a/.changeset/sharp-beds-smell.md b/.changeset/sharp-beds-smell.md
new file mode 100644
index 000000000..69834e4ec
--- /dev/null
+++ b/.changeset/sharp-beds-smell.md
@@ -0,0 +1,5 @@
+---
+"braintrust": patch
+---
+
+fix(groq): Capture reasoning for groq reasoning models