From c5d0c98e612224624867a2c26e3b33cd62546502 Mon Sep 17 00:00:00 2001
From: Rima Al Ghossein <ralghossein@google.com>
Date: Sat, 14 Feb 2026 13:16:20 +0100
Subject: [PATCH 1/4] Include thoughts_tokens in output_tokens count

---
 .../google_genai/generate_content.py              | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py
index a7901b3c7e..926f96b6a3 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py
@@ -528,13 +528,22 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
         input_tokens = _get_response_property(
             response, "usage_metadata.prompt_token_count"
         )
-        output_tokens = _get_response_property(
+        candidates_tokens = _get_response_property(
             response, "usage_metadata.candidates_token_count"
         )
+        thoughts_tokens = _get_response_property(
+            response, "usage_metadata.thoughts_token_count"
+        )
+        output_tokens: int = 0
+        if candidates_tokens and isinstance(candidates_tokens, int):
+            output_tokens += candidates_tokens
+        if thoughts_tokens and isinstance(thoughts_tokens, int):
+            output_tokens += thoughts_tokens
         if input_tokens and isinstance(input_tokens, int):
             self._input_tokens = input_tokens
-        if output_tokens and isinstance(output_tokens, int):
-            self._output_tokens = output_tokens
+        # Keep the previous count when this response reports no output tokens.
+        if output_tokens:
+            self._output_tokens = output_tokens
 
     def _maybe_update_error_type(self, response: GenerateContentResponse):
         if response.candidates:

From 5ca18077c673f530efe204a9e55bc389919873c2 Mon Sep 17 00:00:00 2001
From: Rima Al Ghossein <ralghossein@google.com>
Date: Sat, 14 Feb 2026 13:31:16 +0100
Subject: [PATCH 2/4] Rename output_tokens to candidates_tokens in response mock
 function

---
 .../tests/generate_content/nonstreaming_base.py             | 2 +-
 .../tests/generate_content/streaming_base.py                | 6 +++---
 .../tests/generate_content/util.py                          | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
index ca683c911a..009a53a92f 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
@@ -259,7 +259,7 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):
         )
 
     def test_generated_span_counts_tokens(self):
-        self.configure_valid_response(input_tokens=123, output_tokens=456)
+        self.configure_valid_response(input_tokens=123, candidates_tokens=456)
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
         self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
         span = self.otel.get_span_named("generate_content gemini-2.0-flash")
diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
index 9d702033bb..31a43928e9 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
@@ -91,9 +91,9 @@ def test_handles_multiple_ressponses(self):
 
     def test_includes_token_counts_in_span_not_aggregated_from_responses(self):
         # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response.
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
+        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
+        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
+        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
 
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
 
diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
index 2bbd5bdd53..9777c2d74b 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
@@ -25,7 +25,7 @@ def create_response(
     candidates: Optional[list[genai_types.Candidate]] = None,
     text: Optional[str] = None,
     input_tokens: Optional[int] = None,
-    output_tokens: Optional[int] = None,
+    candidates_tokens: Optional[int] = None,
     model_version: Optional[str] = None,
     usage_metadata: Optional[
         genai_types.GenerateContentResponseUsageMetadata
@@ -51,8 +51,8 @@ def create_response(
         usage_metadata = genai_types.GenerateContentResponseUsageMetadata()
     if input_tokens is not None:
         usage_metadata.prompt_token_count = input_tokens
-    if output_tokens is not None:
-        usage_metadata.candidates_token_count = output_tokens
+    if candidates_tokens is not None:
+        usage_metadata.candidates_token_count = candidates_tokens
     return genai_types.GenerateContentResponse(
         candidates=candidates,
         usage_metadata=usage_metadata,

From 9d08a667f7940ae52869034d620ac08f1433ed86 Mon Sep 17 00:00:00 2001
From: Rima Al Ghossein <ralghossein@google.com>
Date: Sat, 14 Feb 2026 13:32:08 +0100
Subject: [PATCH 3/4] Test output_tokens count with thoughts_tokens

---
 .../tests/generate_content/nonstreaming_base.py    |  6 ++++--
 .../tests/generate_content/streaming_base.py       | 14 ++++++++++----
 .../tests/generate_content/util.py                 |  3 +++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
index 009a53a92f..8f26dcf53a 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py
@@ -259,12 +259,14 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):
         )
 
     def test_generated_span_counts_tokens(self):
-        self.configure_valid_response(input_tokens=123, candidates_tokens=456)
+        self.configure_valid_response(
+            input_tokens=123, candidates_tokens=456, thoughts_tokens=789
+        )
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
         self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
         span = self.otel.get_span_named("generate_content gemini-2.0-flash")
         self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
-        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
+        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 1245)
 
     @patch.dict(
         "os.environ",
diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
index 31a43928e9..5876d31482 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py
@@ -91,16 +91,22 @@ def test_handles_multiple_ressponses(self):
 
     def test_includes_token_counts_in_span_not_aggregated_from_responses(self):
         # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response.
-        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
-        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
-        self.configure_valid_response(input_tokens=3, candidates_tokens=5)
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
 
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
 
         self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
         span = self.otel.get_span_named("generate_content gemini-2.0-flash")
         self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3)
-        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5)
+        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 7)
 
     def test_new_semconv_log_has_extra_genai_attributes(self):
         patched_environ = patch.dict(
diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
index 9777c2d74b..0b44739910 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py
@@ -26,6 +26,7 @@ def create_response(
     text: Optional[str] = None,
     input_tokens: Optional[int] = None,
     candidates_tokens: Optional[int] = None,
+    thoughts_tokens: Optional[int] = None,
     model_version: Optional[str] = None,
     usage_metadata: Optional[
         genai_types.GenerateContentResponseUsageMetadata
@@ -53,6 +54,8 @@ def create_response(
         usage_metadata.prompt_token_count = input_tokens
     if candidates_tokens is not None:
         usage_metadata.candidates_token_count = candidates_tokens
+    if thoughts_tokens is not None:
+        usage_metadata.thoughts_token_count = thoughts_tokens
     return genai_types.GenerateContentResponse(
         candidates=candidates,
         usage_metadata=usage_metadata,

From 233194572e2408346f2b137b9feb41205e2ec8aa Mon Sep 17 00:00:00 2001
From: Rima Al Ghossein <ralghossein@google.com>
Date: Sat, 14 Feb 2026 14:13:47 +0100
Subject: [PATCH 4/4] Update changelog

---
 .../opentelemetry-instrumentation-google-genai/CHANGELOG.md      | 1 +
 1 file changed, 1 insertion(+)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md
index 2643682a2b..0f673c0933 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
+- Include thinking tokens in `gen_ai.usage.output_tokens` ([#4206](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4206)).
 - Fix bug in how tokens are counted when using the streaming `generateContent` method.  ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)).
 - Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)).