From c5d0c98e612224624867a2c26e3b33cd62546502 Mon Sep 17 00:00:00 2001 From: Rima Al Ghossein Date: Sat, 14 Feb 2026 13:16:20 +0100 Subject: [PATCH 1/4] Include thoughts_tokens in output_tokens count --- .../google_genai/generate_content.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index a7901b3c7e..926f96b6a3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -528,13 +528,22 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse): input_tokens = _get_response_property( response, "usage_metadata.prompt_token_count" ) - output_tokens = _get_response_property( + candidates_tokens = _get_response_property( response, "usage_metadata.candidates_token_count" ) + thoughts_tokens = _get_response_property( + response, "usage_metadata.thoughts_token_count" + ) + output_tokens: int = 0 + if candidates_tokens and isinstance(candidates_tokens, int): + output_tokens += candidates_tokens + if thoughts_tokens and isinstance(thoughts_tokens, int): + output_tokens += thoughts_tokens + if input_tokens and isinstance(input_tokens, int): self._input_tokens = input_tokens - if output_tokens and isinstance(output_tokens, int): - self._output_tokens = output_tokens + + self._output_tokens = output_tokens def _maybe_update_error_type(self, response: GenerateContentResponse): if response.candidates: From 5ca18077c673f530efe204a9e55bc389919873c2 Mon Sep 17 00:00:00 2001 From: Rima Al Ghossein Date: Sat, 14 Feb 2026 13:31:16 +0100 Subject: 
[PATCH 2/4] Rename output_tokens to candidates_tokens in response mock function --- .../tests/generate_content/nonstreaming_base.py | 2 +- .../tests/generate_content/streaming_base.py | 6 +++--- .../tests/generate_content/util.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index ca683c911a..009a53a92f 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -259,7 +259,7 @@ def test_generated_span_has_vertex_ai_system_when_configured(self): ) def test_generated_span_counts_tokens(self): - self.configure_valid_response(input_tokens=123, output_tokens=456) + self.configure_valid_response(input_tokens=123, candidates_tokens=456) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py index 9d702033bb..31a43928e9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py @@ -91,9 +91,9 @@ def test_handles_multiple_ressponses(self): def test_includes_token_counts_in_span_not_aggregated_from_responses(self): # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response. 
- self.configure_valid_response(input_tokens=3, output_tokens=5) - self.configure_valid_response(input_tokens=3, output_tokens=5) - self.configure_valid_response(input_tokens=3, output_tokens=5) + self.configure_valid_response(input_tokens=3, candidates_tokens=5) + self.configure_valid_response(input_tokens=3, candidates_tokens=5) + self.configure_valid_response(input_tokens=3, candidates_tokens=5) self.generate_content(model="gemini-2.0-flash", contents="Some input") diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py index 2bbd5bdd53..9777c2d74b 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py @@ -25,7 +25,7 @@ def create_response( candidates: Optional[list[genai_types.Candidate]] = None, text: Optional[str] = None, input_tokens: Optional[int] = None, - output_tokens: Optional[int] = None, + candidates_tokens: Optional[int] = None, model_version: Optional[str] = None, usage_metadata: Optional[ genai_types.GenerateContentResponseUsageMetadata @@ -51,8 +51,8 @@ def create_response( usage_metadata = genai_types.GenerateContentResponseUsageMetadata() if input_tokens is not None: usage_metadata.prompt_token_count = input_tokens - if output_tokens is not None: - usage_metadata.candidates_token_count = output_tokens + if candidates_tokens is not None: + usage_metadata.candidates_token_count = candidates_tokens return genai_types.GenerateContentResponse( candidates=candidates, usage_metadata=usage_metadata, From 9d08a667f7940ae52869034d620ac08f1433ed86 Mon Sep 17 00:00:00 2001 From: Rima Al Ghossein Date: Sat, 14 Feb 2026 13:32:08 +0100 Subject: [PATCH 3/4] Test output_tokens count with thoughts_tokens --- .../tests/generate_content/nonstreaming_base.py | 6 
++++-- .../tests/generate_content/streaming_base.py | 14 ++++++++++---- .../tests/generate_content/util.py | 3 +++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index 009a53a92f..8f26dcf53a 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -259,12 +259,14 @@ def test_generated_span_has_vertex_ai_system_when_configured(self): ) def test_generated_span_counts_tokens(self): - self.configure_valid_response(input_tokens=123, candidates_tokens=456) + self.configure_valid_response( + input_tokens=123, candidates_tokens=456, thoughts_tokens=789 + ) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456) + self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 1245) @patch.dict( "os.environ", diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py index 31a43928e9..5876d31482 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py @@ -91,16 +91,22 @@ def test_handles_multiple_ressponses(self): def 
test_includes_token_counts_in_span_not_aggregated_from_responses(self): # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response. - self.configure_valid_response(input_tokens=3, candidates_tokens=5) - self.configure_valid_response(input_tokens=3, candidates_tokens=5) - self.configure_valid_response(input_tokens=3, candidates_tokens=5) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5) + self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 7) def test_new_semconv_log_has_extra_genai_attributes(self): patched_environ = patch.dict( diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py index 9777c2d74b..0b44739910 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py @@ -26,6 +26,7 @@ def create_response( text: Optional[str] = None, input_tokens: Optional[int] = None, candidates_tokens: Optional[int] = None, + thoughts_tokens: Optional[int] = None, model_version: Optional[str] = None, usage_metadata: Optional[ genai_types.GenerateContentResponseUsageMetadata @@ -53,6 +54,8 @@ def create_response( usage_metadata.prompt_token_count = input_tokens if 
candidates_tokens is not None: usage_metadata.candidates_token_count = candidates_tokens + if thoughts_tokens is not None: + usage_metadata.thoughts_token_count = thoughts_tokens return genai_types.GenerateContentResponse( candidates=candidates, usage_metadata=usage_metadata, From 233194572e2408346f2b137b9feb41205e2ec8aa Mon Sep 17 00:00:00 2001 From: Rima Al Ghossein Date: Sat, 14 Feb 2026 14:13:47 +0100 Subject: [PATCH 4/4] Update changelog --- .../opentelemetry-instrumentation-google-genai/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index 2643682a2b..0f673c0933 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +- Include thinking tokens in `gen_ai.usage.output_tokens` ([#4206](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4206)). - Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)). - Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)).