diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index 2643682a2b..0f673c0933 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +- Include thinking tokens in `gen_ai.usage.output_tokens` ([#4206](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4206)). - Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)). - Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)). 
diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index a7901b3c7e..926f96b6a3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -528,13 +528,25 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse): input_tokens = _get_response_property( response, "usage_metadata.prompt_token_count" ) - output_tokens = _get_response_property( + candidates_tokens = _get_response_property( response, "usage_metadata.candidates_token_count" ) + thoughts_tokens = _get_response_property( + response, "usage_metadata.thoughts_token_count" + ) + output_tokens: int = 0 + if candidates_tokens and isinstance(candidates_tokens, int): + output_tokens += candidates_tokens + if thoughts_tokens and isinstance(thoughts_tokens, int): + output_tokens += thoughts_tokens + if input_tokens and isinstance(input_tokens, int): self._input_tokens = input_tokens - if output_tokens and isinstance(output_tokens, int): - self._output_tokens = output_tokens + + # Only overwrite when this response reported output usage, mirroring the + # input-token guard above; otherwise the previously recorded count is kept. + if output_tokens: + self._output_tokens = output_tokens def _maybe_update_error_type(self, response: GenerateContentResponse): if response.candidates: diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index ca683c911a..8f26dcf53a 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ 
b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -259,12 +259,14 @@ def test_generated_span_has_vertex_ai_system_when_configured(self): ) def test_generated_span_counts_tokens(self): - self.configure_valid_response(input_tokens=123, output_tokens=456) + self.configure_valid_response( + input_tokens=123, candidates_tokens=456, thoughts_tokens=789 + ) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456) + self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 1245) @patch.dict( "os.environ", diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py index 9d702033bb..5876d31482 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py @@ -91,16 +91,22 @@ def test_handles_multiple_ressponses(self): def test_includes_token_counts_in_span_not_aggregated_from_responses(self): # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response. 
- self.configure_valid_response(input_tokens=3, output_tokens=5) - self.configure_valid_response(input_tokens=3, output_tokens=5) - self.configure_valid_response(input_tokens=3, output_tokens=5) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) + self.configure_valid_response( + input_tokens=3, candidates_tokens=5, thoughts_tokens=2 + ) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5) + self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 7) def test_new_semconv_log_has_extra_genai_attributes(self): patched_environ = patch.dict( diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py index 2bbd5bdd53..0b44739910 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py @@ -25,7 +25,8 @@ def create_response( candidates: Optional[list[genai_types.Candidate]] = None, text: Optional[str] = None, input_tokens: Optional[int] = None, - output_tokens: Optional[int] = None, + candidates_tokens: Optional[int] = None, + thoughts_tokens: Optional[int] = None, model_version: Optional[str] = None, usage_metadata: Optional[ genai_types.GenerateContentResponseUsageMetadata @@ -51,8 +52,10 @@ def create_response( usage_metadata = genai_types.GenerateContentResponseUsageMetadata() if input_tokens is not None: usage_metadata.prompt_token_count = 
input_tokens - if output_tokens is not None: - usage_metadata.candidates_token_count = output_tokens + if candidates_tokens is not None: + usage_metadata.candidates_token_count = candidates_tokens + if thoughts_tokens is not None: + usage_metadata.thoughts_token_count = thoughts_tokens return genai_types.GenerateContentResponse( candidates=candidates, usage_metadata=usage_metadata,