@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
+- Include thinking tokens in `gen_ai.usage.output_tokens` ([#4206](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4206)).
 - Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)).
 - Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)).
@@ -528,13 +528,22 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
         input_tokens = _get_response_property(
             response, "usage_metadata.prompt_token_count"
         )
-        output_tokens = _get_response_property(
+        candidates_tokens = _get_response_property(
             response, "usage_metadata.candidates_token_count"
         )
+        thoughts_tokens = _get_response_property(
+            response, "usage_metadata.thoughts_token_count"
+        )
+        output_tokens: int = 0
+        if candidates_tokens and isinstance(candidates_tokens, int):
+            output_tokens += candidates_tokens
+        if thoughts_tokens and isinstance(thoughts_tokens, int):
+            output_tokens += thoughts_tokens
+
         if input_tokens and isinstance(input_tokens, int):
             self._input_tokens = input_tokens
-        if output_tokens and isinstance(output_tokens, int):
-            self._output_tokens = output_tokens
+
+        self._output_tokens = output_tokens
 
     def _maybe_update_error_type(self, response: GenerateContentResponse):
         if response.candidates:
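Note on the change above: the Gemini API reports reasoning ("thinking") tokens separately, in `usage_metadata.thoughts_token_count`, while visible completion tokens stay in `usage_metadata.candidates_token_count`; the new code folds both into the single `gen_ai.usage.output_tokens` value. A minimal standalone sketch of the same aggregation — the `UsageMetadata` dataclass here is an illustrative stand-in, not the real google-genai type:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class UsageMetadata:
    # Illustrative stand-in; field names mirror the properties read above.
    prompt_token_count: Optional[int] = None
    candidates_token_count: Optional[int] = None
    thoughts_token_count: Optional[int] = None


def aggregate_output_tokens(usage: UsageMetadata) -> int:
    """Sum visible candidate tokens and hidden thinking tokens.

    Either field may be None (absent from the response), so each count
    is checked before being added, as in the instrumentation above.
    """
    output_tokens = 0
    if usage.candidates_token_count and isinstance(usage.candidates_token_count, int):
        output_tokens += usage.candidates_token_count
    if usage.thoughts_token_count and isinstance(usage.thoughts_token_count, int):
        output_tokens += usage.thoughts_token_count
    return output_tokens


# Thinking enabled: 456 visible + 789 reasoning tokens.
assert aggregate_output_tokens(
    UsageMetadata(candidates_token_count=456, thoughts_token_count=789)
) == 1245
# Thinking disabled: thoughts_token_count is absent, only candidates count.
assert aggregate_output_tokens(UsageMetadata(candidates_token_count=5)) == 5
```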
@@ -259,12 +259,14 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):
         )
 
     def test_generated_span_counts_tokens(self):
-        self.configure_valid_response(input_tokens=123, output_tokens=456)
+        self.configure_valid_response(
+            input_tokens=123, candidates_tokens=456, thoughts_tokens=789
+        )
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
         self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
         span = self.otel.get_span_named("generate_content gemini-2.0-flash")
         self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
-        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
+        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 1245)
 
     @patch.dict(
         "os.environ",
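The updated expectation follows directly from the new aggregation: 456 candidate tokens + 789 thinking tokens = 1245 output tokens.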
@@ -91,16 +91,22 @@ def test_handles_multiple_ressponses(self):
 
     def test_includes_token_counts_in_span_not_aggregated_from_responses(self):
         # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response.
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
-        self.configure_valid_response(input_tokens=3, output_tokens=5)
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
+        self.configure_valid_response(
+            input_tokens=3, candidates_tokens=5, thoughts_tokens=2
+        )
 
         self.generate_content(model="gemini-2.0-flash", contents="Some input")
 
         self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
         span = self.otel.get_span_named("generate_content gemini-2.0-flash")
         self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3)
-        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5)
+        self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 7)
 
     def test_new_semconv_log_has_extra_genai_attributes(self):
         patched_environ = patch.dict(
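Because the streaming API returns cumulative usage on each chunk (per the test's comment), the instrumentation overwrites its counters with the latest values rather than summing across chunks; the final span therefore reports 5 + 2 = 7 output tokens, not 3 × 7 = 21. A sketch of that bookkeeping, with plain dicts standing in for the real usage metadata objects:

```python
# Each streamed chunk reports *cumulative* usage for the whole request.
chunks = [
    {"candidates_token_count": 5, "thoughts_token_count": 2},
    {"candidates_token_count": 5, "thoughts_token_count": 2},
    {"candidates_token_count": 5, "thoughts_token_count": 2},
]

output_tokens = 0
for usage in chunks:
    # Overwrite with the latest cumulative total; never accumulate across chunks.
    output_tokens = (usage.get("candidates_token_count") or 0) + (
        usage.get("thoughts_token_count") or 0
    )

assert output_tokens == 7  # 5 candidates + 2 thoughts from the last chunk
```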
@@ -25,7 +25,8 @@ def create_response(
     candidates: Optional[list[genai_types.Candidate]] = None,
     text: Optional[str] = None,
     input_tokens: Optional[int] = None,
-    output_tokens: Optional[int] = None,
+    candidates_tokens: Optional[int] = None,
+    thoughts_tokens: Optional[int] = None,
     model_version: Optional[str] = None,
     usage_metadata: Optional[
         genai_types.GenerateContentResponseUsageMetadata
@@ -51,8 +52,10 @@ def create_response(
         usage_metadata = genai_types.GenerateContentResponseUsageMetadata()
     if input_tokens is not None:
         usage_metadata.prompt_token_count = input_tokens
-    if output_tokens is not None:
-        usage_metadata.candidates_token_count = output_tokens
+    if candidates_tokens is not None:
+        usage_metadata.candidates_token_count = candidates_tokens
+    if thoughts_tokens is not None:
+        usage_metadata.thoughts_token_count = thoughts_tokens
     return genai_types.GenerateContentResponse(
         candidates=candidates,
         usage_metadata=usage_metadata,
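For completeness, a small sketch of the kind of response the updated test helper builds, using the google-genai types referenced in the diff (assumes the `google-genai` package is installed; keyword construction of these models is an assumption beyond what the diff shows, which constructs the metadata empty and assigns fields):

```python
from google.genai import types as genai_types

# Build usage metadata with the same fields the helper sets above.
usage = genai_types.GenerateContentResponseUsageMetadata(
    prompt_token_count=3,
    candidates_token_count=5,
    thoughts_token_count=2,
)
response = genai_types.GenerateContentResponse(usage_metadata=usage)

# What the instrumentation would record for this response:
# gen_ai.usage.input_tokens = 3, gen_ai.usage.output_tokens = 5 + 2 = 7.
assert response.usage_metadata.thoughts_token_count == 2
```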