From 207d722717b8cf679f63c991c954662ff2955ba0 Mon Sep 17 00:00:00 2001
From: Jessie Li
Date: Sun, 15 Feb 2026 08:00:13 -0800
Subject: [PATCH 1/2] Fix token usage- total tokens

---
 .../_evaluators/_common/_base_rai_svc_eval.py | 24 ++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
index 446ff4ad1d70..bb3fd05d8dd9 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
@@ -299,23 +299,23 @@ def _parse_eval_result(self, eval_result) -> Dict[str, T]:
 
         # Extract token counts from metrics
         metrics = properties.get("metrics", {})
-        prompt_tokens = metrics.get("promptTokens", "")
-        completion_tokens = metrics.get("completionTokens", "")
+        prompt_tokens = int(metrics.get("promptTokens", 0)) if metrics.get("promptTokens") else 0
+        completion_tokens = int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
 
         # Calculate total tokens
         try:
             total_tokens = (
-                str(int(prompt_tokens) + int(completion_tokens))
+                int(prompt_tokens) + int(completion_tokens)
                 if prompt_tokens and completion_tokens
-                else ""
+                else 0
             )
         except (ValueError, TypeError):
-            total_tokens = ""
+            total_tokens = 0
 
         # Add token metadata (matching old format)
         parsed_result[f"{self._eval_metric.value}_total_tokens"] = total_tokens
-        parsed_result[f"{self._eval_metric.value}_prompt_tokens"] = prompt_tokens
-        parsed_result[f"{self._eval_metric.value}_completion_tokens"] = completion_tokens
+        parsed_result[f"{self._eval_metric.value}_prompt_tokens"] = int(prompt_tokens) if prompt_tokens else 0
+        parsed_result[f"{self._eval_metric.value}_completion_tokens"] = int(completion_tokens) if completion_tokens else 0
 
         # Add empty placeholders for fields that sync_evals doesn't provide
         parsed_result[f"{self._eval_metric.value}_finish_reason"] = ""
@@ -334,17 +334,17 @@ def _parse_eval_result(self, eval_result) -> Dict[str, T]:
 
         # Extract token counts
         metrics = properties.get("metrics", {})
-        prompt_tokens = metrics.get("promptTokens", "")
-        completion_tokens = metrics.get("completionTokens", "")
+        prompt_tokens = int(metrics.get("promptTokens", 0)) if metrics.get("promptTokens") else 0
+        completion_tokens = int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
 
         try:
             total_tokens = (
-                str(int(prompt_tokens) + int(completion_tokens))
+                int(prompt_tokens) + int(completion_tokens)
                 if prompt_tokens and completion_tokens
-                else ""
+                else 0
             )
         except (ValueError, TypeError):
-            total_tokens = ""
+            total_tokens = 0
 
         # Return in the expected format matching parse_response output
         return {

From 2770d0b4bb5f50e68ba130b2ab2b880b74e056bc Mon Sep 17 00:00:00 2001
From: Jessie Li
Date: Sun, 15 Feb 2026 09:05:20 -0800
Subject: [PATCH 2/2] run black

---
 .../_evaluators/_common/_base_rai_svc_eval.py | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
index bb3fd05d8dd9..41d99028e9f9 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
@@ -300,7 +300,9 @@ def _parse_eval_result(self, eval_result) -> Dict[str, T]:
         # Extract token counts from metrics
         metrics = properties.get("metrics", {})
         prompt_tokens = int(metrics.get("promptTokens", 0)) if metrics.get("promptTokens") else 0
-        completion_tokens = int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
+        completion_tokens = (
+            int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
+        )
 
         # Calculate total tokens
         try:
@@ -314,8 +316,12 @@ def _parse_eval_result(self, eval_result) -> Dict[str, T]:
 
         # Add token metadata (matching old format)
         parsed_result[f"{self._eval_metric.value}_total_tokens"] = total_tokens
-        parsed_result[f"{self._eval_metric.value}_prompt_tokens"] = int(prompt_tokens) if prompt_tokens else 0
-        parsed_result[f"{self._eval_metric.value}_completion_tokens"] = int(completion_tokens) if completion_tokens else 0
+        parsed_result[f"{self._eval_metric.value}_prompt_tokens"] = (
+            int(prompt_tokens) if prompt_tokens else 0
+        )
+        parsed_result[f"{self._eval_metric.value}_completion_tokens"] = (
+            int(completion_tokens) if completion_tokens else 0
+        )
 
         # Add empty placeholders for fields that sync_evals doesn't provide
         parsed_result[f"{self._eval_metric.value}_finish_reason"] = ""
@@ -335,13 +341,13 @@ def _parse_eval_result(self, eval_result) -> Dict[str, T]:
         # Extract token counts
         metrics = properties.get("metrics", {})
         prompt_tokens = int(metrics.get("promptTokens", 0)) if metrics.get("promptTokens") else 0
-        completion_tokens = int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
+        completion_tokens = (
+            int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
+        )
 
         try:
             total_tokens = (
-                int(prompt_tokens) + int(completion_tokens)
-                if prompt_tokens and completion_tokens
-                else 0
+                int(prompt_tokens) + int(completion_tokens) if prompt_tokens and completion_tokens else 0
             )
         except (ValueError, TypeError):
            total_tokens = 0
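
Note (not part of the patches above): a minimal standalone sketch of the token-count normalization these commits introduce, useful for sanity-checking the behavior outside the evaluator. The helper name and sample payloads are hypothetical; only the "promptTokens"/"completionTokens" keys, the int-or-0 fallback, and the numeric total come from the diff itself.

from typing import Dict


def normalize_token_metrics(metrics: Dict[str, object]) -> Dict[str, int]:
    # Hypothetical helper mirroring the post-patch logic in _parse_eval_result:
    # missing or empty values fall back to 0, and all three counts are ints.
    prompt_tokens = int(metrics.get("promptTokens", 0)) if metrics.get("promptTokens") else 0
    completion_tokens = int(metrics.get("completionTokens", 0)) if metrics.get("completionTokens") else 0
    try:
        # The try/except is retained from the original code; with ints it mainly
        # guards against unexpected input types.
        total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else 0
    except (ValueError, TypeError):
        total_tokens = 0
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }


# The service may report counts as strings; they now come back as ints, and
# total_tokens is a numeric sum rather than a str() of the sum (or "").
print(normalize_token_metrics({"promptTokens": "120", "completionTokens": "34"}))
# -> {'prompt_tokens': 120, 'completion_tokens': 34, 'total_tokens': 154}
print(normalize_token_metrics({}))
# -> {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}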