Skip to content

Commit fc82767

Browse files
committed
test
1 parent 7c0d149 commit fc82767

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

eval_protocol/benchmarks/test_aime25.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
7979

8080
@evaluation_test(
8181
input_dataset=[
82-
# _get_aime_dataset_path(),
83-
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl",
84-
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl",
82+
_get_aime_dataset_path(),
83+
# "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl",
84+
# "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl",
8585
],
8686
dataset_adapter=aime2025_dataset_adapter,
8787
completion_params=[
@@ -91,6 +91,7 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
9191
"model": "fireworks_ai/accounts/pyroworks/deployedModels/glm-4p6-qpwrimne",
9292
# "model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b",
9393
"stream": True,
94+
# "timeout": 2400,
9495
}
9596
],
9697
rollout_processor=SingleTurnRolloutProcessor(),

eval_protocol/pytest/default_single_turn_rollout_process.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
import litellm
88
from litellm import acompletion
9+
from litellm.types.utils import ModelResponse, Choices
10+
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
911

1012
from eval_protocol.dataset_logger import default_logger
1113
from eval_protocol.models import EvaluationRow, Message
@@ -65,14 +67,18 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
6567
if request_params.get("stream") is True:
6668
chunks = []
6769
stream = await acompletion(**request_params)
70+
71+
assert isinstance(stream, CustomStreamWrapper), "Stream should be a CustomStreamWrapper"
72+
6873
async for chunk in stream: # pyright: ignore[reportGeneralTypeIssues]
6974
chunks.append(chunk)
7075
response = litellm.stream_chunk_builder(chunks, messages_payload)
7176
else:
7277
response = await acompletion(**request_params)
7378

74-
if response is None:
75-
raise ValueError("Response is None")
79+
assert response is not None, "Response is None"
80+
assert isinstance(response, ModelResponse), "Response should be ModelResponse"
81+
assert isinstance(response.choices[0], Choices), "Response choice should be a Choices"
7682

7783
assistant_content = response.choices[0].message.content or ""
7884
tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else None
@@ -115,10 +121,12 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
115121
tool_calls=converted_tool_calls,
116122
)
117123
]
118-
row.execution_metadata.usage = CompletionUsage(
119-
prompt_tokens=response.usage.prompt_tokens,
120-
completion_tokens=response.usage.completion_tokens,
121-
total_tokens=response.usage.total_tokens,
124+
row.execution_metadata.usage = (
125+
CompletionUsage( # Note: LiteLLM sets usage dynamically via setattr(), not as a typed field
126+
prompt_tokens=response.usage.prompt_tokens, # pyright: ignore[reportAttributeAccessIssue]
127+
completion_tokens=response.usage.completion_tokens, # pyright: ignore[reportAttributeAccessIssue]
128+
total_tokens=response.usage.total_tokens, # pyright: ignore[reportAttributeAccessIssue]
129+
)
122130
)
123131

124132
row.messages = messages

0 commit comments

Comments (0)