Skip to content

Commit ff5fae7

Browse files
committed
bench: export name aime25; default low effort with max_tokens=131000; keep num_runs=8
1 parent 508eeeb commit ff5fae7

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

eval_protocol/benchmarks/suites/aime25.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,15 +62,15 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
6262
return converted
6363

6464

65-
@export_benchmark("aime25_low")
65+
@export_benchmark("aime25")
6666
@evaluation_test(
6767
model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"],
6868
input_dataset=[
6969
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl",
7070
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl",
7171
],
7272
dataset_adapter=aime2025_dataset_adapter,
73-
rollout_input_params=[{"max_tokens": 131000, "extra_body": {"reasoning_effort": "high"}}],
73+
rollout_input_params=[{"max_tokens": 131000, "extra_body": {"reasoning_effort": "low"}}],
7474
rollout_processor=default_single_turn_rollout_processor,
7575
aggregation_method="mean",
7676
threshold_of_success=None,

0 commit comments

Comments (0)