We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 25dff74, commit 684131d (Copy full SHA for 684131d)
1 file changed
eval_protocol/benchmarks/test_aime25.py
@@ -90,8 +90,8 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
90
rollout_processor=SingleTurnRolloutProcessor(),
91
aggregation_method="mean",
92
passed_threshold=0.8,
93
- num_runs=8,
94
- max_concurrent_rollouts=4,
+ num_runs=2,
+ max_concurrent_rollouts=16,
95
mode="pointwise",
96
)
97
def test_aime25_pointwise(row: EvaluationRow) -> EvaluationRow:
0 commit comments