Skip to content

Commit a0d35f3

Browse files
committed
rename files and consolidate aime
1 parent e2198ac commit a0d35f3

5 files changed

Lines changed: 6 additions & 145 deletions

File tree

eval_protocol/benchmarks/registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212
1313
from eval_protocol.benchmarks.registry import export_benchmark
1414
15-
@export_benchmark("aime25_low")
15+
@export_benchmark("aime25")
1616
@evaluation_test(...)
1717
def test_aime_pointwise(row: EvaluationRow) -> EvaluationRow:
1818
...
1919
2020
Programmatic run:
2121
2222
from eval_protocol.benchmarks.registry import get_benchmark_runner
23-
get_benchmark_runner("aime25_low")(model="fireworks_ai/...", print_summary=True, out="artifacts/aime.json")
23+
get_benchmark_runner("aime25")(model="fireworks_ai/...", print_summary=True, out="artifacts/aime.json")
2424
"""
2525

2626
from __future__ import annotations

eval_protocol/benchmarks/run.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,17 @@
33
44
Usage:
55
6-
python -m eval_protocol.benchmarks.run aime25_low \
6+
python -m eval_protocol.benchmarks.run aime25 \
77
--model fireworks_ai/accounts/fireworks/models/gpt-oss-120b \
88
--print-summary \
9-
--out artifacts/aime25_low.json \
9+
--out artifacts/aime25.json \
1010
--max-rows 50 \
1111
--reasoning-effort low
1212
"""
1313

1414
from __future__ import annotations
1515

1616
import argparse
17-
from typing import Any
1817

1918
from importlib import import_module
2019
import pkgutil
@@ -60,7 +59,7 @@ def main() -> int:
6059
# Fallback: if nothing registered yet and a known suite was requested, try explicit import
6160
if not list_benchmarks():
6261
known_map = {
63-
"aime25_low": "eval_protocol.benchmarks.suites.aime25",
62+
"aime25": "eval_protocol.benchmarks.suites.aime25",
6463
}
6564
forced = known_map.get(args.name)
6665
if forced:
@@ -73,7 +72,7 @@ def main() -> int:
7372
if args.max_rows is not None:
7473
try:
7574
max_rows = int(args.max_rows)
76-
except Exception:
75+
except ValueError:
7776
max_rows = str(args.max_rows)
7877
# Build input params override if needed
7978
ip_override = {}

examples/aime2025_chat_completion/README.md

Lines changed: 0 additions & 24 deletions
This file was deleted.

examples/aime2025_chat_completion/__init__.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

examples/aime2025_chat_completion/main.py

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments
 (0)