|
9 | 9 | import argparse |
10 | 10 | import hashlib |
11 | 11 | import json |
| 12 | +import os |
12 | 13 | import re |
13 | 14 | import subprocess |
14 | 15 | import sys |
@@ -140,11 +141,21 @@ def scan_max_path(root: Path, current_max: int, current_path: str) -> tuple[int, |
140 | 141 | if not root.exists(): |
141 | 142 | return current_max, current_path |
142 | 143 | info(f"Scanning path lengths under {root} ...") |
143 | | - for path in root.rglob("*"): |
144 | | - plen = len(str(path.resolve())) |
145 | | - if plen > current_max: |
146 | | - current_max = plen |
147 | | - current_path = str(path.resolve()) |
| 144 | + stack = [os.fspath(root)] |
| 145 | + while stack: |
| 146 | + current = stack.pop() |
| 147 | + try: |
| 148 | + with os.scandir(current) as entries: |
| 149 | + for entry in entries: |
| 150 | + path_str = entry.path |
| 151 | + plen = len(path_str) |
| 152 | + if plen > current_max: |
| 153 | + current_max = plen |
| 154 | + current_path = path_str |
| 155 | + if entry.is_dir(follow_symlinks=False): |
| 156 | + stack.append(path_str) |
| 157 | + except OSError: |
| 158 | + continue |
148 | 159 | return current_max, current_path |
149 | 160 |
|
150 | 161 |
|
@@ -256,11 +267,6 @@ def run_task_checks(rcb_tasks: list[str], progress_every: int) -> None: |
256 | 267 | elif rcb_cl.exists() and not home_cl.exists(): |
257 | 268 | err(f"{task_id}: Home checklist.json missing") |
258 | 269 |
|
259 | | - # [6] Data files vs source |
260 | | - src_data = task_dir / "data" |
261 | | - if src_data.exists(): |
262 | | - sum(1 for f in src_data.rglob("*") if f.is_file()) |
263 | | - |
264 | 270 | # [7] related_work exists and non-empty, PDF naming |
265 | 271 | rw = task_dir / "related_work" |
266 | 272 | if not rw.exists() or not list(rw.iterdir()): |
@@ -315,14 +321,9 @@ def run_task_checks(rcb_tasks: list[str], progress_every: int) -> None: |
315 | 321 | for ref in path_refs: |
316 | 322 | err(f"{task_id}: task description contains stale path: {ref}") |
317 | 323 |
|
318 | | - for path in task_dir.rglob("*"): |
319 | | - plen = len(str(path.resolve())) |
320 | | - if plen > max_path_len: |
321 | | - max_path_len = plen |
322 | | - max_path_str = str(path.resolve()) |
323 | | - |
324 | 324 | # [11] Path lengths |
325 | 325 | log("\n[11] Path length statistics") |
| 326 | + max_path_len, max_path_str = scan_max_path(TASKS, max_path_len, max_path_str) |
326 | 327 | max_path_len, max_path_str = scan_max_path(HOME_DATA, max_path_len, max_path_str) |
327 | 328 | max_path_len, max_path_str = scan_max_path(WS, max_path_len, max_path_str) |
328 | 329 |
|
@@ -485,6 +486,36 @@ def run_static_checks(rcb_tasks: list[str], progress_every: int) -> None: |
485 | 486 | f"leaderboard.json: {len(leaderboard['tasks'])} tasks, " |
486 | 487 | f"{len(leaderboard['agents'])} agents" |
487 | 488 | ) |
| 489 | + cell_required = {"score", "run_id", "duration_seconds", "cost_usd", "model", "model_display"} |
| 490 | + cell_errors = 0 |
| 491 | + for agent_name, task_scores in leaderboard.get("scores", {}).items(): |
| 492 | + if not isinstance(task_scores, dict): |
| 493 | + err(f"leaderboard.json scores[{agent_name!r}] is not an object") |
| 494 | + cell_errors += 1 |
| 495 | + continue |
| 496 | + for task_id, entry in task_scores.items(): |
| 497 | + if not isinstance(entry, dict): |
| 498 | + err(f"leaderboard.json scores[{agent_name!r}][{task_id!r}] is not an object") |
| 499 | + cell_errors += 1 |
| 500 | + continue |
| 501 | + missing_cell = cell_required - set(entry.keys()) |
| 502 | + if missing_cell: |
| 503 | + err( |
| 504 | + f"leaderboard.json scores[{agent_name!r}][{task_id!r}] " |
| 505 | + f"missing fields: {sorted(missing_cell)}" |
| 506 | + ) |
| 507 | + cell_errors += 1 |
| 508 | + frontier = leaderboard.get("frontier") |
| 509 | + if not isinstance(frontier, dict): |
| 510 | + err("leaderboard.json frontier is not an object") |
| 511 | + cell_errors += 1 |
| 512 | + else: |
| 513 | + missing_frontier = [task_id for task_id in leaderboard.get("tasks", []) if task_id not in frontier] |
| 514 | + if missing_frontier: |
| 515 | + err(f"leaderboard.json frontier missing tasks: {missing_frontier[:3]}") |
| 516 | + cell_errors += len(missing_frontier) |
| 517 | + if cell_errors == 0: |
| 518 | + ok("leaderboard.json cell payloads look complete") |
488 | 519 | else: |
489 | 520 | warn("leaderboard.json not found") |
490 | 521 |
|
|
0 commit comments