Skip to content

Commit cac5b3f

Browse files
committed
Refresh Home export and consistency checks
1 parent e5ffeda commit cac5b3f

9 files changed

Lines changed: 2055 additions & 533 deletions

File tree

check_consistency.py

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import argparse
1010
import hashlib
1111
import json
12+
import os
1213
import re
1314
import subprocess
1415
import sys
@@ -140,11 +141,21 @@ def scan_max_path(root: Path, current_max: int, current_path: str) -> tuple[int,
140141
if not root.exists():
141142
return current_max, current_path
142143
info(f"Scanning path lengths under {root} ...")
143-
for path in root.rglob("*"):
144-
plen = len(str(path.resolve()))
145-
if plen > current_max:
146-
current_max = plen
147-
current_path = str(path.resolve())
144+
stack = [os.fspath(root)]
145+
while stack:
146+
current = stack.pop()
147+
try:
148+
with os.scandir(current) as entries:
149+
for entry in entries:
150+
path_str = entry.path
151+
plen = len(path_str)
152+
if plen > current_max:
153+
current_max = plen
154+
current_path = path_str
155+
if entry.is_dir(follow_symlinks=False):
156+
stack.append(path_str)
157+
except OSError:
158+
continue
148159
return current_max, current_path
149160

150161

@@ -256,11 +267,6 @@ def run_task_checks(rcb_tasks: list[str], progress_every: int) -> None:
256267
elif rcb_cl.exists() and not home_cl.exists():
257268
err(f"{task_id}: Home checklist.json missing")
258269

259-
# [6] Data files vs source
260-
src_data = task_dir / "data"
261-
if src_data.exists():
262-
sum(1 for f in src_data.rglob("*") if f.is_file())
263-
264270
# [7] related_work exists and non-empty, PDF naming
265271
rw = task_dir / "related_work"
266272
if not rw.exists() or not list(rw.iterdir()):
@@ -315,14 +321,9 @@ def run_task_checks(rcb_tasks: list[str], progress_every: int) -> None:
315321
for ref in path_refs:
316322
err(f"{task_id}: task description contains stale path: {ref}")
317323

318-
for path in task_dir.rglob("*"):
319-
plen = len(str(path.resolve()))
320-
if plen > max_path_len:
321-
max_path_len = plen
322-
max_path_str = str(path.resolve())
323-
324324
# [11] Path lengths
325325
log("\n[11] Path length statistics")
326+
max_path_len, max_path_str = scan_max_path(TASKS, max_path_len, max_path_str)
326327
max_path_len, max_path_str = scan_max_path(HOME_DATA, max_path_len, max_path_str)
327328
max_path_len, max_path_str = scan_max_path(WS, max_path_len, max_path_str)
328329

@@ -485,6 +486,36 @@ def run_static_checks(rcb_tasks: list[str], progress_every: int) -> None:
485486
f"leaderboard.json: {len(leaderboard['tasks'])} tasks, "
486487
f"{len(leaderboard['agents'])} agents"
487488
)
489+
cell_required = {"score", "run_id", "duration_seconds", "cost_usd", "model", "model_display"}
490+
cell_errors = 0
491+
for agent_name, task_scores in leaderboard.get("scores", {}).items():
492+
if not isinstance(task_scores, dict):
493+
err(f"leaderboard.json scores[{agent_name!r}] is not an object")
494+
cell_errors += 1
495+
continue
496+
for task_id, entry in task_scores.items():
497+
if not isinstance(entry, dict):
498+
err(f"leaderboard.json scores[{agent_name!r}][{task_id!r}] is not an object")
499+
cell_errors += 1
500+
continue
501+
missing_cell = cell_required - set(entry.keys())
502+
if missing_cell:
503+
err(
504+
f"leaderboard.json scores[{agent_name!r}][{task_id!r}] "
505+
f"missing fields: {sorted(missing_cell)}"
506+
)
507+
cell_errors += 1
508+
frontier = leaderboard.get("frontier")
509+
if not isinstance(frontier, dict):
510+
err("leaderboard.json frontier is not an object")
511+
cell_errors += 1
512+
else:
513+
missing_frontier = [task_id for task_id in leaderboard.get("tasks", []) if task_id not in frontier]
514+
if missing_frontier:
515+
err(f"leaderboard.json frontier missing tasks: {missing_frontier[:3]}")
516+
cell_errors += len(missing_frontier)
517+
if cell_errors == 0:
518+
ok("leaderboard.json cell payloads look complete")
488519
else:
489520
warn("leaderboard.json not found")
490521

0 commit comments

Comments (0)