diff --git a/src/endpoints_submission_cli/submissions/builder.py b/src/endpoints_submission_cli/submissions/builder.py index 31e1b36..3ccd221 100644 --- a/src/endpoints_submission_cli/submissions/builder.py +++ b/src/endpoints_submission_cli/submissions/builder.py @@ -112,7 +112,12 @@ def build_submission_folder( ) system_max_concurrency[system_id] = int(values.pop()) - max_tps = _compute_max_tps(run_data) + model_runs: dict[str, list[dict[str, Any]]] = {} + for (_system_id, model), runs in groups.items(): + model_runs.setdefault(model, []).extend(runs) + max_tps_by_model: dict[str, float | None] = { + model: _compute_max_tps(runs) for model, runs in model_runs.items() + } written_systems: set[str] = set() for (system_id, model), runs in groups.items(): @@ -120,7 +125,8 @@ def build_submission_folder( _write_system_description(submission_dir, system_id, runs[0]["system_info"]) written_systems.add(system_id) _write_pareto_entries( - submission_dir, system_id, model, runs, system_max_concurrency[system_id], max_tps + submission_dir, system_id, model, runs, + system_max_concurrency[system_id], max_tps_by_model[model], ) # Copy src/ for Standardized division submissions (mirrors documentation/ handling) @@ -442,10 +448,10 @@ def _write_pareto_entries( content = json.dumps(metadata, indent=2).encode() except (json.JSONDecodeError, TypeError, ValueError): pass - if rel_path == "results.json": + if rel_path in ("results.json", "accuracy/results.json"): content = _truncate_responses(content) dest_rel = ( - rel_path[len(_acc_prefix) :] + rel_path[len(_acc_prefix):] if run_type == "accuracy" and rel_path.startswith(_acc_prefix) else rel_path ) @@ -453,10 +459,6 @@ def _write_pareto_entries( dest.parent.mkdir(parents=True, exist_ok=True) dest.write_bytes(content) - # Fallback: if the archive had no accuracy/results.json, derive from results.json - if "accuracy/results.json" not in extra_files: - _write_accuracy_fallback(result_dir, run) - _RESPONSES_LIMIT = 10 * 1024 # 10 KB @@ -483,33 +485,6 @@ def _truncate_responses(content: bytes) -> bytes: return json.dumps(data, indent=2).encode() -def _write_accuracy_fallback(result_dir: Path, run: dict[str, Any]) -> None: - """Write per-point accuracy/ files from results.json when the archive has no accuracy/ dir. - - Called only when accuracy/results.json is absent from the run's extra_files - (i.e. the run archive did not include an accuracy/ directory). Sources accuracy_scores - from results.json. - - The written results.json format: - {"": {"score": {...}, "num_samples": N, ...}} - """ - results_bytes = run.get("_extra_files", {}).get("results.json") - if not results_bytes: - return - try: - parsed = json.loads(results_bytes) - except json.JSONDecodeError: - return - accuracy_scores: dict[str, Any] | None = parsed.get("accuracy_scores") - if not accuracy_scores: - return - - accuracy_dir = result_dir / "accuracy" - accuracy_dir.mkdir(parents=True, exist_ok=True) - (accuracy_dir / "results.json").write_text( - json.dumps(accuracy_scores, indent=2), encoding="utf-8" - ) - def _write_documentation(submission_dir: Path, run_data: list[dict[str, Any]]) -> None: """Merge documentation files from all runs into submission_dir/documentation/.""" @@ -533,7 +508,7 @@ def _write_src(submission_dir: Path, run_data: list[dict[str, Any]]) -> None: for rel, content in run.get("_extra_files", {}).items(): if not rel.startswith("src/"): continue - dest = submission_dir / rel + dest = src_model_dir / Path(rel).relative_to("src") dest.parent.mkdir(parents=True, exist_ok=True) dest.write_bytes(content) diff --git a/tests/endpoints_submission_cli/submissions/test_builder.py b/tests/endpoints_submission_cli/submissions/test_builder.py index a41e0e2..c74c535 100644 --- a/tests/endpoints_submission_cli/submissions/test_builder.py +++ b/tests/endpoints_submission_cli/submissions/test_builder.py @@ -87,9 +87,10 @@ def test_log_summary_created(self, run_archive: Path, tmp_path: Path) -> None: assert "n_samples_completed" in data assert "duration_ns" in data - def test_accuracy_file_created(self, run_archive: Path, tmp_path: Path) -> None: + def test_accuracy_file_created(self, run_folder: Path, tmp_path: Path) -> None: + acc_archive = self._make_archive(run_folder, tmp_path, "acc", 4, "accuracy") sub_dir = build_submission_folder( - [("run-001", run_archive)], "standardized", "available", tmp_path + [("run-001", acc_archive)], "standardized", "available", tmp_path / "sub" ) acc_jsons = list(sub_dir.rglob("accuracy/results.json")) assert len(acc_jsons) == 1 @@ -223,13 +224,9 @@ def test_perf_and_accuracy_same_concurrency_ok(self, run_folder: Path, tmp_path: "available", tmp_path / "sub", ) - assert (sub_dir / "pareto").glob("*/*/points/point_4.yaml").__next__().exists() - assert ( - (sub_dir / "pareto") - .glob("*/*/results/point_4/accuracy/point_4.yaml") - .__next__() - .exists() - ) + assert list((sub_dir / "pareto").glob("*/*/points/point_4.yaml")) + assert list((sub_dir / "pareto").glob("*/*/results/point_4/accuracy/results.json")) + assert list((sub_dir / "pareto").glob("*/*/results/point_4/accuracy/point_4.yaml")) def test_accuracy_run_routed_to_accuracy(self, run_folder: Path, tmp_path: Path) -> None: a_acc = self._make_archive(run_folder, tmp_path, "acc", 4, "accuracy") @@ -641,3 +638,92 @@ def test_empty_string(self) -> None: def test_long_name_truncated(self) -> None: long = "A" * 100 assert len(_slugify(long)) <= 64 + + +@pytest.mark.unit +class TestAccuracyResultsTruncation: + """Tests for accuracy/results.json truncation across three workflow scenarios.""" + + def _make_accuracy_archive( + self, + run_folder: Path, + tmp_path: Path, + name: str, + results_content: dict | None = None, + acc_results_content: dict | None = None, + ) -> Path: + import shutil + + folder = tmp_path / name + shutil.copytree(run_folder, folder) + cfg = yaml.safe_load((folder / "config.yaml").read_text()) + cfg["datasets"][0]["type"] = "accuracy" + (folder / "config.yaml").write_text(yaml.dump(cfg)) + if results_content is not None: + (folder / "results.json").write_text(json.dumps(results_content)) + if acc_results_content is not None: + (folder / "accuracy").mkdir(exist_ok=True) + (folder / "accuracy" / "results.json").write_text(json.dumps(acc_results_content)) + archive = tmp_path / f"{name}.tar.gz" + with tarfile.open(archive, "w:gz") as tar: + tar.add(folder, arcname=name) + return archive + + def _big_results(self, count: int = 5000) -> dict: + return { + "config": {}, + "accuracy_scores": {"score": 0.45}, + "responses": [{"text": "a" * 100, "idx": i} for i in range(count)], + } + + def test_no_accuracy_subdir_truncates_responses( + self, run_folder: Path, tmp_path: Path + ) -> None: + """Workflow 1/3: archive has only root results.json; responses written truncated to accuracy/results.json.""" + archive = self._make_accuracy_archive( + run_folder, tmp_path, "wf1", results_content=self._big_results() + ) + sub_dir = build_submission_folder( + [("run-001", archive)], "standardized", "available", tmp_path / "sub" + ) + acc_files = list(sub_dir.rglob("accuracy/results.json")) + assert len(acc_files) == 1 + written = json.loads(acc_files[0].read_text()) + assert len(json.dumps(written["responses"]).encode()) <= 10 * 1024 + + def test_accuracy_subdir_large_responses_truncated( + self, run_folder: Path, tmp_path: Path + ) -> None: + """Workflow 2: archive has accuracy/results.json with large responses; truncated on write.""" + archive = self._make_accuracy_archive( + run_folder, tmp_path, "wf2", acc_results_content=self._big_results() + ) + sub_dir = build_submission_folder( + [("run-001", archive)], "standardized", "available", tmp_path / "sub" + ) + acc_files = list(sub_dir.rglob("accuracy/results.json")) + assert len(acc_files) == 1 + written = json.loads(acc_files[0].read_text()) + assert len(json.dumps(written["responses"]).encode()) <= 10 * 1024 + + def test_accuracy_subdir_already_small_written_intact( + self, run_folder: Path, tmp_path: Path + ) -> None: + """Workflow 3: archive has accuracy/results.json already within 10 KB; written without corruption.""" + small_responses = [{"text": "hi", "idx": 0}] + acc_results = { + "config": {}, + "accuracy_scores": {"score": 0.45}, + "responses": small_responses, + } + archive = self._make_accuracy_archive( + run_folder, tmp_path, "wf3", acc_results_content=acc_results + ) + sub_dir = build_submission_folder( + [("run-001", archive)], "standardized", "available", tmp_path / "sub" + ) + acc_files = list(sub_dir.rglob("accuracy/results.json")) + assert len(acc_files) == 1 + written = json.loads(acc_files[0].read_text()) + assert written["responses"] == small_responses + assert written["accuracy_scores"] == acc_results["accuracy_scores"]