mlcommons · anandhu-eng · Jun 24, 2026 · Jun 24, 2026
diff --git a/src/endpoints_submission_cli/runs/parser.py b/src/endpoints_submission_cli/runs/parser.py
@@ -28,6 +28,7 @@
 import yaml
 
 from ..exceptions import RunFolderError
+from ..truncation import truncate_responses
 
 __all__ = ["parse_run_folder", "build_archive"]
 
@@ -156,6 +157,11 @@ def build_archive(folder: Path, dest: Path | None = None, run_date: str | None =
             archived copy of that file has its ``run_date`` field set to this value.
             The source folder on disk is left untouched.
 
+    The archived copy of every ``results.json`` (top-level and any nested one,
+    e.g. ``accuracy/results.json``) has its verbose ``responses`` list truncated
+    (see :func:`truncate_responses`) to keep uploads small; the source folder on
+    disk is never mutated.
+
     Returns:
         Path of the created archive.
     """
@@ -165,7 +171,11 @@ def build_archive(folder: Path, dest: Path | None = None, run_date: str | None =
     if dest is None:
         dest = folder.parent / f"{folder.name}.tar.gz"
 
-    patched_metadata: bytes | None = None
+    # In-memory replacements for specific archive members, keyed by arcname. The
+    # matching files on disk are excluded from the tar and these copies appended,
+    # so the source folder is never mutated.
+    replacements: dict[str, bytes] = {}
+
     meta_path = folder / "run_metadata.json"
     if run_date is not None and meta_path.is_file():
         try:
@@ -174,21 +184,30 @@ def build_archive(folder: Path, dest: Path | None = None, run_date: str | None =
             metadata = None
         if isinstance(metadata, dict):
             metadata["run_date"] = run_date
-            patched_metadata = json.dumps(metadata, indent=2).encode("utf-8")
+            replacements[f"{folder.name}/run_metadata.json"] = json.dumps(
+                metadata, indent=2
+            ).encode("utf-8")
+
+    for results_path in sorted(folder.rglob("results.json")):
+        if not results_path.is_file():
+            continue
+        original = results_path.read_bytes()
+        truncated = truncate_responses(original)
+        if truncated != original:
+            rel = results_path.relative_to(folder).as_posix()
+            replacements[f"{folder.name}/{rel}"] = truncated
 
-    meta_arcname = f"{folder.name}/run_metadata.json"
     with tarfile.open(dest, "w:gz") as tar:
-        if patched_metadata is None:
+        if not replacements:
             tar.add(folder, arcname=folder.name)
         else:
-            # Add everything except the original run_metadata.json, then append the
-            # patched copy from memory so the source folder is never mutated.
             tar.add(
                 folder,
                 arcname=folder.name,
-                filter=lambda ti: None if ti.name == meta_arcname else ti,
+                filter=lambda ti: None if ti.name in replacements else ti,
             )
-            info = tarfile.TarInfo(name=meta_arcname)
-            info.size = len(patched_metadata)
-            tar.addfile(info, io.BytesIO(patched_metadata))
+            for arcname, data in replacements.items():
+                info = tarfile.TarInfo(name=arcname)
+                info.size = len(data)
+                tar.addfile(info, io.BytesIO(data))
     return dest
diff --git a/src/endpoints_submission_cli/submissions/builder.py b/src/endpoints_submission_cli/submissions/builder.py
@@ -34,6 +34,7 @@
 from submission_checker.models.file import SystemDescription
 
 from ..exceptions import SubmissionBuildError
+from ..truncation import truncate_responses
 
 __all__ = ["build_submission_folder", "create_bundle_archive", "extract_archive"]
 
@@ -449,7 +450,7 @@ def _write_pareto_entries(
                 except (json.JSONDecodeError, TypeError, ValueError):
                     pass
             if rel_path in ("results.json", "accuracy/results.json"):
-                content = _truncate_responses(content)
+                content = truncate_responses(content)
             dest_rel = (
                 rel_path[len(_acc_prefix):]
                 if run_type == "accuracy" and rel_path.startswith(_acc_prefix)
@@ -460,32 +461,6 @@ def _write_pareto_entries(
             dest.write_bytes(content)
 
 
-_RESPONSES_LIMIT = 10 * 1024  # 10 KB
-
-
-def _truncate_responses(content: bytes) -> bytes:
-    """Truncate the responses list in a results.json payload to stay under 10 KB."""
-    try:
-        data = json.loads(content)
-    except (json.JSONDecodeError, ValueError):
-        return content
-    responses = data.get("responses")
-    if not isinstance(responses, list) or not responses:
-        return content
-    # Walk items and stop as soon as adding the next one would exceed the limit.
-    # Each item contributes its own bytes plus 2 for the ", " separator after the first.
-    total = 2  # "[]"
-    idx = 0
-    for i, r in enumerate(responses):
-        total += len(json.dumps(r).encode()) + (2 if i > 0 else 0)
-        if total > _RESPONSES_LIMIT:
-            break
-        idx = i + 1
-    data["responses"] = responses[:idx]
-    return json.dumps(data, indent=2).encode()
-
-
-
 def _write_documentation(submission_dir: Path, run_data: list[dict[str, Any]]) -> None:
     """Merge documentation files from all runs into submission_dir/documentation/."""
     doc_dir = submission_dir / "documentation"

diff --git a/src/endpoints_submission_cli/truncation.py b/src/endpoints_submission_cli/truncation.py
@@ -0,0 +1,43 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 MLCommons
+# SPDX-License-Identifier: Apache-2.0
+"""Shared helper for trimming the verbose ``responses`` list in results.json.
+
+The raw ``responses`` array captured during a run can be very large (one entry
+per sample). It is only kept for spot-checking, so both the run-upload archiver
+and the submission builder cap it to keep payloads small and uploads reliable.
+"""
+
+from __future__ import annotations
+
+import json
+
+__all__ = ["RESPONSES_LIMIT", "truncate_responses"]
+
+RESPONSES_LIMIT = 10 * 1024  # 10 KB
+
+
+def truncate_responses(content: bytes) -> bytes:
+    """Truncate the ``responses`` list in a results.json payload to stay under 10 KB.
+
+    Returns *content* unchanged when it is not JSON, has no ``responses`` list, or
+    already fits within the limit. Only the ``responses`` key is affected; all
+    other fields (accuracy scores, config, results, ...) are preserved.
+    """
+    try:
+        data = json.loads(content)
+    except (json.JSONDecodeError, ValueError):
+        return content
+    responses = data.get("responses")
+    if not isinstance(responses, list) or not responses:
+        return content
+    # Walk items and stop as soon as adding the next one would exceed the limit.
+    # Each item contributes its own bytes plus 2 for the ", " separator after the first.
+    total = 2  # "[]"
+    idx = 0
+    for i, r in enumerate(responses):
+        total += len(json.dumps(r).encode()) + (2 if i > 0 else 0)
+        if total > RESPONSES_LIMIT:
+            break
+        idx = i + 1
+    data["responses"] = responses[:idx]
+    return json.dumps(data, indent=2).encode()
diff --git a/tests/endpoints_submission_cli/runs/test_parser.py b/tests/endpoints_submission_cli/runs/test_parser.py
@@ -168,6 +168,58 @@ def test_archive_contains_files(self, run_folder: Path, tmp_path: Path) -> None:
         assert any("config.yaml" in n for n in names)
         assert any("result_summary.json" in n for n in names)
 
+    def test_truncates_results_responses(self, run_folder: Path, tmp_path: Path) -> None:
+        results_path = run_folder / "results.json"
+        big = {
+            "config": {"mode": "perf"},
+            "results": {"total": 5000},
+            "responses": [{"idx": i, "text": "lorem ipsum " * 10} for i in range(5000)],
+        }
+        results_path.write_text(json.dumps(big))
+        original_bytes = results_path.read_bytes()
+
+        dest = tmp_path / "out.tar.gz"
+        build_archive(run_folder, dest)
+
+        archived = _read_member(dest, "/results.json")
+        # responses capped well below the original count...
+        assert 0 < len(archived["responses"]) < 5000
+        # ...while the other keys survive intact.
+        assert archived["config"] == {"mode": "perf"}
+        assert archived["results"] == {"total": 5000}
+        # source folder is never mutated.
+        assert results_path.read_bytes() == original_bytes
+
+    def test_truncates_nested_accuracy_results(self, run_folder: Path, tmp_path: Path) -> None:
+        # Accuracy run folders carry an accuracy/results.json; it must be truncated too.
+        acc_dir = run_folder / "accuracy"
+        acc_dir.mkdir()
+        big = {
+            "results": {"total": 5000},
+            "responses": [{"idx": i, "text": "lorem ipsum " * 10} for i in range(5000)],
+        }
+        (acc_dir / "results.json").write_text(json.dumps(big))
+
+        dest = tmp_path / "out.tar.gz"
+        build_archive(run_folder, dest)
+
+        archived = _read_member(dest, "/accuracy/results.json")
+        assert 0 < len(archived["responses"]) < 5000
+        assert archived["results"] == {"total": 5000}
+
+    def test_results_without_responses_archived_unchanged(
+        self, run_folder: Path, tmp_path: Path
+    ) -> None:
+        # The fixture's results.json has no "responses" key -> archived byte-for-byte.
+        original = (run_folder / "results.json").read_bytes()
+        dest = tmp_path / "out.tar.gz"
+        build_archive(run_folder, dest)
+        with tarfile.open(dest) as tar:
+            member = next(m for m in tar.getmembers() if m.name.endswith("/results.json"))
+            fh = tar.extractfile(member)
+            assert fh is not None
+            assert fh.read() == original
+
 
 def _read_member(archive: Path, suffix: str) -> dict:
     with tarfile.open(archive) as tar:

diff --git a/tests/endpoints_submission_cli/submissions/test_builder.py b/tests/endpoints_submission_cli/submissions/test_builder.py
@@ -15,11 +15,11 @@
 from endpoints_submission_cli.submissions.builder import (
     _compute_max_tps,
     _slugify,
-    _truncate_responses,
     build_submission_folder,
     create_bundle_archive,
     extract_archive,
 )
+from endpoints_submission_cli.truncation import truncate_responses as _truncate_responses
 
 
 @pytest.mark.unit