mlcommons · arav-agarwal2 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
@@ -0,0 +1,8 @@
+# coverage.py run settings: measure the current directory, minus the
+# path patterns listed under `omit`.
+[run]
+source = .
+# NOTE(review): `nev/*` looks like a misspelling of `venv/*` (or `env/*`) --
+# confirm which directory this pattern is meant to exclude.
+omit =
+    nev/*
+    tests/*
+    retired_benchmarks/*
+    */site-packages/*
+    */dist-packages/*
@@ -0,0 +1,43 @@
+name: Tests and Coverage
+
+# Run for pushes and pull requests that touch the tests, the submission
+# checker sources, or .github/workflows/codecov.yml.
+# NOTE(review): both triggers are restricted to the `arav-codecov-impl`
+# branch -- confirm whether the default branch should be listed as well.
+on:
+  push:
+    branches: ["arav-codecov-impl"]
+    paths:
+      - "tests/**"
+      - "tools/submission/submission_checker/**"
+      - ".github/workflows/codecov.yml"
+  pull_request:
+    branches: ["arav-codecov-impl"]
+    paths:
+      - "tests/**"
+      # `*` does not match `/`, so it only covers files at the repository
+      # root; the checker sources need their own pattern, exactly as in
+      # the push trigger above.
+      - "*"
+      - "tools/submission/submission_checker/**"
+      - ".github/workflows/codecov.yml"
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Fetch the repository contents into the runner workspace.
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the string 3.10 instead of the number 3.1.
+          python-version: '3.10'
+
+      - name: Install test dependencies
+        run: pip install pytest pytest-cov
+
+      # Produces coverage.xml for the upload step and prints the lines
+      # missing coverage to the job log.
+      - name: Run tests with coverage
+        run: pytest tests/ --cov=. --cov-report=xml --cov-report=term-missing
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          files: coverage.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
@@ -0,0 +1,12 @@
+import sys
+import os
+
+# Ensure tools/submission is on the path so `import submission_checker`
+# resolves.
+sys.path.insert(
+    0,
+    os.path.join(
+        os.path.dirname(__file__),
+        "..",
+        "tools",
+        "submission"))
@@ -0,0 +1,3 @@
+[pytest]
+# Directory searched for tests when pytest is started without arguments.
+testpaths = .
+# Extra import path for the test session; presumably what makes
+# `import submission_checker` resolve -- confirm against the test imports.
+pythonpath = ../tools/submission
@@ -0,0 +1,62 @@
+import json
+import pytest
+
+
+# Token placed at the start of every line built by make_mllog_line.
+MLLOG_MARKER = ":::MLLOG"
+
+
+def make_mllog_line(key, value, is_error=False, is_warning=False):
+    entry = {
+        "key": key,
+        "value": value,
+        "time_ms": 0,
+        "namespace": "",
+        "event_type": "POINT_IN_TIME",
+        "metadata": {
+            "file": "test.py",
+            "line_no": 1,
+            "is_error": is_error,
+            "is_warning": is_warning,
+        },
+    }
+    return f"{MLLOG_MARKER} {json.dumps(entry)}\n"
+
+
+@pytest.fixture()
+def simple_mllog(tmp_path):
+    """A minimal valid MLPerf log with two entries."""
+    p = tmp_path / "mlperf_log_detail.txt"
+    lines = [
+        make_mllog_line("result_validity", "VALID"),
+        make_mllog_line("effective_scenario", "Offline"),
+        make_mllog_line("result_samples_per_second", 123.4),
+    ]
+    p.write_text("".join(lines))
+    return p
+
+
+@pytest.fixture()
+def mllog_with_error(tmp_path):
+    """An MLPerf log containing one error entry."""
+    p = tmp_path / "mlperf_log_detail.txt"
+    lines = [
+        make_mllog_line("result_validity", "INVALID"),
+        make_mllog_line(
+            "loadgen_error",
+            "something went wrong",
+            is_error=True),
+    ]
+    p.write_text("".join(lines))
+    return p
+
+
+@pytest.fixture()
+def mllog_duplicate_key(tmp_path):
+    """An MLPerf log with the same key appearing twice."""
+    p = tmp_path / "mlperf_log_detail.txt"
+    lines = [
+        make_mllog_line("seeds", 1234),
+        make_mllog_line("seeds", 5678),
+    ]
+    p.write_text("".join(lines))
+    return p
@@ -0,0 +1,98 @@
+import logging
+import pytest
+from submission_checker.checks.base import BaseCheck
+
+
+# Logger passed to the constructor of every BaseCheck subclass below.
+log = logging.getLogger("test")
+
+
+class AlwaysPassCheck(BaseCheck):
+    def __init__(self):
+        super().__init__(log, "/fake/path")
+        self.checks = [self.check_a, self.check_b]
+
+    def check_a(self):
+        return True
+
+    def check_b(self):
+        return True
+
+
+class SomeFailCheck(BaseCheck):
+    def __init__(self):
+        super().__init__(log, "/fake/path")
+        self.checks = [self.pass_check, self.fail_check]
+
+    def pass_check(self):
+        return True
+
+    def fail_check(self):
+        return False
+
+
+class ExceptionCheck(BaseCheck):
+    def __init__(self):
+        super().__init__(log, "/fake/path")
+        self.checks = [self.boom]
+
+    def boom(self):
+        raise RuntimeError("intentional failure")
+
+
+class EmptyCheck(BaseCheck):
+    def __init__(self):
+        super().__init__(log, "/fake/path")
+        self.checks = []
+
+
+# ---------------------------------------------------------------------------
+# run_checks
+# ---------------------------------------------------------------------------
+
+class TestRunChecks:
+    def test_all_pass_returns_true(self):
+        assert AlwaysPassCheck().run_checks() is True
+
+    def test_any_fail_returns_false(self):
+        assert SomeFailCheck().run_checks() is False
+
+    def test_exception_treated_as_failure(self):
+        assert ExceptionCheck().run_checks() is False
+
+    def test_no_checks_returns_true(self):
+        assert EmptyCheck().run_checks() is True
+
+
+# ---------------------------------------------------------------------------
+# __call__
+# ---------------------------------------------------------------------------
+
+class TestCall:
+    def test_callable_returns_true_when_all_pass(self):
+        assert AlwaysPassCheck()() is True
+
+    def test_callable_returns_false_when_any_fail(self):
+        assert SomeFailCheck()() is False
+
+
+# ---------------------------------------------------------------------------
+# execute
+# ---------------------------------------------------------------------------
+
+def test_execute_delegates_to_check_method():
+    checker = AlwaysPassCheck()
+    assert checker.execute(checker.check_a) is True
+
+
+# ---------------------------------------------------------------------------
+# Attributes
+# ---------------------------------------------------------------------------
+
+def test_path_stored():
+    checker = AlwaysPassCheck()
+    assert checker.path == "/fake/path"
+
+
+def test_log_stored():
+    checker = AlwaysPassCheck()
+    assert checker.log is log
@@ -0,0 +1,155 @@
+import pytest
+from submission_checker.configuration.configuration import Config
+
+
+@pytest.fixture()
+def cfg():
+    return Config(version="v6.0", extra_model_benchmark_map={})
+
+
+# ---------------------------------------------------------------------------
+# Initialization
+# ---------------------------------------------------------------------------
+
+class TestConfigInit:
+    def test_version_stored(self, cfg):
+        assert cfg.version == "v6.0"
+
+    def test_models_populated(self, cfg):
+        assert "resnet" in cfg.models
+        assert "bert-99" in cfg.models
+
+    def test_unknown_version_raises(self):
+        with pytest.raises((KeyError, TypeError)):
+            Config(version="v99.99", extra_model_benchmark_map={})
+
+
+# ---------------------------------------------------------------------------
+# set_type
+# ---------------------------------------------------------------------------
+
+class TestSetType:
+    def test_datacenter_sets_required(self, cfg):
+        cfg.set_type("datacenter")
+        assert cfg.required is not None
+
+    def test_edge_sets_required(self, cfg):
+        cfg.set_type("edge")
+        assert cfg.required is not None
+
+    def test_combined_accepted(self, cfg):
+        cfg.set_type("datacenter,edge")
+        assert cfg.required is not None
+
+    def test_combined_reversed_accepted(self, cfg):
+        cfg.set_type("edge,datacenter")
+        assert cfg.required is not None
+
+    def test_invalid_type_raises(self, cfg):
+        with pytest.raises(ValueError, match="invalid system type"):
+            cfg.set_type("cloud")
+
+
+# ---------------------------------------------------------------------------
+# get_mlperf_model
+# ---------------------------------------------------------------------------
+
+class TestGetMlperfModel:
+    def test_official_name_passthrough(self, cfg):
+        assert cfg.get_mlperf_model("resnet") == "resnet"
+
+    def test_resnet50_maps_to_resnet(self, cfg):
+        assert cfg.get_mlperf_model("resnet50") == "resnet"
+
+    def test_mobilenet_maps_to_resnet(self, cfg):
+        assert cfg.get_mlperf_model("mobilenet-v1") == "resnet"
+
+    def test_bert_99_variant(self, cfg):
+        assert cfg.get_mlperf_model("bert-99-large") == "bert-99"
+
+    def test_extra_mapping_used(self, cfg):
+        assert cfg.get_mlperf_model(
+            "my_resnet", {
+                "my_resnet": "resnet"}) == "resnet"
+
+
+# ---------------------------------------------------------------------------
+# get_required / get_optional
+# ---------------------------------------------------------------------------
+
+class TestGetRequired:
+    def test_resnet_edge_requires_three_scenarios(self, cfg):
+        cfg.set_type("edge")
+        req = cfg.get_required("resnet")
+        assert req == {"SingleStream", "MultiStream", "Offline"}
+
+    def test_unknown_model_returns_none(self, cfg):
+        cfg.set_type("edge")
+        assert cfg.get_required("nonexistent-model") is None
+
+    def test_optional_empty_set_for_unknown(self, cfg):
+        cfg.set_type("edge")
+        assert cfg.get_optional("nonexistent-model") == set()
+
+
+# ---------------------------------------------------------------------------
+# get_accuracy_target
+# ---------------------------------------------------------------------------
+
+class TestGetAccuracyTarget:
+    def test_resnet_accuracy_target(self, cfg):
+        target = cfg.get_accuracy_target("resnet")
+        assert target is not None
+        assert target[0] == "acc"
+        assert target[1] == pytest.approx(76.46 * 0.99)
+
+    def test_unknown_model_raises(self, cfg):
+        with pytest.raises(ValueError, match="model not known"):
+            cfg.get_accuracy_target("not-a-model")
+
+
+# ---------------------------------------------------------------------------
+# get_delta_perc
+# ---------------------------------------------------------------------------
+
+class TestGetDeltaPerc:
+    def test_standard_model_defaults_to_1(self, cfg):
+        assert cfg.get_delta_perc("resnet", "acc") == 1
+
+    def test_high_accuracy_model_defaults_to_0_1(self, cfg):
+        assert cfg.get_delta_perc("bert-99.9", "F1") == pytest.approx(0.1)
+
+
+# ---------------------------------------------------------------------------
+# Boolean helpers
+# ---------------------------------------------------------------------------
+
+class TestBooleanHelpers:
+    def test_uses_early_stopping_server(self, cfg):
+        assert cfg.uses_early_stopping("Server") is True
+
+    def test_uses_early_stopping_offline_false(self, cfg):
+        assert cfg.uses_early_stopping("Offline") is False
+
+    def test_has_new_logging_format(self, cfg):
+        assert cfg.has_new_logging_format() is True
+
+
+# ---------------------------------------------------------------------------
+# get_llm_models
+# ---------------------------------------------------------------------------
+
+def test_llm_models_include_llama(cfg):
+    llms = cfg.get_llm_models()
+    assert any("llama" in m for m in llms)
+
+
+# ---------------------------------------------------------------------------
+# ignore_errors
+# ---------------------------------------------------------------------------
+
+def test_ignore_errors_matches_configured_string(cfg):
+    # ignore_errors is driven by base["ignore_errors"]; we just verify it
+    # doesn't crash and returns a bool
+    result = cfg.ignore_errors("some random log line")
+    assert isinstance(result, bool)