From 6348746db7bebb6de5a1b4e8f6eab2771734e497 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 22 Apr 2026 09:38:24 -0400 Subject: [PATCH 1/6] Initial test structure --- tests/conftest.py | 5 + tests/pytest.ini | 3 + tests/submission_checker/conftest.py | 59 ++++++ .../test_accuracy_parser.py | 144 +++++++++++++ tests/submission_checker/test_base_check.py | 98 +++++++++ tests/submission_checker/test_config.py | 153 ++++++++++++++ .../submission_checker/test_loadgen_parser.py | 120 +++++++++++ tests/submission_checker/test_utils.py | 197 ++++++++++++++++++ 8 files changed, 779 insertions(+) create mode 100644 tests/conftest.py create mode 100644 tests/pytest.ini create mode 100644 tests/submission_checker/conftest.py create mode 100644 tests/submission_checker/test_accuracy_parser.py create mode 100644 tests/submission_checker/test_base_check.py create mode 100644 tests/submission_checker/test_config.py create mode 100644 tests/submission_checker/test_loadgen_parser.py create mode 100644 tests/submission_checker/test_utils.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..2f0f4db9b7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,5 @@ +import sys +import os + +# Ensure tools/submission is on the path so `import submission_checker` resolves. +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "submission")) diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 0000000000..32ecb85b0d --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = . 
+pythonpath = ../tools/submission diff --git a/tests/submission_checker/conftest.py b/tests/submission_checker/conftest.py new file mode 100644 index 0000000000..cc14060905 --- /dev/null +++ b/tests/submission_checker/conftest.py @@ -0,0 +1,59 @@ +import json +import pytest + + +MLLOG_MARKER = ":::MLLOG" + + +def make_mllog_line(key, value, is_error=False, is_warning=False): + entry = { + "key": key, + "value": value, + "time_ms": 0, + "namespace": "", + "event_type": "POINT_IN_TIME", + "metadata": { + "file": "test.py", + "line_no": 1, + "is_error": is_error, + "is_warning": is_warning, + }, + } + return f"{MLLOG_MARKER} {json.dumps(entry)}\n" + + +@pytest.fixture() +def simple_mllog(tmp_path): + """A minimal valid MLPerf log with three entries.""" + p = tmp_path / "mlperf_log_detail.txt" + lines = [ + make_mllog_line("result_validity", "VALID"), + make_mllog_line("effective_scenario", "Offline"), + make_mllog_line("result_samples_per_second", 123.4), + ] + p.write_text("".join(lines)) + return p + + +@pytest.fixture() +def mllog_with_error(tmp_path): + """An MLPerf log containing one error entry.""" + p = tmp_path / "mlperf_log_detail.txt" + lines = [ + make_mllog_line("result_validity", "INVALID"), + make_mllog_line("loadgen_error", "something went wrong", is_error=True), + ] + p.write_text("".join(lines)) + return p + + +@pytest.fixture() +def mllog_duplicate_key(tmp_path): + """An MLPerf log with the same key appearing twice.""" + p = tmp_path / "mlperf_log_detail.txt" + lines = [ + make_mllog_line("seeds", 1234), + make_mllog_line("seeds", 5678), + ] + p.write_text("".join(lines)) + return p diff --git a/tests/submission_checker/test_accuracy_parser.py b/tests/submission_checker/test_accuracy_parser.py new file mode 100644 index 0000000000..f61b0370b8 --- /dev/null +++ b/tests/submission_checker/test_accuracy_parser.py @@ -0,0 +1,144 @@ +import pytest +from submission_checker.parsers.accuracy_parser import parse_line + + +# 
--------------------------------------------------------------------------- +# Regex-backed metrics +# --------------------------------------------------------------------------- + +class TestAccMetric: + def test_plain_accuracy_line(self): + assert parse_line("accuracy = 76.50", "acc") == pytest.approx(76.50) + + def test_json_style_accuracy_line(self): + assert parse_line('{"accuracy": 76.50}', "acc") == pytest.approx(76.50) + + def test_no_match_returns_none(self): + assert parse_line("something else entirely", "acc") is None + + +class TestAUCMetric: + def test_auc_line(self): + assert parse_line("AUC=80.31", "AUC") == pytest.approx(80.31) + + def test_auc_with_trailing_text(self): + assert parse_line("AUC=80.31 (threshold=0.5)", "AUC") == pytest.approx(80.31) + + def test_no_match_returns_none(self): + assert parse_line("accuracy = 80.31", "AUC") is None + + +class TestMAPMetric: + def test_map_equals_format(self): + assert parse_line("mAP=37.55", "mAP") == pytest.approx(37.55) + + def test_map_total_dict_format(self): + assert parse_line("'Total': 37.55", "mAP") == pytest.approx(37.55) + + def test_no_match_returns_none(self): + assert parse_line("Average Precision = 37.55", "mAP") is None + + +class TestACCURACYMetric: + def test_wer_accuracy_line(self): + val = parse_line("Word Error Rate: 4.5%, accuracy=95.5%", "ACCURACY") + assert val == pytest.approx(95.5) + + def test_no_match_returns_none(self): + assert parse_line("accuracy=95.5%", "ACCURACY") is None + + +class TestDICEMetric: + def test_dice_line(self): + assert parse_line("Accuracy: mean = 0.86170", "DICE") == pytest.approx(0.86170) + + def test_no_match_returns_none(self): + assert parse_line("mean accuracy 0.86", "DICE") is None + + +class TestDLRMMetrics: + def test_dlrm_ne(self): + val = parse_line("metric/lifetime_ne/rating: 0.8500", "DLRM_NE") + assert val == pytest.approx(0.85) + + def test_dlrm_acc(self): + val = parse_line("metric/lifetime_accuracy/rating: 0.9200", "DLRM_ACC") + assert val 
== pytest.approx(0.92) + + def test_dlrm_auc(self): + val = parse_line("metric/lifetime_gauc/rating: 0.8100", "DLRM_AUC") + assert val == pytest.approx(0.81) + + +# --------------------------------------------------------------------------- +# Dict-backed metrics (ast.literal_eval) +# --------------------------------------------------------------------------- + +class TestROUGEMetrics: + ROUGE_LINE = "{'rouge1': 44.43, 'rouge2': 22.04, 'rougeL': 28.62, 'rougeLsum': 35.0, 'gen_len': 8167644}" + + def test_rouge1(self): + assert parse_line(self.ROUGE_LINE, "ROUGE1") == pytest.approx(44.43) + + def test_rouge2(self): + assert parse_line(self.ROUGE_LINE, "ROUGE2") == pytest.approx(22.04) + + def test_rougel(self): + assert parse_line(self.ROUGE_LINE, "ROUGEL") == pytest.approx(28.62) + + def test_rougelsum(self): + assert parse_line(self.ROUGE_LINE, "ROUGELSUM") == pytest.approx(35.0) + + def test_gen_len(self): + assert parse_line(self.ROUGE_LINE, "GEN_LEN") == pytest.approx(8167644) + + def test_no_dict_returns_none(self): + assert parse_line("rouge1 = 44.43", "ROUGE1") is None + + +class TestTokensPerSample: + def test_tokens_per_sample(self): + line = "{'tokens_per_sample': 294.45}" + assert parse_line(line, "TOKENS_PER_SAMPLE") == pytest.approx(294.45) + + +class TestCLIPAndFIDMetrics: + CLIP_LINE = "Accuracy Results: {'CLIP_SCORE': 31.69, 'FID_SCORE': 23.01}" + + def test_clip_score(self): + assert parse_line(self.CLIP_LINE, "CLIP_SCORE") == pytest.approx(31.69) + + def test_fid_score(self): + assert parse_line(self.CLIP_LINE, "FID_SCORE") == pytest.approx(23.01) + + def test_clip_score_missing_prefix_returns_none(self): + assert parse_line("{'CLIP_SCORE': 31.69}", "CLIP_SCORE") is None + + +# --------------------------------------------------------------------------- +# JSON-backed metrics +# --------------------------------------------------------------------------- + +class TestF1Metric: + def test_f1_line(self): + assert parse_line('{"f1": 90.874}', "F1") == 
pytest.approx(90.874) + + def test_f1_with_prefix(self): + assert parse_line('prefix text {"f1": 90.874}', "F1") == pytest.approx(90.874) + + def test_f1_hierarchical(self): + assert parse_line('{"f1": 85.0}', "F1_HIERARCHICAL") == pytest.approx(85.0) + + def test_no_json_returns_none(self): + assert parse_line("f1 = 90.874", "F1") is None + + def test_missing_key_returns_none(self): + assert parse_line('{"score": 90.874}', "F1") is None + + +# --------------------------------------------------------------------------- +# Unknown metric +# --------------------------------------------------------------------------- + +def test_unknown_metric_returns_none(): + assert parse_line("accuracy = 75.0", "UNKNOWN_METRIC") is None diff --git a/tests/submission_checker/test_base_check.py b/tests/submission_checker/test_base_check.py new file mode 100644 index 0000000000..2783f5cc41 --- /dev/null +++ b/tests/submission_checker/test_base_check.py @@ -0,0 +1,98 @@ +import logging +import pytest +from submission_checker.checks.base import BaseCheck + + +log = logging.getLogger("test") + + +class AlwaysPassCheck(BaseCheck): + def __init__(self): + super().__init__(log, "/fake/path") + self.checks = [self.check_a, self.check_b] + + def check_a(self): + return True + + def check_b(self): + return True + + +class SomeFailCheck(BaseCheck): + def __init__(self): + super().__init__(log, "/fake/path") + self.checks = [self.pass_check, self.fail_check] + + def pass_check(self): + return True + + def fail_check(self): + return False + + +class ExceptionCheck(BaseCheck): + def __init__(self): + super().__init__(log, "/fake/path") + self.checks = [self.boom] + + def boom(self): + raise RuntimeError("intentional failure") + + +class EmptyCheck(BaseCheck): + def __init__(self): + super().__init__(log, "/fake/path") + self.checks = [] + + +# --------------------------------------------------------------------------- +# run_checks +# 
--------------------------------------------------------------------------- + +class TestRunChecks: + def test_all_pass_returns_true(self): + assert AlwaysPassCheck().run_checks() is True + + def test_any_fail_returns_false(self): + assert SomeFailCheck().run_checks() is False + + def test_exception_treated_as_failure(self): + assert ExceptionCheck().run_checks() is False + + def test_no_checks_returns_true(self): + assert EmptyCheck().run_checks() is True + + +# --------------------------------------------------------------------------- +# __call__ +# --------------------------------------------------------------------------- + +class TestCall: + def test_callable_returns_true_when_all_pass(self): + assert AlwaysPassCheck()() is True + + def test_callable_returns_false_when_any_fail(self): + assert SomeFailCheck()() is False + + +# --------------------------------------------------------------------------- +# execute +# --------------------------------------------------------------------------- + +def test_execute_delegates_to_check_method(): + checker = AlwaysPassCheck() + assert checker.execute(checker.check_a) is True + + +# --------------------------------------------------------------------------- +# Attributes +# --------------------------------------------------------------------------- + +def test_path_stored(): + checker = AlwaysPassCheck() + assert checker.path == "/fake/path" + + +def test_log_stored(): + checker = AlwaysPassCheck() + assert checker.log is log diff --git a/tests/submission_checker/test_config.py b/tests/submission_checker/test_config.py new file mode 100644 index 0000000000..e9adbc83cc --- /dev/null +++ b/tests/submission_checker/test_config.py @@ -0,0 +1,153 @@ +import pytest +from submission_checker.configuration.configuration import Config + + +@pytest.fixture() +def cfg(): + return Config(version="v6.0", extra_model_benchmark_map={}) + + +# --------------------------------------------------------------------------- +# Initialization 
+# --------------------------------------------------------------------------- + +class TestConfigInit: + def test_version_stored(self, cfg): + assert cfg.version == "v6.0" + + def test_models_populated(self, cfg): + assert "resnet" in cfg.models + assert "bert-99" in cfg.models + + def test_unknown_version_raises(self): + with pytest.raises((KeyError, TypeError)): + Config(version="v99.99", extra_model_benchmark_map={}) + + +# --------------------------------------------------------------------------- +# set_type +# --------------------------------------------------------------------------- + +class TestSetType: + def test_datacenter_sets_required(self, cfg): + cfg.set_type("datacenter") + assert cfg.required is not None + + def test_edge_sets_required(self, cfg): + cfg.set_type("edge") + assert cfg.required is not None + + def test_combined_accepted(self, cfg): + cfg.set_type("datacenter,edge") + assert cfg.required is not None + + def test_combined_reversed_accepted(self, cfg): + cfg.set_type("edge,datacenter") + assert cfg.required is not None + + def test_invalid_type_raises(self, cfg): + with pytest.raises(ValueError, match="invalid system type"): + cfg.set_type("cloud") + + +# --------------------------------------------------------------------------- +# get_mlperf_model +# --------------------------------------------------------------------------- + +class TestGetMlperfModel: + def test_official_name_passthrough(self, cfg): + assert cfg.get_mlperf_model("resnet") == "resnet" + + def test_resnet50_maps_to_resnet(self, cfg): + assert cfg.get_mlperf_model("resnet50") == "resnet" + + def test_mobilenet_maps_to_resnet(self, cfg): + assert cfg.get_mlperf_model("mobilenet-v1") == "resnet" + + def test_bert_99_variant(self, cfg): + assert cfg.get_mlperf_model("bert-99-large") == "bert-99" + + def test_extra_mapping_used(self, cfg): + assert cfg.get_mlperf_model("my_resnet", {"my_resnet": "resnet"}) == "resnet" + + +# 
--------------------------------------------------------------------------- +# get_required / get_optional +# --------------------------------------------------------------------------- + +class TestGetRequired: + def test_resnet_edge_requires_three_scenarios(self, cfg): + cfg.set_type("edge") + req = cfg.get_required("resnet") + assert req == {"SingleStream", "MultiStream", "Offline"} + + def test_unknown_model_returns_none(self, cfg): + cfg.set_type("edge") + assert cfg.get_required("nonexistent-model") is None + + def test_optional_empty_set_for_unknown(self, cfg): + cfg.set_type("edge") + assert cfg.get_optional("nonexistent-model") == set() + + +# --------------------------------------------------------------------------- +# get_accuracy_target +# --------------------------------------------------------------------------- + +class TestGetAccuracyTarget: + def test_resnet_accuracy_target(self, cfg): + target = cfg.get_accuracy_target("resnet") + assert target is not None + assert target[0] == "acc" + assert target[1] == pytest.approx(76.46 * 0.99) + + def test_unknown_model_raises(self, cfg): + with pytest.raises(ValueError, match="model not known"): + cfg.get_accuracy_target("not-a-model") + + +# --------------------------------------------------------------------------- +# get_delta_perc +# --------------------------------------------------------------------------- + +class TestGetDeltaPerc: + def test_standard_model_defaults_to_1(self, cfg): + assert cfg.get_delta_perc("resnet", "acc") == 1 + + def test_high_accuracy_model_defaults_to_0_1(self, cfg): + assert cfg.get_delta_perc("bert-99.9", "F1") == pytest.approx(0.1) + + +# --------------------------------------------------------------------------- +# Boolean helpers +# --------------------------------------------------------------------------- + +class TestBooleanHelpers: + def test_uses_early_stopping_server(self, cfg): + assert cfg.uses_early_stopping("Server") is True + + def 
test_uses_early_stopping_offline_false(self, cfg): + assert cfg.uses_early_stopping("Offline") is False + + def test_has_new_logging_format(self, cfg): + assert cfg.has_new_logging_format() is True + + +# --------------------------------------------------------------------------- +# get_llm_models +# --------------------------------------------------------------------------- + +def test_llm_models_include_llama(cfg): + llms = cfg.get_llm_models() + assert any("llama" in m for m in llms) + + +# --------------------------------------------------------------------------- +# ignore_errors +# --------------------------------------------------------------------------- + +def test_ignore_errors_matches_configured_string(cfg): + # ignore_errors is driven by base["ignore_errors"]; we just verify it + # doesn't crash and returns a bool + result = cfg.ignore_errors("some random log line") + assert isinstance(result, bool) diff --git a/tests/submission_checker/test_loadgen_parser.py b/tests/submission_checker/test_loadgen_parser.py new file mode 100644 index 0000000000..8fc519a783 --- /dev/null +++ b/tests/submission_checker/test_loadgen_parser.py @@ -0,0 +1,120 @@ +import json +import pytest +from submission_checker.parsers.loadgen_parser import LoadgenParser + + +MLLOG_MARKER = ":::MLLOG" + + +def make_mllog_line(key, value, is_error=False, is_warning=False): + entry = { + "key": key, + "value": value, + "time_ms": 0, + "namespace": "", + "event_type": "POINT_IN_TIME", + "metadata": { + "file": "test.py", + "line_no": 1, + "is_error": is_error, + "is_warning": is_warning, + }, + } + return f"{MLLOG_MARKER} {json.dumps(entry)}\n" + + +class TestLoadgenParserBasic: + def test_parses_valid_log(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert "result_validity" in p.get_keys() + assert "effective_scenario" in p.get_keys() + + def test_getitem_returns_first_value(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert p["result_validity"] == 
"VALID" + assert p["effective_scenario"] == "Offline" + + def test_getitem_missing_key_returns_none(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert p["nonexistent_key"] is None + + def test_num_messages(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert p.num_messages() == 3 + + def test_get_messages_is_dict(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert isinstance(p.get_messages(), dict) + + +class TestLoadgenParserErrors: + def test_no_error_in_clean_log(self, simple_mllog): + p = LoadgenParser(str(simple_mllog)) + assert p.num_errors() == 0 + assert not p.has_error() + + def test_detects_error_entry(self, mllog_with_error): + p = LoadgenParser(str(mllog_with_error)) + assert p.num_errors() == 1 + assert p.has_error() + + def test_get_errors_returns_list(self, mllog_with_error): + p = LoadgenParser(str(mllog_with_error)) + errors = p.get_errors() + assert len(errors) == 1 + assert errors[0]["key"] == "loadgen_error" + + +class TestLoadgenParserDuplicateKeys: + def test_duplicate_key_stored_twice(self, mllog_duplicate_key): + p = LoadgenParser(str(mllog_duplicate_key)) + entries = p.get("seeds") + assert len(entries) == 2 + + def test_getitem_returns_first_on_duplicate(self, mllog_duplicate_key): + p = LoadgenParser(str(mllog_duplicate_key)) + assert p["seeds"] == 1234 + + +class TestLoadgenParserStrict: + def test_invalid_first_line_raises(self, tmp_path): + bad = tmp_path / "bad.txt" + bad.write_text("not a valid mllog line\n") + with pytest.raises(RuntimeError, match="Marker not found"): + LoadgenParser(str(bad)) + + def test_invalid_json_strict_raises(self, tmp_path): + p = tmp_path / "log.txt" + p.write_text(":::MLLOG not-valid-json\n") + with pytest.raises(RuntimeError): + LoadgenParser(str(p), strict=True) + + def test_invalid_json_non_strict_skips(self, tmp_path): + p = tmp_path / "log.txt" + valid_line = make_mllog_line("result_validity", "VALID") + p.write_text(valid_line + ":::MLLOG 
not-valid-json\n") + parser = LoadgenParser(str(p), strict=False) + assert parser["result_validity"] == "VALID" + + +class TestLoadgenParserEndpoints: + def test_endpoints_marker_accepted(self, tmp_path): + p = tmp_path / "log.txt" + entry = json.dumps({ + "key": "endpoint_key", + "value": "endpoint_value", + "metadata": {"is_error": False, "is_warning": False}, + }) + p.write_text(f":::ENDPTS {entry}\n") + parser = LoadgenParser(str(p)) + assert parser.log_is_endpoints + assert parser["endpoint_key"] == "endpoint_value" + + +class TestLoadgenParserDump: + def test_dump_writes_json(self, simple_mllog, tmp_path): + parser = LoadgenParser(str(simple_mllog)) + out = tmp_path / "out.json" + parser.dump(str(out)) + data = json.loads(out.read_text()) + assert "result_validity" in data diff --git a/tests/submission_checker/test_utils.py b/tests/submission_checker/test_utils.py new file mode 100644 index 0000000000..f1513c91d0 --- /dev/null +++ b/tests/submission_checker/test_utils.py @@ -0,0 +1,197 @@ +import os +import pytest +from submission_checker.utils import ( + files_diff, + get_boolean, + is_number, + lower_list, + contains_list, + merge_two_dict, + sum_dict_values, + split_path, + list_dir, + list_files, + list_empty_dirs_recursively, + list_files_recursively, +) + + +# --------------------------------------------------------------------------- +# files_diff +# --------------------------------------------------------------------------- + +class TestFilesDiff: + def test_identical_lists_no_diff(self): + assert files_diff(["a.txt", "b.txt"], ["a.txt", "b.txt"]) == set() + + def test_missing_file_reported(self): + diff = files_diff(["a.txt"], ["a.txt", "b.txt"]) + assert "b.txt" in diff + + def test_extra_file_reported(self): + diff = files_diff(["a.txt", "extra.txt"], ["a.txt"]) + assert "extra.txt" in diff + + def test_optional_files_ignored(self): + # mlperf_log_trace.json is always optional + diff = files_diff(["a.txt", "mlperf_log_trace.json"], ["a.txt"]) + 
assert diff == set() + + def test_custom_optional_ignored(self): + diff = files_diff(["a.txt", "custom.json"], ["a.txt"], optional=["custom.json"]) + assert diff == set() + + +# --------------------------------------------------------------------------- +# get_boolean +# --------------------------------------------------------------------------- + +class TestGetBoolean: + @pytest.mark.parametrize("val", [True, "true", "True", "TRUE", 1]) + def test_truthy_values(self, val): + assert get_boolean(val) is True + + @pytest.mark.parametrize("val", [False, "false", "False", "FALSE", 0]) + def test_falsy_values(self, val): + assert get_boolean(val) is False + + def test_none_returns_false(self): + assert get_boolean(None) is False + + def test_invalid_type_raises(self): + with pytest.raises(TypeError): + get_boolean([]) + + +# --------------------------------------------------------------------------- +# is_number +# --------------------------------------------------------------------------- + +class TestIsNumber: + @pytest.mark.parametrize("val", ["3.14", "0", "-1", "1e5", "123"]) + def test_numeric_strings(self, val): + assert is_number(val) is True + + @pytest.mark.parametrize("val", ["abc", "", "1.2.3"]) + def test_non_numeric_strings(self, val): + assert is_number(val) is False + + def test_nan_is_numeric(self): + # float("NaN") succeeds in Python, so is_number returns True + assert is_number("NaN") is True + + +# --------------------------------------------------------------------------- +# lower_list +# --------------------------------------------------------------------------- + +def test_lower_list_converts_to_lowercase(): + assert lower_list(["Hello", "WORLD", "123"]) == ["hello", "world", "123"] + + +def test_lower_list_empty(): + assert lower_list([]) == [] + + +# --------------------------------------------------------------------------- +# contains_list +# --------------------------------------------------------------------------- + +class TestContainsList: 
+ def test_all_present(self): + missing, ok = contains_list(["a", "b", "c"], ["a", "b"]) + assert ok is True + assert missing == [] + + def test_some_missing(self): + missing, ok = contains_list(["a"], ["a", "b"]) + assert ok is False + assert "b" in missing + + def test_empty_needle(self): + _, ok = contains_list(["a"], []) + assert ok is True + + +# --------------------------------------------------------------------------- +# merge_two_dict +# --------------------------------------------------------------------------- + +class TestMergeTwoDict: + def test_disjoint_dicts_merged(self): + result = merge_two_dict({"a": 1}, {"b": 2}) + assert result == {"a": 1, "b": 2} + + def test_overlapping_keys_summed(self): + result = merge_two_dict({"a": [1]}, {"a": [2]}) + assert result == {"a": [1, 2]} + + def test_original_not_mutated(self): + x = {"a": 1} + merge_two_dict(x, {"b": 2}) + assert x == {"a": 1} + + +# --------------------------------------------------------------------------- +# sum_dict_values +# --------------------------------------------------------------------------- + +def test_sum_dict_values(): + assert sum_dict_values({"a": 1, "b": 2, "c": 3}) == 6 + + +def test_sum_dict_values_empty(): + assert sum_dict_values({}) == 0 + + +# --------------------------------------------------------------------------- +# split_path +# --------------------------------------------------------------------------- + +def test_split_path_unix(): + assert split_path("foo/bar/baz") == ["foo", "bar", "baz"] + + +def test_split_path_windows_backslash(): + assert split_path("foo\\bar\\baz") == ["foo", "bar", "baz"] + + +# --------------------------------------------------------------------------- +# Filesystem helpers (use tmp_path) +# --------------------------------------------------------------------------- + +@pytest.fixture() +def sample_tree(tmp_path): + (tmp_path / "subA").mkdir() + (tmp_path / "subB").mkdir() + (tmp_path / "subA" / "file1.txt").write_text("x") + (tmp_path 
/ "subA" / "file2.txt").write_text("y") + (tmp_path / "subB").mkdir(exist_ok=True) + return tmp_path + + +def test_list_dir(sample_tree): + dirs = list_dir(str(sample_tree)) + assert dirs == ["subA", "subB"] + + +def test_list_files(sample_tree): + files = list_files(str(sample_tree / "subA")) + assert files == ["file1.txt", "file2.txt"] + + +def test_list_empty_dirs(tmp_path): + empty = tmp_path / "empty" + empty.mkdir() + (tmp_path / "nonempty").mkdir() + (tmp_path / "nonempty" / "f.txt").write_text("x") + empties = list_empty_dirs_recursively(str(tmp_path)) + assert str(empty) in empties + assert str(tmp_path / "nonempty") not in empties + + +def test_list_files_recursively(sample_tree): + files = list_files_recursively(str(sample_tree)) + names = [os.path.basename(f) for f in files] + assert "file1.txt" in names + assert "file2.txt" in names From 38ec3cfaa446a17c1762e2042dd2d4d1448b6f14 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 22 Apr 2026 09:41:54 -0400 Subject: [PATCH 2/6] TEST ONLY codecov impl --- .github/workflows/codecov.yml | 43 +++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/workflows/codecov.yml diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100644 index 0000000000..27df9e33ff --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,43 @@ +name: Tests and Coverage + +on: + push: + branches: ["arav-codecov-impl"] + paths: + - "tests/**" + - "tools/submission/submission_checker/**" + - ".github/workflows/codecov.yml" + pull_request: + branches: ["arav-codecov-impl"] + paths: + - "tests/**" + - "tools/submission/submission_checker/**" + - ".github/workflows/codecov.yml" + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install test dependencies + run: pip install pytest pytest-cov + + - name: Run tests with coverage 
+ run: > + pytest tests/ + --cov=. + --cov-report=xml + --cov-report=term-missing + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + files: coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} From 2c8c736e1bd27d276cb73a02892b1f5de99e5a70 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 22 Apr 2026 13:42:40 +0000 Subject: [PATCH 3/6] [Automated Commit] Format Codebase --- tests/conftest.py | 11 +++++++++-- tests/submission_checker/conftest.py | 5 ++++- tests/submission_checker/test_accuracy_parser.py | 16 ++++++++++++---- tests/submission_checker/test_config.py | 4 +++- tests/submission_checker/test_utils.py | 3 ++- 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2f0f4db9b7..f068daa9e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,12 @@ import sys import os -# Ensure tools/submission is on the path so `import submission_checker` resolves. -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "submission")) +# Ensure tools/submission is on the path so `import submission_checker` +# resolves. 
+sys.path.insert( + 0, + os.path.join( + os.path.dirname(__file__), + "..", + "tools", + "submission")) diff --git a/tests/submission_checker/conftest.py b/tests/submission_checker/conftest.py index cc14060905..fd990d5155 100644 --- a/tests/submission_checker/conftest.py +++ b/tests/submission_checker/conftest.py @@ -41,7 +41,10 @@ def mllog_with_error(tmp_path): p = tmp_path / "mlperf_log_detail.txt" lines = [ make_mllog_line("result_validity", "INVALID"), - make_mllog_line("loadgen_error", "something went wrong", is_error=True), + make_mllog_line( + "loadgen_error", + "something went wrong", + is_error=True), ] p.write_text("".join(lines)) return p diff --git a/tests/submission_checker/test_accuracy_parser.py b/tests/submission_checker/test_accuracy_parser.py index f61b0370b8..46e72a187c 100644 --- a/tests/submission_checker/test_accuracy_parser.py +++ b/tests/submission_checker/test_accuracy_parser.py @@ -22,7 +22,9 @@ def test_auc_line(self): assert parse_line("AUC=80.31", "AUC") == pytest.approx(80.31) def test_auc_with_trailing_text(self): - assert parse_line("AUC=80.31 (threshold=0.5)", "AUC") == pytest.approx(80.31) + assert parse_line( + "AUC=80.31 (threshold=0.5)", + "AUC") == pytest.approx(80.31) def test_no_match_returns_none(self): assert parse_line("accuracy = 80.31", "AUC") is None @@ -50,7 +52,9 @@ def test_no_match_returns_none(self): class TestDICEMetric: def test_dice_line(self): - assert parse_line("Accuracy: mean = 0.86170", "DICE") == pytest.approx(0.86170) + assert parse_line( + "Accuracy: mean = 0.86170", + "DICE") == pytest.approx(0.86170) def test_no_match_returns_none(self): assert parse_line("mean accuracy 0.86", "DICE") is None @@ -124,10 +128,14 @@ def test_f1_line(self): assert parse_line('{"f1": 90.874}', "F1") == pytest.approx(90.874) def test_f1_with_prefix(self): - assert parse_line('prefix text {"f1": 90.874}', "F1") == pytest.approx(90.874) + assert parse_line( + 'prefix text {"f1": 90.874}', + "F1") == pytest.approx(90.874) def 
test_f1_hierarchical(self): - assert parse_line('{"f1": 85.0}', "F1_HIERARCHICAL") == pytest.approx(85.0) + assert parse_line( + '{"f1": 85.0}', + "F1_HIERARCHICAL") == pytest.approx(85.0) def test_no_json_returns_none(self): assert parse_line("f1 = 90.874", "F1") is None diff --git a/tests/submission_checker/test_config.py b/tests/submission_checker/test_config.py index e9adbc83cc..5d94989f0b 100644 --- a/tests/submission_checker/test_config.py +++ b/tests/submission_checker/test_config.py @@ -68,7 +68,9 @@ def test_bert_99_variant(self, cfg): assert cfg.get_mlperf_model("bert-99-large") == "bert-99" def test_extra_mapping_used(self, cfg): - assert cfg.get_mlperf_model("my_resnet", {"my_resnet": "resnet"}) == "resnet" + assert cfg.get_mlperf_model( + "my_resnet", { + "my_resnet": "resnet"}) == "resnet" # --------------------------------------------------------------------------- diff --git a/tests/submission_checker/test_utils.py b/tests/submission_checker/test_utils.py index f1513c91d0..156277e837 100644 --- a/tests/submission_checker/test_utils.py +++ b/tests/submission_checker/test_utils.py @@ -38,7 +38,8 @@ def test_optional_files_ignored(self): assert diff == set() def test_custom_optional_ignored(self): - diff = files_diff(["a.txt", "custom.json"], ["a.txt"], optional=["custom.json"]) + diff = files_diff(["a.txt", "custom.json"], [ + "a.txt"], optional=["custom.json"]) assert diff == set() From fdc5b0ace480aab699a6df639d8d4f79898eb57a Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 22 Apr 2026 09:45:04 -0400 Subject: [PATCH 4/6] Remove bad file --- .../test_accuracy_parser.py | 144 ------------------ 1 file changed, 144 deletions(-) delete mode 100644 tests/submission_checker/test_accuracy_parser.py diff --git a/tests/submission_checker/test_accuracy_parser.py b/tests/submission_checker/test_accuracy_parser.py deleted file mode 100644 index f61b0370b8..0000000000 --- a/tests/submission_checker/test_accuracy_parser.py +++ /dev/null @@ -1,144 +0,0 @@ 
-import pytest -from submission_checker.parsers.accuracy_parser import parse_line - - -# --------------------------------------------------------------------------- -# Regex-backed metrics -# --------------------------------------------------------------------------- - -class TestAccMetric: - def test_plain_accuracy_line(self): - assert parse_line("accuracy = 76.50", "acc") == pytest.approx(76.50) - - def test_json_style_accuracy_line(self): - assert parse_line('{"accuracy": 76.50}', "acc") == pytest.approx(76.50) - - def test_no_match_returns_none(self): - assert parse_line("something else entirely", "acc") is None - - -class TestAUCMetric: - def test_auc_line(self): - assert parse_line("AUC=80.31", "AUC") == pytest.approx(80.31) - - def test_auc_with_trailing_text(self): - assert parse_line("AUC=80.31 (threshold=0.5)", "AUC") == pytest.approx(80.31) - - def test_no_match_returns_none(self): - assert parse_line("accuracy = 80.31", "AUC") is None - - -class TestMAPMetric: - def test_map_equals_format(self): - assert parse_line("mAP=37.55", "mAP") == pytest.approx(37.55) - - def test_map_total_dict_format(self): - assert parse_line("'Total': 37.55", "mAP") == pytest.approx(37.55) - - def test_no_match_returns_none(self): - assert parse_line("Average Precision = 37.55", "mAP") is None - - -class TestACCURACYMetric: - def test_wer_accuracy_line(self): - val = parse_line("Word Error Rate: 4.5%, accuracy=95.5%", "ACCURACY") - assert val == pytest.approx(95.5) - - def test_no_match_returns_none(self): - assert parse_line("accuracy=95.5%", "ACCURACY") is None - - -class TestDICEMetric: - def test_dice_line(self): - assert parse_line("Accuracy: mean = 0.86170", "DICE") == pytest.approx(0.86170) - - def test_no_match_returns_none(self): - assert parse_line("mean accuracy 0.86", "DICE") is None - - -class TestDLRMMetrics: - def test_dlrm_ne(self): - val = parse_line("metric/lifetime_ne/rating: 0.8500", "DLRM_NE") - assert val == pytest.approx(0.85) - - def 
test_dlrm_acc(self): - val = parse_line("metric/lifetime_accuracy/rating: 0.9200", "DLRM_ACC") - assert val == pytest.approx(0.92) - - def test_dlrm_auc(self): - val = parse_line("metric/lifetime_gauc/rating: 0.8100", "DLRM_AUC") - assert val == pytest.approx(0.81) - - -# --------------------------------------------------------------------------- -# Dict-backed metrics (ast.literal_eval) -# --------------------------------------------------------------------------- - -class TestROUGEMetrics: - ROUGE_LINE = "{'rouge1': 44.43, 'rouge2': 22.04, 'rougeL': 28.62, 'rougeLsum': 35.0, 'gen_len': 8167644}" - - def test_rouge1(self): - assert parse_line(self.ROUGE_LINE, "ROUGE1") == pytest.approx(44.43) - - def test_rouge2(self): - assert parse_line(self.ROUGE_LINE, "ROUGE2") == pytest.approx(22.04) - - def test_rougel(self): - assert parse_line(self.ROUGE_LINE, "ROUGEL") == pytest.approx(28.62) - - def test_rougelsum(self): - assert parse_line(self.ROUGE_LINE, "ROUGELSUM") == pytest.approx(35.0) - - def test_gen_len(self): - assert parse_line(self.ROUGE_LINE, "GEN_LEN") == pytest.approx(8167644) - - def test_no_dict_returns_none(self): - assert parse_line("rouge1 = 44.43", "ROUGE1") is None - - -class TestTokensPerSample: - def test_tokens_per_sample(self): - line = "{'tokens_per_sample': 294.45}" - assert parse_line(line, "TOKENS_PER_SAMPLE") == pytest.approx(294.45) - - -class TestCLIPAndFIDMetrics: - CLIP_LINE = "Accuracy Results: {'CLIP_SCORE': 31.69, 'FID_SCORE': 23.01}" - - def test_clip_score(self): - assert parse_line(self.CLIP_LINE, "CLIP_SCORE") == pytest.approx(31.69) - - def test_fid_score(self): - assert parse_line(self.CLIP_LINE, "FID_SCORE") == pytest.approx(23.01) - - def test_clip_score_missing_prefix_returns_none(self): - assert parse_line("{'CLIP_SCORE': 31.69}", "CLIP_SCORE") is None - - -# --------------------------------------------------------------------------- -# JSON-backed metrics -# 
--------------------------------------------------------------------------- - -class TestF1Metric: - def test_f1_line(self): - assert parse_line('{"f1": 90.874}', "F1") == pytest.approx(90.874) - - def test_f1_with_prefix(self): - assert parse_line('prefix text {"f1": 90.874}', "F1") == pytest.approx(90.874) - - def test_f1_hierarchical(self): - assert parse_line('{"f1": 85.0}', "F1_HIERARCHICAL") == pytest.approx(85.0) - - def test_no_json_returns_none(self): - assert parse_line("f1 = 90.874", "F1") is None - - def test_missing_key_returns_none(self): - assert parse_line('{"score": 90.874}', "F1") is None - - -# --------------------------------------------------------------------------- -# Unknown metric -# --------------------------------------------------------------------------- - -def test_unknown_metric_returns_none(): - assert parse_line("accuracy = 75.0", "UNKNOWN_METRIC") is None From 22104d44f0d4bbb20f5d3b333db8e1bbd6096a68 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 22 Apr 2026 09:48:58 -0400 Subject: [PATCH 5/6] test From 7e121aa67bb2aab33813e0e793da9c1b90f2e0b9 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 22 Apr 2026 11:43:30 -0400 Subject: [PATCH 6/6] Add __init__.py to files to ensure full coverage knowledge --- .coveragerc | 8 ++++++++ .github/workflows/codecov.yml | 2 +- automotive/3d-object-detection/__init__.py | 0 automotive/3d-object-detection/tools/__init__.py | 0 automotive/__init__.py | 0 calibration/BraTS/__init__.py | 0 calibration/__init__.py | 0 compliance/TEST01/__init__.py | 0 compliance/TEST04/__init__.py | 0 compliance/TEST06/__init__.py | 0 compliance/TEST07/__init__.py | 0 compliance/TEST08/__init__.py | 0 compliance/TEST09/__init__.py | 0 compliance/__init__.py | 0 graph/R-GAT/__init__.py | 0 graph/R-GAT/dgl_utilities/__init__.py | 0 graph/R-GAT/tools/__init__.py | 0 graph/__init__.py | 0 language/__init__.py | 0 language/bert/__init__.py | 0 language/deepseek-r1/__init__.py | 0 language/gpt-j/__init__.py | 0 
language/gpt-oss-120b/__init__.py | 0 language/gpt-oss-120b/sglang/__init__.py | 0 language/llama2-70b/__init__.py | 0 language/llama3.1-405b/__init__.py | 0 language/llama3.1-8b/__init__.py | 0 language/mixtral-8x7b/__init__.py | 0 language/mixtral-8x7b/standalone_infer/__init__.py | 0 loadgen/demos/__init__.py | 0 loadgen/demos/lon/__init__.py | 0 loadgen/demos/token_metrics/__init__.py | 0 loadgen/docs/__init__.py | 0 loadgen/docs/src/__init__.py | 0 lon/__init__.py | 0 multimodal/__init__.py | 0 multimodal/qwen3-vl/__init__.py | 0 multimodal/qwen3-vl/src/__init__.py | 0 recommendation/__init__.py | 0 recommendation/dlrm_v2/__init__.py | 0 recommendation/dlrm_v2/pytorch/__init__.py | 0 recommendation/dlrm_v2/pytorch/tools/__init__.py | 0 recommendation/dlrm_v3/__init__.py | 0 recommendation/dlrm_v3/datasets/__init__.py | 0 .../dlrm_v3/generative_recommenders/__init__.py | 0 .../dlrm_v3/generative_recommenders/modules/__init__.py | 0 .../dlrm_v3/generative_recommenders/ops/__init__.py | 0 .../generative_recommenders/ops/pytorch/__init__.py | 0 .../generative_recommenders/ops/triton/__init__.py | 0 retired_benchmarks/__init__.py | 0 retired_benchmarks/never_adopted/__init__.py | 0 retired_benchmarks/never_adopted/language/__init__.py | 0 .../never_adopted/language/gpt3/__init__.py | 0 .../never_adopted/language/gpt3/megatron/__init__.py | 0 retired_benchmarks/recommendation/__init__.py | 0 retired_benchmarks/recommendation/dlrm/__init__.py | 0 .../recommendation/dlrm/pytorch/__init__.py | 0 .../recommendation/dlrm/pytorch/tools/__init__.py | 0 retired_benchmarks/speech_recognition/__init__.py | 0 retired_benchmarks/speech_recognition/rnnt/__init__.py | 0 .../speech_recognition/rnnt/pytorch/__init__.py | 0 retired_benchmarks/translation/__init__.py | 0 retired_benchmarks/translation/gnmt/__init__.py | 0 .../translation/gnmt/tensorflow/__init__.py | 0 retired_benchmarks/vision/__init__.py | 0 .../vision/classification_and_detection/__init__.py | 0 
.../python/models/__init__.py | 0 .../vision/classification_and_detection/tools/__init__.py | 0 .../vision/medical_imaging/3d-unet-brats19/__init__.py | 0 retired_benchmarks/vision/medical_imaging/__init__.py | 0 speech2text/__init__.py | 0 speech2text/utils/__init__.py | 0 text_to_image/__init__.py | 0 text_to_image/tools/__init__.py | 0 text_to_image/tools/clip/__init__.py | 0 text_to_image/tools/fid/__init__.py | 0 text_to_video/__init__.py | 0 text_to_video/wan-2.2-t2v-a14b/__init__.py | 0 tools/__init__.py | 0 tools/submission/__init__.py | 0 tools/submission/power/__init__.py | 0 .../submission_checker/checks/power/__init__.py | 0 tools/upscale_coco/__init__.py | 0 vision/__init__.py | 0 vision/classification_and_detection/__init__.py | 0 .../python/models/__init__.py | 0 vision/classification_and_detection/tools/__init__.py | 0 vision/classification_and_detection/yolo/__init__.py | 0 vision/medical_imaging/3d-unet-kits19/__init__.py | 0 vision/medical_imaging/__init__.py | 0 90 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 .coveragerc create mode 100644 automotive/3d-object-detection/__init__.py create mode 100644 automotive/3d-object-detection/tools/__init__.py create mode 100644 automotive/__init__.py create mode 100644 calibration/BraTS/__init__.py create mode 100644 calibration/__init__.py create mode 100644 compliance/TEST01/__init__.py create mode 100644 compliance/TEST04/__init__.py create mode 100644 compliance/TEST06/__init__.py create mode 100644 compliance/TEST07/__init__.py create mode 100644 compliance/TEST08/__init__.py create mode 100644 compliance/TEST09/__init__.py create mode 100644 compliance/__init__.py create mode 100644 graph/R-GAT/__init__.py create mode 100644 graph/R-GAT/dgl_utilities/__init__.py create mode 100644 graph/R-GAT/tools/__init__.py create mode 100644 graph/__init__.py create mode 100644 language/__init__.py create mode 100644 language/bert/__init__.py create mode 100644 language/deepseek-r1/__init__.py 
create mode 100644 language/gpt-j/__init__.py create mode 100644 language/gpt-oss-120b/__init__.py create mode 100644 language/gpt-oss-120b/sglang/__init__.py create mode 100644 language/llama2-70b/__init__.py create mode 100644 language/llama3.1-405b/__init__.py create mode 100644 language/llama3.1-8b/__init__.py create mode 100644 language/mixtral-8x7b/__init__.py create mode 100644 language/mixtral-8x7b/standalone_infer/__init__.py create mode 100644 loadgen/demos/__init__.py create mode 100644 loadgen/demos/lon/__init__.py create mode 100644 loadgen/demos/token_metrics/__init__.py create mode 100644 loadgen/docs/__init__.py create mode 100644 loadgen/docs/src/__init__.py create mode 100644 lon/__init__.py create mode 100644 multimodal/__init__.py create mode 100644 multimodal/qwen3-vl/__init__.py create mode 100644 multimodal/qwen3-vl/src/__init__.py create mode 100644 recommendation/__init__.py create mode 100644 recommendation/dlrm_v2/__init__.py create mode 100644 recommendation/dlrm_v2/pytorch/__init__.py create mode 100644 recommendation/dlrm_v2/pytorch/tools/__init__.py create mode 100644 recommendation/dlrm_v3/__init__.py create mode 100644 recommendation/dlrm_v3/datasets/__init__.py create mode 100644 recommendation/dlrm_v3/generative_recommenders/__init__.py create mode 100644 recommendation/dlrm_v3/generative_recommenders/modules/__init__.py create mode 100644 recommendation/dlrm_v3/generative_recommenders/ops/__init__.py create mode 100644 recommendation/dlrm_v3/generative_recommenders/ops/pytorch/__init__.py create mode 100644 recommendation/dlrm_v3/generative_recommenders/ops/triton/__init__.py create mode 100644 retired_benchmarks/__init__.py create mode 100644 retired_benchmarks/never_adopted/__init__.py create mode 100644 retired_benchmarks/never_adopted/language/__init__.py create mode 100644 retired_benchmarks/never_adopted/language/gpt3/__init__.py create mode 100644 retired_benchmarks/never_adopted/language/gpt3/megatron/__init__.py create 
mode 100644 retired_benchmarks/recommendation/__init__.py create mode 100644 retired_benchmarks/recommendation/dlrm/__init__.py create mode 100644 retired_benchmarks/recommendation/dlrm/pytorch/__init__.py create mode 100644 retired_benchmarks/recommendation/dlrm/pytorch/tools/__init__.py create mode 100644 retired_benchmarks/speech_recognition/__init__.py create mode 100644 retired_benchmarks/speech_recognition/rnnt/__init__.py create mode 100644 retired_benchmarks/speech_recognition/rnnt/pytorch/__init__.py create mode 100644 retired_benchmarks/translation/__init__.py create mode 100644 retired_benchmarks/translation/gnmt/__init__.py create mode 100644 retired_benchmarks/translation/gnmt/tensorflow/__init__.py create mode 100644 retired_benchmarks/vision/__init__.py create mode 100644 retired_benchmarks/vision/classification_and_detection/__init__.py create mode 100644 retired_benchmarks/vision/classification_and_detection/python/models/__init__.py create mode 100644 retired_benchmarks/vision/classification_and_detection/tools/__init__.py create mode 100644 retired_benchmarks/vision/medical_imaging/3d-unet-brats19/__init__.py create mode 100644 retired_benchmarks/vision/medical_imaging/__init__.py create mode 100644 speech2text/__init__.py create mode 100644 speech2text/utils/__init__.py create mode 100644 text_to_image/__init__.py create mode 100644 text_to_image/tools/__init__.py create mode 100644 text_to_image/tools/clip/__init__.py create mode 100644 text_to_image/tools/fid/__init__.py create mode 100644 text_to_video/__init__.py create mode 100644 text_to_video/wan-2.2-t2v-a14b/__init__.py create mode 100644 tools/__init__.py create mode 100644 tools/submission/__init__.py create mode 100644 tools/submission/power/__init__.py create mode 100644 tools/submission/submission_checker/checks/power/__init__.py create mode 100644 tools/upscale_coco/__init__.py create mode 100644 vision/__init__.py create mode 100644 vision/classification_and_detection/__init__.py 
create mode 100644 vision/classification_and_detection/python/models/__init__.py create mode 100644 vision/classification_and_detection/tools/__init__.py create mode 100644 vision/classification_and_detection/yolo/__init__.py create mode 100644 vision/medical_imaging/3d-unet-kits19/__init__.py create mode 100644 vision/medical_imaging/__init__.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..b0f808baef --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +[run] +source = . +omit = + nev/* + tests/* + retired_benchmarks/* + */site-packages/* + */dist-packages/* diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 27df9e33ff..5aca670a3f 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -11,7 +11,7 @@ on: branches: ["arav-codecov-impl"] paths: - "tests/**" - - "tools/submission/submission_checker/**" + - "*" - ".github/workflows/codecov.yml" jobs: diff --git a/automotive/3d-object-detection/__init__.py b/automotive/3d-object-detection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/automotive/3d-object-detection/tools/__init__.py b/automotive/3d-object-detection/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/automotive/__init__.py b/automotive/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/calibration/BraTS/__init__.py b/calibration/BraTS/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/calibration/__init__.py b/calibration/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/TEST01/__init__.py b/compliance/TEST01/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/TEST04/__init__.py b/compliance/TEST04/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/TEST06/__init__.py b/compliance/TEST06/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/compliance/TEST07/__init__.py b/compliance/TEST07/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/TEST08/__init__.py b/compliance/TEST08/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/TEST09/__init__.py b/compliance/TEST09/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/compliance/__init__.py b/compliance/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graph/R-GAT/__init__.py b/graph/R-GAT/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graph/R-GAT/dgl_utilities/__init__.py b/graph/R-GAT/dgl_utilities/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graph/R-GAT/tools/__init__.py b/graph/R-GAT/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graph/__init__.py b/graph/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/__init__.py b/language/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/bert/__init__.py b/language/bert/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/deepseek-r1/__init__.py b/language/deepseek-r1/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/gpt-j/__init__.py b/language/gpt-j/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/gpt-oss-120b/__init__.py b/language/gpt-oss-120b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/gpt-oss-120b/sglang/__init__.py b/language/gpt-oss-120b/sglang/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/llama2-70b/__init__.py b/language/llama2-70b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/llama3.1-405b/__init__.py b/language/llama3.1-405b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/language/llama3.1-8b/__init__.py b/language/llama3.1-8b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/mixtral-8x7b/__init__.py b/language/mixtral-8x7b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/language/mixtral-8x7b/standalone_infer/__init__.py b/language/mixtral-8x7b/standalone_infer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/loadgen/demos/__init__.py b/loadgen/demos/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/loadgen/demos/lon/__init__.py b/loadgen/demos/lon/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/loadgen/demos/token_metrics/__init__.py b/loadgen/demos/token_metrics/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/loadgen/docs/__init__.py b/loadgen/docs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/loadgen/docs/src/__init__.py b/loadgen/docs/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lon/__init__.py b/lon/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/multimodal/__init__.py b/multimodal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/multimodal/qwen3-vl/__init__.py b/multimodal/qwen3-vl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/multimodal/qwen3-vl/src/__init__.py b/multimodal/qwen3-vl/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/__init__.py b/recommendation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v2/__init__.py b/recommendation/dlrm_v2/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v2/pytorch/__init__.py b/recommendation/dlrm_v2/pytorch/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v2/pytorch/tools/__init__.py 
b/recommendation/dlrm_v2/pytorch/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/__init__.py b/recommendation/dlrm_v3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/datasets/__init__.py b/recommendation/dlrm_v3/datasets/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/generative_recommenders/__init__.py b/recommendation/dlrm_v3/generative_recommenders/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/generative_recommenders/modules/__init__.py b/recommendation/dlrm_v3/generative_recommenders/modules/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/generative_recommenders/ops/__init__.py b/recommendation/dlrm_v3/generative_recommenders/ops/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/generative_recommenders/ops/pytorch/__init__.py b/recommendation/dlrm_v3/generative_recommenders/ops/pytorch/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recommendation/dlrm_v3/generative_recommenders/ops/triton/__init__.py b/recommendation/dlrm_v3/generative_recommenders/ops/triton/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/__init__.py b/retired_benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/never_adopted/__init__.py b/retired_benchmarks/never_adopted/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/never_adopted/language/__init__.py b/retired_benchmarks/never_adopted/language/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/never_adopted/language/gpt3/__init__.py b/retired_benchmarks/never_adopted/language/gpt3/__init__.py new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/__init__.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/recommendation/__init__.py b/retired_benchmarks/recommendation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/recommendation/dlrm/__init__.py b/retired_benchmarks/recommendation/dlrm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/recommendation/dlrm/pytorch/__init__.py b/retired_benchmarks/recommendation/dlrm/pytorch/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/recommendation/dlrm/pytorch/tools/__init__.py b/retired_benchmarks/recommendation/dlrm/pytorch/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/speech_recognition/__init__.py b/retired_benchmarks/speech_recognition/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/speech_recognition/rnnt/__init__.py b/retired_benchmarks/speech_recognition/rnnt/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/speech_recognition/rnnt/pytorch/__init__.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/translation/__init__.py b/retired_benchmarks/translation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/translation/gnmt/__init__.py b/retired_benchmarks/translation/gnmt/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/translation/gnmt/tensorflow/__init__.py b/retired_benchmarks/translation/gnmt/tensorflow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/retired_benchmarks/vision/__init__.py b/retired_benchmarks/vision/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/vision/classification_and_detection/__init__.py b/retired_benchmarks/vision/classification_and_detection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/__init__.py b/retired_benchmarks/vision/classification_and_detection/python/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/vision/classification_and_detection/tools/__init__.py b/retired_benchmarks/vision/classification_and_detection/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/vision/medical_imaging/3d-unet-brats19/__init__.py b/retired_benchmarks/vision/medical_imaging/3d-unet-brats19/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/retired_benchmarks/vision/medical_imaging/__init__.py b/retired_benchmarks/vision/medical_imaging/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/speech2text/__init__.py b/speech2text/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/speech2text/utils/__init__.py b/speech2text/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/text_to_image/__init__.py b/text_to_image/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/text_to_image/tools/__init__.py b/text_to_image/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/text_to_image/tools/clip/__init__.py b/text_to_image/tools/clip/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/text_to_image/tools/fid/__init__.py b/text_to_image/tools/fid/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/text_to_video/__init__.py b/text_to_video/__init__.py new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/text_to_video/wan-2.2-t2v-a14b/__init__.py b/text_to_video/wan-2.2-t2v-a14b/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/submission/__init__.py b/tools/submission/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/submission/power/__init__.py b/tools/submission/power/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/submission/submission_checker/checks/power/__init__.py b/tools/submission/submission_checker/checks/power/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/upscale_coco/__init__.py b/tools/upscale_coco/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/__init__.py b/vision/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/classification_and_detection/__init__.py b/vision/classification_and_detection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/classification_and_detection/python/models/__init__.py b/vision/classification_and_detection/python/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/classification_and_detection/tools/__init__.py b/vision/classification_and_detection/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/classification_and_detection/yolo/__init__.py b/vision/classification_and_detection/yolo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/medical_imaging/3d-unet-kits19/__init__.py b/vision/medical_imaging/3d-unet-kits19/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vision/medical_imaging/__init__.py b/vision/medical_imaging/__init__.py new file mode 100644 index 0000000000..e69de29bb2