From dada3c7f20ce5cb60f3e0b45cb89ec3d571a6857 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Thu, 18 Jun 2026 20:16:39 +0530 Subject: [PATCH 1/9] initial set of additions to submission checker constants --- .../submission_checker/constants.py | 434 +++++++++++++++++- 1 file changed, 424 insertions(+), 10 deletions(-) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 2f4abd87f8..bef928c954 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -1,4 +1,327 @@ MODEL_CONFIG = { + "v6.1": { + "models": [ + "resnet", + "bert-99", + "bert-99.9", + "3d-unet-99", + "3d-unet-99.9", + "llama3.1-8b", + "llama3.1-8b-edge", + "llama2-70b-99", + "llama2-70b-99.9", + "stable-diffusion-xl", + "rgat", + "deepseek-r1", + "whisper", + "gpt-oss-120b", + "wan-2.2-t2v-a14b", + "qwen3-vl-235b-a22b", + "dlrm-v3", + "yolo-95", + "yolo-99", + "e2e", + ], + "required-scenarios-datacenter": { + "dlrm-v3": ["Server", "Offline"], + "3d-unet-99": ["Offline"], + "3d-unet-99.9": ["Offline"], + "llama3.1-8b": ["Offline"], + "llama2-70b-99": ["Offline"], + "llama2-70b-99.9": ["Offline"], + "rgat": ["Offline"], + "whisper": ["Offline"], + "deepseek-r1": ["Offline"], + "gpt-oss-120b": ["Offline"], + "qwen3-vl-235b-a22b": ["Server", "Offline"], + "wan-2.2-t2v-a14b": ["Offline", "SingleStream"], + "e2e": ["Offline"], + }, + "optional-scenarios-datacenter": { + "llama2-70b-99": ["Interactive", "Server"], + "llama2-70b-99.9": ["Interactive", "Server"], + "llama3.1-8b": ["Interactive", "Server"], + "deepseek-r1": ["Interactive", "Server"], + "gpt-oss-120b": ["Interactive", "Server"], + "qwen3-vl-235b-a22b": ["Interactive"], + }, + "required-scenarios-edge": { + "resnet": ["SingleStream", "MultiStream", "Offline"], + "bert-99": ["SingleStream", "Offline"], + "bert-99.9": ["SingleStream", "Offline"], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "llama3.1-8b-edge": ["SingleStream", "Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline"], + "whisper": ["Offline"], + "yolo-95": ["SingleStream", "MultiStream", "Offline"], + "yolo-99": ["SingleStream", "MultiStream", "Offline"], + }, + "optional-scenarios-edge": {}, + "required-scenarios-datacenter-edge": { + "resnet": ["SingleStream", "MultiStream", "Offline", "Server"], + "bert-99": ["SingleStream", "Offline"], + "bert-99.9": ["SingleStream", "Offline"], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "llama3.1-8b": ["Offline"], + "llama3.1-8b-edge": ["SingleStream", "Offline"], + "llama2-70b-99": ["Offline"], + "llama2-70b-99.9": ["Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline", "Server"], + "rgat": ["Offline"], + "deepseek-r1": ["Offline"], + "whisper": ["Offline"], + "gpt-oss-120b": ["Offline"], + "qwen3-vl-235b-a22b": ["Offline"], + "dlrm-v3": ["Offline", "Server"], + "yolo-95": ["SingleStream", "MultiStream", "Offline"], + "yolo-99": ["SingleStream", "MultiStream", "Offline"], + }, + "optional-scenarios-datacenter-edge": { + "llama2-70b-99": ["Interactive", "Server"], + "llama2-70b-99.9": ["Interactive", "Server"], + "llama3.1-8b": ["Interactive", "Server"], + "deepseek-r1": ["Interactive", "Server"], + "gpt-oss-120b": ["Interactive", "Server"], + "qwen3-vl-235b-a22b": ["Interactive", "Server"], + }, + "accuracy-target": { + "resnet": ("acc", 76.46 * 0.99), + "bert-99": ("F1", 90.874 * 0.99), + "bert-99.9": ("F1", 90.874 * 0.999), + "3d-unet-99": ("DICE", 0.86170 * 0.99), + "3d-unet-99.9": ("DICE", 0.86170 * 0.999), + "llama3.1-8b": ( + "ROUGE1", + 38.7792 * 0.99, + "ROUGE2", + 15.9075 * 0.99, + "ROUGEL", + 24.4957 * 0.99, + "ROUGELSUM", + 35.793 * 0.99, + "GEN_LEN", + 8167644 * 0.9, + ), + "llama3.1-8b-edge": ( + "ROUGE1", + 39.06 * 0.99, + "ROUGE2", + 16.1147 * 0.99, + "ROUGEL", + 24.6375 * 0.99, + "ROUGELSUM", + 36.124 * 0.99, + "GEN_LEN", + 3051113 * 0.9, + ), + "llama2-70b-99": ( + "ROUGE1", + 44.4312 * 0.99, + "ROUGE2", + 22.0352 * 0.99, + "ROUGEL", + 28.6162 * 0.99, + "TOKENS_PER_SAMPLE", + 294.45 * 0.9, + ), + "llama2-70b-99.9": ( + "ROUGE1", + 44.4312 * 0.999, + "ROUGE2", + 22.0352 * 0.999, + "ROUGEL", + 28.6162 * 0.999, + "TOKENS_PER_SAMPLE", + 294.45 * 0.9, + ), + "stable-diffusion-xl": ( + "CLIP_SCORE", + 31.68631873, + "FID_SCORE", + 23.01085758, + ), + "rgat": ("acc", 0.7286 * 0.99), + "deepseek-r1": ("exact_match", 0.99 * 81.3582, "TOKENS_PER_SAMPLE", 0.9 * 3886.2274), + "whisper": ("ACCURACY", (100.0 - 2.0671) * 0.99), + "gpt-oss-120b": ("exact_match", 83.13 * 0.99), + "qwen3-vl-235b-a22b": ("F1_HIERARCHICAL", 0.7903 * 0.99), + "dlrm-v3": ( + "DLRM_NE", + 0.86687 * 0.999, + "DLRM_ACC", + 0.69651 * 0.999, + "DLRM_AUC", + 0.78663 * 0.999, + ), + "yolo-95": ("mAP", 53.4 * 0.95), + "yolo-99": ("mAP", 53.4 * 0.99), + "wan-2.2-t2v-a14b": ("vbench_score", 70.48 * 0.99), + # TODO: Set e2e accuracy threshold once reference score is established + "e2e": ("E2E_ACCURACY", ""), + }, + "accuracy-upper-limit": { + "stable-diffusion-xl": ( + "CLIP_SCORE", + 31.81331801, + "FID_SCORE", + 23.95007626, + ), + "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), + "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), + "llama3.1-8b": ("GEN_LEN", 8167644 * 1.1), + "llama3.1-8b-edge": ("GEN_LEN", 3051113 * 1.1), + "deepseek-r1": ("TOKENS_PER_SAMPLE", 1.1 * 3886.2274), + "gpt-oss-120b": ("TOKENS_PER_SAMPLE", 1.1 * 9999), + }, + "accuracy-delta-perc": { + "stable-diffusion-xl": {"CLIP_SCORE": 1, "FID_SCORE": 2} + }, + "performance-sample-count": { + "resnet": 1024, + "bert-99": 10833, + "bert-99.9": 10833, + "3d-unet-99": 43, + "3d-unet-99.9": 43, + "llama3.1-8b": 13368, + "llama3.1-8b-edge": 5000, + "llama2-70b-99": 24576, + "llama2-70b-99.9": 24576, + "stable-diffusion-xl": 5000, + "rgat": 788379, + "deepseek-r1": 4388, + "whisper": 1633, + "gpt-oss-120b": 6396, + "qwen3-vl-235b-a22b": 48289, + "wan-2.2-t2v-a14b": 50, + "dlrm-v3": 349823, + "yolo-95": 64, + "yolo-99": 64, + "e2e": 824, + }, + "accuracy-sample-count": { + "gpt-oss-120b": 4395, + "wan-2.2-t2v-a14b": 248, + }, + "dataset-size": { + "resnet": 50000, + "bert-99": 10833, + "bert-99.9": 10833, + "3d-unet-99": 43, + "3d-unet-99.9": 43, + "llama3.1-8b": 13368, + "llama3.1-8b-edge": 5000, + "llama2-70b-99": 24576, + "llama2-70b-99.9": 24576, + "stable-diffusion-xl": 5000, + "rgat": 788379, + "deepseek-r1": 4388, + "whisper": 1633, + "gpt-oss-120b": 6396, + "qwen3-vl-235b-a22b": 48289, + "wan-2.2-t2v-a14b": 248, + "dlrm-v3": 349823, + "yolo-95": 1525, + "yolo-99": 1525, + "e2e": 824, + }, + "model_mapping": { + "ssd-resnet34": "retinanet", + "mobilenet": "resnet", + "resnet50": "resnet", + "llama3_1-405b": "llama3.1-405b", + "llama3_1-8b": "llama3.1-8b", + "llama3_1-8b-edge": "llama3.1-8b-edge", + }, + "seeds": { + # TODO: Update seeds for v6.1 + "qsl_rng_seed": 2465351861681999779, + "sample_index_rng_seed": 14276810075590677512, + "schedule_rng_seed": 3936089224930324775, + }, + "ignore_errors": [], + "latency-constraint": { + "resnet": {"Server": 15000000}, + "retinanet": {"Server": 100000000}, + "dlrm-v2-99": {"Server": 60000000}, + "dlrm-v2-99.9": {"Server": 60000000}, + "llama3.1-8b": {"Server": 20000000000}, + "stable-diffusion-xl": {"Server": 20000000000}, + "llama2-70b-99": {"Server": 20000000000}, + "llama2-70b-99.9": {"Server": 20000000000}, + "deepseek-r1": {"Server": 60000000000}, + "gpt-oss-120b": {"Server": 60000000000}, + "qwen3-vl-235b-a22b": {"Server": 60000000000}, + "dlrm-v3": {"Server": 60000000000}, + }, + "min-queries": { + "resnet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1, + }, + "bert-99": {"SingleStream": 1024, "Offline": 1}, + "bert-99.9": {"SingleStream": 1024, "Offline": 1}, + "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, + "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "llama3.1-8b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama3.1-8b-edge": {"SingleStream": 1024, "Offline": 1}, + "llama2-70b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama2-70b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "stable-diffusion-xl": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1, + }, + "rgat": {"SingleStream": 1024, "Offline": 1}, + "deepseek-r1": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "whisper": {"SingleStream": 1024, "Offline": 1}, + "gpt-oss-120b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "qwen3-vl-235b-a22b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "dlrm-v3": {"Server": 270336, "Offline": 1}, + "wan-2.2-t2v-a14b": {"SingleStream": 50, "Offline": 1}, + "yolo-95": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, + "yolo-99": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, + "e2e": {"Offline": 824}, + }, + "models_TEST01": [ + "resnet", + "bert-99", + "bert-99.9", + "3d-unet-99", + "3d-unet-99.9", + "stable-diffusion-xl", + "rgat", + "whisper", + "yolo-99", + "yolo-95", + ], + "models_TEST04": [ + "resnet", + "stable-diffusion-xl", + "wan-2.2-t2v-a14b", + ], + "models_TEST06": [ + "llama2-70b-99", + "llama2-70b-99.9", + "llama2-70b-interactive-99", + "llama2-70b-interactive-99.9", + "llama3.1-8b", + "llama3.1-8b-interactive", + "deepseek-r1", + ], + "models_TEST07": [ + "gpt-oss-120b", + ], + "models_TEST09": [ + "gpt-oss-120b", + ], + "models_TEST08": [ + "dlrm-v3", + ] + }, "v6.0": { "models": [ "resnet", @@ -1187,6 +1510,7 @@ "dlrm-v3": 349823, "qwen3-vl-235b-a22b": 48289, "wan-2.2-t2v-a14b": 50, + "e2e": 824, } SCENARIO_MAPPING = { @@ -1223,6 +1547,12 @@ "MultiStream": "early_stopping_latency_ms", "Server": "result_completed_samples_per_sec", }, + "v6.1": { + "Offline": "result_samples_per_second", + "SingleStream": "early_stopping_latency_ss", + "MultiStream": "early_stopping_latency_ms", + "Server": "result_completed_samples_per_sec", + }, } RESULT_FIELD_BENCHMARK_OVERWRITE = { @@ -1330,6 +1660,43 @@ "Offline": "result_tokens_per_second", }, }, + "v6.1": { + "gpt-oss-120b": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "llama2-70b-99": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "llama2-70b-99.9": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "llama3.1-8b": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "llama3.1-8b-edge": { + "Offline": "result_tokens_per_second", + "SingleStream": "result_90.00_percentile_latency_ns", + }, + "mixtral-8x7b": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "llama3.1-405b": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "deepseek-r1": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", + }, + "whisper": { + "Offline": "result_tokens_per_second", + }, + }, } LLM_LATENCY_LIMITS = { @@ -1426,6 +1793,7 @@ "exact_match": r".*'exact_match':\s([\d.]+).*", "vbench_score": r".*'vbench_score':\s([\d.]+).*", "F1_HIERARCHICAL": r'\{.*"f1":\s*([\d\.]+).*\}', + "E2E_ACCURACY": r"Accuracy:\s*([\d\.]+)", } SYSTEM_DESC_REQUIRED_FIELDS = [ @@ -1623,6 +1991,7 @@ "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_detail.txt", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_detail.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_detail.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_detail.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_detail.txt", } @@ -1630,13 +1999,34 @@ "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", } +<<<<<<< Updated upstream +======= +PERFORMANCE_ENDPOINTS_PATH = { + "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", + "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", + "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", + "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", +} + +PERFORMANCE_CONFIG_ENDPOINTS_PATH = { + "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", + "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", + "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", + "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", +} + +>>>>>>> Stashed changes ACCURACY_LOG_PATH = { "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", } @@ -1644,6 +2034,7 @@ "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/accuracy.txt", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/accuracy.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/accuracy.txt", } @@ -1651,13 +2042,34 @@ "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", } +<<<<<<< Updated upstream +======= +ACCURACY_ENDPOINTS_PATH = { + "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", + "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", + "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", + "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", +} + +ACCURACY_CONFIG_ENDPOINTS_PATH = { + "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", + "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", + "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", + "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", +} + +>>>>>>> Stashed changes POWER_DIR_PATH = { "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", } @@ -1665,6 +2077,7 @@ "v5.0": "{division}/{submitter}/measurements/{system}/{benchmark}/{scenario}/{file}", "v5.1": "{division}/{submitter}/measurements/{system}/{benchmark}/{scenario}/{file}", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/measurements.json", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/measurements.json", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/measurements.json", } @@ -1672,6 +2085,7 @@ "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST01/performance/run_1/mlperf_log_detail.txt", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST01/performance/run_1/mlperf_log_detail.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/performance/run_1/mlperf_log_detail.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/performance/run_1/mlperf_log_detail.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/performance/run_1/mlperf_log_detail.txt", } @@ -1679,6 +2093,7 @@ "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST01/verify_accuracy.txt", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST01/verify_accuracy.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/verify_accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST01/verify_accuracy.txt", } @@ -1686,6 +2101,7 @@ "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST04/performance/run_1/mlperf_log_detail.txt", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST04/performance/run_1/mlperf_log_detail.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/performance/run_1/mlperf_log_detail.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/performance/run_1/mlperf_log_detail.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/performance/run_1/mlperf_log_detail.txt", } @@ -1693,6 +2109,7 @@ "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST04/verify_accuracy.txt", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST04/verify_accuracy.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/verify_accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST04/verify_accuracy.txt", } @@ -1700,38 +2117,33 @@ "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST06/verify_accuracy.txt", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST06/verify_accuracy.txt", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST06/verify_accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST06/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST06/verify_accuracy.txt", } TEST07_ACC_PATH = { "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", } TEST09_ACC_PATH = { "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST09/verify_output_len.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST09/verify_output_len.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST09/verify_output_len.txt", } TEST08_ACC_PATH = { "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt", } -TEST07_ACC_PATH = { - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", -} - -TEST09_ACC_PATH = { - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST09/verify_output_len.txt", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST09/verify_output_len.txt", -} - COMPLIANCE_PATH = { "v5.0": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/", "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/", "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/", + "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/", } @@ -1739,6 +2151,7 @@ "v5.0": "{division}/{submitter}/systems/{system}.json", "v5.1": "{division}/{submitter}/systems/{system}.json", "v6.0": "{division}/{submitter}/systems/{system}.json", + "v6.1": "{division}/{submitter}/systems/{system}.json", "default": "{division}/{submitter}/systems/{system}.json", } @@ -1746,5 +2159,6 @@ "v5.0": "{division}/{submitter}/code", "v5.1": "{division}/{submitter}/code", "v6.0": "{division}/{submitter}/src", + "v6.1": "{division}/{submitter}/src", "default": "{division}/{submitter}/src", } From 26903eae1f275ebee8b9b820078a9296b39b53e6 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Thu, 18 Jun 2026 20:21:26 +0530 Subject: [PATCH 2/9] resolve conflicts --- .../submission_checker/constants.py | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index bef928c954..c181ad21b6 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -2003,25 +2003,6 @@ "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/mlperf_log_summary.txt", } -<<<<<<< Updated upstream -======= -PERFORMANCE_ENDPOINTS_PATH = { - "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", - "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", - "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/result_summary.json", -} - -PERFORMANCE_CONFIG_ENDPOINTS_PATH = { - "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", - "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", - "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/run_1/config.yaml", -} - ->>>>>>> Stashed changes ACCURACY_LOG_PATH = { "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_detail.txt", @@ -2046,25 +2027,6 @@ "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/mlperf_log_accuracy.json", } -<<<<<<< Updated upstream -======= -ACCURACY_ENDPOINTS_PATH = { - "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", - "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", - "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/results.json", -} - -ACCURACY_CONFIG_ENDPOINTS_PATH = { - "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", - "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", - "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", - "v6.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", - "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/accuracy/config.yaml", -} - ->>>>>>> Stashed changes POWER_DIR_PATH = { "v5.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", "v5.1": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/performance/power", From b0adc5fe8ee0b05302ea81829d5c552d7d0f89bd Mon Sep 17 00:00:00 2001 From: mlc-automations <3246381+mlc-automations@users.noreply.github.com> Date: Thu, 18 Jun 2026 14:52:20 +0000 Subject: [PATCH 3/9] [Automated Commit] Format Codebase --- tools/submission/submission_checker/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index c181ad21b6..544eb52fbd 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -158,7 +158,8 @@ "yolo-95": ("mAP", 53.4 * 0.95), "yolo-99": ("mAP", 53.4 * 0.99), "wan-2.2-t2v-a14b": ("vbench_score", 70.48 * 0.99), - # TODO: Set e2e accuracy threshold once reference score is established + # TODO: Set e2e accuracy threshold once reference score is + # established "e2e": ("E2E_ACCURACY", ""), }, "accuracy-upper-limit": { From 11c602fa0219683e3345dd05aecf029ffda1dfea Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Fri, 19 Jun 2026 11:53:01 -0500 Subject: [PATCH 4/9] Add e2e_vectorDB + new required videos (#2605) --- .../submission_checker/constants.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 544eb52fbd..19aba993b6 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -21,6 +21,7 @@ "yolo-95", "yolo-99", "e2e", + "e2e_vectorDB" ], "required-scenarios-datacenter": { "dlrm-v3": ["Server", "Offline"], @@ -36,6 +37,7 @@ "qwen3-vl-235b-a22b": ["Server", "Offline"], "wan-2.2-t2v-a14b": ["Offline", "SingleStream"], "e2e": ["Offline"], + "e2e_vectorDB": ["Offline"] }, "optional-scenarios-datacenter": { "llama2-70b-99": ["Interactive", "Server"], @@ -161,6 +163,7 @@ # TODO: Set e2e accuracy threshold once reference score is # established "e2e": ("E2E_ACCURACY", ""), + "e2e_vectorDB": ("E2E_ACCURACY", ""), }, "accuracy-upper-limit": { "stable-diffusion-xl": ( @@ -200,6 +203,7 @@ "yolo-95": 64, "yolo-99": 64, "e2e": 824, + "e2e_vectorDB": 824 }, "accuracy-sample-count": { "gpt-oss-120b": 4395, @@ -226,6 +230,7 @@ "yolo-95": 1525, "yolo-99": 1525, "e2e": 824, + "e2e_vectorDB": 824, }, "model_mapping": { "ssd-resnet34": "retinanet", @@ -286,6 +291,7 @@ "yolo-95": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, "yolo-99": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, "e2e": {"Offline": 824}, + "e2e_vectorDB": {"Offline": 824}, }, "models_TEST01": [ "resnet", @@ -1463,6 +1469,29 @@ "96", ] }, + "v6.1": { + "videos": [ + # TODO: update with new videos if needed + "130", + "106", + "84", + "59", + "12", + "31", + "86", + "122", + "233", + "96", + ] + }, + }, + "e2e_vectorDB": { + "v6.1": { + "results": [ + # TODO: update name if needed + "results.json" + ] + }, } } REQUIRED_MEASURE_FILES = ["user.conf", "README.md"] @@ -1512,6 +1541,7 @@ "qwen3-vl-235b-a22b": 48289, "wan-2.2-t2v-a14b": 50, "e2e": 824, + "e2e_vectorDB": 824 } SCENARIO_MAPPING = { From 7e94bdc3ba30fe475bfe485805b5c1e27c2d7249 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 23 Jun 2026 10:25:53 -0500 Subject: [PATCH 5/9] Update benchmark name: e2e->rag --- .../submission_checker/constants.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 19aba993b6..4e3622432a 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -20,8 +20,8 @@ "dlrm-v3", "yolo-95", "yolo-99", - "e2e", - "e2e_vectorDB" + "rag-qna", + "rag-db" ], "required-scenarios-datacenter": { "dlrm-v3": ["Server", "Offline"], @@ -36,8 +36,8 @@ "gpt-oss-120b": ["Offline"], "qwen3-vl-235b-a22b": ["Server", "Offline"], "wan-2.2-t2v-a14b": ["Offline", "SingleStream"], - "e2e": ["Offline"], - "e2e_vectorDB": ["Offline"] + "rag-qna": ["Offline"], + "rag-db": ["Offline"] }, "optional-scenarios-datacenter": { "llama2-70b-99": ["Interactive", "Server"], @@ -160,10 +160,10 @@ "yolo-95": ("mAP", 53.4 * 0.95), "yolo-99": ("mAP", 53.4 * 0.99), "wan-2.2-t2v-a14b": ("vbench_score", 70.48 * 0.99), - # TODO: Set e2e accuracy threshold once reference score is + # TODO: Set rag-qna accuracy threshold once reference score is # established - "e2e": ("E2E_ACCURACY", ""), - "e2e_vectorDB": ("E2E_ACCURACY", ""), + "rag-qna": ("E2E_ACCURACY", ""), + "rag-db": ("E2E_ACCURACY", ""), }, "accuracy-upper-limit": { "stable-diffusion-xl": ( @@ -202,8 +202,8 @@ "dlrm-v3": 349823, "yolo-95": 64, "yolo-99": 64, - "e2e": 824, - "e2e_vectorDB": 824 + "rag-qna": 824, + "rag-db": 2503 }, "accuracy-sample-count": { "gpt-oss-120b": 4395, @@ -229,8 +229,8 @@ "dlrm-v3": 349823, "yolo-95": 1525, "yolo-99": 1525, - "e2e": 824, - "e2e_vectorDB": 824, + "rag-qna": 824, + "rag-db": 2503, }, "model_mapping": { "ssd-resnet34": "retinanet", @@ -290,8 +290,8 @@ "wan-2.2-t2v-a14b": {"SingleStream": 50, "Offline": 1}, "yolo-95": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, "yolo-99": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, - "e2e": {"Offline": 824}, - "e2e_vectorDB": {"Offline": 824}, + "rag-qna": {"Offline": 824}, + "rag-db": {"Offline": 2503}, }, "models_TEST01": [ "resnet", @@ -1485,7 +1485,7 @@ ] }, }, - "e2e_vectorDB": { + "rag-db": { "v6.1": { "results": [ # TODO: update name if needed @@ -1540,8 +1540,8 @@ "dlrm-v3": 349823, "qwen3-vl-235b-a22b": 48289, "wan-2.2-t2v-a14b": 50, - "e2e": 824, - "e2e_vectorDB": 824 + "rag-qna": 824, + "rag-db": 824 } SCENARIO_MAPPING = { From a84dbcadd2d104e3720186827bca463ab9b17687 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 23 Jun 2026 11:39:18 -0500 Subject: [PATCH 6/9] Add v6.1 random seeds --- loadgen/mlperf.conf | 6 +++--- tools/submission/submission_checker/constants.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/loadgen/mlperf.conf b/loadgen/mlperf.conf index 4e96134c7c..b3bc744308 100644 --- a/loadgen/mlperf.conf +++ b/loadgen/mlperf.conf @@ -38,9 +38,9 @@ gpt-oss-120b-interactive.*.performance_sample_count_override = 6396 gpt-oss-120b-interactive.*.accuracy_sample_count_override = 4395 # Set seeds. -*.*.qsl_rng_seed = 2465351861681999779 -*.*.sample_index_rng_seed = 14276810075590677512 -*.*.schedule_rng_seed = 3936089224930324775 +*.*.qsl_rng_seed = 2085463073848966840 +*.*.sample_index_rng_seed = 2747215439041700203 +*.*.schedule_rng_seed = 16159082839903944936 # Set seeds for TEST_05 (not needed from v5.0 onwards) *.*.test05_qsl_rng_seed = 7975553102935885558 diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 4e3622432a..67adb953f5 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -241,10 +241,9 @@ "llama3_1-8b-edge": "llama3.1-8b-edge", }, "seeds": { - # TODO: Update seeds for v6.1 - "qsl_rng_seed": 2465351861681999779, - "sample_index_rng_seed": 14276810075590677512, - "schedule_rng_seed": 3936089224930324775, + "qsl_rng_seed": 2085463073848966840, + "sample_index_rng_seed": 2747215439041700203, + "schedule_rng_seed": 16159082839903944936, }, "ignore_errors": [], "latency-constraint": { From aa65d76712030b5d708abd5ad9ae42ff86ec7a36 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 23 Jun 2026 16:11:40 -0500 Subject: [PATCH 7/9] Add SDXL seed + generate sample ids --- text_to_image/tools/sample_ids.py | 2 +- text_to_image/tools/sample_ids.txt | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/text_to_image/tools/sample_ids.py b/text_to_image/tools/sample_ids.py index 76089dc24a..11aa5d2e80 100644 --- a/text_to_image/tools/sample_ids.py +++ b/text_to_image/tools/sample_ids.py @@ -20,7 +20,7 @@ def get_args(): default=10, help="Dataset download location") parser.add_argument( - "--seed", "-s", type=int, default=1999337312, help="Dataset download location" + "--seed", "-s", type=int, default=3234044599, help="Dataset download location" ) args = parser.parse_args() return args diff --git a/text_to_image/tools/sample_ids.txt b/text_to_image/tools/sample_ids.txt index 8960e93684..201c81ba82 100644 --- a/text_to_image/tools/sample_ids.txt +++ b/text_to_image/tools/sample_ids.txt @@ -1,10 +1,10 @@ -1311 -2476 -3644 -2188 -4114 -52 -388 -1195 -3427 -2289 \ No newline at end of file +4475 +3627 +1046 +4937 +3867 +1671 +3789 +1206 +4111 +3093 \ No newline at end of file From a441acfcc2db55167edbaef55f269a4af9ee604e Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 23 Jun 2026 16:16:05 -0500 Subject: [PATCH 8/9] Update sample IDs + remove rag_db file --- .../submission_checker/constants.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 67adb953f5..f75a4afad4 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -1452,6 +1452,20 @@ "2289", ] }, + "v6.1": { + "images": [ + "4475", + "3627", + "1046", + "4937", + "3867", + "1671", + "3789", + "1206", + "4111", + "3093", + ] + } }, "wan-2.2-t2v-a14b": { "v6.0": { @@ -1484,14 +1498,6 @@ ] }, }, - "rag-db": { - "v6.1": { - "results": [ - # TODO: update name if needed - "results.json" - ] - }, - } } REQUIRED_MEASURE_FILES = ["user.conf", "README.md"] REQUIRED_POWER_MEASURE_FILES = ["analyzer_table.*", "power_settings.*"] From 60aaad1ddc0eaf5d70d3dbc9d741fe87dc1dd886 Mon Sep 17 00:00:00 2001 From: mlc-automations <3246381+mlc-automations@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:17:12 +0000 Subject: [PATCH 9/9] [Automated Commit] Format Codebase --- tools/submission/submission_checker/loader.py | 4 ++-- tools/submission/submission_checker/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/submission/submission_checker/loader.py b/tools/submission/submission_checker/loader.py index 61c176efa7..bfa9dba319 100644 --- a/tools/submission/submission_checker/loader.py +++ b/tools/submission/submission_checker/loader.py @@ -283,7 +283,7 @@ def load(self) -> Generator[SubmissionLogs, None, None]: private_id_json = self.load_single_log( private_id_json_path, "System") private_id = private_id_json[system] - except: + except BaseException: self.logger.warning( "%s Private id not cached for system %s", system_path, @@ -297,7 +297,7 @@ def load(self) -> Generator[SubmissionLogs, None, None]: system_path, private_id_json_path ) - + for benchmark in list_dir(system_path): benchmark_path = os.path.join(system_path, benchmark) if division.lower() in ["closed", "network"]: diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index 12fe13a770..e9e6d0d443 100644 --- a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -325,7 +325,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries ) @@ -334,6 +334,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): return is_valid, power_metric, scenario, avg_power_efficiency + _ADJECTIVES = [ "amber", "azure", "bold", "brave", "calm", "clear", "cool", "crisp", "dark", "deep", "deft", "epic", "fair", "fast", "firm", "flat", @@ -362,4 +363,3 @@ def generate_private_id(system_id: str) -> str: noun = _NOUNS[h[1] % len(_NOUNS)] suffix = h[2:4].hex() return f"{adj}-{noun}-{suffix}" -