From 0ee8fb9560b9412ab635a213cf17cff9b883a8aa Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 2 Feb 2026 12:24:55 -0500 Subject: [PATCH 01/11] Add shebangs in bulk pre-testing --- language/gpt-oss-120b/setup.sh | 2 ++ language/llama2-70b/build.sh | 1 + language/llama2-70b/run_accuracy.sh | 2 ++ language/llama2-70b/run_offline.sh | 2 ++ language/llama2-70b/run_server.sh | 2 +- language/llama3.1-405b/build.sh | 2 ++ language/llama3.1-405b/run_accuracy.sh | 2 ++ language/llama3.1-405b/run_offline.sh | 2 ++ language/llama3.1-405b/run_server.sh | 2 +- language/llama3.1-8b/build.sh | 2 ++ language/llama3.1-8b/run_accuracy.sh | 2 ++ language/llama3.1-8b/run_offline.sh | 2 ++ language/llama3.1-8b/run_server.sh | 2 +- language/mixtral-8x7b/build.sh | 2 ++ language/mixtral-8x7b/run_accuracy.sh | 2 ++ language/mixtral-8x7b/run_offline.sh | 2 ++ language/mixtral-8x7b/run_server.sh | 2 +- tools/upscale_coco/upscale.sh | 2 ++ 18 files changed, 31 insertions(+), 4 deletions(-) diff --git a/language/gpt-oss-120b/setup.sh b/language/gpt-oss-120b/setup.sh index 23188a0cbd..41f42c266c 100755 --- a/language/gpt-oss-120b/setup.sh +++ b/language/gpt-oss-120b/setup.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + pip install -r requirements.txt git_dir=$(git rev-parse --show-toplevel) pip install $git_dir/loadgen \ No newline at end of file diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh index 87afb992fa..999f8ea0fb 100644 --- a/language/llama2-70b/build.sh +++ b/language/llama2-70b/build.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh index b4f7f8ad96..6e852a4bcd 100644 --- a/language/llama2-70b/run_accuracy.sh +++ b/language/llama2-70b/run_accuracy.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git 
a/language/llama2-70b/run_offline.sh b/language/llama2-70b/run_offline.sh index 7153ea7cab..50a8e6a1ed 100644 --- a/language/llama2-70b/run_offline.sh +++ b/language/llama2-70b/run_offline.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git a/language/llama2-70b/run_server.sh b/language/llama2-70b/run_server.sh index a71bf371e7..b19a6afcc6 100644 --- a/language/llama2-70b/run_server.sh +++ b/language/llama2-70b/run_server.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git a/language/llama3.1-405b/build.sh b/language/llama3.1-405b/build.sh index 87afb992fa..e7e7794cda 100644 --- a/language/llama3.1-405b/build.sh +++ b/language/llama3.1-405b/build.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama3.1-405b/run_accuracy.sh b/language/llama3.1-405b/run_accuracy.sh index 9a54d8f131..e4abe91210 100644 --- a/language/llama3.1-405b/run_accuracy.sh +++ b/language/llama3.1-405b/run_accuracy.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-405b/run_offline.sh b/language/llama3.1-405b/run_offline.sh index 6b3a56e012..e0b9f91c24 100644 --- a/language/llama3.1-405b/run_offline.sh +++ b/language/llama3.1-405b/run_offline.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-405b/run_server.sh b/language/llama3.1-405b/run_server.sh index 010a359de1..2197885ec8 100644 --- a/language/llama3.1-405b/run_server.sh +++ b/language/llama3.1-405b/run_server.sh @@ 
-1,4 +1,4 @@ - +#!/usr/bin/env bash CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-8b/build.sh b/language/llama3.1-8b/build.sh index 87afb992fa..e7e7794cda 100644 --- a/language/llama3.1-8b/build.sh +++ b/language/llama3.1-8b/build.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama3.1-8b/run_accuracy.sh b/language/llama3.1-8b/run_accuracy.sh index 52ec670339..e14e53544d 100644 --- a/language/llama3.1-8b/run_accuracy.sh +++ b/language/llama3.1-8b/run_accuracy.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/llama3.1-8b/run_offline.sh b/language/llama3.1-8b/run_offline.sh index 1d6bb271ad..7897d6ae69 100644 --- a/language/llama3.1-8b/run_offline.sh +++ b/language/llama3.1-8b/run_offline.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/llama3.1-8b/run_server.sh b/language/llama3.1-8b/run_server.sh index a870e9b37b..ba885aeba3 100644 --- a/language/llama3.1-8b/run_server.sh +++ b/language/llama3.1-8b/run_server.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/mixtral-8x7b/build.sh b/language/mixtral-8x7b/build.sh index 87afb992fa..e7e7794cda 100644 --- a/language/mixtral-8x7b/build.sh +++ b/language/mixtral-8x7b/build.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/mixtral-8x7b/run_accuracy.sh b/language/mixtral-8x7b/run_accuracy.sh index 13da4e5daa..ae2c1e936b 100644 --- a/language/mixtral-8x7b/run_accuracy.sh 
+++ b/language/mixtral-8x7b/run_accuracy.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/language/mixtral-8x7b/run_offline.sh b/language/mixtral-8x7b/run_offline.sh index 48afb50bd1..b32ed58ebc 100644 --- a/language/mixtral-8x7b/run_offline.sh +++ b/language/mixtral-8x7b/run_offline.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/language/mixtral-8x7b/run_server.sh b/language/mixtral-8x7b/run_server.sh index ced7ea4432..f5ae85d082 100644 --- a/language/mixtral-8x7b/run_server.sh +++ b/language/mixtral-8x7b/run_server.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/tools/upscale_coco/upscale.sh b/tools/upscale_coco/upscale.sh index 1bcb7218ba..148e208297 100755 --- a/tools/upscale_coco/upscale.sh +++ b/tools/upscale_coco/upscale.sh @@ -1 +1,3 @@ +#!/usr/bin/env bash + python upscale_coco.py --inputs /scratch/ssd/data/coco/ --outputs ./coco700png --size 700 700 --format png From 3099c81739e0885618ec33b46542626bd956b8db Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Tue, 3 Feb 2026 09:08:39 -0500 Subject: [PATCH 02/11] Update to exec mode to rebase --- language/llama2-70b/build.sh | 0 language/llama2-70b/run_accuracy.sh | 0 language/llama2-70b/run_offline.sh | 0 language/llama2-70b/run_server.sh | 0 language/llama3.1-405b/build.sh | 0 language/llama3.1-405b/run_accuracy.sh | 0 language/llama3.1-405b/run_offline.sh | 0 language/llama3.1-405b/run_server.sh | 0 language/llama3.1-8b/build.sh | 0 language/llama3.1-8b/run_accuracy.sh | 0 language/llama3.1-8b/run_offline.sh | 0 language/llama3.1-8b/run_server.sh | 0 
language/mixtral-8x7b/build.sh | 0 language/mixtral-8x7b/run_accuracy.sh | 0 language/mixtral-8x7b/run_offline.sh | 0 language/mixtral-8x7b/run_server.sh | 0 16 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 language/llama2-70b/build.sh mode change 100644 => 100755 language/llama2-70b/run_accuracy.sh mode change 100644 => 100755 language/llama2-70b/run_offline.sh mode change 100644 => 100755 language/llama2-70b/run_server.sh mode change 100644 => 100755 language/llama3.1-405b/build.sh mode change 100644 => 100755 language/llama3.1-405b/run_accuracy.sh mode change 100644 => 100755 language/llama3.1-405b/run_offline.sh mode change 100644 => 100755 language/llama3.1-405b/run_server.sh mode change 100644 => 100755 language/llama3.1-8b/build.sh mode change 100644 => 100755 language/llama3.1-8b/run_accuracy.sh mode change 100644 => 100755 language/llama3.1-8b/run_offline.sh mode change 100644 => 100755 language/llama3.1-8b/run_server.sh mode change 100644 => 100755 language/mixtral-8x7b/build.sh mode change 100644 => 100755 language/mixtral-8x7b/run_accuracy.sh mode change 100644 => 100755 language/mixtral-8x7b/run_offline.sh mode change 100644 => 100755 language/mixtral-8x7b/run_server.sh diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh old mode 100644 new mode 100755 diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh old mode 100644 new mode 100755 diff --git a/language/llama2-70b/run_offline.sh b/language/llama2-70b/run_offline.sh old mode 100644 new mode 100755 diff --git a/language/llama2-70b/run_server.sh b/language/llama2-70b/run_server.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-405b/build.sh b/language/llama3.1-405b/build.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-405b/run_accuracy.sh b/language/llama3.1-405b/run_accuracy.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-405b/run_offline.sh 
b/language/llama3.1-405b/run_offline.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-405b/run_server.sh b/language/llama3.1-405b/run_server.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-8b/build.sh b/language/llama3.1-8b/build.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-8b/run_accuracy.sh b/language/llama3.1-8b/run_accuracy.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-8b/run_offline.sh b/language/llama3.1-8b/run_offline.sh old mode 100644 new mode 100755 diff --git a/language/llama3.1-8b/run_server.sh b/language/llama3.1-8b/run_server.sh old mode 100644 new mode 100755 diff --git a/language/mixtral-8x7b/build.sh b/language/mixtral-8x7b/build.sh old mode 100644 new mode 100755 diff --git a/language/mixtral-8x7b/run_accuracy.sh b/language/mixtral-8x7b/run_accuracy.sh old mode 100644 new mode 100755 diff --git a/language/mixtral-8x7b/run_offline.sh b/language/mixtral-8x7b/run_offline.sh old mode 100644 new mode 100755 diff --git a/language/mixtral-8x7b/run_server.sh b/language/mixtral-8x7b/run_server.sh old mode 100644 new mode 100755 From 0643ce5d806dcad9d9f91b7e9c69ed421142fc54 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 11:21:42 -0500 Subject: [PATCH 03/11] Revert "Add shebangs in bulk pre-testing" This reverts commit 0ee8fb9560b9412ab635a213cf17cff9b883a8aa. 
--- language/gpt-oss-120b/setup.sh | 2 -- language/llama2-70b/build.sh | 1 - language/llama2-70b/run_accuracy.sh | 2 -- language/llama2-70b/run_offline.sh | 2 -- language/llama2-70b/run_server.sh | 2 +- language/llama3.1-405b/build.sh | 2 -- language/llama3.1-405b/run_accuracy.sh | 2 -- language/llama3.1-405b/run_offline.sh | 2 -- language/llama3.1-405b/run_server.sh | 2 +- language/llama3.1-8b/build.sh | 2 -- language/llama3.1-8b/run_accuracy.sh | 2 -- language/llama3.1-8b/run_offline.sh | 2 -- language/llama3.1-8b/run_server.sh | 2 +- language/mixtral-8x7b/build.sh | 2 -- language/mixtral-8x7b/run_accuracy.sh | 2 -- language/mixtral-8x7b/run_offline.sh | 2 -- language/mixtral-8x7b/run_server.sh | 2 +- tools/upscale_coco/upscale.sh | 2 -- 18 files changed, 4 insertions(+), 31 deletions(-) diff --git a/language/gpt-oss-120b/setup.sh b/language/gpt-oss-120b/setup.sh index 41f42c266c..23188a0cbd 100755 --- a/language/gpt-oss-120b/setup.sh +++ b/language/gpt-oss-120b/setup.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - pip install -r requirements.txt git_dir=$(git rev-parse --show-toplevel) pip install $git_dir/loadgen \ No newline at end of file diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh index 999f8ea0fb..87afb992fa 100755 --- a/language/llama2-70b/build.sh +++ b/language/llama2-70b/build.sh @@ -1,4 +1,3 @@ -#!/usr/bin/env bash set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh index 6e852a4bcd..b4f7f8ad96 100755 --- a/language/llama2-70b/run_accuracy.sh +++ b/language/llama2-70b/run_accuracy.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git a/language/llama2-70b/run_offline.sh b/language/llama2-70b/run_offline.sh index 50a8e6a1ed..7153ea7cab 100755 --- a/language/llama2-70b/run_offline.sh +++ 
b/language/llama2-70b/run_offline.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git a/language/llama2-70b/run_server.sh b/language/llama2-70b/run_server.sh index b19a6afcc6..a71bf371e7 100755 --- a/language/llama2-70b/run_server.sh +++ b/language/llama2-70b/run_server.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}" DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" diff --git a/language/llama3.1-405b/build.sh b/language/llama3.1-405b/build.sh index e7e7794cda..87afb992fa 100755 --- a/language/llama3.1-405b/build.sh +++ b/language/llama3.1-405b/build.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama3.1-405b/run_accuracy.sh b/language/llama3.1-405b/run_accuracy.sh index e4abe91210..9a54d8f131 100755 --- a/language/llama3.1-405b/run_accuracy.sh +++ b/language/llama3.1-405b/run_accuracy.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-405b/run_offline.sh b/language/llama3.1-405b/run_offline.sh index e0b9f91c24..6b3a56e012 100755 --- a/language/llama3.1-405b/run_offline.sh +++ b/language/llama3.1-405b/run_offline.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-405b/run_server.sh b/language/llama3.1-405b/run_server.sh index 2197885ec8..010a359de1 100755 --- a/language/llama3.1-405b/run_server.sh +++ b/language/llama3.1-405b/run_server.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" 
DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" diff --git a/language/llama3.1-8b/build.sh b/language/llama3.1-8b/build.sh index e7e7794cda..87afb992fa 100755 --- a/language/llama3.1-8b/build.sh +++ b/language/llama3.1-8b/build.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/llama3.1-8b/run_accuracy.sh b/language/llama3.1-8b/run_accuracy.sh index e14e53544d..52ec670339 100755 --- a/language/llama3.1-8b/run_accuracy.sh +++ b/language/llama3.1-8b/run_accuracy.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/llama3.1-8b/run_offline.sh b/language/llama3.1-8b/run_offline.sh index 7897d6ae69..1d6bb271ad 100755 --- a/language/llama3.1-8b/run_offline.sh +++ b/language/llama3.1-8b/run_offline.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/llama3.1-8b/run_server.sh b/language/llama3.1-8b/run_server.sh index ba885aeba3..a870e9b37b 100755 --- a/language/llama3.1-8b/run_server.sh +++ b/language/llama3.1-8b/run_server.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:meta-llama/Meta-Llama-3.1-8B-Instruct}" DATASET_PATH="${DATASET_PATH:cnn_eval.json}" diff --git a/language/mixtral-8x7b/build.sh b/language/mixtral-8x7b/build.sh index e7e7794cda..87afb992fa 100755 --- a/language/mixtral-8x7b/build.sh +++ b/language/mixtral-8x7b/build.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - set -e conda install pybind11==2.10.4 -c conda-forge -y diff --git a/language/mixtral-8x7b/run_accuracy.sh b/language/mixtral-8x7b/run_accuracy.sh index ae2c1e936b..13da4e5daa 100755 --- a/language/mixtral-8x7b/run_accuracy.sh +++ b/language/mixtral-8x7b/run_accuracy.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - 
CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/language/mixtral-8x7b/run_offline.sh b/language/mixtral-8x7b/run_offline.sh index b32ed58ebc..48afb50bd1 100755 --- a/language/mixtral-8x7b/run_offline.sh +++ b/language/mixtral-8x7b/run_offline.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/language/mixtral-8x7b/run_server.sh b/language/mixtral-8x7b/run_server.sh index f5ae85d082..ced7ea4432 100755 --- a/language/mixtral-8x7b/run_server.sh +++ b/language/mixtral-8x7b/run_server.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash + CHECKPOINT_PATH="${CHECKPOINT_PATH:mistralai/Mixtral-8x7B-Instruct-v0.1}" DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}" diff --git a/tools/upscale_coco/upscale.sh b/tools/upscale_coco/upscale.sh index 148e208297..1bcb7218ba 100755 --- a/tools/upscale_coco/upscale.sh +++ b/tools/upscale_coco/upscale.sh @@ -1,3 +1 @@ -#!/usr/bin/env bash - python upscale_coco.py --inputs /scratch/ssd/data/coco/ --outputs ./coco700png --size 700 700 --format png From b70c15fc1bce25c4639dfcbe7b8bfe66f2d8b877 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 11:21:43 -0500 Subject: [PATCH 04/11] Revert "Update to exec mode" This reverts commit 3099c81739e0885618ec33b46542626bd956b8db. 
--- language/llama2-70b/build.sh | 0 language/llama2-70b/run_accuracy.sh | 0 language/llama2-70b/run_offline.sh | 0 language/llama2-70b/run_server.sh | 0 language/llama3.1-405b/build.sh | 0 language/llama3.1-405b/run_accuracy.sh | 0 language/llama3.1-405b/run_offline.sh | 0 language/llama3.1-405b/run_server.sh | 0 language/llama3.1-8b/build.sh | 0 language/llama3.1-8b/run_accuracy.sh | 0 language/llama3.1-8b/run_offline.sh | 0 language/llama3.1-8b/run_server.sh | 0 language/mixtral-8x7b/build.sh | 0 language/mixtral-8x7b/run_accuracy.sh | 0 language/mixtral-8x7b/run_offline.sh | 0 language/mixtral-8x7b/run_server.sh | 0 16 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 language/llama2-70b/build.sh mode change 100755 => 100644 language/llama2-70b/run_accuracy.sh mode change 100755 => 100644 language/llama2-70b/run_offline.sh mode change 100755 => 100644 language/llama2-70b/run_server.sh mode change 100755 => 100644 language/llama3.1-405b/build.sh mode change 100755 => 100644 language/llama3.1-405b/run_accuracy.sh mode change 100755 => 100644 language/llama3.1-405b/run_offline.sh mode change 100755 => 100644 language/llama3.1-405b/run_server.sh mode change 100755 => 100644 language/llama3.1-8b/build.sh mode change 100755 => 100644 language/llama3.1-8b/run_accuracy.sh mode change 100755 => 100644 language/llama3.1-8b/run_offline.sh mode change 100755 => 100644 language/llama3.1-8b/run_server.sh mode change 100755 => 100644 language/mixtral-8x7b/build.sh mode change 100755 => 100644 language/mixtral-8x7b/run_accuracy.sh mode change 100755 => 100644 language/mixtral-8x7b/run_offline.sh mode change 100755 => 100644 language/mixtral-8x7b/run_server.sh diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh old mode 100755 new mode 100644 diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh old mode 100755 new mode 100644 diff --git a/language/llama2-70b/run_offline.sh 
b/language/llama2-70b/run_offline.sh old mode 100755 new mode 100644 diff --git a/language/llama2-70b/run_server.sh b/language/llama2-70b/run_server.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-405b/build.sh b/language/llama3.1-405b/build.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-405b/run_accuracy.sh b/language/llama3.1-405b/run_accuracy.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-405b/run_offline.sh b/language/llama3.1-405b/run_offline.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-405b/run_server.sh b/language/llama3.1-405b/run_server.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-8b/build.sh b/language/llama3.1-8b/build.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-8b/run_accuracy.sh b/language/llama3.1-8b/run_accuracy.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-8b/run_offline.sh b/language/llama3.1-8b/run_offline.sh old mode 100755 new mode 100644 diff --git a/language/llama3.1-8b/run_server.sh b/language/llama3.1-8b/run_server.sh old mode 100755 new mode 100644 diff --git a/language/mixtral-8x7b/build.sh b/language/mixtral-8x7b/build.sh old mode 100755 new mode 100644 diff --git a/language/mixtral-8x7b/run_accuracy.sh b/language/mixtral-8x7b/run_accuracy.sh old mode 100755 new mode 100644 diff --git a/language/mixtral-8x7b/run_offline.sh b/language/mixtral-8x7b/run_offline.sh old mode 100755 new mode 100644 diff --git a/language/mixtral-8x7b/run_server.sh b/language/mixtral-8x7b/run_server.sh old mode 100755 new mode 100644 From f013f01c74e3b5950d024bd09580bf64a53af681 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 12:03:54 -0500 Subject: [PATCH 05/11] Initial loguru update --- tools/submission/README.md | 8 ++ tools/submission/log_parser.py | 10 +- tools/submission/power/power_checker.py | 7 +- tools/submission/preprocess_submission.py | 52 ++++---- tools/submission/requirements.txt | 1 + 
.../checks/accuracy_check.py | 32 ++--- .../submission_checker/checks/base.py | 18 +-- .../checks/compliance_check.py | 105 ++++++++------- .../checks/measurements_checks.py | 26 ++-- .../checks/performance_check.py | 126 +++++++++--------- .../checks/power/power_checker.py | 5 +- .../submission_checker/checks/power_check.py | 34 ++--- .../submission_checker/checks/system_check.py | 34 ++--- tools/submission/submission_checker/loader.py | 25 ++-- tools/submission/submission_checker/main.py | 59 ++++---- .../parsers/loadgen_parser.py | 4 +- tools/submission/submission_checker/utils.py | 29 ++-- tools/submission/summary.csv | 1 + tools/submission/truncate_accuracy_log.py | 32 ++--- 19 files changed, 298 insertions(+), 310 deletions(-) create mode 100644 tools/submission/requirements.txt create mode 100644 tools/submission/summary.csv diff --git a/tools/submission/README.md b/tools/submission/README.md index 6d620233b2..5d6299d8e6 100644 --- a/tools/submission/README.md +++ b/tools/submission/README.md @@ -2,6 +2,14 @@ Please follow the [official submission automation page](https://docs.mlcommons.org/inference/submission/) for doing a submission. It wraps all the submission related files listed below. 
+## Installation + +Install the prerequisites for the submission checker using the provided `requirements.txt` file: + +```bash +pip install -r requirements.txt +``` + ## `truncate_accuracy_log.py` (Mandatory) ### Inputs diff --git a/tools/submission/log_parser.py b/tools/submission/log_parser.py index f771c9e303..96f86c4f37 100755 --- a/tools/submission/log_parser.py +++ b/tools/submission/log_parser.py @@ -14,19 +14,13 @@ import argparse import json -import logging +from loguru import logger import os import re import sys # pylint: disable=missing-docstring -logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] %(message)s", -) - - class MLPerfLog: def __init__(self, log_path, strict=True): """ @@ -39,7 +33,7 @@ def __init__(self, log_path, strict=True): self.endpoints_marker = ":::ENDPTS" self.marker = "" self.log_is_endpoints = False - self.logger = logging.getLogger("MLPerfLog") + self.logger = logger self.messages = [] with open(log_path, "r", encoding="utf-8") as f: for i, line in enumerate(f): diff --git a/tools/submission/power/power_checker.py b/tools/submission/power/power_checker.py index c723adbfd9..8d66751f76 100755 --- a/tools/submission/power/power_checker.py +++ b/tools/submission/power/power_checker.py @@ -24,12 +24,7 @@ import re import traceback import uuid -import logging - - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("main") - +from loguru import logger as log class LineWithoutTimeStamp(Exception): pass diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py index 0101bbf12f..fa76ac1a38 100644 --- a/tools/submission/preprocess_submission.py +++ b/tools/submission/preprocess_submission.py @@ -3,7 +3,7 @@ """ import argparse -import logging +from loguru import logger as log import os import sys import shutil @@ -12,10 +12,6 @@ import submission_checker_old as checker -logging.basicConfig(level=logging.INFO) -log = 
logging.getLogger("main") - - HELP_TEXT = """ pick an existing submission directory and create a brand new submission tree with possible results being inferred from already measured ones. The original submission directory is not modified. @@ -92,7 +88,7 @@ def delete_empty_dirs(src): return False if all([delete_empty_dirs(os.path.join(src, file)) for file in os.listdir(src)]): - log.info("Removing empty dir: (%s)", src) + log.info("Removing empty dir: {src}", src=src) os.rmdir(src) return True @@ -388,15 +384,14 @@ def infer_scenario_results(args, config): for directory in ["results"]: log_path = os.path.join(division, submitter, directory) if not os.path.exists(log_path): - log.error("no submission in %s", log_path) + log.error("no submission in {log_path}", log_path=log_path) continue for system_desc in list_dir(log_path): system_id_json = os.path.join(division, submitter, "systems", system_desc + ".json") if not os.path.exists(system_id_json): - log.error("no system_desc for %s/%s/%s", division, submitter, - system_desc) + log.error("no system_desc for {division}/{submitter}/{system_desc}", division=division, submitter=submitter, system_desc=system_desc) continue with open(system_id_json) as system_info: @@ -405,9 +400,9 @@ def infer_scenario_results(args, config): valid_system_types = ["datacenter", "edge", "datacenter,edge", "edge,datacenter"] if system_type not in valid_system_types: - log.error("Division %s, submitter %s, " - "system %s has invalid system type (%s)", - division, submitter, system_id_json, system_type) + log.error("Division {division}, submitter {submitter}, " + "system {system_id_json} has invalid system type ({system_type})", + division=division, submitter=submitter, system_id_json=system_id_json, system_type=system_type) config.set_type(system_type) @@ -422,9 +417,9 @@ def infer_scenario_results(args, config): mlperf_model = config.get_mlperf_model( model, extra_model_mapping) if not mlperf_model: - log.error("Division %s, submitter %s, 
system %s has " - "invalid model (%s)", division, submitter, - system_id_json, model) + log.error("Division {division}, submitter {submitter}, system {system_id_json} has " + "invalid model ({model})", division=division, submitter=submitter, + system_id_json=system_id_json, model=model) continue if mlperf_model not in config.required: @@ -485,11 +480,11 @@ def infer_scenario_results(args, config): for tobeinferredpath in tobeinferredpaths: inferred_scenario = os.path.basename( tobeinferredpath) - log.info("Division %s, submitter %s, system %s, " - "model %s: \ - inferring %s results from %s", - division, submitter, system_desc, model, - inferred_scenario, "singlestream") + log.info("Division {division}, submitter {submitter}, system {system_desc}, " + "model {model}: " + "inferring {inferred_scenario} results from {singlestream}", + division=division, submitter=submitter, system_desc=system_desc, model=model, + inferred_scenario=inferred_scenario, singlestream="singlestream") shutil.copytree( scenario_path, tobeinferredpath) @@ -498,9 +493,9 @@ def infer_scenario_results(args, config): # infer MS from SS for tobeinferredpath in [ multistream_scenario_path]: - log.info("Division %s, submitter %s, system %s, model %s: \ - inferring %s results from %s", division, submitter, - system_desc, model, "multistream", "singlestream") + log.info("Division {division}, submitter {submitter}, system {system_desc}, model {model}: \ + inferring {inferred_scenario} results from {singlestream}", division=division, submitter=submitter, system_desc=system_desc, model=model, + inferred_scenario="multistream", singlestream="singlestream") shutil.copytree( scenario_path, multistream_scenario_path) elif not os.path.exists(offline_scenario_path): @@ -519,9 +514,9 @@ def infer_scenario_results(args, config): for tobeinferredpath in [ offline_scenario_path]: if not os.path.exists(tobeinferredpath): - log.info("Division %s, submitter %s, system %s, model %s: \ - inferring %s results from %s", 
division, submitter, - system_desc, model, "offline", "multistream") + log.info("Division {division}, submitter {submitter}, system {system_desc}, model {model}: \ + inferring {inferred_scenario} results from {multistream}", division=division, submitter=submitter, system_desc=system_desc, model=model, + inferred_scenario="offline", multistream="multistream") shutil.copytree( scenario_path, tobeinferredpath) @@ -538,9 +533,8 @@ def infer_scenario_results(args, config): low_accuracy_model_path = os.path.join(log_path, system_desc, low_accuracy_model) if not os.path.exists(low_accuracy_model_path): - log.info("Division %s, submitter %s, system %s: \ - copying %s results to %s", division, submitter, - system_desc, model, low_accuracy_model) + log.info("Division {division}, submitter {submitter}, system {system_desc}: \ + copying {model} results to {low_accuracy_model}", division=division, submitter=submitter, system_desc=system_desc, model=model, low_accuracy_model=low_accuracy_model) shutil.copytree(high_accuracy_model_path, low_accuracy_model_path) diff --git a/tools/submission/requirements.txt b/tools/submission/requirements.txt new file mode 100644 index 0000000000..7248303e54 --- /dev/null +++ b/tools/submission/requirements.txt @@ -0,0 +1 @@ +loguru \ No newline at end of file diff --git a/tools/submission/submission_checker/checks/accuracy_check.py b/tools/submission/submission_checker/checks/accuracy_check.py index a300aa1ebb..654ff4d31c 100644 --- a/tools/submission/submission_checker/checks/accuracy_check.py +++ b/tools/submission/submission_checker/checks/accuracy_check.py @@ -117,10 +117,10 @@ def accuracy_result_check(self): elif acc is not None: all_accuracy_valid = False self.log.warning( - "%s accuracy not met: expected=%f, found=%s", - self.path, - acc_target, - acc, + "{path} accuracy not met: expected={acc_target}, found={acc}", + path=self.path, + acc_target=acc_target, + acc=acc, ) if acc: result_acc[acc_type] = acc @@ -142,10 +142,10 @@ def 
accuracy_result_check(self): ): acc_limit_check = False self.log.warning( - "%s accuracy not met: upper limit=%f, found=%s", - self.path, - acc_limit, - acc, + "{path} accuracy not met: upper limit={acc_limit}, found={acc}", + path=self.path, + acc_limit=acc_limit, + acc=acc, ) acc = None if all(acc_seen) and hash_val: @@ -154,7 +154,7 @@ def accuracy_result_check(self): if acc_upper_limit is not None: is_valid &= acc_limit_check if not hash_val: - self.log.error("%s not hash value for accuracy.txt", self.path) + self.log.error("{path} not hash value for accuracy.txt", path=self.path) is_valid = False self.submission_logs.loader_data["accuracy_metrics"] = result_acc if self.division.lower() == "open": @@ -169,11 +169,11 @@ def accuracy_json_check(self): exceed `MAX_ACCURACY_LOG_SIZE`, False otherwise. """ if not os.path.exists(self.accuracy_json): - self.log.error("%s is missing", self.accuracy_json) + self.log.error("{accuracy_json} is missing", accuracy_json=self.accuracy_json) return False else: if os.stat(self.accuracy_json).st_size > MAX_ACCURACY_LOG_SIZE: - self.log.error("%s is not truncated", self.accuracy_json) + self.log.error("{accuracy_json} is not truncated", accuracy_json=self.accuracy_json) return False return True @@ -193,13 +193,13 @@ def loadgen_errors_check(self): for error in self.mlperf_log.get_errors(): if "Loadgen built with uncommitted changes!" 
not in error["value"]: has_other_errors = True - self.log.error("%s contains errors:", self.path) + self.log.error("{path} contains errors:", path=self.path) for error in self.mlperf_log.get_errors(): - self.log.error("%s", error["value"]) + self.log.error("{error_value}", error_value=error["value"]) if not self.config.ignore_uncommited or has_other_errors: self.log.error( - "%s has loadgen errors, number of errors: %s", self.path, self.mlperf_log.num_errors() + "{path} has loadgen errors, number of errors: {num_errors}", path=self.path, num_errors=self.mlperf_log.num_errors() ) return False return True @@ -218,7 +218,7 @@ def dataset_check(self): """ if self.config.skip_dataset_size_check: self.log.info( - "%s Skipping dataset size check", self.path + "{path} Skipping dataset size check", path=self.path ) return True expected_qsl_total_count = self.config.get_accuracy_sample_count( @@ -230,7 +230,7 @@ def dataset_check(self): if qsl_total_count != expected_qsl_total_count: self.log.error( - "%s accurcy run does not cover all dataset, accuracy samples: %s, dataset size: %s", self.path, qsl_total_count, expected_qsl_total_count + "{path} accuracy run does not cover all dataset, accuracy samples: {qsl_total_count}, dataset size: {expected_qsl_total_count}", path=self.path, qsl_total_count=qsl_total_count, expected_qsl_total_count=expected_qsl_total_count ) return False return True diff --git a/tools/submission/submission_checker/checks/base.py b/tools/submission/submission_checker/checks/base.py index 8e2a678fb9..5aacd84344 100644 --- a/tools/submission/submission_checker/checks/base.py +++ b/tools/submission/submission_checker/checks/base.py @@ -27,10 +27,10 @@ def run_checks(self): except BaseException: valid &= False self.log.error( - "Execution occurred in running check %s. Running %s in %s", - self.path, - check.__name__, - self.__class__.__name__) + "Execution occurred in running check {check_name}. 
Running {check_name} in {class_name}", + path=self.path, + check_name=check.__name__, + class_name=self.__class__.__name__) return valid def execute(self, check): @@ -39,13 +39,13 @@ def execute(self, check): def __call__(self): """Allows the check instance to be called like a function.""" - self.log.info("Starting %s for: %s", self.name, self.path) + self.log.info("Starting {name} for: {path}", name=self.name, path=self.path) valid = self.run_checks() if valid: - self.log.info("All %s checks passed for: %s", self.name, self.path) + self.log.info("All {name} checks passed for: {path}", name=self.name, path=self.path) else: self.log.error( - "Some %s Checks failed for: %s", - self.name, - self.path) + "Some {name} Checks failed for: {path}", + name=self.name, + path=self.path) return valid diff --git a/tools/submission/submission_checker/checks/compliance_check.py b/tools/submission/submission_checker/checks/compliance_check.py index a152383cbe..8b2f5cd3f4 100644 --- a/tools/submission/submission_checker/checks/compliance_check.py +++ b/tools/submission/submission_checker/checks/compliance_check.py @@ -110,8 +110,8 @@ def dir_exists_check(self): if self.division.lower() == "open": self.log.info( - "Compliance tests not needed for open division. Skipping tests on %s", - self.path) + "Compliance tests not needed for open division. Skipping tests on {path}", + path=self.path) return True is_valid = True for test in self.test_list: @@ -128,33 +128,35 @@ def dir_exists_check(self): "mlperf_log_detail.txt") if not os.path.exists(test_dir): self.log.error( - "Missing %s in compliance dir %s", - test, - self.compliance_dir) + "Missing {test} in compliance dir {compliance_dir}", + test=test, + compliance_dir=self.compliance_dir) is_valid = False # TEST01, TEST06, TEST07 and TEST08 require verify_accuracy.txt if test in ["TEST01", "TEST06", "TEST07", "TEST08"]: if not os.path.exists(acc_path): self.log.error( - "Missing accuracy file in compliance dir. 
Needs file %s", acc_path) + "Missing accuracy file in compliance dir. Needs file {acc_path}", + acc_path=acc_path) is_valid = False if test in ["TEST01", "TEST04"]: if not os.path.exists(perf_comp_path): self.log.error( - "Missing performance file in compliance dir. Needs file %s", - perf_comp_path) + "Missing performance file in compliance dir. Needs file {perf_comp_path}", + perf_comp_path=perf_comp_path) is_valid = False if not os.path.exists(perf_path): self.log.error( - "Missing perfomance file in compliance dir. Needs file %s", perf_path) + "Missing perfomance file in compliance dir. Needs file {perf_path}", + perf_path=perf_path) is_valid = False if test == "TEST09": output_len_path = os.path.join( self.compliance_dir, test, "verify_output_len.txt") if not os.path.exists(output_len_path): self.log.error( - "Missing output length verification file in compliance dir. Needs file %s", - output_len_path) + "Missing output length verification file in compliance dir. Needs file {output_len_path}", + output_len_path=output_len_path) is_valid = False return is_valid @@ -172,8 +174,8 @@ def performance_check(self): if self.division.lower() == "open": self.log.info( - "Compliance tests not needed for open division. Skipping tests on %s", - self.path) + "Compliance tests not needed for open division. Skipping tests on {path}", + path=self.path) return True is_valid = True for test in self.test_list: @@ -208,8 +210,8 @@ def accuracy_check(self): if self.division.lower() == "open": self.log.info( - "Compliance tests not needed for open division. Skipping tests on %s", - self.path) + "Compliance tests not needed for open division. 
Skipping tests on {path}", + path=self.path) return True is_valid = True for test in self.test_list: @@ -219,18 +221,18 @@ def accuracy_check(self): lines = [line.strip() for line in lines] if "TEST PASS" in lines: self.log.info( - "Compliance test accuracy check (deterministic mode) in %s passed", - test_dir, + "Compliance test accuracy check (deterministic mode) in {test_dir} passed", + test_dir=test_dir, ) else: self.log.info( - "Compliance test accuracy check (deterministic mode) in %s failed", - test_dir, + "Compliance test accuracy check (deterministic mode) in {test_dir} failed", + test_dir=test_dir, ) test_acc_path = os.path.join(test_dir, "accuracy") if not os.path.exists(test_acc_path): self.log.error( - "%s has no accuracy directory", test_dir) + "{test_dir} has no accuracy directory", test_dir=test_dir) is_valid = False else: diff = files_diff( @@ -239,9 +241,9 @@ def accuracy_check(self): ) if diff: self.log.error( - "%s has file list mismatch (%s)", - test_acc_path, - diff) + "{test_acc_path} has file list mismatch ({diff})", + test_acc_path=test_acc_path, + diff=diff) is_valid = False else: target = self.config.get_accuracy_target( @@ -301,8 +303,8 @@ def accuracy_check(self): is_valid = True else: self.log.error( - "Compliance test accuracy check (non-deterministic mode) in %s failed", - test_dir, + "Compliance test accuracy check (non-deterministic mode) in {test_dir} failed", + test_dir=test_dir, ) is_valid = False break @@ -330,14 +332,14 @@ def accuracy_check(self): content = f.read() if "TEST PASS" in content: self.log.info( - "TEST07 accuracy check in %s passed", test_dir) + "TEST07 accuracy check in {test_dir} passed", test_dir=test_dir) else: self.log.error( - "TEST07 accuracy check in %s failed", test_dir) + "TEST07 accuracy check in {test_dir} failed", test_dir=test_dir) is_valid = False else: self.log.error( - "TEST07 verify_accuracy.txt missing in %s", test_dir) + "TEST07 verify_accuracy.txt missing in {test_dir}", test_dir=test_dir) 
is_valid = False elif test == "TEST09": # TEST09: Verify output token length in performance mode @@ -349,14 +351,14 @@ def accuracy_check(self): content = f.read() if "TEST PASS" in content: self.log.info( - "TEST09 output length check in %s passed", test_dir) + "TEST09 output length check in {test_dir} passed", test_dir=test_dir) else: self.log.error( - "TEST09 output length check in %s failed", test_dir) + "TEST09 output length check in {test_dir} failed", test_dir=test_dir) is_valid = False else: self.log.error( - "TEST09 verify_output_len.txt missing in %s", test_dir) + "TEST09 verify_output_len.txt missing in {test_dir}", test_dir=test_dir) is_valid = False elif test == "TEST08": # TEST08 is used for dlrm-v3 streaming dataset compliance @@ -366,20 +368,20 @@ def accuracy_check(self): f"{test}_acc_result") if lines is None: self.log.error( - "TEST08 accuracy result file not found for %s", test_dir) + "TEST08 accuracy result file not found for {test_dir}", test_dir=test_dir) is_valid = False else: lines = [line.strip() for line in lines] if "TEST PASS" in lines: self.log.info( - "Compliance test TEST08 accuracy check in %s passed", - test_dir, + "Compliance test TEST08 accuracy check in {test_dir} passed", + test_dir=test_dir, ) else: self.log.error( - "Compliance test TEST08 accuracy check in %s failed. " + "Compliance test TEST08 accuracy check in {test_dir} failed. 
" "Expected 'TEST PASS' in verify_accuracy.txt", - test_dir, + test_dir=test_dir, ) is_valid = False elif test == "TEST07": @@ -391,14 +393,14 @@ def accuracy_check(self): content = f.read() if "TEST PASS" in content: self.log.info( - "TEST07 accuracy check in %s passed", test_dir) + "TEST07 accuracy check in {test_dir} passed", test_dir=test_dir) else: self.log.error( - "TEST07 accuracy check in %s failed", test_dir) + "TEST07 accuracy check in {test_dir} failed", test_dir=test_dir) is_valid = False else: self.log.error( - "TEST07 verify_accuracy.txt missing in %s", test_dir) + "TEST07 verify_accuracy.txt missing in {test_dir}", test_dir=test_dir) is_valid = False elif test == "TEST09": # TEST09: Verify output token length in performance mode @@ -410,14 +412,14 @@ def accuracy_check(self): content = f.read() if "TEST PASS" in content: self.log.info( - "TEST09 output length check in %s passed", test_dir) + "TEST09 output length check in {test_dir} passed", test_dir=test_dir) else: self.log.error( - "TEST09 output length check in %s failed", test_dir) + "TEST09 output length check in {test_dir} failed", test_dir=test_dir) is_valid = False else: self.log.error( - "TEST09 verify_output_len.txt missing in %s", test_dir) + "TEST09 verify_output_len.txt missing in {test_dir}", test_dir=test_dir) is_valid = False else: self.log.info(f"{test_dir} does not require accuracy check") @@ -437,8 +439,8 @@ def compliance_performance_check(self): if self.division.lower() == "open": self.log.info( - "Compliance tests not needed for open division. Skipping tests on %s", - self.path) + "Compliance tests not needed for open division. 
Skipping tests on {path}", + path=self.path) return True is_valid = True for test in self.test_list: @@ -446,7 +448,8 @@ def compliance_performance_check(self): if test in ["TEST01", "TEST04"]: fname = os.path.join(test_dir, "verify_performance.txt") if not os.path.exists(fname): - self.log.error("%s is missing in %s", fname, test_dir) + self.log.error( + "{fname} is missing in {test_dir}", fname=fname, test_dir=test_dir) is_valid = False else: with open(fname, "r") as f: @@ -457,15 +460,15 @@ def compliance_performance_check(self): break if is_valid == False: self.log.error( - "Compliance test performance check in %s failed", - test_dir) + "Compliance test performance check in {test_dir} failed", + test_dir=test_dir) # Check performance dir test_perf_path = os.path.join( test_dir, "performance", "run_1") if not os.path.exists(test_perf_path): self.log.error( - "%s has no performance/run_1 directory", test_dir) + "{test_dir} has no performance/run_1 directory", test_dir=test_dir) is_valid = False else: diff = files_diff( @@ -475,8 +478,8 @@ def compliance_performance_check(self): ) if diff: self.log.error( - "%s has file list mismatch (%s)", - test_perf_path, - diff) + "{test_perf_path} has file list mismatch ({diff})", + test_perf_path=test_perf_path, + diff=diff) is_valid = False return is_valid diff --git a/tools/submission/submission_checker/checks/measurements_checks.py b/tools/submission/submission_checker/checks/measurements_checks.py index 06b89f56fc..7c7bbe66aa 100644 --- a/tools/submission/submission_checker/checks/measurements_checks.py +++ b/tools/submission/submission_checker/checks/measurements_checks.py @@ -70,8 +70,8 @@ def missing_check(self): """ if self.measurements_json is None: self.log.error( - "%s measurements json file not found", - self.path + "{path} measurements json file not found", + path=self.path ) return False return True @@ -84,8 +84,8 @@ def directory_exist_check(self): """ if not os.path.exists(self.src_dir): self.log.error( - "%s 
src directory does not exist", - self.src_dir + "{src_dir} src directory does not exist", + src_dir=self.src_dir ) return False return True @@ -104,15 +104,20 @@ def required_files_check(self): files = list_files(self.measurements_dir) for i in REQUIRED_MEASURE_FILES: if i not in files: - self.log.error("%s is missing %s", self.measurements_dir, i) + self.log.error( + "{measurements_dir} is missing {file}", + measurements_dir=self.measurements_dir, + file=i + ) is_valid = False elif not self.config.skip_empty_files_check and ( os.stat(os.path.join(self.measurements_dir, i)).st_size == 0 ): self.log.error( - "%s is having empty %s", - self.measurements_dir, - i) + "{measurements_dir} is having empty {file}", + measurements_dir=self.measurements_dir, + file=i + ) is_valid = False return is_valid @@ -131,9 +136,10 @@ def required_fields_check(self): for k in SYSTEM_IMP_REQUIRED_FILES: if k not in self.measurements_json: is_valid = False - self.log.error("%s, field %s is missing", self.path, k) + self.log.error( + "{path}, field {k} is missing", path=self.path, k=k) elif check_empty_fields and not self.measurements_json[k]: is_valid = False self.log.error( - "%s, field %s is missing meaningful value", self.path, k) + "{path}, field {k} is missing meaningful value", path=self.path, k=k) return is_valid diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 4923862b71..e83f2bc891 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -81,7 +81,7 @@ def missing_check(self): bool: True if `mlperf_log` is present, False otherwise. 
""" if self.mlperf_log is None: - self.log.error("Performance log missing at %s", self.path) + self.log.error("Performance log missing at {path}", path=self.path) return False return True @@ -101,13 +101,13 @@ def loadgen_errors_check(self): for error in self.mlperf_log.get_errors(): if "Loadgen built with uncommitted changes!" not in error["value"]: has_other_errors = True - self.log.error("%s contains errors:", self.path) + self.log.error("{path} contains errors:", path=self.path) for error in self.mlperf_log.get_errors(): - self.log.error("%s", error["value"]) + self.log.error("{error}", path=self.path, error=error["value"]) if not self.config.ignore_uncommited or has_other_errors: self.log.error( - "%s has loadgen errors, number of errors: %s", self.path, self.mlperf_log.num_errors() + "{path} has loadgen errors, number of errors: {num_errors}", path=self.path, num_errors=self.mlperf_log.num_errors() ) return False return True @@ -125,8 +125,8 @@ def equal_issue_check(self): if self.config.requires_equal_issue( self.model, self.division) and self.mlperf_log["effective_sample_concatenate_permutation"]: self.log.error( - "%s requires equal issue mode (sample_concatenate_permutation), expected=true, found=false", - self.path) + "{path} requires equal issue mode (sample_concatenate_permutation), expected=true, found=false", + path=self.path) return False return True @@ -145,10 +145,10 @@ def performance_sample_count_check(self): performance_sample_count = self.mlperf_log["effective_performance_sample_count"] if performance_sample_count < required_performance_sample_count: self.log.error( - "%s performance_sample_count, found %d, needs to be >= %d", - self.path, - performance_sample_count, - required_performance_sample_count, + "{path} performance_sample_count, found {performance_sample_count}, needs to be >= {required_performance_sample_count}", + path=self.path, + performance_sample_count=performance_sample_count, + 
required_performance_sample_count=required_performance_sample_count, ) return False return True @@ -169,26 +169,26 @@ def seeds_check(self): is_valid = True if qsl_rng_seed != config_seeds["qsl_rng_seed"]: self.log.error( - "%s qsl_rng_seed is wrong, expected=%s, found=%s", - self.path, - config_seeds["qsl_rng_seed"], - qsl_rng_seed, + "{path} qsl_rng_seed is wrong, expected={expected}, found={found}", + path=self.path, + expected=config_seeds["qsl_rng_seed"], + found=qsl_rng_seed, ) is_valid = False if sample_index_rng_seed != config_seeds["sample_index_rng_seed"]: self.log.error( - "%s sample_index_rng_seed is wrong, expected=%s, found=%s", - self.path, - config_seeds["sample_index_rng_seed"], - sample_index_rng_seed, + "{path} sample_index_rng_seed is wrong, expected={expected}, found={found}", + path=self.path, + expected=config_seeds["sample_index_rng_seed"], + found=sample_index_rng_seed, ) is_valid = False if schedule_rng_seed != config_seeds["schedule_rng_seed"]: self.log.error( - "%s schedule_rng_seed is wrong, expected=%s, found=%s", - self.path, - config_seeds["schedule_rng_seed"], - schedule_rng_seed, + "{path} schedule_rng_seed is wrong, expected={expected}, found={found}", + path=self.path, + expected=config_seeds["schedule_rng_seed"], + found=schedule_rng_seed, ) is_valid = False return is_valid @@ -209,8 +209,8 @@ def latency_check(self): if not self.mlperf_log["early_stopping_met"]: early_stopping_result = self.mlperf_log["early_stopping_result"] self.log.error( - "Early stopping condition was not met, msg=%s", - early_stopping_result, + "Early stopping condition was not met, msg={early_stopping_result}", + early_stopping_result=early_stopping_result, ) return False # If the scenario has a target latency (Server scenario), check @@ -221,17 +221,17 @@ def latency_check(self): if target_latency: early_stopping_latency_ns = self.mlperf_log["effective_target_latency_ns"] self.log.info( - "Target latency: %s, Early Stopping Latency: %s, Scenario: %s", - 
target_latency, - early_stopping_latency_ns, - self.scenario, + "Target latency: {target_latency}, Early Stopping Latency: {early_stopping_latency_ns}, Scenario: {scenario}", + target_latency=target_latency, + early_stopping_latency_ns=early_stopping_latency_ns, + scenario=self.scenario, ) if early_stopping_latency_ns > target_latency: self.log.error( - "%s Latency constraint with early stopping not met, expected=%s, found=%s", - self.path, - target_latency, - early_stopping_latency_ns, + "{path} Latency constraint with early stopping not met, expected={target_latency}, found={early_stopping_latency_ns}", + path=self.path, + target_latency=target_latency, + early_stopping_latency_ns=early_stopping_latency_ns, ) return False else: @@ -240,18 +240,18 @@ def latency_check(self): target_latency = self.config.latency_constraint.get( self.model, dict()).get(self.scenario) self.log.info( - "Target latency: %s, Latency: %s, Scenario: %s", - target_latency, - latency_99_percentile, - self.scenario, + "Target latency: {target_latency}, Latency: {latency_99_percentile}, Scenario: {scenario}", + target_latency=target_latency, + latency_99_percentile=latency_99_percentile, + scenario=self.scenario, ) if target_latency: if latency_99_percentile > target_latency: self.log.error( - "%s Latency constraint not met, expected=%s, found=%s", - self.path, - target_latency, - latency_99_percentile, + "{path} Latency constraint not met, expected={target_latency}, found={latency_99_percentile}", + path=self.path, + target_latency=target_latency, + latency_99_percentile=latency_99_percentile, ) return False return True @@ -275,19 +275,19 @@ def min_query_count_check(self): self.model, self.scenario) if required_min_query_count and min_query_count < required_min_query_count: self.log.error( - "%s Required minimum Query Count not met by user config, Expected=%s, Found=%s", - self.path, - required_min_query_count, - min_query_count, + "{path} Required minimum Query Count not met by user 
config, Expected={required_min_query_count}, Found={min_query_count}", + path=self.path, + required_min_query_count=required_min_query_count, + min_query_count=min_query_count, ) return False if self.scenario.lower() == "offline" and ( samples_per_query < OFFLINE_MIN_SPQ_SINCE_V4[self.model]) and self.division.lower() == "closed": self.log.error( - "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s", - self.path, - OFFLINE_MIN_SPQ_SINCE_V4[self.model], - samples_per_query, + "{path} Required minimum samples per query not met by user config, Expected={expected}, Found={found}", + path=self.path, + expected=OFFLINE_MIN_SPQ_SINCE_V4[self.model], + found=samples_per_query, ) return False return True @@ -305,10 +305,10 @@ def min_duration_check(self): min_duration = self.mlperf_log["effective_min_duration_ms"] if min_duration < required_min_duration: self.log.error( - "%s Test duration less than 600s in user config. expected=%s, found=%s", - self.path, - required_min_duration, - min_duration, + "{path} Test duration less than 600s in user config. expected={required_min_duration}, found={min_duration}", + path=self.path, + required_min_duration=required_min_duration, + min_duration=min_duration, ) return False return True @@ -324,8 +324,8 @@ def network_check(self): """ if self.system_json is None: self.log.error( - "%s system json file not found", - self.path + "{path} system json file not found", + path=self.path ) return False is_network_mode_sys_spec_str = self.system_json.get( @@ -412,11 +412,11 @@ def inferred_check(self): self.scenario.lower(), self.scenario_fixed.lower()) != ("server", "interactive"): if "edge" not in self.system_json["system_type"].lower(): self.log.error( - "Result can not be inferred for %s suite for: %s. Scenario: %s, Scenario fixed: %s", - self.system_json["system_type"], - self.path, - self.scenario, - self.scenario_fixed) + "Result can not be inferred for {system_type} suite for: {path}. 
Scenario: {scenario}, Scenario fixed: {scenario_fixed}", + system_type=self.system_json["system_type"], + path=self.path, + scenario=self.scenario, + scenario_fixed=self.scenario_fixed) return False list_inferred = [ ("singlestream", "multistream"), @@ -426,10 +426,10 @@ def inferred_check(self): if (self.scenario.lower(), self.scenario_fixed.lower() ) not in list_inferred: self.log.error( - "Result for scenario %s can not be inferred from %s for: %s", - self.scenario_fixed, - self.scenario, - self.path) + "Result for scenario {scenario} can not be inferred from {scenario_fixed} for: {path}", + scenario_fixed=self.scenario_fixed, + scenario=self.scenario, + path=self.path) return False return True diff --git a/tools/submission/submission_checker/checks/power/power_checker.py b/tools/submission/submission_checker/checks/power/power_checker.py index bf6835133b..bfdefe934a 100755 --- a/tools/submission/submission_checker/checks/power/power_checker.py +++ b/tools/submission/submission_checker/checks/power/power_checker.py @@ -24,12 +24,9 @@ import re import traceback import uuid -import logging +from loguru import logger as log -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("main") - class LineWithoutTimeStamp(Exception): pass diff --git a/tools/submission/submission_checker/checks/power_check.py b/tools/submission/submission_checker/checks/power_check.py index d3519a3503..9ec5080520 100644 --- a/tools/submission/submission_checker/checks/power_check.py +++ b/tools/submission/submission_checker/checks/power_check.py @@ -81,7 +81,7 @@ def required_files_check(self): if not self.has_power: return True - self.log.info("Checking necessary power files for %s", self.path) + self.log.info("Checking necessary power files for {path}", path=self.path) is_valid = True required_files = REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES diff = files_diff( @@ -90,9 +90,9 @@ def required_files_check(self): OPTIONAL_PERF_FILES) if diff: self.log.error( - "%s has file 
list mismatch (%s)", - self.testing_path, - diff) + "{path} has file list mismatch ({diff})", + path=self.testing_path, + diff=diff) is_valid = False diff = files_diff( list_files(self.ranging_path), @@ -100,16 +100,16 @@ def required_files_check(self): OPTIONAL_PERF_FILES) if diff: self.log.error( - "%s has file list mismatch (%s)", - self.ranging_path, - diff) + "{path} has file list mismatch ({diff})", + path=self.ranging_path, + diff=diff) is_valid = False diff = files_diff(list_files(self.power_path), REQUIRED_POWER_FILES) if diff: self.log.error( - "%s has file list mismatch (%s)", - self.power_path, - diff) + "{path} has file list mismatch ({diff})", + path=self.power_path, + diff=diff) is_valid = False return is_valid @@ -124,7 +124,7 @@ def external_power_check(self): False otherwise. """ if not self.config.skip_power_check and self.has_power: - self.log.info("Running external power checks for %s", self.path) + self.log.info("Running external power checks for {path}", path=self.path) python_version_major = int(sys.version.split(" ")[0].split(".")[0]) python_version_minor = int(sys.version.split(" ")[0].split(".")[1]) assert python_version_major == 3 and python_version_minor >= 7, ( @@ -136,8 +136,8 @@ def external_power_check(self): sys.stderr.flush() if check_power_result != 0: self.log.error( - "Power WG power_checker.py did not pass for: %s", - perf_path) + "Power WG power_checker.py did not pass for: {path}", + path=perf_path) return False return True @@ -192,10 +192,10 @@ def get_power_metric_check(self): if len(power_list) == 0: self.log.error( - "%s has no power samples falling in power range: %s - %s", - spl_fname, - power_begin, - power_end, + "{spl_fname} has no power samples falling in power range: {power_begin} - {power_end}", + spl_fname=spl_fname, + power_begin=power_begin, + power_end=power_end, ) is_valid = False else: diff --git a/tools/submission/submission_checker/checks/system_check.py 
b/tools/submission/submission_checker/checks/system_check.py index 54746c0408..dba1e65f5c 100644 --- a/tools/submission/submission_checker/checks/system_check.py +++ b/tools/submission/submission_checker/checks/system_check.py @@ -67,8 +67,8 @@ def missing_check(self): """ if self.system_json is None: self.log.error( - "%s system json file not found", - self.path + "{path} system json file not found", + path=self.path ) return False return True @@ -85,7 +85,7 @@ def availability_check(self): availability = self.system_json.get("status").lower() if availability not in VALID_AVAILABILITIES: self.log.error( - "%s has invalid status (%s)", self.path, availability + "{path} has invalid status ({availability})", path=self.path, availability=availability ) return False return True @@ -105,9 +105,9 @@ def system_type_check(self): if system_type not in valid_system_types: self.log.error( - "%s has invalid system type (%s)", - self.path, - system_type, + "{path} has invalid system type ({system_type})", + path=self.path, + system_type=system_type, ) return False # Maybe add this line if needed @@ -161,7 +161,7 @@ def required_fields_check(self): for k in required_fields: if k not in self.system_json: is_valid = False - self.log.error("%s, field %s is missing", self.path, k) + self.log.error("{path}, field {k} is missing", path=self.path, k=k) elif ( check_empty_fields and k in SYSTEM_DESC_MEANINGFUL_RESPONSE_REQUIRED_FIELDS @@ -169,7 +169,7 @@ def required_fields_check(self): ): is_valid = False self.log.error( - "%s, field %s requires a meaningful response but is empty", self.path, k + "{path}, field {k} requires a meaningful response but is empty", path=self.path, k=k ) elif ( check_empty_fields @@ -177,7 +177,7 @@ def required_fields_check(self): and not is_number(str(self.system_json[k])) ): self.log.error( - "%s, field %s requires a numeric response but is empty", self.path, k + "{path}, field {k} requires a numeric response but is empty", path=self.path, k=k ) return 
is_valid @@ -192,10 +192,10 @@ def submitter_check(self): """ if self.system_json.get("submitter").lower() != self.submitter.lower(): self.log.error( - "%s has submitter %s, directory has %s", - self.path, - self.system_json.get("submitter"), - self.submitter, + "{path} has submitter {system_json_submitter}, directory has {submitter}", + path=self.path, + system_json_submitter=self.system_json.get("submitter"), + submitter=self.submitter, ) return False return True @@ -211,10 +211,10 @@ def division_check(self): """ if self.system_json.get("division").lower() != self.division.lower(): self.log.error( - "%s has division %s, directory has %s", - self.path, - self.system_json.get("division"), - self.division, + "{path} has division {system_json_division}, directory has {division}", + path=self.path, + system_json_division=self.system_json.get("division"), + division=self.division, ) return False return True diff --git a/tools/submission/submission_checker/loader.py b/tools/submission/submission_checker/loader.py index 53ddf91dc2..f2961028d0 100644 --- a/tools/submission/submission_checker/loader.py +++ b/tools/submission/submission_checker/loader.py @@ -4,14 +4,9 @@ from .parsers.loadgen_parser import LoadgenParser from typing import Generator, Literal from .utils import * -import logging +from loguru import logger import json - -logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] %(message)s", -) - +import sys class SubmissionLogs: """Container for parsed submission log artifacts and metadata. 
@@ -64,7 +59,7 @@ def __init__(self, root, version) -> None: """ self.root = root self.version = version - self.logger = logging.getLogger("LoadgenParser") + self.logger = logger self.perf_log_path = os.path.join( self.root, PERFORMANCE_LOG_PATH.get( version, PERFORMANCE_LOG_PATH["default"])) @@ -188,7 +183,7 @@ def load_single_log(self, path, log_type: Literal["Performance", "Accuracy", """ log = None if os.path.exists(path): - self.logger.info("Loading %s log from %s", log_type, path) + self.logger.info("Loading {log_type} log from {path}", log_type=log_type, path=path) if log_type in ["Performance", "Accuracy", "Test"]: log = LoadgenParser(path) elif log_type in ["System", "Measurements"]: @@ -201,14 +196,14 @@ def load_single_log(self, path, log_type: Literal["Performance", "Accuracy", log = path else: self.logger.info( - "Could not load %s log from %s, log type not recognized", - log_type, - path) + "Could not load {log_type} log from {path}, log type not recognized", + log_type=log_type, + path=path) else: self.logger.info( - "Could not load %s log from %s, path does not exist", - log_type, - path) + "Could not load {log_type} log from {path}, path does not exist", + log_type=log_type, + path=path) return log def load(self) -> Generator[SubmissionLogs, None, None]: diff --git a/tools/submission/submission_checker/main.py b/tools/submission/submission_checker/main.py index 56f6a5458f..28791c8720 100644 --- a/tools/submission/submission_checker/main.py +++ b/tools/submission/submission_checker/main.py @@ -1,5 +1,5 @@ import argparse -import logging +from loguru import logger import os import sys @@ -18,8 +18,7 @@ from .checks.power_check import PowerCheck from .results import ResultExporter -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("main") +log = logger def get_args(): @@ -216,12 +215,12 @@ def main(): with_results = 0 for k, v in sorted(results.items()): if v: - log.info("Results %s %s", k, v) + log.info("Results {k} {v}", k=k, v=v) 
with_results += 1 log.info("---") for k, v in sorted(results.items()): if v is None: - log.error("NoResults %s", k) + log.error("NoResults {k}", k=k) closed_systems = systems.get("closed", {}) open_systems = systems.get("open", {}) @@ -309,48 +308,48 @@ def sum_dict_values(x): # print summary log.info("---") log.info( - "Results=%d, NoResults=%d, Power Results=%d", - with_results, - len(results) - with_results, - count_power_results, + "Results={with_results}, NoResults={no_results}, Power Results={power_results}", + with_results=with_results, + no_results=len(results) - with_results, + power_results=count_power_results, ) log.info("---") log.info( - "Closed Results=%d, Closed Power Results=%d\n", - count_closed_results, - count_closed_power_results, + "Closed Results={count_closed_results}, Closed Power Results={count_closed_power_results}\n", + count_closed_results=count_closed_results, + count_closed_power_results=count_closed_power_results, ) log.info( - "Open Results=%d, Open Power Results=%d\n", - count_open_results, - count_open_power_results, + "Open Results={count_open_results}, Open Power Results={count_open_power_results}\n", + count_open_results=count_open_results, + count_open_power_results=count_open_power_results, ) log.info( - "Network Results=%d, Network Power Results=%d\n", - count_network_results, - count_network_power_results, + "Network Results={count_network_results}, Network Power Results={count_network_power_results}\n", + count_network_results=count_network_results, + count_network_power_results=count_network_power_results, ) log.info("---") log.info( - "Systems=%d, Power Systems=%d", - number_systems, - number_power_systems) + "Systems={number_systems}, Power Systems={number_power_systems}", + number_systems=number_systems, + number_power_systems=number_power_systems) log.info( - "Closed Systems=%d, Closed Power Systems=%d", - number_closed_systems, - number_closed_power_systems, + "Closed Systems={number_closed_systems}, Closed Power 
Systems={number_closed_power_systems}", + number_closed_systems=number_closed_systems, + number_closed_power_systems=number_closed_power_systems, ) log.info( - "Open Systems=%d, Open Power Systems=%d", - number_open_systems, - number_open_power_systems, + "Open Systems={number_open_systems}, Open Power Systems={number_open_power_systems}", + number_open_systems=number_open_systems, + number_open_power_systems=number_open_power_systems, ) log.info( - "Network Systems=%d, Network Power Systems=%d", - number_network_systems, - number_network_power_systems, + "Network Systems={number_network_systems}, Network Power Systems={number_network_power_systems}", + number_network_systems=number_network_systems, + number_network_power_systems=number_network_power_systems, ) log.info("---") if len(results) != with_results: diff --git a/tools/submission/submission_checker/parsers/loadgen_parser.py b/tools/submission/submission_checker/parsers/loadgen_parser.py index b2812c0b78..0f875d6c35 100644 --- a/tools/submission/submission_checker/parsers/loadgen_parser.py +++ b/tools/submission/submission_checker/parsers/loadgen_parser.py @@ -14,7 +14,7 @@ import argparse import json -import logging +from loguru import logger import sys from .base import BaseParser @@ -32,7 +32,7 @@ def __init__(self, log_path, strict=True): self.endpoints_marker = ":::ENDPTS" self.marker = "" self.log_is_endpoints = False - self.logger = logging.getLogger("MLPerfLog") + self.logger = logger self.messages = {} with open(log_path, "r", encoding="utf-8") as f: for i, line in enumerate(f): diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index b2aabbdb87..f14842d32d 100644 --- a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -195,12 +195,11 @@ def get_inferred_result( def check_compliance_perf_dir(test_dir): is_valid = False - import logging - log = logging.getLogger("main") + from loguru import logger 
as log fname = os.path.join(test_dir, "verify_performance.txt") if not os.path.exists(fname): - log.error("%s is missing in %s", fname, test_dir) + log.error("{fname} is missing in {test_dir}", fname=fname, test_dir=test_dir) is_valid = False else: with open(fname, "r") as f: @@ -211,13 +210,14 @@ def check_compliance_perf_dir(test_dir): break if is_valid == False: log.error( - "Compliance test performance check in %s failed", - test_dir) + "Compliance test performance check in {test_dir} failed", + test_dir=test_dir + ) # Check performance dir test_perf_path = os.path.join(test_dir, "performance", "run_1") if not os.path.exists(test_perf_path): - log.error("%s has no performance/run_1 directory", test_dir) + log.error("{test_dir} has no performance/run_1 directory", test_dir=test_dir) is_valid = False else: diff = files_diff( @@ -227,9 +227,9 @@ def check_compliance_perf_dir(test_dir): ) if diff: log.error( - "%s has file list mismatch (%s)", - test_perf_path, - diff) + "{test_perf_path} has file list mismatch ({diff})", + test_perf_path=test_perf_path, + diff=diff) is_valid = False return is_valid @@ -238,8 +238,7 @@ def check_compliance_perf_dir(test_dir): def get_power_metric(config, scenario_fixed, log_path, is_valid, res): # parse the power logs import datetime - import logging - log = logging.getLogger("main") + from loguru import logger as log server_timezone = datetime.timedelta(0) client_timezone = datetime.timedelta(0) @@ -274,10 +273,10 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): if len(power_list) == 0: log.error( - "%s has no power samples falling in power range: %s - %s", - spl_fname, - power_begin, - power_end, + "{spl_fname} has no power samples falling in power range: {power_begin} - {power_end}", + spl_fname=spl_fname, + power_begin=power_begin, + power_end=power_end, ) is_valid = False else: diff --git a/tools/submission/summary.csv b/tools/submission/summary.csv new file mode 100644 index 0000000000..b85f9e25d0 ---
/dev/null +++ b/tools/submission/summary.csv @@ -0,0 +1 @@ +Organization,Availability,Division,SystemType,SystemName,Platform,Model,MlperfModel,Scenario,Result,Accuracy,number_of_nodes,host_processor_model_name,host_processors_per_node,host_processor_core_count,accelerator_model_name,accelerators_per_node,Location,framework,operating_system,notes,compliance,errors,version,inferred,has_power,Units,weight_data_types diff --git a/tools/submission/truncate_accuracy_log.py b/tools/submission/truncate_accuracy_log.py index 87bba5ab98..62c2444253 100755 --- a/tools/submission/truncate_accuracy_log.py +++ b/tools/submission/truncate_accuracy_log.py @@ -8,16 +8,12 @@ import argparse import hashlib -import logging +from loguru import logger as log import os import re import sys import shutil - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("main") - MAX_ACCURACY_LOG_SIZE = 10 * 1024 VIEWABLE_SIZE = 4096 @@ -147,7 +143,7 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): log_path = os.path.join(division, submitter, directory) if not os.path.exists(log_path): - log.error("no submission in %s", log_path) + log.error("no submission in {log_path}", log_path=log_path) continue for system_desc in list_dir(log_path): @@ -176,7 +172,7 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): "TEST") and test != "TEST01": continue if not os.path.exists(acc_log): - log.error("%s missing", acc_log) + log.error("{acc_log} missing", acc_log=acc_log) continue if ( not os.path.exists(acc_txt) @@ -185,13 +181,13 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): # compliance test directory will not have # an accuracy.txt file by default log.info( - "no accuracy.txt in compliance directory %s", - acc_path, + "no accuracy.txt in compliance directory {acc_path}", + acc_path=acc_path, ) else: if not os.path.exists(acc_txt): log.error( - "%s missing, generate to continue", acc_txt + "{acc_txt} missing, generate to 
continue", acc_txt=acc_txt ) continue with open(acc_txt, "r", encoding="utf-8") as f: @@ -204,8 +200,8 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): size = os.stat(acc_log).st_size if hash_val and size < MAX_ACCURACY_LOG_SIZE: log.info( - "%s already has hash and size seems truncated", - acc_path, + "{acc_path} already has hash and size seems truncated", + acc_path=acc_path, ) continue @@ -221,9 +217,9 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): ) if os.path.exists(dst): log.error( - "not processing %s because %s already exist", - acc_log, - dst, + "not processing {acc_log} because {dst} already exist", + acc_log=acc_log, + dst=dst, ) continue shutil.copy(acc_log, dst) @@ -233,7 +229,7 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): with open(acc_txt, "a", encoding="utf-8") as f: f.write("\nhash={0}\n".format(hash_val)) truncate_file(acc_log) - log.info("%s truncated", acc_log) + log.info("{acc_log} truncated", acc_log=acc_log) # No need to iterate on compliance test # subdirectories in the results folder @@ -266,8 +262,8 @@ def main(): backup_location = args.output or args.backup log.info( - "Make sure you keep a backup of %s in case mlperf wants to see the original accuracy logs", - backup_location, + "Make sure you keep a backup of {backup_location} in case mlperf wants to see the original accuracy logs", + backup_location=backup_location, ) return 0 From 985c4360a6821efeafdbaad1ee7c767a5708eeb0 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 12:16:45 -0500 Subject: [PATCH 06/11] Update logging format --- tools/submission/submission_checker/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/submission/submission_checker/main.py b/tools/submission/submission_checker/main.py index 28791c8720..f5f3a881b0 100644 --- a/tools/submission/submission_checker/main.py +++ b/tools/submission/submission_checker/main.py @@ -19,7 +19,8 @@ from 
.results import ResultExporter log = logger - +log.remove() +log.add(sys.stderr, format="[{time} {file.name}:{line} {level}] {message}") def get_args(): """Parse command-line arguments for the submission checker. From cb95dbd58383a1f396fceee963a88047912a229a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 23 Feb 2026 17:17:28 +0000 Subject: [PATCH 07/11] [Automated Commit] Format Codebase --- tools/submission/submission_checker/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/submission/submission_checker/main.py b/tools/submission/submission_checker/main.py index f5f3a881b0..1f59d9b8e7 100644 --- a/tools/submission/submission_checker/main.py +++ b/tools/submission/submission_checker/main.py @@ -20,7 +20,10 @@ log = logger log.remove() -log.add(sys.stderr, format="[{time} {file.name}:{line} {level}] {message}") +log.add( + sys.stderr, + format="[{time} {file.name}:{line} {level}] {message}") + def get_args(): """Parse command-line arguments for the submission checker. 
From 133d79cc7a7188112ccd1ede9a3aeaa20b4e9501 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 23 Feb 2026 17:22:30 +0000 Subject: [PATCH 08/11] [Automated Commit] Format Codebase --- text_to_video/wan-2.2-t2v-a14b/run_mlperf.py | 28 +++++++++++++------ tools/submission/generate_final_report.py | 6 ++-- .../checks/performance_check.py | 23 ++++++++------- tools/submission/submission_checker/loader.py | 6 +++- tools/submission/submission_checker/utils.py | 15 +++++++--- tools/submission/truncate_accuracy_log.py | 6 ++-- 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py index 147624b340..ab73c25966 100644 --- a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py +++ b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py @@ -46,7 +46,8 @@ def load_prompts(dataset_path): class Model: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, model_path, device, config, + prompts, fixed_latent=None, rank=0): self.device = device self.rank = rank self.height = config["height"] @@ -106,7 +107,8 @@ def flush_queries(self): class DebugModel: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, model_path, device, config, + prompts, fixed_latent=None, rank=0): self.prompts = prompts def issue_queries(self, query_samples): @@ -186,7 +188,8 @@ def get_args(): parser.add_argument( "--scenario", default="SingleStream", - help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())), + help="mlperf benchmark scenario, one of " + + str(list(SCENARIO_MAP.keys())), ) parser.add_argument( "--user_conf", @@ -202,7 +205,10 @@ def get_args(): help="performance sample count", default=5000, ) - parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") # 
Dont overwrite these for official submission parser.add_argument("--count", type=int, help="dataset items to use") parser.add_argument("--time", type=int, help="time to scan in seconds") @@ -271,7 +277,10 @@ def run_mlperf(args, config): audit_config = os.path.abspath(args.audit_conf) if os.path.exists(audit_config): - settings.FromConfig(audit_config, "wan-2.2-t2v-a14b", args.scenario) + settings.FromConfig( + audit_config, + "wan-2.2-t2v-a14b", + args.scenario) settings.scenario = SCENARIO_MAP[args.scenario] settings.mode = lg.TestMode.PerformanceOnly @@ -297,8 +306,10 @@ def run_mlperf(args, config): if args.samples_per_query: settings.multi_stream_samples_per_query = args.samples_per_query if args.max_latency: - settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) - settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC) + settings.server_target_latency_ns = int( + args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + args.max_latency * NANO_SEC) performance_sample_count = ( args.performance_sample_count @@ -311,7 +322,8 @@ def run_mlperf(args, config): count, performance_sample_count, load_query_samples, unload_query_samples ) - lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + lg.StartTestWithLogSettings( + sut, qsl, settings, log_settings, audit_config) lg.DestroyQSL(qsl) lg.DestroySUT(sut) diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index f0cf069924..4d949bb6fb 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -101,11 +101,11 @@ def main(): "singlestream": "SingleStream", "multistream": "MultiStream", "server": "Server", - "interactive":"Interactive", + "interactive": "Interactive", "offline": "Offline", } - df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x)) - + df["Scenario"] = df["Scenario"].apply( + lambda x: 
scenario_map.get(str(x).lower(), x)) output = args.input[:-4] writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter") diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 4e213641a5..920b1b485b 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -85,13 +85,15 @@ def missing_check(self): self.log.error("Performance log missing at {path}", path=self.path) return False return True - + def scenarios_check(self): if self.submission_logs.loader_data.get("check_scenarios", False): return True else: - missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", []) - unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", []) + missing_scenarios = self.submission_logs.loader_data.get( + "missing_scenarios", []) + unknown_scenarios = self.submission_logs.loader_data.get( + "unknown_scenarios", []) if len(missing_scenarios) > 0: self.log.error( "%s does not have all required scenarios, missing %s", @@ -116,7 +118,8 @@ def loadgen_errors_check(self): bool: True if no blocking Loadgen errors are present, False otherwise. """ - compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False) + compliance_skip = self.submission_logs.loader_data.get( + "compliance_skip", False) if self.mlperf_log.has_error(): has_critical_errors = False if self.config.ignore_uncommited: @@ -126,13 +129,13 @@ def loadgen_errors_check(self): self.log.error("{path} contains errors:", path=self.path) for error in self.mlperf_log.get_errors(): self.log.error("{error}", path=self.path, error=error["value"]) - if ( + if ( "Loadgen built with uncommitted changes!" 
not in error["value"] and ("Multiple conf files are used" not in error["value"]) ): has_critical_errors = True if ( - not compliance_skip + not compliance_skip and "Multiple conf files are used" in error["value"] ): has_critical_errors = True @@ -459,7 +462,7 @@ def inferred_check(self): ("singlestream", "offline") ] if (self.scenario.lower(), self.scenario_fixed.lower() - ) not in list_inferred: + ) not in list_inferred: self.log.error( "Result for scenario {scenario} can not be inferred from {scenario_fixed} for: {path}", scenario_fixed=self.scenario_fixed, @@ -548,12 +551,12 @@ def get_inferred_result(self, res): res = qps_wo_loadgen_overhead if (self.scenario_fixed in ["Offline"] - ) and self.scenario in ["MultiStream"]: + ) and self.scenario in ["MultiStream"]: inferred = True res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS) if (self.scenario_fixed in ["MultiStream"] - ) and self.scenario in ["SingleStream"]: + ) and self.scenario in ["SingleStream"]: inferred = True # samples_per_query does not match with the one reported in the logs # when inferring MultiStream from SingleStream @@ -570,6 +573,6 @@ def get_inferred_result(self, res): else: res = (latency_99_percentile * samples_per_query) / MS_TO_NS if (self.scenario_fixed in ["Interactive"] - ) and self.scenario not in ["Server"]: + ) and self.scenario not in ["Server"]: is_valid = False return res, is_valid diff --git a/tools/submission/submission_checker/loader.py b/tools/submission/submission_checker/loader.py index 51d6cac88a..94ed1ddd6f 100644 --- a/tools/submission/submission_checker/loader.py +++ b/tools/submission/submission_checker/loader.py @@ -9,6 +9,7 @@ import json import sys + class SubmissionLogs: """Container for parsed submission log artifacts and metadata. 
@@ -186,7 +187,10 @@ def load_single_log(self, path, log_type: Literal["Performance", "Accuracy", """ log = None if os.path.exists(path): - self.logger.info("Loading {log_type} log from {path}", log_type=log_type, path=path) + self.logger.info( + "Loading {log_type} log from {path}", + log_type=log_type, + path=path) if log_type in ["Performance", "Accuracy", "Test"]: log = LoadgenParser(path) elif log_type in ["System", "Measurements"]: diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index af6c0caf87..dabc430e20 100644 --- a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -107,17 +107,19 @@ def is_number(s): return True except ValueError: return False - + + def lower_list(l): return [str(e).lower() for e in l] + def contains_list(l1, l2): # Check if l1 contains all elements of l2 missing = [] for e in l2: if e not in l1: missing.append(e) - return missing, len(missing) == 0 + return missing, len(missing) == 0 def get_performance_metric( @@ -210,7 +212,10 @@ def check_compliance_perf_dir(test_dir): fname = os.path.join(test_dir, "verify_performance.txt") if not os.path.exists(fname): - log.error("{fname} is missing in {test_dir}", fname=fname, test_dir=test_dir) + log.error( + "{fname} is missing in {test_dir}", + fname=fname, + test_dir=test_dir) is_valid = False else: with open(fname, "r") as f: @@ -228,7 +233,9 @@ def check_compliance_perf_dir(test_dir): # Check performance dir test_perf_path = os.path.join(test_dir, "performance", "run_1") if not os.path.exists(test_perf_path): - log.error("{test_dir} has no performance/run_1 directory", test_dir=test_dir) + log.error( + "{test_dir} has no performance/run_1 directory", + test_dir=test_dir) is_valid = False else: diff = files_diff( diff --git a/tools/submission/truncate_accuracy_log.py b/tools/submission/truncate_accuracy_log.py index 2dcc45204f..d203ebed59 100755 --- 
a/tools/submission/truncate_accuracy_log.py +++ b/tools/submission/truncate_accuracy_log.py @@ -181,7 +181,8 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): acc_txt = os.path.join( acc_path, "accuracy.txt") if not os.path.exists(acc_log): - log.error("{acc_log} missing", acc_log=acc_log) + log.error( + "{acc_log} missing", acc_log=acc_log) continue # TEST07 and TEST09 don't have accuracy.txt, @@ -252,7 +253,8 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): f.write( "\nhash={0}\n".format(hash_val)) truncate_file(acc_log) - log.info("{acc_log} truncated", acc_log=acc_log) + log.info( + "{acc_log} truncated", acc_log=acc_log) def main(): From 3d2811781b2683076b9523655379d871e7c03c03 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 12:45:05 -0500 Subject: [PATCH 09/11] Fix merge conflict breaking change --- .../checks/performance_check.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 920b1b485b..9ca494d349 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -129,16 +129,16 @@ def loadgen_errors_check(self): self.log.error("{path} contains errors:", path=self.path) for error in self.mlperf_log.get_errors(): self.log.error("{error}", path=self.path, error=error["value"]) - if ( - "Loadgen built with uncommitted changes!" not in error["value"] - and ("Multiple conf files are used" not in error["value"]) - ): - has_critical_errors = True - if ( - not compliance_skip - and "Multiple conf files are used" in error["value"] - ): - has_critical_errors = True + if ( + "Loadgen built with uncommitted changes!" 
not in error["value"] + and ("Multiple conf files are used" not in error["value"]) + ): + has_critical_errors = True + if ( + not compliance_skip + and "Multiple conf files are used" in error["value"] + ): + has_critical_errors = True if has_critical_errors: self.log.error("%s contains errors:", self.path) From 938a0c41efd3541037fac6f4bf5be2a1723580ad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 15 Apr 2026 17:25:48 +0000 Subject: [PATCH 10/11] [Automated Commit] Format Codebase --- .../submission_checker/checks/accuracy_check.py | 12 +++++++++--- tools/submission/submission_checker/constants.py | 8 ++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/submission/submission_checker/checks/accuracy_check.py b/tools/submission/submission_checker/checks/accuracy_check.py index 35592b9907..cac6b6fc7f 100644 --- a/tools/submission/submission_checker/checks/accuracy_check.py +++ b/tools/submission/submission_checker/checks/accuracy_check.py @@ -159,7 +159,9 @@ def accuracy_result_check(self): if acc_upper_limit is not None: is_valid &= acc_limit_check if not hash_val: - self.log.error("{path} not hash value for accuracy.txt", path=self.path) + self.log.error( + "{path} not hash value for accuracy.txt", + path=self.path) is_valid = False self.submission_logs.loader_data["accuracy_metrics"] = result_acc if self.division.lower() == "open": @@ -174,11 +176,15 @@ def accuracy_json_check(self): exceed `MAX_ACCURACY_LOG_SIZE`, False otherwise. 
""" if not os.path.exists(self.accuracy_json): - self.log.error("{accuracy_json} is missing", accuracy_json=self.accuracy_json) + self.log.error( + "{accuracy_json} is missing", + accuracy_json=self.accuracy_json) return False else: if os.stat(self.accuracy_json).st_size > MAX_ACCURACY_LOG_SIZE: - self.log.error("{accuracy_json} is not truncated", accuracy_json=self.accuracy_json) + self.log.error( + "{accuracy_json} is not truncated", + accuracy_json=self.accuracy_json) return False return True diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index dc45cd83d2..2f4abd87f8 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -1132,12 +1132,12 @@ "84", "59", "12", - "31", + "31", "86", - "122", - "233", + "122", + "233", "96", - ] + ] }, } } From f64a35adb56413832fd7e428f8348749acfbf3de Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Wed, 15 Apr 2026 23:01:09 +0530 Subject: [PATCH 11/11] trigger gh actions --- .github/workflows/format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index cc5113a052..bd71bbacf5 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,4 +1,4 @@ -# Automatic code formatting +# Automatic code formatting. name: "Code formatting" on: push: