From 0db31b6cc40fc11579b1a4533be50e022df00691 Mon Sep 17 00:00:00 2001 From: Kaitlyn Davis Date: Thu, 12 Feb 2026 08:43:02 -0800 Subject: [PATCH 1/3] INFRA: Add s390x rsync+battery scripts Introduce helper scripts to rsync the repo to an s390x host and run a repeatable test battery there. README changes are intentionally excluded from this PR per repo policy. Signed-off-by: Kaitlyn Davis Signed-off-by: Kaitlyn Davis --- scripts/rsync-zdnn.exclude | 31 +++++++ scripts/s390x_run_battery.sh | 157 ++++++++++++++++++++++++++++++++++ scripts/s390x_sync_and_run.sh | 107 +++++++++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 scripts/rsync-zdnn.exclude create mode 100755 scripts/s390x_run_battery.sh create mode 100755 scripts/s390x_sync_and_run.sh diff --git a/scripts/rsync-zdnn.exclude b/scripts/rsync-zdnn.exclude new file mode 100644 index 0000000..9c5e67f --- /dev/null +++ b/scripts/rsync-zdnn.exclude @@ -0,0 +1,31 @@ +# VCS and local metadata +.git/ + +# Local build artifacts +artifacts/ +autom4te.cache/ +config.log +config.status +config.make +config.h + +# In-tree object/output dirs used by zDNN builds +zdnn/obj/ +zdnn/lib/ +zdnn/lib32/ +zdnn/lib64/ +tests/obj/ +tests/bin/ + +# Generic binary artifacts +*.o +*.so +*.a +*.dylib +*.dll +*.exe +*.gcno +*.gcda +*.profraw +*.profdata +*.dSYM/ diff --git a/scripts/s390x_run_battery.sh b/scripts/s390x_run_battery.sh new file mode 100755 index 0000000..c11f083 --- /dev/null +++ b/scripts/s390x_run_battery.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +set -u +set -o pipefail + +usage() { + cat < + --jobs N Parallel jobs for make. Default: detected CPU count. + --no-autoreconf Skip autoreconf step. + --skip-distclean Skip make distclean before configure. +USAGE +} + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +JOBS="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 4)" +DO_AUTORECONF=1 +DO_DISTCLEAN=1 +RESULTS_DIR="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --results-dir) + RESULTS_DIR="$2" + shift 2 + ;; + --jobs) + JOBS="$2" + shift 2 + ;; + --no-autoreconf) + DO_AUTORECONF=0 + shift + ;; + --skip-distclean) + DO_DISTCLEAN=0 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +if ! [[ "$JOBS" =~ ^[0-9]+$ ]] || [[ "$JOBS" -lt 1 ]]; then + echo "--jobs must be a positive integer" >&2 + exit 2 +fi + +if [[ -z "$RESULTS_DIR" ]]; then + RESULTS_DIR="$ROOT_DIR/artifacts/s390x-battery-$(date -u '+%Y%m%dT%H%M%SZ')" +fi + +mkdir -p "$RESULTS_DIR" +SUMMARY="$RESULTS_DIR/summary.txt" +FAILURES=0 + +run_step() { + local step="$1" + shift + local log="$RESULTS_DIR/${step}.log" + + { + echo "" + echo "=== STEP: ${step} ===" + echo "CMD: $*" + echo "UTC: $(date -u '+%Y-%m-%dT%H:%M:%SZ')" + } | tee -a "$SUMMARY" > "$log" + + if "$@" >> "$log" 2>&1; then + echo "RESULT: PASS (${step})" | tee -a "$SUMMARY" + else + local rc=$? + echo "RESULT: FAIL (${step}) rc=${rc}" | tee -a "$SUMMARY" + FAILURES=$((FAILURES + 1)) + fi +} + +run_step env_info bash -lc ' + set -e + echo "PWD: $(pwd)" + uname -a + echo + lscpu || true + echo + gcc --version || true + g++ --version || true + make --version | head -n 1 || true + autoreconf --version | head -n 1 || true + python3 --version || true + git rev-parse --short HEAD || true + git status -sb || true +' + +if [[ "$DO_DISTCLEAN" -eq 1 ]]; then + run_step distclean bash -lc ' + if [[ -f config.make ]]; then + make distclean + else + echo "config.make not present; skipping distclean" + fi + ' +fi + +if [[ "$DO_AUTORECONF" -eq 1 ]]; then + run_step autoreconf autoreconf . +fi + +run_step configure ./configure +run_step build make -j"$JOBS" all + +# Battery run #1: baseline runtime env. +run_step test_default make -j"$JOBS" test + +# Battery run #2: runtime precheck enabled. +run_step test_precheck env ZDNN_ENABLE_PRECHECK=true make -j"$JOBS" test + +# Battery run #3: precheck + full status diagnostics. +run_step test_precheck_statusdiag env ZDNN_ENABLE_PRECHECK=true ZDNN_STATUS_DIAG=0xFFFFFFFF make -j"$JOBS" test + +run_step collect_artifacts bash -lc ' + mkdir -p "'$RESULTS_DIR'/config" + cp -f config.log config.status config.make config.h "'$RESULTS_DIR'/config" 2>/dev/null || true + + if compgen -G "tests/bin/*.txt" > /dev/null; then + tar -czf "'$RESULTS_DIR'/test-bin-logs.tgz" tests/bin/*.txt + ls -1 tests/bin/*.txt > "'$RESULTS_DIR'/test-bin-files.txt" + else + echo "No tests/bin/*.txt files found" > "'$RESULTS_DIR'/test-bin-files.txt" + fi +' + +{ + echo "" + echo "Artifacts: $RESULTS_DIR" + echo "Failures: $FAILURES" +} | tee -a "$SUMMARY" + +if [[ "$FAILURES" -ne 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/scripts/s390x_sync_and_run.sh b/scripts/s390x_sync_and_run.sh new file mode 100755 index 0000000..6e3cdd7 --- /dev/null +++ b/scripts/s390x_sync_and_run.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat < [--jobs N] [--run-id ID] [--no-delete] + +Rsync the current repo to an s390x host, run the remote build/test battery, +and sync result artifacts back locally. + +Arguments: + user@host SSH target + remote_dir Remote path where the repo copy will be staged + +Options: + --jobs N Parallel make jobs for remote battery script + --run-id ID Explicit artifact run id (default: UTC timestamp) + --no-delete Disable rsync --delete during sync +USAGE +} + +if [[ $# -lt 2 ]]; then + usage >&2 + exit 2 +fi + +TARGET_HOST="$1" +REMOTE_DIR="$2" +shift 2 + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +EXCLUDE_FILE="$ROOT_DIR/scripts/rsync-zdnn.exclude" +if [[ ! -f "$EXCLUDE_FILE" ]]; then + echo "Exclude file not found: $EXCLUDE_FILE" >&2 + exit 2 +fi + +RUN_ID="s390x-battery-$(date -u '+%Y%m%dT%H%M%SZ')" +JOBS="" +RSYNC_DELETE=1 + +while [[ $# -gt 0 ]]; do + case "$1" in + --jobs) + JOBS="$2" + shift 2 + ;; + --run-id) + RUN_ID="$2" + shift 2 + ;; + --no-delete) + RSYNC_DELETE=0 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +if [[ -n "$JOBS" ]] && { ! [[ "$JOBS" =~ ^[0-9]+$ ]] || [[ "$JOBS" -lt 1 ]]; }; then + echo "--jobs must be a positive integer" >&2 + exit 2 +fi + +REMOTE_RESULTS_DIR="$REMOTE_DIR/artifacts/$RUN_ID" +LOCAL_RESULTS_ROOT="$ROOT_DIR/artifacts" +LOCAL_RESULTS_DIR="$LOCAL_RESULTS_ROOT/$RUN_ID" +mkdir -p "$LOCAL_RESULTS_ROOT" + +RSYNC_FLAGS=(-az --exclude-from="$EXCLUDE_FILE") +if [[ "$RSYNC_DELETE" -eq 1 ]]; then + RSYNC_FLAGS+=(--delete) +fi + +echo "[1/4] Ensuring remote directory exists: $TARGET_HOST:$REMOTE_DIR" +ssh "$TARGET_HOST" "mkdir -p '$REMOTE_DIR'" + +echo "[2/4] Rsyncing source to remote" +rsync "${RSYNC_FLAGS[@]}" "$ROOT_DIR/" "$TARGET_HOST:$REMOTE_DIR/" + +echo "[3/4] Running remote battery" +REMOTE_RC=0 +REMOTE_CMD="cd '$REMOTE_DIR' && chmod +x scripts/s390x_run_battery.sh && scripts/s390x_run_battery.sh --results-dir '$REMOTE_RESULTS_DIR'" +if [[ -n "$JOBS" ]]; then + REMOTE_CMD+=" --jobs '$JOBS'" +fi +if ssh "$TARGET_HOST" "$REMOTE_CMD"; then + REMOTE_RC=0 +else + REMOTE_RC=$? +fi + +echo "[4/4] Syncing remote results back to local artifacts" +mkdir -p "$LOCAL_RESULTS_DIR" +rsync -az "$TARGET_HOST:$REMOTE_RESULTS_DIR/" "$LOCAL_RESULTS_DIR/" || true + +echo "Local results: $LOCAL_RESULTS_DIR" + +exit "$REMOTE_RC" From 481e969e183b9038b2457f6d762da8290201b060 Mon Sep 17 00:00:00 2001 From: Kaitlyn Davis Date: Thu, 12 Feb 2026 08:43:10 -0800 Subject: [PATCH 2/3] INFRA: Pass battery flags through sync wrapper Allow s390x sync-and-run wrapper to forward battery configuration flags so CI/local runs can tune coverage without editing scripts. Signed-off-by: Kaitlyn Davis Signed-off-by: Kaitlyn Davis --- scripts/s390x_sync_and_run.sh | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/scripts/s390x_sync_and_run.sh b/scripts/s390x_sync_and_run.sh index 6e3cdd7..e99b242 100755 --- a/scripts/s390x_sync_and_run.sh +++ b/scripts/s390x_sync_and_run.sh @@ -3,7 +3,7 @@ set -euo pipefail usage() { cat < [--jobs N] [--run-id ID] [--no-delete] +Usage: $(basename "$0") [--jobs N] [--run-id ID] [--no-delete] [--no-autoreconf] [--skip-distclean] Rsync the current repo to an s390x host, run the remote build/test battery, and sync result artifacts back locally. @@ -16,6 +16,8 @@ Options: --jobs N Parallel make jobs for remote battery script --run-id ID Explicit artifact run id (default: UTC timestamp) --no-delete Disable rsync --delete during sync + --no-autoreconf Pass --no-autoreconf to remote battery script + --skip-distclean Pass --skip-distclean to remote battery script USAGE } @@ -38,6 +40,8 @@ fi RUN_ID="s390x-battery-$(date -u '+%Y%m%dT%H%M%SZ')" JOBS="" RSYNC_DELETE=1 +REMOTE_NO_AUTORECONF=0 +REMOTE_SKIP_DISTCLEAN=0 while [[ $# -gt 0 ]]; do case "$1" in @@ -53,6 +57,14 @@ while [[ $# -gt 0 ]]; do RSYNC_DELETE=0 shift ;; + --no-autoreconf) + REMOTE_NO_AUTORECONF=1 + shift + ;; + --skip-distclean) + REMOTE_SKIP_DISTCLEAN=1 + shift + ;; -h|--help) usage exit 0 @@ -92,6 +104,12 @@ REMOTE_CMD="cd '$REMOTE_DIR' && chmod +x scripts/s390x_run_battery.sh && scripts if [[ -n "$JOBS" ]]; then REMOTE_CMD+=" --jobs '$JOBS'" fi +if [[ "$REMOTE_NO_AUTORECONF" -eq 1 ]]; then + REMOTE_CMD+=" --no-autoreconf" +fi +if [[ "$REMOTE_SKIP_DISTCLEAN" -eq 1 ]]; then + REMOTE_CMD+=" --skip-distclean" +fi if ssh "$TARGET_HOST" "$REMOTE_CMD"; then REMOTE_RC=0 else From 2c3cd72e4e68cac47fe47789b6c21832c5a81f8a Mon Sep 17 00:00:00 2001 From: Kaitlyn Davis Date: Thu, 12 Feb 2026 08:44:15 -0800 Subject: [PATCH 3/3] INFRA: Add s390x perf regression battery + CI workflow Add a GitHub Actions workflow plus scripts/env thresholds to automate s390x performance regression checks. README changes are intentionally excluded from this PR per repo policy. Signed-off-by: Kaitlyn Davis Signed-off-by: Kaitlyn Davis --- .github/workflows/s390x-perf-regression.yml | 51 +++++ scripts/s390x_perf_regression.sh | 240 ++++++++++++++++++++ scripts/s390x_perf_thresholds.env | 14 ++ scripts/s390x_run_battery.sh | 43 +++- scripts/s390x_sync_and_run.sh | 37 ++- 5 files changed, 382 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/s390x-perf-regression.yml create mode 100755 scripts/s390x_perf_regression.sh create mode 100644 scripts/s390x_perf_thresholds.env diff --git a/.github/workflows/s390x-perf-regression.yml b/.github/workflows/s390x-perf-regression.yml new file mode 100644 index 0000000..0de6e6c --- /dev/null +++ b/.github/workflows/s390x-perf-regression.yml @@ -0,0 +1,51 @@ +name: s390x Perf Regression + +on: + workflow_dispatch: + inputs: + jobs: + description: Parallel jobs for make build/test + required: false + default: "8" + perf_iterations: + description: Iterations per perf benchmark + required: false + default: "5" + schedule: + - cron: "0 6 * * *" + +jobs: + s390x-perf-regression: + name: s390x build + test + perf battery + runs-on: [self-hosted, linux, s390x] + timeout-minutes: 360 + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Ensure scripts are executable + run: chmod +x scripts/s390x_run_battery.sh scripts/s390x_perf_regression.sh + + - name: Run battery with perf regression gates + env: + JOBS: ${{ github.event.inputs.jobs || '8' }} + PERF_ITERATIONS: ${{ github.event.inputs.perf_iterations || '5' }} + RESULTS_DIR: artifacts/s390x-ci-${{ github.run_id }}-${{ github.run_attempt }} + run: | + scripts/s390x_run_battery.sh \ + --results-dir "${RESULTS_DIR}" \ + --jobs "${JOBS}" \ + --perf-regression \ + --perf-iterations "${PERF_ITERATIONS}" \ + --perf-thresholds scripts/s390x_perf_thresholds.env + + - name: Upload battery artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: s390x-perf-regression-${{ github.run_id }}-${{ github.run_attempt }} + path: artifacts/s390x-ci-${{ github.run_id }}-${{ github.run_attempt }} + if-no-files-found: warn diff --git a/scripts/s390x_perf_regression.sh b/scripts/s390x_perf_regression.sh new file mode 100755 index 0000000..224b5e1 --- /dev/null +++ b/scripts/s390x_perf_regression.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat < + --iterations N Number of timing iterations per benchmark (default: 5) + --thresholds-file FILE Shell file defining PERF_MAX_SEC_* thresholds. + Default: scripts/s390x_perf_thresholds.env +USAGE +} + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +RESULTS_DIR="" +ITERATIONS=5 +THRESHOLDS_FILE="$ROOT_DIR/scripts/s390x_perf_thresholds.env" + +while [[ $# -gt 0 ]]; do + case "$1" in + --results-dir) + RESULTS_DIR="$2" + shift 2 + ;; + --iterations) + ITERATIONS="$2" + shift 2 + ;; + --thresholds-file) + THRESHOLDS_FILE="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +if ! [[ "$ITERATIONS" =~ ^[0-9]+$ ]] || [[ "$ITERATIONS" -lt 1 ]]; then + echo "--iterations must be a positive integer" >&2 + exit 2 +fi + +if [[ ! -f "$THRESHOLDS_FILE" ]]; then + echo "Threshold file not found: $THRESHOLDS_FILE" >&2 + exit 2 +fi + +if [[ -z "$RESULTS_DIR" ]]; then + RESULTS_DIR="$ROOT_DIR/artifacts/s390x-perf-$(date -u '+%Y%m%dT%H%M%SZ')" +fi +mkdir -p "$RESULTS_DIR" + +SUMMARY="$RESULTS_DIR/perf_summary.txt" +RAW_CSV="$RESULTS_DIR/perf_raw.csv" +REPORT_MD="$RESULTS_DIR/perf_report.md" + +# shellcheck disable=SC1090 +source "$THRESHOLDS_FILE" + +LD_RUNTIME_VAR="LD_LIBRARY_PATH" +S390X_SODIR="lib" + +read_make_var() { + local key="$1" + local file="$2" + awk -v key="$key" ' + $1 == key && ($2 == ":=" || $2 == "=") { + $1 = ""; + $2 = ""; + sub(/^[[:space:]]+/, "", $0); + gsub(/[[:space:]]+$/, "", $0); + print $0; + exit; + } + ' "$file" +} + +if [[ -f "$ROOT_DIR/config.make" ]]; then + maybe_ld_var="$(read_make_var LD_PATH_VAR "$ROOT_DIR/config.make")" + maybe_sodir="$(read_make_var SODIR "$ROOT_DIR/config.make")" + if [[ -n "$maybe_ld_var" ]]; then + LD_RUNTIME_VAR="$maybe_ld_var" + fi + if [[ -n "$maybe_sodir" ]]; then + S390X_SODIR="$maybe_sodir" + fi +fi +LD_RUNTIME_VALUE="$ROOT_DIR/zdnn/$S390X_SODIR" + +declare -a BENCHMARK_NAMES=( + "matmul_op" + "matmul_bcast_op" + "quantized_matmul_op" + "lstm_rnn" +) +declare -a BENCHMARK_CMDS=( + "tests/bin/testDriver_zdnn_matmul_op" + "tests/bin/testDriver_zdnn_matmul_bcast_op" + "tests/bin/testDriver_zdnn_quantized_matmul_op" + "tests/bin/testDriver_zdnn_lstm_rnn" +) +declare -a BENCHMARK_THRESHOLDS=( + "${PERF_MAX_SEC_MATMUL_OP:-40.0}" + "${PERF_MAX_SEC_MATMUL_BCAST_OP:-40.0}" + "${PERF_MAX_SEC_QUANTIZED_MATMUL_OP:-60.0}" + "${PERF_MAX_SEC_LSTM_RNN:-90.0}" +) + +printf "benchmark,iteration,seconds,threshold_seconds\n" > "$RAW_CSV" + +{ + echo "s390x performance regression run" + echo "UTC: $(date -u '+%Y-%m-%dT%H:%M:%SZ')" + echo "Iterations: $ITERATIONS" + echo "Thresholds file: $THRESHOLDS_FILE" + echo "Runtime library path: ${LD_RUNTIME_VAR}=${LD_RUNTIME_VALUE}" + echo "" +} > "$SUMMARY" + +{ + echo "# s390x Performance Regression Report" + echo "" + echo "- UTC: $(date -u '+%Y-%m-%dT%H:%M:%SZ')" + echo "- Iterations: $ITERATIONS" + echo "- Threshold file: \`$THRESHOLDS_FILE\`" + echo "" + echo "| Benchmark | Median (s) | Threshold (s) | Result |" + echo "|---|---:|---:|---|" +} > "$REPORT_MD" + +FAILURES=0 + +for idx in "${!BENCHMARK_NAMES[@]}"; do + name="${BENCHMARK_NAMES[$idx]}" + cmd="${BENCHMARK_CMDS[$idx]}" + threshold="${BENCHMARK_THRESHOLDS[$idx]}" + log_file="$RESULTS_DIR/perf_${name}.log" + + { + echo "=== Benchmark: $name ===" + echo "Command: $cmd" + echo "Threshold (median sec): $threshold" + echo "Iterations: $ITERATIONS" + } | tee -a "$SUMMARY" > "$log_file" + + if [[ ! -x "$cmd" ]]; then + echo "Missing benchmark binary: $cmd" | tee -a "$SUMMARY" >> "$log_file" + echo "| \`$name\` | n/a | $threshold | FAIL (missing binary) |" >> "$REPORT_MD" + FAILURES=$((FAILURES + 1)) + continue + fi + + samples=() + benchmark_failed=0 + for run_idx in $(seq 1 "$ITERATIONS"); do + start_ns="$(python3 -c 'import time; print(time.perf_counter_ns())')" + if env "$LD_RUNTIME_VAR=$LD_RUNTIME_VALUE" ZDNN_LOGLEVEL=off "$cmd" \ + >> "$log_file" 2>&1; then + end_ns="$(python3 -c 'import time; print(time.perf_counter_ns())')" + elapsed_sec="$(python3 - "$start_ns" "$end_ns" <<'PY' +import sys +start_ns = int(sys.argv[1]) +end_ns = int(sys.argv[2]) +print(f"{(end_ns - start_ns) / 1_000_000_000:.6f}") +PY +)" + samples+=("$elapsed_sec") + printf "%s,%d,%s,%s\n" "$name" "$run_idx" "$elapsed_sec" "$threshold" \ + >> "$RAW_CSV" + echo "run ${run_idx}: ${elapsed_sec}s" | tee -a "$SUMMARY" >> "$log_file" + else + rc=$? + echo "run ${run_idx}: command failed (rc=${rc})" | tee -a "$SUMMARY" \ + >> "$log_file" + benchmark_failed=1 + break + fi + done + + if [[ "$benchmark_failed" -ne 0 ]]; then + echo "| \`$name\` | n/a | $threshold | FAIL (command error) |" >> "$REPORT_MD" + FAILURES=$((FAILURES + 1)) + continue + fi + + median_sec="$(python3 - "${samples[@]}" <<'PY' +import statistics +import sys + +vals = [float(arg) for arg in sys.argv[1:]] +print(f"{statistics.median(vals):.6f}") +PY +)" + + if python3 - "$median_sec" "$threshold" <<'PY' +import sys +median = float(sys.argv[1]) +threshold = float(sys.argv[2]) +sys.exit(0 if median <= threshold else 1) +PY + then + echo "median: ${median_sec}s (PASS)" | tee -a "$SUMMARY" >> "$log_file" + echo "| \`$name\` | $median_sec | $threshold | PASS |" >> "$REPORT_MD" + else + echo "median: ${median_sec}s (FAIL threshold ${threshold}s)" | tee -a "$SUMMARY" \ + >> "$log_file" + echo "| \`$name\` | $median_sec | $threshold | FAIL |" >> "$REPORT_MD" + FAILURES=$((FAILURES + 1)) + fi + + echo "" | tee -a "$SUMMARY" >> "$log_file" +done + +{ + echo "" + echo "Raw results CSV: $RAW_CSV" + echo "Markdown report: $REPORT_MD" + echo "Failures: $FAILURES" +} | tee -a "$SUMMARY" + +if [[ "$FAILURES" -ne 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/scripts/s390x_perf_thresholds.env b/scripts/s390x_perf_thresholds.env new file mode 100644 index 0000000..09920b8 --- /dev/null +++ b/scripts/s390x_perf_thresholds.env @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: Apache-2.0 + +# Default per-benchmark max-median runtime thresholds (seconds) for +# scripts/s390x_perf_regression.sh. +# +# Tune these values per hardware class as needed. The defaults below are +# intentionally conservative to catch major regressions while reducing +# false positives from moderate runtime variance. + +PERF_MAX_SEC_MATMUL_OP=40.0 +PERF_MAX_SEC_MATMUL_BCAST_OP=40.0 +PERF_MAX_SEC_QUANTIZED_MATMUL_OP=60.0 +PERF_MAX_SEC_LSTM_RNN=90.0 diff --git a/scripts/s390x_run_battery.sh b/scripts/s390x_run_battery.sh index c11f083..d90f779 100755 --- a/scripts/s390x_run_battery.sh +++ b/scripts/s390x_run_battery.sh @@ -4,7 +4,7 @@ set -o pipefail usage() { cat <&2 + exit 2 +fi + if [[ -z "$RESULTS_DIR" ]]; then RESULTS_DIR="$ROOT_DIR/artifacts/s390x-battery-$(date -u '+%Y%m%dT%H%M%SZ')" fi @@ -132,6 +157,18 @@ run_step test_precheck env ZDNN_ENABLE_PRECHECK=true make -j"$JOBS" test # Battery run #3: precheck + full status diagnostics. run_step test_precheck_statusdiag env ZDNN_ENABLE_PRECHECK=true ZDNN_STATUS_DIAG=0xFFFFFFFF make -j"$JOBS" test +if [[ "$DO_PERF_REGRESSION" -eq 1 ]]; then + PERF_CMD=(scripts/s390x_perf_regression.sh --results-dir "$RESULTS_DIR/perf") + if [[ -n "$PERF_ITERATIONS" ]]; then + PERF_CMD+=(--iterations "$PERF_ITERATIONS") + fi + if [[ -n "$PERF_THRESHOLDS" ]]; then + PERF_CMD+=(--thresholds-file "$PERF_THRESHOLDS") + fi + + run_step perf_regression "${PERF_CMD[@]}" +fi + run_step collect_artifacts bash -lc ' mkdir -p "'$RESULTS_DIR'/config" cp -f config.log config.status config.make config.h "'$RESULTS_DIR'/config" 2>/dev/null || true @@ -142,6 +179,10 @@ run_step collect_artifacts bash -lc ' else echo "No tests/bin/*.txt files found" > "'$RESULTS_DIR'/test-bin-files.txt" fi + + if [[ -d "'$RESULTS_DIR'/perf" ]]; then + tar -czf "'$RESULTS_DIR'/perf-logs.tgz" "'$RESULTS_DIR'/perf"/*.log 2>/dev/null || true + fi ' { diff --git a/scripts/s390x_sync_and_run.sh b/scripts/s390x_sync_and_run.sh index e99b242..3cc01ca 100755 --- a/scripts/s390x_sync_and_run.sh +++ b/scripts/s390x_sync_and_run.sh @@ -3,7 +3,7 @@ set -euo pipefail usage() { cat < [--jobs N] [--run-id ID] [--no-delete] [--no-autoreconf] [--skip-distclean] +Usage: $(basename "$0") [--jobs N] [--run-id ID] [--no-delete] [--no-autoreconf] [--skip-distclean] [--perf-regression] [--perf-iterations N] [--perf-thresholds FILE] Rsync the current repo to an s390x host, run the remote build/test battery, and sync result artifacts back locally. @@ -18,6 +18,9 @@ Options: --no-delete Disable rsync --delete during sync --no-autoreconf Pass --no-autoreconf to remote battery script --skip-distclean Pass --skip-distclean to remote battery script + --perf-regression Enable remote perf regression battery + --perf-iterations N Iterations per perf benchmark + --perf-thresholds FILE Threshold file path on remote host USAGE } @@ -42,6 +45,9 @@ JOBS="" RSYNC_DELETE=1 REMOTE_NO_AUTORECONF=0 REMOTE_SKIP_DISTCLEAN=0 +REMOTE_PERF_REGRESSION=0 +REMOTE_PERF_ITERATIONS="" +REMOTE_PERF_THRESHOLDS="" while [[ $# -gt 0 ]]; do case "$1" in @@ -65,6 +71,18 @@ while [[ $# -gt 0 ]]; do REMOTE_SKIP_DISTCLEAN=1 shift ;; + --perf-regression) + REMOTE_PERF_REGRESSION=1 + shift + ;; + --perf-iterations) + REMOTE_PERF_ITERATIONS="$2" + shift 2 + ;; + --perf-thresholds) + REMOTE_PERF_THRESHOLDS="$2" + shift 2 + ;; -h|--help) usage exit 0 @@ -82,6 +100,12 @@ if [[ -n "$JOBS" ]] && { ! [[ "$JOBS" =~ ^[0-9]+$ ]] || [[ "$JOBS" -lt 1 ]]; }; exit 2 fi +if [[ -n "$REMOTE_PERF_ITERATIONS" ]] && + { ! [[ "$REMOTE_PERF_ITERATIONS" =~ ^[0-9]+$ ]] || [[ "$REMOTE_PERF_ITERATIONS" -lt 1 ]]; }; then + echo "--perf-iterations must be a positive integer" >&2 + exit 2 +fi + REMOTE_RESULTS_DIR="$REMOTE_DIR/artifacts/$RUN_ID" LOCAL_RESULTS_ROOT="$ROOT_DIR/artifacts" LOCAL_RESULTS_DIR="$LOCAL_RESULTS_ROOT/$RUN_ID" @@ -100,7 +124,7 @@ rsync "${RSYNC_FLAGS[@]}" "$ROOT_DIR/" "$TARGET_HOST:$REMOTE_DIR/" echo "[3/4] Running remote battery" REMOTE_RC=0 -REMOTE_CMD="cd '$REMOTE_DIR' && chmod +x scripts/s390x_run_battery.sh && scripts/s390x_run_battery.sh --results-dir '$REMOTE_RESULTS_DIR'" +REMOTE_CMD="cd '$REMOTE_DIR' && chmod +x scripts/s390x_run_battery.sh scripts/s390x_perf_regression.sh && scripts/s390x_run_battery.sh --results-dir '$REMOTE_RESULTS_DIR'" if [[ -n "$JOBS" ]]; then REMOTE_CMD+=" --jobs '$JOBS'" fi @@ -110,6 +134,15 @@ fi if [[ "$REMOTE_SKIP_DISTCLEAN" -eq 1 ]]; then REMOTE_CMD+=" --skip-distclean" fi +if [[ "$REMOTE_PERF_REGRESSION" -eq 1 ]]; then + REMOTE_CMD+=" --perf-regression" +fi +if [[ -n "$REMOTE_PERF_ITERATIONS" ]]; then + REMOTE_CMD+=" --perf-iterations '$REMOTE_PERF_ITERATIONS'" +fi +if [[ -n "$REMOTE_PERF_THRESHOLDS" ]]; then + REMOTE_CMD+=" --perf-thresholds '$REMOTE_PERF_THRESHOLDS'" +fi if ssh "$TARGET_HOST" "$REMOTE_CMD"; then REMOTE_RC=0 else