diff --git a/.buildkite/ml_pipeline/step.py b/.buildkite/ml_pipeline/step.py index ed2fbb099..cb7db14d0 100644 --- a/.buildkite/ml_pipeline/step.py +++ b/.buildkite/ml_pipeline/step.py @@ -9,7 +9,7 @@ # limitation. class PipelineStep(list): - def generate_step(self, label, command): + def generate_step(self, label, command, soft_fail=False): command = command + " | buildkite-agent pipeline upload" step = { "label": label, @@ -19,6 +19,8 @@ def generate_step(self, label, command): "image": "python", } } + if soft_fail: + step["soft_fail"] = True return step def generate_step_template(self, platform, action, build_aarch64, build_x86_64): diff --git a/.buildkite/pipeline.json.py b/.buildkite/pipeline.json.py index 56b02f457..37cc4537d 100755 --- a/.buildkite/pipeline.json.py +++ b/.buildkite/pipeline.json.py @@ -65,7 +65,8 @@ def main(): # Check for build timing regressions against nightly baseline pipeline_steps.append(pipeline_steps.generate_step("Check build timing regressions", - ".buildkite/pipelines/check_build_regression.yml.sh")) + ".buildkite/pipelines/check_build_regression.yml.sh", + soft_fail=True)) pipeline["env"] = env pipeline["steps"] = pipeline_steps diff --git a/.buildkite/scripts/steps/run_tests.sh b/.buildkite/scripts/steps/run_tests.sh index 12b88c1bb..0c5c08125 100755 --- a/.buildkite/scripts/steps/run_tests.sh +++ b/.buildkite/scripts/steps/run_tests.sh @@ -105,6 +105,28 @@ else -P cmake/run-all-tests-parallel.cmake || TEST_OUTCOME=$? fi +# --- PyTorch allowlist validation --- +# When triggered from the PyTorch edge pipeline, run the Python-based +# allowlist validation which traces live HuggingFace models with the +# new PyTorch version and verifies every op is in ALLOWED_OPERATIONS. +VALIDATION_OUTCOME=0 +if [[ "${GITHUB_PR_COMMENT_VAR_ACTION:-}" == "run_pytorch_tests" ]] && [ -f cmake/run-validation.cmake ]; then + echo "--- Validating PyTorch allowlist against HuggingFace models" + cmake \ + -DSOURCE_DIR="$(pwd)" \ + -DVALIDATE_CONFIG="$(pwd)/dev-tools/extract_model_ops/validation_models.json" \ + -DVALIDATE_PT_DIR="$(pwd)/dev-tools/extract_model_ops/es_it_models" \ + -DVALIDATE_VERBOSE=TRUE \ + -DOPTIONAL=TRUE \ + -P cmake/run-validation.cmake || VALIDATION_OUTCOME=$? + + if [[ $VALIDATION_OUTCOME -ne 0 ]]; then + echo "^^^ +++" + echo "Allowlist validation failed — the new PyTorch version may introduce ops not in ALLOWED_OPERATIONS." + echo "See dev-tools/extract_model_ops/README.md for how to update the allowlist." + fi +fi + # Upload test results echo "--- Uploading test results" TEST_RESULTS_ARCHIVE=${OS}-${HARDWARE_ARCH}-unit_test_results.tgz @@ -117,4 +139,6 @@ else echo "No test results archive created" fi -exit $TEST_OUTCOME +if [[ $TEST_OUTCOME -ne 0 || $VALIDATION_OUTCOME -ne 0 ]]; then + exit 1 +fi