# Source: Model-Optimizer/.github/workflows/gpu_tests.yml (main branch, Distillative-AI/Model-Optimizer)
# GPU test workflow: triggered by pushes to copied PR branches (pull-request/<number>),
# by a nightly schedule, and on demand.
name: GPU tests

on:
  # On-demand
  workflow_dispatch:
  # Nightly
  schedule:
    - cron: "0 0 * * *"
  push:
    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
    branches:
      - "pull-request/[0-9]+"

# Cancel previous runs if new commit is pushed to the same PR.
# Refs under refs/heads/pull-request/ are grouped by ref; every other run is grouped by commit SHA.
concurrency:
  cancel-in-progress: true
  group: >-
    ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}

jobs:
  # Decides whether a PR push touched files that are relevant for the GPU tests.
  # Only runs for refs under refs/heads/pull-request/ and exposes `any_changed`
  # (taken from the changed-files step) to downstream jobs.
  check-file-changes:
    if: startsWith(github.ref, 'refs/heads/pull-request/')
    runs-on: ubuntu-latest
    outputs:
      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0 # Full history, needed by the git merge-base call below
      # Provides the PR metadata JSON (head/base SHAs) consumed by the following steps
      - id: get-pr-info
        uses: nv-gha-runners/get-pr-info@main
      # Get commit from main branch that is present in the PR to use as base for changed files
      - id: calculate-merge-base
        env:
          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
        run: |
          # Capture the result first: under the default `bash -e` shell a failing
          # `git merge-base` then aborts this step, instead of being hidden behind
          # the exit status of `tee` and writing an empty merge-base output.
          merge_base="$(git merge-base "$BASE_SHA" "$PR_SHA")"
          echo "merge-base=${merge_base}" | tee --append "${GITHUB_OUTPUT}"
      - name: Check for changes in test-relevant directories
        id: changed-tests
        uses: step-security/changed-files@v46.0.5
        with:
          # Diff from the merge base up to the PR head commit
          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
          # Paths whose modification should trigger the GPU tests
          files: |
            .github/workflows/gpu_tests.yml
            modelopt/**
            tests/gpu/**
            pyproject.toml
            tox.ini
          fail_on_initial_diff_error: true
  # Calls the reusable _wait_for_checks workflow before the GPU tests are started.
  # Skipped unless check-file-changes reported relevant changes.
  wait-checks:
    needs:
      - check-file-changes
    if: ${{ needs.check-file-changes.outputs.any_changed == 'true' }}
    permissions:
      checks: read
    uses: ./.github/workflows/_wait_for_checks.yml
    with:
      delay: 300s
      # Wait for DCO and Unit tests / linux to pass
      match_pattern: "^DCO$|^linux$"
    secrets: inherit
  # GPU test matrix for PR pushes. Runs only when check-file-changes reported relevant
  # changes and after wait-checks succeeded. The strategy, container and steps carry
  # YAML anchors so that gpu-tests-non-pr can reuse them through aliases.
  gpu-tests-pr:
    needs: [check-file-changes, wait-checks]
    if: needs.check-file-changes.outputs.any_changed == 'true'
    strategy: &gpu_strategy
      fail-fast: false
      matrix:
        # example: suffix of the tox environment (cuda13-<example>)
        # timeout: job timeout in minutes
        # container_image: image name and tag below nvcr.io/nvidia/
        include:
          - example: gpu
            timeout: 45
            container_image: pytorch:26.01-py3
          - example: gpu-megatron
            timeout: 45
            container_image: pytorch:26.01-py3
          - example: gpu-trtllm
            timeout: 30
            container_image: tensorrt-llm/release:1.3.0rc5
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    timeout-minutes: ${{ matrix.timeout }}
    container: &gpu_container
      image: nvcr.io/nvidia/${{ matrix.container_image }}
      env:
        GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
    steps: &gpu_steps
      - uses: actions/checkout@v6
      - uses: nv-gha-runners/setup-proxy-cache@main
      - name: Setup environment variables
        run: |
          # Prepend the existing value only when it is non-empty: an empty entry in
          # LD_LIBRARY_PATH makes the dynamic loader search the current directory.
          # The redirect target is quoted so the path is never word-split.
          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/include:/usr/lib/x86_64-linux-gnu" >> "$GITHUB_ENV"
      - name: Run gpu tests
        # --current-env makes tox run inside the container's existing Python environment
        run: pip install tox-current-env && tox -e cuda13-${{ matrix.example }} --current-env
  # Same GPU matrix for every ref that is not under refs/heads/pull-request/
  # (presumably the nightly schedule and on-demand runs). Strategy, container and
  # steps are aliases of the anchors declared on gpu-tests-pr; only the runner label differs.
  gpu-tests-non-pr:
    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
    container: *gpu_container
    strategy: *gpu_strategy
    timeout-minutes: ${{ matrix.timeout }}
    steps: *gpu_steps
  # Aggregated PR status (presumably the check marked as required - confirm in the
  # branch protection settings). Fails when check-file-changes did not succeed, or when
  # relevant files changed but gpu-tests-pr did not succeed; passes otherwise.
  gpu-pr-required-check:
    needs:
      - check-file-changes
      - gpu-tests-pr
    # Run even if gpu-tests-pr is skipped
    if: ${{ always() && startsWith(github.ref, 'refs/heads/pull-request/') }}
    runs-on: ubuntu-latest
    steps:
      - name: Required GPU tests did not succeed
        if: >-
          ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.gpu-tests-pr.result != 'success') }}
        run: exit 1