Skip to content

Add Developer Certificate of Origin to CONTRIBUTING.md (#22) #108

Add Developer Certificate of Origin to CONTRIBUTING.md (#22)

Add Developer Certificate of Origin to CONTRIBUTING.md (#22) #108

Workflow file for this run

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
# GPU tests live in a separate workflow because NVIDIA self-hosted runners
# block pull_request events entirely. Keeping them here avoids a confusing
# "Skipped" entry with unresolved matrix names on every PR.
# Workflow title.
name: CI / GPU

# Triggers: manual dispatch, pushes to `main` or to any branch matching
# `pull-request/<digits>`, and merge-queue `checks_requested` events.
on:
  workflow_dispatch:
  push:
    branches: [main, "pull-request/[0-9]+"]
  merge_group:
    types: [checks_requested]
# Runs are grouped by workflow name + git ref; starting a new run in a group
# cancels the one that is still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref }}"
# Workflow-level pip settings. Values are quoted so they stay strings.
env:
  PIP_DISABLE_PIP_VERSION_CHECK: '1'
  PIP_NO_CACHE_DIR: '1'
  PIP_PREFER_BINARY: '1'
# Every job below targets the same GPU runner label and runs inside an
# ubuntu:24.04 container started with the flags listed under `options`.
# NVIDIA_VISIBLE_DEVICES is forwarded from the runner's `env` context into
# the container.
jobs:
  # ---------------------------------------------------------------------------
  # Fast tier: one job per Python version in the matrix. Installs that
  # interpreter from the deadsnakes PPA, runs check_python_compat.sh, then a
  # short smoke run whose log is checked against an LER ceiling.
  # ---------------------------------------------------------------------------
  gpu-tests:
    name: "gpu / py${{ matrix.python-version }}"
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    timeout-minutes: 45
    strategy:
      # Let the remaining Python versions finish when one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.11", "3.12", "3.13"]
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true

      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y \
            python${{ matrix.python-version }} \
            python${{ matrix.python-version }}-venv \
            python${{ matrix.python-version }}-dev
          git lfs install

      # git and git-lfs are installed above, before the LFS-enabled checkout.
      - uses: actions/checkout@v4
        with:
          lfs: true

      - name: Verify GPU
        run: nvidia-smi

      - name: Install dependencies and run tests
        run: bash code/scripts/check_python_compat.sh
        env:
          PYTHON_BIN: python${{ matrix.python-version }}
          MODE: train
          SKIP_TESTS: "0"
          REQUIRE_GPU: "1"

      # NOTE(review): the venv activated here is presumably created by
      # check_python_compat.sh in the previous step; confirm in that script.
      - name: Training + inference with LER check
        # bash is requested explicitly because the script uses PIPESTATUS.
        shell: bash
        run: |
          source .venv_train_${{ matrix.python-version }}/bin/activate
          bash code/scripts/smoke_run.sh 2>&1 | tee /tmp/ci_train.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # 0.35: short run (16k samples, 2 epochs for stable LER across py versions)
          python code/scripts/check_ler_from_log.py /tmp/ci_train.log --max-ler 0.35
        env:
          EXPERIMENT_NAME: ci_short
          PREDECODER_TRAIN_SAMPLES: "16384"
          PREDECODER_VAL_SAMPLES: "2048"
          PREDECODER_TEST_SAMPLES: "2048"
          PREDECODER_TRAIN_EPOCHS: "2"

  # ---------------------------------------------------------------------------
  # Mid-tier (~5-10 min): extended training + inference with LER check.
  # Runs only after merge to main (not on PR branches) to save GPU time.
  # Single Python version; multi-version coverage is handled by gpu-tests.
  # ---------------------------------------------------------------------------
  mid-gpu-tests:
    # Gated on the main ref and on gpu-tests having completed successfully.
    if: github.ref == 'refs/heads/main'
    needs: gpu-tests
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    timeout-minutes: 40
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true

      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python3.13 python3.13-venv python3.13-dev
          git lfs install

      - uses: actions/checkout@v4
        with:
          lfs: true

      - name: Verify GPU
        run: nvidia-smi

      - name: Install Python dependencies
        run: |
          python3.13 -m venv .venv_mid
          . .venv_mid/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_train.txt

      - name: Mid-tier training + inference with LER check (32k train, 2 epochs)
        # bash is requested explicitly because the script uses PIPESTATUS.
        shell: bash
        run: |
          . .venv_mid/bin/activate
          bash code/scripts/smoke_run.sh 2>&1 | tee /tmp/ci_mid.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # 0.2: mid-tier (32k/2 epochs); loosen if flaky
          python code/scripts/check_ler_from_log.py /tmp/ci_mid.log --max-ler 0.2
        env:
          EXPERIMENT_NAME: ci_mid
          PREDECODER_TRAIN_SAMPLES: "32768"
          PREDECODER_VAL_SAMPLES: "4096"
          PREDECODER_TEST_SAMPLES: "4096"
          PREDECODER_TRAIN_EPOCHS: "2"

      - name: HE compile tests (torch.compile + autotune on GPU)
        run: |
          . .venv_mid/bin/activate
          PYTHONPATH=code python -m unittest discover -s code/tests/mid -p "test_*.py" -v

  # ---------------------------------------------------------------------------
  # GPU coverage: captures GPU-specific code paths missed by the CPU coverage job
  # ---------------------------------------------------------------------------
  gpu-coverage:
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    timeout-minutes: 20
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true

      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs python3 python3-pip python3-venv
          git lfs install

      - uses: actions/checkout@v4
        with:
          lfs: true

      - name: Verify GPU
        run: nvidia-smi

      - name: Install Python dependencies
        run: |
          python3 -m venv .venv_gpu_cov
          . .venv_gpu_cov/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_inference.txt
          pip install -r code/requirements_ci.txt

      - name: Run tests with GPU coverage
        run: |
          . .venv_gpu_cov/bin/activate
          # Remember a test failure instead of aborting right away, so the
          # reports below are still written for the upload step. The saved
          # status is returned at the end, so a failing suite still fails
          # this step.
          test_status=0
          PYTHONPATH=code coverage run -m unittest discover -s code/tests -p "test_*.py" || test_status=$?
          coverage report
          coverage html -d htmlcov-gpu
          coverage xml -o coverage-gpu.xml
          exit "$test_status"

      - name: Upload GPU coverage artifacts
        # Upload after test failures too, but not when the run was cancelled.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: gpu-coverage-report
          path: |
            htmlcov-gpu/
            coverage-gpu.xml