# Builds CUDA-enabled OpenImpala wheels with cibuildwheel and publishes them
# to PyPI. Triggered when a GitHub release is published, or manually.
name: "Build and Publish OpenImpala GPU Wheels (CUDA)"

on:
  release:
    types:
      - published
  workflow_dispatch:

jobs:
  build_gpu_wheels:
    name: Build CUDA GPU wheels
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install cibuildwheel
        run: python -m pip install cibuildwheel==2.16.5

      # Cache the compiled GPU dependencies (HDF5, libtiff, HYPRE+CUDA, AMReX+CUDA).
      # These take ~15-20 minutes to build from source with CUDA support.
      # The before-all script below reads/writes deps.tar.gz in this directory
      # through the container's /host mount, so the tarball really lands on the
      # runner where this action can save it.
      - name: Cache native GPU dependencies
        uses: actions/cache@v4
        with:
          path: .cibw-deps-cache
          key: cibw-deps-gpu-cuda12-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-v1

      - name: Build GPU wheels
        run: python -m cibuildwheel --output-dir wheelhouse
        env:
          CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
          CIBW_SKIP: "*musllinux* *i686*"
          CIBW_ARCHS_LINUX: "x86_64"

          # Third-party (sameli) CUDA-enabled manylinux_2_28 image: CUDA 12.6 on
          # AlmaLinux 8. It is expected to provide nvcc, the CUDA runtime, and
          # cuBLAS/cuSPARSE under /usr/local/cuda.
          CIBW_MANYLINUX_X86_64_IMAGE: sameli/manylinux_2_28_x86_64_cuda_12.6

          # Build all native dependencies inside the build container.
          #   - HDF5 and libtiff are CPU-only (no GPU path needed).
          #   - HYPRE is built with --with-cuda for GPU-accelerated solves.
          #   - AMReX is built with -DAMReX_GPU_BACKEND=CUDA for device kernels.
          #
          # This is a folded scalar: every line below is joined with a single
          # space into ONE shell command line. Keep all lines at the same
          # indentation -- a more-indented line would keep its line break and
          # split the command.
          #
          # /project inside the container is only a copy of the checkout, so the
          # dependency tarball is read from and written to the runner workspace
          # via /host. The CUDA toolkit under /usr/local is excluded from the
          # tarball because the image already ships it.
          #
          # Semicolon-separated architecture lists must stay quoted; an unquoted
          # ';' ends the cmake command in the shell.
          CIBW_BEFORE_ALL_LINUX: >-
            dnf install -y epel-release &&
            dnf --enablerepo=powertools install -y
            openmpi-devel gcc-gfortran gcc-c++ wget git
            zlib-devel libjpeg-turbo-devel python3-pip &&
            pip3 install "cmake>=3.28,<4" &&
            export PATH=/usr/lib64/openmpi/bin:/usr/local/cuda/bin:$PATH &&
            export CUDA_HOME=/usr/local/cuda &&
            DEPS_TARBALL="/host${{ github.workspace }}/.cibw-deps-cache/deps.tar.gz" &&
            if [ -f "$DEPS_TARBALL" ]; then
            echo "=== Restoring cached GPU dependencies ===" &&
            tar xzf "$DEPS_TARBALL" -C / ;
            else
            echo "=== Building GPU dependencies from source ===" &&
            wget -q https://github.com/HDFGroup/hdf5/releases/download/hdf5_1.14.6/hdf5-1.14.6.tar.gz &&
            tar xzf hdf5-1.14.6.tar.gz &&
            cd hdf5-1.14.6 &&
            CC=mpicc CXX=mpicxx ./configure
            --prefix=/usr/local
            --enable-parallel
            --enable-cxx
            --enable-unsupported
            --disable-shared
            --with-pic &&
            make -j$(nproc) &&
            make install &&
            cd .. &&
            wget -q https://download.osgeo.org/libtiff/tiff-4.6.0.tar.gz &&
            tar xzf tiff-4.6.0.tar.gz &&
            cd tiff-4.6.0 &&
            cmake -S . -B build
            -DCMAKE_INSTALL_PREFIX=/usr/local
            -DCMAKE_BUILD_TYPE=Release
            -DBUILD_SHARED_LIBS=OFF
            -DCMAKE_POSITION_INDEPENDENT_CODE=ON &&
            cmake --build build -j$(nproc) &&
            cmake --install build &&
            cd .. &&
            wget -q https://github.com/hypre-space/hypre/archive/v2.31.0.tar.gz &&
            tar xzf v2.31.0.tar.gz &&
            cd hypre-2.31.0/src &&
            ./configure --prefix=/usr/local --with-MPI --with-cuda
            --with-cuda-home=/usr/local/cuda --enable-shared=no
            CC=mpicc CXX=mpicxx FC=mpif90
            CFLAGS="-O2 -fPIC" CXXFLAGS="-O2 -fPIC" FFLAGS="-O2 -fPIC"
            CUDA_HOME=/usr/local/cuda &&
            make -j$(nproc) &&
            make install &&
            cd ../.. &&
            git clone --depth 1 --branch 25.03 https://github.com/AMReX-Codes/amrex.git /tmp/amrex &&
            cmake -S /tmp/amrex -B /tmp/amrex/build
            -DCMAKE_INSTALL_PREFIX=/usr/local
            -DCMAKE_BUILD_TYPE=Release
            -DBUILD_SHARED_LIBS=OFF
            -DAMReX_MPI=ON
            -DAMReX_OMP=ON
            -DAMReX_SPACEDIM=3
            -DAMReX_FORTRAN=ON
            -DAMReX_PARTICLES=OFF
            -DAMReX_GPU_BACKEND=CUDA
            -DAMReX_CUDA_ARCH="60;70;75;80;86;89;90"
            -DCMAKE_POSITION_INDEPENDENT_CODE=ON
            -DCMAKE_CUDA_ARCHITECTURES="60;70;75;80;86;89;90" &&
            cmake --build /tmp/amrex/build -j$(nproc) &&
            cmake --install /tmp/amrex/build &&
            mkdir -p "$(dirname "$DEPS_TARBALL")" &&
            tar czf "$DEPS_TARBALL" --exclude='usr/local/cuda*' -C / usr/local ;
            fi

          CIBW_BEFORE_BUILD: pip install "cmake>=3.28,<4"

          # Point the wheel build at MPI, CUDA, and the dependencies installed
          # under /usr/local. Folded into a single space-separated line of
          # NAME="value" assignments; keep all lines at the same indentation.
          CIBW_ENVIRONMENT_LINUX: >-
            PATH="/usr/lib64/openmpi/bin:/usr/local/cuda/bin:$PATH"
            CUDA_HOME="/usr/local/cuda"
            CMAKE_C_COMPILER="mpicc"
            CMAKE_CXX_COMPILER="mpicxx"
            CMAKE_PREFIX_PATH="/usr/local"
            CMAKE_GENERATOR="Unix Makefiles"
            CMAKE_ARGS="-DGPU_BACKEND=CUDA -DCMAKE_CUDA_ARCHITECTURES=60;70;75;80;86;89;90"

          # Vendor libraries but exclude host-specific MPI, OpenMP, Fortran runtime,
          # and CUDA runtime libraries (users must have the CUDA toolkit installed).
          # Folded into a single auditwheel command line; keep all lines at the
          # same indentation.
          CIBW_REPAIR_WHEEL_COMMAND_LINUX: >-
            auditwheel repair -w {dest_dir} {wheel}
            --exclude libmpi.so
            --exclude libmpi.so.12
            --exclude libmpi.so.40
            --exclude libmpi_cxx.so
            --exclude libmpi_cxx.so.1
            --exclude libmpi_cxx.so.40
            --exclude libopen-rte.so
            --exclude libopen-rte.so.40
            --exclude libopen-pal.so
            --exclude libopen-pal.so.40
            --exclude libmpi_mpifh.so
            --exclude libmpi_mpifh.so.40
            --exclude libgomp.so.1
            --exclude libgfortran.so.5
            --exclude libquadmath.so.0
            --exclude libcuda.so
            --exclude libcuda.so.1
            --exclude libcudart.so
            --exclude libcudart.so.12
            --exclude libcublas.so
            --exclude libcublas.so.12
            --exclude libcublasLt.so
            --exclude libcublasLt.so.12
            --exclude libcusparse.so
            --exclude libcusparse.so.12
            --exclude libcurand.so
            --exclude libcurand.so.10
            --exclude libnvJitLink.so
            --exclude libnvJitLink.so.12

      # Fail here, rather than in the publish job, if no wheel was produced.
      - name: Upload wheels as artifacts
        uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-gpu
          path: ./wheelhouse/*.whl
          if-no-files-found: error

  # Runs after every successful build, including manual workflow_dispatch runs.
  publish_to_pypi:
    name: Publish GPU wheels to PyPI
    needs: build_gpu_wheels
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # Grants the job an OIDC token; no API token is passed to the publish step.
      id-token: write

    steps:
      - name: Download wheel artifacts
        uses: actions/download-artifact@v4
        with:
          name: cibw-wheels-gpu
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          skip-existing: true