Build Wheels (CUDA) #102
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Manually dispatched workflow that builds CUDA wheels. | |
| name: Build Wheels (CUDA) | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| # Optional. When non-empty, the upload step publishes to "<release_tag>-cu<CUDA version>". | |
| release_tag: | |
| description: Release tag to upload wheel assets to | |
| required: false | |
| type: string | |
| # NOTE(review): presumably required so the release upload step can publish assets with GITHUB_TOKEN — confirm. | |
| permissions: | |
| contents: write | |
| jobs: | |
| # Builds the os/pyver/cuda/releasetag matrix in PowerShell and publishes it as a JSON | |
| # job output named "matrix", which build_wheels expands with fromJSON. | |
| define_matrix: | |
| name: Define Build Matrix | |
| runs-on: ubuntu-22.04 | |
| outputs: | |
| matrix: ${{ steps.set-matrix.outputs.matrix }} | |
| defaults: | |
| run: | |
| shell: pwsh | |
| steps: | |
| - name: Define Job Output | |
| id: set-matrix | |
| run: | | |
| $matrix = @{ | |
| 'os' = @('ubuntu-22.04', 'windows-2022') | |
| # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic, | |
| # so one builder per toolkit version is sufficient. | |
| 'pyver' = @("3.9") | |
| 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1") | |
| 'releasetag' = @("basic") | |
| # These Windows/CUDA combinations are removed from the generated matrix. | |
| 'exclude' = @( | |
| @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' }, | |
| @{ 'os' = 'windows-2022'; 'cuda' = '12.2.2' }, | |
| @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' } | |
| ) | |
| } | |
| # -Compress emits the JSON without line breaks so it fits the single name=value line appended below. | |
| $matrixOut = ConvertTo-Json $matrix -Compress | |
| Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT | |
| # Builds one CUDA wheel per matrix entry published by define_matrix. | |
| build_wheels: | |
| # The display name shows 'AVX2' when releasetag is 'wheels', otherwise the releasetag value itself. | |
| name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }} | |
| needs: define_matrix | |
| runs-on: ${{ matrix.os }} | |
| strategy: | |
| # The matrix is the JSON object written to the define_matrix "matrix" output. | |
| matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }} | |
| defaults: | |
| run: | |
| shell: pwsh | |
| env: | |
| # CUDAVER is read by the Install Dependencies and Build Wheel scripts. | |
| CUDAVER: ${{ matrix.cuda }} | |
| # NOTE(review): AVXVER is not read by any step shown in this file — confirm whether it is still needed. | |
| AVXVER: ${{ matrix.releasetag }} | |
| steps: | |
| # Windows only: exactly one of the two MSVC steps below runs, selected by the CUDA version. | |
| # CUDA 11.8.0 gets the 14.29 toolset pinned. | |
| # NOTE(review): presumably because CUDA 11.8 does not accept the runner's default toolset — confirm. | |
| - name: Set up MSVC for CUDA 11.8 | |
| if: runner.os == 'Windows' && matrix.cuda == '11.8.0' | |
| uses: ilammy/msvc-dev-cmd@v1 | |
| with: | |
| arch: x64 | |
| toolset: 14.29 | |
| # Every other CUDA version uses the action's default toolset. | |
| - name: Set up MSVC | |
| if: runner.os == 'Windows' && matrix.cuda != '11.8.0' | |
| uses: ilammy/msvc-dev-cmd@v1 | |
| with: | |
| arch: x64 | |
| # Check out the repository together with all nested submodules. | |
| - uses: actions/checkout@v6 | |
| with: | |
| submodules: "recursive" | |
| # Install the matrix Python version with pip caching enabled. | |
| - uses: actions/setup-python@v6 | |
| with: | |
| python-version: ${{ matrix.pyver }} | |
| cache: 'pip' | |
| # Create and activate the "llamacpp" conda environment (Miniforge) with the same Python version; | |
| # later steps resolve the CUDA toolkit and auditwheel inside this environment. | |
| - name: Setup Mamba | |
| uses: conda-incubator/setup-miniconda@v4.0.1 | |
| with: | |
| activate-environment: "llamacpp" | |
| python-version: ${{ matrix.pyver }} | |
| miniforge-version: latest | |
| add-pip-as-python-dependency: true | |
| auto-activate-base: false | |
| # Installs the CUDA toolchain for $env:CUDAVER from the matching nvidia/label/cuda-<version> | |
| # channel with mamba, then the Python build tools (plus patchelf and auditwheel on Linux). | |
| # Each install that later steps rely on has its exit code checked, because a failing | |
| # native command does not stop a PowerShell script by itself. | |
| - name: Install Dependencies | |
| env: | |
| MAMBA_DOWNLOAD_FAILFAST: "0" | |
| MAMBA_NO_LOW_SPEED_LIMIT: "1" | |
| run: | | |
| $cudaVersion = $env:CUDAVER | |
| $cudaChannel = "nvidia/label/cuda-$cudaVersion" | |
| if ($cudaVersion -eq '11.8.0') { | |
| # CUDA 11.8 is installed as an explicit, version-pinned package list instead of the cuda-toolkit meta-package. | |
| if ($IsLinux) { | |
| $cudaPackages = @( | |
| "${cudaChannel}::cuda-nvcc_linux-64=11.8.0", | |
| "${cudaChannel}::cuda-cccl=11.8.89", | |
| "${cudaChannel}::cuda-cudart=11.8.89", | |
| "${cudaChannel}::cuda-cudart-dev=11.8.89", | |
| "${cudaChannel}::cuda-driver-dev=11.8.89", | |
| "${cudaChannel}::libcublas=11.11.3.6", | |
| "${cudaChannel}::libcublas-dev=11.11.3.6" | |
| ) | |
| } elseif ($IsWindows) { | |
| $cudaPackages = @( | |
| "${cudaChannel}::cuda-nvcc_win-64=11.8.0", | |
| "${cudaChannel}::cuda-cccl=11.8.89", | |
| "${cudaChannel}::cuda-cudart=11.8.89", | |
| "${cudaChannel}::cuda-cudart-dev=11.8.89", | |
| "${cudaChannel}::libcublas=11.11.3.6", | |
| "${cudaChannel}::libcublas-dev=11.11.3.6" | |
| ) | |
| } else { | |
| throw 'Unsupported CUDA wheel build platform' | |
| } | |
| mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages | |
| } elseif ($IsLinux) { | |
| mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" | |
| } elseif ($IsWindows) { | |
| # NOTE(review): this condition matches 12.5.x and 13.0+, but not 12.6-12.9 — revisit if those versions join the matrix. | |
| if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") { | |
| # The Windows 12.5+ toolkit meta-package pulls compiler activation | |
| # scripts that overflow cmd.exe after MSVC is already initialized. | |
| mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" | |
| } else { | |
| mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" | |
| } | |
| } else { | |
| throw 'Unsupported CUDA wheel build platform' | |
| } | |
| # Exit code of whichever mamba install ran above. | |
| if ($LASTEXITCODE -ne 0) { | |
| exit $LASTEXITCODE | |
| } | |
| if ($IsWindows) { | |
| python -m pip install build wheel ninja | |
| } else { | |
| # A failed package index refresh is tolerated; the install that follows is what must succeed. | |
| sudo apt-get update | |
| sudo apt-get install -y patchelf | |
| # Stop here if patchelf could not be installed instead of failing later in the wheel repair step. | |
| if ($LASTEXITCODE -ne 0) { | |
| exit $LASTEXITCODE | |
| } | |
| python -m pip install auditwheel build wheel | |
| } | |
| # Exit code of whichever pip install ran above. | |
| if ($LASTEXITCODE -ne 0) { | |
| exit $LASTEXITCODE | |
| } | |
| # Points the build at the CUDA toolkit installed in the conda environment, selects a host | |
| # compiler and a GPU architecture list for the detected nvcc release, builds the wheel, and | |
| # exports CUDA_VERSION (plus CUDA_LIBRARY_PATHS on Linux) through GITHUB_ENV for later steps. | |
| - name: Build Wheel | |
| run: | | |
| $pathSeparator = if ($IsWindows) { ';' } else { ':' } | |
| # Toolkit root: <env>\Library on Windows; on Linux the targets/x86_64-linux tree when it | |
| # carries cuda_runtime.h, otherwise the environment prefix itself. | |
| if ($IsWindows) { | |
| $cudaRoot = Join-Path $env:CONDA_PREFIX 'Library' | |
| } elseif (Test-Path (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/include/cuda_runtime.h')) { | |
| $cudaRoot = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux' | |
| } else { | |
| $cudaRoot = $env:CONDA_PREFIX | |
| } | |
| $env:CUDA_PATH = $cudaRoot | |
| $env:CUDA_HOME = $cudaRoot | |
| $env:CUDAToolkit_ROOT = $cudaRoot | |
| $env:CUDA_TOOLKIT_ROOT_DIR = $cudaRoot | |
| $cudaHostCompilerArg = '' | |
| # CMake arguments use forward slashes, so convert the Windows path separators. | |
| $cudaRootCmake = $cudaRoot.Replace('\', '/') | |
| $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake" | |
| if ($IsLinux) { | |
| # Host compiler: g++-11 for CUDA releases below 12.0 when it is installed, otherwise g++-12 when present. | |
| if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) { | |
| $env:CC = '/usr/bin/gcc-11' | |
| $env:CXX = '/usr/bin/g++-11' | |
| $env:CUDAHOSTCXX = '/usr/bin/g++-11' | |
| $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX" | |
| } elseif (Test-Path '/usr/bin/g++-12') { | |
| $env:CC = '/usr/bin/gcc-12' | |
| $env:CXX = '/usr/bin/g++-12' | |
| $env:CUDAHOSTCXX = '/usr/bin/g++-12' | |
| $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX" | |
| } | |
| $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRoot -DCUDA_TOOLKIT_ROOT_DIR=$cudaRoot$cudaHostCompilerArg" | |
| # Prepend the CUDA directories, dropping empty entries so that a previously unset variable | |
| # does not leave a trailing separator: GCC documents an empty CPATH element as the current | |
| # working directory, and ld.so does the same for LD_LIBRARY_PATH. | |
| $env:CPATH = (@("$cudaRoot/include", $env:CPATH) | Where-Object { $_ }) -join $pathSeparator | |
| $env:CPLUS_INCLUDE_PATH = (@("$cudaRoot/include", $env:CPLUS_INCLUDE_PATH) | Where-Object { $_ }) -join $pathSeparator | |
| $env:LIBRARY_PATH = (@("$cudaRoot/lib", "$env:CONDA_PREFIX/lib", $env:LIBRARY_PATH) | Where-Object { $_ }) -join $pathSeparator | |
| $env:LD_LIBRARY_PATH = (@("$cudaRoot/lib", "$env:CONDA_PREFIX/lib", $env:LD_LIBRARY_PATH) | Where-Object { $_ }) -join $pathSeparator | |
| # Hand the existing CUDA library directories to the Linux wheel repair step. | |
| $cudaLibraryPaths = @( | |
| (Join-Path $cudaRoot 'lib'), | |
| (Join-Path $cudaRoot 'lib64'), | |
| (Join-Path $env:CONDA_PREFIX 'lib') | |
| ) | Where-Object { Test-Path $_ } | |
| Write-Output "CUDA_LIBRARY_PATHS=$($cudaLibraryPaths -join ':')" >> $env:GITHUB_ENV | |
| } elseif ($IsWindows) { | |
| # Build with Ninja on Windows; -ErrorAction Stop fails the step when ninja is not on PATH. | |
| $ninjaPath = ((Get-Command ninja -ErrorAction Stop).Source).Replace('\', '/') | |
| $env:CMAKE_GENERATOR = 'Ninja' | |
| $env:CMAKE_MAKE_PROGRAM = $ninjaPath | |
| $env:PATH = "$(Join-Path $cudaRoot 'bin')$pathSeparator$env:PATH" | |
| } | |
| # Candidate nvcc locations, in priority order; the first one that exists is used. | |
| if ($IsWindows) { | |
| $nvccCandidates = @( | |
| (Join-Path $cudaRoot 'bin\nvcc.exe'), | |
| (Join-Path $env:CONDA_PREFIX 'Library\bin\nvcc.exe'), | |
| (Join-Path $env:CONDA_PREFIX 'bin\nvcc.exe') | |
| ) | |
| } else { | |
| $nvccCandidates = @( | |
| (Join-Path $env:CONDA_PREFIX 'bin/nvcc'), | |
| (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc') | |
| ) | |
| } | |
| $nvccPath = $nvccCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1 | |
| if (-not $nvccPath) { | |
| throw 'Failed to find nvcc in the conda environment' | |
| } | |
| $env:CUDACXX = $nvccPath | |
| $env:PATH = "$(Split-Path $nvccPath)$pathSeparator$env:PATH" | |
| if ($IsWindows) { | |
| $nvccPathCmake = $nvccPath.Replace('\', '/') | |
| $env:CUDACXX = $nvccPathCmake | |
| $env:CMAKE_ARGS = "-DCMAKE_CUDA_COMPILER=$nvccPathCmake -DCMAKE_CUDA_COMPILER_ARG1=-allow-unsupported-compiler -DCMAKE_MAKE_PROGRAM=$env:CMAKE_MAKE_PROGRAM $env:CMAKE_ARGS" | |
| } | |
| # Read "release <major>.<minor>" from nvcc's version banner. Guard the lookup so that a banner | |
| # without that text reaches the descriptive error below instead of a null-index failure. | |
| $nvccReleaseMatch = (& $nvccPath --version) | Select-String 'release ([0-9]+\.[0-9]+)' | Select-Object -First 1 | |
| $nvccVersion = if ($nvccReleaseMatch) { $nvccReleaseMatch.Matches[0].Groups[1].Value } else { '' } | |
| if (-not $nvccVersion) { | |
| throw 'Failed to detect the installed CUDA toolkit version' | |
| } | |
| # e.g. "12.4" becomes "124"; used as the suffix of the release tag. | |
| $cudaTagVersion = $nvccVersion.Replace('.','') | |
| $env:VERBOSE = '1' | |
| $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual" | |
| if ([version]$nvccVersion -lt [version]"12.0") { | |
| # CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls. | |
| $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real" | |
| } elseif ([version]$nvccVersion -ge [version]"13.0") { | |
| # CUDA 13 dropped offline compilation support for pre-Turing targets. | |
| $cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual" | |
| } | |
| # Build real cubins for the supported GPUs and keep | |
| # one forward-compatible PTX target instead of embedding PTX for every | |
| # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit. | |
| $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS" | |
| $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off' | |
| if ($IsLinux) { | |
| $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_OPENMP=OFF' | |
| } | |
| python -m build --wheel | |
| # Fail at the point of failure rather than after publishing CUDA_VERSION for a wheel that was not built. | |
| if ($LASTEXITCODE -ne 0) { | |
| exit $LASTEXITCODE | |
| } | |
| # Publish tags that reflect the actual installed toolkit version. | |
| Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV | |
| # Linux only: runs auditwheel repair on the built wheel and replaces dist/*.whl with the | |
| # repaired copy. The CUDA driver, runtime and cuBLAS sonames listed below are passed as | |
| # --exclude so they are not bundled into the wheel. | |
| - name: Repair Linux wheel | |
| if: runner.os == 'Linux' | |
| shell: bash | |
| run: | | |
| set -euxo pipefail | |
| mkdir -p wheelhouse | |
| # CUDA_LIBRARY_PATHS is exported through GITHUB_ENV by the Build Wheel step. Keep failing | |
| # fast when it is missing, but with a message that says why. | |
| : "${CUDA_LIBRARY_PATHS?CUDA_LIBRARY_PATHS was not exported by the Build Wheel step}" | |
| # Append each list only when it is non-empty: an empty LD_LIBRARY_PATH element is read by | |
| # the dynamic loader as the current working directory. | |
| export LD_LIBRARY_PATH="$PWD/llama_cpp/lib${CUDA_LIBRARY_PATHS:+:${CUDA_LIBRARY_PATHS}}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" | |
| # Use the auditwheel installed into the llamacpp conda environment. | |
| auditwheel_bin="${CONDA}/envs/llamacpp/bin/auditwheel" | |
| "${auditwheel_bin}" repair \ | |
| --exclude libcuda.so \ | |
| --exclude libcuda.so.1 \ | |
| --exclude libcudart.so.11.0 \ | |
| --exclude libcudart.so.12 \ | |
| --exclude libcudart.so.13 \ | |
| --exclude libcublas.so.11 \ | |
| --exclude libcublas.so.12 \ | |
| --exclude libcublas.so.13 \ | |
| --exclude libcublasLt.so.11 \ | |
| --exclude libcublasLt.so.12 \ | |
| --exclude libcublasLt.so.13 \ | |
| -w wheelhouse \ | |
| dist/*.whl | |
| # Swap the original wheel for the repaired one so the upload step only sees the repaired file. | |
| rm dist/*.whl | |
| cp wheelhouse/*.whl dist/ | |
| "${auditwheel_bin}" show dist/*.whl | |
| # Uploads everything in dist/ to a release. Runs when the workflow ref is a tag, or when the | |
| # workflow was dispatched with a non-empty release_tag input. | |
| - uses: softprops/action-gh-release@v3 | |
| if: startsWith(github.ref, 'refs/tags/') || (github.event_name == 'workflow_dispatch' && inputs.release_tag != '') | |
| with: | |
| files: dist/* | |
| # Set tag_name to <tag>-cu<cuda_version>. | |
| # <tag> is the release_tag input on a dispatch (falling back to the ref name when it is empty); | |
| # <cuda_version> is the CUDA_VERSION value the Build Wheel step wrote to GITHUB_ENV. | |
| tag_name: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }}-cu${{ env.CUDA_VERSION }} | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |