# Build Wheels (CUDA) #102
# Builds CUDA-enabled wheels over an OS x CUDA-toolkit matrix and, when a
# release tag is available, uploads each wheel to a "<tag>-cu<version>" release.
name: Build Wheels (CUDA)

on:
  workflow_dispatch:
    inputs:
      release_tag:
        description: Release tag to upload wheel assets to
        required: false
        type: string

# The release upload step at the end of build_wheels writes release assets.
permissions:
  contents: write

jobs:
  # Emits the build matrix as a JSON job output; build_wheels expands it with
  # fromJSON().
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-22.04
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
            'os' = @('ubuntu-22.04', 'windows-2022')
            # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
            # so one builder per toolkit version is sufficient.
            'pyver' = @("3.9")
            'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1")
            'releasetag' = @("basic")
            # These toolkit versions are built on Linux only.
            'exclude' = @(
              @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
              @{ 'os' = 'windows-2022'; 'cuda' = '12.2.2' },
              @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' }
            )
          }
          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Builds one wheel per (os, cuda) combination from define_matrix.
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}
    steps:
      # CUDA 11.8 is built with the pinned 14.29 MSVC toolset; the other CUDA
      # versions initialize MSVC without pinning a toolset.
      - name: Set up MSVC for CUDA 11.8
        if: runner.os == 'Windows' && matrix.cuda == '11.8.0'
        uses: ilammy/msvc-dev-cmd@v1
        with:
          arch: x64
          # Quoted so the version is passed as a string, not parsed as a float.
          toolset: '14.29'

      - name: Set up MSVC
        if: runner.os == 'Windows' && matrix.cuda != '11.8.0'
        uses: ilammy/msvc-dev-cmd@v1
        with:
          arch: x64

      - uses: actions/checkout@v6
        with:
          submodules: "recursive"

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}
          cache: 'pip'

      # Creates the "llamacpp" conda environment that receives the CUDA
      # toolkit packages and the Python build tooling.
      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v4.0.1
        with:
          activate-environment: "llamacpp"
          python-version: ${{ matrix.pyver }}
          miniforge-version: latest
          add-pip-as-python-dependency: true
          auto-activate-base: false

      # Installs the CUDA toolkit for $CUDAVER from the matching
      # nvidia/label/cuda-<version> channel, then the Python build tools.
      - name: Install Dependencies
        env:
          MAMBA_DOWNLOAD_FAILFAST: "0"
          MAMBA_NO_LOW_SPEED_LIMIT: "1"
        run: |
          $cudaVersion = $env:CUDAVER
          $cudaChannel = "nvidia/label/cuda-$cudaVersion"
          if ($cudaVersion -eq '11.8.0') {
            # CUDA 11.8 installs an explicit, pinned package list instead of
            # the cuda-toolkit meta-package.
            if ($IsLinux) {
              $cudaPackages = @(
                "${cudaChannel}::cuda-nvcc_linux-64=11.8.0",
                "${cudaChannel}::cuda-cccl=11.8.89",
                "${cudaChannel}::cuda-cudart=11.8.89",
                "${cudaChannel}::cuda-cudart-dev=11.8.89",
                "${cudaChannel}::cuda-driver-dev=11.8.89",
                "${cudaChannel}::libcublas=11.11.3.6",
                "${cudaChannel}::libcublas-dev=11.11.3.6"
              )
            } elseif ($IsWindows) {
              $cudaPackages = @(
                "${cudaChannel}::cuda-nvcc_win-64=11.8.0",
                "${cudaChannel}::cuda-cccl=11.8.89",
                "${cudaChannel}::cuda-cudart=11.8.89",
                "${cudaChannel}::cuda-cudart-dev=11.8.89",
                "${cudaChannel}::libcublas=11.11.3.6",
                "${cudaChannel}::libcublas-dev=11.11.3.6"
              )
            } else {
              throw 'Unsupported CUDA wheel build platform'
            }
            mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages
          } elseif ($IsLinux) {
            mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
          } elseif ($IsWindows) {
            if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") {
              # The Windows 12.5+ toolkit meta-package pulls compiler activation
              # scripts that overflow cmd.exe after MSVC is already initialized.
              mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
            } else {
              mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
            }
          } else {
            throw 'Unsupported CUDA wheel build platform'
          }
          # Native command failures do not stop a pwsh script on their own;
          # only the last command's exit code reaches the runner.
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }
          if ($IsWindows) {
            python -m pip install build wheel ninja
          } else {
            sudo apt-get update
            sudo apt-get install -y patchelf
            # Fail here rather than letting the following pip install mask a
            # failed patchelf install.
            if ($LASTEXITCODE -ne 0) {
              exit $LASTEXITCODE
            }
            python -m pip install auditwheel build wheel
          }

      # Points the build at the conda-installed CUDA toolkit, assembles
      # CMAKE_ARGS, builds the wheel, and exports CUDA_VERSION (and, on Linux,
      # CUDA_LIBRARY_PATHS) for the later steps.
      - name: Build Wheel
        run: |
          $pathSeparator = if ($IsWindows) { ';' } else { ':' }

          # Locate the toolkit root inside the conda environment.
          if ($IsWindows) {
            $cudaRoot = Join-Path $env:CONDA_PREFIX 'Library'
          } elseif (Test-Path (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/include/cuda_runtime.h')) {
            $cudaRoot = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux'
          } else {
            $cudaRoot = $env:CONDA_PREFIX
          }
          $env:CUDA_PATH = $cudaRoot
          $env:CUDA_HOME = $cudaRoot
          $env:CUDAToolkit_ROOT = $cudaRoot
          $env:CUDA_TOOLKIT_ROOT_DIR = $cudaRoot
          $cudaHostCompilerArg = ''
          # CMake arguments use forward slashes; the Linux branch below
          # rebuilds CMAKE_ARGS with the host compiler appended.
          $cudaRootCmake = $cudaRoot.Replace('\', '/')
          $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake"

          if ($IsLinux) {
            # Toolkits older than 12.0 use g++-11 when present; otherwise
            # g++-12 is used when present.
            if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) {
              $env:CC = '/usr/bin/gcc-11'
              $env:CXX = '/usr/bin/g++-11'
              $env:CUDAHOSTCXX = '/usr/bin/g++-11'
              $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
            } elseif (Test-Path '/usr/bin/g++-12') {
              $env:CC = '/usr/bin/gcc-12'
              $env:CXX = '/usr/bin/g++-12'
              $env:CUDAHOSTCXX = '/usr/bin/g++-12'
              $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
            }
            $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRoot -DCUDA_TOOLKIT_ROOT_DIR=$cudaRoot$cudaHostCompilerArg"
            $env:CPATH = "$cudaRoot/include$pathSeparator$env:CPATH"
            $env:CPLUS_INCLUDE_PATH = "$cudaRoot/include$pathSeparator$env:CPLUS_INCLUDE_PATH"
            $env:LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LIBRARY_PATH"
            $env:LD_LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LD_LIBRARY_PATH"
            # Hand the existing CUDA library directories to the
            # "Repair Linux wheel" step through the job environment.
            $cudaLibraryPaths = @(
              (Join-Path $cudaRoot 'lib'),
              (Join-Path $cudaRoot 'lib64'),
              (Join-Path $env:CONDA_PREFIX 'lib')
            ) | Where-Object { Test-Path $_ }
            Write-Output "CUDA_LIBRARY_PATHS=$($cudaLibraryPaths -join ':')" >> $env:GITHUB_ENV
          } elseif ($IsWindows) {
            $ninjaPath = ((Get-Command ninja -ErrorAction Stop).Source).Replace('\', '/')
            $env:CMAKE_GENERATOR = 'Ninja'
            $env:CMAKE_MAKE_PROGRAM = $ninjaPath
            $env:PATH = "$(Join-Path $cudaRoot 'bin')$pathSeparator$env:PATH"
          }

          # Find nvcc; the first existing candidate wins.
          if ($IsWindows) {
            $nvccCandidates = @(
              (Join-Path $cudaRoot 'bin\nvcc.exe'),
              (Join-Path $env:CONDA_PREFIX 'Library\bin\nvcc.exe'),
              (Join-Path $env:CONDA_PREFIX 'bin\nvcc.exe')
            )
          } else {
            $nvccCandidates = @(
              (Join-Path $env:CONDA_PREFIX 'bin/nvcc'),
              (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc')
            )
          }
          $nvccPath = $nvccCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1
          if (-not $nvccPath) {
            throw 'Failed to find nvcc in the conda environment'
          }
          $env:CUDACXX = $nvccPath
          $env:PATH = "$(Split-Path $nvccPath)$pathSeparator$env:PATH"
          if ($IsWindows) {
            $nvccPathCmake = $nvccPath.Replace('\', '/')
            $env:CUDACXX = $nvccPathCmake
            $env:CMAKE_ARGS = "-DCMAKE_CUDA_COMPILER=$nvccPathCmake -DCMAKE_CUDA_COMPILER_ARG1=-allow-unsupported-compiler -DCMAKE_MAKE_PROGRAM=$env:CMAKE_MAKE_PROGRAM $env:CMAKE_ARGS"
          }

          # Read the major.minor toolkit version from nvcc itself. Test the
          # match before dereferencing it: indexing into a missing match
          # raises a generic null-index error instead of the message below.
          $nvccVersionMatch = (& $nvccPath --version) |
            Select-String 'release ([0-9]+\.[0-9]+)' |
            Select-Object -First 1
          if (-not $nvccVersionMatch) {
            throw 'Failed to detect the installed CUDA toolkit version'
          }
          $nvccVersion = $nvccVersionMatch.Matches[0].Groups[1].Value
          if (-not $nvccVersion) {
            throw 'Failed to detect the installed CUDA toolkit version'
          }
          $cudaTagVersion = $nvccVersion.Replace('.','')
          $env:VERBOSE = '1'

          $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual"
          if ([version]$nvccVersion -lt [version]"12.0") {
            # CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls.
            $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
          } elseif ([version]$nvccVersion -ge [version]"13.0") {
            # CUDA 13 dropped offline compilation support for pre-Turing targets.
            $cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual"
          }
          # Build real cubins for the supported GPUs and keep
          # one forward-compatible PTX target instead of embedding PTX for every
          # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
          if ($IsLinux) {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_OPENMP=OFF'
          }

          python -m build --wheel
          # Stop before exporting CUDA_VERSION when the build failed.
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }
          # Publish tags that reflect the actual installed toolkit version.
          Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV

      # Runs auditwheel repair on the built wheel and replaces dist/ with the
      # repaired result. The CUDA driver, runtime and cuBLAS libraries are
      # passed to --exclude.
      - name: Repair Linux wheel
        if: runner.os == 'Linux'
        shell: bash
        run: |
          set -euxo pipefail
          mkdir -p wheelhouse
          export LD_LIBRARY_PATH="$PWD/llama_cpp/lib:${CUDA_LIBRARY_PATHS}:${LD_LIBRARY_PATH:-}"
          # NOTE(review): assumes $CONDA points at the install that holds the
          # "llamacpp" environment in this shell; confirm.
          auditwheel_bin="${CONDA}/envs/llamacpp/bin/auditwheel"
          "${auditwheel_bin}" repair \
            --exclude libcuda.so \
            --exclude libcuda.so.1 \
            --exclude libcudart.so.11.0 \
            --exclude libcudart.so.12 \
            --exclude libcudart.so.13 \
            --exclude libcublas.so.11 \
            --exclude libcublas.so.12 \
            --exclude libcublas.so.13 \
            --exclude libcublasLt.so.11 \
            --exclude libcublasLt.so.12 \
            --exclude libcublasLt.so.13 \
            -w wheelhouse \
            dist/*.whl
          rm dist/*.whl
          cp wheelhouse/*.whl dist/
          "${auditwheel_bin}" show dist/*.whl

      # Uploads only when running on a tag ref or when a release_tag input
      # was supplied to the manual dispatch.
      - uses: softprops/action-gh-release@v3
        if: startsWith(github.ref, 'refs/tags/') || (github.event_name == 'workflow_dispatch' && inputs.release_tag != '')
        with:
          files: dist/*
          # Set tag_name to <tag>-cu<cuda_version>.
          tag_name: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }}-cu${{ env.CUDA_VERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}