Skip to content

Commit 43c92a7

Browse files
authored
feat(ci): add CUDA 11.8 wheel builds (abetlen#2238)
* feat(ci): add CUDA 11.8 wheel builds
* fix(ci): make CUDA 11.8 wheel builds version-consistent
* fix(ci): allow non-CUDA dependencies for CUDA 11.8 wheels
* fix(ci): omit Hopper targets from CUDA 11.8 wheels
* fix(ci): use GCC 11 for CUDA 11.8 Linux wheels
* fix(ci): use MSVC 14.29 for CUDA 11.8 Windows wheels
1 parent c7af423 commit 43c92a7

3 files changed

Lines changed: 50 additions & 7 deletions

File tree

.github/workflows/build-wheels-cuda.yaml

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
# wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
2525
# so one builder per toolkit version is sufficient.
2626
'pyver' = @("3.9")
27-
'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
27+
'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
2828
'releasetag' = @("basic")
2929
'exclude' = @(
3030
@{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
@@ -50,8 +50,15 @@ jobs:
5050
AVXVER: ${{ matrix.releasetag }}
5151

5252
steps:
53+
- name: Set up MSVC for CUDA 11.8
54+
if: runner.os == 'Windows' && matrix.cuda == '11.8.0'
55+
uses: ilammy/msvc-dev-cmd@v1
56+
with:
57+
arch: x64
58+
toolset: 14.29
59+
5360
- name: Set up MSVC
54-
if: runner.os == 'Windows'
61+
if: runner.os == 'Windows' && matrix.cuda != '11.8.0'
5562
uses: ilammy/msvc-dev-cmd@v1
5663
with:
5764
arch: x64
@@ -81,7 +88,31 @@ jobs:
8188
run: |
8289
$cudaVersion = $env:CUDAVER
8390
$cudaChannel = "nvidia/label/cuda-$cudaVersion"
84-
if ($IsLinux) {
91+
if ($cudaVersion -eq '11.8.0') {
92+
if ($IsLinux) {
93+
$cudaPackages = @(
94+
"${cudaChannel}::cuda-nvcc_linux-64=11.8.0",
95+
"${cudaChannel}::cuda-cccl=11.8.89",
96+
"${cudaChannel}::cuda-cudart=11.8.89",
97+
"${cudaChannel}::cuda-cudart-dev=11.8.89",
98+
"${cudaChannel}::cuda-driver-dev=11.8.89",
99+
"${cudaChannel}::libcublas=11.11.3.6",
100+
"${cudaChannel}::libcublas-dev=11.11.3.6"
101+
)
102+
} elseif ($IsWindows) {
103+
$cudaPackages = @(
104+
"${cudaChannel}::cuda-nvcc_win-64=11.8.0",
105+
"${cudaChannel}::cuda-cccl=11.8.89",
106+
"${cudaChannel}::cuda-cudart=11.8.89",
107+
"${cudaChannel}::cuda-cudart-dev=11.8.89",
108+
"${cudaChannel}::libcublas=11.11.3.6",
109+
"${cudaChannel}::libcublas-dev=11.11.3.6"
110+
)
111+
} else {
112+
throw 'Unsupported CUDA wheel build platform'
113+
}
114+
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages
115+
} elseif ($IsLinux) {
85116
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
86117
} elseif ($IsWindows) {
87118
if ($cudaVersion -like '12.5.*') {
@@ -122,7 +153,12 @@ jobs:
122153
$cudaRootCmake = $cudaRoot.Replace('\', '/')
123154
$env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake"
124155
if ($IsLinux) {
125-
if (Test-Path '/usr/bin/g++-12') {
156+
if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) {
157+
$env:CC = '/usr/bin/gcc-11'
158+
$env:CXX = '/usr/bin/g++-11'
159+
$env:CUDAHOSTCXX = '/usr/bin/g++-11'
160+
$cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
161+
} elseif (Test-Path '/usr/bin/g++-12') {
126162
$env:CC = '/usr/bin/gcc-12'
127163
$env:CXX = '/usr/bin/g++-12'
128164
$env:CUDAHOSTCXX = '/usr/bin/g++-12'
@@ -169,10 +205,15 @@ jobs:
169205
}
170206
$cudaTagVersion = $nvccVersion.Replace('.','')
171207
$env:VERBOSE = '1'
208+
$cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual"
209+
if ([version]$nvccVersion -lt [version]"12.0") {
210+
# CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls, so toolkits older than 12.0 omit the 90-real and 90-virtual targets.
211+
$cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
212+
}
172213
# Build real cubins for the supported GPUs, including Pascal, and keep
173214
# one forward-compatible PTX target instead of embedding PTX for every
174215
# SM. This keeps the wheel under GitHub's 2 GiB release-asset limit. Toolkits older than 12.0 use an all-real architecture list, so those wheels embed no PTX target.
175-
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
216+
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
176217
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
177218
python -m build --wheel
178219
# Publish tags that reflect the actual installed toolkit version.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
- feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238
1011
- fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237
1112

1213
## [0.3.24]

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
125125

126126
It is also possible to install a pre-built wheel with CUDA support, as long as your system meets the following requirements:
127127

128-
- CUDA Version is 12.1, 12.2, 12.3, 12.4 or 12.5
129-
- NVIDIA GPU compute capability is 6.0 or newer
128+
- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5
129+
- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, or 6.0 or newer for CUDA 12 wheels
130130
- Python Version is 3.10, 3.11 or 3.12
131131

132132
```bash
@@ -135,6 +135,7 @@ pip install llama-cpp-python \
135135
```
136136

137137
Where `<cuda-version>` is one of the following:
138+
- `cu118`: CUDA 11.8
138139
- `cu121`: CUDA 12.1
139140
- `cu122`: CUDA 12.2
140141
- `cu123`: CUDA 12.3

0 commit comments

Comments
 (0)