Skip to content

Commit 718a1ca

Browse files
authored
feat(ci): add CUDA 13 wheel builds (abetlen#2239)
1 parent 43c92a7 commit 718a1ca

3 files changed

Lines changed: 12 additions & 6 deletions

File tree

.github/workflows/build-wheels-cuda.yaml

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ jobs:
2424
# wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
2525
# so one builder per toolkit version is sufficient.
2626
'pyver' = @("3.9")
27-
'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
27+
'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1")
2828
'releasetag' = @("basic")
2929
'exclude' = @(
3030
@{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
@@ -115,8 +115,8 @@ jobs:
115115
} elseif ($IsLinux) {
116116
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
117117
} elseif ($IsWindows) {
118-
if ($cudaVersion -like '12.5.*') {
119-
# The Windows 12.5 toolkit meta-package pulls compiler activation
118+
if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") {
119+
# The Windows 12.5.x and 13.x toolkit meta-package pulls compiler activation
120120
# scripts that overflow cmd.exe after MSVC is already initialized.
121121
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
122122
} else {
@@ -209,8 +209,11 @@ jobs:
209209
if ([version]$nvccVersion -lt [version]"12.0") {
210210
# CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls.
211211
$cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
212+
} elseif ([version]$nvccVersion -ge [version]"13.0") {
213+
# CUDA 13 dropped offline compilation support for pre-Turing targets.
214+
$cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual"
212215
}
213-
# Build real cubins for the supported GPUs, including Pascal, and keep
216+
# Build real cubins for the supported GPUs and keep
214217
# one forward-compatible PTX target instead of embedding PTX for every
215218
# SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
216219
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
- feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239
1011
- feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238
1112
- fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237
1213

README.md

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -125,8 +125,8 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
125125

126126
It is also possible to install a pre-built wheel with CUDA support, as long as your system meets the following requirements:
127127

128-
- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5
129-
- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, or 6.0 or newer for CUDA 12 wheels
128+
- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4, 12.5, 13.0 or 13.2
129+
- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, 6.0 or newer for CUDA 12 wheels, or 7.5 or newer for CUDA 13 wheels
130130
- Python Version is 3.10, 3.11 or 3.12
131131

132132
```bash
@@ -141,6 +141,8 @@ Where `<cuda-version>` is one of the following:
141141
- `cu123`: CUDA 12.3
142142
- `cu124`: CUDA 12.4
143143
- `cu125`: CUDA 12.5
144+
- `cu130`: CUDA 13.0
145+
- `cu132`: CUDA 13.2
144146

145147
For example, to install the CUDA 12.1 wheel:
146148

0 commit comments

Comments
 (0)