|
24 | 24 | # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic, |
25 | 25 | # so one builder per toolkit version is sufficient. |
26 | 26 | 'pyver' = @("3.9") |
27 | | - 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1") |
| 27 | + 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1") |
28 | 28 | 'releasetag' = @("basic") |
29 | 29 | 'exclude' = @( |
30 | 30 | @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' }, |
@@ -115,8 +115,8 @@ jobs: |
115 | 115 | } elseif ($IsLinux) { |
116 | 116 | mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" |
117 | 117 | } elseif ($IsWindows) { |
118 | | - if ($cudaVersion -like '12.5.*') { |
119 | | - # The Windows 12.5 toolkit meta-package pulls compiler activation |
| 118 | + if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") { |
| 119 | + # The Windows 12.5.x and 13.0+ toolkit meta-packages pull compiler activation |
120 | 120 | # scripts that overflow cmd.exe after MSVC is already initialized. |
121 | 121 | mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" |
122 | 122 | } else { |
@@ -209,8 +209,11 @@ jobs: |
209 | 209 | if ([version]$nvccVersion -lt [version]"12.0") { |
210 | 210 | # CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls. |
211 | 211 | $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real" |
| 212 | + } elseif ([version]$nvccVersion -ge [version]"13.0") { |
| 213 | + # CUDA 13 dropped offline compilation support for pre-Turing targets. |
| 214 | + $cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual" |
212 | 215 | } |
213 | | - # Build real cubins for the supported GPUs, including Pascal, and keep |
| 216 | + # Build real cubins for the supported GPUs and keep |
214 | 217 | # one forward-compatible PTX target instead of embedding PTX for every |
215 | 218 | # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit. |
216 | 219 | $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS" |
|
0 commit comments