|
24 | 24 | # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic, |
25 | 25 | # so one builder per toolkit version is sufficient. |
26 | 26 | 'pyver' = @("3.9") |
27 | | - 'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1") |
| 27 | + 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1") |
28 | 28 | 'releasetag' = @("basic") |
29 | 29 | 'exclude' = @( |
30 | 30 | @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' }, |
|
50 | 50 | AVXVER: ${{ matrix.releasetag }} |
51 | 51 |
|
52 | 52 | steps: |
| 53 | + - name: Set up MSVC for CUDA 11.8 |
| 54 | + if: runner.os == 'Windows' && matrix.cuda == '11.8.0' |
| 55 | + uses: ilammy/msvc-dev-cmd@v1 |
| 56 | + with: |
| 57 | + arch: x64 |
| 58 | + toolset: 14.29 |
| 59 | + |
53 | 60 | - name: Set up MSVC |
54 | | - if: runner.os == 'Windows' |
| 61 | + if: runner.os == 'Windows' && matrix.cuda != '11.8.0' |
55 | 62 | uses: ilammy/msvc-dev-cmd@v1 |
56 | 63 | with: |
57 | 64 | arch: x64 |
|
81 | 88 | run: | |
82 | 89 | $cudaVersion = $env:CUDAVER |
83 | 90 | $cudaChannel = "nvidia/label/cuda-$cudaVersion" |
84 | | - if ($IsLinux) { |
| 91 | + if ($cudaVersion -eq '11.8.0') { |
| 92 | + if ($IsLinux) { |
| 93 | + $cudaPackages = @( |
| 94 | + "${cudaChannel}::cuda-nvcc_linux-64=11.8.0", |
| 95 | + "${cudaChannel}::cuda-cccl=11.8.89", |
| 96 | + "${cudaChannel}::cuda-cudart=11.8.89", |
| 97 | + "${cudaChannel}::cuda-cudart-dev=11.8.89", |
| 98 | + "${cudaChannel}::cuda-driver-dev=11.8.89", |
| 99 | + "${cudaChannel}::libcublas=11.11.3.6", |
| 100 | + "${cudaChannel}::libcublas-dev=11.11.3.6" |
| 101 | + ) |
| 102 | + } elseif ($IsWindows) { |
| 103 | + $cudaPackages = @( |
| 104 | + "${cudaChannel}::cuda-nvcc_win-64=11.8.0", |
| 105 | + "${cudaChannel}::cuda-cccl=11.8.89", |
| 106 | + "${cudaChannel}::cuda-cudart=11.8.89", |
| 107 | + "${cudaChannel}::cuda-cudart-dev=11.8.89", |
| 108 | + "${cudaChannel}::libcublas=11.11.3.6", |
| 109 | + "${cudaChannel}::libcublas-dev=11.11.3.6" |
| 110 | + ) |
| 111 | + } else { |
| 112 | + throw 'Unsupported CUDA wheel build platform' |
| 113 | + } |
| 114 | + mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages |
| 115 | + } elseif ($IsLinux) { |
85 | 116 | mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" |
86 | 117 | } elseif ($IsWindows) { |
87 | 118 | if ($cudaVersion -like '12.5.*') { |
@@ -122,7 +153,12 @@ jobs: |
122 | 153 | $cudaRootCmake = $cudaRoot.Replace('\', '/') |
123 | 154 | $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake" |
124 | 155 | if ($IsLinux) { |
125 | | - if (Test-Path '/usr/bin/g++-12') { |
| 156 | + if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) { |
| 157 | + $env:CC = '/usr/bin/gcc-11' |
| 158 | + $env:CXX = '/usr/bin/g++-11' |
| 159 | + $env:CUDAHOSTCXX = '/usr/bin/g++-11' |
| 160 | + $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX" |
| 161 | + } elseif (Test-Path '/usr/bin/g++-12') { |
126 | 162 | $env:CC = '/usr/bin/gcc-12' |
127 | 163 | $env:CXX = '/usr/bin/g++-12' |
128 | 164 | $env:CUDAHOSTCXX = '/usr/bin/g++-12' |
@@ -169,10 +205,15 @@ jobs: |
169 | 205 | } |
170 | 206 | $cudaTagVersion = $nvccVersion.Replace('.','') |
171 | 207 | $env:VERBOSE = '1' |
| 208 | + $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual" |
| 209 | + if ([version]$nvccVersion -lt [version]"12.0") { |
| 210 | + # CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls, so toolkits older than 12.0 drop the sm_90 cubin and PTX targets. |
| 211 | + $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real" |
| 212 | + } |
172 | 213 | # Build real cubins for the supported GPUs, including Pascal. On CUDA 12+ |
173 | 214 | # keep one forward-compatible PTX target (90-virtual) instead of embedding PTX for every |
174 | 215 | # SM; toolkits older than 12.0 ship cubins only. This keeps the wheel under GitHub's 2 GiB release-asset limit. |
175 | | - $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS" |
| 216 | + $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS" |
176 | 217 | $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off' |
177 | 218 | python -m build --wheel |
178 | 219 | # Publish tags that reflect the actual installed toolkit version. |
|
0 commit comments