Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit. Hold Shift + click to select a range.
30e9e92
fix: CI build - ubuntu-22.04, MSVC setup for Windows, update action v…
wejoncy Mar 21, 2026
3c783c3
ci: add workflow_dispatch for manual testing
wejoncy Mar 21, 2026
958cec9
ci: sync MSVC fix to mannu_build.yml
wejoncy Mar 21, 2026
ff276d2
Revert "ci: sync MSVC fix to mannu_build.yml"
wejoncy Mar 21, 2026
bb8a0fa
ci: upgrade CUDA to 12.9.0, PyTorch 2.7.0
wejoncy Mar 21, 2026
f55825e
ci: pin mamba cuda version to prevent mismatch
wejoncy Mar 21, 2026
a915d56
ci: CUDA 13.0.0 + PyTorch 2.9.0
wejoncy Mar 21, 2026
2572422
ci: fix cuda headers (cuda-cudart-dev) and Windows DISTUTILS_USE_SDK
wejoncy Mar 21, 2026
919a290
ci: debug and fix CUDA paths for conda CUDA 13.0
wejoncy Mar 21, 2026
d78842d
ci: add cuda-cccl for thrust/cub headers (CUDA 13)
wejoncy Mar 21, 2026
f5a3ec7
ci: symlink CUDA headers for g++, upgrade to PyTorch 2.10.0
wejoncy Mar 21, 2026
903ffc7
ci: use CXXFLAGS -I for thrust include path, simplify build step
wejoncy Mar 21, 2026
5e8960c
ci: split build into Linux(bash) and Windows(pwsh) to fix shell escaping
wejoncy Mar 21, 2026
c5206ff
ci: fix Linux conda activation (bash -el), Windows use CUDA 12.6 (MSV…
wejoncy Mar 21, 2026
7f19767
fix: skip compute_70 for CUDA >= 13.0 (no longer supported)
wejoncy Mar 21, 2026
45a909b
ci: patch torch compiled_autograd.h for MSVC+CUDA13 std:: bug, unify …
wejoncy Mar 21, 2026
2f7237d
feat: Python 3.11-3.13, add sm_90/sm_100/sm_120 (H100/B200/RTX5090), …
wejoncy Mar 21, 2026
f86780d
cleanup: remove test branch trigger, update Python/GPU arch versions
wejoncy Mar 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mannu_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

strategy:
matrix:
os: [ubuntu-20.04, windows-latest]
os: [ubuntu-22.04, windows-latest]
pyver: ["3.10"]
cuda: ["12.6.0"]
defaults:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Install CUDA ${{ matrix.cuda-version }}
if: github.event.pull_request.merged == true
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-20.04
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-22.04

- name: Install PyTorch 2.2.2 with CUDA ${{ matrix.cuda-version }}
if: github.event.pull_request.merged == true
Expand Down
84 changes: 68 additions & 16 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
push:
tags:
- "v*"
workflow_dispatch:

jobs:
build_wheels:
Expand All @@ -12,10 +13,11 @@ jobs:
# needs: release

strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, windows-latest]
pyver: ["3.10", "3.11", "3.12"]
cuda: ["12.4.1"]
os: [ubuntu-22.04, windows-latest]
pyver: ["3.11", "3.12", "3.13"]
cuda: ["13.0.0"]
defaults:
run:
shell: pwsh
Expand All @@ -36,14 +38,18 @@ jobs:
docker-images: true
swap-storage: false

- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v3
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.pyver }}

- name: Setup MSVC
if: runner.os == 'Windows'
uses: ilammy/msvc-dev-cmd@v1

- name: Setup Mamba
uses: conda-incubator/setup-miniconda@v2.2.0
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: "build"
python-version: ${{ matrix.pyver }}
Expand All @@ -56,15 +62,15 @@ jobs:
- name: Install Dependencies
run: |
# Install CUDA toolkit
mamba install -y 'cuda' -c "nvidia/label/cuda-${env:CUDA_VERSION}"
mamba install -y "cuda=${env:CUDA_VERSION}" "cuda-cudart-dev" "cuda-nvcc" "cuda-cccl" -c "nvidia/label/cuda-${env:CUDA_VERSION}"
# Env variables
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX

# Install torch
$cudaVersion = $env:CUDA_VERSION.Replace('.', '')
$cudaVersionPytorch = $cudaVersion.Substring(0, $cudaVersion.Length - 1)
if ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"}
if ([int]$cudaVersionPytorch -gt 129) { $pytorchVersion = "torch==2.10.0" } elseif ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"}
echo "pytorchVersion=$pytorchVersion"
echo "cudaVersion=<$cudaVersion>"
echo "cudaVersionPytorch=$cudaVersionPytorch"
Expand All @@ -76,20 +82,65 @@ jobs:
python -c "import torch; print('CUDA:', torch.version.cuda)"
python -c "import os; print('CUDA_HOME:', os.getenv('CUDA_HOME', None))"
python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
- name: Build Wheel
- name: Build Wheel (Linux)
if: runner.os == 'Linux'
shell: bash -el {0}
run: |
export CUDA_PATH=$CONDA_PREFIX
export CUDA_HOME=$CONDA_PREFIX

echo "CONDA_PREFIX=$CONDA_PREFIX"

# Find thrust include dir
THRUST_H=$(find $CONDA_PREFIX -path '*/thrust/complex.h' 2>/dev/null | head -1)
if [ -n "$THRUST_H" ]; then
CUDA_INCLUDE=$(dirname $(dirname "$THRUST_H"))
echo "Found thrust in: $CUDA_INCLUDE"
export CXXFLAGS="-I$CUDA_INCLUDE"
export CFLAGS="-I$CUDA_INCLUDE"
CUDA_BASE=$(dirname "$CUDA_INCLUDE")
if [ -f "$CUDA_BASE/bin/nvcc" ]; then
export CUDA_HOME=$CUDA_BASE
export CUDA_PATH=$CUDA_BASE
fi
fi
echo "CUDA_HOME=$CUDA_HOME"
echo "CXXFLAGS=$CXXFLAGS"

export GENERAL_TORCH=1
python setup.py sdist bdist_wheel -k --plat-name manylinux2014_x86_64
ls dist/*.whl

- name: Build Wheel (Windows)
if: runner.os == 'Windows'
run: |
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
# Only add the +cu<version> suffix to the wheel if not releasing on PyPI

# Find nvcc.exe and set CUDA_HOME
$nvcc = Get-ChildItem -Path $env:CONDA_PREFIX -Recurse -Filter "nvcc.exe" -ErrorAction SilentlyContinue | Select-Object -First 1
if ($nvcc) {
$cudaDir = Split-Path (Split-Path $nvcc.FullName)
echo "Setting CUDA_HOME to $cudaDir"
$env:CUDA_HOME = $cudaDir
$env:CUDA_PATH = $cudaDir
}

# Patch PyTorch compiled_autograd.h for MSVC + CUDA 13 std:: ambiguity
# See https://github.com/pytorch/pytorch/pull/144707#issuecomment-2692282551
$header = python -c "import torch; import os; print(os.path.join(os.path.dirname(torch.__file__), 'include', 'torch', 'csrc', 'dynamo', 'compiled_autograd.h'))"
if (Test-Path $header) {
(Get-Content $header) -replace '\} else if constexpr \(::std::is_same_v<T, ::std::string>\) \{', '// } else if constexpr (::std::is_same_v<T, ::std::string>) {' -replace 'return at::StringType::get\(\);', '// return at::StringType::get();' | Set-Content $header
echo "Patched compiled_autograd.h"
}

if ( $env:CUDA_VERSION -eq $env:PYPI_CUDA_VERSION ){
$env:PYPI_BUILD = 1
}
# echo "{CUDA_VERSION}=$env:CUDA_VERSION" >> $GITHUB_ENV
$env:GENERAL_TORCH = 1 # OptionalCUDAGuard
python setup.py sdist bdist_wheel -k $env:PLAT_ARG.split()
$env:GENERAL_TORCH = 1
$env:DISTUTILS_USE_SDK = 1
python setup.py sdist bdist_wheel -k --plat-name win_amd64
ls dist/*.whl
env:
PLAT_ARG: ${{ contains(runner.os, 'Linux') && '--plat-name manylinux2014_x86_64' || '--plat-name win_amd64' }}

- uses: actions/upload-artifact@v4
with:
Expand All @@ -102,11 +153,12 @@ jobs:
name: Create Release
runs-on: ubuntu-latest
needs: build_wheels
if: startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Extract branch info
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.2.2"]
requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.10.0"]

build-backend = "setuptools.build_meta"

Expand Down
22 changes: 14 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from packaging.version import parse, Version
import setuptools
from torch.utils.cpp_extension import BuildExtension, CUDA_HOME, CUDAExtension
from torch.utils.cpp_extension import BuildExtension, CUDA_HOME, CUDAExtension

ROOT_DIR = os.path.dirname(__file__)

Expand Down Expand Up @@ -72,31 +72,38 @@ def get_nvcc_cuda_version(cuda_dir: str = "") -> Version:
def get_compute_capabilities(compute_capabilities: Set[int], lower: int = 70):
# Collect the compute capabilities of all available GPUs.
if len(compute_capabilities) == 0 and (is_pypi_build() or not torch.cuda.is_available()):
if lower <= 70:
nvcc_cuda_version = get_nvcc_cuda_version()
if lower <= 70 and nvcc_cuda_version < Version("13.0"):
compute_capabilities.add(70)
if lower <= 75:
compute_capabilities.add(75)
compute_capabilities.add(80)
compute_capabilities.add(86)
compute_capabilities.add(89)

if len(compute_capabilities) == 0:
compute_capabilities.add(90)
if nvcc_cuda_version >= Version("12.8"):
compute_capabilities.add(100)
compute_capabilities.add(120)
for i in range(torch.cuda.device_count()):
major, minor = torch.cuda.get_device_capability(i)
if major*10+minor < lower:
raise RuntimeError(f"GPUs with compute capability less than {lower} are not supported.")
compute_capabilities.add(major * 10 + minor)

if len(compute_capabilities) == 0:
compute_capabilities.add(70)
nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version < Version("13.0"):
compute_capabilities.add(70)
compute_capabilities.add(75)
compute_capabilities.add(80)
nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version > Version("11.1"):
compute_capabilities.add(86)
if nvcc_cuda_version > Version("11.8"):
compute_capabilities.add(89)
compute_capabilities.add(90)
if nvcc_cuda_version >= Version("12.8"):
compute_capabilities.add(100)
compute_capabilities.add(120)

print(f"build pacakge for archs: {compute_capabilities}")
capability_flags = []
Expand Down Expand Up @@ -209,15 +216,14 @@ def get_gpu_ver():
"Documentation": "https://github.com/wejoncy/QLLM",
},
classifiers=[
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: Apache Software License",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
packages=setuptools.find_packages(exclude=("")),
python_requires=">=3.10",
python_requires=">=3.11",
install_requires=get_requirements(),
ext_modules=build_cuda_extensions(),
cmdclass={'build_ext': BuildExtension},
Expand Down
Loading