diff --git a/.github/workflows/mannu_build.yml b/.github/workflows/mannu_build.yml index 9400fed..6be3fcc 100644 --- a/.github/workflows/mannu_build.yml +++ b/.github/workflows/mannu_build.yml @@ -21,7 +21,7 @@ jobs: strategy: matrix: - os: [ubuntu-20.04, windows-latest] + os: [ubuntu-22.04, windows-latest] pyver: ["3.10"] cuda: ["12.6.0"] defaults: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 84d0ae6..492f125 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -43,7 +43,7 @@ jobs: - name: Install CUDA ${{ matrix.cuda-version }} if: github.event.pull_request.merged == true run: | - bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-20.04 + bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-22.04 - name: Install PyTorch 2.2.2 with CUDA ${{ matrix.cuda-version }} if: github.event.pull_request.merged == true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 494d004..9e2a216 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,6 +4,7 @@ on: push: tags: - "v*" + workflow_dispatch: jobs: build_wheels: @@ -12,10 +13,11 @@ jobs: # needs: release strategy: + fail-fast: false matrix: - os: [ubuntu-20.04, windows-latest] - pyver: ["3.10", "3.11", "3.12"] - cuda: ["12.4.1"] + os: [ubuntu-22.04, windows-latest] + pyver: ["3.11", "3.12", "3.13"] + cuda: ["13.0.0"] defaults: run: shell: pwsh @@ -36,14 +38,18 @@ jobs: docker-images: true swap-storage: false - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.pyver }} + - name: Setup MSVC + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + - name: Setup Mamba - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: "build" python-version: ${{ matrix.pyver }} @@ -56,7 +62,7 @@ jobs: - name: Install Dependencies run: | # Install CUDA toolkit - mamba install -y 'cuda' -c "nvidia/label/cuda-${env:CUDA_VERSION}" + mamba install -y "cuda=${env:CUDA_VERSION}" "cuda-cudart-dev" "cuda-nvcc" "cuda-cccl" -c "nvidia/label/cuda-${env:CUDA_VERSION}" # Env variables $env:CUDA_PATH = $env:CONDA_PREFIX $env:CUDA_HOME = $env:CONDA_PREFIX @@ -64,7 +70,7 @@ jobs: # Install torch $cudaVersion = $env:CUDA_VERSION.Replace('.', '') $cudaVersionPytorch = $cudaVersion.Substring(0, $cudaVersion.Length - 1) - if ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"} + if ([int]$cudaVersionPytorch -gt 129) { $pytorchVersion = "torch==2.10.0" } elseif ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"} echo "pytorchVersion=$pytorchVersion" echo "cudaVersion=<$cudaVersion>" echo "cudaVersionPytorch=$cudaVersionPytorch" @@ -76,20 +82,65 @@ jobs: python -c "import torch; print('CUDA:', torch.version.cuda)" python -c "import os; print('CUDA_HOME:', os.getenv('CUDA_HOME', None))" python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" - - name: Build Wheel + - name: Build Wheel (Linux) + if: runner.os == 'Linux' + shell: bash -el {0} + run: | + export CUDA_PATH=$CONDA_PREFIX + export CUDA_HOME=$CONDA_PREFIX + + echo "CONDA_PREFIX=$CONDA_PREFIX" + + # Find thrust include dir + THRUST_H=$(find $CONDA_PREFIX -path '*/thrust/complex.h' 2>/dev/null | head -1) + if [ -n "$THRUST_H" ]; then + CUDA_INCLUDE=$(dirname $(dirname "$THRUST_H")) + echo "Found thrust in: $CUDA_INCLUDE" + export CXXFLAGS="-I$CUDA_INCLUDE" + export CFLAGS="-I$CUDA_INCLUDE" + CUDA_BASE=$(dirname "$CUDA_INCLUDE") + if [ -f "$CUDA_BASE/bin/nvcc" ]; then + export CUDA_HOME=$CUDA_BASE + export CUDA_PATH=$CUDA_BASE + fi + fi + echo "CUDA_HOME=$CUDA_HOME" + echo "CXXFLAGS=$CXXFLAGS" + + export GENERAL_TORCH=1 + python setup.py sdist bdist_wheel -k --plat-name manylinux2014_x86_64 + ls dist/*.whl + + - name: Build Wheel (Windows) + if: runner.os == 'Windows' run: | $env:CUDA_PATH = $env:CONDA_PREFIX $env:CUDA_HOME = $env:CONDA_PREFIX - # Only add +cu118 to wheel if not releasing on PyPi + + # Find nvcc.exe and set CUDA_HOME + $nvcc = Get-ChildItem -Path $env:CONDA_PREFIX -Recurse -Filter "nvcc.exe" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($nvcc) { + $cudaDir = Split-Path (Split-Path $nvcc.FullName) + echo "Setting CUDA_HOME to $cudaDir" + $env:CUDA_HOME = $cudaDir + $env:CUDA_PATH = $cudaDir + } + + # Patch PyTorch compiled_autograd.h for MSVC + CUDA 13 std:: ambiguity + # See https://github.com/pytorch/pytorch/pull/144707#issuecomment-2692282551 + $header = python -c "import torch; import os; print(os.path.join(os.path.dirname(torch.__file__), 'include', 'torch', 'csrc', 'dynamo', 'compiled_autograd.h'))" + if (Test-Path $header) { + (Get-Content $header) -replace '\} else if constexpr \(::std::is_same_v\) \{', '// } else if constexpr (::std::is_same_v) {' -replace 'return at::StringType::get\(\);', '// return at::StringType::get();' | Set-Content $header + echo "Patched compiled_autograd.h" + } + if ( $env:CUDA_VERSION -eq $env:PYPI_CUDA_VERSION ){ $env:PYPI_BUILD = 1 } - # echo "{CUDA_VERSION}=$env:CUDA_VERSION" >> $GITHUB_ENV - $env:GENERAL_TORCH = 1 # OptionalCUDAGuard - python setup.py sdist bdist_wheel -k $env:PLAT_ARG.split() + $env:GENERAL_TORCH = 1 + $env:DISTUTILS_USE_SDK = 1 + python setup.py sdist bdist_wheel -k --plat-name win_amd64 ls dist/*.whl - env: - PLAT_ARG: ${{ contains(runner.os, 'Linux') && '--plat-name manylinux2014_x86_64' || '--plat-name win_amd64' }} - uses: actions/upload-artifact@v4 with: @@ -102,11 +153,12 @@ jobs: name: Create Release runs-on: ubuntu-latest needs: build_wheels + if: startsWith(github.ref, 'refs/tags/') outputs: upload_url: ${{ steps.create_release.outputs.upload_url }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Extract branch info shell: bash diff --git a/pyproject.toml b/pyproject.toml index 8a3d653..0b671c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.2.2"] +requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.10.0"] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index bbc4f55..9c0513e 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ from packaging.version import parse, Version import setuptools -from torch.utils.cpp_extension import BuildExtension, CUDA_HOME, CUDAExtension +from torch.utils.cpp_extension import BuildExtension, CUDA_HOME, CUDAExtension ROOT_DIR = os.path.dirname(__file__) @@ -72,15 +72,18 @@ def get_nvcc_cuda_version(cuda_dir: str = "") -> Version: def get_compute_capabilities(compute_capabilities: Set[int], lower: int = 70): # Collect the compute capabilities of all available GPUs. if len(compute_capabilities) == 0 and (is_pypi_build() or not torch.cuda.is_available()): - if lower <= 70: + nvcc_cuda_version = get_nvcc_cuda_version() + if lower <= 70 and nvcc_cuda_version < Version("13.0"): compute_capabilities.add(70) if lower <= 75: compute_capabilities.add(75) compute_capabilities.add(80) compute_capabilities.add(86) compute_capabilities.add(89) - - if len(compute_capabilities) == 0: + compute_capabilities.add(90) + if nvcc_cuda_version >= Version("12.8"): + compute_capabilities.add(100) + compute_capabilities.add(120) for i in range(torch.cuda.device_count()): major, minor = torch.cuda.get_device_capability(i) if major*10+minor < lower: @@ -88,15 +91,19 @@ def get_compute_capabilities(compute_capabilities: Set[int], lower: int = 70): compute_capabilities.add(major * 10 + minor) if len(compute_capabilities) == 0: - compute_capabilities.add(70) + nvcc_cuda_version = get_nvcc_cuda_version() + if nvcc_cuda_version < Version("13.0"): + compute_capabilities.add(70) compute_capabilities.add(75) compute_capabilities.add(80) - nvcc_cuda_version = get_nvcc_cuda_version() if nvcc_cuda_version > Version("11.1"): compute_capabilities.add(86) if nvcc_cuda_version > Version("11.8"): compute_capabilities.add(89) compute_capabilities.add(90) + if nvcc_cuda_version >= Version("12.8"): + compute_capabilities.add(100) + compute_capabilities.add(120) print(f"build pacakge for archs: {compute_capabilities}") capability_flags = [] @@ -209,7 +216,6 @@ def get_gpu_ver(): "Documentation": "https://github.com/wejoncy/QLLM", }, classifiers=[ - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -217,7 +223,7 @@ def get_gpu_ver(): "Topic :: Scientific/Engineering :: Artificial Intelligence", ], packages=setuptools.find_packages(exclude=("")), - python_requires=">=3.10", + python_requires=">=3.11", install_requires=get_requirements(), ext_modules=build_cuda_extensions(), cmdclass={'build_ext': BuildExtension},