diff --git a/.github/workflows/mannu_build.yml b/.github/workflows/mannu_build.yml
index 9400fed..6be3fcc 100644
--- a/.github/workflows/mannu_build.yml
+++ b/.github/workflows/mannu_build.yml
@@ -21,7 +21,7 @@ jobs:
       
       strategy:
         matrix:
-          os: [ubuntu-20.04, windows-latest]
-          pyver: ["3.10"]
+          os: [ubuntu-22.04, windows-latest]
+          pyver: ["3.11"]  # setup.py declares python_requires ">=3.11"; a cp310 wheel would be rejected by pip
           cuda: ["12.6.0"]
       defaults:
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 84d0ae6..492f125 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -43,7 +43,7 @@ jobs:
     - name: Install CUDA ${{ matrix.cuda-version }}
       if: github.event.pull_request.merged == true
       run: |
-        bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-20.04
+        bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ubuntu-22.04  # 2nd arg: OS label handed to cuda-install.sh; presumably has to match the job's runner image - TODO confirm
 
     - name: Install PyTorch 2.2.2 with CUDA ${{ matrix.cuda-version }}
       if: github.event.pull_request.merged == true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 494d004..9e2a216 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,6 +4,7 @@ on:
   push:
     tags:
       - "v*"
+  workflow_dispatch:  # manual trigger; the "Create Release" job is gated on tag refs, so a manual run on a branch only builds wheels
 
 jobs:
   build_wheels:
@@ -12,10 +13,11 @@ jobs:
     # needs: release
     
     strategy:
+      fail-fast: false  # keep the other matrix jobs running when one combination fails
       matrix:
-        os: [ubuntu-20.04, windows-latest]
-        pyver: ["3.10", "3.11", "3.12"]
-        cuda: ["12.4.1"]
+        os: [ubuntu-22.04, windows-latest]
+        pyver: ["3.11", "3.12", "3.13"]  # same versions as the classifiers / python_requires in setup.py
+        cuda: ["13.0.0"]  # presumably exported as CUDA_VERSION for the steps below - TODO confirm
     defaults:
       run:
         shell: pwsh
@@ -36,14 +38,18 @@ jobs:
           docker-images: true
           swap-storage: false
         
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v3
+      - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.pyver }}
 
+      - name: Setup MSVC  # Windows only; the Windows build step sets DISTUTILS_USE_SDK, presumably to reuse this environment - TODO confirm
+        if: runner.os == 'Windows'
+        uses: ilammy/msvc-dev-cmd@v1
+
       - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@v2.2.0
+        uses: conda-incubator/setup-miniconda@v3  # creates/activates the "build" env whose CONDA_PREFIX the later steps read
         with:
           activate-environment: "build"
           python-version: ${{ matrix.pyver }}
@@ -56,7 +62,7 @@ jobs:
       - name: Install Dependencies
         run: |
           # Install CUDA toolkit
-          mamba install -y 'cuda' -c "nvidia/label/cuda-${env:CUDA_VERSION}"
+          mamba install -y "cuda=${env:CUDA_VERSION}" "cuda-cudart-dev" "cuda-nvcc" "cuda-cccl" -c "nvidia/label/cuda-${env:CUDA_VERSION}"  # pins the toolkit to CUDA_VERSION and requests the dev packages explicitly (cuda-cccl presumably for the thrust headers the Linux build step looks for - TODO confirm)
           # Env variables
           $env:CUDA_PATH = $env:CONDA_PREFIX
           $env:CUDA_HOME = $env:CONDA_PREFIX
@@ -64,7 +70,7 @@ jobs:
           # Install torch
           $cudaVersion = $env:CUDA_VERSION.Replace('.', '')
           $cudaVersionPytorch = $cudaVersion.Substring(0, $cudaVersion.Length - 1)
-          if ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"}
+          if ([int]$cudaVersionPytorch -gt 129) { $pytorchVersion = "torch==2.10.0" } elseif ([int]$cudaVersionPytorch -gt 121) { $pytorchVersion = "torch==2.5.1" } else {$pytorchVersion = "torch==2.4.1"}  # e.g. 13.0.0 -> "1300" -> "130" -> torch==2.10.0; 122..129 -> torch==2.5.1; 121 and below -> torch==2.4.1
           echo "pytorchVersion=$pytorchVersion"
           echo "cudaVersion=<$cudaVersion>"
           echo "cudaVersionPytorch=$cudaVersionPytorch"
@@ -76,20 +82,72 @@ jobs:
           python -c "import torch; print('CUDA:', torch.version.cuda)"
           python -c "import os; print('CUDA_HOME:', os.getenv('CUDA_HOME', None))"
           python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
-      - name: Build Wheel
+      - name: Build Wheel (Linux)
+        if: runner.os == 'Linux'
+        shell: bash -el {0}
+        run: |
+          export CUDA_PATH=$CONDA_PREFIX
+          export CUDA_HOME=$CONDA_PREFIX
+
+          echo "CONDA_PREFIX=$CONDA_PREFIX"
+
+          # Locate the directory that contains thrust/ and put it on the host compiler include path
+          THRUST_H=$(find "$CONDA_PREFIX" -path '*/thrust/complex.h' 2>/dev/null | head -n 1)
+          if [ -n "$THRUST_H" ]; then
+            CUDA_INCLUDE=$(dirname "$(dirname "$THRUST_H")")
+            echo "Found thrust in: $CUDA_INCLUDE"
+            export CXXFLAGS="-I$CUDA_INCLUDE"
+            export CFLAGS="-I$CUDA_INCLUDE"
+            # If the parent of that include dir also ships bin/nvcc, treat it as the toolkit root
+            CUDA_BASE=$(dirname "$CUDA_INCLUDE")
+            if [ -f "$CUDA_BASE/bin/nvcc" ]; then
+              export CUDA_HOME=$CUDA_BASE
+              export CUDA_PATH=$CUDA_BASE
+            fi
+          fi
+          echo "CUDA_HOME=$CUDA_HOME"
+          echo "CXXFLAGS=$CXXFLAGS"
+
+          # Only add the +cuXXX suffix to the wheel if not releasing on PyPI
+          # (same check as the Windows step)
+          if [ "$CUDA_VERSION" = "$PYPI_CUDA_VERSION" ]; then
+            export PYPI_BUILD=1
+          fi
+          export GENERAL_TORCH=1
+          python setup.py sdist bdist_wheel -k --plat-name manylinux2014_x86_64
+          ls dist/*.whl
+
+      - name: Build Wheel (Windows)
+        if: runner.os == 'Windows'
         run: |
           $env:CUDA_PATH = $env:CONDA_PREFIX
           $env:CUDA_HOME = $env:CONDA_PREFIX
+
+          # Find nvcc.exe and set CUDA_HOME
+          $nvcc = Get-ChildItem -Path $env:CONDA_PREFIX -Recurse -Filter "nvcc.exe" -ErrorAction SilentlyContinue | Select-Object -First 1
+          if ($nvcc) {
+            $cudaDir = Split-Path (Split-Path $nvcc.FullName)  # parent of the directory that holds nvcc.exe
+            echo "Setting CUDA_HOME to $cudaDir"
+            $env:CUDA_HOME = $cudaDir
+            $env:CUDA_PATH = $cudaDir
+          }
+
+          # Patch PyTorch compiled_autograd.h for MSVC + CUDA 13 std:: ambiguity
+          # See https://github.com/pytorch/pytorch/pull/144707#issuecomment-2692282551
+          $header = python -c "import torch; import os; print(os.path.join(os.path.dirname(torch.__file__), 'include', 'torch', 'csrc', 'dynamo', 'compiled_autograd.h'))"
+          if (Test-Path $header) {
+            (Get-Content $header) -replace '\} else if constexpr \(::std::is_same_v<T, ::std::string>\) \{', '// } else if constexpr (::std::is_same_v<T, ::std::string>) {' -replace 'return at::StringType::get\(\);', '// return at::StringType::get();' | Set-Content $header
+            echo "Patched compiled_autograd.h"
+          }
+
           # Only add +cu118 to wheel if not releasing on PyPi
           if ( $env:CUDA_VERSION -eq $env:PYPI_CUDA_VERSION ){
             $env:PYPI_BUILD = 1
           }
-          # echo "{CUDA_VERSION}=$env:CUDA_VERSION" >> $GITHUB_ENV
           $env:GENERAL_TORCH = 1 # OptionalCUDAGuard
-          python setup.py sdist bdist_wheel -k $env:PLAT_ARG.split()
+          $env:DISTUTILS_USE_SDK = 1  # presumably makes the build reuse the compiler environment from the "Setup MSVC" step - TODO confirm
+          python setup.py sdist bdist_wheel -k --plat-name win_amd64
           ls dist/*.whl
-        env:
-          PLAT_ARG: ${{ contains(runner.os, 'Linux') && '--plat-name manylinux2014_x86_64' || '--plat-name win_amd64' }}
 
       - uses: actions/upload-artifact@v4
         with:
@@ -102,11 +160,12 @@ jobs:
     name: Create Release	
     runs-on: ubuntu-latest	
     needs: build_wheels
+    if: startsWith(github.ref, 'refs/tags/')
     outputs:	
       upload_url: ${{ steps.create_release.outputs.upload_url }}	
     steps:	
       - name: Checkout	
-        uses: actions/checkout@v3	
+        uses: actions/checkout@v4	
 
       - name: Extract branch info	
         shell: bash	
diff --git a/pyproject.toml b/pyproject.toml
index 8a3d653..0b671c6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.2.2"]
+requires = ["setuptools==69.5.1", "wheel", "packaging", "ninja>=1.11.1", "torch==2.10.0"]  # same torch version release.yml selects for its CUDA 13.0.0 matrix entry
 
 build-backend = "setuptools.build_meta"
 
diff --git a/setup.py b/setup.py
index bbc4f55..9c0513e 100644
--- a/setup.py
+++ b/setup.py
@@ -72,15 +72,23 @@ def get_nvcc_cuda_version(cuda_dir: str = "") -> Version:
 def get_compute_capabilities(compute_capabilities: Set[int], lower: int = 70):
     # Collect the compute capabilities of all available GPUs.
     if len(compute_capabilities) == 0 and (is_pypi_build() or not torch.cuda.is_available()):
-        if lower <= 70:
+        nvcc_cuda_version = get_nvcc_cuda_version()
+        # sm_70 is only targeted when the toolkit is older than CUDA 13.0.
+        if lower <= 70 and nvcc_cuda_version < Version("13.0"):
             compute_capabilities.add(70)
         if lower <= 75:
             compute_capabilities.add(75)
         compute_capabilities.add(80)
         compute_capabilities.add(86)
         compute_capabilities.add(89)
+        compute_capabilities.add(90)
+        # sm_100 / sm_120 are only targeted with CUDA 12.8 or newer.
+        if nvcc_cuda_version >= Version("12.8"):
+            compute_capabilities.add(100)
+            compute_capabilities.add(120)
 
+    # Nothing selected above: detect the architectures of the local GPUs instead.
     if len(compute_capabilities) == 0:
         for i in range(torch.cuda.device_count()):
             major, minor = torch.cuda.get_device_capability(i)
             if major*10+minor < lower:
@@ -88,15 +96,19 @@ def get_compute_capabilities(compute_capabilities: Set[int], lower: int = 70):
             compute_capabilities.add(major * 10 + minor)
 
     if len(compute_capabilities) == 0:
-        compute_capabilities.add(70)
+        nvcc_cuda_version = get_nvcc_cuda_version()
+        if nvcc_cuda_version < Version("13.0"):
+            compute_capabilities.add(70)
         compute_capabilities.add(75)
         compute_capabilities.add(80)
-        nvcc_cuda_version = get_nvcc_cuda_version()
         if nvcc_cuda_version > Version("11.1"):
             compute_capabilities.add(86)
         if nvcc_cuda_version > Version("11.8"):
             compute_capabilities.add(89)
             compute_capabilities.add(90)
+        if nvcc_cuda_version >= Version("12.8"):
+            compute_capabilities.add(100)
+            compute_capabilities.add(120)
 
     print(f"build pacakge for archs: {compute_capabilities}")
     capability_flags = []
@@ -209,7 +221,6 @@ def get_gpu_ver():
         "Documentation": "https://github.com/wejoncy/QLLM",
     },
     classifiers=[
-        "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: 3.13",
@@ -217,7 +228,7 @@ def get_gpu_ver():
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
     packages=setuptools.find_packages(exclude=("")),
-    python_requires=">=3.10",
+    python_requires=">=3.11",
     install_requires=get_requirements(),
     ext_modules=build_cuda_extensions(),
     cmdclass={'build_ext': BuildExtension},