diff --git a/README.md b/README.md index 1b21b137..bf8160a4 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,13 @@ Python package. This builder is a core component of the larger kernel build/distribution system. +**Torch 2.9 note:** kernel-builder currently builds Torch 2.9 extensions based on +the [fifth release candidate](https://dev-discuss.pytorch.org/t/reminder-calls-for-features-upcoming-branch-cut/3225). +If you upload Torch 2.9 kernels, please validate them against +the final Torch 2.9.0 release. In the unlikely case of an ABI-breaking +change, you can rebuild and upload your kernel once kernel-builder +is updated for the final release. + ## 🚀 Quick Start We recommend using [Nix](https://nixos.org/download.html) to build kernels. To speed up builds, first enable the Hugging Face binary cache: diff --git a/build-variants.json b/build-variants.json index 7f74c1a1..eb4e85d6 100644 --- a/build-variants.json +++ b/build-variants.json @@ -2,13 +2,17 @@ "aarch64-darwin": { "metal": [ "torch27-metal-aarch64-darwin", - "torch28-metal-aarch64-darwin" + "torch28-metal-aarch64-darwin", + "torch29-metal-aarch64-darwin" ] }, "aarch64-linux": { "cuda": [ "torch27-cxx11-cu128-aarch64-linux", - "torch28-cxx11-cu129-aarch64-linux" + "torch28-cxx11-cu129-aarch64-linux", + "torch29-cxx11-cu126-aarch64-linux", + "torch29-cxx11-cu128-aarch64-linux", + "torch29-cxx11-cu130-aarch64-linux" ] }, "x86_64-linux": { @@ -18,16 +22,22 @@ "torch27-cxx11-cu128-x86_64-linux", "torch28-cxx11-cu126-x86_64-linux", "torch28-cxx11-cu128-x86_64-linux", - "torch28-cxx11-cu129-x86_64-linux" + "torch28-cxx11-cu129-x86_64-linux", + "torch29-cxx11-cu126-x86_64-linux", + "torch29-cxx11-cu128-x86_64-linux", + "torch29-cxx11-cu130-x86_64-linux" ], "rocm": [ "torch27-cxx11-rocm63-x86_64-linux", "torch28-cxx11-rocm63-x86_64-linux", - "torch28-cxx11-rocm64-x86_64-linux" + "torch28-cxx11-rocm64-x86_64-linux", + "torch29-cxx11-rocm63-x86_64-linux", + "torch29-cxx11-rocm64-x86_64-linux" ], "xpu": [ 
"torch27-cxx11-xpu20250-x86_64-linux", - "torch28-cxx11-xpu20251-x86_64-linux" + "torch28-cxx11-xpu20251-x86_64-linux", + "torch29-cxx11-xpu20252-x86_64-linux" ] } } diff --git a/build2cmake/src/cuda_supported_archs.json b/build2cmake/src/cuda_supported_archs.json index 41a56d3f..608d15f8 100644 --- a/build2cmake/src/cuda_supported_archs.json +++ b/build2cmake/src/cuda_supported_archs.json @@ -1 +1,14 @@ -["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0"] +[ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0", + "10.0", + "10.1", + "11.0", + "12.0" +] diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index 4abea735..7b6114de 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -35,6 +35,9 @@ if (NOT TARGET_DEVICE STREQUAL "cuda" AND endif() if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND + CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(CUDA_DEFAULT_KERNEL_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0+PTX") +elseif(DEFINED CMAKE_CUDA_COMPILER_VERSION AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0+PTX") else() diff --git a/docs/build-variants.md b/docs/build-variants.md index a9363dfd..ed7e3f2b 100644 --- a/docs/build-variants.md +++ b/docs/build-variants.md @@ -9,11 +9,15 @@ available. This list will be updated as new PyTorch versions are released. - `torch27-metal-aarch64-darwin` - `torch28-metal-aarch64-darwin` +- `torch29-metal-aarch64-darwin` ## CUDA aarch64-linux - `torch27-cxx11-cu128-aarch64-linux` - `torch28-cxx11-cu129-aarch64-linux` +- `torch29-cxx11-cu126-aarch64-linux` +- `torch29-cxx11-cu128-aarch64-linux` +- `torch29-cxx11-cu130-aarch64-linux` ## CUDA x86_64-linux @@ -23,17 +27,23 @@ available. This list will be updated as new PyTorch versions are released. 
- `torch28-cxx11-cu126-x86_64-linux` - `torch28-cxx11-cu128-x86_64-linux` - `torch28-cxx11-cu129-x86_64-linux` +- `torch29-cxx11-cu126-x86_64-linux` +- `torch29-cxx11-cu128-x86_64-linux` +- `torch29-cxx11-cu130-x86_64-linux` ## ROCm x86_64-linux - `torch27-cxx11-rocm63-x86_64-linux` - `torch28-cxx11-rocm63-x86_64-linux` - `torch28-cxx11-rocm64-x86_64-linux` +- `torch29-cxx11-rocm63-x86_64-linux` +- `torch29-cxx11-rocm64-x86_64-linux` ## XPU x86_64-linux - `torch27-cxx11-xpu20250-x86_64-linux` - `torch28-cxx11-xpu20251-x86_64-linux` +- `torch29-cxx11-xpu20252-x86_64-linux` ## Universal diff --git a/docs/nix.md b/docs/nix.md index e70500a8..2e2bc47b 100644 --- a/docs/nix.md +++ b/docs/nix.md @@ -6,6 +6,7 @@ We recommend installing Nix in the following way: - Linux: use the [official Nix installer](https://nixos.org/download/). - macOS: use the [Determinate Nix installer](https://docs.determinate.systems/determinate-nix/). + In addition, Xcode 16.x is currently required to build kernels. ## Getting started diff --git a/flake.lock b/flake.lock index 4e8a118a..47007e10 100644 --- a/flake.lock +++ b/flake.lock @@ -73,11 +73,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1759385472, - "narHash": "sha256-a1YMZp3Yc1RJfLIObRKBTTbjMKL91IYbzTjG/HNZN+I=", + "lastModified": 1759493343, + "narHash": "sha256-8fhl0gwMAnOkQbogPIVq+Fha+Yeq52FaRXfwF+F9Q+k=", "owner": "huggingface", "repo": "hf-nix", - "rev": "050dd78a64cb58fb1f9fb29ca498c73107a9a13e", + "rev": "b1fc3a18b52447a0f24bc6884418edc5e66082b9", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 738e7866..0fa09108 100644 --- a/flake.nix +++ b/flake.nix @@ -144,39 +144,35 @@ }) buildSets ); - # Dependencies that should be cached. + # Dependencies that should be cached. The structure of the output + # path is: <build version>/<package name>-<output> forCache = let filterDist = lib.filter (output: output != "dist"); - # Get all `torch` outputs except for `dist`. 
Not all outputs - # are dependencies of `out`, but we'll need the `cxxdev` and - # `dev` outputs for kernel builds. - torchOutputs = builtins.listToAttrs ( - lib.flatten ( - # Map over build sets. - map ( - buildSet: - # Map over all outputs of `torch` in a buildset. - map (output: { - name = "${buildVersion buildSet}-${output}"; - value = buildSet.torch.${output}; - }) (filterDist buildSet.torch.outputs) - ) buildSets - ) - ); - oldLinuxStdenvs = builtins.listToAttrs ( - map (buildSet: { - name = "stdenv-${buildVersion buildSet}"; - value = buildSet.pkgs.stdenvGlibc_2_27; - }) buildSets - ); + # Get all outputs except for `dist` (which is the built wheel for Torch). + allOutputs = + drv: + map (output: { + name = "${drv.pname or drv.name}-${output}"; + path = drv.${output}; + }) (filterDist drv.outputs or [ "out" ]); + buildSetOutputs = + buildSet: + with buildSet.pkgs; + ( + allOutputs buildSet.torch + ++ allOutputs build2cmake + ++ allOutputs kernel-abi-check + ++ allOutputs python3Packages.kernels + ++ lib.optionals stdenv.hostPlatform.isLinux (allOutputs stdenvGlibc_2_27) + ); + buildSetLinkFarm = buildSet: pkgs.linkFarm (buildVersion buildSet) (buildSetOutputs buildSet); in pkgs.linkFarm "packages-for-cache" ( - { - inherit build2cmake kernel-abi-check; - } - // torchOutputs - // lib.optionalAttrs nixpkgs.legacyPackages.${system}.stdenv.isLinux oldLinuxStdenvs + map (buildSet: { + name = buildVersion buildSet; + path = buildSetLinkFarm buildSet; + }) buildSets ); }; } diff --git a/lib/torch-extension/default.nix b/lib/torch-extension/default.nix index 4cc583dc..cd23ae27 100644 --- a/lib/torch-extension/default.nix +++ b/lib/torch-extension/default.nix @@ -55,7 +55,7 @@ let onednn-xpu = xpuPackages.onednn-xpu.override { inherit stdenv oneapi-torch-dev; }; # On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders. - # t's not supported by the nixpkgs shim. + # It's not supported by the nixpkgs shim. 
xcrunHost = writeScriptBin "xcrunHost" '' # Use system SDK for Metal files. unset DEVELOPER_DIR @@ -152,8 +152,11 @@ stdenv.mkDerivation (prevAttrs: { TORCH_CUDA_ARCH_LIST = if cudaPackages.cudaOlder "12.8" then "7.0;7.5;8.0;8.6;8.9;9.0" + else if cudaPackages.cudaOlder "13.0" then + "7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0" else - "7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0"; + # sm_101 has been renamed to sm_110 in CUDA 13. + "7.5;8.0;8.6;8.9;9.0;10.0;11.0;12.0"; } // lib.optionalAttrs rocmSupport { PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs; diff --git a/lib/version-utils.nix b/lib/version-utils.nix index c4002c1a..8474ab78 100644 --- a/lib/version-utils.nix +++ b/lib/version-utils.nix @@ -1,6 +1,10 @@ { lib }: +let + inherit (lib) versions; +in { - flattenVersion = version: lib.replaceStrings [ "." ] [ "" ] (lib.versions.pad 2 version); + flattenVersion = + version: lib.replaceStrings [ "." ] [ "" ] (versions.majorMinor (versions.pad 2 version)); abiString = cxx11Abi: if cxx11Abi then "cxx11" else "cxx98"; } diff --git a/versions.nix b/versions.nix index fa5fb5de..5b43fea1 100644 --- a/versions.nix +++ b/versions.nix @@ -39,7 +39,6 @@ systems = [ "x86_64-linux" ]; bundleBuild = false; } - { torchVersion = "2.7"; xpuVersion = "2025.0.2"; @@ -47,13 +46,6 @@ systems = [ "x86_64-linux" ]; bundleBuild = true; } - { - torchVersion = "2.8"; - xpuVersion = "2025.1.3"; - cxx11Abi = true; - systems = [ "x86_64-linux" ]; - bundleBuild = true; - } { torchVersion = "2.7"; cxx11Abi = true; @@ -62,6 +54,13 @@ bundleBuild = true; } + { + torchVersion = "2.8"; + xpuVersion = "2025.1.3"; + cxx11Abi = true; + systems = [ "x86_64-linux" ]; + bundleBuild = true; + } { torchVersion = "2.8"; cudaVersion = "12.6"; @@ -111,23 +110,73 @@ systems = [ "aarch64-darwin" ]; bundleBuild = true; } - - # Non-standard versions; not included in bundle builds. 
{ - torchVersion = "2.8"; - cudaVersion = "12.4"; + torchVersion = "2.9"; + xpuVersion = "2025.2.1"; + cxx11Abi = true; + systems = [ "x86_64-linux" ]; + bundleBuild = true; + } + { + torchVersion = "2.9"; + cudaVersion = "12.6"; cxx11Abi = true; systems = [ "x86_64-linux" "aarch64-linux" ]; + bundleBuild = true; } { - torchVersion = "2.8"; - rocmVersion = "7.0.1"; + torchVersion = "2.9"; + cudaVersion = "12.8"; + cxx11Abi = true; + systems = [ + "x86_64-linux" + "aarch64-linux" + ]; + bundleBuild = true; + } + { + torchVersion = "2.9"; + cudaVersion = "13.0"; + cxx11Abi = true; + systems = [ + "x86_64-linux" + "aarch64-linux" + ]; + bundleBuild = true; + } + { + torchVersion = "2.9"; + rocmVersion = "6.3.4"; cxx11Abi = true; systems = [ "x86_64-linux" ]; - bundleBuild = false; + bundleBuild = true; + } + { + torchVersion = "2.9"; + rocmVersion = "6.4.2"; + cxx11Abi = true; + systems = [ "x86_64-linux" ]; + bundleBuild = true; + } + { + torchVersion = "2.9"; + cxx11Abi = true; + metal = true; + systems = [ "aarch64-darwin" ]; + bundleBuild = true; } + # Non-standard versions; not included in bundle builds. + { + torchVersion = "2.8"; + cudaVersion = "12.4"; + cxx11Abi = true; + systems = [ + "x86_64-linux" + "aarch64-linux" + ]; + } ]