Skip to content
This repository was archived by the owner on Jan 27, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ Python package.

This builder is a core component of the larger kernel build/distribution system.

**Torch 2.9 note:** kernel-builder currently builds Torch 2.9 extensions based on
the [fifth release candidate](https://dev-discuss.pytorch.org/t/reminder-calls-for-features-upcoming-branch-cut/3225).
If you upload Torch 2.9 kernels, please validate them against
the final Torch 2.9.0 release. In the unlikely case of an ABI-breaking
change, you can rebuild and upload your kernel once kernel-builder
is updated for the final release.

## 🚀 Quick Start

We recommend using [Nix](https://nixos.org/download.html) to build kernels. To speed up builds, first enable the Hugging Face binary cache:
Expand Down
20 changes: 15 additions & 5 deletions build-variants.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
"aarch64-darwin": {
"metal": [
"torch27-metal-aarch64-darwin",
"torch28-metal-aarch64-darwin"
"torch28-metal-aarch64-darwin",
"torch29-metal-aarch64-darwin"
]
},
"aarch64-linux": {
"cuda": [
"torch27-cxx11-cu128-aarch64-linux",
"torch28-cxx11-cu129-aarch64-linux"
"torch28-cxx11-cu129-aarch64-linux",
"torch29-cxx11-cu126-aarch64-linux",
"torch29-cxx11-cu128-aarch64-linux",
"torch29-cxx11-cu130-aarch64-linux"
]
},
"x86_64-linux": {
Expand All @@ -18,16 +22,22 @@
"torch27-cxx11-cu128-x86_64-linux",
"torch28-cxx11-cu126-x86_64-linux",
"torch28-cxx11-cu128-x86_64-linux",
"torch28-cxx11-cu129-x86_64-linux"
"torch28-cxx11-cu129-x86_64-linux",
"torch29-cxx11-cu126-x86_64-linux",
"torch29-cxx11-cu128-x86_64-linux",
"torch29-cxx11-cu130-x86_64-linux"
],
"rocm": [
"torch27-cxx11-rocm63-x86_64-linux",
"torch28-cxx11-rocm63-x86_64-linux",
"torch28-cxx11-rocm64-x86_64-linux"
"torch28-cxx11-rocm64-x86_64-linux",
"torch29-cxx11-rocm63-x86_64-linux",
"torch29-cxx11-rocm64-x86_64-linux"
],
"xpu": [
"torch27-cxx11-xpu20250-x86_64-linux",
"torch28-cxx11-xpu20251-x86_64-linux"
"torch28-cxx11-xpu20251-x86_64-linux",
"torch29-cxx11-xpu20252-x86_64-linux"
]
}
}
15 changes: 14 additions & 1 deletion build2cmake/src/cuda_supported_archs.json
Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0"]
[
"7.0",
"7.2",
"7.5",
"8.0",
"8.6",
"8.7",
"8.9",
"9.0",
"10.0",
"10.1",
"11.0",
"12.0"
]
3 changes: 3 additions & 0 deletions build2cmake/src/templates/cuda/preamble.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ if (NOT TARGET_DEVICE STREQUAL "cuda" AND
endif()

if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
set(CUDA_DEFAULT_KERNEL_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0+PTX")
elseif(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0+PTX")
else()
Expand Down
10 changes: 10 additions & 0 deletions docs/build-variants.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@ available. This list will be updated as new PyTorch versions are released.

- `torch27-metal-aarch64-darwin`
- `torch28-metal-aarch64-darwin`
- `torch29-metal-aarch64-darwin`

## CUDA aarch64-linux

- `torch27-cxx11-cu128-aarch64-linux`
- `torch28-cxx11-cu129-aarch64-linux`
- `torch29-cxx11-cu126-aarch64-linux`
- `torch29-cxx11-cu128-aarch64-linux`
- `torch29-cxx11-cu130-aarch64-linux`

## CUDA x86_64-linux

Expand All @@ -23,17 +27,23 @@ available. This list will be updated as new PyTorch versions are released.
- `torch28-cxx11-cu126-x86_64-linux`
- `torch28-cxx11-cu128-x86_64-linux`
- `torch28-cxx11-cu129-x86_64-linux`
- `torch29-cxx11-cu126-x86_64-linux`
- `torch29-cxx11-cu128-x86_64-linux`
- `torch29-cxx11-cu130-x86_64-linux`

## ROCm x86_64-linux

- `torch27-cxx11-rocm63-x86_64-linux`
- `torch28-cxx11-rocm63-x86_64-linux`
- `torch28-cxx11-rocm64-x86_64-linux`
- `torch29-cxx11-rocm63-x86_64-linux`
- `torch29-cxx11-rocm64-x86_64-linux`

## XPU x86_64-linux

- `torch27-cxx11-xpu20250-x86_64-linux`
- `torch28-cxx11-xpu20251-x86_64-linux`
- `torch29-cxx11-xpu20252-x86_64-linux`

## Universal

Expand Down
1 change: 1 addition & 0 deletions docs/nix.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ We recommend installing Nix in the following way:

- Linux: use the [official Nix installer](https://nixos.org/download/).
- macOS: use the [Determinate Nix installer](https://docs.determinate.systems/determinate-nix/).
In addition, Xcode 16.x is currently required to build kernels.

## Getting started

Expand Down
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 24 additions & 28 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -144,39 +144,35 @@
}) buildSets
);

# Dependencies that should be cached.
# Dependencies that should be cached. The structure of the output
# path is: <build variant>/<dependency>-<output>
forCache =
let
filterDist = lib.filter (output: output != "dist");
# Get all `torch` outputs except for `dist`. Not all outputs
# are dependencies of `out`, but we'll need the `cxxdev` and
# `dev` outputs for kernel builds.
torchOutputs = builtins.listToAttrs (
lib.flatten (
# Map over build sets.
map (
buildSet:
# Map over all outputs of `torch` in a buildset.
map (output: {
name = "${buildVersion buildSet}-${output}";
value = buildSet.torch.${output};
}) (filterDist buildSet.torch.outputs)
) buildSets
)
);
oldLinuxStdenvs = builtins.listToAttrs (
map (buildSet: {
name = "stdenv-${buildVersion buildSet}";
value = buildSet.pkgs.stdenvGlibc_2_27;
}) buildSets
);
# Get all outputs except for `dist` (which is the built wheel for Torch).
allOutputs =
drv:
map (output: {
name = "${drv.pname or drv.name}-${output}";
path = drv.${output};
}) (filterDist drv.outputs or [ "out" ]);
buildSetOutputs =
buildSet:
with buildSet.pkgs;
(
allOutputs buildSet.torch
++ allOutputs build2cmake
++ allOutputs kernel-abi-check
++ allOutputs python3Packages.kernels
++ lib.optionals stdenv.hostPlatform.isLinux (allOutputs stdenvGlibc_2_27)
);
buildSetLinkFarm = buildSet: pkgs.linkFarm (buildVersion buildSet) (buildSetOutputs buildSet);
in
pkgs.linkFarm "packages-for-cache" (
{
inherit build2cmake kernel-abi-check;
}
// torchOutputs
// lib.optionalAttrs nixpkgs.legacyPackages.${system}.stdenv.isLinux oldLinuxStdenvs
map (buildSet: {
name = buildVersion buildSet;
path = buildSetLinkFarm buildSet;
}) buildSets
);
};
}
Expand Down
7 changes: 5 additions & 2 deletions lib/torch-extension/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ let
onednn-xpu = xpuPackages.onednn-xpu.override { inherit stdenv oneapi-torch-dev; };

# On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders.
# t's not supported by the nixpkgs shim.
# It's not supported by the nixpkgs shim.
xcrunHost = writeScriptBin "xcrunHost" ''
# Use system SDK for Metal files.
unset DEVELOPER_DIR
Expand Down Expand Up @@ -152,8 +152,11 @@ stdenv.mkDerivation (prevAttrs: {
TORCH_CUDA_ARCH_LIST =
if cudaPackages.cudaOlder "12.8" then
"7.0;7.5;8.0;8.6;8.9;9.0"
else if cudaPackages.cudaOlder "13.0" then
"7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0"
else
"7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0";
# sm_101 has been renamed to sm_110 in CUDA 13.
"7.5;8.0;8.6;8.9;9.0;10.0;11.0;12.0";
Comment on lines +158 to +159
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do torch kernels not compile using cuda 13.0 on sm < 7.5 ? if so why are we keeping 7.0 in CUDA_DEFAULT_KERNEL_ARCHS

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missed that, removed from CUDA_DEFAULT_KERNEL_ARCHS now 👍.

}
// lib.optionalAttrs rocmSupport {
PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs;
Expand Down
6 changes: 5 additions & 1 deletion lib/version-utils.nix
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
{ lib }:

let
inherit (lib) versions;
in
{
flattenVersion = version: lib.replaceStrings [ "." ] [ "" ] (lib.versions.pad 2 version);
flattenVersion =
version: lib.replaceStrings [ "." ] [ "" ] (versions.majorMinor (versions.pad 2 version));
abiString = cxx11Abi: if cxx11Abi then "cxx11" else "cxx98";
}
79 changes: 64 additions & 15 deletions versions.nix
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,13 @@
systems = [ "x86_64-linux" ];
bundleBuild = false;
}

{
torchVersion = "2.7";
xpuVersion = "2025.0.2";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = true;
}
{
torchVersion = "2.8";
xpuVersion = "2025.1.3";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = true;
}
{
torchVersion = "2.7";
cxx11Abi = true;
Expand All @@ -62,6 +54,13 @@
bundleBuild = true;
}

{
torchVersion = "2.8";
xpuVersion = "2025.1.3";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = true;
}
{
torchVersion = "2.8";
cudaVersion = "12.6";
Expand Down Expand Up @@ -111,23 +110,73 @@
systems = [ "aarch64-darwin" ];
bundleBuild = true;
}

# Non-standard versions; not included in bundle builds.
{
torchVersion = "2.8";
cudaVersion = "12.4";
torchVersion = "2.9";
xpuVersion = "2025.2.1";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = true;
}
{
torchVersion = "2.9";
cudaVersion = "12.6";
cxx11Abi = true;
systems = [
"x86_64-linux"
"aarch64-linux"
];
bundleBuild = true;
}
{
torchVersion = "2.8";
rocmVersion = "7.0.1";
torchVersion = "2.9";
cudaVersion = "12.8";
cxx11Abi = true;
systems = [
"x86_64-linux"
"aarch64-linux"
];
bundleBuild = true;
}
{
torchVersion = "2.9";
cudaVersion = "13.0";
cxx11Abi = true;
systems = [
"x86_64-linux"
"aarch64-linux"
];
bundleBuild = true;
}
{
torchVersion = "2.9";
rocmVersion = "6.3.4";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = false;
bundleBuild = true;
}
{
torchVersion = "2.9";
rocmVersion = "6.4.2";
cxx11Abi = true;
systems = [ "x86_64-linux" ];
bundleBuild = true;
}
{
torchVersion = "2.9";
cxx11Abi = true;
metal = true;
systems = [ "aarch64-darwin" ];
bundleBuild = true;
}

# Non-standard versions; not included in bundle builds.
{
torchVersion = "2.8";
cudaVersion = "12.4";
cxx11Abi = true;
systems = [
"x86_64-linux"
"aarch64-linux"
];
}
]
Loading