From 5791f780a1939d65b62b23216b50684952d0933f Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Fri, 26 Jun 2026 03:13:31 -0700 Subject: [PATCH 1/3] update nccl Signed-off-by: Phuong Nguyen --- 3rdparty/nccl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/nccl b/3rdparty/nccl index 808d2433dd..696e971fb0 160000 --- a/3rdparty/nccl +++ b/3rdparty/nccl @@ -1 +1 @@ -Subproject commit 808d2433dda3cccc80f8172a94a6b117359e7102 +Subproject commit 696e971fb092a26f81a8c24b432beecdbbe3064e From 57ee361399a4f49b3e59f5c7443f42a703b0f0e6 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Sat, 27 Jun 2026 02:34:06 -0700 Subject: [PATCH 2/3] Discover NCCL prefix matching the runtime loader resolution order Signed-off-by: Phuong Nguyen --- setup.py | 56 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/setup.py b/setup.py index 64ed120268..892bde3afd 100644 --- a/setup.py +++ b/setup.py @@ -156,7 +156,12 @@ def setup_requirements() -> Tuple[List[str], List[str]]: def _discover_nccl_home() -> str: - """Resolve NCCL_HOME: honor env var, else probe well-known prefixes, else ldconfig.""" + """Resolve NCCL_HOME, preferring the NCCL the dynamic loader resolves at runtime. + + Probes in order: NCCL_HOME env var, ldconfig cache, well-known prefixes, then a + pip-installed nvidia-nccl-cu* wheel. To test a non-default NCCL (e.g. a wheel), set + NCCL_HOME and ensure the runtime loader resolves the same lib (e.g. LD_LIBRARY_PATH). + """ env_home = os.environ.get("NCCL_HOME") if env_home: if (Path(env_home) / "include" / "nccl.h").exists(): @@ -170,28 +175,11 @@ def _discover_nccl_home() -> str: # Include Debian/Ubuntu multiarch subdirs (e.g. lib/aarch64-linux-gnu). lib_subdirs = ("lib", "lib64", "lib/aarch64-linux-gnu", "lib/x86_64-linux-gnu") - # pip-installed NCCL (nvidia-nccl-cu* wheel) lives under nvidia/nccl in - # site-packages and has no top-level include/lib layout. - try: - import importlib.util - - spec = importlib.util.find_spec("nvidia.nccl") - if spec is not None and spec.submodule_search_locations: - pip_root = Path(next(iter(spec.submodule_search_locations))) - if (pip_root / "include" / "nccl.h").exists() and any( - (pip_root / sub / name).exists() for sub in lib_subdirs for name in lib_names - ): - return str(pip_root) - except (ImportError, ValueError): - pass - - for cand in ("/opt/nvidia/nccl", "/usr/local/nccl", "/usr"): - p = Path(cand) - if (p / "include" / "nccl.h").exists() and any( - (p / sub / name).exists() for sub in lib_subdirs for name in lib_names - ): - return str(p) - + # Prefer the NCCL the dynamic loader will actually resolve at runtime so the + # EP build links against the same libnccl that gets loaded. libtransformer_engine + # carries no NCCL RUNPATH, so the loader uses ldconfig/system paths; building + # against a different NCCL (e.g. a pip wheel) causes ABI mismatches. ldconfig is + # the ground truth for runtime resolution, so consult it before well-known prefixes. try: out = subprocess.check_output(["ldconfig", "-p"], stderr=subprocess.DEVNULL).decode() for line in out.splitlines(): @@ -205,6 +193,28 @@ def _discover_nccl_home() -> str: except (subprocess.CalledProcessError, FileNotFoundError): pass + for cand in ("/opt/nvidia/nccl", "/usr/local/nccl", "/usr"): + p = Path(cand) + if (p / "include" / "nccl.h").exists() and any( + (p / sub / name).exists() for sub in lib_subdirs for name in lib_names + ): + return str(p) + + # Fall back to a pip-installed NCCL (nvidia-nccl-cu* wheel) under nvidia/nccl + # in site-packages, used only when no system NCCL is present. + try: + import importlib.util + + spec = importlib.util.find_spec("nvidia.nccl") + if spec is not None and spec.submodule_search_locations: + pip_root = Path(next(iter(spec.submodule_search_locations))) + if (pip_root / "include" / "nccl.h").exists() and any( + (pip_root / sub / name).exists() for sub in lib_subdirs for name in lib_names + ): + return str(pip_root) + except (ImportError, ValueError): + pass + raise RuntimeError( "Could not locate NCCL core (nccl.h + libnccl.so). Set NCCL_HOME to the install prefix." ) From e5a19c3cd8fbd4ae18bf5d3dc3a93888966b7f5d Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Sun, 28 Jun 2026 23:14:50 -0700 Subject: [PATCH 3/3] nccl with relax num_dispatch_tokens%64!=0 Signed-off-by: Phuong Nguyen --- 3rdparty/nccl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/nccl b/3rdparty/nccl index 696e971fb0..a6b5de08b6 160000 --- a/3rdparty/nccl +++ b/3rdparty/nccl @@ -1 +1 @@ -Subproject commit 696e971fb092a26f81a8c24b432beecdbbe3064e +Subproject commit a6b5de08b6af4f938cef541ae6e4d405632f89a4