diff --git a/README.md b/README.md
index daea6f2..6070584 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,8 @@ intellifold predict ./examples/5S8I_A.yaml --out_dir ./output --cache ./cache_da
 
 To more complete installation instructions and usage, please refer to the [Installation Guide](docs/installation.md).
 
+> **Note**: On newer NVIDIA GPU architectures, install a PyTorch build that already supports your device before installing IntelliFold. DeepSpeed is only required for the optional DS4Sci attention kernels.
+
 
 ## 🔍 Inference
 
@@ -114,4 +116,3 @@ The IntelliFold project, including code and model parameters, is made available
 ## 📬 Contact Us
 
 If you have any questions or are interested in collaboration, please feel free to contact us at contact@intfold.com.
-
diff --git a/docs/installation.md b/docs/installation.md
index 3fdc116..2442137 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -7,12 +7,14 @@
     cd IntelliFold
     ```
 
-2. **Create and activate the environment(recommended)**
+2. **Create and activate the environment**
     ```bash
     conda env create -f environment.yaml
     conda activate intellifold
     ```
 
+   This is the standard install path for the default dependency stack.
+
 3. **Install the package**
     - From PyPI (recommended):
       ```bash
@@ -27,6 +29,26 @@
       pip install -e .
       ```
 
+   **Newer NVIDIA GPUs**
+
+   On newer GPU architectures, avoid the default torch install path in `environment.yaml`. Instead, create a minimal environment first, then install a PyTorch build that already supports your device before installing IntelliFold. For example, on Blackwell GPUs:
+
+   ```bash
+   conda create -n intellifold python=3.11 mmseqs2 pip -c conda-forge -c bioconda
+   conda activate intellifold
+   ```
+
+   ```bash
+   pip install --index-url https://download.pytorch.org/whl/cu128 torch torchvision torchaudio
+   pip install -e .
+   ```
+
+   If you want to use the optional DeepSpeed DS4Sci attention kernels, install the extra dependency after the base package:
+
+   ```bash
+   pip install -e ".[deepspeed]"
+   ```
+
 4. **(Optional) Download IntelliFold Cache Data Manually**<br>
     By default, model weights and CCD data are downloaded automatically(the directory is `~/.intellifold`) when you run the inference. But you can also download by yourself.
     To download manually from [Our HuggingFace Repository](https://huggingface.co/intelligenAI/intellifold):  
@@ -49,4 +71,4 @@
     ├── protein_id_groups.json
     └── nucleic_acid_id_groups.json
     ```
-    Place the downloaded files in the `cache_data/` directory before running inference.
\ No newline at end of file
+    Place the downloaded files in the `cache_data/` directory before running inference.
diff --git a/docs/kernels.md b/docs/kernels.md
index 57e2e22..e979655 100644
--- a/docs/kernels.md
+++ b/docs/kernels.md
@@ -5,6 +5,7 @@
   export LAYERNORM_TYPE=fast_layernorm
   ```
   If the environment variable `LAYERNORM_TYPE` is set to `fast_layernorm`, the model will employ the layernorm we have developed; otherwise, the naive PyTorch layernorm will be adopted. The kernels will be compiled when `fast_layernorm` is called for the first time.
+  If you are building on a newer GPU architecture, set `TORCH_CUDA_ARCH_LIST` before the first run to target the desired architecture explicitly. If it is not set, PyTorch will compile for the visible GPUs by default.
 - **[DeepSpeed DS4Sci_EvoformerAttention kernel](https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/)** is a memory-efficient attention kernel developed as part of a collaboration between OpenFold and the DeepSpeed4Science initiative. To use this feature, run the following command:
   ```bash
   export USE_DEEPSPEED_EVO_ATTENTION=true
@@ -16,4 +17,6 @@
   export CUTLASS_PATH=/path/to/cutlass
   ```
 
-  The kernels will be compiled when DS4Sci_EvoformerAttention is called for the first time.
\ No newline at end of file
+  The kernels will be compiled when DS4Sci_EvoformerAttention is called for the first time.
+  The first run may take noticeably longer because the CUDA extension is compiled lazily.
+  If DeepSpeed or DS4Sci is unavailable, IntelliFold falls back to the standard attention path instead of blocking baseline inference.
diff --git a/environment.yaml b/environment.yaml
index 28ac17d..4ef3d65 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -12,9 +12,8 @@ dependencies:
   - pip
   - pip:
       - rdkit==2024.3.2
-      - torch==2.6.0
+      - torch>=2.6.0
       - accelerate==1.1.1
-      - deepspeed==0.16.4
       - biopython==1.85
       - numpy==1.24.0
       - pandas==2.2.3
diff --git a/intellifold/openfold/inference_config.py b/intellifold/openfold/inference_config.py
index 07510a6..5103620 100644
--- a/intellifold/openfold/inference_config.py
+++ b/intellifold/openfold/inference_config.py
@@ -1,6 +1,7 @@
 
 import os
 from intellifold.openfold.config import model_config
+from intellifold.openfold.utils.kernel.deepspeed_compat import resolve_ds4s_request
     
 def get_model_config(args):
     """
@@ -15,7 +16,7 @@ def get_model_config(args):
     
     is_low_precision = True
     if os.environ.get("USE_DEEPSPEED_EVO_ATTENTION", False) == "true":
-        use_deepspeed_evoformer_attention = True
+        use_deepspeed_evoformer_attention = resolve_ds4s_request(True)
         cutlass_path_env = os.getenv("CUTLASS_PATH", None)
         msg = (
                 "if use ds4sci, set `CUTLASS_PATH` environment variable according to the instructions at https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/. \n"
@@ -23,9 +24,10 @@ def get_model_config(args):
                 "git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git  /path/to/cutlass \n"
                 "export CUTLASS_PATH=/path/to/cutlass \n"
             )
-        assert (
-            cutlass_path_env is not None and os.path.exists(cutlass_path_env)
-        ), msg
+        if use_deepspeed_evoformer_attention:
+            assert (
+                cutlass_path_env is not None and os.path.exists(cutlass_path_env)
+            ), msg
     else:
         use_deepspeed_evoformer_attention = False
     
diff --git a/intellifold/openfold/model/primitives.py b/intellifold/openfold/model/primitives.py
index af3191f..7800e3f 100644
--- a/intellifold/openfold/model/primitives.py
+++ b/intellifold/openfold/model/primitives.py
@@ -11,15 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import importlib
 import math
 from typing import Optional, Callable, List, Tuple
 import numpy as np
 
-deepspeed_is_installed = importlib.util.find_spec("deepspeed") is not None
-ds4s_is_installed = deepspeed_is_installed and importlib.util.find_spec("deepspeed.ops.deepspeed4science") is not None
-if deepspeed_is_installed:
-    import deepspeed
+from intellifold.openfold.utils.kernel.deepspeed_compat import detect_deepspeed_status
+
+deepspeed_is_installed, ds4s_is_installed = detect_deepspeed_status(
+    warn_on_import_error=True
+)
 
 if ds4s_is_installed:
     from intellifold.openfold.utils.kernel.traceable_evoformer_attn import DS4Sci_EvoformerAttention
diff --git a/intellifold/openfold/utils/kernel/deepspeed_compat.py b/intellifold/openfold/utils/kernel/deepspeed_compat.py
new file mode 100644
index 0000000..8f79348
--- /dev/null
+++ b/intellifold/openfold/utils/kernel/deepspeed_compat.py
@@ -0,0 +1,89 @@
+# Copyright 2024 IntelliGen-AI and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Detection helpers for the optional DeepSpeed DS4Sci attention kernels."""
+
+import importlib.util
+import warnings
+
+
+def detect_deepspeed_status(*, warn_on_import_error: bool = True):
+    """Report whether DeepSpeed and its DS4Sci ops package are usable.
+
+    Args:
+        warn_on_import_error: When true, emit a ``RuntimeWarning`` if
+            DeepSpeed is installed but importing or probing it fails.
+
+    Returns:
+        A ``(deepspeed_is_installed, ds4s_is_installed)`` tuple of bools.
+        Both are ``False`` when DeepSpeed is missing or cannot be imported.
+    """
+    if importlib.util.find_spec("deepspeed") is None:
+        return False, False
+
+    try:
+        import deepspeed  # noqa: F401
+    except Exception as exc:
+        if warn_on_import_error:
+            warnings.warn(
+                "DeepSpeed is installed but could not be imported. "
+                "DS4Sci kernels will be unavailable. "
+                f"Original error: {exc}",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        return False, False
+
+    # find_spec() imports the parent package (deepspeed.ops) for a dotted
+    # name, so it can raise; treat that as "DS4Sci unavailable" instead of
+    # letting the error escape to whichever module called us.
+    try:
+        ds4s_spec = importlib.util.find_spec("deepspeed.ops.deepspeed4science")
+    except Exception as exc:
+        if warn_on_import_error:
+            warnings.warn(
+                "DeepSpeed was imported but its DS4Sci ops could not be "
+                f"located. Original error: {exc}",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        return True, False
+    return True, ds4s_spec is not None
+
+
+def resolve_ds4s_request(enabled: bool):
+    """Turn a request for DS4Sci attention into the effective setting.
+
+    Args:
+        enabled: Whether the caller asked for the DS4Sci attention kernels.
+
+    Returns:
+        ``True`` only if ``enabled`` is true and the DS4Sci ops are detected;
+        otherwise ``False``. A ``RuntimeWarning`` is emitted when a request
+        cannot be honoured.
+    """
+    if not enabled:
+        return False
+
+    _, ds4s_is_installed = detect_deepspeed_status(warn_on_import_error=True)
+    if ds4s_is_installed:
+        return True
+
+    warnings.warn(
+        "USE_DEEPSPEED_EVO_ATTENTION=true but DeepSpeed DS4Sci kernels are "
+        "unavailable. Falling back to the standard attention path.",
+        RuntimeWarning,
+        stacklevel=2,
+    )
+    return False
diff --git a/intellifold/openfold/utils/layer_norm/torch_ext_compile.py b/intellifold/openfold/utils/layer_norm/torch_ext_compile.py
index 2a8b767..87b136d 100644
--- a/intellifold/openfold/utils/layer_norm/torch_ext_compile.py
+++ b/intellifold/openfold/utils/layer_norm/torch_ext_compile.py
@@ -18,8 +18,30 @@
 from torch.utils.cpp_extension import load
 
 
+def _cuda_arch_flags():
+    """Build explicit nvcc ``-gencode`` flags from ``TORCH_CUDA_ARCH_LIST``.
+
+    Entries may be separated by spaces, ``;`` or ``,`` (spaces are the form
+    PyTorch documents). Returns ``[]`` when the variable is unset or empty, or
+    when an entry is not numeric (e.g. a named arch such as ``Hopper``).
+    """
+    raw = os.environ.get("TORCH_CUDA_ARCH_LIST", "")
+    flags = []
+    # str.split() with no argument also drops empty and whitespace-only entries.
+    for entry in raw.replace(",", " ").replace(";", " ").split():
+        wants_ptx = entry.endswith("+PTX")
+        arch = entry.removesuffix("+PTX").replace("sm_", "").replace("compute_", "")
+        arch = arch.replace(".", "")  # "8.6" -> "86", "9.0a" -> "90a"
+        if not arch[:1].isdigit():
+            # Not a numeric arch: emit nothing rather than a malformed nvcc flag.
+            return []
+        flags.extend(["-gencode", f"arch=compute_{arch},code=sm_{arch}"])
+        if wants_ptx:
+            flags.extend(["-gencode", f"arch=compute_{arch},code=compute_{arch}"])
+    return flags
+
+
 def compile(name, sources, extra_include_paths, build_directory):
-    os.environ["TORCH_CUDA_ARCH_LIST"] = "7.0;8.0"
     return load(
         name=name,
         sources=sources,
@@ -42,14 +64,7 @@ def compile(name, sources, extra_include_paths, build_directory):
             "-U__CUDA_NO_HALF_CONVERSIONS__",
             "--expt-relaxed-constexpr",
             "--expt-extended-lambda",
-            "-gencode",
-            "arch=compute_70,code=sm_70",
-            "-gencode",
-            "arch=compute_80,code=sm_80",
-            "-gencode",
-            "arch=compute_86,code=sm_86",
-            "-gencode",
-            "arch=compute_90,code=sm_90",
+            *_cuda_arch_flags(),
         ],
         verbose=True,
         build_directory=build_directory,
diff --git a/intellifold/openfold/v2_flash_inference_config.py b/intellifold/openfold/v2_flash_inference_config.py
index 3b6ed98..bf41a77 100644
--- a/intellifold/openfold/v2_flash_inference_config.py
+++ b/intellifold/openfold/v2_flash_inference_config.py
@@ -1,6 +1,7 @@
 
 import os
 from intellifold.openfold.config import model_config
+from intellifold.openfold.utils.kernel.deepspeed_compat import resolve_ds4s_request
     
 def get_model_config(args):
     """
@@ -15,7 +16,7 @@ def get_model_config(args):
     
     is_low_precision = True
     if os.environ.get("USE_DEEPSPEED_EVO_ATTENTION", False) == "true":
-        use_deepspeed_evoformer_attention = True
+        use_deepspeed_evoformer_attention = resolve_ds4s_request(True)
         cutlass_path_env = os.getenv("CUTLASS_PATH", None)
         msg = (
                 "if use ds4sci, set `CUTLASS_PATH` environment variable according to the instructions at https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/. \n"
@@ -23,9 +24,10 @@ def get_model_config(args):
                 "git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git  /path/to/cutlass \n"
                 "export CUTLASS_PATH=/path/to/cutlass \n"
             )
-        assert (
-            cutlass_path_env is not None and os.path.exists(cutlass_path_env)
-        ), msg
+        if use_deepspeed_evoformer_attention:
+            assert (
+                cutlass_path_env is not None and os.path.exists(cutlass_path_env)
+            ), msg
     else:
         use_deepspeed_evoformer_attention = False
     
diff --git a/intellifold/openfold/v2_inference_config.py b/intellifold/openfold/v2_inference_config.py
index c26fdaf..fcdd779 100644
--- a/intellifold/openfold/v2_inference_config.py
+++ b/intellifold/openfold/v2_inference_config.py
@@ -3,6 +3,7 @@
 import importlib
 import ml_collections as mlc
 import os
+from intellifold.openfold.utils.kernel.deepspeed_compat import resolve_ds4s_request
 
 def model_config(
     low_prec=False, 
@@ -29,7 +30,7 @@ def get_model_config(args):
     
     is_low_precision = True
     if os.environ.get("USE_DEEPSPEED_EVO_ATTENTION", False) == "true":
-        use_deepspeed_evoformer_attention = True
+        use_deepspeed_evoformer_attention = resolve_ds4s_request(True)
         cutlass_path_env = os.getenv("CUTLASS_PATH", None)
         msg = (
                 "if use ds4sci, set `CUTLASS_PATH` environment variable according to the instructions at https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/. \n"
@@ -37,9 +38,10 @@ def get_model_config(args):
                 "git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git  /path/to/cutlass \n"
                 "export CUTLASS_PATH=/path/to/cutlass \n"
             )
-        assert (
-            cutlass_path_env is not None and os.path.exists(cutlass_path_env)
-        ), msg
+        if use_deepspeed_evoformer_attention:
+            assert (
+                cutlass_path_env is not None and os.path.exists(cutlass_path_env)
+            ), msg
     else:
         use_deepspeed_evoformer_attention = False
     
diff --git a/setup.py b/setup.py
index b9fb90c..8ad620c 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,28 @@
 from setuptools import setup, find_packages
 
+install_requires = [
+    "accelerate==1.1.1",
+    "biopython==1.85",
+    "click==8.1.8",
+    "einops==0.8.0",
+    "einx==0.3.0",
+    "ihm==2.5",
+    "mashumaro==3.14",
+    "ml_collections==1.0.0",
+    "modelcif==1.2",
+    "networkx==3.4.2",
+    "numba==0.61.0",
+    "numpy==1.24.0",
+    "pandas==2.2.3",
+    "PyYAML==6.0.2",
+    "rdkit==2024.3.2",
+    "requests==2.32.3",
+    "scipy==1.14.1",
+    "torch>=2.6.0",
+    "torchdiffeq==0.2.5",
+    "tqdm==4.67.1",
+]
+
 setup(
     name="intellifold",
     python_requires=">=3.10",
@@ -9,29 +32,12 @@
     author="IntelliGen AI",
     author_email="contact@intfold.com",
     url="https://github.com/IntelliGen-AI/IntelliFold",
-    install_requires = [
-        "accelerate==1.1.1",
-        "biopython==1.85",
-        "click==8.1.8",
-        "deepspeed==0.16.4",
-        "einops==0.8.0",
-        "einx==0.3.0",
-        "ihm==2.5",
-        "mashumaro==3.14",
-        "ml_collections==1.0.0",
-        "modelcif==1.2",
-        "networkx==3.4.2",
-        "numba==0.61.0",
-        "numpy==1.24.0",
-        "pandas==2.2.3",
-        "PyYAML==6.0.2",
-        "rdkit==2024.3.2",
-        "requests==2.32.3",
-        "scipy==1.14.1",
-        "torch==2.6.0",
-        "torchdiffeq==0.2.5",
-        "tqdm==4.67.1",
-    ],
+    install_requires=install_requires,
+    extras_require={
+        "deepspeed": [
+            "deepspeed>=0.16.4",
+        ],
+    },
     include_package_data=True,
     package_data={
         "intellifold": ["openfold/utils/layer_norm/kernel/*"],
@@ -43,4 +49,4 @@
             "intellifold = runner.intellifold_inference:intellifold_cli",
         ],
     },
-)
\ No newline at end of file
+)