dotsimulate · forkni · Apr 23, 2026
diff --git a/sd_installer/__init__.py b/sd_installer/__init__.py
@@ -16,4 +16,5 @@
 from .installer import Installer
 from .verifier import Verifier
 
+
 __all__ = ["Installer", "Verifier", "__version__"]
diff --git a/sd_installer/__main__.py b/sd_installer/__main__.py
@@ -2,5 +2,6 @@
 
 from .cli import main
 
+
 if __name__ == "__main__":
     exit(main())
diff --git a/sd_installer/cli.py b/sd_installer/cli.py
@@ -15,13 +15,12 @@
 """
 
 import argparse
-import os
 import sys
 from pathlib import Path
 
 
 def find_base_folder() -> Path:
-    """
+    r"""
     Find the StreamDiffusion base folder (where setup.py lives).
 
     Runtime structure:
@@ -47,9 +46,9 @@ def find_base_folder() -> Path:
     # __file__ = .../StreamDiffusion-installer/sd_installer/cli.py
     # We want: .../StreamDiffusion/
     this_file = Path(__file__).resolve()
-    sd_installer_pkg = this_file.parent          # sd_installer/
-    installer_repo = sd_installer_pkg.parent     # StreamDiffusion-installer/
-    base = installer_repo.parent                 # StreamDiffusion/
+    sd_installer_pkg = this_file.parent  # sd_installer/
+    installer_repo = sd_installer_pkg.parent  # StreamDiffusion-installer/
+    base = installer_repo.parent  # StreamDiffusion/
     if (base / "setup.py").exists():
         return base
 
@@ -85,7 +84,7 @@ def cmd_check(args):
         if venv_path.exists():
             print(f"Venv: Found at {venv_path}")
         else:
-            print(f"Venv: Not found (will be created during install)")
+            print("Venv: Not found (will be created during install)")
 
         # Check StreamDiffusion setup.py (base folder IS StreamDiffusion)
         setup_py = base / "setup.py"
@@ -197,15 +196,15 @@ def cmd_diagnose(args):
         print(f"  [{status}] {check['name']}")
         if check["error"]:
             # Print just the last line of the error
-            error_line = check["error"].split('\n')[-1][:60]
+            error_line = check["error"].split("\n")[-1][:60]
             print(f"         {error_line}")
 
     return 0
 
 
 def cmd_repair(args):
     """Auto-fix known issues."""
-    from .verifier import Verifier, KNOWN_ERRORS
+    from .verifier import Verifier
 
     try:
         base = Path(args.base_folder) if args.base_folder else find_base_folder()
@@ -236,6 +235,7 @@ def cmd_repair(args):
     for check in info["checks"]:
         if not check["passed"] and check["error"]:
             from .verifier import match_known_error
+
             fix = match_known_error(check["error"])
             if fix:
                 fixes_needed.append((check["name"], fix))
@@ -245,10 +245,15 @@ def cmd_repair(args):
         # numpy 2.x
         numpy_ver = info["versions"].get("numpy", "")
         if numpy_ver.startswith("2."):
-            fixes_needed.append(("numpy version", {
-                "cause": f"numpy {numpy_ver} detected (2.x breaks things)",
-                "fix": "pip install numpy==1.26.4 --force-reinstall"
-            }))
+            fixes_needed.append(
+                (
+                    "numpy version",
+                    {
+                        "cause": f"numpy {numpy_ver} detected (2.x breaks things)",
+                        "fix": "pip install numpy==1.26.4 --force-reinstall",
+                    },
+                )
+            )
 
     if not fixes_needed:
         print("No known issues detected that can be auto-fixed.")
@@ -264,18 +269,19 @@ def cmd_repair(args):
 
     if not args.yes:
         response = input("Apply fixes? [y/N]: ")
-        if response.lower() != 'y':
+        if response.lower() != "y":
             print("Aborted.")
             return 0
 
     # Apply fixes
     import subprocess
+
     for name, fix in fixes_needed:
         print(f"Applying fix for {name}...")
         cmd = [str(python_exe), "-m", "pip"] + fix["fix"].replace("pip ", "").split()
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
-            print(f"  OK")
+            print("  OK")
         else:
             print(f"  FAILED: {result.stderr}")
 
@@ -314,7 +320,7 @@ def cmd_generate_bat(args):
 
 def cmd_install_tensorrt(args):
     """Install TensorRT packages."""
-    from .tensorrt import install, get_cuda_version_from_torch
+    from .tensorrt import get_cuda_version_from_torch, install
 
     print("StreamDiffusionTD TensorRT Installation")
     print("=" * 40)
@@ -381,7 +387,8 @@ def main():
     # repair command
     repair_parser = subparsers.add_parser("repair", help="Auto-fix known issues")
     repair_parser.add_argument(
-        "-y", "--yes",
+        "-y",
+        "--yes",
         action="store_true",
         help="Apply fixes without prompting",
     )

diff --git a/sd_installer/installer.py b/sd_installer/installer.py
@@ -11,12 +11,11 @@
 5. Verify imports - Catch failures immediately
 """
 
-import os
-import sys
 import subprocess
-import shutil
+import sys
 from pathlib import Path
-from typing import Optional, Callable
+from typing import Callable, Optional
+
 
 # Version pins - packages NOT in setup.py that must be manually pinned
 MANUAL_PINS = {
@@ -65,7 +64,7 @@
     "cu128": {
         "torch": "2.7.0",
         "torchvision": "0.22.0",
-        "torchaudio": "2.7.0",
+        "torchaudio": None,
         "index_url": "https://download.pytorch.org/whl/cu128",
         "cuda_python": "12.9.0",
         "xformers": None,  # Not needed - PyTorch 2.7+ has native SDPA
@@ -103,10 +102,7 @@ def __init__(
 
         # Validate CUDA version
         if cuda_version not in PYTORCH_CONFIGS:
-            raise ValueError(
-                f"Unsupported CUDA version: {cuda_version}. "
-                f"Supported: {list(PYTORCH_CONFIGS.keys())}"
-            )
+            raise ValueError(f"Unsupported CUDA version: {cuda_version}. Supported: {list(PYTORCH_CONFIGS.keys())}")
 
         self.pytorch_config = PYTORCH_CONFIGS[cuda_version]
 
@@ -238,7 +234,7 @@ def phase3b_insightface(self):
 
         version_str = result.stdout.strip()
         try:
-            major, minor = map(int, version_str.split('.'))
+            major, minor = map(int, version_str.split("."))
             py_version = (major, minor)
         except ValueError:
             print(f"  WARNING: Could not parse Python version '{version_str}', skipping insightface pre-install")
@@ -269,19 +265,21 @@ def phase5_missing_pins(self):
 
         # Force reinstall varshith15 diffusers (other deps may have overwritten it)
         self._report_progress("Ensuring varshith15 diffusers fork with kvo_cache support...", 5, 8)
-        self._run_pip([
-            "--force-reinstall", "--no-deps",
-            "diffusers @ git+https://github.com/varshith15/diffusers.git@3e3b72f557e91546894340edabc845e894f00922"
-        ])
+        self._run_pip(
+            [
+                "--force-reinstall",
+                "--no-deps",
+                "diffusers @ git+https://github.com/varshith15/diffusers.git@3e3b72f557e91546894340edabc845e894f00922",
+            ]
+        )
 
     def phase6_conflict_prone(self):
         """Phase 6: Fix conflict-prone packages with --no-deps."""
         self._report_progress("Fixing conflict-prone packages...", 6, 8)
 
         # Remove conflicting opencv variants
         subprocess.run(
-            [str(self.python_exe), "-m", "pip", "uninstall", "-y",
-             "opencv-python-headless", "opencv-contrib-python"],
+            [str(self.python_exe), "-m", "pip", "uninstall", "-y", "opencv-python-headless", "opencv-contrib-python"],
             capture_output=True,
         )
 
@@ -386,7 +384,7 @@ def generate_batch_file(self, output_path: Optional[str] = None, python_exe: Opt
 pause
 '''
 
-        with open(output_path, 'w', encoding='utf-8') as f:
+        with open(output_path, "w", encoding="utf-8") as f:
             f.write(content)
 
         print(f"Generated batch file: {output_path}")

diff --git a/sd_installer/tensorrt.py b/sd_installer/tensorrt.py
@@ -3,9 +3,10 @@
 
 Standalone module that doesn't rely on streamdiffusion package imports.
 """
+
+import platform
 import subprocess
 import sys
-import platform
 from typing import Optional
 
 
@@ -17,7 +18,7 @@ def run_pip(command: str):
 def is_installed(package_name: str) -> bool:
     """Check if a package is installed"""
     try:
-        __import__(package_name.replace('-', '_'))
+        __import__(package_name.replace("-", "_"))
         return True
     except ImportError:
         return False
@@ -27,6 +28,7 @@ def version(package_name: str) -> Optional[str]:
     """Get version of installed package"""
     try:
         import importlib.metadata
+
         return importlib.metadata.version(package_name)
     except Exception:
         return None
@@ -74,6 +76,7 @@ def install(cu: Optional[str] = None):
         if current_version_str:
             try:
                 from packaging.version import Version
+
                 current_version = Version(current_version_str)
                 if current_version < Version("10.8.0"):
                     print("Uninstalling old TensorRT version...")
@@ -84,9 +87,11 @@ def install(cu: Optional[str] = None):
                     print("Uninstalling old TensorRT version...")
                     run_pip("uninstall -y tensorrt")
 
-    # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.12+
+    # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.16+.
+    # 10.16.1.11 is the first production release for Blackwell on Windows and
+    # fixes the 78% FP8 perf regression that shipped in 10.12–10.13 on SM_120.
     if cuda_version_float >= 12.8:
-        print("Installing TensorRT 10.12+ for CUDA 12.8+ (Blackwell GPU support)...")
+        print("Installing TensorRT 10.16+ for CUDA 12.8+ (Blackwell GPU support)...")
 
         # Install cuDNN 9 for CUDA 12
         cudnn_name = "nvidia-cudnn-cu12==9.7.1.26"
@@ -96,7 +101,7 @@ def install(cu: Optional[str] = None):
         # tensorrt_cu12 is the CUDA 12 wrapper that owns tensorrt/__init__.py
         # and depends on tensorrt_cu12_libs + tensorrt_cu12_bindings.
         # All three are normal wheels with Requires-Dist (no pip-inside-pip).
-        trt_version = "10.12.0.36"
+        trt_version = "10.16.1.11"
         print(f"Installing TensorRT {trt_version} for CUDA {cu}...")
         run_pip(f"install --extra-index-url https://pypi.nvidia.com tensorrt_cu12=={trt_version} --no-cache-dir")
 
@@ -111,7 +116,7 @@ def install(cu: Optional[str] = None):
         # tensorrt_cu12 is the CUDA 12 wrapper that owns tensorrt/__init__.py
         # and depends on tensorrt_cu12_libs + tensorrt_cu12_bindings.
         # All three are normal wheels with Requires-Dist (no pip-inside-pip).
-        trt_version = "10.12.0.36"
+        trt_version = "10.16.1.11"
         print(f"Installing TensorRT {trt_version} for CUDA {cu}...")
         run_pip(f"install --extra-index-url https://pypi.nvidia.com tensorrt_cu12=={trt_version} --no-cache-dir")
 
@@ -126,9 +131,7 @@ def install(cu: Optional[str] = None):
         # Install TensorRT for CUDA 11
         tensorrt_version = "tensorrt==9.0.1.post11.dev4"
         print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}")
-        run_pip(
-            f"install --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir"
-        )
+        run_pip(f"install --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir")
     else:
         print(f"Unsupported CUDA version: {cu}")
         print("Supported versions: CUDA 11.x, 12.x, 12.8+")
@@ -137,18 +140,25 @@ def install(cu: Optional[str] = None):
     # Install additional TensorRT tools
     if not is_installed("polygraphy"):
         print("Installing polygraphy...")
-        run_pip(
-            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
-        )
+        run_pip("install polygraphy==0.49.26 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir")
     if not is_installed("onnx_graphsurgeon"):
         print("Installing onnx-graphsurgeon...")
-        run_pip(
-            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
-        )
-    if platform.system() == 'Windows' and not is_installed("pywin32"):
+        run_pip("install onnx-graphsurgeon==0.6.1 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir")
+
+    # FP8 quantization dependencies (CUDA 12 only).
+    # Previously missing — caused ImportError in fp8_quantize.py when users enabled FP8.
+    # Aligns with FLUX pyproject.toml (nvidia-modelopt >= 0.19.0).
+    if cuda_major == "12":
+        print("Installing FP8 quantization dependencies (modelopt, cupy)...")
+        run_pip("install nvidia-modelopt[onnx]>=0.19.0 cupy-cuda12x==13.6.0 numpy==1.26.4 --no-cache-dir")
+        # modelopt's resolver downgrades onnxruntime-gpu to 1.22.0; re-assert 1.24.4.
+        # --no-deps avoids triggering a conflicting re-solve.
+        run_pip("install onnxruntime-gpu==1.24.4 --no-deps --no-cache-dir")
+
+    if platform.system() == "Windows" and not is_installed("pywin32"):
         print("Installing pywin32...")
         run_pip("install pywin32==306 --no-cache-dir")
-    if platform.system() == 'Windows' and not is_installed("triton"):
+    if platform.system() == "Windows" and not is_installed("triton"):
         print("Installing triton-windows...")
         run_pip("install triton-windows==3.4.0.post21 --no-cache-dir")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,4 +16,5 @@
		from .installer import Installer
		from .verifier import Verifier


		__all__ = ["Installer", "Verifier", "__version__"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,5 +2,6 @@

		from .cli import main


		if __name__ == "__main__":
		    exit(main())