diff --git a/sd_installer/__init__.py b/sd_installer/__init__.py index 6c059c8..0cbabfc 100644 --- a/sd_installer/__init__.py +++ b/sd_installer/__init__.py @@ -16,4 +16,5 @@ from .installer import Installer from .verifier import Verifier + __all__ = ["Installer", "Verifier", "__version__"] diff --git a/sd_installer/__main__.py b/sd_installer/__main__.py index 2a0fbda..e878b7a 100644 --- a/sd_installer/__main__.py +++ b/sd_installer/__main__.py @@ -2,5 +2,6 @@ from .cli import main + if __name__ == "__main__": exit(main()) diff --git a/sd_installer/cli.py b/sd_installer/cli.py index 9834f44..3f7110c 100644 --- a/sd_installer/cli.py +++ b/sd_installer/cli.py @@ -15,13 +15,12 @@ """ import argparse -import os import sys from pathlib import Path def find_base_folder() -> Path: - """ + r""" Find the StreamDiffusion base folder (where setup.py lives). Runtime structure: @@ -47,9 +46,9 @@ def find_base_folder() -> Path: # __file__ = .../StreamDiffusion-installer/sd_installer/cli.py # We want: .../StreamDiffusion/ this_file = Path(__file__).resolve() - sd_installer_pkg = this_file.parent # sd_installer/ - installer_repo = sd_installer_pkg.parent # StreamDiffusion-installer/ - base = installer_repo.parent # StreamDiffusion/ + sd_installer_pkg = this_file.parent # sd_installer/ + installer_repo = sd_installer_pkg.parent # StreamDiffusion-installer/ + base = installer_repo.parent # StreamDiffusion/ if (base / "setup.py").exists(): return base @@ -85,7 +84,7 @@ def cmd_check(args): if venv_path.exists(): print(f"Venv: Found at {venv_path}") else: - print(f"Venv: Not found (will be created during install)") + print("Venv: Not found (will be created during install)") # Check StreamDiffusion setup.py (base folder IS StreamDiffusion) setup_py = base / "setup.py" @@ -197,7 +196,7 @@ def cmd_diagnose(args): print(f" [{status}] {check['name']}") if check["error"]: # Print just the last line of the error - error_line = check["error"].split('\n')[-1][:60] + error_line = check["error"].split("\n")[-1][:60] print(f" {error_line}") return 0 @@ -205,7 +204,7 @@ def cmd_diagnose(args): def cmd_repair(args): """Auto-fix known issues.""" - from .verifier import Verifier, KNOWN_ERRORS + from .verifier import Verifier try: base = Path(args.base_folder) if args.base_folder else find_base_folder() @@ -236,6 +235,7 @@ def cmd_repair(args): for check in info["checks"]: if not check["passed"] and check["error"]: from .verifier import match_known_error + fix = match_known_error(check["error"]) if fix: fixes_needed.append((check["name"], fix)) @@ -245,10 +245,15 @@ def cmd_repair(args): # numpy 2.x numpy_ver = info["versions"].get("numpy", "") if numpy_ver.startswith("2."): - fixes_needed.append(("numpy version", { - "cause": f"numpy {numpy_ver} detected (2.x breaks things)", - "fix": "pip install numpy==1.26.4 --force-reinstall" - })) + fixes_needed.append( + ( + "numpy version", + { + "cause": f"numpy {numpy_ver} detected (2.x breaks things)", + "fix": "pip install numpy==1.26.4 --force-reinstall", + }, + ) + ) if not fixes_needed: print("No known issues detected that can be auto-fixed.") @@ -264,18 +269,19 @@ def cmd_repair(args): if not args.yes: response = input("Apply fixes? [y/N]: ") - if response.lower() != 'y': + if response.lower() != "y": print("Aborted.") return 0 # Apply fixes import subprocess + for name, fix in fixes_needed: print(f"Applying fix for {name}...") cmd = [str(python_exe), "-m", "pip"] + fix["fix"].replace("pip ", "").split() result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode == 0: - print(f" OK") + print(" OK") else: print(f" FAILED: {result.stderr}") @@ -314,7 +320,7 @@ def cmd_generate_bat(args): def cmd_install_tensorrt(args): """Install TensorRT packages.""" - from .tensorrt import install, get_cuda_version_from_torch + from .tensorrt import get_cuda_version_from_torch, install print("StreamDiffusionTD TensorRT Installation") print("=" * 40) @@ -381,7 +387,8 @@ def main(): # repair command repair_parser = subparsers.add_parser("repair", help="Auto-fix known issues") repair_parser.add_argument( - "-y", "--yes", + "-y", + "--yes", action="store_true", help="Apply fixes without prompting", ) diff --git a/sd_installer/installer.py b/sd_installer/installer.py index 11214f0..d982d87 100644 --- a/sd_installer/installer.py +++ b/sd_installer/installer.py @@ -11,12 +11,11 @@ 5. Verify imports - Catch failures immediately """ -import os -import sys import subprocess -import shutil +import sys from pathlib import Path -from typing import Optional, Callable +from typing import Callable, Optional + # Version pins - packages NOT in setup.py that must be manually pinned MANUAL_PINS = { @@ -65,7 +64,7 @@ "cu128": { "torch": "2.7.0", "torchvision": "0.22.0", - "torchaudio": "2.7.0", + "torchaudio": None, "index_url": "https://download.pytorch.org/whl/cu128", "cuda_python": "12.9.0", "xformers": None, # Not needed - PyTorch 2.7+ has native SDPA @@ -103,10 +102,7 @@ def __init__( # Validate CUDA version if cuda_version not in PYTORCH_CONFIGS: - raise ValueError( - f"Unsupported CUDA version: {cuda_version}. " - f"Supported: {list(PYTORCH_CONFIGS.keys())}" - ) + raise ValueError(f"Unsupported CUDA version: {cuda_version}. Supported: {list(PYTORCH_CONFIGS.keys())}") self.pytorch_config = PYTORCH_CONFIGS[cuda_version] @@ -238,7 +234,7 @@ def phase3b_insightface(self): version_str = result.stdout.strip() try: - major, minor = map(int, version_str.split('.')) + major, minor = map(int, version_str.split(".")) py_version = (major, minor) except ValueError: print(f" WARNING: Could not parse Python version '{version_str}', skipping insightface pre-install") @@ -269,10 +265,13 @@ def phase5_missing_pins(self): # Force reinstall varshith15 diffusers (other deps may have overwritten it) self._report_progress("Ensuring varshith15 diffusers fork with kvo_cache support...", 5, 8) - self._run_pip([ - "--force-reinstall", "--no-deps", - "diffusers @ git+https://github.com/varshith15/diffusers.git@3e3b72f557e91546894340edabc845e894f00922" - ]) + self._run_pip( + [ + "--force-reinstall", + "--no-deps", + "diffusers @ git+https://github.com/varshith15/diffusers.git@3e3b72f557e91546894340edabc845e894f00922", + ] + ) def phase6_conflict_prone(self): """Phase 6: Fix conflict-prone packages with --no-deps.""" @@ -280,8 +279,7 @@ def phase6_conflict_prone(self): # Remove conflicting opencv variants subprocess.run( - [str(self.python_exe), "-m", "pip", "uninstall", "-y", - "opencv-python-headless", "opencv-contrib-python"], + [str(self.python_exe), "-m", "pip", "uninstall", "-y", "opencv-python-headless", "opencv-contrib-python"], capture_output=True, ) @@ -386,7 +384,7 @@ def generate_batch_file(self, output_path: Optional[str] = None, python_exe: Opt pause ''' - with open(output_path, 'w', encoding='utf-8') as f: + with open(output_path, "w", encoding="utf-8") as f: f.write(content) print(f"Generated batch file: {output_path}") diff --git a/sd_installer/tensorrt.py b/sd_installer/tensorrt.py index d4b46e1..a048f67 100644 --- a/sd_installer/tensorrt.py +++ b/sd_installer/tensorrt.py @@ -3,9 +3,10 @@ Standalone module that doesn't rely on streamdiffusion package imports. """ + +import platform import subprocess import sys -import platform from typing import Optional @@ -17,7 +18,7 @@ def run_pip(command: str): def is_installed(package_name: str) -> bool: """Check if a package is installed""" try: - __import__(package_name.replace('-', '_')) + __import__(package_name.replace("-", "_")) return True except ImportError: return False @@ -27,6 +28,7 @@ def version(package_name: str) -> Optional[str]: """Get version of installed package""" try: import importlib.metadata + return importlib.metadata.version(package_name) except Exception: return None @@ -74,6 +76,7 @@ def install(cu: Optional[str] = None): if current_version_str: try: from packaging.version import Version + current_version = Version(current_version_str) if current_version < Version("10.8.0"): print("Uninstalling old TensorRT version...") @@ -84,9 +87,11 @@ def install(cu: Optional[str] = None): print("Uninstalling old TensorRT version...") run_pip("uninstall -y tensorrt") - # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.12+ + # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.16+ + # 10.16.1.11 is the first Blackwell-Windows-production release and fixes + # the 78% FP8 perf regression that shipped in 10.12–10.13 on SM_120. if cuda_version_float >= 12.8: - print("Installing TensorRT 10.12+ for CUDA 12.8+ (Blackwell GPU support)...") + print("Installing TensorRT 10.16+ for CUDA 12.8+ (Blackwell GPU support)...") # Install cuDNN 9 for CUDA 12 cudnn_name = "nvidia-cudnn-cu12==9.7.1.26" @@ -96,7 +101,7 @@ def install(cu: Optional[str] = None): # tensorrt_cu12 is the CUDA 12 wrapper that owns tensorrt/__init__.py # and depends on tensorrt_cu12_libs + tensorrt_cu12_bindings. # All three are normal wheels with Requires-Dist (no pip-inside-pip). - trt_version = "10.12.0.36" + trt_version = "10.16.1.11" print(f"Installing TensorRT {trt_version} for CUDA {cu}...") run_pip(f"install --extra-index-url https://pypi.nvidia.com tensorrt_cu12=={trt_version} --no-cache-dir") @@ -111,7 +116,7 @@ def install(cu: Optional[str] = None): # tensorrt_cu12 is the CUDA 12 wrapper that owns tensorrt/__init__.py # and depends on tensorrt_cu12_libs + tensorrt_cu12_bindings. # All three are normal wheels with Requires-Dist (no pip-inside-pip). - trt_version = "10.12.0.36" + trt_version = "10.16.1.11" print(f"Installing TensorRT {trt_version} for CUDA {cu}...") run_pip(f"install --extra-index-url https://pypi.nvidia.com tensorrt_cu12=={trt_version} --no-cache-dir") @@ -126,9 +131,7 @@ def install(cu: Optional[str] = None): # Install TensorRT for CUDA 11 tensorrt_version = "tensorrt==9.0.1.post11.dev4" print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}") - run_pip( - f"install --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir" - ) + run_pip(f"install --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir") else: print(f"Unsupported CUDA version: {cu}") print("Supported versions: CUDA 11.x, 12.x, 12.8+") @@ -137,18 +140,25 @@ def install(cu: Optional[str] = None): # Install additional TensorRT tools if not is_installed("polygraphy"): print("Installing polygraphy...") - run_pip( - "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir" - ) + run_pip("install polygraphy==0.49.26 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir") if not is_installed("onnx_graphsurgeon"): print("Installing onnx-graphsurgeon...") - run_pip( - "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir" - ) - if platform.system() == 'Windows' and not is_installed("pywin32"): + run_pip("install onnx-graphsurgeon==0.6.1 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir") + + # FP8 quantization dependencies (CUDA 12 only). + # Previously missing — caused ImportError in fp8_quantize.py when users enabled FP8. + # Aligns with FLUX pyproject.toml (nvidia-modelopt >= 0.19.0). + if cuda_major == "12": + print("Installing FP8 quantization dependencies (modelopt, cupy)...") + run_pip("install nvidia-modelopt[onnx]>=0.19.0 cupy-cuda12x==13.6.0 numpy==1.26.4 --no-cache-dir") + # modelopt's resolver downgrades onnxruntime-gpu to 1.22.0; re-assert 1.24.4. + # --no-deps avoids triggering a conflicting re-solve. + run_pip("install onnxruntime-gpu==1.24.4 --no-deps --no-cache-dir") + + if platform.system() == "Windows" and not is_installed("pywin32"): print("Installing pywin32...") run_pip("install pywin32==306 --no-cache-dir") - if platform.system() == 'Windows' and not is_installed("triton"): + if platform.system() == "Windows" and not is_installed("triton"): print("Installing triton-windows...") run_pip("install triton-windows==3.4.0.post21 --no-cache-dir") diff --git a/sd_installer/verifier.py b/sd_installer/verifier.py index 2cc484c..fa23d60 100644 --- a/sd_installer/verifier.py +++ b/sd_installer/verifier.py @@ -12,6 +12,7 @@ @dataclass class VerificationResult: """Result of a single verification check.""" + name: str passed: bool message: str @@ -23,67 +24,39 @@ class VerificationResult: ( "torch CUDA", "import torch; assert torch.cuda.is_available(), 'CUDA not available'; print(f'{torch.__version__}+cu{torch.version.cuda} | {torch.cuda.get_device_name(0)}')", - "PyTorch with CUDA" - ), - ( - "StreamDiffusion", - "from streamdiffusion.config import load_config; print('OK')", - "StreamDiffusion core" - ), - ( - "timm RotaryEmbedding", - "from timm.layers import RotaryEmbedding; print('OK')", - "timm (>=1.0.24 required)" - ), - ( - "mediapipe", - "import mediapipe as mp; mp.solutions.drawing_utils; print('OK')", - "mediapipe solutions" - ), - ( - "transformers MT5", - "from transformers import MT5Tokenizer; print('OK')", - "transformers (MT5Tokenizer)" - ), - ( - "huggingface_hub", - "from huggingface_hub import hf_hub_download; print('OK')", - "huggingface_hub" + "PyTorch with CUDA", ), + ("StreamDiffusion", "from streamdiffusion.config import load_config; print('OK')", "StreamDiffusion core"), + ("timm RotaryEmbedding", "from timm.layers import RotaryEmbedding; print('OK')", "timm (>=1.0.24 required)"), + ("mediapipe", "import mediapipe as mp; mp.solutions.drawing_utils; print('OK')", "mediapipe solutions"), + ("transformers MT5", "from transformers import MT5Tokenizer; print('OK')", "transformers (MT5Tokenizer)"), + ("huggingface_hub", "from huggingface_hub import hf_hub_download; print('OK')", "huggingface_hub"), ( "numpy version", "import numpy; v = numpy.__version__; assert v.startswith('1.'), f'numpy 2.x detected: {v}'; print(v)", - "numpy (<2.0.0 required)" + "numpy (<2.0.0 required)", ), ( "diffusers fork", "import inspect; from diffusers.models.attention_processor import Attention; assert 'kvo_cache' in inspect.signature(Attention.forward).parameters, 'Missing kvo_cache'; print('OK')", - "diffusers (varshith15 fork with kvo_cache)" - ), - ( - "accelerate", - "from accelerate import Accelerator; print('OK')", - "accelerate" - ), - ( - "controlnet_aux", - "from controlnet_aux import OpenposeDetector; print('OK')", - "controlnet_aux" + "diffusers (varshith15 fork with kvo_cache)", ), + ("accelerate", "from accelerate import Accelerator; print('OK')", "accelerate"), + ("controlnet_aux", "from controlnet_aux import OpenposeDetector; print('OK')", "controlnet_aux"), ( "peft (USE_PEFT_BACKEND)", "from diffusers.utils import USE_PEFT_BACKEND; assert USE_PEFT_BACKEND, 'peft not detected'; print('OK')", - "peft (required for Cached Attention/StreamV2V)" + "peft (required for Cached Attention/StreamV2V)", ), ( "protobuf version", "import google.protobuf; v = google.protobuf.__version__; major = int(v.split('.')[0]); assert major < 5, f'protobuf {v} (>=5.x breaks TRT engine builds)'; print(v)", - "protobuf (<5.0 required for TRT)" + "protobuf (<5.0 required for TRT)", ), ( "onnx version", "import onnx; v = onnx.__version__; parts = [int(x) for x in v.split('.')[:2]]; assert parts[0] == 1 and parts[1] < 20, f'onnx {v} (>=1.20 removes float32_to_bfloat16)'; print(v)", - "onnx (<1.20 required for TRT)" + "onnx (<1.20 required for TRT)", ), ] @@ -178,7 +151,7 @@ def run_all(self, verbose: bool = True) -> bool: print(f"FAIL: {result.message}") if result.error: # Print first line of error - error_line = result.error.split('\n')[-1] + error_line = result.error.split("\n")[-1] print(f" {error_line}") if verbose: @@ -211,10 +184,12 @@ def diagnose(self) -> dict: try: result = subprocess.run( [self.python_exe, "-c", gpu_code], - capture_output=True, text=True, timeout=30, + capture_output=True, + text=True, + timeout=30, ) if result.returncode == 0: - lines = result.stdout.strip().split('\n') + lines = result.stdout.strip().split("\n") info["gpu"]["name"] = lines[0] info["gpu"]["vram_mb"] = int(lines[1]) info["gpu"]["compute_capability"] = lines[2] @@ -224,12 +199,14 @@ def diagnose(self) -> dict: # Run all checks and collect detailed info for name, code, description in VERIFICATION_CHECKS: result = self.check(name, code, description) - info["checks"].append({ - "name": name, - "passed": result.passed, - "message": result.message, - "error": result.error, - }) + info["checks"].append( + { + "name": name, + "passed": result.passed, + "message": result.message, + "error": result.error, + } + ) # Get version information for key packages version_checks = [ @@ -290,8 +267,8 @@ def diagnose(self) -> dict: "fix": "pip install accelerate==1.10.0", }, "'onnx.helper' has no attribute 'float32_to_bfloat16'": { - "cause": "onnx version too new", - "fix": "pip install onnx==1.18.0", + "cause": "onnx-graphsurgeon too old for onnx>=1.19 (float32_to_bfloat16 was removed)", + "fix": "pip install onnx-graphsurgeon==0.6.1 --extra-index-url https://pypi.ngc.nvidia.com", }, "Missing kvo_cache": { "cause": "Wrong diffusers installed (vanilla instead of varshith15 fork)",