From e98928c1ecba6347cbc3d233aa3edb260bd65441 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Thu, 26 Feb 2026 20:50:14 +0100 Subject: [PATCH] Add H200 and B200 support for Crusoe provider Made-with: Cursor --- src/gpuhunt/_internal/constraints.py | 7 +++++++ src/gpuhunt/_internal/models.py | 1 + src/gpuhunt/providers/crusoe.py | 30 +++++++++++++++++++++------- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/gpuhunt/_internal/constraints.py b/src/gpuhunt/_internal/constraints.py index c5d7ee3..ffd06a3 100644 --- a/src/gpuhunt/_internal/constraints.py +++ b/src/gpuhunt/_internal/constraints.py @@ -177,6 +177,7 @@ def is_nvidia_superchip(gpu_name: str) -> bool: NvidiaGPUInfo(name="H200", memory=141, compute_capability=(9, 0)), NvidiaGPUInfo(name="H200NVL", memory=141, compute_capability=(9, 0)), NvidiaGPUInfo(name="B200", memory=180, compute_capability=(10, 0)), + NvidiaGPUInfo(name="GB200", memory=186, compute_capability=(10, 0)), NvidiaGPUInfo(name="L4", memory=24, compute_capability=(8, 9)), NvidiaGPUInfo(name="L40", memory=48, compute_capability=(8, 9)), NvidiaGPUInfo(name="L40S", memory=48, compute_capability=(8, 9)), @@ -253,6 +254,12 @@ def is_nvidia_superchip(gpu_name: str) -> bool: architecture=AMDArchitecture.CDNA3, device_ids=(0x74A5,), ), + AMDGPUInfo( + name="MI355X", + memory=288, + architecture=AMDArchitecture.CDNA4, + device_ids=(0x75A3,), + ), ] KNOWN_TPUS: list[TPUInfo] = [TPUInfo(name=version, memory=0) for version in _TPU_VERSIONS] diff --git a/src/gpuhunt/_internal/models.py b/src/gpuhunt/_internal/models.py index e620e7b..2e4eb07 100644 --- a/src/gpuhunt/_internal/models.py +++ b/src/gpuhunt/_internal/models.py @@ -32,6 +32,7 @@ class AMDArchitecture(enum.Enum): CDNA = "CDNA" CDNA2 = "CDNA2" CDNA3 = "CDNA3" + CDNA4 = "CDNA4" @classmethod def cast(cls, value: Union["AMDArchitecture", str]) -> "AMDArchitecture": diff --git a/src/gpuhunt/providers/crusoe.py b/src/gpuhunt/providers/crusoe.py index 8cfd5a3..dfc7875 100644 --- 
a/src/gpuhunt/providers/crusoe.py +++ b/src/gpuhunt/providers/crusoe.py @@ -10,7 +10,12 @@ import requests -from gpuhunt._internal.models import AcceleratorVendor, QueryFilter, RawCatalogItem +from gpuhunt._internal.models import ( + AcceleratorVendor, + CPUArchitecture, + QueryFilter, + RawCatalogItem, +) from gpuhunt.providers import AbstractProvider logger = logging.getLogger(__name__) @@ -26,15 +31,13 @@ "A100-PCIe-80GB": ("A100", AcceleratorVendor.NVIDIA, 80), "A100-SXM-80GB": ("A100", AcceleratorVendor.NVIDIA, 80), "H100-SXM-80GB": ("H100", AcceleratorVendor.NVIDIA, 80), + "H200-SXM-141GB": ("H200", AcceleratorVendor.NVIDIA, 141), + "B200-SXM-180GB": ("B200", AcceleratorVendor.NVIDIA, 180), + "GB200-NVL-186GB": ("GB200", AcceleratorVendor.NVIDIA, 186), "L40S-48GB": ("L40S", AcceleratorVendor.NVIDIA, 48), "A40-PCIe-48GB": ("A40", AcceleratorVendor.NVIDIA, 48), "MI300X-192GB": ("MI300X", AcceleratorVendor.AMD, 192), - # TODO: The following GPUs are listed on https://crusoe.ai/cloud/pricing but not yet - # returned by the instance types API. Add them once Crusoe exposes them: - # - H200 141GB ($4.29/GPU-hr on-demand, spot: contact sales) - # - GB200 186GB (contact sales) - # - B200 180GB (contact sales) - # - MI355X 288GB ($3.45 listed but not confirmed; also missing from KNOWN_AMD_GPUS) + "MI355X-288GB": ("MI355X", AcceleratorVendor.AMD, 288), } # Per-GPU-hour pricing from https://crusoe.ai/cloud/pricing @@ -44,6 +47,10 @@ "A100-PCIe-80GB": (1.65, 1.20), "A100-SXM-80GB": (1.95, 1.30), "H100-SXM-80GB": (3.90, 1.60), + "H200-SXM-141GB": (4.29, None), + # TODO: B200 estimated from B200/H100 ratio on other providers; update once Crusoe publishes rates. + # GB200 and MI355X pricing not known yet; update once Crusoe publishes rates. 
+ "B200-SXM-180GB": (7.25, None), "L40S-48GB": (1.00, 0.50), "A40-PCIe-48GB": (0.90, 0.40), "MI300X-192GB": (3.45, 0.95), @@ -137,6 +144,13 @@ def _request(self, method: str, path: str, params: Optional[dict] = None) -> req return requests.request(method, url, headers=headers, params=params, timeout=TIMEOUT) +def _get_cpu_arch(spec: dict) -> str: + cpu_type = spec.get("cpu_type", "") + if cpu_type == "arm64": + return CPUArchitecture.ARM.value + return CPUArchitecture.X86.value + + def _get_available_type_locations(capacities: list[dict]) -> dict[str, list[str]]: best_qty: dict[tuple[str, str], int] = defaultdict(int) for cap in capacities: @@ -191,6 +205,7 @@ def _make_gpu_items( gpu_memory=gpu_memory, spot=None, disk_size=float(spec["disk_gb"]) if spec.get("disk_gb") else None, + cpu_arch=_get_cpu_arch(spec), # disk_gb: ephemeral NVMe size in GB (0 = no ephemeral disk). # Used by dstack to decide whether to create a persistent data disk. provider_data={"disk_gb": spec.get("disk_gb", 0)}, @@ -231,6 +246,7 @@ def _make_cpu_items(product_name: str, spec: dict, locations: list[str]) -> list gpu_memory=None, spot=False, disk_size=float(spec["disk_gb"]) if spec.get("disk_gb") else None, + cpu_arch=_get_cpu_arch(spec), provider_data={"disk_gb": spec.get("disk_gb", 0)}, )