diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md index 81a6cf48e0..6b4d16a637 100644 --- a/docs/docs/concepts/backends.md +++ b/docs/docs/concepts/backends.md @@ -579,34 +579,6 @@ gcloud projects list --format="json(projectId)" Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances. -## Hot Aisle - -Log in to the SSH TUI as described in the [Hot Aisle Quick Start :material-arrow-top-right-thin:{ .external }](https://hotaisle.xyz/quick-start/). -Create a new team and generate an API key for the member in the team. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: -- name: main - backends: - - type: hotaisle - team_handle: hotaisle-team-handle - creds: - type: api_key - api_key: 9c27a4bb7a8e472fae12ab34.3f2e3c1db75b9a0187fd2196c6b3e56d2b912e1c439ba08d89e7b6fcd4ef1d3f -``` - -
- -??? info "Required permissions" - The API key must have the following roles assigned: - - * **Owner role for the user** - Required for creating and managing SSH keys - * **Operator role for the team** - Required for managing virtual machines within the team - ## Lambda Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key` @@ -937,6 +909,104 @@ projects: +## AMD Developer Cloud +Log into your [AMD Developer Cloud :material-arrow-top-right-thin:{ .external }](https://amd.digitalocean.com/login) account. Click `API` in the sidebar and click the button `Generate New Token`. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: amddevcloud + project_name: my-amd-project + creds: + type: api_key + api_key: dop_v1_71ea79a0c4bf2ffa70ac9d2a7b2689d2b41768567b22ebabe58a80066dcc5e92 +``` + +
+ +??? info "Project Name" + **project_name** configuration is optional. If it is not provided, the default project is used. + +??? info "Required permissions" + The API key must have the following scopes assigned: + + * **account** - read + * **droplet** - create,read,update,delete,admin + * **project** - create,read,update,delete + * **regions** - read + * **sizes** - read + * **ssh_key** - create,read,update,delete + + + +## DigitalOcean +Log into your [DigitalOcean :material-arrow-top-right-thin:{ .external }](https://cloud.digitalocean.com/login) account. Click `API` in the sidebar and click the button `Generate New Token`. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: digitalocean + project_name: my-digital-ocean-project + creds: + type: api_key + api_key: dop_v1_71ea79a0c4bf2ffa70ac9d2a7b2689d2b41768567b22ebabe58a80066dcc5e92 +``` + +
+ +??? info "Project Name" + **project_name** configuration is optional. If it is not provided, the default project is used. + +??? info "Required permissions" + The API key must have the following scopes assigned: + + * **account** - read + * **droplet** - create,read,update,delete,admin + * **project** - create,read,update,delete + * **regions** - read + * **sizes** - read + * **ssh_key** - create,read,update,delete + + +## Hot Aisle + +Log in to the SSH TUI as described in the [Hot Aisle Quick Start :material-arrow-top-right-thin:{ .external }](https://hotaisle.xyz/quick-start/). +Create a new team and generate an API key for the member in the team. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: hotaisle + team_handle: hotaisle-team-handle + creds: + type: api_key + api_key: 9c27a4bb7a8e472fae12ab34.3f2e3c1db75b9a0187fd2196c6b3e56d2b912e1c439ba08d89e7b6fcd4ef1d3f +``` + +
+ +??? info "Required permissions" + The API key must have the following roles assigned: + + * **Owner role for the user** - Required for creating and managing SSH keys + * **Operator role for the team** - Required for managing virtual machines within the team + + ## CloudRift Log into your [CloudRift :material-arrow-top-right-thin:{ .external }](https://console.cloudrift.ai/) console, click `API Keys` in the sidebar and click the button to create a new API key. diff --git a/docs/docs/reference/server/config.yml.md b/docs/docs/reference/server/config.yml.md index a8dc3b2cad..25f649ddfd 100644 --- a/docs/docs/reference/server/config.yml.md +++ b/docs/docs/reference/server/config.yml.md @@ -126,22 +126,6 @@ to configure [backends](../../concepts/backends.md) and other [server-level sett type: required: true -##### `projects[n].backends[type=hotaisle]` { #hotaisle data-toc-label="hotaisle" } - -#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleBackendConfigWithCreds - overrides: - show_root_heading: false - type: - required: true - item_id_prefix: hotaisle- - -###### `projects[n].backends[type=hotaisle].creds` { #hotaisle-creds data-toc-label="creds" } - -#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleAPIKeyCreds - overrides: - show_root_heading: false - type: - required: true ##### `projects[n].backends[type=lambda]` { #lambda data-toc-label="lambda" } @@ -332,6 +316,57 @@ to configure [backends](../../concepts/backends.md) and other [server-level sett type: required: true +##### `projects[n].backends[type=amddevcloud]` { #amddevcloud data-toc-label="amddevcloud" } + +#SCHEMA# dstack._internal.core.backends.digitalocean_base.models.BaseDigitalOceanBackendConfigWithCreds + overrides: + show_root_heading: false + type: + required: true + item_id_prefix: amddevcloud- + +###### `projects[n].backends[type=amddevcloud].creds` { #amddevcloud-creds data-toc-label="creds" } + +#SCHEMA# 
dstack._internal.core.backends.digitalocean_base.models.BaseDigitalOceanAPIKeyCreds + overrides: + show_root_heading: false + type: + required: true + +##### `projects[n].backends[type=digitalocean]` { #digitalocean data-toc-label="digitalocean" } + +#SCHEMA# dstack._internal.core.backends.digitalocean_base.models.BaseDigitalOceanBackendConfigWithCreds + overrides: + show_root_heading: false + type: + required: true + item_id_prefix: digitalocean- + +###### `projects[n].backends[type=digitalocean].creds` { #digitalocean-creds data-toc-label="creds" } + +#SCHEMA# dstack._internal.core.backends.digitalocean_base.models.BaseDigitalOceanAPIKeyCreds + overrides: + show_root_heading: false + type: + required: true + +##### `projects[n].backends[type=hotaisle]` { #hotaisle data-toc-label="hotaisle" } + +#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleBackendConfigWithCreds + overrides: + show_root_heading: false + type: + required: true + item_id_prefix: hotaisle- + +###### `projects[n].backends[type=hotaisle].creds` { #hotaisle-creds data-toc-label="creds" } + +#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleAPIKeyCreds + overrides: + show_root_heading: false + type: + required: true + ##### `projects[n].backends[type=cloudrift]` { #cloudrift data-toc-label="cloudrift" } #SCHEMA# dstack._internal.core.backends.cloudrift.models.CloudRiftBackendConfigWithCreds diff --git a/pyproject.toml b/pyproject.toml index a14ac0cbbf..342f6571af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-multipart>=0.0.16", "filelock", "psutil", - "gpuhunt==0.1.7", + "gpuhunt==0.1.8", "argcomplete>=3.5.0", "ignore-python>=0.2.0", "orjson", diff --git a/src/dstack/_internal/core/backends/amddevcloud/__init__.py b/src/dstack/_internal/core/backends/amddevcloud/__init__.py new file mode 100644 index 0000000000..16e553969f --- /dev/null +++ b/src/dstack/_internal/core/backends/amddevcloud/__init__.py @@ -0,0 +1 @@ +# This 
package contains the implementation for the AMDDevCloud backend. diff --git a/src/dstack/_internal/core/backends/amddevcloud/backend.py b/src/dstack/_internal/core/backends/amddevcloud/backend.py new file mode 100644 index 0000000000..9a0477d760 --- /dev/null +++ b/src/dstack/_internal/core/backends/amddevcloud/backend.py @@ -0,0 +1,16 @@ +from dstack._internal.core.backends.amddevcloud.compute import AMDDevCloudCompute +from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend +from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig +from dstack._internal.core.models.backends.base import BackendType + + +class AMDDevCloudBackend(BaseDigitalOceanBackend): + TYPE = BackendType.AMDDEVCLOUD + COMPUTE_CLASS = AMDDevCloudCompute + + def __init__(self, config: BaseDigitalOceanConfig, api_url: str): + self.config = config + self._compute = AMDDevCloudCompute(self.config, api_url=api_url, type=self.TYPE) + + def compute(self) -> AMDDevCloudCompute: + return self._compute diff --git a/src/dstack/_internal/core/backends/amddevcloud/compute.py b/src/dstack/_internal/core/backends/amddevcloud/compute.py new file mode 100644 index 0000000000..945eb63f93 --- /dev/null +++ b/src/dstack/_internal/core/backends/amddevcloud/compute.py @@ -0,0 +1,5 @@ +from dstack._internal.core.backends.digitalocean_base.compute import BaseDigitalOceanCompute + + +class AMDDevCloudCompute(BaseDigitalOceanCompute): + pass diff --git a/src/dstack/_internal/core/backends/amddevcloud/configurator.py b/src/dstack/_internal/core/backends/amddevcloud/configurator.py new file mode 100644 index 0000000000..1f464fc549 --- /dev/null +++ b/src/dstack/_internal/core/backends/amddevcloud/configurator.py @@ -0,0 +1,28 @@ +from typing import Optional + +from dstack._internal.core.backends.amddevcloud.backend import AMDDevCloudBackend +from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient +from 
dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend +from dstack._internal.core.backends.digitalocean_base.configurator import ( + BaseDigitalOceanConfigurator, +) +from dstack._internal.core.backends.digitalocean_base.models import AnyBaseDigitalOceanCreds +from dstack._internal.core.models.backends.base import ( + BackendType, +) + + +class AMDDevCloudConfigurator(BaseDigitalOceanConfigurator): + TYPE = BackendType.AMDDEVCLOUD + BACKEND_CLASS = AMDDevCloudBackend + API_URL = "https://api-amd.digitalocean.com" + + def get_backend(self, record) -> BaseDigitalOceanBackend: + config = self._get_config(record) + return AMDDevCloudBackend(config=config, api_url=self.API_URL) + + def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None): + api_client = DigitalOceanAPIClient(creds.api_key, self.API_URL) + api_client.validate_api_key() + if project_name: + api_client.validate_project_name(project_name) diff --git a/src/dstack/_internal/core/backends/base/offers.py b/src/dstack/_internal/core/backends/base/offers.py index ed9b44a08a..d3d004172b 100644 --- a/src/dstack/_internal/core/backends/base/offers.py +++ b/src/dstack/_internal/core/backends/base/offers.py @@ -34,6 +34,8 @@ def get_catalog_offers( provider = backend.value if backend == BackendType.LAMBDA: provider = "lambdalabs" + if backend == BackendType.AMDDEVCLOUD: + provider = "digitalocean" q = requirements_to_query_filter(requirements) q.provider = [provider] offers = [] diff --git a/src/dstack/_internal/core/backends/configurators.py b/src/dstack/_internal/core/backends/configurators.py index a2df6a4e63..6284dd0a58 100644 --- a/src/dstack/_internal/core/backends/configurators.py +++ b/src/dstack/_internal/core/backends/configurators.py @@ -5,6 +5,12 @@ _CONFIGURATOR_CLASSES: List[Type[Configurator]] = [] +try: + from dstack._internal.core.backends.amddevcloud.configurator import AMDDevCloudConfigurator + + 
_CONFIGURATOR_CLASSES.append(AMDDevCloudConfigurator) +except ImportError: + pass try: from dstack._internal.core.backends.aws.configurator import AWSConfigurator @@ -47,6 +53,15 @@ except ImportError: pass +try: + from dstack._internal.core.backends.digitalocean.configurator import ( + DigitalOceanConfigurator, + ) + + _CONFIGURATOR_CLASSES.append(DigitalOceanConfigurator) +except ImportError: + pass + try: from dstack._internal.core.backends.gcp.configurator import GCPConfigurator diff --git a/src/dstack/_internal/core/backends/digitalocean/__init__.py b/src/dstack/_internal/core/backends/digitalocean/__init__.py new file mode 100644 index 0000000000..0f0092fd9f --- /dev/null +++ b/src/dstack/_internal/core/backends/digitalocean/__init__.py @@ -0,0 +1 @@ +# DigitalOcean backend for dstack diff --git a/src/dstack/_internal/core/backends/digitalocean/backend.py b/src/dstack/_internal/core/backends/digitalocean/backend.py new file mode 100644 index 0000000000..fc09b4c03d --- /dev/null +++ b/src/dstack/_internal/core/backends/digitalocean/backend.py @@ -0,0 +1,16 @@ +from dstack._internal.core.backends.digitalocean.compute import DigitalOceanCompute +from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend +from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig +from dstack._internal.core.models.backends.base import BackendType + + +class DigitalOceanBackend(BaseDigitalOceanBackend): + TYPE = BackendType.DIGITALOCEAN + COMPUTE_CLASS = DigitalOceanCompute + + def __init__(self, config: BaseDigitalOceanConfig, api_url: str): + self.config = config + self._compute = DigitalOceanCompute(self.config, api_url=api_url, type=self.TYPE) + + def compute(self) -> DigitalOceanCompute: + return self._compute diff --git a/src/dstack/_internal/core/backends/digitalocean/compute.py b/src/dstack/_internal/core/backends/digitalocean/compute.py new file mode 100644 index 0000000000..e3b26d0261 --- /dev/null +++ 
b/src/dstack/_internal/core/backends/digitalocean/compute.py
@@ -0,0 +1,5 @@
+from dstack._internal.core.backends.digitalocean_base.compute import BaseDigitalOceanCompute
+
+
+class DigitalOceanCompute(BaseDigitalOceanCompute):
+    """Compute for the `digitalocean` backend; all behavior comes from the shared base."""
diff --git a/src/dstack/_internal/core/backends/digitalocean/configurator.py b/src/dstack/_internal/core/backends/digitalocean/configurator.py
new file mode 100644
index 0000000000..0453723128
--- /dev/null
+++ b/src/dstack/_internal/core/backends/digitalocean/configurator.py
@@ -0,0 +1,35 @@
+from typing import Optional
+
+from dstack._internal.core.backends.base.configurator import BackendRecord
+from dstack._internal.core.backends.digitalocean.backend import DigitalOceanBackend
+from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient
+from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
+from dstack._internal.core.backends.digitalocean_base.configurator import (
+    BaseDigitalOceanConfigurator,
+)
+from dstack._internal.core.backends.digitalocean_base.models import (
+    AnyBaseDigitalOceanCreds,
+)
+from dstack._internal.core.models.backends.base import (
+    BackendType,
+)
+
+
+class DigitalOceanConfigurator(BaseDigitalOceanConfigurator):
+    """Configurator for the `digitalocean` backend, bound to the public DigitalOcean API."""
+
+    TYPE = BackendType.DIGITALOCEAN
+    BACKEND_CLASS = DigitalOceanBackend
+    API_URL = "https://api.digitalocean.com"
+
+    def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
+        """Build a `DigitalOceanBackend` from the stored record, pointed at `API_URL`."""
+        config = self._get_config(record)
+        return DigitalOceanBackend(config=config, api_url=self.API_URL)
+
+    def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
+        """Validate the API key and, when `project_name` is given, that the project exists."""
+        api_client = DigitalOceanAPIClient(creds.api_key, self.API_URL)
+        api_client.validate_api_key()
+        if project_name:
+            api_client.validate_project_name(project_name)
diff --git a/src/dstack/_internal/core/backends/digitalocean_base/__init__.py b/src/dstack/_internal/core/backends/digitalocean_base/__init__.py
new file mode 100644
index 0000000000..cc8247e940
--- 
/dev/null
+++ b/src/dstack/_internal/core/backends/digitalocean_base/__init__.py
@@ -0,0 +1 @@
+# This package contains the base classes for DigitalOcean and AMDDevCloud backends.
diff --git a/src/dstack/_internal/core/backends/digitalocean_base/api_client.py b/src/dstack/_internal/core/backends/digitalocean_base/api_client.py
new file mode 100644
index 0000000000..afedd36ffc
--- /dev/null
+++ b/src/dstack/_internal/core/backends/digitalocean_base/api_client.py
@@ -0,0 +1,149 @@
+from typing import Any, Dict, List, Optional
+
+import requests
+
+from dstack._internal.core.backends.base.configurator import raise_invalid_credentials_error
+from dstack._internal.core.errors import NoCapacityError
+from dstack._internal.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
+# Largest page size accepted by the list endpoints; keeps the number of round trips low.
+_MAX_PER_PAGE = 200
+
+
+class DigitalOceanAPIClient:
+    """Minimal client for a DigitalOcean-compatible REST API rooted at `api_url`.
+
+    Every request is sent with `api_key` as a bearer token.
+    """
+
+    def __init__(self, api_key: str, api_url: str):
+        self.api_key = api_key
+        self.base_url = api_url
+
+    def validate_api_key(self) -> bool:
+        """Check the key by requesting `/v2/account`.
+
+        Returns True on success. A 401 is reported via `raise_invalid_credentials_error`;
+        any other HTTP error is re-raised unchanged.
+        """
+        try:
+            response = self._make_request("GET", "/v2/account")
+            response.raise_for_status()
+            return True
+        except requests.HTTPError as e:
+            status = e.response.status_code
+            if status == 401:
+                raise_invalid_credentials_error(
+                    fields=[["creds", "api_key"]], details="Invalid API key"
+                )
+            raise e
+
+    def validate_project_name(self, project_name: str) -> bool:
+        """Return True if a project named `project_name` exists; otherwise report it."""
+        if self.get_project_id(project_name) is None:
+            raise_invalid_credentials_error(
+                fields=[["project_name"]],
+                details=f"Project with name '{project_name}' does not exist",
+            )
+        return True
+
+    def list_ssh_keys(self) -> List[Dict[str, Any]]:
+        """Return all SSH keys of the account, across every result page."""
+        return self._list_all("/v2/account/keys", "ssh_keys")
+
+    def list_projects(self) -> List[Dict[str, Any]]:
+        """Return all projects, across every result page."""
+        return self._list_all("/v2/projects", "projects")
+
+    def get_project_id(self, project_name: str) -> Optional[str]:
+        """Return the id of the first project named `project_name`, or None."""
+        projects = self.list_projects()
+        for project in projects:
+            if project["name"] == project_name:
+                return project["id"]
+        return None
+
+    def create_ssh_key(self, name: str, public_key: str) -> Dict[str, Any]:
+        """Register `public_key` under `name` and return the created key object."""
+        payload = {"name": name, "public_key": public_key}
+        response = self._make_request("POST", "/v2/account/keys", json=payload)
+        response.raise_for_status()
+        return response.json()["ssh_key"]
+
+    def get_or_create_ssh_key(self, name: str, public_key: str) -> int:
+        """Return the id of the key equal to `public_key`, creating it if absent.
+
+        Existing keys are matched on their stripped key text, not on `name`.
+        """
+        ssh_keys = self.list_ssh_keys()
+        for ssh_key in ssh_keys:
+            if ssh_key["public_key"].strip() == public_key.strip():
+                return ssh_key["id"]
+
+        ssh_key = self.create_ssh_key(name, public_key)
+        return ssh_key["id"]
+
+    def create_droplet(self, droplet_config: Dict[str, Any]) -> Dict[str, Any]:
+        """Create a droplet and return its description.
+
+        A 422 response is raised as `NoCapacityError` carrying the API's message.
+        """
+        response = self._make_request("POST", "/v2/droplets", json=droplet_config)
+        if response.status_code == 422:
+            raise NoCapacityError(response.json()["message"])
+        response.raise_for_status()
+        return response.json()["droplet"]
+
+    def get_droplet(self, droplet_id: str) -> Dict[str, Any]:
+        """Return the description of droplet `droplet_id`."""
+        response = self._make_request("GET", f"/v2/droplets/{droplet_id}")
+        response.raise_for_status()
+        return response.json()["droplet"]
+
+    def delete_droplet(self, droplet_id: str) -> None:
+        """Delete droplet `droplet_id`; a 404 is logged and treated as already gone."""
+        response = self._make_request("DELETE", f"/v2/droplets/{droplet_id}")
+        if response.status_code == 404:
+            logger.debug("DigitalOcean droplet %s not found", droplet_id)
+            return
+        response.raise_for_status()
+
+    def _list_all(self, endpoint: str, key: str) -> List[Dict[str, Any]]:
+        """Collect the `key` items from every page of a list endpoint.
+
+        List responses are paginated, so reading only the first one would miss
+        SSH keys or projects on accounts that have more than a page of them.
+        """
+        items: List[Dict[str, Any]] = []
+        page = 1
+        while True:
+            response = self._make_request(
+                "GET", f"{endpoint}?page={page}&per_page={_MAX_PER_PAGE}"
+            )
+            response.raise_for_status()
+            body = response.json()
+            items.extend(body[key])
+            # `links.pages.next` is present only while further pages remain.
+            if not body.get("links", {}).get("pages", {}).get("next"):
+                return items
+            page += 1
+
+    def _make_request(
+        self, method: str, endpoint: str, json: Optional[Dict[str, Any]] = None, timeout: int = 30
+    ) -> requests.Response:
+        """Send an authenticated request to `endpoint` and return the raw response."""
+        url = f"{self.base_url}{endpoint}"
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+        }
+
+        response = requests.request(
+            method=method,
+            url=url,
+            headers=headers,
+            json=json,
+            timeout=timeout,
+        )
+        return response
diff --git a/src/dstack/_internal/core/backends/digitalocean_base/backend.py b/src/dstack/_internal/core/backends/digitalocean_base/backend.py
new file mode 100644
index 0000000000..42884b3072
--- /dev/null
+++ 
b/src/dstack/_internal/core/backends/digitalocean_base/backend.py @@ -0,0 +1,5 @@ +from dstack._internal.core.backends.base.backend import Backend + + +class BaseDigitalOceanBackend(Backend): + pass diff --git a/src/dstack/_internal/core/backends/digitalocean_base/compute.py b/src/dstack/_internal/core/backends/digitalocean_base/compute.py new file mode 100644 index 0000000000..6a5325c54e --- /dev/null +++ b/src/dstack/_internal/core/backends/digitalocean_base/compute.py @@ -0,0 +1,180 @@ +from typing import List, Optional + +import gpuhunt +from gpuhunt.providers.digitalocean import DigitalOceanProvider + +from dstack._internal.core.backends.base.backend import Compute +from dstack._internal.core.backends.base.compute import ( + ComputeWithCreateInstanceSupport, + generate_unique_instance_name, + get_user_data, +) +from dstack._internal.core.backends.base.offers import get_catalog_offers +from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient +from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig +from dstack._internal.core.errors import BackendError +from dstack._internal.core.models.backends.base import BackendType +from dstack._internal.core.models.instances import ( + InstanceAvailability, + InstanceConfiguration, + InstanceOfferWithAvailability, +) +from dstack._internal.core.models.placement import PlacementGroup +from dstack._internal.core.models.runs import JobProvisioningData, Requirements +from dstack._internal.utils.logging import get_logger + +logger = get_logger(__name__) + +MAX_INSTANCE_NAME_LEN = 60 + +# Setup commands for DigitalOcean instances +SETUP_COMMANDS = [ + "sudo ufw delete limit ssh", + "sudo ufw allow ssh", +] + +DOCKER_INSTALL_COMMANDS = [ + "export DEBIAN_FRONTEND=noninteractive", + "mkdir -p /etc/apt/keyrings", + "curl --max-time 60 -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg", + 'echo "deb [arch=$(dpkg 
--print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null', + "apt-get update", + "apt-get --assume-yes install docker-ce docker-ce-cli containerd.io docker-compose-plugin", +] + + +class BaseDigitalOceanCompute( + ComputeWithCreateInstanceSupport, + Compute, +): + def __init__(self, config: BaseDigitalOceanConfig, api_url: str, type: BackendType): + super().__init__() + self.config = config + self.api_client = DigitalOceanAPIClient(config.creds.api_key, api_url) + self.catalog = gpuhunt.Catalog(balance_resources=False, auto_reload=False) + self.BACKEND_TYPE = type + self.catalog.add_provider( + DigitalOceanProvider(api_key=config.creds.api_key, api_url=api_url) + ) + + def get_offers( + self, requirements: Optional[Requirements] = None + ) -> List[InstanceOfferWithAvailability]: + offers = get_catalog_offers( + backend=self.BACKEND_TYPE, + locations=self.config.regions, + requirements=requirements, + catalog=self.catalog, + ) + return [ + InstanceOfferWithAvailability( + **offer.dict(), + availability=InstanceAvailability.AVAILABLE, + ) + for offer in offers + ] + + def create_instance( + self, + instance_offer: InstanceOfferWithAvailability, + instance_config: InstanceConfiguration, + placement_group: Optional[PlacementGroup], + ) -> JobProvisioningData: + instance_name = generate_unique_instance_name( + instance_config, max_length=MAX_INSTANCE_NAME_LEN + ) + + project_ssh_key = instance_config.ssh_keys[0] + ssh_key_id = self.api_client.get_or_create_ssh_key( + name=f"dstack-{instance_config.project_name}", + public_key=project_ssh_key.public, + ) + size_slug = instance_offer.instance.name + + if not instance_offer.instance.resources.gpus: + backend_specific_commands = SETUP_COMMANDS + DOCKER_INSTALL_COMMANDS + else: + backend_specific_commands = SETUP_COMMANDS + + project_id = None + if self.config.project_name: + project_id = 
self.api_client.get_project_id(self.config.project_name) + if project_id is None: + raise BackendError(f"Project {self.config.project_name} does not exist") + droplet_config = { + "name": instance_name, + "region": instance_offer.region, + "size": size_slug, + "image": self._get_image_for_instance(instance_offer), + "ssh_keys": [ssh_key_id], + "backups": False, + "ipv6": False, + "monitoring": False, + "tags": [], + "user_data": get_user_data( + authorized_keys=instance_config.get_public_keys(), + backend_specific_commands=backend_specific_commands, + ), + **({"project_id": project_id} if project_id is not None else {}), + } + + droplet = self.api_client.create_droplet(droplet_config) + + return JobProvisioningData( + backend=instance_offer.backend, + instance_type=instance_offer.instance, + instance_id=str(droplet["id"]), + hostname=None, + internal_ip=None, + region=instance_offer.region, + price=instance_offer.price, + username="root", + ssh_port=22, + dockerized=True, + ssh_proxy=None, + backend_data=None, + ) + + def update_provisioning_data( + self, + provisioning_data: JobProvisioningData, + project_ssh_public_key: str, + project_ssh_private_key: str, + ): + droplet = self.api_client.get_droplet(provisioning_data.instance_id) + if droplet["status"] == "active": + for network in droplet["networks"]["v4"]: + if network["type"] == "public": + provisioning_data.hostname = network["ip_address"] + break + + def terminate_instance( + self, instance_id: str, region: str, backend_data: Optional[str] = None + ): + self.api_client.delete_droplet(instance_id) + + def _get_image_for_instance(self, instance_offer: InstanceOfferWithAvailability) -> str: + if not instance_offer.instance.resources.gpus: + # No GPUs, use CPU image + return "ubuntu-24-04-x64" + + gpu_count = len(instance_offer.instance.resources.gpus) + gpu_vendor = instance_offer.instance.resources.gpus[0].vendor + + if gpu_vendor == gpuhunt.AcceleratorVendor.AMD: + # AMD GPU + return 
"digitaloceanai-rocmjupyter" + else: + # NVIDIA GPUs - DO only supports 1 and 8 GPU configurations. + # DO says for single GPU plans using GPUs other than H100s use "gpu-h100x1-base". DO does not provide guidance for x8 GPUs so assuming the same applies. + # See (https://docs.digitalocean.com/products/droplets/getting-started/recommended-gpu-setup/#aiml-ready-image) + if gpu_count == 8: + return "gpu-h100x8-base" + elif gpu_count == 1: + return "gpu-h100x1-base" + else: + # For Unsupported GPU count - use single GPU image and log warning + logger.warning( + f"Unsupported NVIDIA GPU count: {gpu_count}, using single GPU image" + ) + return "gpu-h100x1-base" diff --git a/src/dstack/_internal/core/backends/digitalocean_base/configurator.py b/src/dstack/_internal/core/backends/digitalocean_base/configurator.py new file mode 100644 index 0000000000..b57559f1ae --- /dev/null +++ b/src/dstack/_internal/core/backends/digitalocean_base/configurator.py @@ -0,0 +1,54 @@ +import json +from typing import Optional + +from dstack._internal.core.backends.base.configurator import ( + BackendRecord, + Configurator, +) +from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend +from dstack._internal.core.backends.digitalocean_base.models import ( + AnyBaseDigitalOceanBackendConfig, + AnyBaseDigitalOceanCreds, + BaseDigitalOceanBackendConfig, + BaseDigitalOceanBackendConfigWithCreds, + BaseDigitalOceanConfig, + BaseDigitalOceanCreds, + BaseDigitalOceanStoredConfig, +) + + +class BaseDigitalOceanConfigurator(Configurator): + def validate_config( + self, config: BaseDigitalOceanBackendConfigWithCreds, default_creds_enabled: bool + ): + self._validate_creds(config.creds, config.project_name) + + def create_backend( + self, project_name: str, config: BaseDigitalOceanBackendConfigWithCreds + ) -> BackendRecord: + return BackendRecord( + config=BaseDigitalOceanStoredConfig( + **BaseDigitalOceanBackendConfig.__response__.parse_obj(config).dict() + ).json(), 
+            auth=BaseDigitalOceanCreds.parse_obj(config.creds).json(),
+        )
+
+    def get_backend_config(
+        self, record: BackendRecord, include_creds: bool
+    ) -> AnyBaseDigitalOceanBackendConfig:
+        config = self._get_config(record)
+        if include_creds:
+            return BaseDigitalOceanBackendConfigWithCreds.__response__.parse_obj(config)
+        return BaseDigitalOceanBackendConfig.__response__.parse_obj(config)
+
+    def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
+        raise NotImplementedError("Subclasses must implement get_backend()")
+
+    def _get_config(self, record: BackendRecord) -> BaseDigitalOceanConfig:
+        return BaseDigitalOceanConfig.__response__(
+            **json.loads(record.config),
+            creds=BaseDigitalOceanCreds.parse_raw(record.auth),
+        )
+
+    def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
+        raise NotImplementedError("Subclasses must implement _validate_creds()")
diff --git a/src/dstack/_internal/core/backends/digitalocean_base/models.py b/src/dstack/_internal/core/backends/digitalocean_base/models.py
new file mode 100644
index 0000000000..e3d179fcc3
--- /dev/null
+++ b/src/dstack/_internal/core/backends/digitalocean_base/models.py
@@ -0,0 +1,43 @@
+from typing import Annotated, List, Literal, Optional, Union
+
+from pydantic import Field
+
+from dstack._internal.core.models.common import CoreModel
+
+
+class BaseDigitalOceanAPIKeyCreds(CoreModel):
+    type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
+    api_key: Annotated[str, Field(description="The API key")]
+
+
+AnyBaseDigitalOceanCreds = BaseDigitalOceanAPIKeyCreds
+BaseDigitalOceanCreds = AnyBaseDigitalOceanCreds
+
+
+class BaseDigitalOceanBackendConfig(CoreModel):
+    type: Annotated[
+        Literal["amddevcloud", "digitalocean"],
+        Field(description="The type of backend"),
+    ]
+    project_name: Annotated[Optional[str], Field(description="The name of the project")] = None
+    regions: Annotated[
+        Optional[List[str]],
+        Field(description="The list of regions. 
Omit to use all regions"), + ] = None + + +class BaseDigitalOceanBackendConfigWithCreds(BaseDigitalOceanBackendConfig): + creds: Annotated[AnyBaseDigitalOceanCreds, Field(description="The credentials")] + + +AnyBaseDigitalOceanBackendConfig = Union[ + BaseDigitalOceanBackendConfig, BaseDigitalOceanBackendConfigWithCreds +] + + +class BaseDigitalOceanStoredConfig(BaseDigitalOceanBackendConfig): + pass + + +class BaseDigitalOceanConfig(BaseDigitalOceanStoredConfig): + creds: AnyBaseDigitalOceanCreds diff --git a/src/dstack/_internal/core/backends/models.py b/src/dstack/_internal/core/backends/models.py index 1097459704..1715080f83 100644 --- a/src/dstack/_internal/core/backends/models.py +++ b/src/dstack/_internal/core/backends/models.py @@ -20,6 +20,10 @@ DataCrunchBackendConfig, DataCrunchBackendConfigWithCreds, ) +from dstack._internal.core.backends.digitalocean_base.models import ( + BaseDigitalOceanBackendConfig, + BaseDigitalOceanBackendConfigWithCreds, +) from dstack._internal.core.backends.dstack.models import ( DstackBackendConfig, DstackBaseBackendConfig, @@ -77,6 +81,7 @@ CloudRiftBackendConfig, CudoBackendConfig, DataCrunchBackendConfig, + BaseDigitalOceanBackendConfig, GCPBackendConfig, HotAisleBackendConfig, KubernetesBackendConfig, @@ -100,6 +105,7 @@ CloudRiftBackendConfigWithCreds, CudoBackendConfigWithCreds, DataCrunchBackendConfigWithCreds, + BaseDigitalOceanBackendConfigWithCreds, GCPBackendConfigWithCreds, HotAisleBackendConfigWithCreds, KubernetesBackendConfigWithCreds, @@ -122,6 +128,7 @@ CloudRiftBackendConfigWithCreds, CudoBackendConfigWithCreds, DataCrunchBackendConfigWithCreds, + BaseDigitalOceanBackendConfigWithCreds, GCPBackendFileConfigWithCreds, HotAisleBackendFileConfigWithCreds, KubernetesBackendFileConfigWithCreds, diff --git a/src/dstack/_internal/core/models/backends/base.py b/src/dstack/_internal/core/models/backends/base.py index 067e181f6b..3d33e75b62 100644 --- a/src/dstack/_internal/core/models/backends/base.py +++ 
b/src/dstack/_internal/core/models/backends/base.py @@ -4,13 +4,15 @@ class BackendType(str, enum.Enum): """ Attributes: + AMDDEVCLOUD (BackendType): AMD Developer Cloud AWS (BackendType): Amazon Web Services AZURE (BackendType): Microsoft Azure CLOUDRIFT (BackendType): CloudRift CUDO (BackendType): Cudo + DATACRUNCH (BackendType): DataCrunch + DIGITALOCEAN (BackendType): DigitalOcean DSTACK (BackendType): dstack Sky GCP (BackendType): Google Cloud Platform - DATACRUNCH (BackendType): DataCrunch HOTAISLE (BackendType): Hot Aisle KUBERNETES (BackendType): Kubernetes LAMBDA (BackendType): Lambda Cloud @@ -22,11 +24,13 @@ class BackendType(str, enum.Enum): VULTR (BackendType): Vultr """ + AMDDEVCLOUD = "amddevcloud" AWS = "aws" AZURE = "azure" CLOUDRIFT = "cloudrift" CUDO = "cudo" DATACRUNCH = "datacrunch" + DIGITALOCEAN = "digitalocean" DSTACK = "dstack" GCP = "gcp" HOTAISLE = "hotaisle"