Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions docs/docs/concepts/backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,34 @@ gcloud projects list --format="json(projectId)"
Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets.
Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances.

## Hot Aisle

Log in to the SSH TUI as described in the [Hot Aisle Quick Start :material-arrow-top-right-thin:{ .external }](https://hotaisle.xyz/quick-start/).
Create a new team and generate an API key for the member in the team.

Then, go ahead and configure the backend:

<div editor-title="~/.dstack/server/config.yml">

```yaml
projects:
- name: main
backends:
- type: hotaisle
team_handle: hotaisle-team-handle
creds:
type: api_key
api_key: 9c27a4bb7a8e472fae12ab34.3f2e3c1db75b9a0187fd2196c6b3e56d2b912e1c439ba08d89e7b6fcd4ef1d3f
```

</div>

??? info "Required permissions"
The API key must have the following roles assigned:

* **Owner role for the user** - Required for creating and managing SSH keys
* **Operator role for the team** - Required for managing virtual machines within the team

## Lambda

Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key`
Expand Down
19 changes: 18 additions & 1 deletion docs/docs/reference/server/config.yml.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ to configure [backends](../../concepts/backends.md) and other [sever-level setti
overrides:
show_root_heading: false
backends:
type: 'Union[AWSBackendConfigWithCreds, AzureBackendConfigWithCreds, GCPBackendConfigWithCreds, LambdaBackendConfigWithCreds, NebiusBackendConfigWithCreds, RunpodBackendConfigWithCreds, VastAIBackendConfigWithCreds, KubernetesConfig]'
type: 'Union[AWSBackendConfigWithCreds, AzureBackendConfigWithCreds, GCPBackendConfigWithCreds, HotAisleBackendConfigWithCreds, LambdaBackendConfigWithCreds, NebiusBackendConfigWithCreds, RunpodBackendConfigWithCreds, VastAIBackendConfigWithCreds, KubernetesConfig]'

#### `projects[n].backends` { #backends data-toc-label="backends" }

Expand Down Expand Up @@ -126,6 +126,23 @@ to configure [backends](../../concepts/backends.md) and other [sever-level setti
type:
required: true

##### `projects[n].backends[type=hotaisle]` { #hotaisle data-toc-label="hotaisle" }

#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleBackendConfigWithCreds
overrides:
show_root_heading: false
type:
required: true
item_id_prefix: hotaisle-

###### `projects[n].backends[type=hotaisle].creds` { #hotaisle-creds data-toc-label="creds" }

#SCHEMA# dstack._internal.core.backends.hotaisle.models.HotAisleAPIKeyCreds
overrides:
show_root_heading: false
type:
required: true

##### `projects[n].backends[type=lambda]` { #lambda data-toc-label="lambda" }

#SCHEMA# dstack._internal.core.backends.lambdalabs.models.LambdaBackendConfigWithCreds
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-multipart>=0.0.16",
"filelock",
"psutil",
"gpuhunt==0.1.6",
"gpuhunt==0.1.7",
"argcomplete>=3.5.0",
"ignore-python>=0.2.0",
"orjson",
Expand Down
9 changes: 9 additions & 0 deletions src/dstack/_internal/core/backends/configurators.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@
except ImportError:
pass

# The Hot Aisle backend is optional: when its configurator module cannot be
# imported, the backend is simply left out of the registry.
try:
    from dstack._internal.core.backends.hotaisle.configurator import HotAisleConfigurator
except ImportError:
    pass
else:
    _CONFIGURATOR_CLASSES.append(HotAisleConfigurator)

try:
from dstack._internal.core.backends.kubernetes.configurator import (
KubernetesConfigurator,
Expand Down
1 change: 1 addition & 0 deletions src/dstack/_internal/core/backends/hotaisle/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Hot Aisle backend for dstack
109 changes: 109 additions & 0 deletions src/dstack/_internal/core/backends/hotaisle/api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from typing import Any, Dict, Optional

import requests

from dstack._internal.core.backends.base.configurator import raise_invalid_credentials_error
from dstack._internal.utils.logging import get_logger

API_URL = "https://admin.hotaisle.app/api"

logger = get_logger(__name__)


class HotAisleAPIClient:
def __init__(self, api_key: str, team_handle: str):
self.api_key = api_key
self.team_handle = team_handle

def validate_api_key(self) -> bool:
try:
self._validate_user_and_team()
return True
except requests.HTTPError as e:
if e.response.status_code == 401:
raise_invalid_credentials_error(
fields=[["creds", "api_key"]], details="Invalid API key"
)
elif e.response.status_code == 403:
raise_invalid_credentials_error(
fields=[["creds", "api_key"]],
details="Authenticated user does note have required permissions",
)
raise e
except ValueError as e:
error_message = str(e)
if "No Hot Aisle teams found" in error_message:
raise_invalid_credentials_error(
fields=[["creds", "api_key"]],
details="Valid API key but no teams found for this user",
)
elif "not found" in error_message:
raise_invalid_credentials_error(
fields=[["team_handle"]], details=f"Team handle '{self.team_handle}' not found"
)
Comment on lines +33 to +43
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(nit) Looking for patterns in our own error messages and then raising with another error message looks quite redundant. It's also error-prone, because we can change the error message in _validate_user_and_team and forget to change it here.

Some alternatives I can suggest:

  • Raise with the same error message - raise_invalid_credentials_error(details=str(e), ...)
  • Call raise_invalid_credentials_error directly in _validate_user_and_team
  • (my favorite) Merge validate_api_key and _validate_user_and_team into one method and call raise_invalid_credentials_error directly

raise e

def _validate_user_and_team(self) -> None:
    """Fetch the current user and verify the configured team is listed.

    Raises:
        requests.HTTPError: If the ``/user/`` request returns an error status.
        ValueError: If the user has no teams, or none of the user's teams
            has a handle equal to ``self.team_handle``.
    """
    resp = self._make_request("GET", f"{API_URL}/user/")
    resp.raise_for_status()

    memberships = resp.json().get("teams", [])
    if not memberships:
        raise ValueError("No Hot Aisle teams found for this user")

    known_handles = [entry["handle"] for entry in memberships]
    if self.team_handle in known_handles:
        return
    raise ValueError(f"Hot Aisle team '{self.team_handle}' not found.")

def upload_ssh_key(self, public_key: str) -> bool:
    """Register an SSH public key for the authenticated user.

    Args:
        public_key: Key text sent as the ``authorized_key`` field.

    Returns:
        True when the key was accepted or already exists (HTTP 409).

    Raises:
        requests.HTTPError: For any other error status.
    """
    resp = self._make_request(
        "POST",
        f"{API_URL}/user/ssh_keys/",
        json={"authorized_key": public_key},
    )
    # A 409 means the key already exists, which counts as success.
    if resp.status_code != 409:
        resp.raise_for_status()
    return True

def create_virtual_machine(self, vm_payload: Dict[str, Any]) -> Dict[str, Any]:
    """Create a virtual machine in the configured team.

    Args:
        vm_payload: JSON body posted to the team's ``virtual_machines`` endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    endpoint = f"{API_URL}/teams/{self.team_handle}/virtual_machines/"
    resp = self._make_request("POST", endpoint, json=vm_payload)
    resp.raise_for_status()
    return resp.json()

def get_vm_state(self, vm_name: str) -> str:
    """Return the ``state`` field reported for a virtual machine.

    Args:
        vm_name: Name of the virtual machine within the configured team.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    endpoint = f"{API_URL}/teams/{self.team_handle}/virtual_machines/{vm_name}/state/"
    resp = self._make_request("GET", endpoint)
    resp.raise_for_status()
    return resp.json()["state"]

def terminate_virtual_machine(self, vm_name: str) -> None:
    """Delete a virtual machine, tolerating one that is not found.

    Args:
        vm_name: Name of the virtual machine within the configured team.

    Raises:
        requests.HTTPError: For any error status other than 404.
    """
    endpoint = f"{API_URL}/teams/{self.team_handle}/virtual_machines/{vm_name}/"
    resp = self._make_request("DELETE", endpoint)
    if resp.status_code != 404:
        resp.raise_for_status()
        return
    # A 404 is only logged, not raised.
    logger.debug("Hot Aisle virtual machine %s not found", vm_name)

def _make_request(
    self, method: str, url: str, json: Optional[Dict[str, Any]] = None, timeout: int = 30
) -> requests.Response:
    """Send one authenticated HTTP request and return the raw response.

    Args:
        method: HTTP verb passed to ``requests.request``.
        url: Full URL to call.
        json: Optional JSON body; when given, a JSON ``Content-Type``
            header is added.
        timeout: Timeout forwarded to ``requests.request``.

    Returns:
        The ``requests.Response``; status checking is left to the caller.
    """
    # The Content-Type header is only sent when there is a JSON body.
    body_headers = {} if json is None else {"Content-Type": "application/json"}
    return requests.request(
        method=method,
        url=url,
        headers={
            "accept": "application/json",
            "Authorization": f"Token {self.api_key}",
            **body_headers,
        },
        json=json,
        timeout=timeout,
    )
16 changes: 16 additions & 0 deletions src/dstack/_internal/core/backends/hotaisle/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dstack._internal.core.backends.base.backend import Backend
from dstack._internal.core.backends.hotaisle.compute import HotAisleCompute
from dstack._internal.core.backends.hotaisle.models import HotAisleConfig
from dstack._internal.core.models.backends.base import BackendType


class HotAisleBackend(Backend):
    """Backend that pairs a Hot Aisle config with its compute implementation."""

    TYPE = BackendType.HOTAISLE
    COMPUTE_CLASS = HotAisleCompute

    def __init__(self, config: HotAisleConfig):
        """Keep the config and build the compute object from it up front."""
        self.config = config
        self._compute = HotAisleCompute(config)

    def compute(self) -> HotAisleCompute:
        """Return the compute object created in ``__init__``."""
        return self._compute
Loading
Loading