Skip to content

Commit 5b28643

Browse files
authored
Allow customizing gateway instance type (#3384)
Add an optional `instance_type` gateway configuration property that allows overriding the default (typically very small) gateway instance type.

```yaml
type: gateway
name: example-gateway
backend: aws
region: eu-west-1
instance_type: t3.large
domain: example.com
```

Supported backends:
- `aws`
- `gcp`

Not supported:
- `kubernetes` (instance types are not differentiated)
- `azure` (gateway support currently broken)

The commit also includes improved gateway provisioning error handling in case the user specifies an invalid instance type.
1 parent 16bcfd3 commit 5b28643

File tree

8 files changed

+85
-19
lines changed

8 files changed

+85
-19
lines changed

docs/docs/concepts/gateways.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,27 @@ You can create gateways with the `aws`, `azure`, `gcp`, or `kubernetes` backends
5656
Gateways in the `kubernetes` backend require an external load balancer. Managed Kubernetes solutions usually include a load balancer.
5757
For self-hosted Kubernetes, you must provide a load balancer yourself.
5858

59+
### Instance type
60+
61+
By default, `dstack` provisions a small, low-cost instance for the gateway. If you expect to run high-traffic services, you can configure a larger instance type using the `instance_type` property.
62+
63+
<div editor-title="gateway.dstack.yml">
64+
65+
```yaml
66+
type: gateway
67+
name: example-gateway
68+
69+
backend: aws
70+
region: eu-west-1
71+
72+
# (Optional) Override the gateway instance type
73+
instance_type: t3.large
74+
75+
domain: example.com
76+
```
77+
78+
</div>
79+
5980
### Router
6081

6182
By default, the gateway uses its own load balancer to route traffic between replicas. However, you can delegate this responsibility to a specific router by setting the `router` property. Currently, the only supported external router is `sglang`.

src/dstack/_internal/core/backends/aws/compute.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
logger = get_logger(__name__)
7777
# gp2 volumes can be 1GB-16TB, dstack AMIs are 100GB
7878
CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("100GB"), max=Memory.parse("16TB"))
79+
DEFAULT_GATEWAY_INSTANCE_TYPE = "t3.micro"
7980

8081

8182
class AWSGatewayBackendData(CoreModel):
@@ -454,22 +455,27 @@ def create_gateway(
454455
project_id=configuration.project_name,
455456
vpc_id=vpc_id,
456457
)
457-
response = ec2_resource.create_instances(
458-
**aws_resources.create_instances_struct(
459-
disk_size=10,
460-
image_id=aws_resources.get_gateway_image_id(ec2_client),
461-
instance_type="t3.micro",
462-
iam_instance_profile=None,
463-
user_data=get_gateway_user_data(
464-
configuration.ssh_key_pub, router=configuration.router
465-
),
466-
tags=tags,
467-
security_group_id=security_group_id,
468-
spot=False,
469-
subnet_id=subnet_id,
470-
allocate_public_ip=configuration.public_ip,
471-
)
458+
instance_struct = aws_resources.create_instances_struct(
459+
disk_size=10,
460+
image_id=aws_resources.get_gateway_image_id(ec2_client),
461+
instance_type=configuration.instance_type or DEFAULT_GATEWAY_INSTANCE_TYPE,
462+
iam_instance_profile=None,
463+
user_data=get_gateway_user_data(
464+
configuration.ssh_key_pub, router=configuration.router
465+
),
466+
tags=tags,
467+
security_group_id=security_group_id,
468+
spot=False,
469+
subnet_id=subnet_id,
470+
allocate_public_ip=configuration.public_ip,
472471
)
472+
try:
473+
response = ec2_resource.create_instances(**instance_struct)
474+
except botocore.exceptions.ClientError as e:
475+
msg = f"AWS Error: {e.response['Error']['Code']}"
476+
if e.response["Error"].get("Message"):
477+
msg += f": {e.response['Error']['Message']}"
478+
raise ComputeError(msg)
473479
instance = response[0]
474480
instance.wait_until_running()
475481
instance.reload() # populate instance.public_ip_address

src/dstack/_internal/core/backends/azure/compute.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
logger = get_logger(__name__)
8080
# OS disks can be 1GB-4095GB, dstack images are 30GB
8181
CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("30GB"), max=Memory.parse("4095GB"))
82+
DEFAULT_GATEWAY_INSTANCE_TYPE = "Standard_B1ms"
8283

8384

8485
class AzureCompute(
@@ -230,6 +231,13 @@ def create_gateway(
230231
self,
231232
configuration: GatewayComputeConfiguration,
232233
) -> GatewayProvisioningData:
234+
if configuration.instance_type is not None:
235+
# TODO: support instance_type. Requires selecting a VM image to avoid errors like this:
236+
# > The selected VM size 'Standard_E4s_v6' cannot boot Hypervisor Generation '1'
237+
raise ComputeError(
238+
"The `azure` backend does not support the `instance_type`"
239+
" gateway configuration property"
240+
)
233241
logger.info(
234242
"Launching %s gateway instance in %s...",
235243
configuration.instance_name,
@@ -275,7 +283,7 @@ def create_gateway(
275283
managed_identity_name=None,
276284
managed_identity_resource_group=None,
277285
image_reference=_get_gateway_image_ref(),
278-
vm_size="Standard_B1ms",
286+
vm_size=DEFAULT_GATEWAY_INSTANCE_TYPE,
279287
instance_name=instance_name,
280288
user_data=get_gateway_user_data(
281289
configuration.ssh_key_pub, router=configuration.router

src/dstack/_internal/core/backends/gcp/compute.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
)
8989
RESOURCE_NAME_PATTERN = re.compile(r"[a-z0-9-]+")
9090
TPU_VERSIONS = [tpu.name for tpu in KNOWN_TPUS]
91+
DEFAULT_GATEWAY_INSTANCE_TYPE = "e2-medium"
9192

9293

9394
class GCPOfferBackendData(CoreModel):
@@ -596,7 +597,7 @@ def create_gateway(
596597
request.instance_resource = gcp_resources.create_instance_struct(
597598
disk_size=10,
598599
image_id=_get_gateway_image_id(),
599-
machine_type="e2-medium",
600+
machine_type=configuration.instance_type or DEFAULT_GATEWAY_INSTANCE_TYPE,
600601
accelerators=[],
601602
spot=False,
602603
user_data=get_gateway_user_data(
@@ -612,8 +613,14 @@ def create_gateway(
612613
subnetwork=subnetwork,
613614
allocate_public_ip=configuration.public_ip,
614615
)
615-
operation = self.instances_client.insert(request=request)
616-
gcp_resources.wait_for_extended_operation(operation, "instance creation")
616+
try:
617+
operation = self.instances_client.insert(request=request)
618+
gcp_resources.wait_for_extended_operation(operation, "instance creation")
619+
except (
620+
google.api_core.exceptions.ServiceUnavailable,
621+
google.api_core.exceptions.ClientError,
622+
) as e:
623+
raise ComputeError(f"GCP error: {e.message}")
617624
instance = self.instances_client.get(
618625
project=self.config.project_id, zone=zone, instance=instance_name
619626
)

src/dstack/_internal/core/backends/kubernetes/compute.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@ def create_gateway(
370370
# TODO: By default EKS creates a Classic Load Balancer for Load Balancer services.
371371
# Consider deploying an NLB. It seems it requires some extra configuration on the cluster:
372372
# https://docs.aws.amazon.com/eks/latest/userguide/network-load-balancing.html
373+
if configuration.instance_type is not None:
374+
raise ComputeError(
375+
"The `kubernetes` backend does not support the `instance_type`"
376+
" gateway configuration property"
377+
)
373378
instance_name = generate_unique_gateway_instance_name(configuration)
374379
commands = _get_gateway_commands(
375380
authorized_keys=[configuration.ssh_key_pub], router=configuration.router

src/dstack/_internal/core/models/gateways.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ class GatewayConfiguration(CoreModel):
5151
default: Annotated[bool, Field(description="Make the gateway default")] = False
5252
backend: Annotated[BackendType, Field(description="The gateway backend")]
5353
region: Annotated[str, Field(description="The gateway region")]
54+
instance_type: Annotated[
55+
Optional[str],
56+
Field(
57+
description=(
58+
"Backend-specific instance type to use for the gateway instance."
59+
" Omit to use the backend's default, which is typically a small non-GPU instance"
60+
),
61+
min_length=1,
62+
),
63+
] = None
5464
router: Annotated[
5565
Optional[AnyRouterConfig],
5666
Field(description="The router configuration"),
@@ -115,6 +125,7 @@ class GatewayComputeConfiguration(CoreModel):
115125
instance_name: str
116126
backend: BackendType
117127
region: str
128+
instance_type: Optional[str] = None
118129
public_ip: bool
119130
ssh_key_pub: str
120131
certificate: Optional[AnyGatewayCertificate] = None

src/dstack/_internal/server/services/gateways/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ async def create_gateway_compute(
104104
instance_name=configuration.name,
105105
backend=configuration.backend,
106106
region=configuration.region,
107+
instance_type=configuration.instance_type,
107108
public_ip=configuration.public_ip,
108109
ssh_key_pub=gateway_ssh_public_key,
109110
certificate=configuration.certificate,

src/tests/_internal/server/routers/test_gateways.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ async def test_list(self, test_db, session: AsyncSession, client: AsyncClient):
7070
"name": gateway.name,
7171
"backend": backend.type.value,
7272
"region": gateway.region,
73+
"instance_type": None,
7374
"router": None,
7475
"domain": gateway.wildcard_domain,
7576
"default": False,
@@ -122,6 +123,7 @@ async def test_get(self, test_db, session: AsyncSession, client: AsyncClient):
122123
"name": gateway.name,
123124
"backend": backend.type.value,
124125
"region": gateway.region,
126+
"instance_type": None,
125127
"router": None,
126128
"domain": gateway.wildcard_domain,
127129
"default": False,
@@ -203,6 +205,7 @@ async def test_create_gateway(self, test_db, session: AsyncSession, client: Asyn
203205
"name": "test",
204206
"backend": backend.type.value,
205207
"region": "us",
208+
"instance_type": None,
206209
"router": None,
207210
"domain": None,
208211
"default": True,
@@ -256,6 +259,7 @@ async def test_create_gateway_without_name(
256259
"name": "random-name",
257260
"backend": backend.type.value,
258261
"region": "us",
262+
"instance_type": None,
259263
"router": None,
260264
"domain": None,
261265
"default": True,
@@ -359,6 +363,7 @@ async def test_set_default_gateway(self, test_db, session: AsyncSession, client:
359363
"name": gateway.name,
360364
"backend": backend.type.value,
361365
"region": gateway.region,
366+
"instance_type": None,
362367
"router": None,
363368
"domain": gateway.wildcard_domain,
364369
"default": True,
@@ -482,6 +487,7 @@ def get_backend(project, backend_type):
482487
"name": gateway_gcp.name,
483488
"backend": backend_gcp.type.value,
484489
"region": gateway_gcp.region,
490+
"instance_type": None,
485491
"router": None,
486492
"domain": gateway_gcp.wildcard_domain,
487493
"default": False,
@@ -552,6 +558,7 @@ async def test_set_wildcard_domain(self, test_db, session: AsyncSession, client:
552558
"name": gateway.name,
553559
"backend": backend.type.value,
554560
"region": gateway.region,
561+
"instance_type": None,
555562
"router": None,
556563
"domain": "test.com",
557564
"default": False,

0 commit comments

Comments
 (0)