Skip to content

Commit ebfd895

Browse files
[UX] Improve dstack fleet output layout (#3529)
* [UX] Make `dstack fleet` show more useful information * Do not show "(no instances)" row placeholder for empty fleets; removed unnecessary docstrings and comments. * Linter and pyright * PR review feedback
1 parent b293672 commit ebfd895

2 files changed

Lines changed: 762 additions & 78 deletions

File tree

Lines changed: 260 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
from typing import List
1+
from typing import Any, Dict, List, Optional, Union
22

33
from rich.table import Table
44

55
from dstack._internal.cli.utils.common import add_row_from_dict, console
66
from dstack._internal.core.models.backends.base import BackendType
7-
from dstack._internal.core.models.fleets import Fleet, FleetStatus
8-
from dstack._internal.core.models.instances import InstanceStatus
7+
from dstack._internal.core.models.fleets import Fleet, FleetNodesSpec, FleetStatus
8+
from dstack._internal.core.models.instances import Instance, InstanceStatus
9+
from dstack._internal.core.models.resources import GPUSpec, ResourcesSpec
910
from dstack._internal.utils.common import DateFormatter, pretty_date
1011

1112

@@ -18,89 +19,270 @@ def get_fleets_table(
1819
fleets: List[Fleet], verbose: bool = False, format_date: DateFormatter = pretty_date
1920
) -> Table:
2021
table = Table(box=None)
21-
table.add_column("FLEET", no_wrap=True)
22+
23+
# Columns
24+
table.add_column("NAME", style="bold", no_wrap=True)
25+
table.add_column("NODES")
2226
if verbose:
23-
table.add_column("RESERVATION")
24-
table.add_column("INSTANCE")
27+
table.add_column("RESOURCES")
28+
else:
29+
table.add_column("GPU")
30+
table.add_column("SPOT")
2531
table.add_column("BACKEND")
26-
if verbose:
27-
table.add_column("REGION")
28-
table.add_column("RESOURCES")
2932
table.add_column("PRICE")
30-
table.add_column("STATUS")
31-
table.add_column("CREATED")
32-
33+
table.add_column("STATUS", no_wrap=True)
34+
table.add_column("CREATED", no_wrap=True)
3335
if verbose:
3436
table.add_column("ERROR")
3537

3638
for fleet in fleets:
37-
for i, instance in enumerate(fleet.instances):
38-
resources = ""
39-
if instance.instance_type is not None and (
40-
instance.backend != BackendType.REMOTE
41-
or instance.status not in [InstanceStatus.PENDING, InstanceStatus.PROVISIONING]
42-
):
43-
resources = instance.instance_type.resources.pretty_format(include_spot=True)
44-
45-
status = instance.status.value
46-
total_blocks = instance.total_blocks
47-
busy_blocks = instance.busy_blocks
48-
if (
49-
instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
50-
and total_blocks is not None
51-
and total_blocks > 1
52-
):
53-
status = f"{busy_blocks}/{total_blocks} {InstanceStatus.BUSY.value}"
54-
if instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]:
55-
if instance.unreachable:
56-
status += "\n(unreachable)"
57-
elif not instance.health_status.is_healthy():
58-
status += f"\n({instance.health_status.value})"
59-
60-
backend = instance.backend or ""
61-
if backend == "remote":
62-
backend = "ssh"
63-
64-
region = ""
65-
if instance.region:
66-
region = f"{instance.region}"
67-
if verbose:
68-
if instance.availability_zone:
69-
region += f" ({instance.availability_zone})"
70-
else:
71-
backend += f" ({instance.region})"
72-
error = ""
73-
if instance.status == InstanceStatus.TERMINATED and instance.termination_reason:
74-
error = f"{instance.termination_reason}"
75-
row = {
76-
"FLEET": fleet.name if i == 0 else "",
77-
"RESERVATION": fleet.spec.configuration.reservation or "" if i == 0 else "",
78-
"INSTANCE": str(instance.instance_num),
79-
"BACKEND": backend,
80-
"REGION": region,
81-
"RESOURCES": resources,
82-
"PRICE": f"${instance.price:.4f}".rstrip("0").rstrip(".")
83-
if instance.price is not None
84-
else "",
85-
"STATUS": status,
39+
# Fleet row
40+
config = fleet.spec.configuration
41+
merged_profile = fleet.spec.merged_profile
42+
43+
# Detect SSH fleet vs backend fleet
44+
if config.ssh_config is not None:
45+
# SSH fleet: fixed number of hosts, no cloud billing
46+
nodes = str(len(config.ssh_config.hosts))
47+
backend = "ssh"
48+
spot_policy = "-"
49+
max_price = "-"
50+
else:
51+
# Backend fleet: dynamic nodes, cloud billing
52+
nodes = _format_nodes(config.nodes)
53+
backend = _format_backends(config.backends)
54+
spot_policy = "-"
55+
if merged_profile and merged_profile.spot_policy:
56+
spot_policy = merged_profile.spot_policy.value
57+
# Format as "$0..$X.XX" range, or "-" if not set
58+
if merged_profile and merged_profile.max_price is not None:
59+
max_price = f"$0..{_format_price(merged_profile.max_price)}"
60+
else:
61+
max_price = "-"
62+
63+
# In verbose mode, append placement to nodes if cluster
64+
if verbose and config.placement and config.placement.value == "cluster":
65+
nodes = f"{nodes} (cluster)"
66+
67+
fleet_row: Dict[Union[str, int], Any] = {
68+
"NAME": fleet.name,
69+
"NODES": nodes,
70+
"BACKEND": backend,
71+
"PRICE": max_price,
72+
"SPOT": spot_policy,
73+
"STATUS": _format_fleet_status(fleet),
74+
"CREATED": format_date(fleet.created_at),
75+
}
76+
77+
if verbose:
78+
fleet_row["RESOURCES"] = config.resources.pretty_format() if config.resources else "-"
79+
fleet_row["ERROR"] = ""
80+
else:
81+
fleet_row["GPU"] = _format_fleet_gpu(config.resources)
82+
83+
add_row_from_dict(table, fleet_row)
84+
85+
# Instance rows (indented)
86+
for instance in fleet.instances:
87+
# Check if this is an SSH instance
88+
is_ssh_instance = instance.backend == BackendType.REMOTE
89+
90+
# Format backend with region (and AZ in verbose mode)
91+
if verbose and instance.availability_zone:
92+
# In verbose mode, show AZ instead of region (AZ is more specific)
93+
backend_with_region = _format_backend(instance.backend, instance.availability_zone)
94+
else:
95+
backend_with_region = _format_backend(instance.backend, instance.region)
96+
97+
# Get spot info from instance resources (not applicable to SSH)
98+
if is_ssh_instance:
99+
instance_spot = "-"
100+
instance_price = "-"
101+
else:
102+
instance_spot = "-"
103+
if (
104+
instance.instance_type is not None
105+
and instance.instance_type.resources is not None
106+
):
107+
instance_spot = (
108+
"spot" if instance.instance_type.resources.spot else "on-demand"
109+
)
110+
instance_price = _format_price(instance.price)
111+
112+
instance_row: Dict[Union[str, int], Any] = {
113+
"NAME": f" instance={instance.instance_num}",
114+
"NODES": "",
115+
"BACKEND": backend_with_region,
116+
"PRICE": instance_price,
117+
"SPOT": instance_spot,
118+
"STATUS": _format_instance_status(instance),
86119
"CREATED": format_date(instance.created),
87-
"ERROR": error,
88120
}
89-
add_row_from_dict(table, row)
90-
91-
if len(fleet.instances) == 0 and fleet.status != FleetStatus.TERMINATING:
92-
row = {
93-
"FLEET": fleet.name,
94-
"RESERVATION": "-",
95-
"INSTANCE": "-",
96-
"BACKEND": "-",
97-
"REGION": "-",
98-
"RESOURCES": "-",
99-
"PRICE": "-",
100-
"STATUS": "-",
101-
"CREATED": format_date(fleet.created_at),
102-
"ERROR": "-",
103-
}
104-
add_row_from_dict(table, row)
121+
122+
if verbose:
123+
instance_row["RESOURCES"] = _format_instance_resources(instance)
124+
error = ""
125+
if instance.status == InstanceStatus.TERMINATED and instance.termination_reason:
126+
error = instance.termination_reason
127+
instance_row["ERROR"] = error
128+
else:
129+
instance_row["GPU"] = _format_instance_gpu(instance)
130+
131+
add_row_from_dict(table, instance_row, style="secondary")
105132

106133
return table
134+
135+
136+
def _format_nodes(nodes: Optional[FleetNodesSpec]) -> str:
137+
"""Format nodes spec as '0..1', '3', '2..10', etc."""
138+
if nodes is None:
139+
return "-"
140+
if nodes.min == nodes.max:
141+
return str(nodes.min)
142+
if nodes.max is None:
143+
return f"{nodes.min}.."
144+
return f"{nodes.min}..{nodes.max}"
145+
146+
147+
def _format_backends(backends: Optional[List[BackendType]]) -> str:
148+
if backends is None or len(backends) == 0:
149+
return "*"
150+
return ", ".join(b.value.replace("remote", "ssh") for b in backends)
151+
152+
153+
def _format_range(min_val: Optional[Any], max_val: Optional[Any]) -> str:
154+
if min_val is None and max_val is None:
155+
return ""
156+
if min_val == max_val:
157+
return str(min_val)
158+
if max_val is None:
159+
return f"{min_val}.."
160+
if min_val is None:
161+
return f"..{max_val}"
162+
return f"{min_val}..{max_val}"
163+
164+
165+
def _format_fleet_gpu(resources: Optional[ResourcesSpec]) -> str:
166+
"""Extract GPU-only info from fleet requirements, handling ranges."""
167+
if resources is None or resources.gpu is None:
168+
return "-"
169+
170+
gpu: GPUSpec = resources.gpu
171+
172+
# Check if there's actually a GPU requirement
173+
count = gpu.count
174+
if count is None or (count.min == 0 and (count.max is None or count.max == 0)):
175+
return "-"
176+
177+
parts = []
178+
179+
# GPU name(s)
180+
if gpu.name:
181+
parts.append(",".join(gpu.name))
182+
else:
183+
parts.append("gpu")
184+
185+
# GPU memory (range)
186+
if gpu.memory is not None:
187+
mem_str = _format_range(gpu.memory.min, gpu.memory.max)
188+
if mem_str:
189+
parts.append(mem_str)
190+
191+
# GPU count (range)
192+
count_str = _format_range(count.min, count.max)
193+
if count_str:
194+
parts.append(count_str)
195+
196+
return ":".join(parts)
197+
198+
199+
def _format_fleet_status(fleet: Fleet) -> str:
    """Wrap the fleet's status value in style markup with a per-status color.

    TERMINATED and FAILED are rendered in their plain color; every other
    status is rendered bold. Statuses missing from the palette use "white".
    """
    fleet_status = fleet.status
    palette = {
        FleetStatus.SUBMITTED: "grey",
        FleetStatus.ACTIVE: "white",
        FleetStatus.TERMINATING: "deep_sky_blue1",
        FleetStatus.TERMINATED: "grey",
        FleetStatus.FAILED: "indian_red1",
    }
    style = palette.get(fleet_status, "white")
    # Only statuses that are not finished get the bold emphasis.
    if fleet_status not in (FleetStatus.TERMINATED, FleetStatus.FAILED):
        style = f"bold {style}"
    return f"[{style}]{fleet_status.value}[/]"
214+
215+
216+
def _format_instance_status(instance: Instance) -> str:
    """Build the colored status label for a single instance.

    For IDLE/BUSY instances with more than one block, the label is
    "<busy_blocks>/<total_blocks>" followed by the BUSY status value;
    otherwise it is the status value itself. IDLE/BUSY instances also get an
    " (unreachable)" suffix, or the health status value in parentheses when
    the instance is reachable but not healthy. The result is wrapped in style
    markup; every status except TERMINATED is rendered bold.
    """
    state = instance.status
    is_running = state in (InstanceStatus.IDLE, InstanceStatus.BUSY)

    label = state.value
    blocks = instance.total_blocks
    if is_running and blocks is not None and blocks > 1:
        label = f"{instance.busy_blocks}/{blocks} {InstanceStatus.BUSY.value}"

    # Health information is only reported for IDLE/BUSY instances.
    note = ""
    if is_running:
        if instance.unreachable:
            note = " (unreachable)"
        elif not instance.health_status.is_healthy():
            note = f" ({instance.health_status.value})"

    palette = {
        InstanceStatus.PENDING: "deep_sky_blue1",
        InstanceStatus.PROVISIONING: "deep_sky_blue1",
        InstanceStatus.IDLE: "sea_green3",
        InstanceStatus.BUSY: "white",
        InstanceStatus.TERMINATING: "deep_sky_blue1",
        InstanceStatus.TERMINATED: "grey",
    }
    style = palette.get(state, "white")
    if state != InstanceStatus.TERMINATED:
        style = f"bold {style}"
    return f"[{style}]{label}{note}[/]"
250+
251+
252+
def _format_backend(backend: Optional[BackendType], region: Optional[str]) -> str:
253+
if backend is None:
254+
return "-"
255+
backend_str = backend.value
256+
if backend == BackendType.REMOTE:
257+
backend_str = "ssh"
258+
if region:
259+
backend_str += f" ({region})"
260+
return backend_str
261+
262+
263+
def _format_price(price: Optional[float]) -> str:
264+
if price is None:
265+
return "-"
266+
return f"${price:.4f}".rstrip("0").rstrip(".")
267+
268+
269+
def _format_instance_gpu(instance: Instance) -> str:
270+
if instance.instance_type is None:
271+
return "-"
272+
if instance.backend == BackendType.REMOTE and instance.status in [
273+
InstanceStatus.PENDING,
274+
InstanceStatus.PROVISIONING,
275+
]:
276+
return "-"
277+
return instance.instance_type.resources.pretty_format(gpu_only=True, include_spot=False) or "-"
278+
279+
280+
def _format_instance_resources(instance: Instance) -> str:
281+
if instance.instance_type is None:
282+
return "-"
283+
if instance.backend == BackendType.REMOTE and instance.status in [
284+
InstanceStatus.PENDING,
285+
InstanceStatus.PROVISIONING,
286+
]:
287+
return "-"
288+
return instance.instance_type.resources.pretty_format(include_spot=False)

0 commit comments

Comments
 (0)