Skip to content

Commit 2fadf4a

Browse files
committed
Misc. updates
- DB query and schema optimizations - Event logging - `DSTACK_FF_EVENTS` feature flag - More input validation - More unit tests - Various docs
1 parent 92b90f5 commit 2fadf4a

20 files changed

Lines changed: 673 additions & 145 deletions

File tree

docs/docs/reference/environment-variables.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,12 @@ For more details on the options below, refer to the [server deployment](../guide
132132
- `DSTACK_SERVER_INSTANCE_HEALTH_TTL_SECONDS`{ #DSTACK_SERVER_INSTANCE_HEALTH_TTL_SECONDS } – Maximum age of instance health checks.
133133
- `DSTACK_SERVER_INSTANCE_HEALTH_MIN_COLLECT_INTERVAL_SECONDS`{ #DSTACK_SERVER_INSTANCE_HEALTH_MIN_COLLECT_INTERVAL_SECONDS } – Minimum time interval between consecutive health checks of the same instance.
134134

135+
<!--
136+
TODO: uncomment after dropping DSTACK_FF_EVENTS
137+
138+
- `DSTACK_SERVER_EVENTS_TTL_SECONDS` { #DSTACK_SERVER_EVENTS_TTL_SECONDS } - Maximum age of event records. Set to `0` to disable event storage. Defaults to 30 days.
139+
-->
140+
135141
??? info "Internal environment variables"
136142
The following environment variables are intended for development purposes:
137143

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# TODO: docs
2-
31
import uuid
42
from datetime import datetime
53
from enum import Enum
6-
from typing import Optional
4+
from typing import Annotated, Optional
5+
6+
from pydantic import Field
77

88
from dstack._internal.core.models.common import CoreModel
99

@@ -18,16 +18,50 @@ class EventTargetType(str, Enum):
1818

1919

2020
class EventTarget(CoreModel):
21-
type: str # Holds EventTargetType; str for adding new types without breaking compatibility
22-
project_id: Optional[uuid.UUID]
23-
id: uuid.UUID
24-
name: str
21+
type: Annotated[
22+
str, # not using EventTargetType to allow adding new types without breaking compatibility
23+
Field(
24+
description=(
25+
f"Type of the target entity."
26+
f" One of: {', '.join([f'`{t}`' for t in EventTargetType])}"
27+
)
28+
),
29+
]
30+
project_id: Annotated[
31+
Optional[uuid.UUID],
32+
Field(
33+
description=(
34+
"ID of the project the target entity belongs to,"
35+
" or `null` for target types not bound to a project (e.g., users)"
36+
)
37+
),
38+
]
39+
id: Annotated[uuid.UUID, Field(description="ID of the target entity")]
40+
name: Annotated[str, Field(description="Name of the target entity")]
2541

2642

2743
class Event(CoreModel):
2844
id: uuid.UUID
2945
message: str
3046
recorded_at: datetime
31-
actor_user_id: Optional[uuid.UUID]
32-
actor_user: Optional[str]
33-
targets: list[EventTarget]
47+
actor_user_id: Annotated[
48+
Optional[uuid.UUID],
49+
Field(
50+
description=(
51+
"ID of the user who performed the action that triggered the event,"
52+
" or `null` if the action was performed by the system"
53+
)
54+
),
55+
]
56+
actor_user: Annotated[
57+
Optional[str],
58+
Field(
59+
description=(
60+
"Name of the user who performed the action that triggered the event,"
61+
" or `null` if the action was performed by the system"
62+
)
63+
),
64+
]
65+
targets: Annotated[
66+
list[EventTarget], Field(description="List of entities affected by the event")
67+
]

src/dstack/_internal/server/app.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
get_client_version,
6767
get_server_client_error_details,
6868
)
69-
from dstack._internal.settings import DSTACK_VERSION
69+
from dstack._internal.settings import DSTACK_VERSION, FeatureFlags
7070
from dstack._internal.utils.logging import get_logger
7171
from dstack._internal.utils.ssh import check_required_ssh_version
7272

@@ -229,7 +229,7 @@ def register_routes(app: FastAPI, ui: bool = True):
229229
app.include_router(model_proxy.router, prefix="/proxy/models", tags=["model-proxy"])
230230
app.include_router(prometheus.router)
231231
app.include_router(files.router)
232-
app.include_router(events.root_router)
232+
app.include_router(events.root_router, include_in_schema=FeatureFlags.EVENTS)
233233

234234
@app.exception_handler(ForbiddenError)
235235
async def forbidden_error_handler(request: Request, exc: ForbiddenError):

src/dstack/_internal/server/background/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
process_terminating_jobs,
3434
)
3535
from dstack._internal.server.background.tasks.process_volumes import process_submitted_volumes
36+
from dstack._internal.settings import FeatureFlags
3637

3738
_scheduler = AsyncIOScheduler()
3839

@@ -70,7 +71,8 @@ def start_background_tasks() -> AsyncIOScheduler:
7071
_scheduler.add_job(process_probes, IntervalTrigger(seconds=3, jitter=1))
7172
_scheduler.add_job(collect_metrics, IntervalTrigger(seconds=10), max_instances=1)
7273
_scheduler.add_job(delete_metrics, IntervalTrigger(minutes=5), max_instances=1)
73-
_scheduler.add_job(delete_events, IntervalTrigger(minutes=7), max_instances=1)
74+
if FeatureFlags.EVENTS:
75+
_scheduler.add_job(delete_events, IntervalTrigger(minutes=7), max_instances=1)
7476
if settings.ENABLE_PROMETHEUS_METRICS:
7577
_scheduler.add_job(
7678
collect_prometheus_metrics, IntervalTrigger(seconds=10), max_instances=1

src/dstack/_internal/server/migrations/versions/f27d4a29cd38_add_events_and_event_targets.py renamed to src/dstack/_internal/server/migrations/versions/22d74df9897e_add_events_and_event_targets.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Add events and event_targets
22
3-
Revision ID: f27d4a29cd38
4-
Revises: 7d1ec2b920ac
5-
Create Date: 2025-11-26 01:01:46.305815
3+
Revision ID: 22d74df9897e
4+
Revises: 5fd659afca82
5+
Create Date: 2025-12-04 20:56:08.003504
66
77
"""
88

@@ -13,8 +13,8 @@
1313
import dstack._internal.server.models
1414

1515
# revision identifiers, used by Alembic.
16-
revision = "f27d4a29cd38"
17-
down_revision = "7d1ec2b920ac"
16+
revision = "22d74df9897e"
17+
down_revision = "5fd659afca82"
1818
branch_labels = None
1919
depends_on = None
2020

@@ -68,9 +68,15 @@ def upgrade() -> None:
6868
sa.PrimaryKeyConstraint("id", name=op.f("pk_event_targets")),
6969
)
7070
with op.batch_alter_table("event_targets", schema=None) as batch_op:
71+
batch_op.create_index(
72+
batch_op.f("ix_event_targets_entity_id"), ["entity_id"], unique=False
73+
)
7174
batch_op.create_index(
7275
batch_op.f("ix_event_targets_entity_project_id"), ["entity_project_id"], unique=False
7376
)
77+
batch_op.create_index(
78+
batch_op.f("ix_event_targets_entity_type"), ["entity_type"], unique=False
79+
)
7480
batch_op.create_index(batch_op.f("ix_event_targets_event_id"), ["event_id"], unique=False)
7581

7682
# ### end Alembic commands ###
@@ -80,7 +86,9 @@ def downgrade() -> None:
8086
# ### commands auto generated by Alembic - please adjust! ###
8187
with op.batch_alter_table("event_targets", schema=None) as batch_op:
8288
batch_op.drop_index(batch_op.f("ix_event_targets_event_id"))
89+
batch_op.drop_index(batch_op.f("ix_event_targets_entity_type"))
8390
batch_op.drop_index(batch_op.f("ix_event_targets_entity_project_id"))
91+
batch_op.drop_index(batch_op.f("ix_event_targets_entity_id"))
8492

8593
op.drop_table("event_targets")
8694
with op.batch_alter_table("events", schema=None) as batch_op:
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Add ix_instances_fleet_id
2+
3+
Revision ID: 5fd659afca82
4+
Revises: d4d9dc26cf58
5+
Create Date: 2025-12-04 20:52:07.015334
6+
7+
"""
8+
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision = "5fd659afca82"
13+
down_revision = "d4d9dc26cf58"
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade() -> None:
19+
# ### commands auto generated by Alembic - please adjust! ###
20+
with op.batch_alter_table("instances", schema=None) as batch_op:
21+
batch_op.create_index(batch_op.f("ix_instances_fleet_id"), ["fleet_id"], unique=False)
22+
23+
# ### end Alembic commands ###
24+
25+
26+
def downgrade() -> None:
27+
# ### commands auto generated by Alembic - please adjust! ###
28+
with op.batch_alter_table("instances", schema=None) as batch_op:
29+
batch_op.drop_index(batch_op.f("ix_instances_fleet_id"))
30+
31+
# ### end Alembic commands ###
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Add ix_jobs_run_id
2+
3+
Revision ID: d4d9dc26cf58
4+
Revises: 006512f572b4
5+
Create Date: 2025-12-04 20:48:10.543248
6+
7+
"""
8+
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision = "d4d9dc26cf58"
13+
down_revision = "006512f572b4"
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade() -> None:
19+
# ### commands auto generated by Alembic - please adjust! ###
20+
with op.batch_alter_table("jobs", schema=None) as batch_op:
21+
batch_op.create_index(batch_op.f("ix_jobs_run_id"), ["run_id"], unique=False)
22+
23+
# ### end Alembic commands ###
24+
25+
26+
def downgrade() -> None:
27+
# ### commands auto generated by Alembic - please adjust! ###
28+
with op.batch_alter_table("jobs", schema=None) as batch_op:
29+
batch_op.drop_index(batch_op.f("ix_jobs_run_id"))
30+
31+
# ### end Alembic commands ###

src/dstack/_internal/server/models.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,9 @@ class JobModel(BaseModel):
408408
project_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"))
409409
project: Mapped["ProjectModel"] = relationship()
410410

411-
run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id", ondelete="CASCADE"))
411+
run_id: Mapped[uuid.UUID] = mapped_column(
412+
ForeignKey("runs.id", ondelete="CASCADE"), index=True
413+
)
412414
run: Mapped["RunModel"] = relationship()
413415

414416
# Jobs need to reference fleets because we may choose an optimal fleet for a master job
@@ -602,7 +604,7 @@ class InstanceModel(BaseModel):
602604
)
603605
pool: Mapped[Optional["PoolModel"]] = relationship(back_populates="instances")
604606

605-
fleet_id: Mapped[Optional[uuid.UUID]] = mapped_column(ForeignKey("fleets.id"))
607+
fleet_id: Mapped[Optional[uuid.UUID]] = mapped_column(ForeignKey("fleets.id"), index=True)
606608
fleet: Mapped[Optional["FleetModel"]] = relationship(back_populates="instances")
607609

608610
compute_group_id: Mapped[Optional[uuid.UUID]] = mapped_column(ForeignKey("compute_groups.id"))
@@ -858,16 +860,14 @@ class SecretModel(BaseModel):
858860
class EventModel(BaseModel):
859861
__tablename__ = "events"
860862

861-
id: Mapped[uuid.UUID] = mapped_column(
862-
UUIDType(binary=False), primary_key=True, default=uuid.uuid4
863-
)
863+
id: Mapped[uuid.UUID] = mapped_column(UUIDType(binary=False), primary_key=True)
864864
message: Mapped[str] = mapped_column(Text)
865865
recorded_at: Mapped[datetime] = mapped_column(NaiveDateTime, index=True)
866866

867867
actor_user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
868868
ForeignKey("users.id", ondelete="CASCADE"), nullable=True, index=True
869869
)
870-
user: Mapped[Optional["UserModel"]] = relationship()
870+
actor_user: Mapped[Optional["UserModel"]] = relationship()
871871

872872
targets: Mapped[List["EventTargetModel"]] = relationship(back_populates="event")
873873

@@ -889,6 +889,8 @@ class EventTargetModel(BaseModel):
889889
)
890890
entity_project: Mapped[Optional["ProjectModel"]] = relationship()
891891

892-
entity_type: Mapped[EventTargetType] = mapped_column(EnumAsString(EventTargetType, 100))
893-
entity_id: Mapped[uuid.UUID] = mapped_column(UUIDType(binary=False))
892+
entity_type: Mapped[EventTargetType] = mapped_column(
893+
EnumAsString(EventTargetType, 100), index=True
894+
)
895+
entity_id: Mapped[uuid.UUID] = mapped_column(UUIDType(binary=False), index=True)
894896
entity_name: Mapped[str] = mapped_column(String(200))

src/dstack/_internal/server/routers/events.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from sqlalchemy.ext.asyncio import AsyncSession
33

44
import dstack._internal.server.services.events as events_services
5+
from dstack._internal.core.errors import ServerClientError
56
from dstack._internal.core.models.events import Event
67
from dstack._internal.server.db import get_session
78
from dstack._internal.server.models import UserModel
@@ -11,6 +12,7 @@
1112
CustomORJSONResponse,
1213
get_base_api_additional_responses,
1314
)
15+
from dstack._internal.settings import FeatureFlags
1416

1517
root_router = APIRouter(
1618
prefix="/api/events",
@@ -28,9 +30,14 @@ async def list_events(
2830
"""
2931
Returns events visible to the current user.
3032
33+
Regular users can see events related to themselves and to projects they are members of.
34+
Global admins can see all events.
35+
3136
The results are paginated. To get the next page, pass `recorded_at` and `id` of
3237
the last event from the previous page as `prev_recorded_at` and `prev_id`.
3338
"""
39+
if not FeatureFlags.EVENTS:
40+
raise ServerClientError("Events are disabled on this server")
3441
return CustomORJSONResponse(
3542
await events_services.list_events(
3643
session=session,

0 commit comments

Comments
 (0)