Skip to content

Commit d1ea5e5

Browse files
committed
Handle terminating deleted instances
1 parent 3ec2f23 commit d1ea5e5

File tree

2 files changed

+33
-7
lines changed

2 files changed

+33
-7
lines changed

src/dstack/_internal/server/background/tasks/process_instances.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pydantic import ValidationError
1212
from sqlalchemy import and_, delete, func, not_, select
1313
from sqlalchemy.ext.asyncio import AsyncSession
14-
from sqlalchemy.orm import joinedload, with_loader_criteria
14+
from sqlalchemy.orm import joinedload
1515

1616
from dstack._internal import settings
1717
from dstack._internal.core.backends.base.compute import (
@@ -218,9 +218,8 @@ async def _process_instance(session: AsyncSession, instance: InstanceModel):
218218
.options(joinedload(InstanceModel.project).joinedload(ProjectModel.backends))
219219
.options(joinedload(InstanceModel.jobs).load_only(JobModel.id, JobModel.status))
220220
.options(
221-
joinedload(InstanceModel.fleet).joinedload(FleetModel.instances),
222-
with_loader_criteria(
223-
InstanceModel, InstanceModel.deleted == False, include_aliases=True
221+
joinedload(InstanceModel.fleet).joinedload(
222+
FleetModel.instances.and_(InstanceModel.deleted == False)
224223
),
225224
)
226225
.execution_options(populate_existing=True)
@@ -233,9 +232,8 @@ async def _process_instance(session: AsyncSession, instance: InstanceModel):
233232
.options(joinedload(InstanceModel.project))
234233
.options(joinedload(InstanceModel.jobs).load_only(JobModel.id, JobModel.status))
235234
.options(
236-
joinedload(InstanceModel.fleet).joinedload(FleetModel.instances),
237-
with_loader_criteria(
238-
InstanceModel, InstanceModel.deleted == False, include_aliases=True
235+
joinedload(InstanceModel.fleet).joinedload(
236+
FleetModel.instances.and_(InstanceModel.deleted == False)
239237
),
240238
)
241239
.execution_options(populate_existing=True)

src/tests/_internal/server/background/tasks/test_process_instances.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,34 @@ async def test_terminate(self, test_db, session: AsyncSession):
597597
assert instance.deleted_at is not None
598598
assert instance.finished_at is not None
599599

600+
@pytest.mark.asyncio
601+
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
602+
async def test_terminates_terminating_deleted_instance(self, test_db, session: AsyncSession):
603+
# There was a race condition where an instance could stay in Terminating while marked as deleted.
604+
# TODO:
605+
project = await create_project(session=session)
606+
instance = await create_instance(
607+
session=session, project=project, status=InstanceStatus.TERMINATING
608+
)
609+
instance.deleted = True
610+
instance.termination_reason = InstanceTerminationReason.IDLE_TIMEOUT
611+
instance.last_job_processed_at = instance.deleted_at = (
612+
get_current_datetime() + dt.timedelta(minutes=-19)
613+
)
614+
await session.commit()
615+
616+
with self.mock_terminate_in_backend() as mock:
617+
await process_instances()
618+
mock.assert_called_once()
619+
620+
await session.refresh(instance)
621+
622+
assert instance is not None
623+
assert instance.status == InstanceStatus.TERMINATED
624+
assert instance.deleted == True
625+
assert instance.deleted_at is not None
626+
assert instance.finished_at is not None
627+
600628
@pytest.mark.asyncio
601629
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
602630
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)