From 73256ba5cd177b21652eb6f8a9ba0f12cba812b6 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 5 Sep 2025 12:16:19 +0500 Subject: [PATCH 1/2] Forbid deleting projects with active resources --- .../server/services/backends/handlers.py | 2 + .../_internal/server/services/projects.py | 53 +++++++- .../_internal/server/routers/test_projects.py | 120 +++++++++++++++++- 3 files changed, 172 insertions(+), 3 deletions(-) diff --git a/src/dstack/_internal/server/services/backends/handlers.py b/src/dstack/_internal/server/services/backends/handlers.py index 77f8d9832f..f3f5bab68c 100644 --- a/src/dstack/_internal/server/services/backends/handlers.py +++ b/src/dstack/_internal/server/services/backends/handlers.py @@ -20,6 +20,8 @@ async def delete_backends_safe( error: bool = True, ): try: + # FIXME: The checks are not under lock, + # so there can be dangling active resources due to race conditions. await _check_active_instances( session=session, project=project, diff --git a/src/dstack/_internal/server/services/projects.py b/src/dstack/_internal/server/services/projects.py index 2ec37523e4..f5b5acd407 100644 --- a/src/dstack/_internal/server/services/projects.py +++ b/src/dstack/_internal/server/services/projects.py @@ -14,8 +14,16 @@ from dstack._internal.core.backends.models import BackendInfo from dstack._internal.core.errors import ForbiddenError, ResourceExistsError, ServerClientError from dstack._internal.core.models.projects import Member, MemberPermissions, Project +from dstack._internal.core.models.runs import RunStatus from dstack._internal.core.models.users import GlobalRole, ProjectRole -from dstack._internal.server.models import MemberModel, ProjectModel, UserModel +from dstack._internal.server.models import ( + FleetModel, + MemberModel, + ProjectModel, + RunModel, + UserModel, + VolumeModel, +) from dstack._internal.server.schemas.projects import MemberSetting from dstack._internal.server.services import users from 
dstack._internal.server.services.backends import ( @@ -178,6 +186,19 @@ async def delete_projects( raise ForbiddenError() if all(name in projects_names for name in user_project_names): raise ServerClientError("Cannot delete the only project") + + res = await session.execute( + select(ProjectModel.id).where(ProjectModel.name.in_(projects_names)) + ) + project_ids = res.scalars().all() + if len(project_ids) != len(projects_names): + raise ServerClientError("Failed to delete non-existent projects") + + for project_id in project_ids: + # FIXME: The checks are not under lock, + # so there can be dangling active resources due to race conditions. + await _check_project_has_active_resources(session=session, project_id=project_id) + timestamp = str(int(get_current_datetime().timestamp())) new_project_name = "_deleted_" + timestamp + ProjectModel.name await session.execute( @@ -614,6 +635,36 @@ def _is_project_admin( return False +async def _check_project_has_active_resources(session: AsyncSession, project_id: uuid.UUID): + res = await session.execute( + select(RunModel.run_name).where( + RunModel.project_id == project_id, + RunModel.status.not_in(RunStatus.finished_statuses()), + ) + ) + run_names = list(res.scalars().all()) + if len(run_names) > 0: + raise ServerClientError(f"Failed to delete project with active runs: {run_names}") + res = await session.execute( + select(FleetModel.name).where( + FleetModel.project_id == project_id, + FleetModel.deleted.is_(False), + ) + ) + fleet_names = list(res.scalars().all()) + if len(fleet_names) > 0: + raise ServerClientError(f"Failed to delete project with active fleets: {fleet_names}") + res = await session.execute( + select(VolumeModel.name).where( + VolumeModel.project_id == project_id, + VolumeModel.deleted.is_(False), + ) + ) + volume_names = list(res.scalars().all()) + if len(volume_names) > 0: + raise ServerClientError(f"Failed to delete project with active volumes: {volume_names}") + + async def remove_project_members( 
session: AsyncSession, user: UserModel, diff --git a/src/tests/_internal/server/routers/test_projects.py b/src/tests/_internal/server/routers/test_projects.py index d53e8a84fe..2fe7cc1888 100644 --- a/src/tests/_internal/server/routers/test_projects.py +++ b/src/tests/_internal/server/routers/test_projects.py @@ -8,14 +8,20 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from dstack._internal.core.models.fleets import FleetStatus +from dstack._internal.core.models.runs import RunStatus from dstack._internal.core.models.users import GlobalRole, ProjectRole from dstack._internal.server.models import MemberModel, ProjectModel from dstack._internal.server.services.permissions import DefaultPermissions from dstack._internal.server.services.projects import add_project_member from dstack._internal.server.testing.common import ( create_backend, + create_fleet, create_project, + create_repo, + create_run, create_user, + create_volume, default_permissions_context, get_auth_headers, ) @@ -484,6 +490,19 @@ async def test_deletes_projects(self, test_db, session: AsyncSession, client: As assert project1.deleted assert not project2.deleted + @pytest.mark.asyncio + @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True) + async def test_returns_400_if_project_does_not_exist( + self, test_db, session: AsyncSession, client: AsyncClient + ): + user = await create_user(session=session, global_role=GlobalRole.ADMIN) + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": ["random_project"]}, + ) + assert response.status_code == 400 + @pytest.mark.asyncio @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True) async def test_returns_403_if_not_project_admin( @@ -505,7 +524,7 @@ async def test_returns_403_if_not_project_admin( json={"projects_names": [project1.name, project2.name]}, ) assert response.status_code == 403 - res = await 
session.execute(select(ProjectModel)) + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) assert len(res.all()) == 2 @pytest.mark.asyncio @@ -521,8 +540,105 @@ async def test_returns_403_if_not_project_member( json={"projects_names": [project.name]}, ) assert response.status_code == 403 - res = await session.execute(select(ProjectModel)) + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 1 + + @pytest.mark.asyncio + @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True) + async def test_errors_if_project_has_active_runs( + self, test_db, session: AsyncSession, client: AsyncClient + ): + user = await create_user(session=session, global_role=GlobalRole.ADMIN) + project = await create_project(session=session, name="project") + repo = await create_repo(session=session, project_id=project.id) + run = await create_run( + session=session, + project=project, + repo=repo, + user=user, + status=RunStatus.SUBMITTED, + ) + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 400 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 1 + run.status = RunStatus.TERMINATED + await session.commit() + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 200 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 0 + + @pytest.mark.asyncio + @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True) + async def test_errors_if_project_has_active_fleets( + self, test_db, session: AsyncSession, client: AsyncClient + ): + user = await create_user(session=session, 
global_role=GlobalRole.ADMIN) + project = await create_project(session=session, name="project") + fleet = await create_fleet( + session=session, + project=project, + deleted=False, + ) + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 400 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) assert len(res.all()) == 1 + fleet.status = FleetStatus.TERMINATED + fleet.deleted = True + await session.commit() + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 200 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 0 + + @pytest.mark.asyncio + @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True) + async def test_errors_if_project_has_active_volumes( + self, test_db, session: AsyncSession, client: AsyncClient + ): + user = await create_user(session=session, global_role=GlobalRole.ADMIN) + project = await create_project(session=session, name="project") + volume = await create_volume( + session=session, + project=project, + user=user, + ) + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 400 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 1 + volume.deleted = True + await session.commit() + response = await client.post( + "/api/projects/delete", + headers=get_auth_headers(user.token), + json={"projects_names": [project.name]}, + ) + assert response.status_code == 200 + res = await session.execute(select(ProjectModel).where(ProjectModel.deleted.is_(False))) + assert len(res.all()) == 0 class TestGetProject: 
From a2cd79a222e5131b13d0ffae28e23dbe731bee21 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 5 Sep 2025 12:21:14 +0500 Subject: [PATCH 2/2] Handle active fleet in deleted project --- .../_internal/server/background/tasks/process_fleets.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/dstack/_internal/server/background/tasks/process_fleets.py b/src/dstack/_internal/server/background/tasks/process_fleets.py index bd8e760a6d..176b56644b 100644 --- a/src/dstack/_internal/server/background/tasks/process_fleets.py +++ b/src/dstack/_internal/server/background/tasks/process_fleets.py @@ -177,6 +177,14 @@ def _maintain_fleet_nodes_min( def _autodelete_fleet(fleet_model: FleetModel) -> bool: + if fleet_model.project.deleted: + # It used to be possible to delete a project with active resources: + # https://github.com/dstackai/dstack/issues/3077 + fleet_model.status = FleetStatus.TERMINATED + fleet_model.deleted = True + logger.info("Fleet %s deleted due to deleted project", fleet_model.name) + return True + if is_fleet_in_use(fleet_model) or not is_fleet_empty(fleet_model): return False