diff --git a/src/dstack/_internal/core/compatibility/runs.py b/src/dstack/_internal/core/compatibility/runs.py
index 385f9bd8fa..97f90c8d2e 100644
--- a/src/dstack/_internal/core/compatibility/runs.py
+++ b/src/dstack/_internal/core/compatibility/runs.py
@@ -97,6 +97,8 @@ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
         configuration_excludes["rate_limits"] = True
     if configuration.shell is None:
         configuration_excludes["shell"] = True
+    if configuration.docker is None:
+        configuration_excludes["docker"] = True
     if configuration.priority is None:
         configuration_excludes["priority"] = True
     if configuration.startup_order is None:
diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py
index bc6ba3235c..92ae999ba0 100644
--- a/src/dstack/_internal/core/models/configurations.py
+++ b/src/dstack/_internal/core/models/configurations.py
@@ -194,12 +194,14 @@ class BaseRunConfiguration(CoreModel):
     ] = None
     python: Annotated[
         Optional[PythonVersion],
-        Field(description="The major version of Python. Mutually exclusive with `image`"),
+        Field(
+            description="The major version of Python. Mutually exclusive with `image` and `docker`"
+        ),
     ] = None
     nvcc: Annotated[
         Optional[bool],
         Field(
-            description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image`"
+            description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image` and `docker`"
         ),
     ] = None
     single_branch: Annotated[
@@ -244,6 +246,12 @@ class BaseRunConfiguration(CoreModel):
     volumes: Annotated[
         List[Union[MountPoint, str]], Field(description="The volumes mount points")
     ] = []
+    docker: Annotated[
+        Optional[bool],
+        Field(
+            description="Use Docker inside the container. Mutually exclusive with `image`, `python`, and `nvcc`. Overrides `privileged`"
+        ),
+    ] = None
     # deprecated since 0.18.31; task, service -- no effect; dev-environment -- executed right before `init`
     setup: CommandsList = []
 
@@ -259,6 +267,20 @@ def convert_python(cls, v, values) -> Optional[PythonVersion]:
             return PythonVersion(v)
         return v
 
+    @validator("docker", pre=True, always=True)
+    def _docker(cls, v, values) -> Optional[bool]:
+        # Raise ValueError rather than KeyError: pydantic only turns ValueError,
+        # TypeError and AssertionError from a validator into a ValidationError.
+        if v is True and values.get("image"):
+            raise ValueError("`image` and `docker` are mutually exclusive fields")
+        if v is True and values.get("python"):
+            raise ValueError("`python` and `docker` are mutually exclusive fields")
+        if v is True and values.get("nvcc"):
+            raise ValueError("`nvcc` and `docker` are mutually exclusive fields")
+        # Ideally, we'd like to also prohibit privileged=False when docker=True,
+        # but it's not possible to do so without breaking backwards compatibility.
+        return v
+
     @validator("volumes", each_item=True)
     def convert_volumes(cls, v) -> MountPoint:
         if isinstance(v, str):
diff --git a/src/dstack/_internal/server/services/jobs/configurators/base.py b/src/dstack/_internal/server/services/jobs/configurators/base.py
index 6ef0ca7712..465a24fb0d 100644
--- a/src/dstack/_internal/server/services/jobs/configurators/base.py
+++ b/src/dstack/_internal/server/services/jobs/configurators/base.py
@@ -171,6 +171,8 @@ async def _commands(self) -> List[str]:
         return result
 
     def _dstack_image_commands(self) -> List[str]:
+        if self.run_spec.configuration.docker is True:
+            return ["start-dockerd"]
         if (
             self.run_spec.configuration.image is not None
             or self.run_spec.configuration.entrypoint is not None
@@ -201,7 +203,9 @@ def _home_dir(self) -> Optional[str]:
         return self.run_spec.configuration.home_dir
 
     def _image_name(self) -> str:
-        if self.run_spec.configuration.image is not None:
+        if self.run_spec.configuration.docker is True:
+            return settings.DSTACK_DIND_IMAGE
+        elif self.run_spec.configuration.image is not None:
             return self.run_spec.configuration.image
         return get_default_image(nvcc=bool(self.run_spec.configuration.nvcc))
 
@@ -215,6 +219,8 @@ async def _user(self) -> Optional[UnixUser]:
         return UnixUser.parse(user)
 
     def _privileged(self) -> bool:
+        if self.run_spec.configuration.docker is True:
+            return True
         return self.run_spec.configuration.privileged
 
     def _single_branch(self) -> bool:
diff --git a/src/dstack/_internal/settings.py b/src/dstack/_internal/settings.py
index 2636a3b362..52fd008001 100644
--- a/src/dstack/_internal/settings.py
+++ b/src/dstack/_internal/settings.py
@@ -17,6 +17,7 @@
 DSTACK_BASE_IMAGE_UBUNTU_VERSION = os.getenv(
     "DSTACK_BASE_IMAGE_UBUNTU_VERSION", version.base_image_ubuntu_version
 )
+DSTACK_DIND_IMAGE = os.getenv("DSTACK_DIND_IMAGE", "dstackai/dind")
 
 
 class FeatureFlags:
diff --git a/src/tests/_internal/server/routers/test_runs.py b/src/tests/_internal/server/routers/test_runs.py
index 65c4fcd131..c2e50794a6 100644
--- a/src/tests/_internal/server/routers/test_runs.py
+++ b/src/tests/_internal/server/routers/test_runs.py
@@ -77,8 +77,52 @@ def get_dev_env_run_plan_dict(
     action: ApplyAction = ApplyAction.CREATE,
     current_resource: Optional[Run] = None,
     privileged: bool = False,
+    docker: bool = False,
     volumes: List[MountPoint] = [],
 ) -> Dict:
+    # When docker=True, commands should start with start-dockerd
+    if docker:
+        commands = [
+            "/bin/bash",
+            "-i",
+            "-c",
+            "start-dockerd && (echo pip install ipykernel... && "
+            "pip install -q --no-cache-dir "
+            'ipykernel 2> /dev/null) || echo "no '
+            'pip, ipykernel was not installed" '
+            "&& echo '' && echo To open in VS "
+            "Code Desktop, use link below: && "
+            "echo '' && echo ' "
+            "vscode://vscode-remote/ssh-remote+dry-run/workflow' "
+            "&& echo '' && echo 'To connect via "
+            "SSH, use: `ssh dry-run`' && echo '' "
+            "&& echo -n 'To exit, press Ctrl+C.' "
+            "&& tail -f /dev/null",
+        ]
+        image_name = "dstackai/dind"
+    else:
+        commands = [
+            "/bin/bash",
+            "-i",
+            "-c",
+            "uv venv --python 3.13 --prompt workflow --seed /workflow/.venv > /dev/null 2>&1"
+            " && echo 'source /workflow/.venv/bin/activate' >> ~/.bashrc"
+            " && source /workflow/.venv/bin/activate"
+            " && (echo pip install ipykernel... && "
+            "pip install -q --no-cache-dir "
+            'ipykernel 2> /dev/null) || echo "no '
+            'pip, ipykernel was not installed" '
+            "&& echo '' && echo To open in VS "
+            "Code Desktop, use link below: && "
+            "echo '' && echo ' "
+            "vscode://vscode-remote/ssh-remote+dry-run/workflow' "
+            "&& echo '' && echo 'To connect via "
+            "SSH, use: `ssh dry-run`' && echo '' "
+            "&& echo -n 'To exit, press Ctrl+C.' "
+            "&& tail -f /dev/null",
+        ]
+        image_name = "dstackai/base:0.10-base-ubuntu22.04"
+
     run_spec = {
         "configuration": {
             "entrypoint": None,
@@ -90,11 +134,12 @@ def get_dev_env_run_plan_dict(
             "version": None,
             "image": None,
             "user": None,
+            "docker": docker,
             "shell": None,
             "privileged": privileged,
             "init": [],
             "ports": [],
-            "python": "3.13",
+            "python": "3.13" if not docker else None,
             "nvcc": None,
             "registry_auth": None,
             "setup": [],
@@ -166,31 +211,12 @@ def get_dev_env_run_plan_dict(
             {
                 "job_spec": {
                     "app_specs": [],
-                    "commands": [
-                        "/bin/bash",
-                        "-i",
-                        "-c",
-                        "uv venv --python 3.13 --prompt workflow --seed /workflow/.venv > /dev/null 2>&1"
-                        " && echo 'source /workflow/.venv/bin/activate' >> ~/.bashrc"
-                        " && source /workflow/.venv/bin/activate"
-                        " && (echo pip install ipykernel... && "
-                        "pip install -q --no-cache-dir "
-                        'ipykernel 2> /dev/null) || echo "no '
-                        'pip, ipykernel was not installed" '
-                        "&& echo '' && echo To open in VS "
-                        "Code Desktop, use link below: && "
-                        "echo '' && echo ' "
-                        "vscode://vscode-remote/ssh-remote+dry-run/workflow' "
-                        "&& echo '' && echo 'To connect via "
-                        "SSH, use: `ssh dry-run`' && echo '' "
-                        "&& echo -n 'To exit, press Ctrl+C.' "
-                        "&& tail -f /dev/null",
-                    ],
+                    "commands": commands,
                     "env": {},
                     "home_dir": "/root",
-                    "image_name": "dstackai/base:0.10-base-ubuntu22.04",
+                    "image_name": image_name,
                     "user": None,
-                    "privileged": privileged,
+                    "privileged": True if docker else privileged,
                     "job_name": f"{run_name}-0-0",
                     "replica_num": 0,
                     "job_num": 0,
@@ -223,7 +249,7 @@ def get_dev_env_run_plan_dict(
             }
         ],
         "current_resource": current_resource.dict() if current_resource else None,
-        "action": action,
+        "action": action.value,
     }
 
 
@@ -238,8 +264,52 @@ def get_dev_env_run_dict(
     last_processed_at: str = "2023-01-02T03:04:00+00:00",
     finished_at: Optional[str] = "2023-01-02T03:04:00+00:00",
     privileged: bool = False,
+    docker: Optional[bool] = None,
     deleted: bool = False,
 ) -> Dict:
+    # When docker=True, commands should start with start-dockerd and use dind image
+    if docker:
+        commands = [
+            "/bin/bash",
+            "-i",
+            "-c",
+            "start-dockerd && (echo pip install ipykernel... && "
+            "pip install -q --no-cache-dir "
+            'ipykernel 2> /dev/null) || echo "no '
+            'pip, ipykernel was not installed" '
+            "&& echo '' && echo To open in VS "
+            "Code Desktop, use link below: && "
+            "echo '' && echo ' "
+            "vscode://vscode-remote/ssh-remote+test-run/workflow' "
+            "&& echo '' && echo 'To connect via "
+            "SSH, use: `ssh test-run`' && echo '' "
+            "&& echo -n 'To exit, press Ctrl+C.' "
+            "&& tail -f /dev/null",
+        ]
+        image_name = "dstackai/dind"
+    else:
+        commands = [
+            "/bin/bash",
+            "-i",
+            "-c",
+            "uv venv --python 3.13 --prompt workflow --seed /workflow/.venv > /dev/null 2>&1"
+            " && echo 'source /workflow/.venv/bin/activate' >> ~/.bashrc"
+            " && source /workflow/.venv/bin/activate"
+            " && (echo pip install ipykernel... && "
+            "pip install -q --no-cache-dir "
+            'ipykernel 2> /dev/null) || echo "no '
+            'pip, ipykernel was not installed" '
+            "&& echo '' && echo To open in VS "
+            "Code Desktop, use link below: && "
+            "echo '' && echo ' "
+            "vscode://vscode-remote/ssh-remote+test-run/workflow' "
+            "&& echo '' && echo 'To connect via "
+            "SSH, use: `ssh test-run`' && echo '' "
+            "&& echo -n 'To exit, press Ctrl+C.' "
+            "&& tail -f /dev/null",
+        ]
+        image_name = "dstackai/base:0.10-base-ubuntu22.04"
+
     return {
         "id": run_id,
         "project_name": project_name,
@@ -259,11 +329,12 @@ def get_dev_env_run_dict(
                 "version": None,
                 "image": None,
                 "user": None,
+                "docker": docker,
                 "shell": None,
                 "privileged": privileged,
                 "init": [],
                 "ports": [],
-                "python": "3.13",
+                "python": "3.13" if not docker else None,
                 "nvcc": None,
                 "registry_auth": None,
                 "setup": [],
@@ -330,31 +401,12 @@ def get_dev_env_run_dict(
             {
                 "job_spec": {
                     "app_specs": [],
-                    "commands": [
-                        "/bin/bash",
-                        "-i",
-                        "-c",
-                        "uv venv --python 3.13 --prompt workflow --seed /workflow/.venv > /dev/null 2>&1"
-                        " && echo 'source /workflow/.venv/bin/activate' >> ~/.bashrc"
-                        " && source /workflow/.venv/bin/activate"
-                        " && (echo pip install ipykernel... && "
-                        "pip install -q --no-cache-dir "
-                        'ipykernel 2> /dev/null) || echo "no '
-                        'pip, ipykernel was not installed" '
-                        "&& echo '' && echo To open in VS "
-                        "Code Desktop, use link below: && "
-                        "echo '' && echo ' "
-                        "vscode://vscode-remote/ssh-remote+test-run/workflow' "
-                        "&& echo '' && echo 'To connect via "
-                        "SSH, use: `ssh test-run`' && echo '' "
-                        "&& echo -n 'To exit, press Ctrl+C.' "
-                        "&& tail -f /dev/null",
-                    ],
+                    "commands": commands,
                     "env": {},
                     "home_dir": "/root",
-                    "image_name": "dstackai/base:0.10-base-ubuntu22.04",
+                    "image_name": image_name,
                     "user": None,
-                    "privileged": privileged,
+                    "privileged": True if docker else privileged,
                     "job_name": f"{run_name}-0-0",
                     "replica_num": 0,
                     "job_num": 0,
@@ -740,10 +792,10 @@ async def test_returns_403_if_not_project_member(
         assert response.status_code == 403
 
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("privileged", [None, False])
+    @pytest.mark.parametrize("privileged", [False])
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_returns_run_plan_privileged_false(
-        self, test_db, session: AsyncSession, client: AsyncClient, privileged: Optional[bool]
+        self, test_db, session: AsyncSession, client: AsyncClient, privileged: bool
     ):
         user = await create_user(session=session, global_role=GlobalRole.USER)
         project = await create_project(session=session, owner=user)
@@ -778,7 +830,7 @@ async def test_returns_run_plan_privileged_false(
             offers=[offer_aws, offer_runpod],
             total_offers=2,
             max_price=2.0,
-            privileged=False,
+            privileged=privileged,
         )
         run_spec = copy.deepcopy(run_plan_dict["run_spec"])
         if privileged is None:
@@ -864,6 +916,68 @@ async def test_returns_run_plan_privileged_true(
         assert response.status_code == 200, response.json()
         assert response.json() == run_plan_dict
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_returns_run_plan_docker_true(
+        self,
+        test_db,
+        session: AsyncSession,
+        client: AsyncClient,
+    ):
+        user = await create_user(session=session, global_role=GlobalRole.USER)
+        project = await create_project(session=session, owner=user)
+        await add_project_member(
+            session=session, project=project, user=user, project_role=ProjectRole.USER
+        )
+        repo = await create_repo(session=session, project_id=project.id)
+        offer_aws = InstanceOfferWithAvailability(
+            backend=BackendType.AWS,
+            instance=InstanceType(
+                name="instance",
+                resources=Resources(cpus=1, memory_mib=512, spot=False, gpus=[]),
+            ),
+            region="us",
+            price=1.0,
+            availability=InstanceAvailability.AVAILABLE,
+        )
+        offer_runpod = InstanceOfferWithAvailability(
+            backend=BackendType.RUNPOD,
+            instance=InstanceType(
+                name="instance",
+                resources=Resources(cpus=1, memory_mib=512, spot=False, gpus=[]),
+            ),
+            region="us",
+            price=2.0,
+            availability=InstanceAvailability.AVAILABLE,
+        )
+        run_plan_dict = get_dev_env_run_plan_dict(
+            project_name=project.name,
+            username=user.name,
+            repo_id=repo.name,
+            offers=[offer_aws],
+            total_offers=1,
+            max_price=1.0,
+            docker=True,
+        )
+        body = {"run_spec": run_plan_dict["run_spec"]}
+        with patch("dstack._internal.server.services.backends.get_project_backends") as m:
+            backend_mock_aws = Mock()
+            backend_mock_aws.TYPE = BackendType.AWS
+            backend_mock_aws.compute.return_value.get_offers_cached.return_value = [offer_aws]
+            backend_mock_runpod = Mock()
+            backend_mock_runpod.TYPE = BackendType.RUNPOD
+            backend_mock_runpod.compute.return_value.get_offers_cached.return_value = [
+                offer_runpod
+            ]
+            m.return_value = [backend_mock_aws, backend_mock_runpod]
+            response = await client.post(
+                f"/api/project/{project.name}/runs/get_plan",
+                headers=get_auth_headers(user.token),
+                json=body,
+            )
+        assert response.status_code == 200, response.json()
+        assert response.json() == run_plan_dict
+
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_returns_run_plan_instance_volumes(
@@ -927,7 +1041,6 @@ async def test_returns_run_plan_instance_volumes(
         assert response.json() == run_plan_dict
 
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     @pytest.mark.parametrize(
         ("old_conf", "new_conf", "action"),
         [
@@ -1204,6 +1317,55 @@ async def test_submits_run(
         job = res.scalar()
         assert job is not None
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_submits_run_docker_true(
+        self, test_db, session: AsyncSession, client: AsyncClient
+    ):
+        user = await create_user(session=session, global_role=GlobalRole.USER)
+        project = await create_project(session=session, owner=user)
+        await add_project_member(
+            session=session, project=project, user=user, project_role=ProjectRole.USER
+        )
+        run_id = UUID("1b0e1b45-2f8c-4ab6-8010-a0d1a3e44e0e")
+        submitted_at = datetime(2023, 1, 2, 3, 4, tzinfo=timezone.utc)
+        submitted_at_formatted = "2023-01-02T03:04:00+00:00"
+        last_processed_at_formatted = submitted_at_formatted
+        repo = await create_repo(session=session, project_id=project.id)
+        run_dict = get_dev_env_run_dict(
+            run_id=str(run_id),
+            job_id=str(run_id),
+            project_name=project.name,
+            username=user.name,
+            submitted_at=submitted_at_formatted,
+            last_processed_at=last_processed_at_formatted,
+            finished_at=None,
+            run_name="test-run",
+            repo_id=repo.name,
+            docker=True,
+            privileged=True,  # docker=True automatically enables privileged mode
+        )
+        body = {"run_spec": run_dict["run_spec"]}
+        with (
+            patch("uuid.uuid4") as uuid_mock,
+            patch("dstack._internal.utils.common.get_current_datetime") as datetime_mock,
+        ):
+            uuid_mock.return_value = run_id
+            datetime_mock.return_value = submitted_at
+            response = await client.post(
+                f"/api/project/{project.name}/runs/submit",
+                headers=get_auth_headers(user.token),
+                json=body,
+            )
+        assert response.status_code == 200, response.json()
+        assert response.json() == run_dict
+        res = await session.execute(select(RunModel))
+        run = res.scalar()
+        assert run is not None
+        res = await session.execute(select(JobModel))
+        job = res.scalar()
+        assert job is not None
+
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_submits_run_without_run_name(