Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,20 @@ async def update_model_deployment(
...

@abstractmethod
async def get_model_deployment_status(self, deployment: ModelDeployment) -> DeploymentStatusUpdate:
async def get_model_deployment_status(
self,
deployment: ModelDeployment,
config: Optional[ModelDeploymentConfig] = None,
model_entity: Optional[ModelEntity] = None,
) -> DeploymentStatusUpdate:
"""Get the current status of a model deployment.

Args:
deployment: The ModelDeployment object to check
config: The ModelDeploymentConfig for this deployment. Some backends
need it to advance creation (e.g. the k8s vLLM path emits the
serving Deployment once the weight-puller Job completes).
model_entity: Optional Model entity from Entity Store.

Returns:
DeploymentStatusUpdate with the current deployment status
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,12 @@ async def update_model_deployment(
return delete_result
return await self.create_model_deployment(deployment, config, model_entity)

async def get_model_deployment_status(self, deployment: ModelDeployment) -> DeploymentStatusUpdate:
async def get_model_deployment_status(
self,
deployment: ModelDeployment,
config: Optional[ModelDeploymentConfig] = None,
model_entity: Optional[ModelEntity] = None,
) -> DeploymentStatusUpdate:
"""Get the status of a Docker model deployment.

While the deployment is still progressing through the creation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,27 @@
from nmp.core.models.app import ModelWeightsType, get_model_weights_type, is_multi_llm_image, parse_model_name_revision
from nmp.core.models.app.constants import MODEL_MANAGED_BY_LABEL, MODEL_MANAGED_BY_MODELS_CONTROLLER
from nmp.core.models.app.utils import _get_k8s_safe_name
from nmp.core.models.controllers.backends import vllm_compiler
from nmp.core.models.controllers.backends.backends import DeploymentStatusUpdate
from nmp.core.models.controllers.backends.common import DeploymentConfigView, deployment_config_view
from nmp.core.models.controllers.backends.docker import vllm_compiler
from nmp.core.models.controllers.backends.common import deployment_config_view
from nmp.core.models.controllers.backends.docker.config import (
MODELS_DOCKER_NIM_MULTI_GPU_SHM_SIZE,
MODELS_DOCKER_NIM_MULTI_GPU_SHM_SIZE_PER_GPU,
DockerBackendConfig,
)
from nmp.core.models.controllers.backends.engine import (
ENGINE_HEALTH_PATHS,
ENGINE_LABEL,
ENGINE_NIM,
ENGINE_VLLM,
HEALTH_PATH_LABEL,
)
from nmp.core.models.controllers.backends.engine import (
config_engine as _config_engine,
)
from nmp.core.models.controllers.backends.engine import (
resolve_health_path as _resolve_health_path,
)
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import ReadTimeout
from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
Expand All @@ -64,44 +77,6 @@
NGC_IMAGE_REGISTRY = os.getenv("NGC_IMAGE_REGISTRY", "nvcr.io")
NGC_IMAGE_REGISTRY_USER_NAME = os.getenv("NGC_IMAGE_REGISTRY_USER_NAME", "$oauthtoken")

# Engine discriminant values, as the lowercase strings that
# ``_config_engine`` compares against / returns by default.
ENGINE_NIM = "nim"
ENGINE_VLLM = "vllm"
ENGINE_GENERIC = "generic"

# Docker label recording the engine, read back at status time to pick the health probe.
ENGINE_LABEL = "nmp.nvidia.com/engine"

# Docker label recording the resolved readiness-probe path, read back at status
# time. Stamped at create so status polling doesn't need the deployment config.
HEALTH_PATH_LABEL = "nmp.nvidia.com/health-path"

# Per-engine readiness probe paths (relative to the container host URL).
# ENGINE_GENERIC has no entry here: ``_resolve_health_path`` falls back to the
# ENGINE_NIM path for any engine that is not a key of this mapping.
ENGINE_HEALTH_PATHS: dict[str, str] = {
    ENGINE_NIM: "/v1/health/ready",
    ENGINE_VLLM: "/health",
}


def _config_engine(config: Any) -> str:
    """Normalize ``config.engine`` to a lowercase engine name.

    Args:
        config: Any object; only its optional ``engine`` attribute is read.

    Returns:
        ``ENGINE_NIM`` when ``engine`` is missing or ``None``. Otherwise the
        engine's ``.value`` attribute if it has one (e.g. an enum member),
        else the engine object itself, converted with ``str`` and lowercased.
    """
    raw_engine = getattr(config, "engine", None)
    if raw_engine is not None:
        # Enum-like objects carry the wire string on ``.value``; plain strings
        # have no such attribute and are used as-is.
        engine_name = getattr(raw_engine, "value", raw_engine)
        return str(engine_name).lower()
    return ENGINE_NIM


def _resolve_health_path(engine: str, view: DeploymentConfigView) -> str:
    """Pick the readiness-probe path for a deployment.

    Args:
        engine: Lowercase engine name used to look up the default path.
        view: Deployment config view; its optional ``health_check_path``
            attribute is the explicit override.

    Returns:
        ``view.health_check_path`` when it is set to a truthy value;
        otherwise the ``ENGINE_HEALTH_PATHS`` entry for ``engine``, or the
        ``ENGINE_NIM`` entry when the engine has none (as is the case for
        ``generic``).
    """
    if not getattr(view, "health_check_path", None):
        # No explicit override: use the engine default, NIM path as last resort.
        return ENGINE_HEALTH_PATHS.get(engine, ENGINE_HEALTH_PATHS[ENGINE_NIM])
    return view.health_check_path


def _should_retry_docker_error(exception: BaseException) -> bool:
"""Determine if a Docker exception should be retried."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Backend-agnostic engine dispatch + readiness-probe helpers.

The ``engine`` discriminant on a ``ModelDeploymentConfig`` selects the compiler
path (nim / vllm / generic). These constants and helpers are shared by every
service backend (docker container labels, k8s object labels) so engine selection
and readiness-probe resolution behave identically regardless of where the
deployment runs.
"""

from typing import Any

from nmp.core.models.controllers.backends.common import DeploymentConfigView

# Engine discriminant values, as the lowercase strings that ``config_engine``
# compares against / returns by default.
ENGINE_NIM = "nim"
ENGINE_VLLM = "vllm"
ENGINE_GENERIC = "generic"

# Label recording the engine, read back at status time to pick the health probe.
# Used as a docker container label and a k8s object/pod label.
ENGINE_LABEL = "nmp.nvidia.com/engine"

# Label recording the resolved readiness-probe path, read back at status time.
# Stamped at create so status polling doesn't need the deployment config.
HEALTH_PATH_LABEL = "nmp.nvidia.com/health-path"

# Per-engine readiness probe paths (relative to the container/pod host URL).
# ENGINE_GENERIC has no entry here: ``resolve_health_path`` falls back to the
# ENGINE_NIM path for any engine that is not a key of this mapping.
ENGINE_HEALTH_PATHS: dict[str, str] = {
    ENGINE_NIM: "/v1/health/ready",
    ENGINE_VLLM: "/health",
}


def config_engine(config: Any) -> str:
    """Return the lowercase engine name selected by ``config``.

    Args:
        config: Any object; only its optional ``engine`` attribute is read.

    Returns:
        ``ENGINE_NIM`` when ``engine`` is absent or ``None``. Otherwise the
        engine's ``.value`` (when it has one, e.g. an enum member) or the
        engine object itself, passed through ``str`` and lowercased.
    """
    selected = getattr(config, "engine", None)
    if selected is not None:
        # Depending on the SDK model the engine is an enum or a plain string;
        # ``.value`` is preferred when available.
        return str(getattr(selected, "value", selected)).lower()
    return ENGINE_NIM


def resolve_health_path(engine: str, view: DeploymentConfigView) -> str:
    """Pick the readiness-probe path for a deployment.

    Args:
        engine: Lowercase engine name used to look up the default path.
        view: Deployment config view; its optional ``health_check_path``
            attribute is the explicit override.

    Returns:
        The explicit ``health_check_path`` when it is set to a truthy value;
        otherwise the ``ENGINE_HEALTH_PATHS`` entry for ``engine``, or the
        ``ENGINE_NIM`` entry when the engine has none (as is the case for
        ``generic``).
    """
    override = getattr(view, "health_check_path", None)
    if not override:
        # No explicit override: engine default, with the NIM path as last resort.
        nim_default = ENGINE_HEALTH_PATHS[ENGINE_NIM]
        return ENGINE_HEALTH_PATHS.get(engine, nim_default)
    return override
Loading
Loading