diff --git a/services/core/models/src/nmp/core/models/controllers/backends/docker/config.py b/services/core/models/src/nmp/core/models/controllers/backends/docker/config.py index 427eb1596b..1df1b74d68 100644 --- a/services/core/models/src/nmp/core/models/controllers/backends/docker/config.py +++ b/services/core/models/src/nmp/core/models/controllers/backends/docker/config.py @@ -173,6 +173,31 @@ class DockerBackendConfig(BaseModel): description="BusyBox image tag used for helper containers.", ) + lora_sidecar_image_name: str = Field( + default="nmp-api", + description=( + "Image name (without registry/tag) used for the LoRA adapters sidecar container. " + "Registry and tag are taken from NMP_IMAGE_REGISTRY / NMP_IMAGE_TAG. " + "The sidecar is invoked via the lora_sidecar_command." + ), + ) + + lora_sidecar_command: list[str] = Field( + default=["--sidecars", "adapters", "--port", "60830"], + description=( + "Command passed to the LoRA sidecar container. " + "Default uses the nmp-platform-runner entrypoint present in nmp-api. " + ), + ) + + lora_sidecar_entrypoint: str = Field( + default="", + description=( + "Optional entrypoint override for the LoRA sidecar container. " + "Leave empty to use the image's default entrypoint (correct for nmp-api). " + ), + ) + # ========================================================================== # PENDING timeout and crash loop detection # ========================================================================== diff --git a/services/core/models/src/nmp/core/models/controllers/backends/docker/creation_reconciler.py b/services/core/models/src/nmp/core/models/controllers/backends/docker/creation_reconciler.py index 784338959e..a704c84794 100644 --- a/services/core/models/src/nmp/core/models/controllers/backends/docker/creation_reconciler.py +++ b/services/core/models/src/nmp/core/models/controllers/backends/docker/creation_reconciler.py @@ -1082,7 +1082,7 @@ async def cleanup_and_error(status_message: str, error_details: dict) -> tuple[D if view.lora_enabled: cfg = get_platform_config() - image = get_qualified_image("nmp-api") + image = get_qualified_image(self._backend_config.lora_sidecar_image_name) sidecar_envs = cfg.to_shared_envvars() sidecar_envs.update(env_vars) # The adapters sidecar is engine-agnostic: it downloads enabled LoRA @@ -1105,7 +1105,7 @@ async def cleanup_and_error(status_message: str, error_details: dict) -> tuple[D "image": image, "name": f"{container_name}-sidecar", "environment": sidecar_envs, - "command": ["--sidecars", "adapters", "--port", "60830"], + "command": self._backend_config.lora_sidecar_command, "detach": True, "volumes": { state.volume_name: {"bind": "/model-store", "mode": "rw"}, @@ -1120,6 +1120,8 @@ async def cleanup_and_error(status_message: str, error_details: dict) -> tuple[D "healthcheck": {"test": ["NONE"]}, "ports": {}, } + if self._backend_config.lora_sidecar_entrypoint: + sidecar_args["entrypoint"] = self._backend_config.lora_sidecar_entrypoint self._assign_network(sidecar_args) diff --git a/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/config.py b/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/config.py index 5d6badd62b..d3fbd70aba 100644 --- a/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/config.py +++ b/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/config.py @@ -35,6 +35,32 @@ class K8sNimOperatorConfig(BaseModel): ge=1, description="PEFT refresh interval in seconds (only used when lora_enabled is true)", ) + lora_sidecar_image_name: str = Field( + default="nmp-api", + description=( + "Image name (without registry/tag) for the LoRA adapters sidecar container. " + "Registry and tag are taken from the platform config (NMP_IMAGE_REGISTRY / NMP_IMAGE_TAG). " + "Override to 'nmp-automodel-tasks' for local dev when that image is already available " + "but nmp-api is not." + ), + ) + lora_sidecar_command: list[str] = Field( + default=["nemo", "services", "run", "--sidecars", "adapters"], + description=( + "Kubernetes container command (entrypoint) for the LoRA sidecar. " + "Default uses the nmp-platform-runner entrypoint present in nmp-api. " + "When using nmp-automodel-tasks set to ['python'] and set lora_sidecar_args to " + "['-m', 'nmp.core.models.sidecars.adapters.main']." + ), + ) + lora_sidecar_args: list[str] = Field( + default=[], + description=( + "Kubernetes container args for the LoRA sidecar (appended after lora_sidecar_command). " + "Leave empty for nmp-api. " + "Set to ['-m', 'nmp.core.models.sidecars.adapters.main'] when using nmp-automodel-tasks." + ), + ) # Security context default_user_id: Optional[int] = Field( diff --git a/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/nimservice_compiler.py b/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/nimservice_compiler.py index e8066a07b5..545657602a 100644 --- a/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/nimservice_compiler.py +++ b/services/core/models/src/nmp/core/models/controllers/backends/k8s_nim_operator/nimservice_compiler.py @@ -362,18 +362,13 @@ def compile_nimservice( ContainerSpec( name=_get_k8s_safe_name(resource_name, max_length=63, suffix="-lora-sidecar", name_type="label"), image=Image( - repository=f"{platform_config.image_registry}/nmp-api", + repository=f"{platform_config.image_registry}/{backend_config.lora_sidecar_image_name}", tag=platform_config.image_tag, pullPolicy="IfNotPresent", pullSecrets=image_pull_secrets if image_pull_secrets else None, ), - command=[ - "nemo", - "services", - "run", - "--sidecars", - "adapters", - ], + command=backend_config.lora_sidecar_command, + args=backend_config.lora_sidecar_args if backend_config.lora_sidecar_args else None, env=sidecar_env_vars, ) ]